RELEASE=5.2
PACKAGE=ceph
-VER=12.2.7
+VER=12.2.8
DEBREL=pve1
SRCDIR=ceph
cmake_minimum_required(VERSION 2.8.11)
project(ceph)
-set(VERSION 12.2.7)
+set(VERSION 12.2.8)
if(POLICY CMP0046)
# Tweak policies (this one disables "missing" dependency warning)
message(FATAL_ERROR "Cannot have WITH_KRBD with WITH_RBD.")
endif()
-# embedded ceph daemon static library
-# NOTE: Ceph is mostly LGPL (see COPYING), which means that
-# static linking brings with it restrictions. Please be sure
-# to look at the LGPL license carefully before linking this library to
-# your code. See http://www.gnu.org/licenses/gpl-faq.html#LGPLStaticVsDynamic.
-option(WITH_EMBEDDED "build the embedded ceph daemon library" ON)
-
option(WITH_LEVELDB "LevelDB is here" ON)
if(WITH_LEVELDB)
if(LEVELDB_PREFIX)
### Build
```
-./run-make-check.sh -DWITH_EMBEDDED=OFF -DWITH_SYSTEM_BOOST=ON -DWITH_LTTNG=OFF -DWITH_REENTRANT_STRSIGNAL=ON -DWITH_THREAD_SAFE_RES_QUERY=ON
+./run-make-check.sh -DWITH_SYSTEM_BOOST=ON -DWITH_LTTNG=OFF -DWITH_REENTRANT_STRSIGNAL=ON -DWITH_THREAD_SAFE_RES_QUERY=ON
```
### Packaging
or
-./test/docker-test.sh --os-type alpine --os-version edge -- ./run-make-check.sh -DWITH_EMBEDDED=OFF -DWITH_SYSTEM_BOOST=ON -DWITH_LTTNG=OFF -DWITH_REENTRANT_STRSIGNAL=ON -DWITH_THREAD_SAFE_RES_QUERY=ON
+./test/docker-test.sh --os-type alpine --os-version edge -- ./run-make-check.sh -DWITH_SYSTEM_BOOST=ON -DWITH_LTTNG=OFF -DWITH_REENTRANT_STRSIGNAL=ON -DWITH_THREAD_SAFE_RES_QUERY=ON
```
# Contributor: John Coyle <dx9err@gmail.com>
# Maintainer: John Coyle <dx9err@gmail.com>
pkgname=ceph
-pkgver=12.2.7
+pkgver=12.2.8
pkgrel=0
pkgdesc="Ceph is a distributed object store and file system"
pkgusers="ceph"
xmlstarlet
yasm
"
-source="ceph-12.2.7.tar.bz2"
+source="ceph-12.2.8.tar.bz2"
subpackages="
$pkgname-base
$pkgname-common
_udevrulesdir=/etc/udev/rules.d
_python_sitelib=/usr/lib/python2.7/site-packages
-builddir=$srcdir/ceph-12.2.7
+builddir=$srcdir/ceph-12.2.8
build() {
export CEPH_BUILD_VIRTUALENV=$builddir
-DWITH_PYTHON3=OFF \
-DWITH_LTTNG=OFF \
-DWITH_SYSTEM_BOOST=ON \
- -DWITH_EMBEDDED=OFF \
-DWITH_TESTS=${_with_tests:-OFF} \
|| return 1
make -j${JOBS:-2} || return 1
-DWITH_PYTHON3=OFF \
-DWITH_LTTNG=OFF \
-DWITH_SYSTEM_BOOST=ON \
- -DWITH_EMBEDDED=OFF \
-DWITH_TESTS=${_with_tests:-OFF} \
|| return 1
make -j${JOBS:-2} || return 1
# main package definition
#################################################################################
Name: ceph
-Version: 12.2.7
+Version: 12.2.8
Release: 0%{?dist}
%if 0%{?fedora} || 0%{?rhel}
Epoch: 2
Group: System/Filesystems
%endif
URL: http://ceph.com/
-Source0: http://ceph.com/download/ceph-12.2.7.tar.bz2
+Source0: http://ceph.com/download/ceph-12.2.8.tar.bz2
%if 0%{?suse_version}
%if 0%{?is_opensuse}
ExclusiveArch: x86_64 aarch64 ppc64 ppc64le
BuildRequires: python-devel
BuildRequires: python-nose
BuildRequires: python-requests
+BuildRequires: python-six
BuildRequires: python-virtualenv
BuildRequires: snappy-devel
BuildRequires: udev
Group: System/Filesystems
%endif
Requires: ceph-base = %{_epoch_prefix}%{version}-%{release}
+Requires: python-six
%if 0%{?fedora} || 0%{?rhel}
Requires: python-cherrypy
Requires: python-jinja2
# common
#################################################################################
%prep
-%autosetup -p1 -n ceph-12.2.7
+%autosetup -p1 -n ceph-12.2.8
%build
%if 0%{with cephfs_java}
-DCMAKE_INSTALL_MANDIR=%{_mandir} \
-DCMAKE_INSTALL_DOCDIR=%{_docdir}/ceph \
-DCMAKE_INSTALL_INCLUDEDIR=%{_includedir} \
- -DWITH_EMBEDDED=OFF \
-DWITH_MANPAGE=ON \
-DWITH_PYTHON3=ON \
-DWITH_SYSTEMD=ON \
/usr/lib/systemd/systemd-sysctl %{_sysctldir}/90-ceph-osd.conf > /dev/null 2>&1 || :
%endif
# work around https://tracker.ceph.com/issues/24903
-chown -h ceph:ceph /var/lib/ceph/osd/*/block* 2>&1 > /dev/null || :
+chown -f -h ceph:ceph /var/lib/ceph/osd/*/block* 2>&1 > /dev/null || :
%preun osd
%if 0%{?suse_version}
BuildRequires: python-devel
BuildRequires: python-nose
BuildRequires: python-requests
+BuildRequires: python-six
BuildRequires: python-virtualenv
BuildRequires: snappy-devel
BuildRequires: udev
Group: System/Filesystems
%endif
Requires: ceph-base = %{_epoch_prefix}%{version}-%{release}
+Requires: python-six
%if 0%{?fedora} || 0%{?rhel}
Requires: python-cherrypy
Requires: python-jinja2
-DCMAKE_INSTALL_MANDIR=%{_mandir} \
-DCMAKE_INSTALL_DOCDIR=%{_docdir}/ceph \
-DCMAKE_INSTALL_INCLUDEDIR=%{_includedir} \
- -DWITH_EMBEDDED=OFF \
-DWITH_MANPAGE=ON \
-DWITH_PYTHON3=ON \
-DWITH_SYSTEMD=ON \
/usr/lib/systemd/systemd-sysctl %{_sysctldir}/90-ceph-osd.conf > /dev/null 2>&1 || :
%endif
# work around https://tracker.ceph.com/issues/24903
-chown -h ceph:ceph /var/lib/ceph/osd/*/block* 2>&1 > /dev/null || :
+chown -f -h ceph:ceph /var/lib/ceph/osd/*/block* 2>&1 > /dev/null || :
%preun osd
%if 0%{?suse_version}
+++ /dev/null
-# This function is a helper that will merge static libraries.
-# For example,
-#
-# merge_static_libraries(mylib staticlibX staticlibY)
-#
-# mylib.a will generate a new static library mylib that is
-# a combination of staticlibX and staticlibY
-#
-function(merge_static_libraries target)
-
- set(dummy_source ${CMAKE_CURRENT_BINARY_DIR}/${target}_dummy.c)
- add_library(${target} STATIC ${dummy_source})
-
- # remove duplicates
- set(libs ${ARGN})
- list(REMOVE_DUPLICATES libs)
-
- # validate that all libs are static
- foreach(lib ${libs})
- if (NOT TARGET ${lib})
- message(FATAL_ERROR "${lib} not a valid target")
- endif()
-
- get_target_property(libtype ${lib} TYPE)
- if(NOT libtype STREQUAL "STATIC_LIBRARY")
- message(FATAL_ERROR "${lib} not a static library")
- endif()
-
- # add a dependency on the lib
- add_dependencies(${target} ${lib})
- endforeach()
-
- # Force the merged Make the generated dummy source file depended on all static input
- # libs. If input lib changes,the source file is touched
- # which causes the desired effect (relink).
- add_custom_command(
- OUTPUT ${dummy_source}
- COMMAND ${CMAKE_COMMAND} -E touch ${dummy_source}
- DEPENDS ${libs})
-
- # only LINUX is currently supported. OSX's libtool and windows lib.exe
- # have native support for merging static libraries, and support for them
- # can be easily added if required.
- if(LINUX)
- # generate a script to merge the static libraries in to the target
- # library. see https://sourceware.org/binutils/docs/binutils/ar-scripts.html
- set(mri_script "open $<TARGET_FILE:${target}>=")
- foreach(lib ${libs})
- # we use the generator expression TARGET_FILE to get the location
- # of the library. this will not be expanded until the script file
- # is written below
- set(mri_script "${mri_script} addlib $<TARGET_FILE:${lib}>=")
- endforeach()
- set(mri_script "${mri_script} save=end")
-
- add_custom_command(
- TARGET ${target} POST_BUILD
- COMMAND echo ${mri_script} | tr = \\\\n | ${CMAKE_AR} -M)
- endif(LINUX)
-
- message("-- MergeStaticLibraries: ${target}: merged ${libs}")
-
- # we want to set the target_link_libraries correctly for the new merged
- # static library. First we get the list of link libraries for each
- # of the libs we are merging
- set(link_libs)
- foreach(lib ${libs})
- get_property(trans TARGET ${lib} PROPERTY LINK_LIBRARIES)
- list(APPEND link_libs ${trans})
- endforeach()
-
- if (link_libs)
- # now remove the duplicates and any of the libraries we already merged
- list(REMOVE_DUPLICATES link_libs)
- foreach(lib ${libs})
- list(REMOVE_ITEM link_libs ${lib})
- endforeach()
-
- # set the target link libraries
- target_link_libraries(${target} ${link_libs})
-
- message("-- MergeStaticLibraries: ${target}: remaining ${link_libs}")
- endif()
-
-endfunction()
+ceph (12.2.8-1) stable; urgency=medium
+
+ * New upstream release
+
+ -- Ceph Release Team <ceph-maintainers@ceph.com> Thu, 30 Aug 2018 17:24:37 +0000
+
ceph (12.2.7-1) stable; urgency=medium
* New upstream release
python-pecan,
python-prettytable,
python-setuptools,
+ python-six,
python-sphinx,
python-werkzeug,
python3-all-dev,
export DEB_HOST_ARCH ?= $(shell dpkg-architecture -qDEB_HOST_ARCH)
-extraopts += -DUSE_CRYPTOPP=OFF -DWITH_OCF=ON -DWITH_LTTNG=ON -DWITH_PYTHON3=ON -DWITH_EMBEDDED=OFF
+extraopts += -DUSE_CRYPTOPP=OFF -DWITH_OCF=ON -DWITH_LTTNG=ON -DWITH_PYTHON3=ON
extraopts += -DWITH_CEPHFS_JAVA=ON
# assumes that ceph is exmpt from multiarch support, so we override the libdir.
extraopts += -DCMAKE_INSTALL_LIBDIR=/usr/lib
-D CEPH_MAN_DIR=man \
-D WITH_LIBCEPHFS=OFF \
-D WITH_CEPHFS=OFF \
- -D WITH_EMBEDDED=OFF \
-D WITH_MGR=YES \
2>&1 | tee cmake.log
Migrating
---------
-Starting on Ceph version 12.2.2, ``ceph-disk`` is deprecated. Deprecation
+Starting on Ceph version 13.0.0, ``ceph-disk`` is deprecated. Deprecation
warnings will show up that will link to this page. It is strongly suggested
that users start consuming ``ceph-volume``. There are two paths for migrating:
systemd
lvm/index
lvm/activate
+ lvm/batch
lvm/encryption
lvm/prepare
lvm/scan
--- /dev/null
+.. _ceph-volume-lvm-batch:
+
+``batch``
+===========
+This subcommand allows for multiple OSDs to be created at the same time given
+an input of devices. Depending on the device type (spinning drive, or solid
+state), the internal engine will decide the best approach to create the OSDs.
+
+This decision abstracts away the many nuances when creating an OSD: how large
+should a ``block.db`` be? How can one mix a solid state device with spinning
+devices in an efficient way?
+
+The process is similar to :ref:`ceph-volume-lvm-create`, and will do the
+preparation and activation at once, following the same workflow for each OSD.
+
+All the features that ``ceph-volume lvm create`` supports, like ``dmcrypt``,
+avoiding ``systemd`` units from starting, defining bluestore or filestore,
+are supported. Any fine-grained option that may affect a single OSD is not
+supported, for example: specifying where journals should be placed.
+
+
+.. _ceph-volume-lvm-batch_bluestore:
+
+``bluestore``
+-------------
+The :term:`bluestore` objectstore (the default) is used when creating multiple OSDs
+with the ``batch`` sub-command. It allows a few different scenarios depending
+on the input of devices:
+
+#. Devices are all spinning HDDs: 1 OSD is created per device
+#. Devices are all solid state SSDs: 2 OSDs are created per device
+#. Devices are a mix of HDDs and SSDs: data is placed on the spinning device,
+ the ``block.db`` is created on the SSD, as large as possible.
+
+
+.. note:: Although operations in ``ceph-volume lvm create`` allow usage of
+ ``block.wal`` it isn't supported with the ``batch`` sub-command
+
+
+.. _ceph-volume-lvm-batch_filestore:
+
+``filestore``
+-------------
+The :term:`filestore` objectstore can be used when creating multiple OSDs
+with the ``batch`` sub-command. It allows two different scenarios depending
+on the input of devices:
+
+#. Devices are all the same type (for example all spinning HDD or all SSDs):
+ 1 OSD is created per device, collocating the journal in the same HDD.
+#. Devices are a mix of HDDs and SSDs: data is placed on the spinning device,
+ while the journal is created on the SSD using the sizing options from
+ ceph.conf and falling back to the default journal size of 5GB.
+
+
+When a mix of solid and spinning devices are used, ``ceph-volume`` will try to
+detect existing volume groups on the solid devices. If a VG is found, it will
+try to create the logical volume from there, otherwise raising an error if
+space is insufficient.
+
+If a raw solid device is used along with a device that has a volume group in
+addition to some spinning devices, ``ceph-volume`` will try to extend the
+existing volume group and then create a logical volume.
+
+.. _ceph-volume-lvm-batch_report:
+
+Reporting
+=========
+When a call is received to create OSDs, the tool will prompt the user to
+continue if the pre-computed output is acceptable. This output is useful to
+understand the outcome of the received devices. Once confirmation is accepted,
+the process continues.
+
+Although prompts are good to understand outcomes, it is incredibly useful to
+try different inputs to find the best product possible. With the ``--report``
+flag, one can prevent any actual operations and just verify outcomes from
+inputs.
+
+**pretty reporting**
+For two spinning devices, this is how the ``pretty`` report (the default) would
+look::
+
+ $ ceph-volume lvm batch --report /dev/sdb /dev/sdc
+
+ Total OSDs: 2
+
+ Type Path LV Size % of device
+ --------------------------------------------------------------------------------
+ [data] /dev/sdb 10.74 GB 100%
+ --------------------------------------------------------------------------------
+ [data] /dev/sdc 10.74 GB 100%
+
+
+
+**JSON reporting**
+Reporting can produce a richer output with ``JSON``, which gives a few more
+hints on sizing. This feature might be better for other tooling to consume
+information that will need to be transformed.
+
+For two spinning devices, this is how the ``JSON`` report would look::
+
+ $ ceph-volume lvm batch --report --format=json /dev/sdb /dev/sdc
+ {
+ "osds": [
+ {
+ "block.db": {},
+ "data": {
+ "human_readable_size": "10.74 GB",
+ "parts": 1,
+ "path": "/dev/sdb",
+ "percentage": 100,
+ "size": 11534336000.0
+ }
+ },
+ {
+ "block.db": {},
+ "data": {
+ "human_readable_size": "10.74 GB",
+ "parts": 1,
+ "path": "/dev/sdc",
+ "percentage": 100,
+ "size": 11534336000.0
+ }
+ }
+ ],
+ "vgs": [
+ {
+ "devices": [
+ "/dev/sdb"
+ ],
+ "parts": 1
+ },
+ {
+ "devices": [
+ "/dev/sdc"
+ ],
+ "parts": 1
+ }
+ ]
+ }
Commands
========
-.. option:: help
+:command:`help`
show help
-.. option:: fsck
+:command:`fsck` [ --deep ]
run consistency check on BlueStore metadata. If *--deep* is specified, also read all object data and verify checksums.
-.. option:: repair
+:command:`repair`
Run a consistency check *and* repair any errors we can.
-.. option:: bluefs-export
+:command:`bluefs-export`
Export the contents of BlueFS (i.e., rocksdb files) to an output directory.
-.. option:: bluefs-bdev-sizes --path *osd path*
+:command:`bluefs-bdev-sizes` --path *osd path*
Print the device sizes, as understood by BlueFS, to stdout.
-.. option:: bluefs-bdev-expand --path *osd path*
+:command:`bluefs-bdev-expand` --path *osd path*
Instruct BlueFS to check the size of its block devices and, if they have expanded, make use of the additional space.
-.. option:: show-label --dev *device* [...]
+:command:`show-label` --dev *device* [...]
Show device label(s).
| [--log-path LOG_PATH]
| **ceph-volume** **lvm** [ *trigger* | *create* | *activate* | *prepare*
-| *zap* | *list*]
+| *zap* | *list* | *batch*]
| **ceph-volume** **simple** [ *trigger* | *scan* | *activate* ]
Subcommands:
+**batch**
+Creates OSDs from a list of devices using a ``filestore``
+or ``bluestore`` (default) setup. It will create all necessary volume groups
+and logical volumes required to have a working OSD.
+
+Example usage with three devices::
+
+ ceph-volume lvm batch --bluestore /dev/sda /dev/sdb /dev/sdc
+
+Optional arguments:
+
+* [-h, --help] show the help message and exit
+* [--bluestore] Use the bluestore objectstore (default)
+* [--filestore] Use the filestore objectstore
+* [--yes] Skip the report and prompt to continue provisioning
+* [--dmcrypt] Enable encryption for the underlying OSD devices
+* [--crush-device-class] Define a CRUSH device class to assign the OSD to
+* [--no-systemd] Do not enable or create any systemd units
+* [--report] Report what the potential outcome would be for the
+ current input (requires devices to be passed in)
+* [--format] Output format when reporting (used along with
+ --report), can be one of 'pretty' (default) or 'json'
+
+Required positional arguments:
+
+* <DEVICE> Full path to a raw device, like ``/dev/sda``. Multiple
+ ``<DEVICE>`` paths can be passed in.
+
+
**activate**
Enables a systemd unit that persists the OSD ID and its UUID (also called
``fsid`` in Ceph CLI tools), so that at boot time it can understand what OSD is
NOTE: A negative value means to disable subdir merging
:Type: Integer
:Required: No
-:Default: ``10``
+:Default: ``-10``
``filestore split multiple``
file. For example:
.. code-block:: ini
-
+
[osd]
- osd journal size = 1024
-
+ osd journal size = 5120
+
[osd.0]
host = osd-host-a
-
+
[osd.1]
host = osd-host-b
The following settings provide an Ceph OSD Daemon's ID, and determine paths to
data and journals. Ceph deployment scripts typically generate the UUID
-automatically. We **DO NOT** recommend changing the default paths for data or
-journals, as it makes it more problematic to troubleshoot Ceph later.
+automatically.
+
+.. warning:: **DO NOT** change the default paths for data or journals, as it
+ makes it more problematic to troubleshoot Ceph later.
The journal size should be at least twice the product of the expected drive
speed multiplied by ``filestore max sync interval``. However, the most common
:Description: The universally unique identifier (UUID) for the Ceph OSD Daemon.
:Type: UUID
:Default: The UUID.
-:Note: The ``osd uuid`` applies to a single Ceph OSD Daemon. The ``fsid``
+:Note: The ``osd uuid`` applies to a single Ceph OSD Daemon. The ``fsid``
applies to the entire cluster.
-``osd data``
+``osd data``
-:Description: The path to the OSDs data. You must create the directory when
- deploying Ceph. You should mount a drive for OSD data at this
- mount point. We do not recommend changing the default.
+:Description: The path to the OSDs data. You must create the directory when
+ deploying Ceph. You should mount a drive for OSD data at this
+ mount point. We do not recommend changing the default.
:Type: String
:Default: ``/var/lib/ceph/osd/$cluster-$id``
-``osd max write size``
+``osd max write size``
:Description: The maximum size of a write in megabytes.
:Type: 32-bit Integer
:Description: The largest client data message allowed in memory.
:Type: 64-bit Unsigned Integer
-:Default: 500MB default. ``500*1024L*1024L``
+:Default: 500MB default. ``500*1024L*1024L``
-``osd class dir``
+``osd class dir``
:Description: The class path for RADOS class plug-ins.
:Type: String
====================
Ceph builds and mounts file systems which are used for Ceph OSDs.
-``osd mkfs options {fs-type}``
+``osd mkfs options {fs-type}``
:Description: Options used when creating a new Ceph OSD of type {fs-type}.
For example::
``osd mkfs options xfs = -f -d agcount=24``
-``osd mount options {fs-type}``
+``osd mount options {fs-type}``
:Description: Options used when mounting a Ceph OSD of type {fs-type}.
/var/lib/ceph/osd/$cluster-$id/journal
-Without performance optimization, Ceph stores the journal on the same disk as
-the Ceph OSD Daemons data. An Ceph OSD Daemon optimized for performance may use
-a separate disk to store journal data (e.g., a solid state drive delivers high
-performance journaling).
+When using a single device type (for example, spinning drives), the journals
+should be *colocated*: the logical volume (or partition) should be in the same
+device as the ``data`` logical volume.
-Ceph's default ``osd journal size`` is 0, so you will need to set this in your
-``ceph.conf`` file. A journal size should find the product of the ``filestore
-max sync interval`` and the expected throughput, and multiply the product by
-two (2)::
-
- osd journal size = {2 * (expected throughput * filestore max sync interval)}
+When using a mix of fast (SSDs, NVMe) devices with slower ones (like spinning
+drives) it makes sense to place the journal on the faster device, while
+``data`` occupies the slower device fully.
-The expected throughput number should include the expected disk throughput
-(i.e., sustained data transfer rate), and network throughput. For example,
-a 7200 RPM disk will likely have approximately 100 MB/s. Taking the ``min()``
-of the disk and network throughput should provide a reasonable expected
-throughput. Some users just start off with a 10GB journal size. For
-example::
+The default ``osd journal size`` value is 5120 (5 gigabytes), but it can be
+larger, in which case it will need to be set in the ``ceph.conf`` file::
- osd journal size = 10000
+ osd journal size = 10240
-``osd journal``
+
+``osd journal``
:Description: The path to the OSD's journal. This may be a path to a file or a
- block device (such as a partition of an SSD). If it is a file,
+ block device (such as a partition of an SSD). If it is a file,
you must create the directory to contain it. We recommend using a
drive separate from the ``osd data`` drive.
:Default: ``/var/lib/ceph/osd/$cluster-$id/journal``
-``osd journal size``
+``osd journal size``
-:Description: The size of the journal in megabytes. If this is 0, and the
- journal is a block device, the entire block device is used.
- Since v0.54, this is ignored if the journal is a block device,
- and the entire block device is used.
+:Description: The size of the journal in megabytes.
:Type: 32-bit Integer
:Default: ``5120``
-:Recommended: Begin with 1GB. Should be at least twice the product of the
- expected speed multiplied by ``filestore max sync interval``.
See `Journal Config Reference`_ for additional details.
scrubbing operations.
-``osd max scrubs``
+``osd max scrubs``
-:Description: The maximum number of simultaneous scrub operations for
+:Description: The maximum number of simultaneous scrub operations for
a Ceph OSD Daemon.
:Type: 32-bit Int
-:Default: ``1``
+:Default: ``1``
``osd scrub begin hour``
:Default: ``true``
-``osd scrub thread timeout``
+``osd scrub thread timeout``
:Description: The maximum time in seconds before timing out a scrub thread.
:Type: 32-bit Integer
-:Default: ``60``
+:Default: ``60``
-``osd scrub finalize thread timeout``
+``osd scrub finalize thread timeout``
-:Description: The maximum time in seconds before timing out a scrub finalize
+:Description: The maximum time in seconds before timing out a scrub finalize
thread.
:Type: 32-bit Integer
:Default: ``60*10``
-``osd scrub load threshold``
+``osd scrub load threshold``
:Description: The maximum load. Ceph will not scrub when the system load
(as defined by ``getloadavg()``) is higher than this number.
Default is ``0.5``.
:Type: Float
-:Default: ``0.5``
+:Default: ``0.5``
-``osd scrub min interval``
+``osd scrub min interval``
:Description: The minimal interval in seconds for scrubbing the Ceph OSD Daemon
when the Ceph Storage Cluster load is low.
:Default: Once per day. ``60*60*24``
-``osd scrub max interval``
+``osd scrub max interval``
-:Description: The maximum interval in seconds for scrubbing the Ceph OSD Daemon
+:Description: The maximum interval in seconds for scrubbing the Ceph OSD Daemon
irrespective of cluster load.
:Type: Float
``osd deep scrub interval``
-:Description: The interval for "deep" scrubbing (fully reading all data). The
+:Description: The interval for "deep" scrubbing (fully reading all data). The
``osd scrub load threshold`` does not affect this setting.
:Type: Float
``osd client op priority``
-:Description: The priority set for client operations. It is relative to
+:Description: The priority set for client operations. It is relative to
``osd recovery op priority``.
:Type: 32-bit Integer
-:Default: ``63``
+:Default: ``63``
:Valid Range: 1-63
``osd recovery op priority``
-:Description: The priority set for recovery operations. It is relative to
+:Description: The priority set for recovery operations. It is relative to
``osd client op priority``.
:Type: 32-bit Integer
-:Default: ``3``
+:Default: ``3``
:Valid Range: 1-63
:Valid Range: 1-63
-``osd op thread timeout``
+``osd op thread timeout``
:Description: The Ceph OSD Daemon operation thread timeout in seconds.
:Type: 32-bit Integer
-:Default: ``15``
+:Default: ``15``
-``osd op complaint time``
+``osd op complaint time``
:Description: An operation becomes complaint worthy after the specified number
of seconds have elapsed.
:Type: Float
-:Default: ``30``
+:Default: ``30``
-``osd disk threads``
+``osd disk threads``
-:Description: The number of disk threads, which are used to perform background
- disk intensive OSD operations such as scrubbing and snap
+:Description: The number of disk threads, which are used to perform background
+ disk intensive OSD operations such as scrubbing and snap
trimming.
:Type: 32-bit Integer
-:Default: ``1``
+:Default: ``1``
``osd disk thread ioprio class``
operations. ``be`` is the default and is the same
priority as all other threads in the OSD. ``rt`` means
the disk thread will have precendence over all other
- threads in the OSD. Note: Only works with the Linux Kernel
+ threads in the OSD. Note: Only works with the Linux Kernel
CFQ scheduler. Since Jewel scrubbing is no longer carried
out by the disk iothread, see osd priority options instead.
:Type: String
the objects they contain can reduce the cluster's operational performance
considerably. To maintain operational performance, Ceph performs this migration
with 'backfilling', which allows Ceph to set backfill operations to a lower
-priority than requests to read or write data.
+priority than requests to read or write data.
``osd max backfills``
:Default: ``1``
-``osd backfill scan min``
+``osd backfill scan min``
:Description: The minimum number of objects per backfill scan.
:Type: 32-bit Integer
-:Default: ``64``
+:Default: ``64``
-``osd backfill scan max``
+``osd backfill scan max``
:Description: The maximum number of objects per backfill scan.
:Type: 32-bit Integer
-:Default: ``512``
+:Default: ``512``
``osd backfill retry interval``
OSD Map
=======
-OSD maps reflect the OSD daemons operating in the cluster. Over time, the
+OSD maps reflect the OSD daemons operating in the cluster. Over time, the
number of map epochs increases. Ceph provides some settings to ensure that
Ceph performs well as the OSD map grows larger.
``osd map dedup``
-:Description: Enable removing duplicates in the OSD map.
+:Description: Enable removing duplicates in the OSD map.
:Type: Boolean
:Default: ``true``
-``osd map cache size``
+``osd map cache size``
:Description: The number of OSD maps to keep cached.
:Type: 32-bit Integer
``osd map cache bl size``
-:Description: The size of the in-memory OSD map cache in OSD daemons.
+:Description: The size of the in-memory OSD map cache in OSD daemons.
:Type: 32-bit Integer
:Default: ``50``
``osd map cache bl inc size``
-:Description: The size of the in-memory OSD map cache incrementals in
+:Description: The size of the in-memory OSD map cache incrementals in
OSD daemons.
:Type: 32-bit Integer
:Default: ``100``
-``osd map message max``
+``osd map message max``
:Description: The maximum map entries allowed per MOSDMap message.
:Type: 32-bit Integer
To maintain operational performance, Ceph performs recovery with limitations on
the number recovery requests, threads and object chunk sizes which allows Ceph
-perform well in a degraded state.
+perform well in a degraded state.
-``osd recovery delay start``
+``osd recovery delay start``
-:Description: After peering completes, Ceph will delay for the specified number
+:Description: After peering completes, Ceph will delay for the specified number
of seconds before starting to recover objects.
:Type: Float
-:Default: ``0``
+:Default: ``0``
-``osd recovery max active``
+``osd recovery max active``
-:Description: The number of active recovery requests per OSD at one time. More
- requests will accelerate recovery, but the requests places an
+:Description: The number of active recovery requests per OSD at one time. More
+ requests will accelerate recovery, but the requests places an
increased load on the cluster.
:Type: 32-bit Integer
:Default: ``3``
-``osd recovery max chunk``
+``osd recovery max chunk``
-:Description: The maximum size of a recovered chunk of data to push.
+:Description: The maximum size of a recovered chunk of data to push.
:Type: 64-bit Unsigned Integer
-:Default: ``8 << 20``
+:Default: ``8 << 20``
``osd recovery max single start``
:Default: ``1``
-``osd recovery thread timeout``
+``osd recovery thread timeout``
:Description: The maximum time in seconds before timing out a recovery thread.
:Type: 32-bit Integer
``osd recover clone overlap``
-:Description: Preserves clone overlap during recovery. Should always be set
+:Description: Preserves clone overlap during recovery. Should always be set
to ``true``.
:Type: Boolean
=============
-``osd snap trim thread timeout``
+``osd snap trim thread timeout``
:Description: The maximum time in seconds before timing out a snap trim thread.
:Type: 32-bit Integer
-:Default: ``60*60*1``
+:Default: ``60*60*1``
-``osd backlog thread timeout``
+``osd backlog thread timeout``
:Description: The maximum time in seconds before timing out a backlog thread.
:Type: 32-bit Integer
-:Default: ``60*60*1``
+:Default: ``60*60*1``
-``osd default notify timeout``
+``osd default notify timeout``
:Description: The OSD default notification timeout (in seconds).
:Type: 32-bit Unsigned Integer
-:Default: ``30``
+:Default: ``30``
-``osd check for log corruption``
+``osd check for log corruption``
:Description: Check log files for corruption. Can be computationally expensive.
:Type: Boolean
-:Default: ``false``
+:Default: ``false``
-``osd remove thread timeout``
+``osd remove thread timeout``
:Description: The maximum time in seconds before timing out a remove OSD thread.
:Type: 32-bit Integer
:Default: ``60*60``
-``osd command thread timeout``
+``osd command thread timeout``
:Description: The maximum time in seconds before timing out a command thread.
:Type: 32-bit Integer
-:Default: ``10*60``
+:Default: ``10*60``
-``osd command max records``
+``osd command max records``
-:Description: Limits the number of lost objects to return.
+:Description: Limits the number of lost objects to return.
:Type: 32-bit Integer
-:Default: ``256``
+:Default: ``256``
-``osd auto upgrade tmap``
+``osd auto upgrade tmap``
:Description: Uses ``tmap`` for ``omap`` on old objects.
:Type: Boolean
:Default: ``true``
-
-``osd tmapput sets users tmap``
+
+``osd tmapput sets users tmap``
:Description: Uses ``tmap`` for debugging only.
:Type: Boolean
-:Default: ``false``
+:Default: ``false``
``osd fast fail on connection refused``
Parameters
~~~~~~~~~~
-+-----------------+-----------+-----------------------------------------------------------------------+
-| Name | Type | Description |
-+=================+===========+=======================================================================+
-| ``prefix`` | String | Only returns objects that contain the specified prefix. |
-+-----------------+-----------+-----------------------------------------------------------------------+
-| ``delimiter`` | String | The delimiter between the prefix and the rest of the object name. |
-+-----------------+-----------+-----------------------------------------------------------------------+
-| ``marker`` | String | A beginning index for the list of objects returned. |
-+-----------------+-----------+-----------------------------------------------------------------------+
-| ``max-keys`` | Integer | The maximum number of keys to return. Default is 1000. |
-+-----------------+-----------+-----------------------------------------------------------------------+
-
++---------------------+-----------+-------------------------------------------------------------------------------------------------+
+| Name | Type | Description |
++=====================+===========+=================================================================================================+
+| ``prefix`` | String | Only returns objects that contain the specified prefix. |
++---------------------+-----------+-------------------------------------------------------------------------------------------------+
+| ``delimiter`` | String | The delimiter between the prefix and the rest of the object name. |
++---------------------+-----------+-------------------------------------------------------------------------------------------------+
+| ``marker`` | String | A beginning index for the list of objects returned. |
++---------------------+-----------+-------------------------------------------------------------------------------------------------+
+| ``max-keys`` | Integer | The maximum number of keys to return. Default is 1000. |
++---------------------+-----------+-------------------------------------------------------------------------------------------------+
+| ``allow-unordered`` | Boolean | Non-standard extension. Allows results to be returned unordered. Cannot be used with delimiter. |
++---------------------+-----------+-------------------------------------------------------------------------------------------------+
HTTP Response
~~~~~~~~~~~~~
:Type: String
:Required: No
+``allow_unordered``
+
+:Description: Allows the results to be returned unordered to reduce computation overhead. Cannot be used with ``delimiter``.
+:Type: Boolean
+:Required: No
+:Non-Standard Extension: Yes
+
Response Entities
~~~~~~~~~~~~~~~~~
CXX?=g++
CXX_FLAGS?=-std=c++11 -Wall -Wextra -Werror -g
-CXX_LIBS?=-lboost_system -lrados -lradosstriper
+CXX_LIBS?=-lrados -lradosstriper
CXX_INC?=$(LOCAL_LIBRADOS_INC)
CXX_CC=$(CXX) $(CXX_FLAGS) $(CXX_INC) $(LOCAL_LIBRADOS) $(CXX_LIBS)
$SUDO $builddepcmd $DIR/ceph.spec 2>&1 | tee $DIR/yum-builddep.out
! grep -q -i error: $DIR/yum-builddep.out || exit 1
;;
- opensuse|suse|sles)
+ opensuse*|suse|sles)
echo "Using zypper to install dependencies"
$SUDO zypper --gpg-auto-import-keys --non-interactive install lsb-release systemd-rpm-macros
munge_ceph_spec_in $DIR/ceph.spec
else
sudo sysctl -w ${KERNCORE}=${COREPATTERN}
fi
+# Clean out any cores in core target directory (currently .)
+if ls $(dirname $(sysctl -n $KERNCORE)) | grep -q '^core\|core$' ; then
+ mkdir found.cores.$$ 2> /dev/null || true
+ for i in $(ls $(dirname $(sysctl -n $KERNCORE)) | grep '^core\|core$'); do
+ mv $i found.cores.$$
+ done
+ echo "Stray cores put in $(pwd)/found.cores.$$"
+fi
+
ulimit -c unlimited
for f in $(cd $location ; find . -perm $exec_mode -type f)
do
# subvolumes that relate to it.
#
# @param dir path name of the environment
+# @param dumplogs pass "1" to dump logs otherwise it will only if cores found
# @return 0 on success, 1 on error
#
function teardown() {
pattern=""
fi
# Local we start with core and teuthology ends with core
- if ls $(dirname $pattern) | grep -q '^core\|core$' ; then
+ if ls $(dirname "$pattern") | grep -q '^core\|core$' ; then
cores="yes"
if [ -n "$LOCALRUN" ]; then
mkdir /tmp/cores.$$ 2> /dev/null || true
fi
fi
if [ "$cores" = "yes" -o "$dumplogs" = "1" ]; then
- display_logs $dir
+ if [ -n "$LOCALRUN" ]; then
+ display_logs $dir
+ else
+ # Move logs to where Teuthology will archive it
+ mkdir -p $TESTDIR/archive/log
+ mv $dir/*.log $TESTDIR/archive/log
+ fi
fi
rm -fr $dir
rm -rf $(get_asok_dir)
#######################################################################
##
-# Wait until the cluster becomes HEALTH_OK again or if it does not make progress
-# for $TIMEOUT seconds.
+# Wait until the cluster health detail matches the condition passed as
+# an argument, polling for up to $TIMEOUT seconds.
#
-# @return 0 if the cluster is HEALTHY, 1 otherwise
+# @param string to grep for in health detail
+# @return 0 if the cluster health matches request, 1 otherwise
#
function wait_for_health() {
local grepstr=$1
done
}
+##
+# Wait until the cluster becomes HEALTH_OK again or if it does not make progress
+# for $TIMEOUT seconds.
+#
+# @return 0 if the cluster is HEALTHY, 1 otherwise
+#
function wait_for_health_ok() {
wait_for_health "HEALTH_OK" || return 1
}
done
}
+# Run diff on the given arguments; when the files differ and $DIFFCOLOPTS is
+# set, re-run diff with those options (presumably display/color options —
+# TODO confirm) so the mismatch is easier to read.
+# Returns non-zero when the inputs differ.
+function multidiff() {
+    if ! diff $@ ; then
+        # No extra diff options configured: just report the plain failure
+        if [ "$DIFFCOLOPTS" = "" ]; then
+            return 1
+        fi
+        # Re-run with the configured options; its exit status is propagated
+        diff $DIFFCOLOPTS $@
+    fi
+}
+
# Local Variables:
# compile-command: "cd ../../src ; make -j4 && ../qa/standalone/ceph-helpers.sh TESTS # test_get_config"
# End:
setup $dir || return 1
# disable pg dir merge
- CEPH_ARGS+="--filestore-merge-threshold=-10 "
export CEPH_ARGS
run_mon $dir a || return 1
run_osd $dir 0 || return 1
}
-main recout "$@"
+main osd-backfill-stats "$@"
# Local Variables:
# compile-command: "make -j4 && ../qa/run-standalone.sh osd-backfill-stats.sh"
kill_daemons $dir || return 1
}
-main recout "$@"
+main osd-recovery-stats "$@"
# Local Variables:
# compile-command: "make -j4 && ../qa/run-standalone.sh osd-recovery-stats.sh"
ceph tell osd.\* injectargs -- --osd-pg-log-trim-min 10 || return 1
ceph tell osd.\* injectargs -- --osd-pg-log-dups-tracked 10 || return 1
- touch foo
+ touch $dir/foo
for i in $(seq 1 20)
do
- rados -p test put foo foo || return 1
+ rados -p test put foo $dir/foo || return 1
done
test_log_size $PGID 20 || return 1
setup_log_test $dir || return 1
# regular write should trim the log
- rados -p test put foo foo || return 1
+ rados -p test put foo $dir/foo || return 1
test_log_size $PGID 22 || return 1
}
--- /dev/null
+#!/bin/bash -x
+#
+# Copyright (C) 2014 Red Hat <contact@redhat.com>
+#
+# Author: Loic Dachary <loic@dachary.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+# Test development and debugging
+# Set to "yes" in order to ignore diff errors and save results to update test
+getjson="yes"
+
+# Filter out mtime and local_mtime dates, version, prior_version and last_reqid (client) from any object_info.
+jqfilter='def walk(f):
+ . as $in
+ | if type == "object" then
+ reduce keys[] as $key
+ ( {}; . + { ($key): ($in[$key] | walk(f)) } ) | f
+ elif type == "array" then map( walk(f) ) | f
+ else f
+ end;
+walk(if type == "object" then del(.mtime) else . end)
+| walk(if type == "object" then del(.local_mtime) else . end)
+| walk(if type == "object" then del(.last_reqid) else . end)
+| walk(if type == "object" then del(.version) else . end)
+| walk(if type == "object" then del(.prior_version) else . end)
+| walk(if type == "object" then del(.redirect_target) else . end)
+| walk(if type == "object" then del(.legacy_snaps) else . end)'
+
+sortkeys='import json; import sys ; JSON=sys.stdin.read() ; ud = json.loads(JSON) ; print json.dumps(ud, sort_keys=True, indent=2)'
+
+# Entry point: set up CEPH_ARGS for a single-mon test environment and run
+# every TEST_* function defined in this script (or only the functions passed
+# as extra arguments).
+# @param dir path name of the environment
+# @return 0 on success, 1 as soon as any test function fails
+function run() {
+    local dir=$1
+    shift
+
+    export CEPH_MON="127.0.0.1:7107" # git grep '\<7107\>' : there must be only one
+    export CEPH_ARGS
+    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+    CEPH_ARGS+="--mon-host=$CEPH_MON "
+    # osd-distrust-data-digest is the option under test in this script
+    CEPH_ARGS+="--osd-distrust-data-digest=true "
+
+    # Default to all TEST_* functions currently defined in the shell
+    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+    for func in $funcs ; do
+        $func $dir || return 1
+    done
+}
+
+# Write a small object into a pool, first setting or unsetting the
+# noscrub/nodeep-scrub cluster flags.
+# @param dir path name of the environment (used for the scratch payload file)
+# @param poolname pool to write the object into
+# @param obj object name (default SOMETHING)
+# @param scrub "noscrub" (default) to set the scrub-blocking flags,
+#        any other value to unset them
+# @return 0 on success, 1 on error
+function add_something() {
+    local dir=$1
+    local poolname=$2
+    local obj=${3:-SOMETHING}
+    local scrub=${4:-noscrub}
+
+    if [ "$scrub" = "noscrub" ];
+    then
+        ceph osd set noscrub || return 1
+        ceph osd set nodeep-scrub || return 1
+    else
+        ceph osd unset noscrub || return 1
+        ceph osd unset nodeep-scrub || return 1
+    fi
+
+    # Fixed payload so the resulting data digest is deterministic
+    local payload=ABCDEF
+    echo $payload > $dir/ORIGINAL
+    rados --pool $poolname put $obj $dir/ORIGINAL || return 1
+}
+
+#
+# Test automatic repair with distrust set
+#
+function TEST_distrust_scrub_replicated() {
+ local dir=$1
+ local poolname=dsr_pool
+ local total_objs=2
+
+ setup $dir || return 1
+ run_mon $dir a --osd_pool_default_size=2 || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ create_rbd_pool || return 1
+ wait_for_clean || return 1
+
+ create_pool foo 1 || return 1
+ create_pool $poolname 1 1 || return 1
+ wait_for_clean || return 1
+
+ for i in $(seq 1 $total_objs) ; do
+ objname=ROBJ${i}
+ add_something $dir $poolname $objname || return 1
+ done
+
+ local pg=$(get_pg $poolname ROBJ0)
+
+ for i in $(seq 1 $total_objs) ; do
+ objname=ROBJ${i}
+
+ case $i in
+ 1)
+            # Deep-scrub only (all replicas are different than the object info)
+ local payload=XROBJ1
+ echo $payload > $dir/new.ROBJ1
+ objectstore_tool $dir 0 $objname set-bytes $dir/new.ROBJ1 || return 1
+ objectstore_tool $dir 1 $objname set-bytes $dir/new.ROBJ1 || return 1
+ ;;
+
+ 2)
+            # Deep-scrub only (all replicas are different than the object info)
+ local payload=XROBJ2
+ echo $payload > $dir/new.ROBJ2
+ objectstore_tool $dir 0 $objname set-bytes $dir/new.ROBJ2 || return 1
+ objectstore_tool $dir 1 $objname set-bytes $dir/new.ROBJ2 || return 1
+ # Make one replica have a different object info, so a full repair must happen too
+ objectstore_tool $dir 0 $objname corrupt-info || return 1
+ ;;
+ esac
+ done
+
+ # This should fix the data_digest because osd-distrust-data-digest is true
+ pg_deep_scrub $pg
+
+ # This hangs if the scrub didn't repair the data_digest
+ timeout 30 rados -p $poolname get ROBJ1 $dir/robj1.out || return 1
+ diff -q $dir/new.ROBJ1 $dir/robj1.out || return 1
+ rm -f $dir/new.ROBJ1 $dir/robj1.out || return 1
+
+ rados list-inconsistent-pg $poolname > $dir/json || return 1
+ # Check pg count
+ test $(jq '. | length' $dir/json) = "1" || return 1
+ # Check pgid
+ test $(jq -r '.[0]' $dir/json) = $pg || return 1
+
+ rados list-inconsistent-obj $pg > $dir/json || return 1
+ # Get epoch for repair-get requests
+ epoch=$(jq .epoch $dir/json)
+
+ jq "$jqfilter" << EOF | jq '.inconsistents' | python -c "$sortkeys" > $dir/checkcsjson
+{
+ "inconsistents": [
+ {
+ "shards": [
+ {
+ "object_info": {
+ "watchers": {},
+ "manifest": {
+ "redirect_target": {
+ "namespace": "",
+ "pool": -9223372036854776000,
+ "max": 0,
+ "hash": 0,
+ "snapid": 0,
+ "key": "",
+ "oid": ""
+ },
+ "type": 0
+ },
+ "alloc_hint_flags": 255,
+ "expected_write_size": 0,
+ "local_mtime": "2018-07-24 15:05:56.027234",
+ "mtime": "2018-07-24 15:05:56.021775",
+ "size": 7,
+ "user_version": 2,
+ "last_reqid": "client.4137.0:1",
+ "prior_version": "0'0",
+ "version": "23'2",
+ "oid": {
+ "namespace": "",
+ "pool": 3,
+ "max": 0,
+ "hash": 2026323607,
+ "snapid": -2,
+ "key": "",
+ "oid": "ROBJ2"
+ },
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "data_digest"
+ ],
+ "legacy_snaps": [],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xffffffff",
+ "expected_object_size": 0
+ },
+ "data_digest": "0x0bb7ab52",
+ "omap_digest": "0xffffffff",
+ "size": 7,
+ "errors": [],
+ "primary": false,
+ "osd": 0
+ },
+ {
+ "object_info": {
+ "watchers": {},
+ "manifest": {
+ "redirect_target": {
+ "namespace": "",
+ "pool": -9223372036854776000,
+ "max": 0,
+ "hash": 0,
+ "snapid": 0,
+ "key": "",
+ "oid": ""
+ },
+ "type": 0
+ },
+ "alloc_hint_flags": 0,
+ "expected_write_size": 0,
+ "local_mtime": "2018-07-24 15:05:56.027234",
+ "mtime": "2018-07-24 15:05:56.021775",
+ "size": 7,
+ "user_version": 2,
+ "last_reqid": "client.4137.0:1",
+ "prior_version": "0'0",
+ "version": "23'2",
+ "oid": {
+ "namespace": "",
+ "pool": 3,
+ "max": 0,
+ "hash": 2026323607,
+ "snapid": -2,
+ "key": "",
+ "oid": "ROBJ2"
+ },
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "data_digest"
+ ],
+ "legacy_snaps": [],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xffffffff",
+ "expected_object_size": 0
+ },
+ "data_digest": "0x0bb7ab52",
+ "omap_digest": "0xffffffff",
+ "size": 7,
+ "errors": [],
+ "primary": true,
+ "osd": 1
+ }
+ ],
+ "selected_object_info": {
+ "watchers": {},
+ "manifest": {
+ "redirect_target": {
+ "namespace": "",
+ "pool": -9223372036854776000,
+ "max": 0,
+ "hash": 0,
+ "snapid": 0,
+ "key": "",
+ "oid": ""
+ },
+ "type": 0
+ },
+ "alloc_hint_flags": 0,
+ "expected_write_size": 0,
+ "local_mtime": "2018-07-24 15:05:56.027234",
+ "mtime": "2018-07-24 15:05:56.021775",
+ "size": 7,
+ "user_version": 2,
+ "last_reqid": "client.4137.0:1",
+ "prior_version": "0'0",
+ "version": "23'2",
+ "oid": {
+ "namespace": "",
+ "pool": 3,
+ "max": 0,
+ "hash": 2026323607,
+ "snapid": -2,
+ "key": "",
+ "oid": "ROBJ2"
+ },
+ "lost": 0,
+ "flags": [
+ "dirty",
+ "data_digest"
+ ],
+ "legacy_snaps": [],
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "data_digest": "0x2ddbf8f5",
+ "omap_digest": "0xffffffff",
+ "expected_object_size": 0
+ },
+ "union_shard_errors": [],
+ "errors": [
+ "object_info_inconsistency"
+ ],
+ "object": {
+ "version": 2,
+ "snap": "head",
+ "locator": "",
+ "nspace": "",
+ "name": "ROBJ2"
+ }
+ }
+ ],
+ "epoch": 42
+}
+EOF
+
+ jq "$jqfilter" $dir/json | jq '.inconsistents' | python -c "$sortkeys" > $dir/csjson
+ diff ${DIFFCOLOPTS} $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1
+ if test $getjson = "yes"
+ then
+ jq '.' $dir/json > save1.json
+ fi
+
+ if test "$LOCALRUN" = "yes" && which jsonschema > /dev/null;
+ then
+ jsonschema -i $dir/json $CEPH_ROOT/doc/rados/command/list-inconsistent-obj.json || return 1
+ fi
+
+ repair $pg
+ wait_for_clean
+
+ timeout 30 rados -p $poolname get ROBJ2 $dir/robj2.out || return 1
+ diff -q $dir/new.ROBJ2 $dir/robj2.out || return 1
+ rm -f $dir/new.ROBJ2 $dir/robj2.out || return 1
+
+ rados rmpool $poolname $poolname --yes-i-really-really-mean-it
+ teardown $dir || return 1
+}
+
+main osd-scrub-distrust "$@"
+
+# Local Variables:
+# compile-command: "cd build ; make -j4 && \
+# ../qa/run-standalone.sh osd-scrub-distrust"
+# End:
function TEST_corrupt_scrub_replicated() {
local dir=$1
local poolname=csr_pool
- local total_objs=16
+ local total_objs=18
setup $dir || return 1
run_mon $dir a --osd_pool_default_size=2 || return 1
objectstore_tool $dir 0 $objname rm-attr snapset || return 1
echo -n bad-val > $dir/bad-val
objectstore_tool $dir 1 $objname set-attr snapset $dir/bad-val || return 1
+ ;;
+
+ 17)
+            # Deep-scrub only (all replicas are different than the object info)
+ local payload=ROBJ17
+ echo $payload > $dir/new.ROBJ17
+ objectstore_tool $dir 0 $objname set-bytes $dir/new.ROBJ17 || return 1
+ objectstore_tool $dir 1 $objname set-bytes $dir/new.ROBJ17 || return 1
+ ;;
+
+ 18)
+            # Deep-scrub only (all replicas are different than the object info)
+ local payload=ROBJ18
+ echo $payload > $dir/new.ROBJ18
+ objectstore_tool $dir 0 $objname set-bytes $dir/new.ROBJ18 || return 1
+ objectstore_tool $dir 1 $objname set-bytes $dir/new.ROBJ18 || return 1
+ # Make one replica have a different object info, so a full repair must happen too
+ objectstore_tool $dir $osd $objname corrupt-info || return 1
esac
done
]
},
{
+ "errors": [
+ "object_info_inconsistency"
+ ],
+ "object": {
+ "locator": "",
+ "name": "ROBJ18",
+ "nspace": "",
+ "snap": "head"
+ },
+ "selected_object_info": {
+ "alloc_hint_flags": 255,
+ "data_digest": "0x2ddbf8f5",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest",
+ "omap_digest"
+ ],
+ "lost": 0,
+ "manifest": {
+ "type": 0
+ },
+ "oid": {
+ "hash": 1629828556,
+ "key": "",
+ "max": 0,
+ "namespace": "",
+ "oid": "ROBJ18",
+ "pool": 3,
+ "snapid": -2
+ },
+ "omap_digest": "0xddc3680f",
+ "size": 7,
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "user_version": 54,
+ "watchers": {}
+ },
+ "shards": [
+ {
+ "errors": [],
+ "object_info": {
+ "alloc_hint_flags": 0,
+ "data_digest": "0x2ddbf8f5",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest",
+ "omap_digest"
+ ],
+ "lost": 0,
+ "manifest": {
+ "type": 0
+ },
+ "oid": {
+ "hash": 1629828556,
+ "key": "",
+ "max": 0,
+ "namespace": "",
+ "oid": "ROBJ18",
+ "pool": 3,
+ "snapid": -2
+ },
+ "omap_digest": "0xddc3680f",
+ "size": 7,
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "user_version": 54,
+ "watchers": {}
+ },
+ "osd": 0,
+ "primary": false,
+ "size": 7
+ },
+ {
+ "errors": [],
+ "object_info": {
+ "alloc_hint_flags": 255,
+ "data_digest": "0x2ddbf8f5",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest",
+ "omap_digest"
+ ],
+ "lost": 0,
+ "manifest": {
+ "type": 0
+ },
+ "oid": {
+ "hash": 1629828556,
+ "key": "",
+ "max": 0,
+ "namespace": "",
+ "oid": "ROBJ18",
+ "pool": 3,
+ "snapid": -2
+ },
+ "omap_digest": "0xddc3680f",
+ "size": 7,
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "user_version": 54,
+ "watchers": {}
+ },
+ "osd": 1,
+ "primary": true,
+ "size": 7
+ }
+ ],
+ "union_shard_errors": []
+ },
+ {
"shards": [
{
"size": 7,
"version": "79'66",
"prior_version": "79'65",
"last_reqid": "client.4554.0:1",
- "user_version": 66,
+ "user_version": 74,
"size": 7,
"mtime": "",
"local_mtime": "",
"version": "95'67",
"prior_version": "51'64",
"last_reqid": "client.4649.0:1",
- "user_version": 67,
+ "user_version": 75,
"size": 1,
"mtime": "",
"local_mtime": "",
"version": "95'67",
"prior_version": "51'64",
"last_reqid": "client.4649.0:1",
- "user_version": 67,
+ "user_version": 75,
"size": 1,
"mtime": "",
"local_mtime": "",
EOF
jq "$jqfilter" $dir/json | jq '.inconsistents' | python -c "$sortkeys" > $dir/csjson
- diff ${DIFFCOLOPTS} $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1
+ multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1
if test $getjson = "yes"
then
jq '.' $dir/json > save1.json
]
},
{
- "shards": [
+ "errors": [],
+ "object": {
+ "locator": "",
+ "name": "ROBJ17",
+ "nspace": "",
+ "snap": "head"
+ },
+ "selected_object_info": {
+ "alloc_hint_flags": 0,
+ "data_digest": "0x2ddbf8f5",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest",
+ "omap_digest"
+ ],
+ "lost": 0,
+ "manifest": {
+ "type": 0
+ },
+ "oid": {
+ "hash": 1884071249,
+ "key": "",
+ "max": 0,
+ "namespace": "",
+ "oid": "ROBJ17",
+ "pool": 3,
+ "snapid": -2
+ },
+ "omap_digest": "0xe9572720",
+ "size": 7,
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "user_version": 51,
+ "watchers": {}
+ },
+ "shards": [
+ {
+ "data_digest": "0x5af0c3ef",
+ "errors": [
+ "data_digest_mismatch_info"
+ ],
+ "omap_digest": "0xe9572720",
+ "osd": 0,
+ "primary": false,
+ "size": 7
+ },
+ {
+ "data_digest": "0x5af0c3ef",
+ "errors": [
+ "data_digest_mismatch_info"
+ ],
+ "omap_digest": "0xe9572720",
+ "osd": 1,
+ "primary": true,
+ "size": 7
+ }
+ ],
+ "union_shard_errors": [
+ "data_digest_mismatch_info"
+ ]
+ },
+ {
+ "errors": [
+ "object_info_inconsistency"
+ ],
+ "object": {
+ "locator": "",
+ "name": "ROBJ18",
+ "nspace": "",
+ "snap": "head"
+ },
+ "selected_object_info": {
+ "alloc_hint_flags": 255,
+ "data_digest": "0x2ddbf8f5",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest",
+ "omap_digest"
+ ],
+ "lost": 0,
+ "manifest": {
+ "type": 0
+ },
+ "oid": {
+ "hash": 1629828556,
+ "key": "",
+ "max": 0,
+ "namespace": "",
+ "oid": "ROBJ18",
+ "pool": 3,
+ "snapid": -2
+ },
+ "omap_digest": "0xddc3680f",
+ "size": 7,
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "user_version": 54,
+ "watchers": {}
+ },
+ "shards": [
+ {
+ "data_digest": "0xbd89c912",
+ "errors": [
+ "data_digest_mismatch_info"
+ ],
+ "object_info": {
+ "alloc_hint_flags": 0,
+ "data_digest": "0x2ddbf8f5",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest",
+ "omap_digest"
+ ],
+ "lost": 0,
+ "manifest": {
+ "type": 0
+ },
+ "oid": {
+ "hash": 1629828556,
+ "key": "",
+ "max": 0,
+ "namespace": "",
+ "oid": "ROBJ18",
+ "pool": 3,
+ "snapid": -2
+ },
+ "omap_digest": "0xddc3680f",
+ "size": 7,
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "user_version": 54,
+ "watchers": {}
+ },
+ "omap_digest": "0xddc3680f",
+ "osd": 0,
+ "primary": false,
+ "size": 7
+ },
+ {
+ "data_digest": "0xbd89c912",
+ "errors": [
+ "data_digest_mismatch_info"
+ ],
+ "object_info": {
+ "alloc_hint_flags": 255,
+ "data_digest": "0x2ddbf8f5",
+ "expected_object_size": 0,
+ "expected_write_size": 0,
+ "flags": [
+ "dirty",
+ "omap",
+ "data_digest",
+ "omap_digest"
+ ],
+ "lost": 0,
+ "manifest": {
+ "type": 0
+ },
+ "oid": {
+ "hash": 1629828556,
+ "key": "",
+ "max": 0,
+ "namespace": "",
+ "oid": "ROBJ18",
+ "pool": 3,
+ "snapid": -2
+ },
+ "omap_digest": "0xddc3680f",
+ "size": 7,
+ "truncate_seq": 0,
+ "truncate_size": 0,
+ "user_version": 54,
+ "watchers": {}
+ },
+ "omap_digest": "0xddc3680f",
+ "osd": 1,
+ "primary": true,
+ "size": 7
+ }
+ ],
+ "union_shard_errors": [
+ "data_digest_mismatch_info"
+ ]
+ },
+ {
+ "shards": [
{
"data_digest": "0x578a4830",
"omap_digest": "0xf8e11918",
"version": "79'66",
"prior_version": "79'65",
"last_reqid": "client.4554.0:1",
- "user_version": 66,
+ "user_version": 74,
"size": 7,
"mtime": "2018-04-05 14:34:05.598688",
"local_mtime": "2018-04-05 14:34:05.599698",
"version": "119'68",
"prior_version": "51'64",
"last_reqid": "client.4834.0:1",
- "user_version": 68,
+ "user_version": 76,
"size": 3,
"mtime": "2018-04-05 14:35:01.500659",
"local_mtime": "2018-04-05 14:35:01.502117",
"version": "119'68",
"prior_version": "51'64",
"last_reqid": "client.4834.0:1",
- "user_version": 68,
+ "user_version": 76,
"size": 3,
"mtime": "2018-04-05 14:35:01.500659",
"local_mtime": "2018-04-05 14:35:01.502117",
EOF
jq "$jqfilter" $dir/json | jq '.inconsistents' | python -c "$sortkeys" > $dir/csjson
- diff ${DIFFCOLOPTS} $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1
+ multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1
if test $getjson = "yes"
then
jq '.' $dir/json > save2.json
jsonschema -i $dir/json $CEPH_ROOT/doc/rados/command/list-inconsistent-obj.json || return 1
fi
+ repair $pg
+ wait_for_clean
+
+ # This hangs if the repair doesn't work
+ timeout 30 rados -p $poolname get ROBJ17 $dir/robj17.out || return 1
+ timeout 30 rados -p $poolname get ROBJ18 $dir/robj18.out || return 1
+ # Even though we couldn't repair all of the introduced errors, we can fix ROBJ17
+ diff -q $dir/new.ROBJ17 $dir/robj17.out || return 1
+ rm -f $dir/new.ROBJ17 $dir/robj17.out || return 1
+ diff -q $dir/new.ROBJ18 $dir/robj18.out || return 1
+ rm -f $dir/new.ROBJ18 $dir/robj18.out || return 1
+
rados rmpool $poolname $poolname --yes-i-really-really-mean-it
teardown $dir || return 1
}
EOF
jq "$jqfilter" $dir/json | jq '.inconsistents' | python -c "$sortkeys" > $dir/csjson
- diff ${DIFFCOLOPTS} $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1
+ multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1
if test $getjson = "yes"
then
jq '.' $dir/json > save3.json
fi
jq "$jqfilter" $dir/json | jq '.inconsistents' | python -c "$sortkeys" > $dir/csjson
- diff ${DIFFCOLOPTS} $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1
+ multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1
if test $getjson = "yes"
then
if [ "$allow_overwrites" = "true" ]
EOF
jq "$jqfilter" $dir/json | jq '.inconsistents' | python -c "$sortkeys" > $dir/csjson
- diff ${DIFFCOLOPTS} $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1
+ multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1
if test $getjson = "yes"
then
jq '.' $dir/json > save6.json
EOF
jq "$jqfilter" $dir/json | python -c "$sortkeys" > $dir/csjson
- diff ${DIFFCOLOPTS} $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1
+ multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1
rados list-inconsistent-snapset $pgid > $dir/json || return 1
EOF
jq "$jqfilter" $dir/json | python -c "$sortkeys" > $dir/csjson
- diff ${DIFFCOLOPTS} $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1
+ multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1
if test $getjson = "yes"
then
jq '.' $dir/json > save1.json
fi
jq "$jqfilter" $dir/json | python -c "$sortkeys" > $dir/csjson
- diff ${DIFFCOLOPTS} $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1
+ multidiff $dir/checkcsjson $dir/csjson || test $getjson = "yes" || return 1
if test $getjson = "yes"
then
jq '.' $dir/json > save1.json
--- /dev/null
+#!/usr/bin/env bash
+#
+# Copyright (C) 2015 Intel <contact@intel.com.com>
+# Copyright (C) 2014, 2015 Red Hat <contact@redhat.com>
+#
+# Author: Xiaoxi Chen <xiaoxi.chen@intel.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Library Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Library Public License for more details.
+#
+
+source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+
+# Entry point: set up CEPH_ARGS for a single-mon test environment, then run
+# each TEST_* function (or only those passed as extra arguments) wrapped in
+# its own setup/teardown.
+# @param dir path name of the environment
+# @return 0 on success, 1 as soon as setup, a test, or teardown fails
+function run() {
+    local dir=$1
+    shift
+
+    export CEPH_MON="127.0.0.1:7144" # git grep '\<7144\>' : there must be only one
+    export CEPH_ARGS
+    CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
+    CEPH_ARGS+="--mon-host=$CEPH_MON "
+
+    # Default to all TEST_* functions currently defined in the shell
+    local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
+    for func in $funcs ; do
+        setup $dir || return 1
+        $func $dir || return 1
+        teardown $dir || return 1
+    done
+}
+
+# Resurrect a snapshot clone behind the OSDs' backs after its snapshot was
+# removed, then trigger a pg repair.  The test only asserts that the OSDs
+# survive (respond to `tell`) afterwards — NOTE(review): the repair outcome
+# itself is not checked, just daemon liveness.
+# @param dir path name of the environment
+# @return 0 on success, 1 on error
+function TEST_recover_unexpected() {
+    local dir=$1
+
+    run_mon $dir a || return 1
+    run_mgr $dir x || return 1
+    run_osd $dir 0 || return 1
+    run_osd $dir 1 || return 1
+    run_osd $dir 2 || return 1
+
+    # Write, snapshot, then overwrite so a clone object exists
+    ceph osd pool create foo 1
+    rados -p foo put foo /etc/passwd
+    rados -p foo mksnap snap
+    rados -p foo put foo /etc/group
+
+    wait_for_clean || return 1
+
+    local osd=$(get_primary foo foo)
+
+    # Capture the clone's objectstore JSON handle plus its attrs and bytes
+    JSON=`objectstore_tool $dir $osd --op list foo | grep snapid.:1`
+    echo "JSON is $JSON"
+    rm -f $dir/_ $dir/data
+    objectstore_tool $dir $osd "$JSON" get-attr _ > $dir/_ || return 1
+    objectstore_tool $dir $osd "$JSON" get-bytes $dir/data || return 1
+
+    # Remove the snapshot, then put the clone back via the objectstore tool,
+    # creating a clone the OSD no longer expects
+    rados -p foo rmsnap snap
+
+    sleep 5
+
+    objectstore_tool $dir $osd "$JSON" set-bytes $dir/data || return 1
+    objectstore_tool $dir $osd "$JSON" set-attr _ $dir/_ || return 1
+
+    sleep 5
+
+    ceph pg repair 1.0 || return 1
+
+    sleep 10
+
+    ceph log last
+
+    # make sure osds are still up
+    timeout 60 ceph tell osd.0 version || return 1
+    timeout 60 ceph tell osd.1 version || return 1
+    timeout 60 ceph tell osd.2 version || return 1
+}
+
+
+main osd-unexpected-clone "$@"
+
+# Local Variables:
+# compile-command: "make -j4 && ../qa/run-standalone.sh osd-unexpected-clone.sh"
+# End:
ms die on skipped message: false
client:
rbd default features: 5
+ log-whitelist:
+ - slow request
ms die on skipped message: false
client:
rbd default features: 5
+ log-whitelist:
+ - slow request
ms die on skipped message: false
client:
rbd default features: 5
+ log-whitelist:
+ - slow request
ms die on skipped message: false
client:
rbd default features: 5
+ log-whitelist:
+ - slow request
ms die on skipped message: false
client:
rbd default features: 5
+ log-whitelist:
+ - slow request
ms die on skipped message: false
client:
rbd default features: 5
+ log-whitelist:
+ - slow request
+overrides:
+ ceph:
+ conf:
+ global:
+ osd_pg_log_dups_tracked: 10000
+
tasks:
- ceph-fuse:
- workunit:
ceph:
log-whitelist:
- \(MDS_TRIM\)
+ - \(MDS_SLOW_REQUEST\)
- Behind on trimming
--- /dev/null
+../parallel/slow_requests.yaml
\ No newline at end of file
--- /dev/null
+overrides:
+ ceph:
+ log-whitelist:
+ - slow request
--- /dev/null
+../parallel/slow_requests.yaml
\ No newline at end of file
--- /dev/null
+../parallel/slow_requests.yaml
\ No newline at end of file
- wrongly marked
- \(POOL_APP_NOT_ENABLED\)
- overall HEALTH_
+ - \(REQUEST_SLOW\)
+ - slow request
conf:
global:
enable experimental unrecoverable data corrupting features: "*"
- overall HEALTH_
- \(MON_DOWN\)
- \(MGR_DOWN\)
+ - \(REQUEST_SLOW\)
+ - slow request
conf:
global:
enable experimental unrecoverable data corrupting features: "*"
run workload and upgrade-sequence in parallel
install ceph/luminous v12.2.5 point version
run workload and upgrade-sequence in parallel
+ install ceph/luminous v12.2.7 point version
+ run workload and upgrade-sequence in parallel
install ceph/luminous latest version
run workload and upgrade-sequence in parallel
overrides:
- PG_AVAILABILITY
- PG_DEGRADED
- application not enabled
+ - overall HEALTH_
fs: xfs
conf:
mon:
- workload_luminous
- upgrade-sequence_luminous
- print: "**** done parallel luminous v12.2.5"
+
+#### upgrade to v12.2.7
+- install.upgrade:
+ #exclude_packages: ['ceph-mgr','libcephfs2','libcephfs-devel','libcephfs-dev']
+ mon.a:
+ tag: v12.2.7
+ mon.b:
+ tag: v12.2.7
+ # Note that client.a IS NOT upgraded at this point
+- parallel:
+ - workload_luminous
+ - upgrade-sequence_luminous
+- print: "**** done parallel luminous v12.2.7"
+
#### upgrade to latest luminous
- install.upgrade:
#exclude_packages: ['ceph-mgr','libcephfs2','libcephfs-devel','libcephfs-dev']
mon.a:
- branch: luminous
mon.b:
- branch: luminous
# Note that client.a IS NOT upgraded at this point
- parallel:
- workload_luminous
+++ /dev/null
-openstack:
- - volumes: # attached to each instance
- count: 3
- size: 30 # GB
+++ /dev/null
-meta:
-- desc: |
- Run ceph on two nodes,
- with a separate client 0,1,2 third node.
- Use xfs beneath the osds.
- CephFS tests running on client 2,3
-roles:
-- - mon.a
- - mgr.x
- - mds.a
- - osd.0
- - osd.1
-- - mon.b
- - mon.c
- - osd.2
- - osd.3
-- - client.0
- - client.1
- - client.2
- - client.3
-- - client.4
-overrides:
- ceph:
- log-whitelist:
- - scrub mismatch
- - ScrubResult
- - wrongly marked
- - \(POOL_APP_NOT_ENABLED\)
- - overall HEALTH_
- conf:
- global:
- enable experimental unrecoverable data corrupting features: "*"
- mon:
- mon warn on osd down out interval zero: false
- osd:
- osd_class_load_list: "cephfs hello journal lock log numops rbd refcount
- replica_log rgw sdk statelog timeindex user version"
- osd_class_default_list: "cephfs hello journal lock log numops rbd refcount
- replica_log rgw sdk statelog timeindex user version"
- fs: xfs
+++ /dev/null
-meta:
-- desc: |
- install ceph/luminous latest
- run workload and upgrade-sequence in parallel
- upgrade the client node
-tasks:
-- install:
- branch: luminous
-- print: "**** done installing luminous"
-- ceph:
- log-whitelist:
- - overall HEALTH_
- - \(FS_
- - \(MDS_
- - \(OSD_
- - \(MON_DOWN\)
- - \(CACHE_POOL_
- - \(POOL_
- - \(MGR_DOWN\)
- - \(PG_
- - \(SMALLER_PGP_NUM\)
- - Monitor daemon marked osd
- - Behind on trimming
- - Manager daemon
- conf:
- global:
- mon warn on pool no app: false
-- exec:
- osd.0:
- - ceph osd require-osd-release luminous
- - ceph osd set-require-min-compat-client luminous
-- print: "**** done ceph"
-- install.upgrade:
- mon.a:
- mon.b:
-- print: "**** done install.upgrade both hosts"
-- parallel:
- - workload
- - upgrade-sequence
-- print: "**** done parallel"
-- install.upgrade:
- client.0:
-- print: "**** done install.upgrade on client.0"
+++ /dev/null
-meta:
-- desc: |
- run a cephfs stress test
- mount ceph-fuse on client.2 before running workunit
-workload:
- full_sequential:
- - sequential:
- - ceph-fuse:
- - print: "**** done ceph-fuse 2-workload"
- - workunit:
- clients:
- client.2:
- - suites/blogbench.sh
- - print: "**** done suites/blogbench.sh 2-workload"
+++ /dev/null
-meta:
-- desc: |
- run run randomized correctness test for rados operations
- on an erasure-coded pool
-workload:
- full_sequential:
- - rados:
- clients: [client.0]
- ops: 4000
- objects: 50
- ec_pool: true
- write_append_excl: false
- op_weights:
- read: 100
- write: 0
- append: 100
- delete: 50
- snap_create: 50
- snap_remove: 50
- rollback: 50
- copy_from: 50
- setattr: 25
- rmattr: 25
- - print: "**** done rados ec task"
+++ /dev/null
-meta:
-- desc: |
- object class functional tests
-workload:
- full_sequential:
- - workunit:
- branch: luminous
- clients:
- client.0:
- - cls
- - print: "**** done cls 2-workload"
+++ /dev/null
-meta:
-- desc: |
- generate read/write load with rados objects ranging from 1MB to 25MB
-workload:
- full_sequential:
- - workunit:
- branch: luminous
- clients:
- client.0:
- - rados/load-gen-big.sh
- - print: "**** done rados/load-gen-big.sh 2-workload"
+++ /dev/null
-meta:
-- desc: |
- librbd C and C++ api tests
-workload:
- full_sequential:
- - workunit:
- branch: luminous
- clients:
- client.0:
- - rbd/test_librbd.sh
- - print: "**** done rbd/test_librbd.sh 2-workload"
+++ /dev/null
-meta:
-- desc: |
- librbd python api tests
-workload:
- full_sequential:
- - workunit:
- branch: luminous
- clients:
- client.0:
- - rbd/test_librbd_python.sh
- - print: "**** done rbd/test_librbd_python.sh 2-workload"
+++ /dev/null
-meta:
-- desc: |
- upgrade the ceph cluster
-upgrade-sequence:
- sequential:
- - ceph.restart:
- daemons: [mon.a, mon.b, mon.c, mgr.x]
- - ceph.restart:
- daemons: [osd.0, osd.1, osd.2, osd.3]
- wait-for-healthy: false
- wait-for-osds-up: true
- - ceph.restart:
- daemons: [mds.a]
- wait-for-healthy: false
- wait-for-osds-up: true
- - print: "**** done ceph.restart all"
+++ /dev/null
-meta:
-- desc: |
- upgrade the ceph cluster,
- upgrate in two steps
- step one ordering: mon.a, osd.0, osd.1, mds.a
- step two ordering: mon.b, mon.c, osd.2, osd.3
- ceph expected to be healthy state after each step
-upgrade-sequence:
- sequential:
- - ceph.restart:
- daemons: [mon.a]
- wait-for-healthy: true
- - sleep:
- duration: 60
- - ceph.restart:
- daemons: [mon.b, mon.c, mgr.x]
- wait-for-healthy: true
- - sleep:
- duration: 60
- - ceph.restart:
- daemons: [osd.0, osd.1]
- wait-for-healthy: true
- - sleep:
- duration: 60
- - ceph.restart: [mds.a]
- - sleep:
- duration: 60
- - sleep:
- duration: 60
- - ceph.restart:
- daemons: [osd.2, osd.3]
- wait-for-healthy: false
- wait-for-osds-up: true
- - sleep:
- duration: 60
+++ /dev/null
-meta:
-- desc: |
- run a cephfs stress test
- mount ceph-fuse on client.3 before running workunit
-tasks:
-- sequential:
- - ceph-fuse:
- - print: "**** done ceph-fuse 5-final-workload"
- - workunit:
- clients:
- client.3:
- - suites/blogbench.sh
- - print: "**** done suites/blogbench.sh 5-final-workload"
+++ /dev/null
-meta:
-- desc: |
- randomized correctness test for rados operations on a replicated pool with snapshots
-tasks:
- - rados:
- clients: [client.1]
- ops: 4000
- objects: 50
- write_append_excl: false
- op_weights:
- read: 100
- write: 100
- delete: 50
- snap_create: 50
- snap_remove: 50
- rollback: 50
- - print: "**** done rados 4-final-workload"
+++ /dev/null
-meta:
-- desc: |
- generate read/write load with rados objects ranging from 1 byte to 1MB
-tasks:
- - workunit:
- clients:
- client.1:
- - rados/load-gen-mix.sh
- - print: "**** done rados/load-gen-mix.sh 4-final-workload"
+++ /dev/null
-meta:
-- desc: |
- librados C and C++ api tests
-overrides:
- ceph:
- log-whitelist:
- - reached quota
-tasks:
- - mon_thrash:
- revive_delay: 20
- thrash_delay: 1
- - print: "**** done mon_thrash 4-final-workload"
- - workunit:
- branch: luminous
- clients:
- client.1:
- - rados/test.sh
- - print: "**** done rados/test.sh 4-final-workload"
+++ /dev/null
-meta:
-- desc: |
- rbd object class functional tests
-tasks:
- - workunit:
- clients:
- client.1:
- - cls/test_cls_rbd.sh
- - print: "**** done cls/test_cls_rbd.sh 4-final-workload"
+++ /dev/null
-meta:
-- desc: |
- run basic import/export cli tests for rbd
- on NO upgrated client
-tasks:
- - workunit:
- branch: luminous
- clients:
- client.4:
- - rbd/import_export.sh
- env:
- RBD_CREATE_ARGS: --new-format
- - print: "**** done rbd/import_export.sh 4-final-workload on NO upgrated client"
+++ /dev/null
-meta:
-- desc: |
- run basic import/export cli tests for rbd
- on upgrated client
-tasks:
- - workunit:
- clients:
- client.1:
- - rbd/import_export.sh
- env:
- RBD_CREATE_ARGS: --new-format
- - print: "**** done rbd/import_export.sh 4-final-workload on upgrated client"
+++ /dev/null
-meta:
-- desc: |
- swift api tests for rgw
-overrides:
- rgw:
- frontend: civetweb
-tasks:
- - rgw: [client.1]
- - print: "**** done rgw 4-final-workload"
- - swift:
- client.1:
- rgw_server: client.1
- - print: "**** done swift 4-final-workload"
+++ /dev/null
-../../../../distros/supported/
\ No newline at end of file
+++ /dev/null
-../stress-split/objectstore/
\ No newline at end of file
+++ /dev/null
-../stress-split/0-cluster/
\ No newline at end of file
+++ /dev/null
-../stress-split/1-ceph-install/
\ No newline at end of file
+++ /dev/null
-../stress-split/2-partial-upgrade/
\ No newline at end of file
+++ /dev/null
-meta:
-- desc: |
- randomly kill and revive osd
- small chance to increase the number of pgs
-overrides:
- ceph:
- log-whitelist:
- - but it is still running
- - wrongly marked me down
- - objects unfound and apparently lost
- - log bound mismatch
-tasks:
-- parallel:
- - stress-tasks
-stress-tasks:
-- thrashosds:
- timeout: 1200
- chance_pgnum_grow: 1
- chance_pgpnum_fix: 1
- min_in: 4
- chance_thrash_cluster_full: 0
- chance_thrash_pg_upmap: 0
- chance_thrash_pg_upmap_items: 0
- chance_force_recovery: 0
-- print: "**** done thrashosds 3-thrash"
+++ /dev/null
-meta:
-- desc: |
- randomized correctness test for rados operations on an erasure coded pool
-stress-tasks:
- - rados:
- clients: [client.0]
- ops: 4000
- objects: 50
- ec_pool: true
- write_append_excl: false
- op_weights:
- read: 100
- write: 0
- append: 100
- delete: 50
- snap_create: 50
- snap_remove: 50
- rollback: 50
- copy_from: 50
- setattr: 25
- rmattr: 25
- - print: "**** done rados ec task"
+++ /dev/null
-../stress-split/5-finish-upgrade.yaml
\ No newline at end of file
+++ /dev/null
-#
-# k=3 implies a stripe_width of 1376*3 = 4128 which is different from
-# the default value of 4096 It is also not a multiple of 1024*1024 and
-# creates situations where rounding rules during recovery becomes
-# necessary.
-#
-meta:
-- desc: |
- randomized correctness test for rados operations on an erasure coded pool
- using the jerasure plugin with k=3 and m=1
-tasks:
-- rados:
- clients: [client.0]
- ops: 4000
- objects: 50
- ec_pool: true
- write_append_excl: false
- erasure_code_profile:
- name: jerasure31profile
- plugin: jerasure
- k: 3
- m: 1
- technique: reed_sol_van
- crush-failure-domain: osd
- op_weights:
- read: 100
- write: 0
- append: 100
- delete: 50
- snap_create: 50
- snap_remove: 50
- rollback: 50
- copy_from: 50
- setattr: 25
- rmattr: 25
+++ /dev/null
-../../../../distros/supported/
\ No newline at end of file
+++ /dev/null
-../stress-split/objectstore/
\ No newline at end of file
+++ /dev/null
-../../../../tasks/thrashosds-health.yaml
\ No newline at end of file
+++ /dev/null
-openstack:
- - machine:
- disk: 100 # GB
- - volumes: # attached to each instance
- count: 3
- size: 30 # GB
+++ /dev/null
-meta:
-- desc: |
- Run ceph on two nodes,
- with a separate client-only node.
- Use xfs beneath the osds.
-overrides:
- ceph:
- fs: xfs
- log-whitelist:
- - overall HEALTH_
- - \(MON_DOWN\)
- - \(MGR_DOWN\)
- conf:
- global:
- enable experimental unrecoverable data corrupting features: "*"
- mon:
- mon warn on osd down out interval zero: false
-roles:
-- - mon.a
- - mon.b
- - mon.c
- - mgr.x
- - osd.0
- - osd.1
- - osd.2
-- - osd.3
- - osd.4
- - osd.5
-- - client.0
+++ /dev/null
-meta:
-- desc: install ceph/luminous latest
-tasks:
-- install:
- branch: luminous
-- print: "**** done install luminous"
-- ceph:
-- exec:
- osd.0:
- - ceph osd require-osd-release luminous
- - ceph osd set-require-min-compat-client luminous
-- print: "**** done ceph "
-overrides:
- ceph:
- conf:
- mon:
- mon warn on osd down out interval zero: false
+++ /dev/null
-meta:
-- desc: |
- install upgrade ceph/-x on one node only
- 1st half
- restart : osd.0,1,2
-tasks:
-- install.upgrade:
- osd.0:
-- print: "**** done install.upgrade osd.0"
-- ceph.restart:
- daemons: [mon.a,mon.b,mon.c,mgr.x,osd.0,osd.1,osd.2]
-- print: "**** done ceph.restart 1st half"
+++ /dev/null
-meta:
-- desc: |
- randomly kill and revive osd
- small chance to increase the number of pgs
-overrides:
- ceph:
- log-whitelist:
- - but it is still running
- - wrongly marked me down
- - objects unfound and apparently lost
- - log bound mismatch
-tasks:
-- parallel:
- - stress-tasks
-stress-tasks:
-- thrashosds:
- timeout: 1200
- chance_pgnum_grow: 1
- chance_pgpnum_fix: 1
- chance_thrash_cluster_full: 0
- chance_thrash_pg_upmap: 0
- chance_thrash_pg_upmap_items: 0
- disable_objectstore_tool_tests: true
- chance_force_recovery: 0
-- print: "**** done thrashosds 3-thrash"
+++ /dev/null
-meta:
-- desc: |
- run randomized correctness test for rados operations
- generate write load with rados bench
-stress-tasks:
-- full_sequential:
- - radosbench:
- clients: [client.0]
- time: 150
- - radosbench:
- clients: [client.0]
- time: 150
- - radosbench:
- clients: [client.0]
- time: 150
- - radosbench:
- clients: [client.0]
- time: 150
- - radosbench:
- clients: [client.0]
- time: 150
- - radosbench:
- clients: [client.0]
- time: 150
- - radosbench:
- clients: [client.0]
- time: 150
- - radosbench:
- clients: [client.0]
- time: 150
- - radosbench:
- clients: [client.0]
- time: 150
- - radosbench:
- clients: [client.0]
- time: 150
- - radosbench:
- clients: [client.0]
- time: 150
-- print: "**** done radosbench 7-workload"
+++ /dev/null
-meta:
-- desc: |
- run basic cls tests for rbd
-stress-tasks:
-- workunit:
- branch: luminous
- clients:
- client.0:
- - cls/test_cls_rbd.sh
-- print: "**** done cls/test_cls_rbd.sh 5-workload"
+++ /dev/null
-meta:
-- desc: |
- run basic import/export cli tests for rbd
-stress-tasks:
-- workunit:
- branch: luminous
- clients:
- client.0:
- - rbd/import_export.sh
- env:
- RBD_CREATE_ARGS: --new-format
-- print: "**** done rbd/import_export.sh 5-workload"
+++ /dev/null
-meta:
-- desc: |
- librbd C and C++ api tests
-stress-tasks:
-- workunit:
- branch: luminous
- clients:
- client.0:
- - rbd/test_librbd.sh
-- print: "**** done rbd/test_librbd.sh 7-workload"
+++ /dev/null
-meta:
-- desc: |
- randomized correctness test for rados operations on a replicated pool,
- using only reads, writes, and deletes
-stress-tasks:
-- full_sequential:
- - rados:
- clients: [client.0]
- ops: 4000
- objects: 500
- write_append_excl: false
- op_weights:
- read: 45
- write: 45
- delete: 10
-- print: "**** done rados/readwrite 5-workload"
+++ /dev/null
-meta:
-- desc: |
- randomized correctness test for rados operations on a replicated pool with snapshot operations
-stress-tasks:
-- full_sequential:
- - rados:
- clients: [client.0]
- ops: 4000
- objects: 50
- write_append_excl: false
- op_weights:
- read: 100
- write: 100
- delete: 50
- snap_create: 50
- snap_remove: 50
- rollback: 50
-- print: "**** done rados/snaps-few-objects 5-workload"
+++ /dev/null
-tasks:
-- install.upgrade:
- osd.3:
- client.0:
-- ceph.restart:
- daemons: [osd.3, osd.4, osd.5]
- wait-for-healthy: false
- wait-for-osds-up: true
-
+++ /dev/null
-meta:
-- desc: |
- librbd python api tests
-tasks:
-- workunit:
- branch: luminous
- clients:
- client.0:
- - rbd/test_librbd_python.sh
-- print: "**** done rbd/test_librbd_python.sh 9-workload"
+++ /dev/null
-meta:
-- desc: |
- swift api tests for rgw
-tasks:
-- rgw:
- client.0:
-- print: "**** done rgw 9-workload"
-- swift:
- client.0:
- rgw_server: client.0
-- print: "**** done swift 9-workload"
+++ /dev/null
-meta:
-- desc: |
- randomized correctness test for rados operations on a replicated pool with snapshot operations
-tasks:
-- rados:
- clients: [client.0]
- ops: 4000
- objects: 500
- write_append_excl: false
- op_weights:
- read: 100
- write: 100
- delete: 50
- snap_create: 50
- snap_remove: 50
- rollback: 50
+++ /dev/null
-../../../../distros/supported/
\ No newline at end of file
+++ /dev/null
-../../../../../objectstore/bluestore.yaml
\ No newline at end of file
+++ /dev/null
-../../../../../objectstore/filestore-xfs.yaml
\ No newline at end of file
+++ /dev/null
-../../../../tasks/thrashosds-health.yaml
\ No newline at end of file
from teuthology import misc as teuthology
from cephfs.fuse_mount import FuseMount
+from tasks.cephfs.filesystem import MDSCluster
+from tasks.cephfs.filesystem import Filesystem
log = logging.getLogger(__name__)
all_mounts = getattr(ctx, 'mounts', {})
mounted_by_me = {}
+ log.info('Wait for MDS to reach steady state...')
+ mds_cluster = MDSCluster(ctx)
+ status = mds_cluster.status()
+ for filesystem in status.get_filesystems():
+ fs = Filesystem(ctx, fscid=filesystem['id'])
+ fs.wait_for_daemons()
+ log.info('Ready to start ceph-fuse...')
+
# Construct any new FuseMount instances
for id_, remote in clients:
client_config = config.get("client.%s" % id_)
self._one_or_all(mds_id, _fail_restart)
+ def mds_signal(self, mds_id, sig, silent=False):
+ """
+ signal a MDS daemon
+ """
+ self.mds_daemons[mds_id].signal(sig, silent);
+
def newfs(self, name='cephfs', create=True):
return Filesystem(self._ctx, name=name, create=create)
+import time
+import signal
import json
import logging
from unittest import case, SkipTest
self.fs.mon_manager.raw_cluster_cmd('fs', 'set', self.fs.name, 'standby_count_wanted', '0')
self.wait_for_health_clear(timeout=30)
+ def test_discontinuous_mdsmap(self):
+ """
+ That discontinuous mdsmap does not affect failover.
+ See http://tracker.ceph.com/issues/24856.
+ """
+ mds_ids = sorted(self.mds_cluster.mds_ids)
+ mds_a, mds_b = mds_ids[0:2]
+ # Assign mds to fixed ranks. To prevent standby mds from replacing frozen mds
+ rank = 0;
+ for mds_id in mds_ids:
+ self.set_conf("mds.{0}".format(mds_id), "mds_standby_for_rank", str(rank))
+ rank += 1
+ self.mds_cluster.mds_restart()
+ self.fs.wait_for_daemons()
+
+ self.fs.set_max_mds(2)
+ self.fs.wait_for_state('up:active', rank=1)
+
+ # Drop 'export prep' message, make import stay in 'discovered' state
+ self.fs.mds_asok(['config', 'set', 'mds_inject_migrator_message_loss', '82'], mds_id=mds_b)
+
+ self.mount_a.run_shell(["mkdir", "a"])
+ self.mount_a.setfattr("a", "ceph.dir.pin", "1")
+ self.mount_a.umount_wait()
+
+ # Should be long enough for start the export
+ time.sleep(30)
+
+ grace = float(self.fs.get_config("mds_beacon_grace", service_type="mon"))
+ monc_timeout = float(self.fs.get_config("mon_client_ping_timeout", service_type="mds"))
+ # Freeze mds_b
+ self.mds_cluster.mds_signal(mds_b, signal.SIGSTOP)
+ self.wait_until_true(
+ lambda: "laggy_since" in self.fs.mon_manager.get_mds_status(mds_b),
+ timeout=grace * 2
+ )
+
+ self.mds_cluster.mds_restart(mds_a)
+ self.fs.wait_for_state('up:resolve', rank=0, timeout=30)
+
+ # Make sure of mds_b's monitor connection gets reset
+ time.sleep(monc_timeout * 2)
+
+ # Unfreeze mds_b, it will get discontinuous mdsmap
+ self.mds_cluster.mds_signal(mds_b, signal.SIGCONT)
+ self.wait_until_true(
+ lambda: "laggy_since" not in self.fs.mon_manager.get_mds_status(mds_b),
+ timeout=grace * 2
+ )
+ # Check if mds_b sends 'resolve' message to mds_a. If not, mds_a can't become active
+ self.fs.wait_for_state('up:active', rank=0, timeout=30)
class TestStandbyReplay(CephFSTestCase):
MDSS_REQUIRED = 4
self.mount_a.umount_wait()
with self.assert_cluster_log("inode table repaired", invert_match=True):
- self.fs.mds_asok(["scrub_path", "/", "repair", "recursive"])
+ out_json = self.fs.mds_asok(["scrub_path", "/", "repair", "recursive"])
+ self.assertNotEqual(out_json, None)
self.mds_cluster.mds_stop()
self.mds_cluster.mds_fail()
self.fs.wait_for_daemons()
with self.assert_cluster_log("inode table repaired"):
- self.fs.mds_asok(["scrub_path", "/", "repair", "recursive"])
+ out_json = self.fs.mds_asok(["scrub_path", "/", "repair", "recursive"])
+ self.assertNotEqual(out_json, None)
self.mds_cluster.mds_stop()
table_text = self.fs.table_tool(["0", "show", "inode"])
"oh i'm sorry did i overwrite your xattr?")
with self.assert_cluster_log("bad backtrace on inode"):
- self.fs.mds_asok(["scrub_path", "/", "repair", "recursive"])
+ out_json = self.fs.mds_asok(["scrub_path", "/", "repair", "recursive"])
+ self.assertNotEqual(out_json, None)
self.fs.mds_asok(["flush", "journal"])
backtrace = self.fs.read_backtrace(file_ino)
self.assertEqual(['alpha', 'parent_a'],
ValidationError = namedtuple("ValidationError", ["exception", "backtrace"])
-class Workload(object):
+class Workload(CephFSTestCase):
def __init__(self, filesystem, mount):
self._mount = mount
self._filesystem = filesystem
# a string
self._errors = []
- def assert_equal(self, a, b):
- try:
- if a != b:
- raise AssertionError("{0} != {1}".format(a, b))
- except AssertionError as e:
- self._errors.append(
- ValidationError(e, traceback.format_exc(3))
- )
-
def write(self):
"""
Write the workload files to the mount
self._filesystem.mds_asok(["flush", "journal"])
bt = self._filesystem.read_backtrace(st['st_ino'])
parent = bt['ancestors'][0]['dname']
- self.assert_equal(parent, "sixmegs")
+ self.assertEqual(parent, 'sixmegs')
return self._errors
def damage(self):
self._filesystem.wait_for_daemons()
def validate(self):
- self._filesystem.mds_asok(["scrub_path", "/", "recursive", "repair"])
- self.assert_equal(self._filesystem.are_daemons_healthy(), True)
+ out_json = self._filesystem.mds_asok(["scrub_path", "/", "recursive", "repair"])
+ self.assertNotEqual(out_json, None)
+ self.assertTrue(self._filesystem.are_daemons_healthy())
return self._errors
# Apply any data damage the workload wants
workload.damage()
- self.fs.mds_asok(["scrub_path", "/", "recursive", "repair"])
+ out_json = self.fs.mds_asok(["scrub_path", "/", "recursive", "repair"])
+ self.assertNotEqual(out_json, None)
# See that the files are present and correct
errors = workload.validate()
- \(REQUEST_SLOW\)
- \(TOO_FEW_PGS\)
- \(MON_DOWN\)
+ - slow requests
self.proc = self.controller.run([os.path.join(BIN_PREFIX, "./ceph-{0}".format(self.daemon_type)), "-i", self.daemon_id])
+ def signal(self, sig, silent=False):
+ if not self.running():
+ raise RuntimeError("Can't send signal to non-running daemon")
+
+ os.kill(self._get_pid(), sig)
+ if not silent:
+ log.info("Sent signal {0} to {1}.{2}".format(sig, self.daemon_type, self.daemon_id))
+
def safe_kill(pid):
"""
# We only aim at testing the units are parsed accordingly
# and don't intend to test whether the options being set
# actually expect SI units to be passed.
- # Keep in mind that all integer based options (i.e., INT,
- # LONG, U32, U64) will accept SI unit modifiers.
+ # Keep in mind that all integer based options that are not based on bytes
+ # (i.e., INT, LONG, U32, U64) will accept SI unit modifiers and be parsed to
+ # base 10.
initial_value=$(get_config_value_or_die "mon.a" "mon_pg_warn_min_objects")
$SUDO ceph daemon mon.a config set mon_pg_warn_min_objects 10
expect_config_value "mon.a" "mon_pg_warn_min_objects" 10
$SUDO ceph daemon mon.a config set mon_pg_warn_min_objects 10K
- expect_config_value "mon.a" "mon_pg_warn_min_objects" 10240
+ expect_config_value "mon.a" "mon_pg_warn_min_objects" 10000
$SUDO ceph daemon mon.a config set mon_pg_warn_min_objects 1G
- expect_config_value "mon.a" "mon_pg_warn_min_objects" 1073741824
+ expect_config_value "mon.a" "mon_pg_warn_min_objects" 1000000000
$SUDO ceph daemon mon.a config set mon_pg_warn_min_objects 10F > $TMPFILE || true
check_response "'10F': (22) Invalid argument"
# now test with injectargs
ceph tell mon.a injectargs '--mon_pg_warn_min_objects 10'
expect_config_value "mon.a" "mon_pg_warn_min_objects" 10
ceph tell mon.a injectargs '--mon_pg_warn_min_objects 10K'
- expect_config_value "mon.a" "mon_pg_warn_min_objects" 10240
+ expect_config_value "mon.a" "mon_pg_warn_min_objects" 10000
ceph tell mon.a injectargs '--mon_pg_warn_min_objects 1G'
- expect_config_value "mon.a" "mon_pg_warn_min_objects" 1073741824
+ expect_config_value "mon.a" "mon_pg_warn_min_objects" 1000000000
expect_false ceph tell mon.a injectargs '--mon_pg_warn_min_objects 10F'
expect_false ceph tell mon.a injectargs '--mon_globalid_prealloc -1'
$SUDO ceph daemon mon.a config set mon_pg_warn_min_objects $initial_value
}
+function test_mon_injectargs_IEC()
+{
+ # Test IEC units during injectargs and 'config set'
+ # We only aim at testing the units are parsed accordingly
+ # and don't intend to test whether the options being set
+ # actually expect IEC units to be passed.
+ # Keep in mind that all integer based options that are based on bytes
+ # (i.e., INT, LONG, U32, U64) will accept IEC unit modifiers, as well as SI
+ # unit modifiers (for backwards compatibility and convinience) and be parsed
+ # to base 2.
+ initial_value=$(get_config_value_or_die "mon.a" "mon_data_size_warn")
+ $SUDO ceph daemon mon.a config set mon_data_size_warn 15000000000
+ expect_config_value "mon.a" "mon_data_size_warn" 15000000000
+ $SUDO ceph daemon mon.a config set mon_data_size_warn 15G
+ expect_config_value "mon.a" "mon_data_size_warn" 16106127360
+ $SUDO ceph daemon mon.a config set mon_data_size_warn 16Gi
+ expect_config_value "mon.a" "mon_data_size_warn" 17179869184
+ $SUDO ceph daemon mon.a config set mon_data_size_warn 10F > $TMPFILE || true
+ check_response "'10F': (22) Invalid argument"
+ # now test with injectargs
+ ceph tell mon.a injectargs '--mon_data_size_warn 15000000000'
+ expect_config_value "mon.a" "mon_data_size_warn" 15000000000
+ ceph tell mon.a injectargs '--mon_data_size_warn 15G'
+ expect_config_value "mon.a" "mon_data_size_warn" 16106127360
+ ceph tell mon.a injectargs '--mon_data_size_warn 16Gi'
+ expect_config_value "mon.a" "mon_data_size_warn" 17179869184
+ expect_false ceph tell mon.a injectargs '--mon_data_size_warn 10F'
+ $SUDO ceph daemon mon.a config set mon_data_size_warn $initial_value
+}
+
function test_tiering_agent()
{
local slow=slow_eviction
ceph osd pool set-quota tmp-quota-pool max_bytes 10
ceph osd pool set-quota tmp-quota-pool max_objects 10M
#
- # get quotas
- #
- ceph osd pool get-quota tmp-quota-pool | grep 'max bytes.*10B'
- ceph osd pool get-quota tmp-quota-pool | grep 'max objects.*10240k objects'
- #
# get quotas in json-pretty format
#
ceph osd pool get-quota tmp-quota-pool --format=json-pretty | \
- grep '"quota_max_objects":.*10485760'
+ grep '"quota_max_objects":.*10000000'
ceph osd pool get-quota tmp-quota-pool --format=json-pretty | \
grep '"quota_max_bytes":.*10'
#
+ # get quotas
+ #
+ ceph osd pool get-quota tmp-quota-pool | grep 'max bytes.*10B'
+ ceph osd pool get-quota tmp-quota-pool | grep 'max objects.*10M objects'
+ #
+ # set valid quotas with unit prefix
+ #
+ ceph osd pool set-quota tmp-quota-pool max_bytes 10K
+ #
+ # get quotas
+ #
+ ceph osd pool get-quota tmp-quota-pool | grep 'max bytes.*10Ki'
+ #
+ # set valid quotas with unit prefix
+ #
+ ceph osd pool set-quota tmp-quota-pool max_bytes 10Ki
+ #
+ # get quotas
+ #
+ ceph osd pool get-quota tmp-quota-pool | grep 'max bytes.*10Ki'
+ #
+ #
# reset pool quotas
#
ceph osd pool set-quota tmp-quota-pool max_bytes 0
rbd export testimg1 /tmp/img3
# info
- rbd info testimg1 | grep 'size 128 MB'
- rbd info --snap=snap1 testimg1 | grep 'size 256 MB'
+ rbd info testimg1 | grep 'size 128MiB'
+ rbd info --snap=snap1 testimg1 | grep 'size 256MiB'
# export-diff
rm -rf /tmp/diff-testimg1-1 /tmp/diff-testimg1-2
rbd import-diff --sparse-size 8K /tmp/diff-testimg1-2 testimg-diff1
# info
- rbd info testimg1 | grep 'size 128 MB'
- rbd info --snap=snap1 testimg1 | grep 'size 256 MB'
- rbd info testimg-diff1 | grep 'size 128 MB'
- rbd info --snap=snap1 testimg-diff1 | grep 'size 256 MB'
+ rbd info testimg1 | grep 'size 128MiB'
+ rbd info --snap=snap1 testimg1 | grep 'size 256MiB'
+ rbd info testimg-diff1 | grep 'size 128MiB'
+ rbd info --snap=snap1 testimg-diff1 | grep 'size 256MiB'
# make copies
rbd copy testimg1 --snap=snap1 testimg2
rbd copy testimg-diff1 --sparse-size 768K testimg-diff3
# verify the result
- rbd info testimg2 | grep 'size 256 MB'
- rbd info testimg3 | grep 'size 128 MB'
- rbd info testimg-diff2 | grep 'size 256 MB'
- rbd info testimg-diff3 | grep 'size 128 MB'
+ rbd info testimg2 | grep 'size 256MiB'
+ rbd info testimg3 | grep 'size 128MiB'
+ rbd info testimg-diff2 | grep 'size 256MiB'
+ rbd info testimg-diff3 | grep 'size 128MiB'
rbd export testimg1 /tmp/img1.new
rbd export testimg2 /tmp/img2.new
# rollback
rbd snap rollback --snap=snap1 testimg1
rbd snap rollback --snap=snap1 testimg-diff1
- rbd info testimg1 | grep 'size 256 MB'
- rbd info testimg-diff1 | grep 'size 256 MB'
+ rbd info testimg1 | grep 'size 256MiB'
+ rbd info testimg-diff1 | grep 'size 256MiB'
rbd export testimg1 /tmp/img1.snap1
rbd export testimg-diff1 /tmp/img-diff1.snap1
cmp /tmp/img2 /tmp/img1.snap1
rbd ls | grep test2
rbd ls | wc -l | grep 2
# look for fields in output of ls -l without worrying about space
- rbd ls -l | grep 'test1.*1024k.*1'
- rbd ls -l | grep 'test2.*1024k.*1'
+ rbd ls -l | grep 'test1.*1MiB.*1'
+ rbd ls -l | grep 'test2.*1MiB.*1'
rbd rm test1
rbd rm test2
rbd ls | grep test1
rbd ls | grep test2
rbd ls | wc -l | grep 2
- rbd ls -l | grep 'test1.*1024k.*2'
- rbd ls -l | grep 'test2.*1024k.*2'
+ rbd ls -l | grep 'test1.*1MiB.*2'
+ rbd ls -l | grep 'test2.*1MiB.*2'
rbd rm test1
rbd rm test2
rbd ls | grep test1
rbd ls | grep test2
rbd ls | wc -l | grep 2
- rbd ls -l | grep 'test1.*1024k.*2'
- rbd ls -l | grep 'test2.*1024k.*1'
+ rbd ls -l | grep 'test1.*1MiB.*2'
+ rbd ls -l | grep 'test2.*1MiB.*1'
remove_images
# test that many images can be shown by ls
wget http://download.ceph.com/qa/blogbench-1.0.tar.bz2
#cp /home/gregf/src/blogbench-1.0.tar.bz2 .
tar -xvf blogbench-1.0.tar.bz2
-cd blogbench*
+cd blogbench-1.0/
echo "making blogbench"
./configure
make
--- /dev/null
+diff -urp 1/parser.c 2/parser.c
+--- 1/parser.c 2008-10-28 04:17:05.000000000 +0800
++++ 2/parser.c 2018-06-26 20:25:59.000000000 +0800
+@@ -203,7 +203,7 @@ static char *get_optstr(char *buf, char
+ len = strnlen(string, BUFSIZE);
+ sprintf(search_str, "%s=%%%ds\\n", string, BUFSIZE - len-1);
+ if (1 == sscanf(line, search_str, &temp)) {
+- len = strnlen(temp, 4096);
++ len = strnlen(temp, 4095) + 1;
+ ret_buf = malloc(len);
+ strncpy(ret_buf, temp, len);
+ return ret_buf;
wget http://download.ceph.com/qa/ffsb.tar.bz2
tar jxvf ffsb.tar.bz2
-cd ffsb-*
+cd ffsb-6.0-rc2
+patch -p1 < $mydir/ffsb.patch
./configure
make
cd ..
echo "getting iogen"
wget http://download.ceph.com/qa/iogen_3.1p0.tar
tar -xvzf iogen_3.1p0.tar
-cd iogen*
+cd iogen_3.1p0
echo "making iogen"
make
echo "running iogen"
wget http://download.ceph.com/qa/pjd-fstest-20090130-RC-aclfixes.tgz
tar zxvf pjd*.tgz
-cd pjd*
+cd pjd-fstest-20090130-RC
make clean
make
cd ..
-3ec878d1e53e1aeb47a9f619c49d9e7c0aa384d5
-v12.2.7
+ae699615bac534ea496ee965ac6192cb7e0e07c0
+v12.2.8
endif()
add_subdirectory(script)
-
-if(WITH_EMBEDDED)
- add_subdirectory(libcephd)
-endif()
from collections import namedtuple
+sys_info = namedtuple('sys_info', ['devices'])
+sys_info.devices = dict()
+
+
class UnloadedConfig(object):
"""
This class is used as the default value for conf.ceph so that if
conf.ceph = UnloadedConfig()
__version__ = "1.0.0"
+
+__release__ = "luminous"
"""
import logging
import os
-from ceph_volume import process
-from ceph_volume.exceptions import MultipleLVsError, MultipleVGsError, MultiplePVsError
+import uuid
+from math import floor
+from ceph_volume import process, util
+from ceph_volume.exceptions import (
+ MultipleLVsError, MultipleVGsError,
+ MultiplePVsError, SizeAllocationError
+)
logger = logging.getLogger(__name__)
return report
+def _splitname_parser(line):
+ """
+ Parses the output from ``dmsetup splitname``, that should contain prefixes
+ (--nameprefixes) and set the separator to ";"
+
+ Output for /dev/mapper/vg-lv will usually look like::
+
+ DM_VG_NAME='/dev/mapper/vg';DM_LV_NAME='lv';DM_LV_LAYER=''
+
+
+ The ``VG_NAME`` will usually not be what other callers need (e.g. just 'vg'
+ in the example), so this utility will split ``/dev/mapper/`` out, so that
+ the actual volume group name is kept
+
+ :returns: dictionary with stripped prefixes
+ """
+ parts = line[0].split(';')
+ parsed = {}
+ for part in parts:
+ part = part.replace("'", '')
+ key, value = part.split('=')
+ if 'DM_VG_NAME' in key:
+ value = value.split('/dev/mapper/')[-1]
+ key = key.split('DM_')[-1]
+ parsed[key] = value
+
+ return parsed
+
+
+def sizing(device_size, parts=None, size=None):
+ """
+ Calculate proper sizing to fully utilize the volume group in the most
+ efficient way possible. To prevent situations where LVM might accept
+ a percentage that is beyond the vg's capabilities, it will refuse with
+ an error when requesting a larger-than-possible parameter, in addition
+ to rounding down calculations.
+
+ A dictionary with different sizing parameters is returned, to make it
+ easier for others to choose what they need in order to create logical
+ volumes::
+
+ >>> sizing(100, parts=2)
+ >>> {'parts': 2, 'percentages': 50, 'sizes': 50}
+
+ """
+ if parts is not None and size is not None:
+ raise ValueError(
+ "Cannot process sizing with both parts (%s) and size (%s)" % (parts, size)
+ )
+
+ if size and size > device_size:
+ raise SizeAllocationError(size, device_size)
+
+ def get_percentage(parts):
+ return int(floor(100 / float(parts)))
+
+ if parts is not None:
+ # Prevent parts being 0, falling back to 1 (100% usage)
+ parts = parts or 1
+ percentages = get_percentage(parts)
+
+ if size:
+ parts = int(device_size / size) or 1
+ percentages = get_percentage(parts)
+
+ sizes = device_size / parts if parts else int(floor(device_size))
+
+ return {
+ 'parts': parts,
+ 'percentages': percentages,
+ 'sizes': int(sizes),
+ }
+
+
def parse_tags(lv_tags):
"""
Return a dictionary mapping of all the tags associated with
return '0'
+def dmsetup_splitname(dev):
+ """
+ Run ``dmsetup splitname`` and parse the results.
+
+ .. warning:: This call does not ensure that the device is correct or that
+ it exists. ``dmsetup`` will happily take a non existing path and still
+ return a 0 exit status.
+ """
+ command = [
+ 'dmsetup', 'splitname', '--noheadings',
+ "--separator=';'", '--nameprefixes', dev
+ ]
+ out, err, rc = process.call(command)
+ return _splitname_parser(out)
+
+
+def is_lv(dev, lvs=None):
+ """
+ Boolean to detect if a device is an LV or not.
+ """
+ splitname = dmsetup_splitname(dev)
+ # Allowing to optionally pass `lvs` can help reduce repetitive checks for
+ # multiple devices at once.
+ lvs = lvs if lvs is not None else Volumes()
+ if splitname.get('LV_NAME'):
+ lvs.filter(lv_name=splitname['LV_NAME'], vg_name=splitname['VG_NAME'])
+ return len(lvs) > 0
+ return False
+
+
def get_api_vgs():
"""
Return the list of group volumes available in the system using flags to
Command and sample delimited output should look like::
- $ vgs --noheadings --readonly --separator=';' \
+ $ vgs --noheadings --units=g --readonly --separator=';' \
-o vg_name,pv_count,lv_count,snap_count,vg_attr,vg_size,vg_free
ubuntubox-vg;1;2;0;wz--n-;299.52g;12.00m
osd_vg;3;1;0;wz--n-;29.21g;9.21g
+ To normalize sizing, the units are forced in 'g' which is equivalent to
+ gigabytes, which uses multiples of 1024 (as opposed to 1000)
"""
- fields = 'vg_name,pv_count,lv_count,snap_count,vg_attr,vg_size,vg_free'
+ fields = 'vg_name,pv_count,lv_count,snap_count,vg_attr,vg_size,vg_free,vg_free_count'
stdout, stderr, returncode = process.call(
- ['vgs', '--noheadings', '--readonly', '--separator=";"', '-o', fields]
+ ['vgs', '--noheadings', '--readonly', '--units=g', '--separator=";"', '-o', fields]
)
return _output_parser(stdout, fields)
;/dev/ubuntubox-vg/swap_1;swap_1;ubuntubox-vg
"""
- fields = 'lv_tags,lv_path,lv_name,vg_name,lv_uuid'
+ fields = 'lv_tags,lv_path,lv_name,vg_name,lv_uuid,lv_size'
stdout, stderr, returncode = process.call(
['lvs', '--noheadings', '--readonly', '--separator=";"', '-o', fields]
)
])
-def create_vg(name, *devices):
+def create_vg(devices, name=None, name_prefix=None):
"""
Create a Volume Group. Command looks like::
vgcreate --force --yes group_name device
Once created the volume group is returned as a ``VolumeGroup`` object
+
+ :param devices: A list of devices to create a VG. Optionally, a single
+ device (as a string) can be used.
+ :param name: Optionally set the name of the VG, defaults to 'ceph-{uuid}'
+ :param name_prefix: Optionally prefix the name of the VG, which will get combined
+ with a UUID string
"""
+ if isinstance(devices, set):
+ devices = list(devices)
+ if not isinstance(devices, list):
+ devices = [devices]
+ if name_prefix:
+ name = "%s-%s" % (name_prefix, str(uuid.uuid4()))
+ elif name is None:
+ name = "ceph-%s" % str(uuid.uuid4())
process.run([
'vgcreate',
'--force',
'--yes',
- name] + list(devices)
+ name] + devices
)
vg = get_vg(vg_name=name)
return vg
+def extend_vg(vg, devices):
+ """
+ Extend a Volume Group. Command looks like::
+
+ vgextend --force --yes group_name [device, ...]
+
+ Once created the volume group is extended and returned as a ``VolumeGroup`` object
+
+ :param vg: A VolumeGroup object
+ :param devices: A list of devices to extend the VG. Optionally, a single
+ device (as a string) can be used.
+ """
+ if not isinstance(devices, list):
+ devices = [devices]
+ process.run([
+ 'vgextend',
+ '--force',
+ '--yes',
+ vg.name] + devices
+ )
+
+ vg = get_vg(vg_name=vg.name)
+ return vg
+
+
def remove_vg(vg_name):
"""
Removes a volume group.
return True
-def create_lv(name, group, size=None, tags=None):
+def create_lv(name, group, extents=None, size=None, tags=None, uuid_name=False):
"""
Create a Logical Volume in a Volume Group. Command looks like::
conform to the convention of prefixing them with "ceph." like::
{"ceph.block_device": "/dev/ceph/osd-1"}
+
+ :param uuid_name: Optionally combine the ``name`` with UUID to ensure uniqueness
"""
+ if uuid_name:
+ name = '%s-%s' % (name, uuid.uuid4())
+ if tags is None:
+ tags = {
+ "ceph.osd_id": "null",
+ "ceph.type": "null",
+ "ceph.cluster_fsid": "null",
+ "ceph.osd_fsid": "null",
+ }
+
# XXX add CEPH_VOLUME_LVM_DEBUG to enable -vvvv on lv operations
type_path_tag = {
'journal': 'ceph.journal_device',
'%s' % size,
'-n', name, group
])
+ elif extents:
+ process.run([
+ 'lvcreate',
+ '--yes',
+ '-l',
+ '%s' % extents,
+ '-n', name, group
+ ])
# create the lv with all the space available, this is needed because the
# system call is different for LVM
else:
return lv
+def create_lvs(volume_group, parts=None, size=None, name_prefix='ceph-lv'):
+ """
+ Create multiple Logical Volumes from a Volume Group by calculating the
+ proper extents from ``parts`` or ``size``. A custom prefix can be used
+ (defaults to ``ceph-lv``), these names are always suffixed with a uuid.
+
+ LV creation in ceph-volume will require tags, this is expected to be
+ pre-computed by callers who know Ceph metadata like OSD IDs and FSIDs. It
+ will probably not be the case when mass-creating LVs, so common/default
+ tags will be set to ``"null"``.
+
+ .. note:: LVs that are not in use can be detected by querying LVM for tags that are
+ set to ``"null"``.
+
+ :param volume_group: The volume group (vg) to use for LV creation
+ :type group: ``VolumeGroup()`` object
+ :param parts: Number of LVs to create *instead of* ``size``.
+ :type parts: int
+ :param size: Size (in gigabytes) of LVs to create, e.g. "as many 10gb LVs as possible"
+ :type size: int
+ :param extents: The number of LVM extents to use to create the LV. Useful if looking to have
+ accurate LV sizes (LVM rounds sizes otherwise)
+ """
+ if parts is None and size is None:
+ # fallback to just one part (using 100% of the vg)
+ parts = 1
+ lvs = []
+ tags = {
+ "ceph.osd_id": "null",
+ "ceph.type": "null",
+ "ceph.cluster_fsid": "null",
+ "ceph.osd_fsid": "null",
+ }
+ sizing = volume_group.sizing(parts=parts, size=size)
+ for part in range(0, sizing['parts']):
+ size = sizing['sizes']
+ extents = sizing['extents']
+ lv_name = '%s-%s' % (name_prefix, uuid.uuid4())
+ lvs.append(
+ create_lv(lv_name, volume_group.name, extents=extents, tags=tags)
+ )
+ return lvs
+
+
def get_vg(vg_name=None, vg_tags=None):
"""
Return a matching vg for the current system, requires ``vg_name`` or
)
if not pvs:
return None
- if len(pvs) > 1:
+ if len(pvs) > 1 and pv_tags:
raise MultiplePVsError(pv_name)
return pvs[0]
def __repr__(self):
return self.__str__()
+ def _parse_size(self, size):
+ error_msg = "Unable to convert vg size to integer: '%s'" % str(size)
+ try:
+ integer, _ = size.split('g')
+ except ValueError:
+ logger.exception(error_msg)
+ raise RuntimeError(error_msg)
+
+ return util.str_to_int(integer)
+
+ @property
+ def free(self):
+ """
+ Parse the available size in gigabytes from the ``vg_free`` attribute, that
+ will be a string with a character ('g') to indicate gigabytes in size.
+ Returns a rounded down integer to ease internal operations::
+
+ >>> data_vg.vg_free
+ '0.01g'
+        >>> data_vg.free
+ 0
+ """
+ return self._parse_size(self.vg_free)
+
+ @property
+ def size(self):
+ """
+ Parse the size in gigabytes from the ``vg_size`` attribute, that
+ will be a string with a character ('g') to indicate gigabytes in size.
+ Returns a rounded down integer to ease internal operations::
+
+ >>> data_vg.vg_size
+ '1024.9g'
+ >>> data_vg.size
+ 1024
+ """
+ return self._parse_size(self.vg_size)
+
+ def sizing(self, parts=None, size=None):
+ """
+ Calculate proper sizing to fully utilize the volume group in the most
+ efficient way possible. To prevent situations where LVM might accept
+ a percentage that is beyond the vg's capabilities, it will refuse with
+ an error when requesting a larger-than-possible parameter, in addition
+ to rounding down calculations.
+
+ A dictionary with different sizing parameters is returned, to make it
+ easier for others to choose what they need in order to create logical
+ volumes::
+
+ >>> data_vg.free
+ 1024
+ >>> data_vg.sizing(parts=4)
+ {'parts': 4, 'sizes': 256, 'percentages': 25}
+ >>> data_vg.sizing(size=512)
+ {'parts': 2, 'sizes': 512, 'percentages': 50}
+
+
+ :param parts: Number of parts to create LVs from
+ :param size: Size in gigabytes to divide the VG into
+
+ :raises SizeAllocationError: When requested size cannot be allocated with
+ :raises ValueError: If both ``parts`` and ``size`` are given
+ """
+ if parts is not None and size is not None:
+ raise ValueError(
+ "Cannot process sizing with both parts (%s) and size (%s)" % (parts, size)
+ )
+
+ # if size is given we need to map that to extents so that we avoid
+ # issues when trying to get this right with a size in gigabytes find
+ # the percentage first, cheating, because these values are thrown out
+ vg_free_count = util.str_to_int(self.vg_free_count)
+
+ if size:
+ extents = int(size * vg_free_count / self.free)
+ disk_sizing = sizing(self.free, size=size, parts=parts)
+ else:
+ if parts is not None:
+ # Prevent parts being 0, falling back to 1 (100% usage)
+ parts = parts or 1
+ size = int(self.free / parts)
+ extents = size * vg_free_count / self.free
+ disk_sizing = sizing(self.free, parts=parts)
+
+ extent_sizing = sizing(vg_free_count, size=extents)
+
+ disk_sizing['extents'] = int(extents)
+ disk_sizing['percentages'] = extent_sizing['percentages']
+ return disk_sizing
+
class Volume(object):
"""
import logging
import os
from textwrap import dedent
-from ceph_volume import process, conf, decorators, terminal
+from ceph_volume import process, conf, decorators, terminal, __release__
from ceph_volume.util import system, disk
from ceph_volume.util import prepare as prepare_utils
from ceph_volume.util import encryption as encryption_utils
# enable the ceph-volume unit for this OSD
systemctl.enable_volume(osd_id, osd_fsid, 'lvm')
+ # enable the OSD
+ systemctl.enable_osd(osd_id)
+
# start the OSD
systemctl.start_osd(osd_id)
terminal.success("ceph-volume lvm activate successful for osd ID: %s" % osd_id)
wal_device_path = get_osd_device_path(osd_lv, lvs, 'wal', dmcrypt_secret=dmcrypt_secret)
# Once symlinks are removed, the osd dir can be 'primed again.
- process.run([
+ prime_command = [
'ceph-bluestore-tool', '--cluster=%s' % conf.cluster,
'prime-osd-dir', '--dev', osd_lv_path,
- '--path', osd_path])
+ '--path', osd_path]
+
+ if __release__ != "luminous":
+ # mon-config changes are not available in Luminous
+ prime_command.append('--no-mon-config')
+
+ process.run(prime_command)
# always re-do the symlink regardless if it exists, so that the block,
# block.wal, and block.db devices that may have changed can be mapped
# correctly every time
# enable the ceph-volume unit for this OSD
systemctl.enable_volume(osd_id, osd_fsid, 'lvm')
+ # enable the OSD
+ systemctl.enable_osd(osd_id)
+
# start the OSD
systemctl.start_osd(osd_id)
terminal.success("ceph-volume lvm activate successful for osd ID: %s" % osd_id)
--- /dev/null
+import argparse
+from textwrap import dedent
+from ceph_volume import terminal, decorators
+from ceph_volume.util import disk, prompt_bool
+from ceph_volume.util import arg_validators
+from . import strategies
+
+
+device_list_template = """
+ * {path: <25} {size: <10} {state}"""
+
+
+def device_formatter(devices):
+ lines = []
+ for path, details in devices:
+ lines.append(device_list_template.format(
+ path=path, size=details['human_readable_size'],
+ state='solid' if details['rotational'] == '0' else 'rotational')
+ )
+
+ return ''.join(lines)
+
+
+# Scenario filtering/detection
+def bluestore_single_type(device_facts):
+ """
+ Detect devices that are just HDDs or solid state so that a 1:1
+ device-to-osd provisioning can be done
+ """
+ types = [device.sys_api['rotational'] for device in device_facts]
+ if len(set(types)) == 1:
+ return strategies.bluestore.SingleType
+
+
+def bluestore_mixed_type(device_facts):
+ """
+ Detect if devices are HDDs as well as solid state so that block.db can be
+ placed in solid devices while data is kept in the spinning drives.
+ """
+ types = [device.sys_api['rotational'] for device in device_facts]
+ if len(set(types)) > 1:
+ return strategies.bluestore.MixedType
+
+
+def filestore_single_type(device_facts):
+ """
+ Detect devices that are just HDDs or solid state so that a 1:1
+ device-to-osd provisioning can be done, keeping the journal on the OSD
+ """
+ types = [device.sys_api['rotational'] for device in device_facts]
+ if len(set(types)) == 1:
+ return strategies.filestore.SingleType
+
+
+def filestore_mixed_type(device_facts):
+ """
+ Detect if devices are HDDs as well as solid state so that the journal can be
+ placed in solid devices while data is kept in the spinning drives.
+ """
+ types = [device.sys_api['rotational'] for device in device_facts]
+ if len(set(types)) > 1:
+ return strategies.filestore.MixedType
+
+
+def get_strategy(args):
+ """
+ Given a set of devices as input, go through the different detection
+ mechanisms to narrow down on a strategy to use. The strategies are 4 in
+ total:
+
+ * Single device type on Bluestore
+ * Mixed device types on Bluestore
+ * Single device type on Filestore
+ * Mixed device types on Filestore
+
+ When the function matches to a scenario it returns the strategy class. This
+ allows for dynamic loading of the conditions needed for each scenario, with
+ normalized classes
+ """
+ bluestore_strategies = [bluestore_mixed_type, bluestore_single_type]
+ filestore_strategies = [filestore_mixed_type, filestore_single_type]
+ if args.bluestore:
+ strategies = bluestore_strategies
+ else:
+ strategies = filestore_strategies
+
+ for strategy in strategies:
+ backend = strategy(args.devices)
+ if backend:
+ return backend(args.devices, args)
+
+
+class Batch(object):
+
+ help = 'Automatically size devices for multi-OSD provisioning with minimal interaction'
+
+ _help = dedent("""
+ Automatically size devices ready for OSD provisioning based on default strategies.
+
+ Detected devices:
+ {detected_devices}
+
+ Usage:
+
+ ceph-volume lvm batch [DEVICE...]
+
+ Optional reporting on possible outcomes is enabled with --report
+
+ ceph-volume lvm batch --report [DEVICE...]
+ """)
+
+ def __init__(self, argv):
+ self.argv = argv
+
+ def get_devices(self):
+ all_devices = disk.get_devices()
+ # remove devices with partitions
+ # XXX Should be optional when getting device info
+ for device, detail in all_devices.items():
+ if detail.get('partitions') != {}:
+ del all_devices[device]
+ devices = sorted(all_devices.items(), key=lambda x: (x[0], x[1]['size']))
+ return device_formatter(devices)
+
+ def print_help(self):
+ return self._help.format(
+ detected_devices=self.get_devices(),
+ )
+
+ def report(self, args):
+ strategy = get_strategy(args)
+ if args.format == 'pretty':
+ strategy.report_pretty()
+ elif args.format == 'json':
+ strategy.report_json()
+ else:
+ raise RuntimeError('report format must be "pretty" or "json"')
+
+ def execute(self, args):
+ strategy = get_strategy(args)
+ if not args.yes:
+ strategy.report_pretty()
+ terminal.info('The above OSDs would be created if the operation continues')
+ if not prompt_bool('do you want to proceed? (yes/no)'):
+ terminal.error('aborting OSD provisioning for %s' % ','.join(args.devices))
+ raise SystemExit(0)
+
+ strategy.execute()
+
+ @decorators.needs_root
+ def main(self):
+ parser = argparse.ArgumentParser(
+ prog='ceph-volume lvm batch',
+ formatter_class=argparse.RawDescriptionHelpFormatter,
+ description=self.print_help(),
+ )
+
+ parser.add_argument(
+ 'devices',
+ metavar='DEVICES',
+ nargs='*',
+ type=arg_validators.ValidDevice(),
+ default=[],
+ help='Devices to provision OSDs',
+ )
+ parser.add_argument(
+ '--bluestore',
+ action='store_true',
+ help='bluestore objectstore (default)',
+ )
+ parser.add_argument(
+ '--filestore',
+ action='store_true',
+ help='filestore objectstore',
+ )
+ parser.add_argument(
+ '--report',
+ action='store_true',
+        help='Only report on OSDs that would be created, without creating them',
+ )
+ parser.add_argument(
+ '--yes',
+ action='store_true',
+ help='Avoid prompting for confirmation when provisioning',
+ )
+ parser.add_argument(
+ '--format',
+ help='output format, defaults to "pretty"',
+ default='pretty',
+ choices=['json', 'pretty'],
+ )
+ parser.add_argument(
+ '--dmcrypt',
+ action='store_true',
+ help='Enable device encryption via dm-crypt',
+ )
+ parser.add_argument(
+ '--crush-device-class',
+ dest='crush_device_class',
+ help='Crush device class to assign this OSD to',
+ )
+ parser.add_argument(
+ '--no-systemd',
+ dest='no_systemd',
+ action='store_true',
+ help='Skip creating and enabling systemd units and starting OSD services',
+ )
+ args = parser.parse_args(self.argv)
+
+ if not args.devices:
+ return parser.print_help()
+
+ # Default to bluestore here since defaulting it in add_argument may
+ # cause both to be True
+ if not args.bluestore and not args.filestore:
+ args.bluestore = True
+
+ if args.report:
+ self.report(args)
+ else:
+ self.execute(args)
value=value
)
)
- output.append(
- device_metadata_item_template.format(tag_name='devices', value=','.join(device['devices'])))
+ if not device.get('devices'):
+ continue
+ else:
+ output.append(
+ device_metadata_item_template.format(
+ tag_name='devices',
+ value=','.join(device['devices'])
+ )
+ )
print(''.join(output))
from . import trigger
from . import listing
from . import zap
+from . import batch
class LVM(object):
mapper = {
'activate': activate.Activate,
+ 'batch': batch.Batch,
'prepare': prepare.Prepare,
'create': create.Create,
'trigger': trigger.Trigger,
from __future__ import print_function
import json
import logging
-import uuid
from textwrap import dedent
from ceph_volume.util import prepare as prepare_utils
from ceph_volume.util import encryption as encryption_utils
# get the latest monmap
prepare_utils.get_monmap(osd_id)
# prepare the osd filesystem
- prepare_utils.osd_mkfs_filestore(osd_id, fsid)
+ prepare_utils.osd_mkfs_filestore(osd_id, fsid, cephx_secret)
# write the OSD keyring if it doesn't exist already
prepare_utils.write_keyring(osd_id, cephx_secret)
if secrets.get('dmcrypt_key'):
"""
if disk.is_partition(arg) or disk.is_device(arg):
# we must create a vg, and then a single lv
- vg_name = "ceph-%s" % str(uuid.uuid4())
- api.create_vg(vg_name, arg)
+ vg = api.create_vg(arg)
lv_name = "osd-%s-%s" % (device_type, osd_fsid)
return api.create_lv(
lv_name,
- vg_name, # the volume group
+ vg.name, # the volume group
tags={'ceph.type': device_type})
else:
error = [
--- /dev/null
+from . import bluestore, filestore # noqa
--- /dev/null
+from __future__ import print_function
+import json
+from uuid import uuid4
+from ceph_volume.util import disk
+from ceph_volume.api import lvm
+from . import validators
+from ceph_volume.devices.lvm.create import Create
+from ceph_volume.util import templates
+
+
+class SingleType(object):
+ """
+ Support for all SSDs, or all HDDS
+ """
+
+ def __init__(self, devices, args):
+ self.args = args
+ self.devices = devices
+ self.hdds = [device for device in devices if device.sys_api['rotational'] == '1']
+ self.ssds = [device for device in devices if device.sys_api['rotational'] == '0']
+ self.computed = {'osds': [], 'vgs': []}
+ self.validate()
+ self.compute()
+
+ def report_json(self):
+ print(json.dumps(self.computed, indent=4, sort_keys=True))
+
+ def report_pretty(self):
+ string = ""
+ string += templates.total_osds.format(
+ total_osds=len(self.hdds) or len(self.ssds) * 2
+ )
+ string += templates.osd_component_titles
+
+ for osd in self.computed['osds']:
+ string += templates.osd_header
+ string += templates.osd_component.format(
+ _type='[data]',
+ path=osd['data']['path'],
+ size=osd['data']['human_readable_size'],
+ percent=osd['data']['percentage'],
+ )
+
+ print(string)
+
+ def validate(self):
+ """
+        Ensure that the minimum requirements for this type of scenario are
+ met, raise an error if the provided devices would not work
+ """
+ # validate minimum size for all devices
+ validators.minimum_device_size(self.devices)
+
+ def compute(self):
+ """
+ Go through the rules needed to properly size the lvs, return
+ a dictionary with the result
+ """
+ osds = self.computed['osds']
+ vgs = self.computed['vgs']
+ for device in self.hdds:
+ vgs.append({'devices': [device.abspath], 'parts': 1})
+ osd = {'data': {}, 'block.db': {}}
+ osd['data']['path'] = device.abspath
+ osd['data']['size'] = device.sys_api['size']
+ osd['data']['parts'] = 1
+ osd['data']['percentage'] = 100
+ osd['data']['human_readable_size'] = str(disk.Size(b=device.sys_api['size']))
+ osds.append(osd)
+
+ for device in self.ssds:
+ # TODO: creates 2 OSDs per device, make this configurable (env var?)
+ extents = lvm.sizing(device.sys_api['size'], parts=2)
+ vgs.append({'devices': [device.abspath], 'parts': 2})
+ for ssd in range(2):
+ osd = {'data': {}, 'block.db': {}}
+ osd['data']['path'] = device.abspath
+ osd['data']['size'] = extents['sizes']
+ osd['data']['parts'] = extents['parts']
+ osd['data']['percentage'] = 50
+ osd['data']['human_readable_size'] = str(disk.Size(b=extents['sizes']))
+ osds.append(osd)
+
+ def execute(self):
+ """
+ Create vgs/lvs from the incoming set of devices, assign their roles
+ (block, block.db, block.wal, etc..) and offload the OSD creation to
+ ``lvm create``
+ """
+ osd_vgs = dict([(osd['data']['path'], None) for osd in self.computed['osds']])
+
+ # create the vgs first, mapping them to the device path
+ for osd in self.computed['osds']:
+ vg = osd_vgs.get(osd['data']['path'])
+ if not vg:
+ vg = lvm.create_vg(osd['data']['path'])
+ osd_vgs[osd['data']['path']] = {'vg': vg, 'parts': osd['data']['parts']}
+
+ # create the lvs from the vgs captured in the beginning
+ for create in osd_vgs.values():
+ lvs = lvm.create_lvs(create['vg'], parts=create['parts'], name_prefix='osd-data')
+ vg_name = create['vg'].name
+ for lv in lvs:
+ command = ['--bluestore', '--data']
+ command.append('%s/%s' % (vg_name, lv.name))
+ if self.args.dmcrypt:
+ command.append('--dmcrypt')
+ if self.args.no_systemd:
+ command.append('--no-systemd')
+ if self.args.crush_device_class:
+ command.extend(['--crush-device-class', self.args.crush_device_class])
+
+ Create(command).main()
+
+
+class MixedType(object):
+
+ def __init__(self, devices, args):
+ self.args = args
+ self.devices = devices
+ self.hdds = [device for device in devices if device.sys_api['rotational'] == '1']
+ self.ssds = [device for device in devices if device.sys_api['rotational'] == '0']
+ self.computed = {'osds': [], 'vgs': []}
+ self.block_db_size = None
+ # For every HDD we get 1 block.db
+ self.db_lvs = len(self.hdds)
+ self.validate()
+ self.compute()
+
+ def report_json(self):
+ print(json.dumps(self.computed, indent=4, sort_keys=True))
+
+ def report_pretty(self):
+ vg_extents = lvm.sizing(self.total_ssd_size.b, parts=self.db_lvs)
+ db_size = str(disk.Size(b=(vg_extents['sizes'])))
+
+ string = ""
+ string += templates.total_osds.format(
+ total_osds=len(self.hdds)
+ )
+
+ string += templates.ssd_volume_group.format(
+ target='block.db',
+ total_lv_size=str(self.total_ssd_size),
+ total_lvs=vg_extents['parts'],
+ block_lv_size=db_size,
+ block_db_devices=', '.join([ssd.abspath for ssd in self.ssds]),
+ lv_size=str(disk.Size(b=(vg_extents['sizes']))),
+ total_osds=len(self.hdds)
+ )
+
+ string += templates.osd_component_titles
+ for osd in self.computed['osds']:
+ string += templates.osd_header
+ string += templates.osd_component.format(
+ _type='[data]',
+ path=osd['data']['path'],
+ size=osd['data']['human_readable_size'],
+ percent=osd['data']['percentage'])
+
+ string += templates.osd_component.format(
+ _type='[block.db]',
+ path='(volume-group/lv)',
+ size=osd['block.db']['human_readable_size'],
+ percent=osd['block.db']['percentage'])
+
+ print(string)
+
+ def compute(self):
+ osds = self.computed['osds']
+ for device in self.hdds:
+ osd = {'data': {}, 'block.db': {}}
+ osd['data']['path'] = device.abspath
+ osd['data']['size'] = device.sys_api['size']
+ osd['data']['percentage'] = 100
+ osd['data']['human_readable_size'] = str(disk.Size(b=(device.sys_api['size'])))
+ osd['block.db']['path'] = None
+ osd['block.db']['size'] = int(self.block_db_size.b)
+ osd['block.db']['human_readable_size'] = str(self.block_db_size)
+ osd['block.db']['percentage'] = self.vg_extents['percentages']
+ osds.append(osd)
+
+ self.computed['vgs'] = [{
+ 'devices': [d.abspath for d in self.ssds],
+ 'parts': self.db_lvs,
+ 'percentages': self.vg_extents['percentages'],
+ 'sizes': self.vg_extents['sizes'],
+ 'size': int(self.total_ssd_size.b),
+ 'human_readable_sizes': str(disk.Size(b=self.vg_extents['sizes'])),
+ 'human_readable_size': str(self.total_ssd_size),
+ }]
+
+ def execute(self):
+ """
+ Create vgs/lvs from the incoming set of devices, assign their roles
+ (block, block.db, block.wal, etc..) and offload the OSD creation to
+ ``lvm create``
+ """
+ # create the single vg for all block.db lv's first
+ vg_info = self.computed['vgs'][0]
+ vg = lvm.create_vg(vg_info['devices'])
+
+ # now produce all the block.db lvs needed from that single vg
+ db_lvs = lvm.create_lvs(vg, parts=vg_info['parts'], name_prefix='osd-block-db')
+
+ # create the data lvs, and create the OSD with the matching block.db lvs from before
+ for osd in self.computed['osds']:
+ vg = lvm.create_vg(osd['data']['path'])
+ data_lv = lvm.create_lv('osd-data-%s' % str(uuid4()), vg.name)
+ db_lv = db_lvs.pop()
+ command = [
+ '--bluestore',
+ '--data', "%s/%s" % (data_lv.vg_name, data_lv.name),
+ '--block.db', '%s/%s' % (db_lv.vg_name, db_lv.name)
+ ]
+ if self.args.dmcrypt:
+ command.append('--dmcrypt')
+ if self.args.no_systemd:
+ command.append('--no-systemd')
+ if self.args.crush_device_class:
+ command.extend(['--crush-device-class', self.args.crush_device_class])
+
+ Create(command).main()
+
+ def validate(self):
+ """
+ HDDs represent data devices, and solid state devices are for block.db,
+ make sure that the number of data devices would have enough LVs and
+ those LVs would be large enough to accommodate a block.db
+ """
+ # validate minimum size for all devices
+ validators.minimum_device_size(self.devices)
+
+ # add all the size available in solid drives and divide it by the
+ # expected number of osds, the expected output should be larger than
+        # the minimum allowed for block.db
+ self.total_ssd_size = disk.Size(b=0)
+ for ssd in self.ssds:
+ self.total_ssd_size += disk.Size(b=ssd.sys_api['size'])
+
+ self.block_db_size = self.total_ssd_size / self.db_lvs
+ self.vg_extents = lvm.sizing(self.total_ssd_size.b, parts=self.db_lvs)
+
+ # min 2GB of block.db is allowed
+ msg = 'Total solid size (%s) is not enough for block.db LVs larger than 2 GB'
+ if self.block_db_size < disk.Size(gb=2):
+ # use ad-hoc exception here
+ raise RuntimeError(msg % self.total_ssd_size)
--- /dev/null
+from __future__ import print_function
+import json
+from ceph_volume.util import disk, prepare
+from ceph_volume.api import lvm
+from . import validators
+from ceph_volume.devices.lvm.create import Create
+from ceph_volume.util import templates
+
+
+class SingleType(object):
+ """
+ Support for all SSDs, or all HDDs, data and journal LVs will be colocated
+ in the same device
+ """
+
+ def __init__(self, devices, args):
+ self.args = args
+ self.devices = devices
+ self.hdds = [device for device in devices if device.sys_api['rotational'] == '1']
+ self.ssds = [device for device in devices if device.sys_api['rotational'] == '0']
+ self.computed = {'osds': [], 'vgs': []}
+ self.validate()
+ self.compute()
+
+ def report_json(self):
+ print(json.dumps(self.computed, indent=4, sort_keys=True))
+
+ def report_pretty(self):
+ string = ""
+ string += templates.total_osds.format(
+ total_osds=len(self.hdds) or len(self.ssds) * 2
+ )
+ string += templates.osd_component_titles
+
+ for osd in self.computed['osds']:
+ string += templates.osd_header
+ string += templates.osd_component.format(
+ _type='[data]',
+ path=osd['data']['path'],
+ size=osd['data']['human_readable_size'],
+ percent=osd['data']['percentage'],
+ )
+ string += templates.osd_component.format(
+ _type='[journal]',
+ path=osd['journal']['path'],
+ size=osd['journal']['human_readable_size'],
+ percent=osd['journal']['percentage'],
+ )
+
+ print(string)
+
+ def validate(self):
+ """
+        Ensure that the minimum requirements for this type of scenario are
+ met, raise an error if the provided devices would not work
+ """
+ # validate minimum size for all devices
+ validators.minimum_device_size(self.devices)
+
+ def compute(self):
+ """
+ Go through the rules needed to properly size the lvs, return
+ a dictionary with the result
+ """
+ # chose whichever is the one group we have to compute against
+ devices = self.hdds or self.ssds
+ osds = self.computed['osds']
+ vgs = self.computed['vgs']
+ for device in devices:
+ device_size = disk.Size(b=device.sys_api['size'])
+ journal_size = prepare.get_journal_size(lv_format=False)
+ data_size = device_size - journal_size
+ data_percentage = data_size * 100 / device_size
+ vgs.append({'devices': [device.abspath], 'parts': 2})
+ osd = {'data': {}, 'journal': {}}
+ osd['data']['path'] = device.abspath
+ osd['data']['size'] = data_size.b
+ osd['data']['percentage'] = int(data_percentage)
+ osd['data']['human_readable_size'] = str(data_size)
+ osd['journal']['path'] = device.abspath
+ osd['journal']['size'] = journal_size.b
+ osd['journal']['percentage'] = int(100 - data_percentage)
+ osd['journal']['human_readable_size'] = str(journal_size)
+ osds.append(osd)
+
+ def execute(self):
+ """
+ Create vgs/lvs from the incoming set of devices, assign their roles
+ (data, journal) and offload the OSD creation to ``lvm create``
+ """
+ osd_vgs = []
+
+ # create the vgs first, one per device (since this is colocating, it
+ # picks the 'data' path)
+ for osd in self.computed['osds']:
+ vg = lvm.create_vg(osd['data']['path'])
+ osd_vgs.append(vg)
+
+ journal_size = prepare.get_journal_size()
+
+ # create the lvs from the vgs captured in the beginning
+ for vg in osd_vgs:
+ # this is called again, getting us the LVM formatted string
+ journal_lv = lvm.create_lv(
+ 'osd-journal', vg.name, size=journal_size, uuid_name=True
+ )
+ # no extents or size means it will use 100%FREE
+ data_lv = lvm.create_lv('osd-data', vg.name)
+
+ command = ['--filestore', '--data']
+ command.append('%s/%s' % (vg.name, data_lv.name))
+ command.extend(['--journal', '%s/%s' % (vg.name, journal_lv.name)])
+ if self.args.dmcrypt:
+ command.append('--dmcrypt')
+ if self.args.no_systemd:
+ command.append('--no-systemd')
+ if self.args.crush_device_class:
+ command.extend(['--crush-device-class', self.args.crush_device_class])
+
+ Create(command).main()
+
+
+class MixedType(object):
+ """
+ Supports HDDs with SSDs, journals will be placed on SSDs, while HDDs will
+ be used fully for data.
+
+ If an existing common VG is detected on SSDs, it will be extended if blank
+ SSDs are used, otherwise it will be used directly.
+ """
+
+ def __init__(self, devices, args):
+ self.args = args
+ self.devices = devices
+ self.hdds = [device for device in devices if device.sys_api['rotational'] == '1']
+ self.ssds = [device for device in devices if device.sys_api['rotational'] == '0']
+ self.computed = {'osds': [], 'vg': None}
+ self.blank_ssds = []
+ self.journals_needed = len(self.hdds)
+ self.journal_size = prepare.get_journal_size(lv_format=False)
+ self.system_vgs = lvm.VolumeGroups()
+ self.validate()
+ self.compute()
+
+ def report_json(self):
+ print(json.dumps(self.computed, indent=4, sort_keys=True))
+
+ def report_pretty(self):
+ string = ""
+ string += templates.total_osds.format(
+ total_osds=len(self.hdds) or len(self.ssds) * 2
+ )
+
+ string += templates.ssd_volume_group.format(
+ target='journal',
+ total_lv_size=str(self.total_available_journal_space),
+ total_lvs=self.journals_needed,
+ block_db_devices=', '.join([d.path for d in self.ssds]),
+ lv_size=str(self.journal_size),
+ total_osds=self.journals_needed
+ )
+
+ string += templates.osd_component_titles
+
+ for osd in self.computed['osds']:
+ string += templates.osd_header
+ string += templates.osd_component.format(
+ _type='[data]',
+ path=osd['data']['path'],
+ size=osd['data']['human_readable_size'],
+ percent=osd['data']['percentage'],
+ )
+ string += templates.osd_component.format(
+ _type='[journal]',
+ path=osd['journal']['path'],
+ size=osd['journal']['human_readable_size'],
+ percent=osd['journal']['percentage'],
+ )
+
+ print(string)
+
+ def get_common_vg(self):
+ # find all the vgs associated with the current device
+ for ssd in self.ssds:
+ for pv in ssd.pvs_api:
+ vg = self.system_vgs.get(vg_name=pv.vg_name)
+ if not vg:
+ continue
+ # this should give us just one VG, it would've been caught by
+ # the validator otherwise
+ return vg
+
+ def validate(self):
+ """
+        Ensure that the minimum requirements for this type of scenario are
+ met, raise an error if the provided devices would not work
+ """
+ # validate minimum size for all devices
+ validators.minimum_device_size(self.devices)
+
+ # make sure that data devices do not have any LVs
+ validators.no_lvm_membership(self.hdds)
+
+ # do not allow non-common VG to continue
+ validators.has_common_vg(self.ssds)
+
+ # find the common VG to calculate how much is available
+ self.common_vg = self.get_common_vg()
+
+ # find how many journals are possible from the common VG
+ if self.common_vg:
+ common_vg_size = disk.Size(gb=self.common_vg.free)
+ else:
+ common_vg_size = disk.Size(gb=0)
+
+ # non-VG SSDs
+ self.vg_ssds = set([d for d in self.ssds if d.is_lvm_member])
+ self.blank_ssds = set(self.ssds).difference(self.vg_ssds)
+ self.total_blank_ssd_size = disk.Size(b=0)
+ for blank_ssd in self.blank_ssds:
+ self.total_blank_ssd_size += disk.Size(b=blank_ssd.sys_api['size'])
+
+ self.total_available_journal_space = self.total_blank_ssd_size + common_vg_size
+
+ try:
+ self.vg_extents = lvm.sizing(
+ self.total_available_journal_space.b, size=self.journal_size.b
+ )
+ # FIXME with real exception catching from sizing that happens when the
+ # journal space is not enough
+ except Exception:
+ self.vg_extents = {'parts': 0, 'percentages': 0, 'sizes': 0}
+
+ # validate that number of journals possible are enough for number of
+ # OSDs proposed
+ total_journals_possible = self.total_available_journal_space / self.journal_size
+ if len(self.hdds) > total_journals_possible:
+ msg = "Not enough %s journals (%s) can be created for %s OSDs" % (
+ self.journal_size, total_journals_possible, len(self.hdds)
+ )
+ raise RuntimeError(msg)
+
+ def compute(self):
+ """
+ Go through the rules needed to properly size the lvs, return
+ a dictionary with the result
+ """
+ osds = self.computed['osds']
+
+ vg_free = int(self.total_available_journal_space.gb)
+ if not self.common_vg:
+ # there isn't a common vg, so a new one must be created with all
+ # the blank SSDs
+ self.computed['vg'] = {
+ 'devices': self.blank_ssds,
+ 'parts': self.journals_needed,
+ 'percentages': self.vg_extents['percentages'],
+ 'sizes': self.journal_size.b,
+ 'size': int(self.total_blank_ssd_size.b),
+ 'human_readable_sizes': str(self.journal_size),
+ 'human_readable_size': str(self.total_available_journal_space),
+ }
+ vg_name = 'lv/vg'
+ else:
+ vg_name = self.common_vg.name
+
+ for device in self.hdds:
+ device_size = disk.Size(b=device.sys_api['size'])
+ data_size = device_size - self.journal_size
+ osd = {'data': {}, 'journal': {}}
+ osd['data']['path'] = device.path
+ osd['data']['size'] = data_size.b
+ osd['data']['percentage'] = 100
+ osd['data']['human_readable_size'] = str(device_size)
+ osd['journal']['path'] = 'vg: %s' % vg_name
+ osd['journal']['size'] = self.journal_size.b
+ osd['journal']['percentage'] = int(self.journal_size.gb * 100 / vg_free)
+ osd['journal']['human_readable_size'] = str(self.journal_size)
+ osds.append(osd)
+
+ def execute(self):
+ """
+ Create vgs/lvs from the incoming set of devices, assign their roles
+ (data, journal) and offload the OSD creation to ``lvm create``
+ """
+ ssd_paths = [d.abspath for d in self.blank_ssds]
+
+ # no common vg is found, create one with all the blank SSDs
+ if not self.common_vg:
+ journal_vg = lvm.create_vg(ssd_paths, name_prefix='ceph-journals')
+ # a vg exists that can be extended
+ elif self.common_vg and ssd_paths:
+ journal_vg = lvm.extend_vg(self.common_vg, ssd_paths)
+ # one common vg with nothing else to extend can be used directly
+ else:
+ journal_vg = self.common_vg
+
+ journal_size = prepare.get_journal_size(lv_format=True)
+
+ for osd in self.computed['osds']:
+ data_vg = lvm.create_vg(osd['data']['path'], name_prefix='ceph-data')
+ # no extents or size means it will use 100%FREE
+ data_lv = lvm.create_lv('osd-data', data_vg.name)
+ journal_lv = lvm.create_lv(
+ 'osd-journal', journal_vg.name, size=journal_size, uuid_name=True
+ )
+
+ command = ['--filestore', '--data']
+ command.append('%s/%s' % (data_vg.name, data_lv.name))
+ command.extend(['--journal', '%s/%s' % (journal_vg.name, journal_lv.name)])
+ if self.args.dmcrypt:
+ command.append('--dmcrypt')
+ if self.args.no_systemd:
+ command.append('--no-systemd')
+ if self.args.crush_device_class:
+ command.extend(['--crush-device-class', self.args.crush_device_class])
+
+ Create(command).main()
--- /dev/null
+from ceph_volume.util import disk
+from ceph_volume.api import lvm
+
+
+def minimum_device_size(devices):
+ """
+    Ensure that the minimum requirements for this type of scenario are
+ met, raise an error if the provided devices would not work
+ """
+ msg = 'Unable to use device smaller than 5GB: %s (%s)'
+ for device in devices:
+ device_size = disk.Size(b=device.sys_api['size'])
+ if device_size < disk.Size(gb=5):
+ raise RuntimeError(msg % (device, device_size))
+
+
+def no_lvm_membership(devices):
+ """
+ Do not allow devices that are part of LVM
+ """
+ msg = 'Unable to use device, already a member of LVM: %s'
+ for device in devices:
+ if device.is_lvm_member:
+ raise RuntimeError(msg % device.abspath)
+
+
+def has_common_vg(ssd_devices):
+ """
+ Ensure that devices have a common VG between them
+ """
+ msg = 'Could not find a common VG between devices: %s'
+ system_vgs = lvm.VolumeGroups()
+ ssd_vgs = {}
+
+ for ssd_device in ssd_devices:
+ for pv in ssd_device.pvs_api:
+ vg = system_vgs.get(vg_name=pv.vg_name)
+ if not vg:
+ continue
+ try:
+ ssd_vgs[vg.name].append(ssd_device.abspath)
+ except KeyError:
+ ssd_vgs[vg.name] = [ssd_device.abspath]
+ # len of 1 means they all have a common vg, and len of 0 means that these
+ # are blank
+ if len(ssd_vgs) <= 1:
+ return
+ raise RuntimeError(msg % ', '.join(ssd_vgs.keys()))
def __init__(self, argv):
self.argv = argv
- @decorators.needs_root
- def zap(self, args):
- device = args.device
- lv = api.get_lv_from_argument(device)
- if lv:
- # we are zapping a logical volume
- path = lv.lv_path
- else:
- # we are zapping a partition
- #TODO: ensure device is a partition
- path = device
-
- mlogger.info("Zapping: %s", path)
-
- # check if there was a pv created with the
- # name of device
- pv = api.get_pv(pv_name=device)
- if pv:
- vg_name = pv.vg_name
- lv = api.get_lv(vg_name=vg_name)
-
- dmcrypt = False
- dmcrypt_uuid = None
- if lv:
- osd_path = "/var/lib/ceph/osd/{}-{}".format(lv.tags['ceph.cluster_name'], lv.tags['ceph.osd_id'])
- dmcrypt_uuid = lv.lv_uuid
- dmcrypt = lv.encrypted
- if system.path_is_mounted(osd_path):
- mlogger.info("Unmounting %s", osd_path)
- system.unmount(osd_path)
+ def unmount_lv(self, lv):
+ if lv.tags.get('ceph.cluster_name') and lv.tags.get('ceph.osd_id'):
+ lv_path = "/var/lib/ceph/osd/{}-{}".format(lv.tags['ceph.cluster_name'], lv.tags['ceph.osd_id'])
else:
- # we're most likely dealing with a partition here, check to
- # see if it was encrypted
- partuuid = disk.get_partuuid(device)
- if encryption.status("/dev/mapper/{}".format(partuuid)):
- dmcrypt_uuid = partuuid
- dmcrypt = True
-
+ lv_path = lv.lv_path
+ dmcrypt_uuid = lv.lv_uuid
+ dmcrypt = lv.encrypted
+ if system.path_is_mounted(lv_path):
+ mlogger.info("Unmounting %s", lv_path)
+ system.unmount(lv_path)
if dmcrypt and dmcrypt_uuid:
- dmcrypt_path = "/dev/mapper/{}".format(dmcrypt_uuid)
- mlogger.info("Closing encrypted path %s", dmcrypt_path)
- encryption.dmcrypt_close(dmcrypt_path)
-
- if args.destroy and pv:
- logger.info("Found a physical volume created from %s, will destroy all it's vgs and lvs", device)
- vg_name = pv.vg_name
- mlogger.info("Destroying volume group %s because --destroy was given", vg_name)
- api.remove_vg(vg_name)
- mlogger.info("Destroying physical volume %s because --destroy was given", device)
- api.remove_pv(device)
- elif args.destroy and not pv:
- mlogger.info("Skipping --destroy because no associated physical volumes are found for %s", device)
-
- wipefs(path)
- zap_data(path)
+ self.dmcrypt_close(dmcrypt_uuid)
- if lv and not pv:
- # remove all lvm metadata
- lv.clear_tags()
-
- terminal.success("Zapping successful for: %s" % path)
+ @decorators.needs_root
+ def zap(self, args):
+ for device in args.devices:
+ if disk.is_mapper_device(device):
+ terminal.error("Refusing to zap the mapper device: {}".format(device))
+ raise SystemExit(1)
+ lv = api.get_lv_from_argument(device)
+ if lv:
+ # we are zapping a logical volume
+ path = lv.lv_path
+ self.unmount_lv(lv)
+ else:
+ # we are zapping a partition
+ #TODO: ensure device is a partition
+ path = device
+                # check if the partition is encrypted so it can be closed
+ partuuid = disk.get_partuuid(device)
+ if encryption.status("/dev/mapper/{}".format(partuuid)):
+ dmcrypt_uuid = partuuid
+ self.dmcrypt_close(dmcrypt_uuid)
+
+ mlogger.info("Zapping: %s", path)
+
+ # check if there was a pv created with the
+ # name of device
+ pvs = api.PVolumes()
+ pvs.filter(pv_name=device)
+ vgs = set([pv.vg_name for pv in pvs])
+ for pv in pvs:
+ vg_name = pv.vg_name
+ lv = None
+ if pv.lv_uuid:
+ lv = api.get_lv(vg_name=vg_name, lv_uuid=pv.lv_uuid)
+
+ if lv:
+ self.unmount_lv(lv)
+
+ if args.destroy:
+ for vg_name in vgs:
+ mlogger.info("Destroying volume group %s because --destroy was given", vg_name)
+ api.remove_vg(vg_name)
+ mlogger.info("Destroying physical volume %s because --destroy was given", device)
+ api.remove_pv(device)
+
+ wipefs(path)
+ zap_data(path)
+
+ if lv and not pvs:
+ # remove all lvm metadata
+ lv.clear_tags()
+
+ terminal.success("Zapping successful for: %s" % ", ".join(args.devices))
+
+ def dmcrypt_close(self, dmcrypt_uuid):
+ dmcrypt_path = "/dev/mapper/{}".format(dmcrypt_uuid)
+ mlogger.info("Closing encrypted path %s", dmcrypt_path)
+ encryption.dmcrypt_close(dmcrypt_path)
def main(self):
sub_command_help = dedent("""
- Zaps the given logical volume, raw device or partition for reuse by ceph-volume.
+ Zaps the given logical volume(s), raw device(s) or partition(s) for reuse by ceph-volume.
If given a path to a logical volume it must be in the format of vg/lv. Any
filesystems present on the given device, vg/lv, or partition will be removed and
all data will be purged.
ceph-volume lvm zap /dev/sdc1
+ Zapping many raw devices:
+
+          ceph-volume lvm zap /dev/sda /dev/sdb /dev/sdc
+
If the --destroy flag is given and you are zapping a raw device or partition
then all vgs and lvs that exist on that raw device or partition will be destroyed.
)
parser.add_argument(
- 'device',
- metavar='DEVICE',
- nargs='?',
- help='Path to an lv (as vg/lv), partition (as /dev/sda1) or device (as /dev/sda)'
+ 'devices',
+ metavar='DEVICES',
+ nargs='*',
+ default=[],
+ help='Path to one or many lv (as vg/lv), partition (as /dev/sda1) or device (as /dev/sda)'
)
parser.add_argument(
'--destroy',
def __str__(self):
msg = "Got more than 1 result looking for volume group: %s" % self.vg_name
return msg
+
+
+class SizeAllocationError(Exception):
+
+ def __init__(self, requested, available):
+ self.requested = requested
+ self.available = available
+
+ def __str__(self):
+ msg = 'Unable to allocate size (%s), not enough free space (%s)' % (
+ self.requested, self.available
+ )
+ return msg
"""
def __init__(self, argv=None, parse=True):
- self.mapper = {'lvm': devices.lvm.LVM, 'simple': devices.simple.Simple}
+ self.mapper = {
+ 'lvm': devices.lvm.LVM,
+ 'simple': devices.simple.Simple,
+ }
self.plugin_help = "No plugins found/loaded"
if argv is None:
self.argv = sys.argv
import subprocess
from select import select
from ceph_volume import terminal
+from ceph_volume.util import as_bytes
import logging
for descriptor in reads:
descriptor_name = descriptor_names[descriptor]
try:
- log_output(descriptor_name, read(descriptor, 1024), terminal_logging, True)
+ message = read(descriptor, 1024)
+ if not isinstance(message, str):
+ message = message.decode('utf-8')
+ log_output(descriptor_name, message, terminal_logging, True)
except (IOError, OSError):
# nothing else to log
pass
close_fds=True,
**kw
)
+
if stdin:
- stdout_stream, stderr_stream = process.communicate(stdin)
+ stdout_stream, stderr_stream = process.communicate(as_bytes(stdin))
else:
stdout_stream = process.stdout.read()
stderr_stream = process.stderr.read()
process.run(['systemctl', 'stop', unit])
-def enable(unit):
- process.run(['systemctl', 'enable', unit])
+def enable(unit, runtime=False):
+ if runtime:
+ process.run(['systemctl', 'enable', '--runtime', unit])
+ else:
+ process.run(['systemctl', 'enable', unit])
def disable(unit):
def enable_osd(id_):
- return enable(osd_unit % id_)
+ return enable(osd_unit % id_, runtime=True)
def disable_osd(id_):
return volumes
-@pytest.fixture
-def pvolumes(monkeypatch):
- monkeypatch.setattr(process, 'call', lambda x: ('', '', 0))
- pvolumes = api.PVolumes()
- pvolumes._purge()
- return pvolumes
-
-
@pytest.fixture
def volume_groups(monkeypatch):
monkeypatch.setattr(process, 'call', lambda x: ('', '', 0))
monkeypatch.setattr(api, 'PVolumes', lambda: pvolumes)
assert api.get_pv(pv_uuid='0000') == FooPVolume
+ def test_multiple_pvs_is_matched_by_uuid(self, pvolumes, monkeypatch):
+ FooPVolume = api.PVolume(vg_name="vg", pv_name='/dev/sda', pv_uuid="0000", pv_tags={}, lv_uuid="0000000")
+ BarPVolume = api.PVolume(vg_name="vg", pv_name='/dev/sda', pv_uuid="0000", pv_tags={})
+ pvolumes.append(FooPVolume)
+ pvolumes.append(BarPVolume)
+ monkeypatch.setattr(api, 'PVolumes', lambda: pvolumes)
+ assert api.get_pv(pv_uuid='0000') == FooPVolume
+
+ def test_multiple_pvs_is_matched_by_name(self, pvolumes, monkeypatch):
+ FooPVolume = api.PVolume(vg_name="vg", pv_name='/dev/sda', pv_uuid="0000", pv_tags={}, lv_uuid="0000000")
+ BarPVolume = api.PVolume(vg_name="vg", pv_name='/dev/sda', pv_uuid="0000", pv_tags={})
+ pvolumes.append(FooPVolume)
+ pvolumes.append(BarPVolume)
+ monkeypatch.setattr(api, 'PVolumes', lambda: pvolumes)
+ assert api.get_pv(pv_name='/dev/sda') == FooPVolume
+
+ def test_multiple_pvs_is_matched_by_tags(self, pvolumes, monkeypatch):
+ FooPVolume = api.PVolume(vg_name="vg1", pv_name='/dev/sdc', pv_uuid="1000", pv_tags="ceph.foo=bar", lv_uuid="0000000")
+ BarPVolume = api.PVolume(vg_name="vg", pv_name='/dev/sda', pv_uuid="0000", pv_tags="ceph.foo=bar")
+ pvolumes.append(FooPVolume)
+ pvolumes.append(BarPVolume)
+ monkeypatch.setattr(api, 'PVolumes', lambda: pvolumes)
+ with pytest.raises(exceptions.MultiplePVsError):
+ api.get_pv(pv_tags={"ceph.foo": "bar"})
+
def test_single_pv_is_matched_by_uuid(self, pvolumes, monkeypatch):
FooPVolume = api.PVolume(
pv_name='/dev/vg/foo',
volume_groups.filter()
+class TestVolumeGroupFree(object):
+
+ def test_no_g_in_output(self):
+ vg = api.VolumeGroup(vg_name='nosize', vg_free='')
+ with pytest.raises(RuntimeError):
+ vg.free
+
+ def test_g_without_size(self):
+ vg = api.VolumeGroup(vg_name='nosize', vg_free='g')
+ with pytest.raises(RuntimeError):
+ vg.free
+
+ def test_size_without_g(self):
+ vg = api.VolumeGroup(vg_name='nosize', vg_free='1')
+ with pytest.raises(RuntimeError):
+ vg.free
+
+ def test_error_message(self):
+ vg = api.VolumeGroup(vg_name='nosize', vg_free='F')
+ with pytest.raises(RuntimeError) as error:
+ vg.free
+ assert "Unable to convert vg size to integer: 'F'" in str(error)
+
+ def test_invalid_float(self):
+ vg = api.VolumeGroup(vg_name='nosize', vg_free=' g')
+ with pytest.raises(RuntimeError) as error:
+ vg.free
+ assert "Unable to convert to integer: ' '" in str(error.value)
+
+ def test_integer_gets_produced(self):
+ vg = api.VolumeGroup(vg_name='nosize', vg_free='100g')
+ assert vg.free == 100
+
+ def test_integer_gets_produced_whitespace(self):
+ vg = api.VolumeGroup(vg_name='nosize', vg_free=' 100g ')
+ assert vg.free == 100
+
+ def test_integer_gets_rounded_down(self):
+ vg = api.VolumeGroup(vg_name='nosize', vg_free='100.99g')
+ assert vg.free == 100
+
+
+class TestCreateLVs(object):
+
+ def test_creates_correct_lv_number_from_parts(self, monkeypatch):
+ monkeypatch.setattr('ceph_volume.api.lvm.create_lv', lambda *a, **kw: (a, kw))
+ vg = api.VolumeGroup(
+ vg_name='ceph', vg_free='1024g',
+ vg_size='99999999g', vg_free_count='999'
+ )
+ lvs = api.create_lvs(vg, parts=4)
+ assert len(lvs) == 4
+
+ def test_suffixes_the_size_arg(self, monkeypatch):
+ monkeypatch.setattr('ceph_volume.api.lvm.create_lv', lambda *a, **kw: (a, kw))
+ vg = api.VolumeGroup(
+ vg_name='ceph', vg_free='1024g',
+ vg_size='99999999g', vg_free_count='999'
+ )
+ lvs = api.create_lvs(vg, parts=4)
+ assert lvs[0][1]['extents'] == 249
+
+ def test_only_uses_free_size(self, monkeypatch):
+ monkeypatch.setattr('ceph_volume.api.lvm.create_lv', lambda *a, **kw: (a, kw))
+ vg = api.VolumeGroup(
+ vg_name='ceph', vg_free='1024g',
+ vg_size='99999999g', vg_free_count='1000'
+ )
+ lvs = api.create_lvs(vg, parts=4)
+ assert lvs[0][1]['extents'] == 250
+
+ def test_null_tags_are_set_by_default(self, monkeypatch):
+ monkeypatch.setattr('ceph_volume.api.lvm.create_lv', lambda *a, **kw: (a, kw))
+ vg = api.VolumeGroup(
+ vg_name='ceph', vg_free='1024g',
+ vg_size='99999999g', vg_free_count='999'
+ )
+ kwargs = api.create_lvs(vg, parts=4)[0][1]
+ assert list(kwargs['tags'].values()) == ['null', 'null', 'null', 'null']
+
+ def test_fallback_to_one_part(self, monkeypatch):
+ monkeypatch.setattr('ceph_volume.api.lvm.create_lv', lambda *a, **kw: (a, kw))
+ vg = api.VolumeGroup(
+ vg_name='ceph', vg_free='1024g',
+ vg_size='99999999g', vg_free_count='999'
+ )
+ lvs = api.create_lvs(vg)
+ assert len(lvs) == 1
+
+
+class TestVolumeGroupSizing(object):
+
+ def setup(self):
+ self.vg = api.VolumeGroup(
+ vg_name='ceph', vg_free='1024g',
+ vg_free_count='261129'
+ )
+
+ def test_parts_and_size_errors(self):
+ with pytest.raises(ValueError) as error:
+ self.vg.sizing(parts=4, size=10)
+ assert "Cannot process sizing" in str(error)
+
+ def test_zero_parts_produces_100_percent(self):
+ result = self.vg.sizing(parts=0)
+ assert result['percentages'] == 100
+
+ def test_two_parts_produces_50_percent(self):
+ result = self.vg.sizing(parts=2)
+ assert result['percentages'] == 50
+
+ def test_two_parts_produces_half_size(self):
+ result = self.vg.sizing(parts=2)
+ assert result['sizes'] == 512
+
+ def test_half_size_produces_round_sizes(self):
+ result = self.vg.sizing(size=512)
+ assert result['sizes'] == 512
+ assert result['percentages'] == 50
+ assert result['parts'] == 2
+
+ def test_bit_more_than_half_size_allocates_full_size(self):
+        # 513 can't allocate more than 1, so it just falls back to using the
+ # whole device
+ result = self.vg.sizing(size=513)
+ assert result['sizes'] == 1024
+ assert result['percentages'] == 100
+ assert result['parts'] == 1
+
+ def test_extents_are_halfed_rounded_down(self):
+ result = self.vg.sizing(size=512)
+ # the real extents would've given 130564.5
+ assert result['extents'] == 130564
+
+ def test_bit_less_size_rounds_down(self):
+ result = self.vg.sizing(size=129)
+ assert result['sizes'] == 146
+ assert result['percentages'] == 14
+ assert result['parts'] == 7
+
+ def test_unable_to_allocate_past_free_size(self):
+ with pytest.raises(exceptions.SizeAllocationError):
+ self.vg.sizing(size=2048)
+
+
class TestGetLVFromArgument(object):
def setup(self):
data_tag = ['lvchange', '--addtag', 'ceph.data_device=/path', '/path']
assert capture.calls[2]['args'][0] == data_tag
+ def test_uses_uuid(self, monkeypatch, capture):
+ monkeypatch.setattr(process, 'run', capture)
+ monkeypatch.setattr(process, 'call', capture)
+ monkeypatch.setattr(api, 'get_lv', lambda *a, **kw: self.foo_volume)
+ api.create_lv('foo', 'foo_group', size='5G', tags={'ceph.type': 'data'}, uuid_name=True)
+ result = capture.calls[0]['args'][0][5]
+ assert result.startswith('foo-')
+ assert len(result) == 40
+
+
+class TestExtendVG(object):
+
+ def setup(self):
+ self.foo_volume = api.VolumeGroup(vg_name='foo', lv_tags='')
+
+ def test_uses_single_device_in_list(self, monkeypatch, fake_run):
+ monkeypatch.setattr(api, 'get_vg', lambda **kw: True)
+ api.extend_vg(self.foo_volume, ['/dev/sda'])
+ expected = ['vgextend', '--force', '--yes', 'foo', '/dev/sda']
+ assert fake_run.calls[0]['args'][0] == expected
+
+ def test_uses_single_device(self, monkeypatch, fake_run):
+ monkeypatch.setattr(api, 'get_vg', lambda **kw: True)
+ api.extend_vg(self.foo_volume, '/dev/sda')
+ expected = ['vgextend', '--force', '--yes', 'foo', '/dev/sda']
+ assert fake_run.calls[0]['args'][0] == expected
+
+ def test_uses_multiple_devices(self, monkeypatch, fake_run):
+ monkeypatch.setattr(api, 'get_vg', lambda **kw: True)
+ api.extend_vg(self.foo_volume, ['/dev/sda', '/dev/sdb'])
+ expected = ['vgextend', '--force', '--yes', 'foo', '/dev/sda', '/dev/sdb']
+ assert fake_run.calls[0]['args'][0] == expected
+
+
+class TestCreateVG(object):
+
+ def setup(self):
+ self.foo_volume = api.VolumeGroup(vg_name='foo', lv_tags='')
+
+ def test_no_name(self, monkeypatch, fake_run):
+ monkeypatch.setattr(api, 'get_vg', lambda **kw: True)
+ api.create_vg('/dev/sda')
+ result = fake_run.calls[0]['args'][0]
+ assert '/dev/sda' in result
+ assert result[-2].startswith('ceph-')
+
+ def test_devices_list(self, monkeypatch, fake_run):
+ monkeypatch.setattr(api, 'get_vg', lambda **kw: True)
+ api.create_vg(['/dev/sda', '/dev/sdb'], name='ceph')
+ result = fake_run.calls[0]['args'][0]
+ expected = ['vgcreate', '--force', '--yes', 'ceph', '/dev/sda', '/dev/sdb']
+ assert result == expected
+
+ def test_name_prefix(self, monkeypatch, fake_run):
+ monkeypatch.setattr(api, 'get_vg', lambda **kw: True)
+ api.create_vg('/dev/sda', name_prefix='master')
+ result = fake_run.calls[0]['args'][0]
+ assert '/dev/sda' in result
+ assert result[-2].startswith('master-')
+
+ def test_specific_name(self, monkeypatch, fake_run):
+ monkeypatch.setattr(api, 'get_vg', lambda **kw: True)
+ api.create_vg('/dev/sda', name='master')
+ result = fake_run.calls[0]['args'][0]
+ assert '/dev/sda' in result
+ assert result[-2] == 'master'
#
# The following tests are pretty gnarly. VDO detection is very convoluted and
'/sys/block': block_path})
result = api._vdo_parents(['dm-3'])
assert result == []
+
+
+class TestSplitNameParser(object):
+
+ def test_keys_are_parsed_without_prefix(self):
+ line = ["DM_VG_NAME='/dev/mapper/vg';DM_LV_NAME='lv';DM_LV_LAYER=''"]
+ result = api._splitname_parser(line)
+ assert result['VG_NAME'] == 'vg'
+ assert result['LV_NAME'] == 'lv'
+ assert result['LV_LAYER'] == ''
+
+ def test_vg_name_sans_mapper(self):
+ line = ["DM_VG_NAME='/dev/mapper/vg';DM_LV_NAME='lv';DM_LV_LAYER=''"]
+ result = api._splitname_parser(line)
+ assert '/dev/mapper' not in result['VG_NAME']
+
+
+class TestIsLV(object):
+
+ def test_is_not_an_lv(self, monkeypatch):
+ monkeypatch.setattr(api, 'dmsetup_splitname', lambda x: {})
+ assert api.is_lv('/dev/sda1', lvs=[]) is False
+
+ def test_lvs_not_found(self, monkeypatch, volumes):
+ CephVolume = api.Volume(lv_name='foo', lv_path='/dev/vg/foo', lv_tags="ceph.type=data")
+ volumes.append(CephVolume)
+ splitname = {'LV_NAME': 'data', 'VG_NAME': 'ceph'}
+ monkeypatch.setattr(api, 'dmsetup_splitname', lambda x: splitname)
+ assert api.is_lv('/dev/sda1', lvs=volumes) is False
+
+ def test_is_lv(self, monkeypatch, volumes):
+ CephVolume = api.Volume(
+ vg_name='ceph', lv_name='data',
+ lv_path='/dev/vg/foo', lv_tags="ceph.type=data"
+ )
+ volumes.append(CephVolume)
+ splitname = {'LV_NAME': 'data', 'VG_NAME': 'ceph'}
+ monkeypatch.setattr(api, 'dmsetup_splitname', lambda x: splitname)
+ assert api.is_lv('/dev/sda1', lvs=volumes) is True
return vgs
+@pytest.fixture
+def pvolumes(monkeypatch):
+ monkeypatch.setattr('ceph_volume.process.call', lambda x: ('', '', 0))
+ pvolumes = lvm_api.PVolumes()
+ pvolumes._purge()
+ return pvolumes
+
+
@pytest.fixture
def is_root(monkeypatch):
"""
Create a temporary file, optionally filling it with contents, returns an
absolute path to the file when called
"""
- def generate_file(name='file', contents=''):
- path = os.path.join(str(tmpdir), name)
+ def generate_file(name='file', contents='', directory=None):
+ directory = directory or str(tmpdir)
+ path = os.path.join(directory, name)
with open(path, 'w') as fp:
fp.write(contents)
return path
return generate_file
+
+
+@pytest.fixture
+def device_info(monkeypatch):
+ def apply(devices=None, lsblk=None, lv=None):
+ devices = devices if devices else {}
+ lsblk = lsblk if lsblk else {}
+ lv = Factory(**lv) if lv else None
+ monkeypatch.setattr("ceph_volume.sys_info.devices", {})
+ monkeypatch.setattr("ceph_volume.util.device.disk.get_devices", lambda: devices)
+ monkeypatch.setattr("ceph_volume.util.device.lvm.get_lv_from_argument", lambda path: lv)
+ monkeypatch.setattr("ceph_volume.util.device.disk.lsblk", lambda path: lsblk)
+ return apply
assert result['0'][0]['path'] == '/dev/VolGroup/lv'
assert result['0'][0]['devices'] == ['/dev/sda1', '/dev/sdb1']
+ def test_report_a_ceph_lv_with_multiple_pvs_of_same_name(self, pvolumes, monkeypatch):
+ tags = 'ceph.osd_id=0,ceph.journal_uuid=x,ceph.type=data'
+ lv = api.Volume(
+ lv_name='lv', vg_name='VolGroup',
+ lv_uuid='aaaa', lv_path='/dev/VolGroup/lv', lv_tags=tags
+ )
+ monkeypatch.setattr(api, 'get_lv_from_argument', lambda device: None)
+ monkeypatch.setattr(api, 'get_lv', lambda vg_name: lv)
+ FooPVolume = api.PVolume(vg_name="vg", pv_name='/dev/sda', pv_uuid="0000", pv_tags={}, lv_uuid="aaaa")
+ BarPVolume = api.PVolume(vg_name="vg", pv_name='/dev/sda', pv_uuid="0000", pv_tags={})
+ pvolumes.append(FooPVolume)
+ pvolumes.append(BarPVolume)
+ monkeypatch.setattr(api, 'PVolumes', lambda: pvolumes)
+ listing = lvm.listing.List([])
+ result = listing.single_report('/dev/sda')
+ assert result['0'][0]['name'] == 'lv'
+ assert result['0'][0]['lv_tags'] == tags
+ assert result['0'][0]['path'] == '/dev/VolGroup/lv'
+ assert len(result) == 1
+
def test_report_a_ceph_lv_with_no_matching_devices(self, volumes, monkeypatch):
tags = 'ceph.osd_id=0,ceph.journal_uuid=x,ceph.type=data'
lv = api.Volume(
def test_main_spits_help_with_no_arguments(self, capsys):
lvm.zap.Zap([]).main()
stdout, stderr = capsys.readouterr()
- assert 'Zaps the given logical volume, raw device or partition' in stdout
+ assert 'Zaps the given logical volume(s), raw device(s) or partition(s)' in stdout
def test_main_shows_full_help(self, capsys):
with pytest.raises(SystemExit):
lvm.zap.Zap(argv=['--help']).main()
stdout, stderr = capsys.readouterr()
assert 'optional arguments' in stdout
- assert 'positional arguments' in stdout
+
+ @pytest.mark.parametrize('device_name', [
+ '/dev/mapper/foo',
+ '/dev/dm-0',
+ ])
+ def test_can_not_zap_mapper_device(self, capsys, is_root, device_name):
+ with pytest.raises(SystemExit):
+ lvm.zap.Zap(argv=[device_name]).main()
+ stdout, stderr = capsys.readouterr()
+ assert 'Refusing to zap' in stdout
--- /dev/null
+../../../../Vagrantfile
\ No newline at end of file
--- /dev/null
+---
+
+ceph_dev: True
+cluster: ceph
+public_network: "192.168.3.0/24"
+cluster_network: "192.168.4.0/24"
+monitor_interface: eth1
+osd_objectstore: "bluestore"
+osd_scenario: lvm
+dmcrypt: true
+ceph_origin: 'repository'
+ceph_repository: 'dev'
+copy_admin_key: false
+devices:
+ - /dev/sdb
+ - /dev/sdc
+os_tuning_params:
+ - { name: kernel.pid_max, value: 4194303 }
+ - { name: fs.file-max, value: 26234859 }
+ceph_conf_overrides:
+ global:
+ osd_pool_default_pg_num: 8
+ osd_pool_default_size: 1
--- /dev/null
+[mons]
+mon0
+
+[osds]
+osd0
+
+[mgrs]
+mon0
--- /dev/null
+../../../playbooks/test_bluestore_dmcrypt.yml
\ No newline at end of file
--- /dev/null
+---
+
+# DEFINE THE NUMBER OF VMS TO RUN
+mon_vms: 1
+osd_vms: 1
+mds_vms: 0
+rgw_vms: 0
+nfs_vms: 0
+rbd_mirror_vms: 0
+client_vms: 0
+iscsi_gw_vms: 0
+mgr_vms: 0
+
+# SUBNETS TO USE FOR THE VMS
+public_subnet: 192.168.3
+cluster_subnet: 192.168.4
+
+# MEMORY
+# set 1024 for CentOS
+memory: 512
+
+# Ethernet interface name
+# use eth1 for libvirt and ubuntu precise, enp0s8 for CentOS and ubuntu xenial
+eth: 'eth1'
+
+
+# VAGRANT BOX
+# Ceph boxes are *strongly* suggested. They are under better control and will
+# not get updated frequently unless required for build systems. These are (for
+# now):
+#
+# * ceph/ubuntu-xenial
+#
+# Ubuntu: ceph/ubuntu-xenial bento/ubuntu-16.04 or ubuntu/trusty64 or ubuntu/wily64
+# CentOS: bento/centos-7.1 or puppetlabs/centos-7.0-64-puppet
+# libvirt CentOS: centos/7
+# parallels Ubuntu: parallels/ubuntu-14.04
+# Debian: deb/jessie-amd64 - be careful the storage controller is named 'SATA Controller'
+# For more boxes have a look at:
+# - https://atlas.hashicorp.com/boxes/search?utf8=✓&sort=&provider=virtualbox&q=
+# - https://download.gluster.org/pub/gluster/purpleidea/vagrant/
+vagrant_box: centos/7
+#ssh_private_key_path: "~/.ssh/id_rsa"
+# The sync directory changes based on vagrant box
+# Set to /home/vagrant/sync for Centos/7, /home/{ user }/vagrant for openstack and defaults to /vagrant
+#vagrant_sync_dir: /home/vagrant/sync
+#vagrant_sync_dir: /
+# Disables synced folder creation. Not needed for testing, will skip mounting
+# the vagrant directory on the remote box regardless of the provider.
+vagrant_disable_synced_folder: true
+# VAGRANT URL
+# This is a URL to download an image from an alternate location. vagrant_box
+# above should be set to the filename of the image.
+# Fedora virtualbox: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-virtualbox.box
+# Fedora libvirt: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-libvirt.box
+# vagrant_box_url: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-virtualbox.box
--- /dev/null
+../../../../Vagrantfile
\ No newline at end of file
--- /dev/null
+---
+
+ceph_dev: True
+cluster: ceph
+public_network: "192.168.3.0/24"
+cluster_network: "192.168.4.0/24"
+monitor_interface: eth1
+osd_objectstore: "bluestore"
+osd_scenario: lvm
+ceph_origin: 'repository'
+ceph_repository: 'dev'
+copy_admin_key: false
+devices:
+ - /dev/sdb
+ - /dev/sdc
+os_tuning_params:
+ - { name: kernel.pid_max, value: 4194303 }
+ - { name: fs.file-max, value: 26234859 }
+ceph_conf_overrides:
+ global:
+ osd_pool_default_pg_num: 8
+ osd_pool_default_size: 1
--- /dev/null
+[mons]
+mon0
+
+[osds]
+osd0
+
+[mgrs]
+mon0
--- /dev/null
+../../../playbooks/test_bluestore.yml
\ No newline at end of file
--- /dev/null
+---
+
+# DEFINE THE NUMBER OF VMS TO RUN
+mon_vms: 1
+osd_vms: 1
+mds_vms: 0
+rgw_vms: 0
+nfs_vms: 0
+rbd_mirror_vms: 0
+client_vms: 0
+iscsi_gw_vms: 0
+mgr_vms: 0
+
+# SUBNETS TO USE FOR THE VMS
+public_subnet: 192.168.3
+cluster_subnet: 192.168.4
+
+# MEMORY
+# set 1024 for CentOS
+memory: 512
+
+# Ethernet interface name
+# use eth1 for libvirt and ubuntu precise, enp0s8 for CentOS and ubuntu xenial
+eth: 'eth1'
+
+
+# VAGRANT BOX
+# Ceph boxes are *strongly* suggested. They are under better control and will
+# not get updated frequently unless required for build systems. These are (for
+# now):
+#
+# * ceph/ubuntu-xenial
+#
+# Ubuntu: ceph/ubuntu-xenial bento/ubuntu-16.04 or ubuntu/trusty64 or ubuntu/wily64
+# CentOS: bento/centos-7.1 or puppetlabs/centos-7.0-64-puppet
+# libvirt CentOS: centos/7
+# parallels Ubuntu: parallels/ubuntu-14.04
+# Debian: deb/jessie-amd64 - be careful the storage controller is named 'SATA Controller'
+# For more boxes have a look at:
+# - https://atlas.hashicorp.com/boxes/search?utf8=✓&sort=&provider=virtualbox&q=
+# - https://download.gluster.org/pub/gluster/purpleidea/vagrant/
+vagrant_box: centos/7
+#ssh_private_key_path: "~/.ssh/id_rsa"
+# The sync directory changes based on vagrant box
+# Set to /home/vagrant/sync for Centos/7, /home/{ user }/vagrant for openstack and defaults to /vagrant
+#vagrant_sync_dir: /home/vagrant/sync
+#vagrant_sync_dir: /
+# Disables synced folder creation. Not needed for testing, will skip mounting
+# the vagrant directory on the remote box regardless of the provider.
+vagrant_disable_synced_folder: true
+# VAGRANT URL
+# This is a URL to download an image from an alternate location. vagrant_box
+# above should be set to the filename of the image.
+# Fedora virtualbox: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-virtualbox.box
+# Fedora libvirt: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-libvirt.box
+# vagrant_box_url: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-virtualbox.box
--- /dev/null
+../../../../Vagrantfile
\ No newline at end of file
--- /dev/null
+---
+
+ceph_dev: True
+cluster: ceph
+public_network: "192.168.3.0/24"
+cluster_network: "192.168.4.0/24"
+monitor_interface: eth1
+osd_objectstore: "filestore"
+osd_scenario: lvm
+dmcrypt: true
+ceph_origin: 'repository'
+ceph_repository: 'dev'
+copy_admin_key: false
+devices:
+ - /dev/sdb
+ - /dev/sdc
+os_tuning_params:
+ - { name: kernel.pid_max, value: 4194303 }
+ - { name: fs.file-max, value: 26234859 }
+ceph_conf_overrides:
+ global:
+ osd_pool_default_pg_num: 8
+ osd_pool_default_size: 1
+ osd:
+ osd_journal_size: 2048
--- /dev/null
+[mons]
+mon0
+
+[osds]
+osd0
+
+[mgrs]
+mon0
--- /dev/null
+../../../playbooks/test_filestore_dmcrypt.yml
\ No newline at end of file
--- /dev/null
+---
+
+# DEFINE THE NUMBER OF VMS TO RUN
+mon_vms: 1
+osd_vms: 1
+mds_vms: 0
+rgw_vms: 0
+nfs_vms: 0
+rbd_mirror_vms: 0
+client_vms: 0
+iscsi_gw_vms: 0
+mgr_vms: 0
+
+# SUBNETS TO USE FOR THE VMS
+public_subnet: 192.168.3
+cluster_subnet: 192.168.4
+
+# MEMORY
+# set 1024 for CentOS
+memory: 512
+
+# Ethernet interface name
+# use eth1 for libvirt and ubuntu precise, enp0s8 for CentOS and ubuntu xenial
+eth: 'eth1'
+
+
+# VAGRANT BOX
+# Ceph boxes are *strongly* suggested. They are under better control and will
+# not get updated frequently unless required for build systems. These are (for
+# now):
+#
+# * ceph/ubuntu-xenial
+#
+# Ubuntu: ceph/ubuntu-xenial bento/ubuntu-16.04 or ubuntu/trusty64 or ubuntu/wily64
+# CentOS: bento/centos-7.1 or puppetlabs/centos-7.0-64-puppet
+# libvirt CentOS: centos/7
+# parallels Ubuntu: parallels/ubuntu-14.04
+# Debian: deb/jessie-amd64 - be careful the storage controller is named 'SATA Controller'
+# For more boxes have a look at:
+# - https://atlas.hashicorp.com/boxes/search?utf8=✓&sort=&provider=virtualbox&q=
+# - https://download.gluster.org/pub/gluster/purpleidea/vagrant/
+vagrant_box: centos/7
+#ssh_private_key_path: "~/.ssh/id_rsa"
+# The sync directory changes based on vagrant box
+# Set to /home/vagrant/sync for Centos/7, /home/{ user }/vagrant for openstack and defaults to /vagrant
+#vagrant_sync_dir: /home/vagrant/sync
+#vagrant_sync_dir: /
+# Disables synced folder creation. Not needed for testing, will skip mounting
+# the vagrant directory on the remote box regardless of the provider.
+vagrant_disable_synced_folder: true
+# VAGRANT URL
+# This is a URL to download an image from an alternate location. vagrant_box
+# above should be set to the filename of the image.
+# Fedora virtualbox: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-virtualbox.box
+# Fedora libvirt: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-libvirt.box
+# vagrant_box_url: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-virtualbox.box
--- /dev/null
+../../../../Vagrantfile
\ No newline at end of file
--- /dev/null
+---
+
+ceph_dev: True
+cluster: ceph
+public_network: "192.168.3.0/24"
+cluster_network: "192.168.4.0/24"
+monitor_interface: eth1
+osd_objectstore: "filestore"
+osd_scenario: lvm
+ceph_origin: 'repository'
+ceph_repository: 'dev'
+copy_admin_key: false
+devices:
+ - /dev/sdb
+ - /dev/sdc
+os_tuning_params:
+ - { name: kernel.pid_max, value: 4194303 }
+ - { name: fs.file-max, value: 26234859 }
+ceph_conf_overrides:
+ global:
+ osd_pool_default_pg_num: 8
+ osd_pool_default_size: 1
--- /dev/null
+[mons]
+mon0
+
+[osds]
+osd0
+
+[mgrs]
+mon0
--- /dev/null
+../../../playbooks/test_filestore.yml
\ No newline at end of file
--- /dev/null
+---
+
+# DEFINE THE NUMBER OF VMS TO RUN
+mon_vms: 1
+osd_vms: 1
+mds_vms: 0
+rgw_vms: 0
+nfs_vms: 0
+rbd_mirror_vms: 0
+client_vms: 0
+iscsi_gw_vms: 0
+mgr_vms: 0
+
+# SUBNETS TO USE FOR THE VMS
+public_subnet: 192.168.3
+cluster_subnet: 192.168.4
+
+# MEMORY
+# set 1024 for CentOS
+memory: 512
+
+# Ethernet interface name
+# use eth1 for libvirt and ubuntu precise, enp0s8 for CentOS and ubuntu xenial
+eth: 'eth1'
+
+
+# VAGRANT BOX
+# Ceph boxes are *strongly* suggested. They are under better control and will
+# not get updated frequently unless required for build systems. These are (for
+# now):
+#
+# * ceph/ubuntu-xenial
+#
+# Ubuntu: ceph/ubuntu-xenial bento/ubuntu-16.04 or ubuntu/trusty64 or ubuntu/wily64
+# CentOS: bento/centos-7.1 or puppetlabs/centos-7.0-64-puppet
+# libvirt CentOS: centos/7
+# parallels Ubuntu: parallels/ubuntu-14.04
+# Debian: deb/jessie-amd64 - be careful the storage controller is named 'SATA Controller'
+# For more boxes have a look at:
+# - https://atlas.hashicorp.com/boxes/search?utf8=✓&sort=&provider=virtualbox&q=
+# - https://download.gluster.org/pub/gluster/purpleidea/vagrant/
+vagrant_box: centos/7
+#ssh_private_key_path: "~/.ssh/id_rsa"
+# The sync directory changes based on vagrant box
+# Set to /home/vagrant/sync for Centos/7, /home/{ user }/vagrant for openstack and defaults to /vagrant
+#vagrant_sync_dir: /home/vagrant/sync
+#vagrant_sync_dir: /
+# Disables synced folder creation. Not needed for testing, will skip mounting
+# the vagrant directory on the remote box regardless of the provider.
+vagrant_disable_synced_folder: true
+# VAGRANT URL
+# This is a URL to download an image from an alternate location. vagrant_box
+# above should be set to the filename of the image.
+# Fedora virtualbox: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-virtualbox.box
+# Fedora libvirt: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-libvirt.box
+# vagrant_box_url: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-virtualbox.box
--- /dev/null
+
+- hosts: osds
+ become: yes
+ tasks:
+
+ - name: stop ceph-osd@1 daemon
+ service:
+ name: ceph-osd@1
+ state: stopped
+
+ - name: stop ceph-osd@0 daemon
+ service:
+ name: ceph-osd@0
+ state: stopped
+
+
+- hosts: mons
+ become: yes
+ tasks:
+
+ - name: destroy osd.1
+ command: "ceph osd purge osd.1 --yes-i-really-mean-it"
+
+ - name: destroy osd.0
+ command: "ceph osd purge osd.0 --yes-i-really-mean-it"
+
+
+- hosts: osds
+ become: yes
+ tasks:
+
+  - name: zap /dev/sdb
+ command: "ceph-volume lvm zap /dev/sdb --destroy"
+ environment:
+ CEPH_VOLUME_DEBUG: 1
+
+
+ - name: zap /dev/sdc
+ command: "ceph-volume lvm zap /dev/sdc --destroy"
+ environment:
+ CEPH_VOLUME_DEBUG: 1
+
+ - name: batch create /dev/sdb and /dev/sdc again
+ command: "ceph-volume lvm batch --yes --bluestore /dev/sdb /dev/sdc"
+ environment:
+ CEPH_VOLUME_DEBUG: 1
--- /dev/null
+
+- hosts: osds
+ become: yes
+ tasks:
+
+ - name: stop ceph-osd@1 daemon
+ service:
+ name: ceph-osd@1
+ state: stopped
+
+ - name: stop ceph-osd@0 daemon
+ service:
+ name: ceph-osd@0
+ state: stopped
+
+
+- hosts: mons
+ become: yes
+ tasks:
+
+ - name: destroy osd.1
+ command: "ceph osd purge osd.1 --yes-i-really-mean-it"
+
+ - name: destroy osd.0
+ command: "ceph osd purge osd.0 --yes-i-really-mean-it"
+
+
+- hosts: osds
+ become: yes
+ tasks:
+
+  - name: zap /dev/sdb
+ command: "ceph-volume lvm zap /dev/sdb --destroy"
+ environment:
+ CEPH_VOLUME_DEBUG: 1
+
+
+ - name: zap /dev/sdc
+ command: "ceph-volume lvm zap /dev/sdc --destroy"
+ environment:
+ CEPH_VOLUME_DEBUG: 1
+
+ - name: batch create /dev/sdb and /dev/sdc again
+ command: "ceph-volume lvm batch --yes --bluestore --dmcrypt /dev/sdb /dev/sdc"
+ environment:
+ CEPH_VOLUME_DEBUG: 1
--- /dev/null
+
+- hosts: osds
+ become: yes
+ tasks:
+
+ - name: stop ceph-osd@1 daemon
+ service:
+ name: ceph-osd@1
+ state: stopped
+
+ - name: stop ceph-osd@0 daemon
+ service:
+ name: ceph-osd@0
+ state: stopped
+
+
+- hosts: mons
+ become: yes
+ tasks:
+
+ - name: destroy osd.1
+ command: "ceph osd purge osd.1 --yes-i-really-mean-it"
+
+ - name: destroy osd.0
+ command: "ceph osd purge osd.0 --yes-i-really-mean-it"
+
+
+- hosts: osds
+ become: yes
+ tasks:
+
+  - name: zap /dev/sdb
+ command: "ceph-volume lvm zap /dev/sdb --destroy"
+ environment:
+ CEPH_VOLUME_DEBUG: 1
+
+
+ - name: zap /dev/sdc
+ command: "ceph-volume lvm zap /dev/sdc --destroy"
+ environment:
+ CEPH_VOLUME_DEBUG: 1
+
+ - name: batch create /dev/sdb and /dev/sdc again
+ command: "ceph-volume lvm batch --yes --filestore /dev/sdb /dev/sdc"
+ environment:
+ CEPH_VOLUME_DEBUG: 1
--- /dev/null
+
+- hosts: osds
+ become: yes
+ tasks:
+
+ - name: stop ceph-osd@1 daemon
+ service:
+ name: ceph-osd@1
+ state: stopped
+
+ - name: stop ceph-osd@0 daemon
+ service:
+ name: ceph-osd@0
+ state: stopped
+
+
+- hosts: mons
+ become: yes
+ tasks:
+
+ - name: destroy osd.1
+ command: "ceph osd purge osd.1 --yes-i-really-mean-it"
+
+ - name: destroy osd.0
+ command: "ceph osd purge osd.0 --yes-i-really-mean-it"
+
+
+- hosts: osds
+ become: yes
+ tasks:
+
+  - name: zap /dev/sdb
+ command: "ceph-volume lvm zap /dev/sdb --destroy"
+ environment:
+ CEPH_VOLUME_DEBUG: 1
+
+
+ - name: zap /dev/sdc
+ command: "ceph-volume lvm zap /dev/sdc --destroy"
+ environment:
+ CEPH_VOLUME_DEBUG: 1
+
+ - name: batch create /dev/sdb and /dev/sdc again
+ command: "ceph-volume lvm batch --yes --filestore --dmcrypt /dev/sdb /dev/sdc"
+ environment:
+ CEPH_VOLUME_DEBUG: 1
--- /dev/null
+[tox]
+envlist = {centos7,xenial}-{bluestore,filestore}-{single_type,single_type_dmcrypt}
+skipsdist = True
+
+[testenv]
+whitelist_externals =
+ vagrant
+ bash
+ git
+ cp
+passenv=*
+setenv=
+ ANSIBLE_SSH_ARGS = -F {changedir}/vagrant_ssh_config
+ ANSIBLE_ACTION_PLUGINS = {envdir}/tmp/ceph-ansible/plugins/actions
+ ANSIBLE_STDOUT_CALLBACK = debug
+ ANSIBLE_RETRY_FILES_ENABLED = False
+ ANSIBLE_SSH_RETRIES = 5
+ VAGRANT_CWD = {changedir}
+ CEPH_VOLUME_DEBUG = 1
+deps=
+ ansible~=2.6,<2.7
+ testinfra
+ pytest-xdist
+ notario>=0.0.13
+changedir=
+ centos7-filestore-single_type: {toxinidir}/centos7/filestore/single-type
+ centos7-filestore-single_type_dmcrypt: {toxinidir}/centos7/filestore/single-type-dmcrypt
+ centos7-bluestore-single_type: {toxinidir}/centos7/bluestore/single-type
+ centos7-bluestore-single_type_dmcrypt: {toxinidir}/centos7/bluestore/single-type-dmcrypt
+ xenial-filestore-single_type: {toxinidir}/xenial/filestore/single-type
+ xenial-filestore-single_type_dmcrypt: {toxinidir}/xenial/filestore/single-type-dmcrypt
+ xenial-bluestore-single_type: {toxinidir}/xenial/bluestore/single-type
+ xenial-bluestore-single_type_dmcrypt: {toxinidir}/xenial/bluestore/single-type-dmcrypt
+commands=
+ git clone -b {env:CEPH_ANSIBLE_BRANCH:master} --single-branch https://github.com/ceph/ceph-ansible.git {envdir}/tmp/ceph-ansible
+
+ bash {toxinidir}/../scripts/vagrant_up.sh {env:VAGRANT_UP_FLAGS:"--no-provision"} {posargs:--provider=virtualbox}
+ bash {toxinidir}/../scripts/generate_ssh_config.sh {changedir}
+
+ cp {toxinidir}/../playbooks/deploy.yml {envdir}/tmp/ceph-ansible
+
+ # use ceph-ansible to deploy a ceph cluster on the vms
+ ansible-playbook -vv -i {changedir}/hosts {envdir}/tmp/ceph-ansible/deploy.yml --extra-vars "fetch_directory={changedir}/fetch ceph_dev_branch={env:CEPH_DEV_BRANCH:master} ceph_dev_sha1={env:CEPH_DEV_SHA1:latest} toxinidir={toxinidir}"
+
+ # prepare nodes for testing with testinfra
+ ansible-playbook -vv -i {changedir}/hosts {envdir}/tmp/ceph-ansible/tests/functional/setup.yml
+
+ # test cluster state using ceph-ansible tests
+ testinfra -n 4 --sudo -v --connection=ansible --ansible-inventory={changedir}/hosts {envdir}/tmp/ceph-ansible/tests/functional/tests
+
+ # reboot all vms - attempt
+ bash {toxinidir}/../scripts/vagrant_reload.sh {env:VAGRANT_UP_FLAGS:"--no-provision"} {posargs:--provider=virtualbox}
+
+ # retest to ensure cluster came back up correctly after rebooting
+ testinfra -n 4 --sudo -v --connection=ansible --ansible-inventory={changedir}/hosts {envdir}/tmp/ceph-ansible/tests/functional/tests
+
+  # destroy an OSD, zap its device and recreate it using its ID
+ ansible-playbook -vv -i {changedir}/hosts {changedir}/test.yml
+
+ # retest to ensure cluster came back up correctly
+ testinfra -n 4 --sudo -v --connection=ansible --ansible-inventory={changedir}/hosts {envdir}/tmp/ceph-ansible/tests/functional/tests
+
+ vagrant destroy {env:VAGRANT_DESTROY_FLAGS:"--force"}
--- /dev/null
+../../../../Vagrantfile
\ No newline at end of file
--- /dev/null
+---
+
+dmcrypt: True
+ceph_dev: True
+cluster: ceph
+public_network: "192.168.3.0/24"
+cluster_network: "192.168.4.0/24"
+monitor_interface: eth1
+osd_objectstore: "bluestore"
+osd_scenario: lvm
+ceph_origin: 'repository'
+ceph_repository: 'dev'
+copy_admin_key: false
+devices:
+ - /dev/sdb
+ - /dev/sdc
+os_tuning_params:
+ - { name: kernel.pid_max, value: 4194303 }
+ - { name: fs.file-max, value: 26234859 }
+ceph_conf_overrides:
+ global:
+ osd_pool_default_pg_num: 8
+ osd_pool_default_size: 1
--- /dev/null
+[mons]
+mon0
+
+[osds]
+osd0
+
+[mgrs]
+mon0
--- /dev/null
+../../../playbooks/test_bluestore_dmcrypt.yml
\ No newline at end of file
--- /dev/null
+---
+
+# DEFINE THE NUMBER OF VMS TO RUN
+mon_vms: 1
+osd_vms: 1
+mds_vms: 0
+rgw_vms: 0
+nfs_vms: 0
+rbd_mirror_vms: 0
+client_vms: 0
+iscsi_gw_vms: 0
+mgr_vms: 0
+
+# SUBNETS TO USE FOR THE VMS
+public_subnet: 192.168.3
+cluster_subnet: 192.168.4
+
+# MEMORY
+# set 1024 for CentOS
+memory: 512
+
+# Ethernet interface name
+# use eth1 for libvirt and ubuntu precise, enp0s8 for CentOS and ubuntu xenial
+eth: 'eth1'
+
+
+# VAGRANT BOX
+# Ceph boxes are *strongly* suggested. They are under better control and will
+# not get updated frequently unless required for build systems. These are (for
+# now):
+#
+# * ceph/ubuntu-xenial
+#
+# Ubuntu: ceph/ubuntu-xenial bento/ubuntu-16.04 or ubuntu/trusty64 or ubuntu/wily64
+# CentOS: bento/centos-7.1 or puppetlabs/centos-7.0-64-puppet
+# libvirt CentOS: centos/7
+# parallels Ubuntu: parallels/ubuntu-14.04
+# Debian: deb/jessie-amd64 - be careful the storage controller is named 'SATA Controller'
+# For more boxes have a look at:
+# - https://atlas.hashicorp.com/boxes/search?utf8=✓&sort=&provider=virtualbox&q=
+# - https://download.gluster.org/pub/gluster/purpleidea/vagrant/
+vagrant_box: ceph/ubuntu-xenial
+#ssh_private_key_path: "~/.ssh/id_rsa"
+# The sync directory changes based on vagrant box
+# Set to /home/vagrant/sync for Centos/7, /home/{ user }/vagrant for openstack and defaults to /vagrant
+#vagrant_sync_dir: /home/vagrant/sync
+#vagrant_sync_dir: /
+# Disables synced folder creation. Not needed for testing, will skip mounting
+# the vagrant directory on the remote box regardless of the provider.
+vagrant_disable_synced_folder: true
+# VAGRANT URL
+# This is a URL to download an image from an alternate location. vagrant_box
+# above should be set to the filename of the image.
+# Fedora virtualbox: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-virtualbox.box
+# Fedora libvirt: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-libvirt.box
+# vagrant_box_url: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-virtualbox.box
--- /dev/null
+../../../../Vagrantfile
\ No newline at end of file
--- /dev/null
+---
+
+ceph_dev: True
+cluster: ceph
+public_network: "192.168.3.0/24"
+cluster_network: "192.168.4.0/24"
+monitor_interface: eth1
+osd_objectstore: "bluestore"
+osd_scenario: lvm
+ceph_origin: 'repository'
+ceph_repository: 'dev'
+copy_admin_key: false
+devices:
+ - /dev/sdb
+ - /dev/sdc
+os_tuning_params:
+ - { name: kernel.pid_max, value: 4194303 }
+ - { name: fs.file-max, value: 26234859 }
+ceph_conf_overrides:
+ global:
+ osd_pool_default_pg_num: 8
+ osd_pool_default_size: 1
--- /dev/null
+[mons]
+mon0
+
+[osds]
+osd0
+
+[mgrs]
+mon0
--- /dev/null
+../../../playbooks/test_bluestore.yml
\ No newline at end of file
--- /dev/null
+---
+
+# DEFINE THE NUMBER OF VMS TO RUN
+mon_vms: 1
+osd_vms: 1
+mds_vms: 0
+rgw_vms: 0
+nfs_vms: 0
+rbd_mirror_vms: 0
+client_vms: 0
+iscsi_gw_vms: 0
+mgr_vms: 0
+
+# SUBNETS TO USE FOR THE VMS
+public_subnet: 192.168.3
+cluster_subnet: 192.168.4
+
+# MEMORY
+# set 1024 for CentOS
+memory: 512
+
+# Ethernet interface name
+# use eth1 for libvirt and ubuntu precise, enp0s8 for CentOS and ubuntu xenial
+eth: 'eth1'
+
+
+# VAGRANT BOX
+# Ceph boxes are *strongly* suggested. They are under better control and will
+# not get updated frequently unless required for build systems. These are (for
+# now):
+#
+# * ceph/ubuntu-xenial
+#
+# Ubuntu: ceph/ubuntu-xenial bento/ubuntu-16.04 or ubuntu/trusty64 or ubuntu/wily64
+# CentOS: bento/centos-7.1 or puppetlabs/centos-7.0-64-puppet
+# libvirt CentOS: centos/7
+# parallels Ubuntu: parallels/ubuntu-14.04
+# Debian: deb/jessie-amd64 - be careful the storage controller is named 'SATA Controller'
+# For more boxes have a look at:
+# - https://atlas.hashicorp.com/boxes/search?utf8=✓&sort=&provider=virtualbox&q=
+# - https://download.gluster.org/pub/gluster/purpleidea/vagrant/
+vagrant_box: ceph/ubuntu-xenial
+#ssh_private_key_path: "~/.ssh/id_rsa"
+# The sync directory changes based on vagrant box
+# Set to /home/vagrant/sync for Centos/7, /home/{ user }/vagrant for openstack and defaults to /vagrant
+#vagrant_sync_dir: /home/vagrant/sync
+#vagrant_sync_dir: /
+# Disables synced folder creation. Not needed for testing, will skip mounting
+# the vagrant directory on the remote box regardless of the provider.
+vagrant_disable_synced_folder: true
+# VAGRANT URL
+# This is a URL to download an image from an alternate location. vagrant_box
+# above should be set to the filename of the image.
+# Fedora virtualbox: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-virtualbox.box
+# Fedora libvirt: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-libvirt.box
+# vagrant_box_url: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-virtualbox.box
--- /dev/null
+../../../../Vagrantfile
\ No newline at end of file
--- /dev/null
+---
+
+dmcrypt: True
+ceph_dev: True
+cluster: ceph
+public_network: "192.168.3.0/24"
+cluster_network: "192.168.4.0/24"
+monitor_interface: eth1
+osd_objectstore: "filestore"
+osd_scenario: lvm
+ceph_origin: 'repository'
+ceph_repository: 'dev'
+copy_admin_key: false
+devices:
+ - /dev/sdb
+ - /dev/sdc
+os_tuning_params:
+ - { name: kernel.pid_max, value: 4194303 }
+ - { name: fs.file-max, value: 26234859 }
+ceph_conf_overrides:
+ global:
+ osd_pool_default_pg_num: 8
+ osd_pool_default_size: 1
+ osd:
+ osd_journal_size: 2048
--- /dev/null
+[mons]
+mon0
+
+[osds]
+osd0
+
+[mgrs]
+mon0
--- /dev/null
+../../../playbooks/test_filestore_dmcrypt.yml
\ No newline at end of file
--- /dev/null
+---
+
+# DEFINE THE NUMBER OF VMS TO RUN
+mon_vms: 1
+osd_vms: 1
+mds_vms: 0
+rgw_vms: 0
+nfs_vms: 0
+rbd_mirror_vms: 0
+client_vms: 0
+iscsi_gw_vms: 0
+mgr_vms: 0
+
+# SUBNETS TO USE FOR THE VMS
+public_subnet: 192.168.3
+cluster_subnet: 192.168.4
+
+# MEMORY
+# set 1024 for CentOS
+memory: 512
+
+# Ethernet interface name
+# use eth1 for libvirt and ubuntu precise, enp0s8 for CentOS and ubuntu xenial
+eth: 'eth1'
+
+
+# VAGRANT BOX
+# Ceph boxes are *strongly* suggested. They are under better control and will
+# not get updated frequently unless required for build systems. These are (for
+# now):
+#
+# * ceph/ubuntu-xenial
+#
+# Ubuntu: ceph/ubuntu-xenial bento/ubuntu-16.04 or ubuntu/trusty64 or ubuntu/wily64
+# CentOS: bento/centos-7.1 or puppetlabs/centos-7.0-64-puppet
+# libvirt CentOS: centos/7
+# parallels Ubuntu: parallels/ubuntu-14.04
+# Debian: deb/jessie-amd64 - be careful the storage controller is named 'SATA Controller'
+# For more boxes have a look at:
+# - https://atlas.hashicorp.com/boxes/search?utf8=✓&sort=&provider=virtualbox&q=
+# - https://download.gluster.org/pub/gluster/purpleidea/vagrant/
+vagrant_box: ceph/ubuntu-xenial
+#ssh_private_key_path: "~/.ssh/id_rsa"
+# The sync directory changes based on vagrant box
+# Set to /home/vagrant/sync for Centos/7, /home/{ user }/vagrant for openstack and defaults to /vagrant
+#vagrant_sync_dir: /home/vagrant/sync
+#vagrant_sync_dir: /
+# Disables synced folder creation. Not needed for testing, will skip mounting
+# the vagrant directory on the remote box regardless of the provider.
+vagrant_disable_synced_folder: true
+# VAGRANT URL
+# This is a URL to download an image from an alternate location. vagrant_box
+# above should be set to the filename of the image.
+# Fedora virtualbox: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-virtualbox.box
+# Fedora libvirt: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-libvirt.box
+# vagrant_box_url: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-virtualbox.box
--- /dev/null
+../../../../Vagrantfile
\ No newline at end of file
--- /dev/null
+---
+
+ceph_dev: True
+cluster: ceph
+public_network: "192.168.3.0/24"
+cluster_network: "192.168.4.0/24"
+monitor_interface: eth1
+osd_objectstore: "filestore"
+osd_scenario: lvm
+ceph_origin: 'repository'
+ceph_repository: 'dev'
+copy_admin_key: false
+devices:
+ - /dev/sdb
+ - /dev/sdc
+os_tuning_params:
+ - { name: kernel.pid_max, value: 4194303 }
+ - { name: fs.file-max, value: 26234859 }
+ceph_conf_overrides:
+ global:
+ osd_pool_default_pg_num: 8
+ osd_pool_default_size: 1
--- /dev/null
+[mons]
+mon0
+
+[osds]
+osd0
+
+[mgrs]
+mon0
--- /dev/null
+../../../playbooks/test_filestore.yml
\ No newline at end of file
--- /dev/null
+---
+
+# DEFINE THE NUMBER OF VMS TO RUN
+mon_vms: 1
+osd_vms: 1
+mds_vms: 0
+rgw_vms: 0
+nfs_vms: 0
+rbd_mirror_vms: 0
+client_vms: 0
+iscsi_gw_vms: 0
+mgr_vms: 0
+
+# SUBNETS TO USE FOR THE VMS
+public_subnet: 192.168.3
+cluster_subnet: 192.168.4
+
+# MEMORY
+# set 1024 for CentOS
+memory: 512
+
+# Ethernet interface name
+# use eth1 for libvirt and ubuntu precise, enp0s8 for CentOS and ubuntu xenial
+eth: 'eth1'
+
+
+# VAGRANT BOX
+# Ceph boxes are *strongly* suggested. They are under better control and will
+# not get updated frequently unless required for build systems. These are (for
+# now):
+#
+# * ceph/ubuntu-xenial
+#
+# Ubuntu: ceph/ubuntu-xenial bento/ubuntu-16.04 or ubuntu/trusty64 or ubuntu/wily64
+# CentOS: bento/centos-7.1 or puppetlabs/centos-7.0-64-puppet
+# libvirt CentOS: centos/7
+# parallels Ubuntu: parallels/ubuntu-14.04
+# Debian: deb/jessie-amd64 - be careful the storage controller is named 'SATA Controller'
+# For more boxes have a look at:
+# - https://atlas.hashicorp.com/boxes/search?utf8=✓&sort=&provider=virtualbox&q=
+# - https://download.gluster.org/pub/gluster/purpleidea/vagrant/
+vagrant_box: ceph/ubuntu-xenial
+#ssh_private_key_path: "~/.ssh/id_rsa"
+# The sync directory changes based on vagrant box
+# Set to /home/vagrant/sync for Centos/7, /home/{ user }/vagrant for openstack and defaults to /vagrant
+#vagrant_sync_dir: /home/vagrant/sync
+#vagrant_sync_dir: /
+# Disables synced folder creation. Not needed for testing, will skip mounting
+# the vagrant directory on the remote box regardless of the provider.
+vagrant_disable_synced_folder: true
+# VAGRANT URL
+# This is a URL to download an image from an alternate location. vagrant_box
+# above should be set to the filename of the image.
+# Fedora virtualbox: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-virtualbox.box
+# Fedora libvirt: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-libvirt.box
+# vagrant_box_url: https://download.fedoraproject.org/pub/fedora/linux/releases/22/Cloud/x86_64/Images/Fedora-Cloud-Base-Vagrant-22-20150521.x86_64.vagrant-virtualbox.box
[osds]
osd0
+
+[mgrs]
+mon0
[osds]
osd0
+
+[mgrs]
+mon0
command: "ceph-volume lvm activate --all"
environment:
CEPH_VOLUME_DEBUG: 1
+
+ - name: list all OSDs
+ command: "ceph-volume lvm list"
+ environment:
+ CEPH_VOLUME_DEBUG: 1
[osds]
osd0
+
+[mgrs]
+mon0
[osds]
osd0
+
+[mgrs]
+mon0
command: "ceph-volume lvm activate --filestore --all"
environment:
CEPH_VOLUME_DEBUG: 1
+
+ - name: list all OSDs
+ command: "ceph-volume lvm list"
+ environment:
+ CEPH_VOLUME_DEBUG: 1
command: "ceph-volume lvm activate --all"
environment:
CEPH_VOLUME_DEBUG: 1
+
+ - name: list all OSDs
+ command: "ceph-volume lvm list"
+ environment:
+ CEPH_VOLUME_DEBUG: 1
command: "ceph-volume lvm activate --filestore --all"
environment:
CEPH_VOLUME_DEBUG: 1
+
+ - name: list all OSDs
+ command: "ceph-volume lvm list"
+ environment:
+ CEPH_VOLUME_DEBUG: 1
vagrant
bash
git
+ cp
+ sleep
passenv=*
setenv=
ANSIBLE_SSH_ARGS = -F {changedir}/vagrant_ssh_config
VAGRANT_CWD = {changedir}
CEPH_VOLUME_DEBUG = 1
deps=
- ansible==2.4.1
- testinfra==1.7.1
+ ansible~=2.6,<2.7
+ testinfra
pytest-xdist
notario>=0.0.13
changedir=
# but the master branch doesn't pin dependencies so we can't guarantee to work correctly
#pip install -r {envdir}/tmp/ceph-ansible/requirements.txt
- vagrant up --no-provision {posargs:--provider=virtualbox}
+ bash {toxinidir}/../scripts/vagrant_up.sh {env:VAGRANT_UP_FLAGS:"--no-provision"} {posargs:--provider=virtualbox}
bash {toxinidir}/../scripts/generate_ssh_config.sh {changedir}
# create logical volumes to test with on the vms
# ad-hoc/local test setup for lvm
ansible-playbook -vv -i {changedir}/hosts {changedir}/setup.yml
+ cp {toxinidir}/../playbooks/deploy.yml {envdir}/tmp/ceph-ansible
+
# use ceph-ansible to deploy a ceph cluster on the vms
- ansible-playbook -vv -i {changedir}/hosts {envdir}/tmp/ceph-ansible/site.yml.sample --extra-vars "fetch_directory={changedir}/fetch ceph_dev_branch={env:CEPH_DEV_BRANCH:master} ceph_dev_sha1={env:CEPH_DEV_SHA1:latest}"
+ ansible-playbook -vv -i {changedir}/hosts {envdir}/tmp/ceph-ansible/deploy.yml --extra-vars "fetch_directory={changedir}/fetch ceph_dev_branch={env:CEPH_DEV_BRANCH:master} ceph_dev_sha1={env:CEPH_DEV_SHA1:latest} toxinidir={toxinidir}"
# prepare nodes for testing with testinfra
ansible-playbook -vv -i {changedir}/hosts {envdir}/tmp/ceph-ansible/tests/functional/setup.yml
[osds]
osd0
+
+[mgrs]
+mon0
[osds]
osd0
+
+[mgrs]
+mon0
command: "ceph-volume lvm activate --all"
environment:
CEPH_VOLUME_DEBUG: 1
+
+ - name: list all OSDs
+ command: "ceph-volume lvm list"
+ environment:
+ CEPH_VOLUME_DEBUG: 1
[osds]
osd0
+
+[mgrs]
+mon0
[osds]
osd0
+
+[mgrs]
+mon0
command: "ceph-volume lvm activate --filestore --all"
environment:
CEPH_VOLUME_DEBUG: 1
+
+ - name: list all OSDs
+ command: "ceph-volume lvm list"
+ environment:
+ CEPH_VOLUME_DEBUG: 1
--- /dev/null
+---
+# Defines deployment design and assigns role to server groups
+
+- hosts:
+ - mons
+ - osds
+ - mgrs
+
+ gather_facts: false
+ any_errors_fatal: true
+ become: true
+
+ tags:
+ - always
+
+ vars:
+ delegate_facts_host: True
+
+ pre_tasks:
+ # If we can't get python2 installed before any module is used we will fail
+ # so just try what we can to get it installed
+ - name: check for python2
+ stat:
+ path: /usr/bin/python
+ ignore_errors: yes
+ register: systempython2
+
+ - name: install python2 for debian based systems
+ raw: sudo apt-get -y install python-simplejson
+ ignore_errors: yes
+ when:
+ - systempython2.stat is undefined or systempython2.stat.exists == false
+
+ - name: install python2 for fedora
+ raw: sudo dnf -y install python creates=/usr/bin/python
+ ignore_errors: yes
+ when:
+ - systempython2.stat is undefined or systempython2.stat.exists == false
+
+ - name: install python2 for opensuse
+ raw: sudo zypper -n install python-base creates=/usr/bin/python2.7
+ ignore_errors: yes
+ when:
+ - systempython2.stat is undefined or systempython2.stat.exists == false
+
+ - name: gather facts
+ setup:
+ when:
+ - not delegate_facts_host | bool
+
+ - name: gather and delegate facts
+ setup:
+ delegate_to: "{{ item }}"
+ delegate_facts: True
+ with_items: "{{ groups['all'] }}"
+ run_once: true
+ when:
+ - delegate_facts_host | bool
+
+ - name: install required packages for fedora > 23
+ raw: sudo dnf -y install python2-dnf libselinux-python ntp
+ when:
+ - ansible_distribution == 'Fedora'
+ - ansible_distribution_major_version|int >= 23
+
+ roles:
+ - ceph-defaults
+ - ceph-validate
+
+- hosts:
+ - mons
+ - osds
+ - mgrs
+ gather_facts: false
+ become: True
+ roles:
+ - role: ceph-defaults
+ tags: ['ceph_update_config']
+ - role: ceph-common
+ - role: ceph-config
+ tags: ['ceph_update_config']
+
+- hosts: mons
+ gather_facts: false
+ become: True
+ roles:
+ - role: ceph-defaults
+ - role: ceph-common
+ - role: ceph-mon
+
+- hosts: mgrs
+ gather_facts: false
+ become: True
+ roles:
+ - role: ceph-defaults
+ - role: ceph-common
+ - role: ceph-mgr
+
+- hosts: osds
+ gather_facts: false
+ become: True
+ tasks:
+ - name: rsync ceph-volume to test nodes on centos
+ synchronize:
+ src: "{{ toxinidir}}/../../../../ceph_volume"
+ dest: "/usr/lib/python2.7/site-packages"
+ use_ssh_args: true
+ when: ansible_os_family == "RedHat"
+
+ - name: rsync ceph-volume to test nodes on ubuntu
+ synchronize:
+ src: "{{ toxinidir}}/../../../../ceph_volume"
+ dest: "/usr/lib/python2.7/dist-packages"
+ use_ssh_args: true
+ when: ansible_os_family == "Debian"
+
+- hosts: osds
+ gather_facts: false
+ become: True
+ roles:
+ - role: ceph-defaults
+ - role: ceph-common
+ - role: ceph-osd
--- /dev/null
+#!/bin/bash
+
+retries=0
+until [ $retries -ge 5 ]
+do
+ echo "Attempting to start VMs. Attempts: $retries"
+ timeout 10m vagrant up "$@" && break
+ retries=$[$retries+1]
+ sleep 5
+done
+
+sleep 10
bash
git
sleep
+ cp
passenv=*
setenv=
ANSIBLE_SSH_ARGS = -F {changedir}/vagrant_ssh_config
VAGRANT_CWD = {changedir}
CEPH_VOLUME_DEBUG = 1
deps=
- ansible==2.4.1
- testinfra==1.7.1
+ ansible~=2.6,<2.7
+ testinfra
pytest-xdist
notario>=0.0.13
changedir=
# but the master branch doesn't pin dependencies so we can't guarantee to work correctly
#pip install -r {envdir}/tmp/ceph-ansible/requirements.txt
- vagrant up --no-provision {posargs:--provider=virtualbox}
+ bash {toxinidir}/../scripts/vagrant_up.sh {env:VAGRANT_UP_FLAGS:"--no-provision"} {posargs:--provider=virtualbox}
bash {toxinidir}/../scripts/generate_ssh_config.sh {changedir}
+ cp {toxinidir}/../playbooks/deploy.yml {envdir}/tmp/ceph-ansible
+
# use ceph-ansible to deploy a ceph cluster on the vms
- ansible-playbook -vv -i {changedir}/hosts {envdir}/tmp/ceph-ansible/site.yml.sample --extra-vars "fetch_directory={changedir}/fetch ceph_dev_branch={env:CEPH_DEV_BRANCH:master} ceph_dev_sha1={env:CEPH_DEV_SHA1:latest}"
+ ansible-playbook -vv -i {changedir}/hosts {envdir}/tmp/ceph-ansible/deploy.yml --extra-vars "fetch_directory={changedir}/fetch ceph_dev_branch={env:CEPH_DEV_BRANCH:master} ceph_dev_sha1={env:CEPH_DEV_SHA1:latest} toxinidir={toxinidir}"
# prepare nodes for testing with testinfra
ansible-playbook -vv -i {changedir}/hosts {envdir}/tmp/ceph-ansible/tests/functional/setup.yml
assert 'ls' in log_lines
assert 'stderr' in log_lines
assert out == ''
+
+
+class TestFunctionalCall(object):
+
+ def test_stdin(self):
+ process.call(['xargs', 'ls'], stdin="echo '/'")
+
+ def test_unicode_encoding(self):
+ process.call(['echo', u'\xd0'])
+
+ def test_unicode_encoding_stdin(self):
+ process.call(['echo'], stdin=u'\xd0'.encode('utf-8'))
+
+
+class TestFunctionalRun(object):
+
+ def test_log_descriptors(self):
+ process.run(['ls', '-l'])
)
stdout, stderr = capsys.readouterr()
assert 'Cannot use --filestore (filestore) with --bluestore (bluestore)' in stdout
+
+
+class TestValidDevice(object):
+
+ def setup(self):
+ self.validator = arg_validators.ValidDevice()
+
+ def test_path_is_valid(self, fake_call):
+ result = self.validator('/')
+ assert result.abspath == '/'
+
+ def test_path_is_invalid(self, fake_call):
+ with pytest.raises(argparse.ArgumentError):
+ self.validator('/device/does/not/exist')
--- /dev/null
+from ceph_volume.util import device
+from ceph_volume.api import lvm as api
+
+
+class TestDevice(object):
+
+ def test_sys_api(self, device_info):
+ data = {"/dev/sda": {"foo": "bar"}}
+ device_info(devices=data)
+ disk = device.Device("/dev/sda")
+ assert disk.sys_api
+ assert "foo" in disk.sys_api
+
+ def test_is_lv(self, device_info):
+ data = {"lv_path": "vg/lv"}
+ device_info(lv=data)
+ disk = device.Device("vg/lv")
+ assert disk.is_lv
+
+ def test_is_device(self, device_info):
+ data = {"/dev/sda": {"foo": "bar"}}
+ lsblk = {"TYPE": "device"}
+ device_info(devices=data, lsblk=lsblk)
+ disk = device.Device("/dev/sda")
+ assert disk.is_device
+
+ def test_is_partition(self, device_info, pvolumes):
+ data = {"/dev/sda": {"foo": "bar"}}
+ lsblk = {"TYPE": "part"}
+ device_info(devices=data, lsblk=lsblk)
+ disk = device.Device("/dev/sda")
+ assert disk.is_partition
+
+ def test_is_not_lvm_memeber(self, device_info, pvolumes):
+ data = {"/dev/sda": {"foo": "bar"}}
+ lsblk = {"TYPE": "part"}
+ device_info(devices=data, lsblk=lsblk)
+ disk = device.Device("/dev/sda")
+ assert not disk.is_lvm_member
+
+ def test_is_lvm_memeber(self, device_info, pvolumes):
+ data = {"/dev/sda": {"foo": "bar"}}
+ lsblk = {"TYPE": "part"}
+ device_info(devices=data, lsblk=lsblk)
+ disk = device.Device("/dev/sda")
+ assert not disk.is_lvm_member
+
+ def test_is_mapper_device(self, device_info):
+ device_info()
+ disk = device.Device("/dev/mapper/foo")
+ assert disk.is_mapper
+
+ def test_is_not_mapper_device(self, device_info):
+ device_info()
+ disk = device.Device("/dev/sda")
+ assert not disk.is_mapper
+
+ def test_pv_api(self, device_info, pvolumes, monkeypatch):
+ FooPVolume = api.PVolume(pv_name='/dev/sda', pv_uuid="0000", pv_tags={}, vg_name="vg")
+ pvolumes.append(FooPVolume)
+ monkeypatch.setattr(api, 'PVolumes', lambda: pvolumes)
+ data = {"/dev/sda": {"foo": "bar"}}
+ lsblk = {"TYPE": "part"}
+ device_info(devices=data, lsblk=lsblk)
+ disk = device.Device("/dev/sda")
+ assert disk.pvs_api
+import os
+import pytest
from ceph_volume.util import disk
result = disk.device_family('sdaa5')
for parsed in result:
assert parsed['NAME'] in names
+
+
+class TestMapDevPaths(object):
+
+ def test_errors_return_empty_mapping(self, tmpdir):
+ bad_dir = os.path.join(str(tmpdir), 'nonexisting')
+ assert disk._map_dev_paths(bad_dir) == {}
+
+ def test_base_name_and_abspath(self, tmpfile):
+ sda_path = tmpfile(name='sda', contents='')
+ directory = os.path.dirname(sda_path)
+ result = disk._map_dev_paths(directory)
+ assert len(result.keys()) == 1
+ assert result['sda'] == sda_path
+
+ def test_abspath_included(self, tmpfile):
+ sda_path = tmpfile(name='sda', contents='')
+ directory = os.path.dirname(sda_path)
+ result = disk._map_dev_paths(directory, include_abspath=True)
+ assert sorted(result.keys()) == sorted(['sda', sda_path])
+ assert result['sda'] == sda_path
+ assert result[sda_path] == 'sda'
+
+ def test_realpath_included(self, tmpfile):
+ sda_path = tmpfile(name='sda', contents='')
+ directory = os.path.dirname(sda_path)
+ dm_path = os.path.join(directory, 'dm-0')
+ os.symlink(sda_path, os.path.join(directory, 'dm-0'))
+ result = disk._map_dev_paths(directory, include_realpath=True)
+ assert sorted(result.keys()) == sorted(['sda', 'dm-0'])
+ assert result['sda'] == dm_path
+ assert result['dm-0'] == dm_path
+
+ def test_absolute_and_realpath_included(self, tmpfile):
+ dm_path = tmpfile(name='dm-0', contents='')
+ directory = os.path.dirname(dm_path)
+ sda_path = os.path.join(directory, 'sda')
+ os.symlink(sda_path, os.path.join(directory, 'sda'))
+ result = disk._map_dev_paths(directory, include_realpath=True, include_abspath=True)
+ assert sorted(result.keys()) == sorted([dm_path, sda_path, 'sda', 'dm-0'])
+ assert result['sda'] == sda_path
+ assert result['dm-0'] == dm_path
+ assert result[sda_path] == sda_path
+ assert result[dm_path] == 'dm-0'
+
+
+class TestGetBlockDevs(object):
+
+ def test_loop_devices_are_missing(self, tmpfile):
+ path = os.path.dirname(tmpfile(name='loop0', contents=''))
+ result = disk.get_block_devs(sys_block_path=path)
+ assert result == []
+
+ def test_loop_devices_are_included(self, tmpfile):
+ path = os.path.dirname(tmpfile(name='loop0', contents=''))
+ result = disk.get_block_devs(sys_block_path=path, skip_loop=False)
+ assert len(result) == 1
+ assert result == ['loop0']
+
+
+class TestGetDevDevs(object):
+
+ def test_abspaths_are_included(self, tmpfile):
+ sda_path = tmpfile(name='sda', contents='')
+ directory = os.path.dirname(sda_path)
+ result = disk.get_dev_devs(directory)
+ assert sorted(result.keys()) == sorted(['sda', sda_path])
+ assert result['sda'] == sda_path
+ assert result[sda_path] == 'sda'
+
+
+class TestGetMapperDevs(object):
+
+ def test_abspaths_and_realpaths_are_included(self, tmpfile):
+ dm_path = tmpfile(name='dm-0', contents='')
+ directory = os.path.dirname(dm_path)
+ sda_path = os.path.join(directory, 'sda')
+ os.symlink(sda_path, os.path.join(directory, 'sda'))
+ result = disk.get_mapper_devs(directory)
+ assert sorted(result.keys()) == sorted([dm_path, sda_path, 'sda', 'dm-0'])
+ assert result['sda'] == sda_path
+ assert result['dm-0'] == dm_path
+ assert result[sda_path] == sda_path
+ assert result[dm_path] == 'dm-0'
+
+
+class TestHumanReadableSize(object):
+
+ def test_bytes(self):
+ result = disk.human_readable_size(800)
+ assert result == '800.00 B'
+
+ def test_kilobytes(self):
+ result = disk.human_readable_size(800*1024)
+ assert result == '800.00 KB'
+
+ def test_megabytes(self):
+ result = disk.human_readable_size(800*1024*1024)
+ assert result == '800.00 MB'
+
+ def test_gigabytes(self):
+ result = disk.human_readable_size(8.19*1024*1024*1024)
+ assert result == '8.19 GB'
+
+ def test_terabytes(self):
+ result = disk.human_readable_size(81.2*1024*1024*1024*1024)
+ assert result == '81.20 TB'
+
+
+class TestGetDevices(object):
+
+ def setup_paths(self, tmpdir):
+ paths = []
+ for directory in ['block', 'dev', 'mapper']:
+ path = os.path.join(str(tmpdir), directory)
+ paths.append(path)
+ os.makedirs(path)
+ return paths
+
+ def test_no_devices_are_found(self, tmpdir):
+ result = disk.get_devices(
+ _sys_block_path=str(tmpdir),
+ _dev_path=str(tmpdir),
+ _mapper_path=str(tmpdir))
+ assert result == {}
+
+ def test_sda_block_is_found(self, tmpfile, tmpdir):
+ block_path, dev_path, mapper_path = self.setup_paths(tmpdir)
+ dev_sda_path = os.path.join(dev_path, 'sda')
+ os.makedirs(os.path.join(block_path, 'sda'))
+ os.makedirs(dev_sda_path)
+ result = disk.get_devices(
+ _sys_block_path=block_path,
+ _dev_path=dev_path,
+ _mapper_path=mapper_path)
+ assert len(result.keys()) == 1
+ assert result[dev_sda_path]['human_readable_size'] == '0.00 B'
+ assert result[dev_sda_path]['model'] == ''
+ assert result[dev_sda_path]['partitions'] == {}
+
+ def test_sda_is_removable_gets_skipped(self, tmpfile, tmpdir):
+ block_path, dev_path, mapper_path = self.setup_paths(tmpdir)
+ dev_sda_path = os.path.join(dev_path, 'sda')
+ block_sda_path = os.path.join(block_path, 'sda')
+ os.makedirs(block_sda_path)
+ os.makedirs(dev_sda_path)
+
+ tmpfile('removable', contents='1', directory=block_sda_path)
+ result = disk.get_devices(
+ _sys_block_path=block_path,
+ _dev_path=dev_path,
+ _mapper_path=mapper_path)
+ assert result == {}
+
+ def test_dm_device_is_not_used(self, monkeypatch, tmpdir):
+ # the link to the mapper is used instead
+ monkeypatch.setattr(disk.lvm, 'is_lv', lambda: True)
+ block_path, dev_path, mapper_path = self.setup_paths(tmpdir)
+ dev_dm_path = os.path.join(dev_path, 'dm-0')
+ ceph_data_path = os.path.join(mapper_path, 'ceph-data')
+ os.symlink(dev_dm_path, ceph_data_path)
+ block_dm_path = os.path.join(block_path, 'dm-0')
+ os.makedirs(block_dm_path)
+
+ result = disk.get_devices(
+ _sys_block_path=block_path,
+ _dev_path=dev_path,
+ _mapper_path=mapper_path)
+ result = list(result.keys())
+ assert len(result) == 1
+ assert result == [ceph_data_path]
+
+ def test_sda_size(self, tmpfile, tmpdir):
+ block_path, dev_path, mapper_path = self.setup_paths(tmpdir)
+ block_sda_path = os.path.join(block_path, 'sda')
+ dev_sda_path = os.path.join(dev_path, 'sda')
+ os.makedirs(block_sda_path)
+ os.makedirs(dev_sda_path)
+ tmpfile('size', '1024', directory=block_sda_path)
+ result = disk.get_devices(
+ _sys_block_path=block_path,
+ _dev_path=dev_path,
+ _mapper_path=mapper_path)
+ assert list(result.keys()) == [dev_sda_path]
+ assert result[dev_sda_path]['human_readable_size'] == '512.00 KB'
+
+ def test_sda_sectorsize_fallsback(self, tmpfile, tmpdir):
+ # if no sectorsize, it will use queue/hw_sector_size
+ block_path, dev_path, mapper_path = self.setup_paths(tmpdir)
+ block_sda_path = os.path.join(block_path, 'sda')
+ sda_queue_path = os.path.join(block_sda_path, 'queue')
+ dev_sda_path = os.path.join(dev_path, 'sda')
+ os.makedirs(block_sda_path)
+ os.makedirs(sda_queue_path)
+ os.makedirs(dev_sda_path)
+ tmpfile('hw_sector_size', contents='1024', directory=sda_queue_path)
+ result = disk.get_devices(
+ _sys_block_path=block_path,
+ _dev_path=dev_path,
+ _mapper_path=mapper_path)
+ assert list(result.keys()) == [dev_sda_path]
+ assert result[dev_sda_path]['sectorsize'] == '1024'
+
+ def test_sda_sectorsize_from_logical_block(self, tmpfile, tmpdir):
+ block_path, dev_path, mapper_path = self.setup_paths(tmpdir)
+ block_sda_path = os.path.join(block_path, 'sda')
+ sda_queue_path = os.path.join(block_sda_path, 'queue')
+ dev_sda_path = os.path.join(dev_path, 'sda')
+ os.makedirs(block_sda_path)
+ os.makedirs(sda_queue_path)
+ os.makedirs(dev_sda_path)
+ tmpfile('logical_block_size', contents='99', directory=sda_queue_path)
+ result = disk.get_devices(
+ _sys_block_path=block_path,
+ _dev_path=dev_path,
+ _mapper_path=mapper_path)
+ assert result[dev_sda_path]['sectorsize'] == '99'
+
+ def test_sda_sectorsize_does_not_fallback(self, tmpfile, tmpdir):
+ block_path, dev_path, mapper_path = self.setup_paths(tmpdir)
+ block_sda_path = os.path.join(block_path, 'sda')
+ sda_queue_path = os.path.join(block_sda_path, 'queue')
+ dev_sda_path = os.path.join(dev_path, 'sda')
+ os.makedirs(block_sda_path)
+ os.makedirs(sda_queue_path)
+ os.makedirs(dev_sda_path)
+ tmpfile('logical_block_size', contents='99', directory=sda_queue_path)
+ tmpfile('hw_sector_size', contents='1024', directory=sda_queue_path)
+ result = disk.get_devices(
+ _sys_block_path=block_path,
+ _dev_path=dev_path,
+ _mapper_path=mapper_path)
+ assert result[dev_sda_path]['sectorsize'] == '99'
+
+ def test_is_rotational(self, tmpfile, tmpdir):
+ block_path, dev_path, mapper_path = self.setup_paths(tmpdir)
+ block_sda_path = os.path.join(block_path, 'sda')
+ sda_queue_path = os.path.join(block_sda_path, 'queue')
+ dev_sda_path = os.path.join(dev_path, 'sda')
+ os.makedirs(block_sda_path)
+ os.makedirs(sda_queue_path)
+ os.makedirs(dev_sda_path)
+ tmpfile('rotational', contents='1', directory=sda_queue_path)
+ result = disk.get_devices(
+ _sys_block_path=block_path,
+ _dev_path=dev_path,
+ _mapper_path=mapper_path)
+ assert result[dev_sda_path]['rotational'] == '1'
+
+
+class TestSizeCalculations(object):
+
+ @pytest.mark.parametrize('aliases', [
+ ('b', 'bytes'),
+ ('kb', 'kilobytes'),
+ ('mb', 'megabytes'),
+ ('gb', 'gigabytes'),
+ ('tb', 'terabytes'),
+ ])
+ def test_aliases(self, aliases):
+ short_alias, long_alias = aliases
+ s = disk.Size(b=1)
+ short_alias = getattr(s, short_alias)
+ long_alias = getattr(s, long_alias)
+ assert short_alias == long_alias
+
+ @pytest.mark.parametrize('values', [
+ ('b', 857619069665.28),
+ ('kb', 837518622.72),
+ ('mb', 817889.28),
+ ('gb', 798.72),
+ ('tb', 0.78),
+ ])
+ def test_terabytes(self, values):
+ # regardless of the input value, all the other values correlate to each
+ # other the same, every time
+ unit, value = values
+ s = disk.Size(**{unit: value})
+ assert s.b == 857619069665.28
+ assert s.kb == 837518622.72
+ assert s.mb == 817889.28
+ assert s.gb == 798.72
+ assert s.tb == 0.78
+
+
+class TestSizeOperators(object):
+
+ @pytest.mark.parametrize('larger', [1025, 1024.1, 1024.001])
+ def test_gigabytes_is_smaller(self, larger):
+ assert disk.Size(gb=1) < disk.Size(mb=larger)
+
+ @pytest.mark.parametrize('smaller', [1023, 1023.9, 1023.001])
+ def test_gigabytes_is_larger(self, smaller):
+ assert disk.Size(gb=1) > disk.Size(mb=smaller)
+
+ @pytest.mark.parametrize('larger', [1025, 1024.1, 1024.001, 1024])
+ def test_gigabytes_is_smaller_or_equal(self, larger):
+ assert disk.Size(gb=1) <= disk.Size(mb=larger)
+
+ @pytest.mark.parametrize('smaller', [1023, 1023.9, 1023.001, 1024])
+ def test_gigabytes_is_larger_or_equal(self, smaller):
+ assert disk.Size(gb=1) >= disk.Size(mb=smaller)
+
+ @pytest.mark.parametrize('values', [
+ ('b', 857619069665.28),
+ ('kb', 837518622.72),
+ ('mb', 817889.28),
+ ('gb', 798.72),
+ ('tb', 0.78),
+ ])
+ def test_equality(self, values):
+ unit, value = values
+ s = disk.Size(**{unit: value})
+ # both tb and b, since b is always calculated regardless, and is useful
+ # when testing tb
+ assert disk.Size(tb=0.78) == s
+ assert disk.Size(b=857619069665.28) == s
+
+ @pytest.mark.parametrize('values', [
+ ('b', 857619069665.28),
+ ('kb', 837518622.72),
+ ('mb', 817889.28),
+ ('gb', 798.72),
+ ('tb', 0.78),
+ ])
+ def test_inequality(self, values):
+ unit, value = values
+ s = disk.Size(**{unit: value})
+ # both tb and b, since b is always calculated regardless, and is useful
+ # when testing tb
+ assert disk.Size(tb=1) != s
+ assert disk.Size(b=100) != s
+
+
+class TestSizeOperations(object):
+
+ def test_assignment_addition_with_size_objects(self):
+ result = disk.Size(mb=256) + disk.Size(gb=1)
+ assert result.gb == 1.25
+ assert result.gb.as_int() == 1
+ assert result.gb.as_float() == 1.25
+
+ def test_self_addition_with_size_objects(self):
+ base = disk.Size(mb=256)
+ base += disk.Size(gb=1)
+ assert base.gb == 1.25
+
+ def test_self_addition_does_not_alter_state(self):
+ base = disk.Size(mb=256)
+ base + disk.Size(gb=1)
+ assert base.mb == 256
+
+ def test_addition_with_non_size_objects(self):
+ with pytest.raises(TypeError):
+ disk.Size(mb=100) + 4
+
+ def test_assignment_subtraction_with_size_objects(self):
+ base = disk.Size(gb=1)
+ base -= disk.Size(mb=256)
+ assert base.mb == 768
+
+ def test_self_subtraction_does_not_alter_state(self):
+ base = disk.Size(gb=1)
+ base - disk.Size(mb=256)
+ assert base.gb == 1
+
+ def test_subtraction_with_size_objects(self):
+ result = disk.Size(gb=1) - disk.Size(mb=256)
+ assert result.mb == 768
+
+ def test_subtraction_with_non_size_objects(self):
+ with pytest.raises(TypeError):
+ disk.Size(mb=100) - 4
+
+ def test_multiplication_with_size_objects(self):
+ with pytest.raises(TypeError):
+ disk.Size(mb=100) * disk.Size(mb=1)
+
+ def test_multiplication_with_non_size_objects(self):
+ base = disk.Size(gb=1)
+ result = base * 2
+ assert result.gb == 2
+ assert result.gb.as_int() == 2
+
+ def test_division_with_size_objects(self):
+ result = disk.Size(gb=1) / disk.Size(mb=1)
+ assert int(result) == 1024
+
+ def test_division_with_non_size_objects(self):
+ base = disk.Size(gb=1)
+ base / 2
+ assert base.mb == 512
+ assert base.mb.as_int() == 512
+
+
+class TestSizeAttributes(object):
+
+ def test_attribute_does_not_exist(self):
+ with pytest.raises(AttributeError):
+ disk.Size(mb=1).exabytes
+
+
+class TestSizeFormatting(object):
+
+ def test_default_formatting_tb_to_b(self):
+ size = disk.Size(tb=0.0000000001)
+ result = "%s" % size
+ assert result == "109.95 B"
+
+ def test_default_formatting_tb_to_kb(self):
+ size = disk.Size(tb=0.00000001)
+ result = "%s" % size
+ assert result == "10.74 KB"
+
+ def test_default_formatting_tb_to_mb(self):
+ size = disk.Size(tb=0.000001)
+ result = "%s" % size
+ assert result == "1.05 MB"
+
+ def test_default_formatting_tb_to_gb(self):
+ size = disk.Size(tb=0.001)
+ result = "%s" % size
+ assert result == "1.02 GB"
+
+ def test_default_formatting_tb_to_tb(self):
+ size = disk.Size(tb=10)
+ result = "%s" % size
+ assert result == "10.00 TB"
+
+
+class TestSizeSpecificFormatting(object):
+
+ def test_formatting_b(self):
+ size = disk.Size(b=2048)
+ result = "%s" % size.b
+ assert "%s" % size.b == "%s" % size.bytes
+ assert result == "2048.00 B"
+
+ def test_formatting_kb(self):
+ size = disk.Size(kb=5700)
+ result = "%s" % size.kb
+ assert "%s" % size.kb == "%s" % size.kilobytes
+ assert result == "5700.00 KB"
+
+ def test_formatting_mb(self):
+ size = disk.Size(mb=4000)
+ result = "%s" % size.mb
+ assert "%s" % size.mb == "%s" % size.megabytes
+ assert result == "4000.00 MB"
+
+ def test_formatting_gb(self):
+ size = disk.Size(gb=77777)
+ result = "%s" % size.gb
+ assert "%s" % size.gb == "%s" % size.gigabytes
+ assert result == "77777.00 GB"
+
+ def test_formatting_tb(self):
+ size = disk.Size(tb=1027)
+ result = "%s" % size.tb
+ assert "%s" % size.tb == "%s" % size.terabytes
+ assert result == "1027.00 TB"
out = ['some line here', ' ']
stub_call((out, '', 0))
assert encryption.status('/dev/sdc1') == {}
+
+
+class TestDmcryptClose(object):
+
+ def test_mapper_exists(self, fake_run, tmpfile):
+ file_name = tmpfile(name='mapper-device')
+ encryption.dmcrypt_close(file_name)
+ arguments = fake_run.calls[0]['args'][0]
+ assert arguments[0] == 'cryptsetup'
+ assert arguments[1] == 'remove'
+ assert arguments[2].startswith('/')
+
+ def test_mapper_does_not_exist(self, fake_run):
+ file_name = '/path/does/not/exist'
+ encryption.dmcrypt_close(file_name)
+ assert fake_run.calls == []
from ceph_volume.tests.conftest import Factory
-class TestCheckID(object):
+class TestOSDIDAvailable(object):
def test_false_if_id_is_none(self):
- assert not prepare.check_id(None)
+ assert not prepare.osd_id_available(None)
def test_returncode_is_not_zero(self, monkeypatch):
monkeypatch.setattr('ceph_volume.process.call', lambda *a, **kw: ('', '', 1))
with pytest.raises(RuntimeError):
- prepare.check_id(1)
+ prepare.osd_id_available(1)
- def test_id_does_exist(self, monkeypatch):
+ def test_id_does_exist_but_not_available(self, monkeypatch):
stdout = dict(nodes=[
- dict(id=0),
+ dict(id=0, status="up"),
])
stdout = ['', json.dumps(stdout)]
monkeypatch.setattr('ceph_volume.process.call', lambda *a, **kw: (stdout, '', 0))
- result = prepare.check_id(0)
- assert result
+ result = prepare.osd_id_available(0)
+ assert not result
def test_id_does_not_exist(self, monkeypatch):
stdout = dict(nodes=[
])
stdout = ['', json.dumps(stdout)]
monkeypatch.setattr('ceph_volume.process.call', lambda *a, **kw: (stdout, '', 0))
- result = prepare.check_id(1)
+ result = prepare.osd_id_available(1)
assert not result
def test_invalid_osd_id(self, monkeypatch):
])
stdout = ['', json.dumps(stdout)]
monkeypatch.setattr('ceph_volume.process.call', lambda *a, **kw: (stdout, '', 0))
- result = prepare.check_id("foo")
+ result = prepare.osd_id_available("foo")
assert not result
+ def test_returns_true_when_id_is_destroyed(self, monkeypatch):
+ stdout = dict(nodes=[
+ dict(id=0, status="destroyed"),
+ ])
+ stdout = ['', json.dumps(stdout)]
+ monkeypatch.setattr('ceph_volume.process.call', lambda *a, **kw: (stdout, '', 0))
+ result = prepare.osd_id_available(0)
+ assert result
+
class TestFormatDevice(object):
assert expected == fake_run.calls[0]['args'][0]
+mkfs_filestore_flags = [
+ 'ceph-osd',
+ '--cluster',
+ '--osd-objectstore', 'filestore',
+ '--mkfs',
+ '-i',
+ '--monmap',
+ '--keyfile', '-', # goes through stdin
+ '--osd-data',
+ '--osd-journal',
+ '--osd-uuid',
+ '--setuser', 'ceph',
+ '--setgroup', 'ceph'
+]
+
+
+class TestOsdMkfsFilestore(object):
+
+ @pytest.mark.parametrize('flag', mkfs_filestore_flags)
+ def test_keyring_is_used(self, fake_call, monkeypatch, flag):
+ monkeypatch.setattr(prepare, '__release__', 'mimic')
+ monkeypatch.setattr(system, 'chown', lambda path: True)
+ prepare.osd_mkfs_filestore(1, 'asdf', keyring='secret')
+ assert flag in fake_call.calls[0]['args'][0]
+
+ def test_keyring_is_used_luminous(self, fake_call, monkeypatch):
+ monkeypatch.setattr(prepare, '__release__', 'luminous')
+ monkeypatch.setattr(system, 'chown', lambda path: True)
+ prepare.osd_mkfs_filestore(1, 'asdf', keyring='secret')
+ assert '--keyfile' not in fake_call.calls[0]['args'][0]
+
+
class TestOsdMkfsBluestore(object):
def test_keyring_is_added(self, fake_call, monkeypatch):
prepare.osd_mkfs_bluestore(1, 'asdf')
assert '--keyfile' not in fake_call.calls[0]['args'][0]
+ def test_keyring_is_not_added_luminous(self, fake_call, monkeypatch):
+ monkeypatch.setattr(system, 'chown', lambda path: True)
+ prepare.osd_mkfs_bluestore(1, 'asdf')
+ monkeypatch.setattr(prepare, '__release__', 'luminous')
+ assert '--keyfile' not in fake_call.calls[0]['args'][0]
+
def test_wal_is_added(self, fake_call, monkeypatch):
monkeypatch.setattr(system, 'chown', lambda path: True)
prepare.osd_mkfs_bluestore(1, 'asdf', wal='/dev/smm1')
'--osd-uuid', 'asdf-1234',
'--setuser', 'ceph', '--setgroup', 'ceph'])
assert expected in str(error)
+
+
+class TestGetJournalSize(object):
+
+ def test_undefined_size_fallbacks_formatted(self, conf_ceph_stub):
+ conf_ceph_stub(dedent("""
+ [global]
+ fsid = a25d19a6-7d57-4eda-b006-78e35d2c4d9f
+ """))
+ result = prepare.get_journal_size()
+ assert result == '5G'
+
+ def test_undefined_size_fallbacks_unformatted(self, conf_ceph_stub):
+ conf_ceph_stub(dedent("""
+ [global]
+ fsid = a25d19a6-7d57-4eda-b006-78e35d2c4d9f
+ """))
+ result = prepare.get_journal_size(lv_format=False)
+ assert result.gb.as_int() == 5
+
+ def test_defined_size_unformatted(self, conf_ceph_stub):
+ conf_ceph_stub(dedent("""
+ [global]
+ fsid = a25d19a6-7d57-4eda-b006-78e35d2c4d9f
+
+ [osd]
+ osd journal size = 10240
+ """))
+ result = prepare.get_journal_size(lv_format=False)
+ assert result.gb.as_int() == 10
+
+ def test_defined_size_formatted(self, conf_ceph_stub):
+ conf_ceph_stub(dedent("""
+ [global]
+ fsid = a25d19a6-7d57-4eda-b006-78e35d2c4d9f
+
+ [osd]
+ osd journal size = 10240
+ """))
+ result = prepare.get_journal_size()
+ assert result == '10G'
+
+ def test_refuse_tiny_journals(self, conf_ceph_stub):
+ conf_ceph_stub(dedent("""
+ [global]
+ fsid = a25d19a6-7d57-4eda-b006-78e35d2c4d9f
+
+ [osd]
+ osd journal size = 1024
+ """))
+ with pytest.raises(RuntimeError) as error:
+ prepare.get_journal_size()
+ assert 'journal sizes must be larger' in str(error)
+ assert 'detected: 1024.00 MB' in str(error)
assert system.is_binary(binary_path) is False
+class TestGetFileContents(object):
+
+ def test_path_does_not_exist(self, tmpdir):
+ filepath = os.path.join(str(tmpdir), 'doesnotexist')
+ assert system.get_file_contents(filepath, 'default') == 'default'
+
+ def test_path_has_contents(self, tmpfile):
+ interesting_file = tmpfile(contents="1")
+ result = system.get_file_contents(interesting_file)
+ assert result == "1"
+
+ def test_path_has_multiline_contents(self, tmpfile):
+ interesting_file = tmpfile(contents="0\n1")
+ result = system.get_file_contents(interesting_file)
+ assert result == "0\n1"
+
+ def test_exception_returns_default(self, tmpfile):
+ interesting_file = tmpfile(contents="0")
+ # remove read, causes IOError
+ os.chmod(interesting_file, 0o000)
+ result = system.get_file_contents(interesting_file)
+ assert result == ''
+
+
class TestWhich(object):
def test_executable_exists_but_is_not_file(self, monkeypatch):
--- /dev/null
+import pytest
+from ceph_volume import util
+
+
+class TestAsBytes(object):
+
+ def test_bytes_just_gets_returned(self):
+ bytes_string = "contents".encode('utf-8')
+ assert util.as_bytes(bytes_string) == bytes_string
+
+ def test_string_gets_converted_to_bytes(self):
+ result = util.as_bytes('contents')
+ assert isinstance(result, bytes)
+
+
+class TestStrToInt(object):
+
+ def test_passing_a_float_str(self):
+ result = util.str_to_int("1.99")
+ assert result == 1
+
+ def test_passing_a_float_does_not_round(self):
+ result = util.str_to_int("1.99", round_down=False)
+ assert result == 2
+
+ def test_text_is_not_an_integer_like(self):
+ with pytest.raises(RuntimeError) as error:
+ util.str_to_int("1.4GB")
+ assert str(error.value) == "Unable to convert to integer: '1.4GB'"
+
+
+def true_responses(upper_casing=False):
+ if upper_casing:
+ return ['Y', 'YES', '']
+ return ['y', 'yes', '']
+
+
+def false_responses(upper_casing=False):
+ if upper_casing:
+ return ['N', 'NO']
+ return ['n', 'no']
+
+
+def invalid_responses():
+ return [9, 0.1, 'h', [], {}, None]
+
+
+class TestStrToBool(object):
+
+ @pytest.mark.parametrize('response', true_responses())
+ def test_trueish(self, response):
+ assert util.str_to_bool(response) is True
+
+ @pytest.mark.parametrize('response', false_responses())
+ def test_falseish(self, response):
+ assert util.str_to_bool(response) is False
+
+ @pytest.mark.parametrize('response', true_responses(True))
+ def test_trueish_upper(self, response):
+ assert util.str_to_bool(response) is True
+
+ @pytest.mark.parametrize('response', false_responses(True))
+ def test_falseish_upper(self, response):
+ assert util.str_to_bool(response) is False
+
+ @pytest.mark.parametrize('response', invalid_responses())
+ def test_invalid(self, response):
+ with pytest.raises(ValueError):
+ util.str_to_bool(response)
+
+
+class TestPromptBool(object):
+
+ @pytest.mark.parametrize('response', true_responses())
+ def test_trueish(self, response):
+ fake_input = lambda x: response
+ qx = 'what the what?'
+ assert util.prompt_bool(qx, _raw_input=fake_input) is True
+
+ @pytest.mark.parametrize('response', false_responses())
+ def test_falseish(self, response):
+ fake_input = lambda x: response
+ qx = 'what the what?'
+ assert util.prompt_bool(qx, _raw_input=fake_input) is False
+
+ def test_try_again_true(self):
+ responses = ['g', 'h', 'y']
+ fake_input = lambda x: responses.pop(0)
+ qx = 'what the what?'
+ assert util.prompt_bool(qx, _raw_input=fake_input) is True
+
+ def test_try_again_false(self):
+ responses = ['g', 'h', 'n']
+ fake_input = lambda x: responses.pop(0)
+ qx = 'what the what?'
+ assert util.prompt_bool(qx, _raw_input=fake_input) is False
+import logging
+from math import floor
+from ceph_volume import terminal
+
+
+logger = logging.getLogger(__name__)
+
def as_string(string):
"""
# we really ignore here if we can't properly decode with utf-8
return string.decode('utf-8', 'ignore')
return string
+
+
+def as_bytes(string):
+ """
+ Ensure that whatever type of string is incoming, it is returned as bytes,
+ encoding to utf-8 otherwise
+ """
+ if isinstance(string, bytes):
+ return string
+ return string.encode('utf-8', errors='ignore')
+
+
+def str_to_int(string, round_down=True):
+ """
+ Parses a string number into an integer, optionally converting to a float
+ and rounding down.
+ """
+ error_msg = "Unable to convert to integer: '%s'" % str(string)
+ try:
+ integer = float(string)
+ except (TypeError, ValueError):
+ logger.exception(error_msg)
+ raise RuntimeError(error_msg)
+
+ if round_down:
+ integer = floor(integer)
+ else:
+ integer = round(integer)
+ return int(integer)
+
+
+def str_to_bool(val):
+ """
+ Convert a string representation of truth to True or False
+
+ True values are 'y', 'yes', or ''; case-insensitive
+ False values are 'n', or 'no'; case-insensitive
+ Raises ValueError if 'val' is anything else.
+ """
+ true_vals = ['yes', 'y', '']
+ false_vals = ['no', 'n']
+ try:
+ val = val.lower()
+ except AttributeError:
+ val = str(val).lower()
+ if val in true_vals:
+ return True
+ elif val in false_vals:
+ return False
+ else:
+ raise ValueError("Invalid input value: %s" % val)
+
+
+def prompt_bool(question, _raw_input=None):
+ """
+ Interface to prompt a boolean (or boolean-like) response from a user.
+ Usually a confirmation.
+ """
+ input_prompt = _raw_input or raw_input
+ prompt_format = '--> {question} '.format(question=question)
+ response = input_prompt(prompt_format)
+ try:
+ return str_to_bool(response)
+ except ValueError:
+ terminal.error('Valid true responses are: y, yes, <Enter>')
+ terminal.error('Valid false responses are: n, no')
+ terminal.error('That response was invalid, please try again')
+ return prompt_bool(question, _raw_input=input_prompt)
from ceph_volume import terminal
from ceph_volume import decorators
from ceph_volume.util import disk
+from ceph_volume.util.device import Device
class LVPath(object):
return string
+class ValidDevice(object):
+
+ def __call__(self, string):
+ device = Device(string)
+ if not device.exists:
+ raise argparse.ArgumentError(
+ None, "Unable to proceed with non-existing device: %s" % string
+ )
+
+ return device
+
+
class OSDPath(object):
"""
Validate path exists and it looks like an OSD directory.
--- /dev/null
+import os
+from ceph_volume import sys_info
+from ceph_volume.api import lvm
+from ceph_volume.util import disk
+
+
+class Device(object):
+
+ def __init__(self, path):
+ self.path = path
+ # LVs can have a vg/lv path, while disks will have /dev/sda
+ self.abspath = path
+ self.lv_api = None
+ self.pvs_api = []
+ self.disk_api = {}
+ self.sys_api = {}
+ self._exists = None
+ self._is_lvm_member = None
+ self._parse()
+
+ def _parse(self):
+ # start with lvm since it can use an absolute or relative path
+ lv = lvm.get_lv_from_argument(self.path)
+ if lv:
+ self.lv_api = lv
+ self.abspath = lv.lv_path
+ else:
+ dev = disk.lsblk(self.path)
+ self.disk_api = dev
+ device_type = dev.get('TYPE', '')
+        # always check if this is an lvm member
+ if device_type in ['part', 'disk']:
+ self._set_lvm_membership()
+
+ if not sys_info.devices:
+ sys_info.devices = disk.get_devices()
+ self.sys_api = sys_info.devices.get(self.abspath, {})
+
+ def __repr__(self):
+ prefix = 'Unknown'
+ if self.is_lv:
+ prefix = 'LV'
+ elif self.is_partition:
+ prefix = 'Partition'
+ elif self.is_device:
+ prefix = 'Raw Device'
+ return '<%s: %s>' % (prefix, self.abspath)
+
+ def _set_lvm_membership(self):
+ if self._is_lvm_member is None:
+ # check if there was a pv created with the
+ # name of device
+ pvs = lvm.PVolumes()
+ pvs.filter(pv_name=self.abspath)
+ if not pvs:
+ self._is_lvm_member = False
+ return self._is_lvm_member
+ has_vgs = [pv.vg_name for pv in pvs if pv.vg_name]
+ if has_vgs:
+ self._is_lvm_member = True
+ self.pvs_api = pvs
+ else:
+ # this is contentious, if a PV is recognized by LVM but has no
+ # VGs, should we consider it as part of LVM? We choose not to
+ # here, because most likely, we need to use VGs from this PV.
+ self._is_lvm_member = False
+
+ return self._is_lvm_member
+
+ @property
+ def exists(self):
+ return os.path.exists(self.abspath)
+
+ @property
+ def is_lvm_member(self):
+ if self._is_lvm_member is None:
+ self._set_lvm_membership()
+ return self._is_lvm_member
+
+ @property
+ def is_mapper(self):
+ return self.path.startswith('/dev/mapper')
+
+ @property
+ def is_lv(self):
+ return self.lv_api is not None
+
+ @property
+ def is_partition(self):
+ if self.disk_api:
+ return self.disk_api['TYPE'] == 'part'
+ return False
+
+ @property
+ def is_device(self):
+ if self.disk_api:
+ return self.disk_api['TYPE'] == 'device'
+ return False
+import logging
import os
+import re
import stat
from ceph_volume import process
+from ceph_volume.api import lvm
+from ceph_volume.util.system import get_file_contents
+
+
+logger = logging.getLogger(__name__)
# The blkid CLI tool has some oddities which prevents having one common call
if os.path.exists('/sys/dev/block/%d:%d/partition' % (major, minor)):
return True
return False
+
+
+def _map_dev_paths(_path, include_abspath=False, include_realpath=False):
+ """
+ Go through all the items in ``_path`` and map them to their absolute path::
+
+ {'sda': '/dev/sda'}
+
+ If ``include_abspath`` is set, then a reverse mapping is set as well::
+
+ {'sda': '/dev/sda', '/dev/sda': 'sda'}
+
+ If ``include_realpath`` is set then the same operation is done for any
+ links found when listing, these are *not* reversed to avoid clashing on
+ existing keys, but both abspath and basename can be included. For example::
+
+ {
+ 'ceph-data': '/dev/mapper/ceph-data',
+ '/dev/mapper/ceph-data': 'ceph-data',
+ '/dev/dm-0': '/dev/mapper/ceph-data',
+ 'dm-0': '/dev/mapper/ceph-data'
+ }
+
+
+ In case of possible exceptions the mapping is returned empty, and the
+ exception is logged.
+ """
+ mapping = {}
+ try:
+ dev_names = os.listdir(_path)
+ except (OSError, IOError):
+ logger.exception('unable to list block devices from: %s' % _path)
+ return {}
+
+ for dev_name in dev_names:
+ mapping[dev_name] = os.path.join(_path, dev_name)
+
+ if include_abspath:
+ for k, v in list(mapping.items()):
+ mapping[v] = k
+
+ if include_realpath:
+ for abspath in list(mapping.values()):
+ if not os.path.islink(abspath):
+ continue
+
+ realpath = os.path.realpath(abspath)
+ basename = os.path.basename(realpath)
+ mapping[basename] = abspath
+ if include_abspath:
+ mapping[realpath] = abspath
+
+ return mapping
+
+
+def get_block_devs(sys_block_path="/sys/block", skip_loop=True):
+ """
+ Go through all the items in /sys/block and return them as a list.
+
+ The ``sys_block_path`` argument is set for easier testing and is not
+ required for proper operation.
+ """
+ devices = _map_dev_paths(sys_block_path).keys()
+ if skip_loop:
+ return [d for d in devices if not d.startswith('loop')]
+ return list(devices)
+
+
+def get_dev_devs(dev_path="/dev"):
+ """
+    Go through all the items in /dev and return them as a mapping of names
+    to absolute paths (and the reverse).
+
+ The ``dev_path`` argument is set for easier testing and is not
+ required for proper operation.
+ """
+ return _map_dev_paths(dev_path, include_abspath=True)
+
+
+def get_mapper_devs(mapper_path="/dev/mapper"):
+ """
+    Go through all the items in /dev/mapper and return them as a mapping.
+
+    The ``mapper_path`` argument is set for easier testing and is not
+    required for proper operation.
+ """
+ return _map_dev_paths(mapper_path, include_abspath=True, include_realpath=True)
+
+
+class BaseFloatUnit(float):
+ """
+ Base class to support float representations of size values. Suffix is
+ computed on child classes by inspecting the class name
+ """
+
+ def __repr__(self):
+ return "<%s(%s)>" % (self.__class__.__name__, self.__float__())
+
+ def __str__(self):
+ return "{size:.2f} {suffix}".format(
+ size=self.__float__(),
+ suffix=self.__class__.__name__.split('Float')[-1]
+ )
+
+ def as_int(self):
+ return int(self.real)
+
+ def as_float(self):
+ return self.real
+
+
+class FloatB(BaseFloatUnit):
+ pass
+
+
+class FloatMB(BaseFloatUnit):
+ pass
+
+
+class FloatGB(BaseFloatUnit):
+ pass
+
+
+class FloatKB(BaseFloatUnit):
+ pass
+
+
+class FloatTB(BaseFloatUnit):
+ pass
+
+
+class Size(object):
+ """
+ Helper to provide an interface for different sizes given a single initial
+ input. Allows for comparison between different size objects, which avoids
+ the need to convert sizes before comparison (e.g. comparing megabytes
+ against gigabytes).
+
+ Common comparison operators are supported::
+
+ >>> hd1 = Size(gb=400)
+ >>> hd2 = Size(gb=500)
+ >>> hd1 > hd2
+ False
+ >>> hd1 < hd2
+ True
+ >>> hd1 == hd2
+ False
+ >>> hd1 == Size(gb=400)
+ True
+
+ The Size object can also be multiplied or divided::
+
+ >>> hd1
+ <Size(400.00 GB)>
+ >>> hd1 * 2
+ <Size(800.00 GB)>
+ >>> hd1
+ <Size(800.00 GB)>
+
+ Additions and subtractions are only supported between Size objects::
+
+ >>> Size(gb=224) - Size(gb=100)
+ <Size(124.00 GB)>
+ >>> Size(gb=1) + Size(mb=300)
+ <Size(1.29 GB)>
+
+ Can also display a human-readable representation, with automatic detection
+ on best suited unit, or alternatively, specific unit representation::
+
+ >>> s = Size(mb=2211)
+ >>> s
+ <Size(2.16 GB)>
+ >>> s.mb
+ <FloatMB(2211.0)>
+ >>> print "Total size: %s" % s.mb
+ Total size: 2211.00 MB
+ >>> print "Total size: %s" % s
+ Total size: 2.16 GB
+ """
+
+ def __init__(self, multiplier=1024, **kw):
+ self._multiplier = multiplier
+ # create a mapping of units-to-multiplier, skip bytes as that is
+ # calculated initially always and does not need to convert
+ aliases = [
+ [('kb', 'kilobytes'), self._multiplier],
+ [('mb', 'megabytes'), self._multiplier ** 2],
+ [('gb', 'gigabytes'), self._multiplier ** 3],
+ [('tb', 'terabytes'), self._multiplier ** 4],
+ ]
+ # and mappings for units-to-formatters, including bytes and aliases for
+ # each
+ format_aliases = [
+ [('b', 'bytes'), FloatB],
+ [('kb', 'kilobytes'), FloatKB],
+ [('mb', 'megabytes'), FloatMB],
+ [('gb', 'gigabytes'), FloatGB],
+ [('tb', 'terabytes'), FloatTB],
+ ]
+ self._formatters = {}
+ for key, value in format_aliases:
+ for alias in key:
+ self._formatters[alias] = value
+ self._factors = {}
+ for key, value in aliases:
+ for alias in key:
+ self._factors[alias] = value
+
+ for k, v in kw.items():
+ self._convert(v, k)
+            # only pursue the first occurrence
+ break
+
+ def _convert(self, size, unit):
+ """
+ Convert any size down to bytes so that other methods can rely on bytes
+ being available always, regardless of what they pass in, avoiding the
+ need for a mapping of every permutation.
+ """
+ if unit in ['b', 'bytes']:
+ self._b = size
+ return
+ factor = self._factors[unit]
+ self._b = float(size * factor)
+
+ def _get_best_format(self):
+ """
+        Go through all the supported units, and use the first one whose value
+        does not exceed 1024. This allows representing the size in the most
+        readable format available
+ """
+ for unit in ['b', 'kb', 'mb', 'gb', 'tb']:
+ if getattr(self, unit) > 1024:
+ continue
+ return getattr(self, unit)
+
+ def __repr__(self):
+ return "<Size(%s)>" % self._get_best_format()
+
+ def __str__(self):
+ return "%s" % self._get_best_format()
+
+ def __lt__(self, other):
+ return self._b < other._b
+
+ def __le__(self, other):
+ return self._b <= other._b
+
+ def __eq__(self, other):
+ return self._b == other._b
+
+ def __ne__(self, other):
+ return self._b != other._b
+
+ def __ge__(self, other):
+ return self._b >= other._b
+
+ def __gt__(self, other):
+ return self._b > other._b
+
+ def __add__(self, other):
+ if isinstance(other, Size):
+ _b = self._b + other._b
+ return Size(b=_b)
+ raise TypeError('Cannot add "Size" object with int')
+
+ def __sub__(self, other):
+ if isinstance(other, Size):
+ _b = self._b - other._b
+ return Size(b=_b)
+ raise TypeError('Cannot subtract "Size" object from int')
+
+ def __mul__(self, other):
+ if isinstance(other, Size):
+ raise TypeError('Cannot multiply with "Size" object')
+ _b = self._b * other
+ return Size(b=_b)
+
+ def __truediv__(self, other):
+ if isinstance(other, Size):
+ return self._b / other._b
+ self._b = self._b / other
+ return self
+
+ def __div__(self, other):
+ if isinstance(other, Size):
+ return self._b / other._b
+ self._b = self._b / other
+ return self
+
+ def __getattr__(self, unit):
+ """
+ Calculate units on the fly, relies on the fact that ``bytes`` has been
+ converted at instantiation. Units that don't exist will trigger an
+ ``AttributeError``
+ """
+ try:
+ formatter = self._formatters[unit]
+ except KeyError:
+            raise AttributeError('Size object has no attribute "%s"' % unit)
+ if unit in ['b', 'bytes']:
+ return formatter(self._b)
+ try:
+ factor = self._factors[unit]
+ except KeyError:
+            raise AttributeError('Size object has no attribute "%s"' % unit)
+ return formatter(float(self._b) / factor)
+
+
+def human_readable_size(size):
+ """
+ Take a size in bytes, and transform it into a human readable size with up
+ to two decimals of precision.
+ """
+ suffixes = ['B', 'KB', 'MB', 'GB', 'TB']
+ suffix_index = 0
+ while size > 1024:
+ suffix_index += 1
+ size = size / 1024.0
+ return "{size:.2f} {suffix}".format(
+ size=size,
+ suffix=suffixes[suffix_index])
+
+
+def get_partitions_facts(sys_block_path):
+ partition_metadata = {}
+ for folder in os.listdir(sys_block_path):
+ folder_path = os.path.join(sys_block_path, folder)
+ if os.path.exists(os.path.join(folder_path, 'partition')):
+ contents = get_file_contents(os.path.join(folder_path, 'partition'))
+ if '1' in contents:
+ part = {}
+ partname = folder
+ part_sys_block_path = os.path.join(sys_block_path, partname)
+
+ part['start'] = get_file_contents(part_sys_block_path + "/start", 0)
+ part['sectors'] = get_file_contents(part_sys_block_path + "/size", 0)
+
+ part['sectorsize'] = get_file_contents(
+ part_sys_block_path + "/queue/logical_block_size")
+ if not part['sectorsize']:
+ part['sectorsize'] = get_file_contents(
+ part_sys_block_path + "/queue/hw_sector_size", 512)
+ part['size'] = human_readable_size(float(part['sectors']) * 512)
+
+ partition_metadata[partname] = part
+ return partition_metadata
+
+
+def is_mapper_device(device_name):
+ return device_name.startswith(('/dev/mapper', '/dev/dm-'))
+
+
+def get_devices(_sys_block_path='/sys/block', _dev_path='/dev', _mapper_path='/dev/mapper'):
+ """
+ Captures all available devices from /sys/block/, including its partitions,
+ along with interesting metadata like sectors, size, vendor,
+ solid/rotational, etc...
+
+ Returns a dictionary, where keys are the full paths to devices.
+
+    .. note:: device mapper devices get their path updated to what they link from, if
+ /dev/dm-0 is linked by /dev/mapper/ceph-data, then the latter gets
+ used as the key.
+
+    .. note:: loop devices, removable media, and logical volumes are never included.
+ """
+ # Portions of this detection process are inspired by some of the fact
+ # gathering done by Ansible in module_utils/facts/hardware/linux.py. The
+ # processing of metadata and final outcome *is very different* and fully
+    # incompatible. There are ignored devices, and paths get resolved depending
+ # on dm devices, loop, and removable media
+
+ device_facts = {}
+
+ block_devs = get_block_devs(_sys_block_path)
+ dev_devs = get_dev_devs(_dev_path)
+ mapper_devs = get_mapper_devs(_mapper_path)
+
+ for block in block_devs:
+ sysdir = os.path.join(_sys_block_path, block)
+ metadata = {}
+
+ # Ensure that the diskname is an absolute path and that it never points
+ # to a /dev/dm-* device
+ diskname = mapper_devs.get(block) or dev_devs.get(block)
+
+ # If the mapper device is a logical volume it gets excluded
+ if is_mapper_device(diskname):
+ if lvm.is_lv(diskname):
+ continue
+
+ # If the device reports itself as 'removable', get it excluded
+ metadata['removable'] = get_file_contents(os.path.join(sysdir, 'removable'))
+ if metadata['removable'] == '1':
+ continue
+
+ for key in ['vendor', 'model', 'sas_address', 'sas_device_handle']:
+ metadata[key] = get_file_contents(sysdir + "/device/" + key)
+
+ for key in ['sectors', 'size']:
+ metadata[key] = get_file_contents(os.path.join(sysdir, key), 0)
+
+ for key, _file in [('support_discard', '/queue/discard_granularity')]:
+ metadata[key] = get_file_contents(os.path.join(sysdir, _file))
+
+ metadata['partitions'] = get_partitions_facts(sysdir)
+
+ metadata['rotational'] = get_file_contents(sysdir + "/queue/rotational")
+ metadata['scheduler_mode'] = ""
+ scheduler = get_file_contents(sysdir + "/queue/scheduler")
+ if scheduler is not None:
+ m = re.match(r".*?(\[(.*)\])", scheduler)
+ if m:
+ metadata['scheduler_mode'] = m.group(2)
+
+ if not metadata['sectors']:
+ metadata['sectors'] = 0
+ size = metadata['sectors'] or metadata['size']
+ metadata['sectorsize'] = get_file_contents(sysdir + "/queue/logical_block_size")
+ if not metadata['sectorsize']:
+ metadata['sectorsize'] = get_file_contents(sysdir + "/queue/hw_sector_size", 512)
+ metadata['human_readable_size'] = human_readable_size(float(size) * 512)
+ metadata['size'] = float(size) * 512
+ metadata['path'] = diskname
+
+ device_facts[diskname] = metadata
+ return device_facts
:param mapping:
"""
+ if not os.path.exists(mapping):
+ logger.debug('device mapper path does not exist %s' % mapping)
+ logger.debug('will skip cryptsetup removal')
+ return
process.run(['cryptsetup', 'remove', mapping])
import os
import logging
import json
-from ceph_volume import process, conf
-from ceph_volume.util import system, constants
+from ceph_volume import process, conf, __release__, terminal
+from ceph_volume.util import system, constants, str_to_int, disk
logger = logging.getLogger(__name__)
+mlogger = terminal.MultiLogger(__name__)
def create_key():
system.chown(osd_keyring)
+def get_journal_size(lv_format=True):
+ """
+ Helper to retrieve the size (defined in megabytes in ceph.conf) to create
+ the journal logical volume, it "translates" the string into a float value,
+ then converts that into gigabytes, and finally (optionally) it formats it
+ back as a string so that it can be used for creating the LV.
+
+ :param lv_format: Return a string to be used for ``lv_create``. A 5 GB size
+ would result in '5G', otherwise it will return a ``Size`` object.
+ """
+ conf_journal_size = conf.ceph.get_safe('osd', 'osd_journal_size', '5120')
+ logger.debug('osd_journal_size set to %s' % conf_journal_size)
+ journal_size = disk.Size(mb=str_to_int(conf_journal_size))
+
+ if journal_size < disk.Size(gb=2):
+ mlogger.error('Refusing to continue with configured size for journal')
+ raise RuntimeError('journal sizes must be larger than 2GB, detected: %s' % journal_size)
+ if lv_format:
+ return '%sG' % journal_size.gb.as_int()
+ return journal_size
+
+
def create_id(fsid, json_secrets, osd_id=None):
"""
:param fsid: The osd fsid to create, always required
'-i', '-',
'osd', 'new', fsid
]
- if check_id(osd_id):
- cmd.append(osd_id)
+ if osd_id is not None:
+ if osd_id_available(osd_id):
+ cmd.append(osd_id)
+ else:
+ raise RuntimeError("The osd ID {} is already in use or does not exist.".format(osd_id))
stdout, stderr, returncode = process.call(
cmd,
stdin=json_secrets,
return ' '.join(stdout).strip()
-def check_id(osd_id):
+def osd_id_available(osd_id):
"""
- Checks to see if an osd ID exists or not. Returns True
- if it does exist, False if it doesn't.
+ Checks to see if an osd ID exists and if it's available for
+ reuse. Returns True if it is, False if it isn't.
:param osd_id: The osd ID to check
"""
output = json.loads(''.join(stdout).strip())
osds = output['nodes']
- return any([str(osd['id']) == str(osd_id) for osd in osds])
+ osd = [osd for osd in osds if str(osd['id']) == str(osd_id)]
+ if osd and osd[0].get('status') == "destroyed":
+ return True
+ return False
def mount_tmpfs(path):
path
])
+ # Restore SELinux context
+ system.set_context(path)
+
def create_osd_path(osd_id, tmpfs=False):
path = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id)
command.append(destination)
process.run(command)
+ # Restore SELinux context
+ system.set_context(destination)
+
def _link_device(device, device_type, osd_id):
"""
raise RuntimeError('Command failed with exit code %s: %s' % (returncode, ' '.join(command)))
-def osd_mkfs_filestore(osd_id, fsid):
+def osd_mkfs_filestore(osd_id, fsid, keyring):
"""
Create the files for the OSD to function. A normal call will look like:
system.chown(journal)
system.chown(path)
- process.run([
+ command = [
'ceph-osd',
'--cluster', conf.cluster,
# undocumented flag, sets the `type` file to contain 'filestore'
'--mkfs',
'-i', osd_id,
'--monmap', monmap,
+ ]
+
+ if __release__ != 'luminous':
+ # goes through stdin
+ command.extend(['--keyfile', '-'])
+
+ command.extend([
'--osd-data', path,
'--osd-journal', journal,
'--osd-uuid', fsid,
'--setuser', 'ceph',
'--setgroup', 'ceph'
])
+
+ _, _, returncode = process.call(
+ command, stdin=keyring, terminal_verbose=True, show_command=True
+ )
+ if returncode != 0:
+ raise RuntimeError('Command failed with exit code %s: %s' % (returncode, ' '.join(command)))
return user[2], user[3]
+def get_file_contents(path, default=''):
+ contents = default
+ if not os.path.exists(path):
+ return contents
+ try:
+ with open(path, 'r') as open_file:
+ contents = open_file.read().strip()
+ except Exception:
+ logger.exception('Failed to read contents from: %s' % path)
+
+ return contents
+
+
def mkdir_p(path, chown=True):
"""
A `mkdir -p` that defaults to chown the path to the ceph user
return devices_mounted
else:
return paths_mounted
+
+
+def set_context(path, recursive = False):
+ # restore selinux context to default policy values
+ if which('restorecon').startswith('/'):
+ if recursive:
+ process.run(['restorecon', '-R', path])
+ else:
+ process.run(['restorecon', path])
--- /dev/null
+
+osd_header = """
+{:-^100}""".format('')
+
+
+osd_component_titles = """
+ Type Path LV Size % of device"""
+
+
+osd_component = """
+ {_type: <15} {path: <55} {size: <15} {percent}%"""
+
+
+total_osds = """
+Total OSDs: {total_osds}
+"""
+
+ssd_volume_group = """
+Solid State VG:
+ Targets: {target: <25} Total size: {total_lv_size: <25}
+ Total LVs: {total_lvs: <25} Size per LV: {lv_size: <25}
+ Devices: {block_db_devices}
+"""
+
+
mds->handle_signal(signum);
}
-#ifdef BUILDING_FOR_EMBEDDED
-extern "C" int cephd_mds(int argc, const char **argv)
-#else
int main(int argc, const char **argv)
-#endif
{
ceph_pthread_setname(pthread_self(), "ceph-mds");
generic_server_usage();
}
-#ifdef BUILDING_FOR_EMBEDDED
-void cephd_preload_embedded_plugins();
-extern "C" int cephd_mon(int argc, const char **argv)
-#else
int main(int argc, const char **argv)
-#endif
{
int err;
if (stats.avail_percent <= g_conf->mon_data_avail_crit) {
derr << "error: monitor data filesystem reached concerning levels of"
<< " available storage space (available: "
- << stats.avail_percent << "% " << prettybyte_t(stats.byte_avail)
+ << stats.avail_percent << "% " << byte_u_t(stats.byte_avail)
<< ")\nyou may adjust 'mon data avail crit' to a lower value"
<< " to make this go away (default: " << g_conf->mon_data_avail_crit
<< "%)\n" << dendl;
}
common_init_finish(g_ceph_context);
global_init_chdir(g_ceph_context);
-#ifndef BUILDING_FOR_EMBEDDED
if (global_init_preload_erasure_code(g_ceph_context) < 0)
prefork.exit(1);
-#else
- cephd_preload_embedded_plugins();
-#endif
}
MonitorDBStore *store = new MonitorDBStore(g_conf->mon_data);
generic_server_usage();
}
-#ifdef BUILDING_FOR_EMBEDDED
-void cephd_preload_embedded_plugins();
-void cephd_preload_rados_classes(OSD *osd);
-extern "C" int cephd_osd(int argc, const char **argv)
-#else
int main(int argc, const char **argv)
-#endif
{
vector<const char*> args;
argv_to_vec(argc, argv, args);
return -ENODEV;
}
-#ifdef BUILDING_FOR_EMBEDDED
- cephd_preload_embedded_plugins();
-#endif
if (mkkey) {
common_init_finish(g_ceph_context);
return -1;
global_init_chdir(g_ceph_context);
-#ifndef BUILDING_FOR_EMBEDDED
if (global_init_preload_erasure_code(g_ceph_context) < 0)
return -1;
-#endif
srand(time(NULL) + getpid());
return 1;
}
-#ifdef BUILDING_FOR_EMBEDDED
- cephd_preload_rados_classes(osd);
-#endif
-
// install signal handlers
init_async_signal_handler();
register_async_signal_handler(SIGHUP, sighup_handler);
f->dump_int("dentry_count", lru.lru_get_size());
f->dump_int("dentry_pinned_count", lru.lru_get_num_pinned());
f->dump_int("id", get_nodeid().v);
+ entity_inst_t inst(messenger->get_myname(), messenger->get_myaddr());
+ f->dump_object("inst", inst);
+ f->dump_stream("inst_str") << inst;
+ f->dump_stream("addr_str") << inst.addr;
f->dump_int("inode_count", inode_map.size());
f->dump_int("mds_epoch", mdsmap->get_epoch());
f->dump_int("osd_epoch", osd_epoch);
}
-void Client::update_inode_file_bits(Inode *in,
- uint64_t truncate_seq, uint64_t truncate_size,
- uint64_t size, uint64_t change_attr,
- uint64_t time_warp_seq, utime_t ctime,
- utime_t mtime,
- utime_t atime,
- version_t inline_version,
- bufferlist& inline_data,
- int issued)
+void Client::update_inode_file_size(Inode *in, int issued, uint64_t size,
+ uint64_t truncate_seq, uint64_t truncate_size)
{
- bool warn = false;
- ldout(cct, 10) << "update_inode_file_bits " << *in << " " << ccap_string(issued)
- << " mtime " << mtime << dendl;
- ldout(cct, 25) << "truncate_seq: mds " << truncate_seq << " local "
- << in->truncate_seq << " time_warp_seq: mds " << time_warp_seq
- << " local " << in->time_warp_seq << dendl;
uint64_t prior_size = in->size;
- if (inline_version > in->inline_version) {
- in->inline_data = inline_data;
- in->inline_version = inline_version;
- }
-
- /* always take a newer change attr */
- if (change_attr > in->change_attr)
- in->change_attr = change_attr;
-
if (truncate_seq > in->truncate_seq ||
(truncate_seq == in->truncate_seq && size > in->size)) {
ldout(cct, 10) << "size " << in->size << " -> " << size << dendl;
ldout(cct, 0) << "Hmmm, truncate_seq && truncate_size changed on non-file inode!" << dendl;
}
}
-
+}
+
+void Client::update_inode_file_time(Inode *in, int issued, uint64_t time_warp_seq,
+ utime_t ctime, utime_t mtime, utime_t atime)
+{
+ ldout(cct, 10) << __func__ << " " << *in << " " << ccap_string(issued)
+ << " ctime " << ctime << " mtime " << mtime << dendl;
+
+ if (time_warp_seq > in->time_warp_seq)
+ ldout(cct, 10) << " mds time_warp_seq " << time_warp_seq
+ << " is higher than local time_warp_seq "
+ << in->time_warp_seq << dendl;
+
+ int warn = false;
// be careful with size, mtime, atime
if (issued & (CEPH_CAP_FILE_EXCL|
CEPH_CAP_FILE_WR|
if (ctime > in->ctime)
in->ctime = ctime;
if (time_warp_seq > in->time_warp_seq) {
- ldout(cct, 10) << "mds time_warp_seq " << time_warp_seq << " on inode " << *in
- << " is higher than local time_warp_seq "
- << in->time_warp_seq << dendl;
//the mds updated times, so take those!
in->mtime = mtime;
in->atime = atime;
if (in->is_symlink())
in->symlink = st->symlink;
- if (was_new)
- ldout(cct, 12) << "add_update_inode adding " << *in << " caps " << ccap_string(st->cap.caps) << dendl;
-
- if (!st->cap.caps)
- return in; // as with readdir returning indoes in different snaprealms (no caps!)
-
// only update inode if mds info is strictly newer, or it is the same and projected (odd).
- bool updating_inode = false;
- int issued = 0;
- if (st->version == 0 ||
- (in->version & ~1) < st->version) {
- updating_inode = true;
+ bool new_version = false;
+ if (in->version == 0 ||
+ ((st->cap.flags & CEPH_CAP_FLAG_AUTH) &&
+ (in->version & ~1) < st->version))
+ new_version = true;
- int implemented = 0;
- issued = in->caps_issued(&implemented) | in->caps_dirty();
- issued |= implemented;
+ int issued;
+ in->caps_issued(&issued);
+ issued |= in->caps_dirty();
+ int new_issued = ~issued & (int)st->cap.caps;
- in->version = st->version;
+ if ((new_version || (new_issued & CEPH_CAP_AUTH_SHARED)) &&
+ !(issued & CEPH_CAP_AUTH_EXCL)) {
+ in->mode = st->mode;
+ in->uid = st->uid;
+ in->gid = st->gid;
+ in->btime = st->btime;
+ }
- if ((issued & CEPH_CAP_AUTH_EXCL) == 0) {
- in->mode = st->mode;
- in->uid = st->uid;
- in->gid = st->gid;
- in->btime = st->btime;
- }
+ if ((new_version || (new_issued & CEPH_CAP_LINK_SHARED)) &&
+ !(issued & CEPH_CAP_LINK_EXCL)) {
+ in->nlink = st->nlink;
+ }
- if ((issued & CEPH_CAP_LINK_EXCL) == 0) {
- in->nlink = st->nlink;
- }
+ if (new_version || (new_issued & CEPH_CAP_ANY_RD)) {
+ update_inode_file_time(in, issued, st->time_warp_seq,
+ st->ctime, st->mtime, st->atime);
+ }
- in->dirstat = st->dirstat;
- in->rstat = st->rstat;
- in->quota = st->quota;
+ if (new_version ||
+ (new_issued & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR))) {
in->layout = st->layout;
+ update_inode_file_size(in, issued, st->size, st->truncate_seq, st->truncate_size);
+ }
- if (in->is_dir()) {
+ if (in->is_dir()) {
+ if (new_version || (new_issued & CEPH_CAP_FILE_SHARED)) {
+ in->dirstat = st->dirstat;
+ }
+ // dir_layout/rstat/quota are not tracked by capability, update them only if
+ // the inode stat is from auth mds
+ if (new_version || (st->cap.flags & CEPH_CAP_FLAG_AUTH)) {
in->dir_layout = st->dir_layout;
ldout(cct, 20) << " dir hash is " << (int)in->dir_layout.dl_dir_hash << dendl;
+ in->rstat = st->rstat;
+ in->quota = st->quota;
+ }
+ // move me if/when version reflects fragtree changes.
+ if (in->dirfragtree != st->dirfragtree) {
+ in->dirfragtree = st->dirfragtree;
+ _fragmap_remove_non_leaves(in);
}
-
- update_inode_file_bits(in, st->truncate_seq, st->truncate_size, st->size,
- st->change_attr, st->time_warp_seq, st->ctime,
- st->mtime, st->atime, st->inline_version,
- st->inline_data, issued);
- } else if (st->inline_version > in->inline_version) {
- in->inline_data = st->inline_data;
- in->inline_version = st->inline_version;
}
if ((in->xattr_version == 0 || !(issued & CEPH_CAP_XATTR_EXCL)) &&
in->xattr_version = st->xattr_version;
}
- // move me if/when version reflects fragtree changes.
- if (in->dirfragtree != st->dirfragtree) {
- in->dirfragtree = st->dirfragtree;
- _fragmap_remove_non_leaves(in);
+ if (st->inline_version > in->inline_version) {
+ in->inline_data = st->inline_data;
+ in->inline_version = st->inline_version;
}
+ /* always take a newer change attr */
+ if (st->change_attr > in->change_attr)
+ in->change_attr = st->change_attr;
+
+ if (st->version > in->version)
+ in->version = st->version;
+
+ if (was_new)
+ ldout(cct, 12) << __func__ << " adding " << *in << " caps " << ccap_string(st->cap.caps) << dendl;
+
+ if (!st->cap.caps)
+    return in; // as with readdir returning inodes in different snaprealms (no caps!)
+
if (in->snapid == CEPH_NOSNAP) {
add_update_cap(in, session, st->cap.cap_id, st->cap.caps, st->cap.seq,
st->cap.mseq, inodeno_t(st->cap.realm), st->cap.flags,
in->max_size = st->max_size;
in->rstat = st->rstat;
}
- } else
- in->snap_caps |= st->cap.caps;
- // setting I_COMPLETE needs to happen after adding the cap
- if (updating_inode &&
- in->is_dir() &&
- (st->cap.caps & CEPH_CAP_FILE_SHARED) &&
- (issued & CEPH_CAP_FILE_EXCL) == 0 &&
- in->dirstat.nfiles == 0 &&
- in->dirstat.nsubdirs == 0) {
- ldout(cct, 10) << " marking (I_COMPLETE|I_DIR_ORDERED) on empty dir " << *in << dendl;
- in->flags |= I_COMPLETE | I_DIR_ORDERED;
- if (in->dir) {
- ldout(cct, 10) << " dir is open on empty dir " << in->ino << " with "
- << in->dir->dentries.size() << " entries, marking all dentries null" << dendl;
- in->dir->readdir_cache.clear();
- for (auto p = in->dir->dentries.begin();
- p != in->dir->dentries.end();
- ++p) {
- unlink(p->second, true, true); // keep dir, keep dentry
+ // setting I_COMPLETE needs to happen after adding the cap
+ if (in->is_dir() &&
+ (st->cap.caps & CEPH_CAP_FILE_SHARED) &&
+ (issued & CEPH_CAP_FILE_EXCL) == 0 &&
+ in->dirstat.nfiles == 0 &&
+ in->dirstat.nsubdirs == 0) {
+ ldout(cct, 10) << " marking (I_COMPLETE|I_DIR_ORDERED) on empty dir " << *in << dendl;
+ in->flags |= I_COMPLETE | I_DIR_ORDERED;
+ if (in->dir) {
+ ldout(cct, 10) << " dir is open on empty dir " << in->ino << " with "
+ << in->dir->dentries.size() << " entries, marking all dentries null" << dendl;
+ in->dir->readdir_cache.clear();
+ for (const auto& p : in->dir->dentries) {
+ unlink(p.second, true, true); // keep dir, keep dentry
+ }
+ if (in->dir->dentries.empty())
+ close_dir(in->dir);
}
- if (in->dir->dentries.empty())
- close_dir(in->dir);
}
+ } else {
+ in->snap_caps |= st->cap.caps;
}
return in;
void Client::dump_mds_sessions(Formatter *f)
{
f->dump_int("id", get_nodeid().v);
+ entity_inst_t inst(messenger->get_myname(), messenger->get_myaddr());
+ f->dump_object("inst", inst);
+ f->dump_stream("inst_str") << inst;
+ f->dump_stream("addr_str") << inst.addr;
f->open_array_section("sessions");
for (map<mds_rank_t,MetaSession*>::const_iterator p = mds_sessions.begin(); p != mds_sessions.end(); ++p) {
f->open_object_section("session");
}
if (objecter->osdmap_pool_full(in->layout.pool_id)) {
- ldout(cct, 1) << __func__ << ": FULL, purging for ENOSPC" << dendl;
+ ldout(cct, 8) << __func__ << ": FULL, purging for ENOSPC" << dendl;
objectcacher->purge_set(&in->oset);
if (onfinish) {
onfinish->complete(-ENOSPC);
<< " size " << in->size << " -> " << m->get_size()
<< dendl;
- int implemented = 0;
- int issued = in->caps_issued(&implemented) | in->caps_dirty();
- issued |= implemented;
- update_inode_file_bits(in, m->get_truncate_seq(), m->get_truncate_size(),
- m->get_size(), m->get_change_attr(), m->get_time_warp_seq(),
- m->get_ctime(), m->get_mtime(), m->get_atime(),
- m->inline_version, m->inline_data, issued);
+ int issued;
+ in->caps_issued(&issued);
+ issued |= in->caps_dirty();
+ update_inode_file_size(in, issued, m->get_size(),
+ m->get_truncate_seq(), m->get_truncate_size());
m->put();
}
cap->seq = m->get_seq();
cap->gen = session->cap_gen;
- in->layout = m->get_layout();
-
// update inode
- int implemented = 0;
- int issued = in->caps_issued(&implemented) | in->caps_dirty();
- issued |= implemented;
+ int issued;
+ in->caps_issued(&issued);
+ issued |= in->caps_dirty();
- if ((issued & CEPH_CAP_AUTH_EXCL) == 0) {
+ if ((new_caps & CEPH_CAP_AUTH_SHARED) &&
+ !(issued & CEPH_CAP_AUTH_EXCL)) {
in->mode = m->head.mode;
in->uid = m->head.uid;
in->gid = m->head.gid;
in->btime = m->btime;
}
bool deleted_inode = false;
- if ((issued & CEPH_CAP_LINK_EXCL) == 0) {
+ if ((new_caps & CEPH_CAP_LINK_SHARED) &&
+ !(issued & CEPH_CAP_LINK_EXCL)) {
in->nlink = m->head.nlink;
if (in->nlink == 0 &&
(new_caps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL)))
deleted_inode = true;
}
- if ((issued & CEPH_CAP_XATTR_EXCL) == 0 &&
+ if (!(issued & CEPH_CAP_XATTR_EXCL) &&
m->xattrbl.length() &&
m->head.xattr_version > in->xattr_version) {
bufferlist::iterator p = m->xattrbl.begin();
in->dirstat.nsubdirs = m->get_nsubdirs();
}
- update_inode_file_bits(in, m->get_truncate_seq(), m->get_truncate_size(), m->get_size(),
- m->get_change_attr(), m->get_time_warp_seq(), m->get_ctime(),
- m->get_mtime(), m->get_atime(),
- m->inline_version, m->inline_data, issued);
+ if (new_caps & CEPH_CAP_ANY_RD) {
+ update_inode_file_time(in, issued, m->get_time_warp_seq(),
+ m->get_ctime(), m->get_mtime(), m->get_atime());
+ }
+
+ if (new_caps & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR)) {
+ in->layout = m->get_layout();
+ update_inode_file_size(in, issued, m->get_size(),
+ m->get_truncate_seq(), m->get_truncate_size());
+ }
+
+ if (m->inline_version > in->inline_version) {
+ in->inline_data = m->inline_data;
+ in->inline_version = m->inline_version;
+ }
+
+ /* always take a newer change attr */
+ if (m->get_change_attr() > in->change_attr)
+ in->change_attr = m->get_change_attr();
// max_size
if (cap == in->auth_cap &&
- m->get_max_size() != in->max_size) {
+ (new_caps & CEPH_CAP_ANY_FILE_WR) &&
+ (m->get_max_size() != in->max_size)) {
ldout(cct, 10) << "max_size " << in->max_size << " -> " << m->get_max_size() << dendl;
in->max_size = m->get_max_size();
if (in->max_size > in->wanted_max_size) {
r = inode_permission(in, perms, want);
}
out:
- ldout(cct, 3) << __func__ << " " << in << " = " << r << dendl;
+ ldout(cct, 5) << __func__ << " " << in << " = " << r << dendl;
return r;
}
return -ENOTDIR;
*dirpp = new dir_result_t(in, perms);
opened_dirs.insert(*dirpp);
- ldout(cct, 3) << "_opendir(" << in->ino << ") = " << 0 << " (" << *dirpp << ")" << dendl;
+ ldout(cct, 8) << "_opendir(" << in->ino << ") = " << 0 << " (" << *dirpp << ")" << dendl;
return 0;
}
* the resulting Inode object in one operation, so that caller
* can safely assume inode will still be there after return.
*/
-int Client::lookup_ino(inodeno_t ino, const UserPerm& perms, Inode **inode)
+int Client::_lookup_ino(inodeno_t ino, const UserPerm& perms, Inode **inode)
{
- Mutex::Locker lock(client_lock);
- ldout(cct, 3) << "lookup_ino enter(" << ino << ")" << dendl;
+ ldout(cct, 8) << "lookup_ino enter(" << ino << ")" << dendl;
if (unmounting)
return -ENOTCONN;
*inode = p->second;
_ll_get(*inode);
}
- ldout(cct, 3) << "lookup_ino exit(" << ino << ") = " << r << dendl;
+ ldout(cct, 8) << "lookup_ino exit(" << ino << ") = " << r << dendl;
return r;
}
-
+int Client::lookup_ino(inodeno_t ino, const UserPerm& perms, Inode **inode)
+{
+ Mutex::Locker lock(client_lock);
+ return _lookup_ino(ino, perms, inode);
+}
/**
* Find the parent inode of `ino` and insert it into
* our cache. Conditionally also set `parent` to a referenced
* Inode* if caller provides non-NULL value.
*/
-int Client::lookup_parent(Inode *ino, const UserPerm& perms, Inode **parent)
+int Client::_lookup_parent(Inode *ino, const UserPerm& perms, Inode **parent)
{
- Mutex::Locker lock(client_lock);
- ldout(cct, 3) << "lookup_parent enter(" << ino->ino << ")" << dendl;
+ ldout(cct, 8) << "lookup_parent enter(" << ino->ino << ")" << dendl;
if (unmounting)
return -ENOTCONN;
if (!ino->dn_set.empty()) {
// if we exposed the parent here, we'd need to check permissions,
// but right now we just rely on the MDS doing so in make_request
- ldout(cct, 3) << "lookup_parent dentry already present" << dendl;
+ ldout(cct, 8) << "lookup_parent dentry already present" << dendl;
return 0;
}
if (ino->is_root()) {
*parent = NULL;
- ldout(cct, 3) << "ino is root, no parent" << dendl;
+ ldout(cct, 8) << "ino is root, no parent" << dendl;
return -EINVAL;
}
if (r == 0) {
*parent = target.get();
_ll_get(*parent);
- ldout(cct, 3) << "lookup_parent found parent " << (*parent)->ino << dendl;
+ ldout(cct, 8) << "lookup_parent found parent " << (*parent)->ino << dendl;
} else {
*parent = NULL;
}
}
- ldout(cct, 3) << "lookup_parent exit(" << ino->ino << ") = " << r << dendl;
+ ldout(cct, 8) << "lookup_parent exit(" << ino->ino << ") = " << r << dendl;
return r;
}
+int Client::lookup_parent(Inode *ino, const UserPerm& perms, Inode **parent)
+{
+ Mutex::Locker lock(client_lock);
+ return _lookup_parent(ino, perms, parent);
+}
/**
* Populate the parent dentry for `ino`, provided it is
* a child of `parent`.
*/
-int Client::lookup_name(Inode *ino, Inode *parent, const UserPerm& perms)
+int Client::_lookup_name(Inode *ino, Inode *parent, const UserPerm& perms)
{
assert(parent->is_dir());
-
- Mutex::Locker lock(client_lock);
ldout(cct, 3) << "lookup_name enter(" << ino->ino << ")" << dendl;
if (unmounting)
return r;
}
+int Client::lookup_name(Inode *ino, Inode *parent, const UserPerm& perms)
+{
+ Mutex::Locker lock(client_lock);
+ return _lookup_name(ino, parent, perms);
+}
Fh *Client::_create_fh(Inode *in, int flags, int cmode, const UserPerm& perms)
{
//ldout(cct, 3) << "op: client->close(open_files[ " << fh << " ]);" << dendl;
//ldout(cct, 3) << "op: open_files.erase( " << fh << " );" << dendl;
Inode *in = f->inode.get();
- ldout(cct, 5) << "_release_fh " << f << " mode " << f->mode << " on " << *in << dendl;
+ ldout(cct, 8) << "_release_fh " << f << " mode " << f->mode << " on " << *in << dendl;
in->unset_deleg(f);
result = get_caps(in, need, want, &have, -1);
if (result < 0) {
- ldout(cct, 1) << "Unable to get caps after open of inode " << *in <<
+ ldout(cct, 8) << "Unable to get caps after open of inode " << *in <<
" . Denying open: " <<
cpp_strerror(result) << dendl;
in->put_open_ref(cmode);
ceph_abort();
}
- ldout(cct, 3) << "_lseek(" << f << ", " << offset << ", " << whence << ") = " << f->pos << dendl;
+ ldout(cct, 8) << "_lseek(" << f << ", " << offset << ", " << whence << ") = " << f->pos << dendl;
return f->pos;
}
// The IOs in this fsync were okay, but maybe something happened
// in the background that we should be reporting?
r = f->take_async_err();
- ldout(cct, 3) << "fsync(" << fd << ", " << syncdataonly
+ ldout(cct, 5) << "fsync(" << fd << ", " << syncdataonly
<< ") = 0, async_err = " << r << dendl;
} else {
// Assume that an error we encountered during fsync, even reported
// synchronously, would also have applied the error to the Fh, and we
// should clear it here to avoid returning the same error again on next
// call.
- ldout(cct, 3) << "fsync(" << fd << ", " << syncdataonly << ") = "
+ ldout(cct, 5) << "fsync(" << fd << ", " << syncdataonly << ") = "
<< r << dendl;
f->take_async_err();
}
ceph_tid_t flush_tid = 0;
InodeRef tmp_ref;
- ldout(cct, 3) << "_fsync on " << *in << " " << (syncdataonly ? "(dataonly)":"(data+metadata)") << dendl;
+ ldout(cct, 8) << "_fsync on " << *in << " " << (syncdataonly ? "(dataonly)":"(data+metadata)") << dendl;
if (cct->_conf->client_oc) {
object_cacher_completion = new C_SafeCond(&lock, &cond, &done, &r);
ldout(cct, 10) << "ino " << in->ino << " has no uncommitted writes" << dendl;
} else {
- ldout(cct, 1) << "ino " << in->ino << " failed to commit to disk! "
+ ldout(cct, 8) << "ino " << in->ino << " failed to commit to disk! "
<< cpp_strerror(-r) << dendl;
}
int Client::_fsync(Fh *f, bool syncdataonly)
{
- ldout(cct, 3) << "_fsync(" << f << ", " << (syncdataonly ? "dataonly)":"data+metadata)") << dendl;
+ ldout(cct, 8) << "_fsync(" << f << ", " << (syncdataonly ? "dataonly)":"data+metadata)") << dendl;
return _fsync(f->inode.get(), syncdataonly);
}
if (r < 0)
return r;
fill_stat(f->inode, stbuf, NULL);
- ldout(cct, 3) << "fstat(" << fd << ", " << stbuf << ") = " << r << dendl;
+ ldout(cct, 5) << "fstat(" << fd << ", " << stbuf << ") = " << r << dendl;
return r;
}
return r;
}
+int Client::ll_lookup_inode(
+ struct inodeno_t ino,
+ const UserPerm& perms,
+ Inode **inode)
+{
+ Mutex::Locker lock(client_lock);
+ ldout(cct, 3) << "ll_lookup_inode " << ino << dendl;
+
+ // Num1: look up the inode by ino and return a referenced pointer in *inode
+ int r = _lookup_ino(ino, perms, inode);
+ if (r) {
+ return r;
+ }
+ assert(inode != NULL);
+ assert(*inode != NULL);
+
+ // Num2: Request the parent inode, so that we can look up the name
+ Inode *parent;
+ r = _lookup_parent(*inode, perms, &parent);
+ if (r && r != -EINVAL) {
+ // Unexpected error
+ _ll_forget(*inode, 1);
+ return r;
+ } else if (r == -EINVAL) {
+ // EINVAL indicates node without parents (root), drop out now
+ // and don't try to look up the non-existent dentry.
+ return 0;
+ }
+ // FIXME: I don't think this works; lookup_parent() returns 0 if the parent
+ // is already in cache
+ assert(parent != NULL);
+
+ // Num3: Finally, get the name (dentry) of the requested inode
+ r = _lookup_name(*inode, parent, perms);
+ if (r) {
+ // Unexpected error
+ _ll_forget(parent, 1);
+ _ll_forget(*inode, 1);
+ return r;
+ }
+
+ _ll_forget(parent, 1);
+ return 0;
+}
+
int Client::ll_lookupx(Inode *parent, const char *name, Inode **out,
struct ceph_statx *stx, unsigned want, unsigned flags,
const UserPerm& perms)
void Client::_ll_drop_pins()
{
ldout(cct, 10) << "_ll_drop_pins" << dendl;
+ std::set<InodeRef> to_be_put; // holds extra inode refs; destroyed on function exit, releasing each held ref one by one
ceph::unordered_map<vinodeno_t, Inode*>::iterator next;
for (ceph::unordered_map<vinodeno_t, Inode*>::iterator it = inode_map.begin();
it != inode_map.end();
Inode *in = it->second;
next = it;
++next;
- if (in->ll_ref)
+ if (in->ll_ref){
+ to_be_put.insert(in);
_ll_put(in, in->ll_ref);
+ }
}
}
-bool Client::ll_forget(Inode *in, int count)
+bool Client::_ll_forget(Inode *in, int count)
{
- Mutex::Locker lock(client_lock);
inodeno_t ino = _get_inodeno(in);
- ldout(cct, 3) << "ll_forget " << ino << " " << count << dendl;
+ ldout(cct, 8) << "ll_forget " << ino << " " << count << dendl;
tout(cct) << "ll_forget" << std::endl;
tout(cct) << ino.val << std::endl;
tout(cct) << count << std::endl;
return last;
}
+bool Client::ll_forget(Inode *in, int count)
+{
+ Mutex::Locker lock(client_lock);
+ return _ll_forget(in, count);
+}
+
bool Client::ll_put(Inode *in)
{
/* ll_forget already takes the lock */
{
vinodeno_t vino = _get_vino(in);
- ldout(cct, 3) << "ll_getattr " << vino << dendl;
+ ldout(cct, 8) << "ll_getattr " << vino << dendl;
tout(cct) << "ll_getattr" << std::endl;
tout(cct) << vino.ino.val << std::endl;
{
vinodeno_t vino = _get_vino(in);
- ldout(cct, 3) << "ll_setattrx " << vino << " mask " << hex << mask << dec
+ ldout(cct, 8) << "ll_setattrx " << vino << " mask " << hex << mask << dec
<< dendl;
tout(cct) << "ll_setattrx" << std::endl;
tout(cct) << vino.ino.val << std::endl;
}
}
out:
- ldout(cct, 3) << "_getxattr(" << in->ino << ", \"" << name << "\", " << size << ") = " << r << dendl;
+ ldout(cct, 8) << "_getxattr(" << in->ino << ", \"" << name << "\", " << size << ") = " << r << dendl;
return r;
}
r = -ERANGE;
}
}
- ldout(cct, 3) << "_listxattr(" << in->ino << ", " << size << ") = " << r << dendl;
+ ldout(cct, 8) << "_listxattr(" << in->ino << ", " << size << ") = " << r << dendl;
return r;
}
int res = make_request(req, perms);
trim_cache();
- ldout(cct, 3) << "_removexattr(" << in->ino << ", \"" << name << "\") = " << res << dendl;
+ ldout(cct, 8) << "_removexattr(" << in->ino << ", \"" << name << "\") = " << res << dendl;
return res;
}
int Client::_mknod(Inode *dir, const char *name, mode_t mode, dev_t rdev,
const UserPerm& perms, InodeRef *inp)
{
- ldout(cct, 3) << "_mknod(" << dir->ino << " " << name << ", 0" << oct
+ ldout(cct, 8) << "_mknod(" << dir->ino << " " << name << ", 0" << oct
<< mode << dec << ", " << rdev << ", uid " << perms.uid()
<< ", gid " << perms.gid() << ")" << dendl;
trim_cache();
- ldout(cct, 3) << "mknod(" << path << ", 0" << oct << mode << dec << ") = " << res << dendl;
+ ldout(cct, 8) << "mknod(" << path << ", 0" << oct << mode << dec << ") = " << res << dendl;
return res;
fail:
int object_size, const char *data_pool, bool *created,
const UserPerm& perms)
{
- ldout(cct, 3) << "_create(" << dir->ino << " " << name << ", 0" << oct <<
+ ldout(cct, 8) << "_create(" << dir->ino << " " << name << ", 0" << oct <<
mode << dec << ")" << dendl;
if (strlen(name) > NAME_MAX)
reply_error:
trim_cache();
- ldout(cct, 3) << "create(" << path << ", 0" << oct << mode << dec
+ ldout(cct, 8) << "create(" << path << ", 0" << oct << mode << dec
<< " layout " << stripe_unit
<< ' ' << stripe_count
<< ' ' << object_size
int Client::_mkdir(Inode *dir, const char *name, mode_t mode, const UserPerm& perm,
InodeRef *inp)
{
- ldout(cct, 3) << "_mkdir(" << dir->ino << " " << name << ", 0" << oct
+ ldout(cct, 8) << "_mkdir(" << dir->ino << " " << name << ", 0" << oct
<< mode << dec << ", uid " << perm.uid()
<< ", gid " << perm.gid() << ")" << dendl;
trim_cache();
- ldout(cct, 3) << "_mkdir(" << path << ", 0" << oct << mode << dec << ") = " << res << dendl;
+ ldout(cct, 8) << "_mkdir(" << path << ", 0" << oct << mode << dec << ") = " << res << dendl;
return res;
fail:
int Client::_symlink(Inode *dir, const char *name, const char *target,
const UserPerm& perms, InodeRef *inp)
{
- ldout(cct, 3) << "_symlink(" << dir->ino << " " << name << ", " << target
+ ldout(cct, 8) << "_symlink(" << dir->ino << " " << name << ", " << target
<< ", uid " << perms.uid() << ", gid " << perms.gid() << ")"
<< dendl;
res = make_request(req, perms, inp);
trim_cache();
- ldout(cct, 3) << "_symlink(\"" << path << "\", \"" << target << "\") = " <<
+ ldout(cct, 8) << "_symlink(\"" << path << "\", \"" << target << "\") = " <<
res << dendl;
return res;
int Client::_unlink(Inode *dir, const char *name, const UserPerm& perm)
{
- ldout(cct, 3) << "_unlink(" << dir->ino << " " << name
+ ldout(cct, 8) << "_unlink(" << dir->ino << " " << name
<< " uid " << perm.uid() << " gid " << perm.gid()
<< ")" << dendl;
res = make_request(req, perm);
trim_cache();
- ldout(cct, 3) << "unlink(" << path << ") = " << res << dendl;
+ ldout(cct, 8) << "unlink(" << path << ") = " << res << dendl;
return res;
fail:
int Client::_rmdir(Inode *dir, const char *name, const UserPerm& perms)
{
- ldout(cct, 3) << "_rmdir(" << dir->ino << " " << name << " uid "
+ ldout(cct, 8) << "_rmdir(" << dir->ino << " " << name << " uid "
<< perms.uid() << " gid " << perms.gid() << ")" << dendl;
if (dir->snapid != CEPH_NOSNAP && dir->snapid != CEPH_SNAPDIR) {
res = make_request(req, perms);
trim_cache();
- ldout(cct, 3) << "rmdir(" << path << ") = " << res << dendl;
+ ldout(cct, 8) << "rmdir(" << path << ") = " << res << dendl;
return res;
fail:
int Client::_rename(Inode *fromdir, const char *fromname, Inode *todir, const char *toname, const UserPerm& perm)
{
- ldout(cct, 3) << "_rename(" << fromdir->ino << " " << fromname << " to "
+ ldout(cct, 8) << "_rename(" << fromdir->ino << " " << fromname << " to "
<< todir->ino << " " << toname
<< " uid " << perm.uid() << " gid " << perm.gid() << ")"
<< dendl;
// renamed item from our cache
trim_cache();
- ldout(cct, 3) << "_rename(" << from << ", " << to << ") = " << res << dendl;
+ ldout(cct, 8) << "_rename(" << from << ", " << to << ") = " << res << dendl;
return res;
fail:
int Client::_link(Inode *in, Inode *dir, const char *newname, const UserPerm& perm, InodeRef *inp)
{
- ldout(cct, 3) << "_link(" << in->ino << " to " << dir->ino << " " << newname
+ ldout(cct, 8) << "_link(" << in->ino << " to " << dir->ino << " " << newname
<< " uid " << perm.uid() << " gid " << perm.gid() << ")" << dendl;
if (strlen(newname) > NAME_MAX)
ldout(cct, 10) << "link result is " << res << dendl;
trim_cache();
- ldout(cct, 3) << "link(" << existing << ", " << path << ") = " << res << dendl;
+ ldout(cct, 8) << "link(" << existing << ", " << path << ") = " << res << dendl;
return res;
fail:
vinodeno_t vparent = _get_vino(parent);
- ldout(cct, 3) << "_ll_create " << vparent << " " << name << " 0" << oct <<
+ ldout(cct, 8) << "_ll_create " << vparent << " " << name << " 0" << oct <<
mode << dec << " " << ceph_flags_sys2wire(flags) << ", uid " << perms.uid()
<< ", gid " << perms.gid() << dendl;
tout(cct) << "ll_create" << std::endl;
tout(cct) << (unsigned long)*fhp << std::endl;
tout(cct) << ino << std::endl;
- ldout(cct, 3) << "_ll_create " << vparent << " " << name << " 0" << oct <<
+ ldout(cct, 8) << "_ll_create " << vparent << " " << name << " 0" << oct <<
mode << dec << " " << ceph_flags_sys2wire(flags) << " = " << r << " (" <<
*fhp << " " << hex << ino << dec << ")" << dendl;
void clear_dir_complete_and_ordered(Inode *diri, bool complete);
void insert_readdir_results(MetaRequest *request, MetaSession *session, Inode *diri);
Inode* insert_trace(MetaRequest *request, MetaSession *session);
- void update_inode_file_bits(Inode *in, uint64_t truncate_seq, uint64_t truncate_size, uint64_t size,
- uint64_t change_attr, uint64_t time_warp_seq, utime_t ctime,
- utime_t mtime, utime_t atime, version_t inline_version,
- bufferlist& inline_data, int issued);
+ void update_inode_file_size(Inode *in, int issued, uint64_t size,
+ uint64_t truncate_seq, uint64_t truncate_size);
+ void update_inode_file_time(Inode *in, int issued, uint64_t time_warp_seq,
+ utime_t ctime, utime_t mtime, utime_t atime);
+
Inode *add_update_inode(InodeStat *st, utime_t ttl, MetaSession *session,
const UserPerm& request_perms);
Dentry *insert_dentry_inode(Dir *dir, const string& dname, LeaseStat *dlease,
mds_rank_t _get_random_up_mds() const;
int _ll_getattr(Inode *in, int caps, const UserPerm& perms);
+ int _lookup_parent(Inode *in, const UserPerm& perms, Inode **parent=NULL);
+ int _lookup_name(Inode *in, Inode *parent, const UserPerm& perms);
+ int _lookup_ino(inodeno_t ino, const UserPerm& perms, Inode **inode=NULL);
+ bool _ll_forget(Inode *in, int count);
public:
int mount(const std::string &mount_root, const UserPerm& perms,
Inode *ll_get_inode(vinodeno_t vino);
int ll_lookup(Inode *parent, const char *name, struct stat *attr,
Inode **out, const UserPerm& perms);
+ int ll_lookup_inode(struct inodeno_t ino, const UserPerm& perms, Inode **inode);
int ll_lookupx(Inode *parent, const char *name, Inode **out,
struct ceph_statx *stx, unsigned want, unsigned flags,
const UserPerm& perms);
fcntl_locks(NULL), flock_locks(NULL)
{
memset(&dir_layout, 0, sizeof(dir_layout));
- memset("a, 0, sizeof(quota));
}
~Inode();
## Rados object classes
set(cls_dir ${CMAKE_INSTALL_LIBDIR}/rados-classes)
-set(cls_embedded_srcs)
# cls_sdk
add_library(cls_sdk SHARED sdk/cls_sdk.cc)
SOVERSION "1"
INSTALL_RPATH "")
install(TARGETS cls_hello DESTINATION ${cls_dir})
-list(APPEND cls_embedded_srcs ${cls_hello_srcs})
# cls_numops
set(cls_numops_srcs numops/cls_numops.cc)
set(cls_numops_client_srcs numops/cls_numops_client.cc)
add_library(cls_numops_client STATIC ${cls_numops_client_srcs})
-list(APPEND cls_embedded_srcs ${cls_numops_srcs} ${cls_numops_client_srcs})
# cls_rbd
if (WITH_RBD)
add_library(cls_rbd_client STATIC ${cls_rbd_client_srcs})
target_link_libraries(cls_rbd_client cls_lock_client)
- list(APPEND cls_embedded_srcs ${cls_rbd_srcs} ${cls_rbd_client_srcs})
endif (WITH_RBD)
# cls_lock
lock/cls_lock_ops.cc)
add_library(cls_lock_client STATIC ${cls_lock_client_srcs})
-list(APPEND cls_embedded_srcs ${cls_lock_srcs} ${cls_lock_client_srcs})
# cls_refcount
set(cls_refcount_srcs
refcount/cls_refcount_ops.cc)
add_library(cls_refcount_client STATIC ${cls_refcount_client_srcs})
-list(APPEND cls_embedded_srcs ${cls_refcount_srcs} ${cls_refcount_client_srcs})
# cls_version
set(cls_version_srcs version/cls_version.cc)
version/cls_version_types.cc)
add_library(cls_version_client STATIC ${cls_version_client_srcs})
-list(APPEND cls_embedded_srcs ${cls_version_srcs} ${cls_version_client_srcs})
# cls_log
set(cls_log_srcs log/cls_log.cc)
set(cls_log_client_srcs log/cls_log_client.cc)
add_library(cls_log_client STATIC ${cls_log_client_srcs})
-list(APPEND cls_embedded_srcs ${cls_log_srcs} ${cls_log_client_srcs})
# cls_statelog
set(cls_statelog_srcs statelog/cls_statelog.cc)
set(cls_statelog_client_srcs statelog/cls_statelog_client.cc)
add_library(cls_statelog_client STATIC ${cls_statelog_client_srcs})
-list(APPEND cls_embedded_srcs ${cls_statelog_srcs} ${cls_statelog_client_srcs})
# cls_timeindex
set(cls_timeindex_srcs timeindex/cls_timeindex.cc)
set(cls_timeindex_client_srcs timeindex/cls_timeindex_client.cc)
add_library(cls_timeindex_client STATIC ${cls_timeindex_client_srcs})
-list(APPEND cls_embedded_srcs ${cls_timeindex_srcs} ${cls_timeindex_client_srcs})
# cls_replica_log
set(cls_replica_log_srcs replica_log/cls_replica_log.cc)
user/cls_user_ops.cc)
add_library(cls_user_client STATIC ${cls_user_client_srcs})
-list(APPEND cls_embedded_srcs ${cls_user_srcs} ${cls_user_client_srcs})
# cls_journal
set(cls_journal_srcs
journal/cls_journal_types.cc)
add_library(cls_journal_client STATIC ${cls_journal_client_srcs})
-list(APPEND cls_embedded_srcs ${cls_journal_srcs} ${cls_journal_client_srcs})
# cls_rgw
if (WITH_RADOSGW)
rgw/cls_rgw_ops.cc)
add_library(cls_rgw_client STATIC ${cls_rgw_client_srcs})
- list(APPEND cls_embedded_srcs ${cls_rgw_srcs} ${cls_rgw_client_srcs})
endif (WITH_RADOSGW)
# cls_cephfs
cephfs/cls_cephfs_client.cc)
add_library(cls_cephfs_client STATIC ${cls_cephfs_client_srcs})
- list(APPEND cls_embedded_srcs ${cls_cephfs_srcs} ${cls_cephfs_client_srcs})
endif (WITH_CEPHFS)
# cls_lua
lua/cls_lua_client.cc)
add_library(cls_lua_client STATIC ${cls_lua_client_srcs})
-list(APPEND cls_embedded_srcs ${cls_lua_srcs} ${cls_lua_client_srcs})
-
-if(WITH_EMBEDDED)
- include(MergeStaticLibraries)
- list(REMOVE_DUPLICATES cls_embedded_srcs)
- add_library(cephd_cls_base STATIC ${cls_embedded_srcs})
- # while not necessary this seems to bring in the lua's include directories
- # so that cls_lua srcs build correctly
- target_link_libraries(cephd_cls_base liblua)
- set_target_properties(cephd_cls_base PROPERTIES COMPILE_DEFINITIONS BUILDING_FOR_EMBEDDED)
- merge_static_libraries(cephd_cls cephd_cls_base liblua)
-endif()
if (pkeys->empty())
return 0;
- map<string, bufferlist>::reverse_iterator last_element = pkeys->rbegin();
+ auto last_element = pkeys->rbegin();
if ((unsigned char)last_element->first[0] < BI_PREFIX_CHAR) {
/* nothing to see here, move along */
return 0;
}
- map<string, bufferlist>::iterator first_element = pkeys->begin();
+ auto first_element = pkeys->begin();
if ((unsigned char)first_element->first[0] > BI_PREFIX_CHAR) {
return 0;
}
/* let's rebuild the list, only keep entries we're interested in */
- map<string, bufferlist> old_keys;
- old_keys.swap(*pkeys);
+ auto comp = [](const pair<string, bufferlist>& l, const string &r) { return l.first < r; };
+ string new_start = {static_cast<char>(BI_PREFIX_CHAR + 1)};
- for (map<string, bufferlist>::iterator iter = old_keys.begin(); iter != old_keys.end(); ++iter) {
- if ((unsigned char)iter->first[0] != BI_PREFIX_CHAR) {
- (*pkeys)[iter->first] = iter->second;
- }
- }
+ auto lower = pkeys->lower_bound(string{static_cast<char>(BI_PREFIX_CHAR)});
+ auto upper = std::lower_bound(lower, pkeys->end(), new_start, comp);
+ pkeys->erase(lower, upper);
if (num_entries == (int)pkeys->size())
return 0;
map<string, bufferlist> new_keys;
- char c[] = { (char)(BI_PREFIX_CHAR + 1), 0 };
- string new_start = c;
/* now get some more keys */
ret = cls_cxx_map_get_vals(hctx, new_start, filter_prefix, num_entries - pkeys->size(), &new_keys, pmore);
if (ret < 0)
return ret;
- for (map<string, bufferlist>::iterator iter = new_keys.begin(); iter != new_keys.end(); ++iter) {
- (*pkeys)[iter->first] = iter->second;
- }
-
+ pkeys->insert(std::make_move_iterator(new_keys.begin()),
+ std::make_move_iterator(new_keys.end()));
return 0;
}
if (ret < 0 && ret != -ENOENT)
return -EINVAL;
+ if (ret == -ENOENT) {
+ continue;
+ }
+
if (cur_disk_bl.length()) {
bufferlist::iterator cur_disk_iter = cur_disk_bl.begin();
try {
}
break;
case CEPH_RGW_UPDATE:
- if (!cur_disk.exists) {
- // this update would only have been sent by the rgw client
- // if the rgw_bucket_dir_entry existed, however between that
- // check and now the entry has diappeared, so we were likely
- // in the midst of a delete op, and we will not recreate the
- // entry
- CLS_LOG(10,
- "CEPH_RGW_UPDATE not applied because rgw_bucket_dir_entry"
- " no longer exists\n");
- break;
- }
-
CLS_LOG(10, "CEPH_RGW_UPDATE name=%s instance=%s total_entries: %" PRId64 " -> %" PRId64 "\n",
cur_change.key.name.c_str(), cur_change.key.instance.c_str(), stats.num_entries, stats.num_entries + 1);
}
op_ret.is_truncated = (count >= max) || more;
- while (count >= max) {
+ while (count > max) {
op_ret.entries.pop_back();
count--;
}
CLS_LOG(10, "gc_iterate_entries key=%s\n", key.c_str());
- if (!end_key.empty() && key.compare(end_key) >= 0)
+ if (!end_key.empty() && key.compare(end_key) >= 0) {
+ if (truncated)
+ *truncated = false;
return 0;
+ }
if (!key_in_index(key, GC_OBJ_TIME_INDEX))
return 0;
};
WRITE_CLASS_ENCODER(rgw_cls_list_op)
-struct rgw_cls_list_ret
-{
+struct rgw_cls_list_ret {
rgw_bucket_dir dir;
bool is_truncated;
void cls_rgw_bucket_instance_entry::dump(Formatter *f) const
{
- encode_json("reshard_status", (int)reshard_status, f);
+ string status_str;
+ switch(reshard_status) {
+ case CLS_RGW_RESHARD_NONE:
+ status_str= "none";
+ break;
+ case CLS_RGW_RESHARD_IN_PROGRESS:
+ status_str = "in-progress";
+ break;
+ case CLS_RGW_RESHARD_DONE:
+ status_str = "done";
+ break;
+ default:
+ status_str = "invalid";
+ }
+ encode_json("reshard_status", status_str, f);
encode_json("new_bucket_instance_id", new_bucket_instance_id, f);
encode_json("num_shards", num_shards, f);
::decode(val, p);
::decode(delta, p);
::decode(vel, p);
+ last_decay = t;
DECODE_FINISH(p);
}
auto i = sdata->ops_in_flight_sharded.begin();
while (i != sdata->ops_in_flight_sharded.end() &&
i->get_initiated() < too_old) {
+
+ if (!i->warn_interval_multiplier)
+ continue;
+
(*slow)++;
// exponential backoff of warning intervals
void mark_event(const char *event,
utime_t stamp=ceph_clock_now());
+ void mark_nowarn() {
+ warn_interval_multiplier = 0;
+ }
+
virtual const char *state_string() const {
Mutex::Locker l(lock);
return events.rbegin()->c_str();
_show_config(NULL, f);
}
+void md_config_t::config_options(Formatter *f)
+{
+ Mutex::Locker l(lock);
+ f->open_array_section("options");
+ for (const auto& i: schema) {
+ const Option &opt = i.second;
+ opt.dump(f);
+ }
+ f->close_section();
+}
+
void md_config_t::_show_config(std::ostream *out, Formatter *f)
{
if (out) {
void show_config(std::ostream& out);
/// dump all config values to a formatter
void show_config(Formatter *f);
+
+ /// dump all config settings to a formatter
+ void config_options(Formatter *f);
/// obtain a diff between our config values and another md_config_t values
void diff(const md_config_t *other,
namespace std {
template<> struct hash<hobject_t> {
size_t operator()(const hobject_t &r) const {
- static rjhash<uint64_t> I;
- return r.get_hash() ^ I(r.snap);
+ static rjhash<uint64_t> RJ;
+ return RJ(r.get_hash() ^ r.snap);
}
};
} // namespace std
namespace std {
template<> struct hash<ghobject_t> {
size_t operator()(const ghobject_t &r) const {
- static rjhash<uint64_t> I;
- return r.hobj.get_hash() ^ I(r.hobj.snap);
+ static rjhash<uint64_t> RJ;
+ static hash<hobject_t> HO;
+ size_t hash = HO(r.hobj);
+ hash = RJ(hash ^ r.generation);
+ hash = hash ^ r.shard_id.id;
+ return hash;
}
};
} // namespace std
OPTION(rgw_healthcheck_disabling_path, OPT_STR) // path that existence causes the healthcheck to respond 503
OPTION(rgw_s3_auth_use_rados, OPT_BOOL) // should we try to use the internal credentials for s3?
OPTION(rgw_s3_auth_use_keystone, OPT_BOOL) // should we try to use keystone for s3?
+OPTION(rgw_s3_auth_order, OPT_STR) // s3 authentication order to try
OPTION(rgw_barbican_url, OPT_STR) // url for barbican server
/* OpenLDAP-style LDAP parameter strings */
OPTION(rgw_num_zone_opstate_shards, OPT_INT) // max shards for keeping inter-region copy progress info
OPTION(rgw_opstate_ratelimit_sec, OPT_INT) // min time between opstate updates on a single upload (0 for disabling ratelimit)
OPTION(rgw_curl_wait_timeout_ms, OPT_INT) // timeout for certain curl calls
+OPTION(rgw_curl_low_speed_limit, OPT_INT) // low speed limit for certain curl calls
+OPTION(rgw_curl_low_speed_time, OPT_INT) // low speed time for certain curl calls
OPTION(rgw_copy_obj_progress, OPT_BOOL) // should dump progress during long copy operations?
OPTION(rgw_copy_obj_progress_every_bytes, OPT_INT) // min bytes between copy progress output
OPTION(rgw_obj_tombstone_cache_size, OPT_INT) // how many objects in tombstone cache, which is used in multi-zone sync to keep
.set_description(""),
Option("filestore_merge_threshold", Option::TYPE_INT, Option::LEVEL_ADVANCED)
- .set_default(10)
+ .set_default(-10)
.set_description(""),
Option("filestore_split_multiple", Option::TYPE_INT, Option::LEVEL_ADVANCED)
.set_default(false)
.set_description("Should S3 authentication use Keystone."),
+ Option("rgw_s3_auth_order", Option::TYPE_STR, Option::LEVEL_ADVANCED)
+ .set_default("external, local")
+ .set_description("Authentication strategy order to use for s3 authentication")
+ .set_long_description(
+ "Order of authentication strategies to try for s3 authentication, the allowed "
+ "options are a comma separated list of engines external, local. The "
+ "default order is to try all the externally configured engines before "
+ "attempting local rados based authentication"),
+
Option("rgw_barbican_url", Option::TYPE_STR, Option::LEVEL_ADVANCED)
.set_default("")
.set_description("URL to barbican server."),
.set_default(1000)
.set_description(""),
+ Option("rgw_curl_low_speed_limit", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+ .set_default(1024)
+ .set_long_description(
+ "It contains the average transfer speed in bytes per second that the "
+ "transfer should be below during rgw_curl_low_speed_time seconds for libcurl "
+ "to consider it to be too slow and abort. Set it zero to disable this."),
+
+ Option("rgw_curl_low_speed_time", Option::TYPE_INT, Option::LEVEL_ADVANCED)
+ .set_default(30)
+ .set_long_description(
+ "It contains the time in number seconds that the transfer speed should be below "
+ "the rgw_curl_low_speed_limit for the library to consider it too slow and abort. "
+ "Set it zero to disable this."),
+
Option("rgw_copy_obj_progress", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
.set_default(true)
.set_description("Send progress report through copy operation")
Option("mds_inject_migrator_session_race", Option::TYPE_BOOL, Option::LEVEL_DEV)
.set_default(false),
+
+ Option("mds_inject_migrator_message_loss", Option::TYPE_INT, Option::LEVEL_DEV)
+ .set_default(0)
+ .set_description(""),
+
});
}
f->dump_string("nick", "");
}
f->dump_int("priority", get_adjusted_priority(d->prio));
+
+ if (d->unit == NONE) {
+ f->dump_string("units", "none");
+ } else if (d->unit == BYTES) {
+ f->dump_string("units", "bytes");
+ }
f->close_section();
} else {
if (d->type & PERFCOUNTER_LONGRUNAVG) {
void PerfCountersBuilder::add_u64_counter(
int idx, const char *name,
- const char *description, const char *nick, int prio)
+ const char *description, const char *nick, int prio, int unit)
{
add_impl(idx, name, description, nick, prio,
- PERFCOUNTER_U64 | PERFCOUNTER_COUNTER);
+ PERFCOUNTER_U64 | PERFCOUNTER_COUNTER, unit);
}
void PerfCountersBuilder::add_u64(
int idx, const char *name,
- const char *description, const char *nick, int prio)
+ const char *description, const char *nick, int prio, int unit)
{
- add_impl(idx, name, description, nick, prio, PERFCOUNTER_U64);
+ add_impl(idx, name, description, nick, prio, PERFCOUNTER_U64, unit);
}
void PerfCountersBuilder::add_u64_avg(
int idx, const char *name,
- const char *description, const char *nick, int prio)
+ const char *description, const char *nick, int prio, int unit)
{
add_impl(idx, name, description, nick, prio,
- PERFCOUNTER_U64 | PERFCOUNTER_LONGRUNAVG);
+ PERFCOUNTER_U64 | PERFCOUNTER_LONGRUNAVG, unit);
}
void PerfCountersBuilder::add_time(
int idx, const char *name,
PerfHistogramCommon::axis_config_d x_axis_config,
PerfHistogramCommon::axis_config_d y_axis_config,
- const char *description, const char *nick, int prio)
+ const char *description, const char *nick, int prio, int unit)
{
add_impl(idx, name, description, nick, prio,
- PERFCOUNTER_U64 | PERFCOUNTER_HISTOGRAM | PERFCOUNTER_COUNTER,
+ PERFCOUNTER_U64 | PERFCOUNTER_HISTOGRAM | PERFCOUNTER_COUNTER, unit,
unique_ptr<PerfHistogram<>>{new PerfHistogram<>{x_axis_config, y_axis_config}});
}
void PerfCountersBuilder::add_impl(
int idx, const char *name,
- const char *description, const char *nick, int prio, int ty,
+ const char *description, const char *nick, int prio, int ty, int unit,
unique_ptr<PerfHistogram<>> histogram)
{
assert(idx > m_perf_counters->m_lower_bound);
data.nick = nick;
data.prio = prio ? prio : prio_default;
data.type = (enum perfcounter_type_d)ty;
+ data.unit = (enum unit_t) unit;
data.histogram = std::move(histogram);
}
PERFCOUNTER_HISTOGRAM = 0x10, // histogram (vector) of values
};
+enum unit_t : uint8_t
+{
+ BYTES,
+ NONE
+};
/* Class for constructing a PerfCounters object.
*
};
void add_u64(int key, const char *name,
const char *description=NULL, const char *nick = NULL,
- int prio=0);
+ int prio=0, int unit=NONE);
void add_u64_counter(int key, const char *name,
const char *description=NULL,
const char *nick = NULL,
- int prio=0);
+ int prio=0, int unit=NONE);
void add_u64_avg(int key, const char *name,
const char *description=NULL,
const char *nick = NULL,
- int prio=0);
+ int prio=0, int unit=NONE);
void add_time(int key, const char *name,
const char *description=NULL,
const char *nick = NULL,
PerfHistogramCommon::axis_config_d y_axis_config,
const char *description=NULL,
const char* nick = NULL,
- int prio=0);
+ int prio=0, int unit=NONE);
void set_prio_default(int prio_)
{
PerfCountersBuilder(const PerfCountersBuilder &rhs);
PerfCountersBuilder& operator=(const PerfCountersBuilder &rhs);
void add_impl(int idx, const char *name,
- const char *description, const char *nick, int prio, int ty,
+ const char *description, const char *nick, int prio, int ty, int unit=NONE,
unique_ptr<PerfHistogram<>> histogram = nullptr);
PerfCounters *m_perf_counters;
: name(NULL),
description(NULL),
nick(NULL),
- type(PERFCOUNTER_NONE)
+ type(PERFCOUNTER_NONE),
+ unit(NONE)
{}
perf_counter_data_any_d(const perf_counter_data_any_d& other)
: name(other.name),
description(other.description),
nick(other.nick),
- type(other.type),
- u64(other.u64.load()) {
+ type(other.type),
+ unit(other.unit),
+ u64(other.u64.load()) {
pair<uint64_t,uint64_t> a = other.read_avg();
u64 = a.first;
avgcount = a.second;
const char *nick;
uint8_t prio = 0;
enum perfcounter_type_d type;
+ enum unit_t unit;
std::atomic<uint64_t> u64 = { 0 };
std::atomic<uint64_t> avgcount = { 0 };
std::atomic<uint64_t> avgcount2 = { 0 };
void set_hinfo_corrupted() {
errors |= err_t::HINFO_CORRUPTED;
}
+ bool only_data_digest_mismatch_info() const {
+ return errors == err_t::DATA_DIGEST_MISMATCH_INFO;
+ }
+ void clear_data_digest_mismatch_info() {
+ errors &= ~err_t::DATA_DIGEST_MISMATCH_INFO;
+ }
void encode(bufferlist& bl) const;
void decode(bufferlist::iterator& bp);
};
#include <climits>
#include <limits>
+#include <cmath>
#include <sstream>
+#include <boost/utility/string_view.hpp>
using std::ostringstream;
-long long strict_strtoll(const char *str, int base, std::string *err)
+long long strict_strtoll(const boost::string_view str, int base, std::string *err)
{
char *endptr;
- std::string errStr;
errno = 0; /* To distinguish success/failure after call (see man page) */
- long long ret = strtoll(str, &endptr, base);
-
- if (endptr == str) {
- errStr = "Expected option value to be integer, got '";
- errStr.append(str);
- errStr.append("'");
- *err = errStr;
+ long long ret = strtoll(str.data(), &endptr, base);
+ if (endptr == str.data() || endptr != str.data() + str.size()) {
+ *err = (std::string{"Expected option value to be integer, got '"} +
+ std::string{str} + "'");
return 0;
}
- if ((errno == ERANGE && (ret == LLONG_MAX || ret == LLONG_MIN))
- || (errno != 0 && ret == 0)) {
- errStr = "The option value '";
- errStr.append(str);
- errStr.append("'");
- errStr.append(" seems to be invalid");
- *err = errStr;
- return 0;
- }
- if (*endptr != '\0') {
- errStr = "The option value '";
- errStr.append(str);
- errStr.append("'");
- errStr.append(" contains invalid digits");
- *err = errStr;
+ if (errno) {
+ *err = (std::string{"The option value '"} + std::string{str} +
+ "' seems to be invalid");
return 0;
}
*err = "";
return ret;
}
-int strict_strtol(const char *str, int base, std::string *err)
+long long strict_strtoll(const char *str, int base, std::string *err)
+{
+ return strict_strtoll(boost::string_view(str), base, err);
+}
+
+int strict_strtol(const boost::string_view str, int base, std::string *err)
{
- std::string errStr;
long long ret = strict_strtoll(str, base, err);
if (!err->empty())
return 0;
if ((ret <= INT_MIN) || (ret >= INT_MAX)) {
- errStr = "The option value '";
- errStr.append(str);
- errStr.append("'");
- errStr.append(" seems to be invalid");
- *err = errStr;
+ ostringstream errStr;
+ errStr << "The option value '" << str << "' seems to be invalid";
+ *err = errStr.str();
return 0;
}
return static_cast<int>(ret);
}
-double strict_strtod(const char *str, std::string *err)
+int strict_strtol(const char *str, int base, std::string *err)
+{
+ return strict_strtol(boost::string_view(str), base, err);
+}
+
+double strict_strtod(const boost::string_view str, std::string *err)
{
char *endptr;
errno = 0; /* To distinguish success/failure after call (see man page) */
- double ret = strtod(str, &endptr);
+ double ret = strtod(str.data(), &endptr);
if (errno == ERANGE) {
ostringstream oss;
oss << "strict_strtod: floating point overflow or underflow parsing '"
return ret;
}
-float strict_strtof(const char *str, std::string *err)
+double strict_strtod(const char *str, std::string *err)
+{
+ return strict_strtod(boost::string_view(str), err);
+}
+
+float strict_strtof(const boost::string_view str, std::string *err)
{
char *endptr;
errno = 0; /* To distinguish success/failure after call (see man page) */
- float ret = strtof(str, &endptr);
+ float ret = strtof(str.data(), &endptr);
if (errno == ERANGE) {
ostringstream oss;
oss << "strict_strtof: floating point overflow or underflow parsing '"
return ret;
}
+float strict_strtof(const char *str, std::string *err)
+{
+ return strict_strtof(boost::string_view(str), err);
+}
+
template<typename T>
-T strict_si_cast(const char *str, std::string *err)
+T strict_iec_cast(const boost::string_view str, std::string *err)
{
- std::string s(str);
- if (s.empty()) {
- *err = "strict_sistrtoll: value not specified";
+ if (str.empty()) {
+ *err = "strict_iecstrtoll: value not specified";
return 0;
}
- const char &u = s.back();
+ // get a view of the unit and of the value
+ boost::string_view unit;
+ boost::string_view n = str;
+ size_t u = str.find_first_not_of("0123456789-+");
int m = 0;
- if (u == 'B')
- m = 0;
- else if (u == 'K')
- m = 10;
- else if (u == 'M')
- m = 20;
- else if (u == 'G')
- m = 30;
- else if (u == 'T')
- m = 40;
- else if (u == 'P')
- m = 50;
- else if (u == 'E')
- m = 60;
- else
- m = -1;
-
- if (m >= 0)
- s.pop_back();
- else
- m = 0;
-
- long long ll = strict_strtoll(s.c_str(), 10, err);
+ // deal with unit prefix if there is one
+ if (u != boost::string_view::npos) {
+ n = str.substr(0, u);
+ unit = str.substr(u, str.length() - u);
+ // we accept both old SI prefixes as well as the proper IEC prefixes
+ // i.e. K, M, ... and Ki, Mi, ...
+ if (unit.back() == 'i') {
+ if (unit.front() == 'B') {
+ *err = "strict_iecstrtoll: illegal prefix \"Bi\"";
+ return 0;
+ }
+ }
+ if (unit.length() > 2) {
+ *err = "strict_iecstrtoll: illegal prefix (length > 2)";
+ return 0;
+ }
+ switch(unit.front()) {
+ case 'K':
+ m = 10;
+ break;
+ case 'M':
+ m = 20;
+ break;
+ case 'G':
+ m = 30;
+ break;
+ case 'T':
+ m = 40;
+ break;
+ case 'P':
+ m = 50;
+ break;
+ case 'E':
+ m = 60;
+ break;
+ case 'B':
+ break;
+ default:
+ *err = "strict_iecstrtoll: unit prefix not recognized";
+ return 0;
+ }
+ }
+
+ long long ll = strict_strtoll(n, 10, err);
if (ll < 0 && !std::numeric_limits<T>::is_signed) {
- *err = "strict_sistrtoll: value should not be negative";
+ *err = "strict_iecstrtoll: value should not be negative";
return 0;
}
if (static_cast<unsigned>(m) >= sizeof(T) * CHAR_BIT) {
- *err = ("strict_sistrtoll: the SI prefix is too large for the designated "
- "type");
+ *err = ("strict_iecstrtoll: the IEC prefix is too large for the designated "
+ "type");
return 0;
}
using promoted_t = typename std::common_type<decltype(ll), T>::type;
if (static_cast<promoted_t>(ll) <
static_cast<promoted_t>(std::numeric_limits<T>::min()) >> m) {
- *err = "strict_sistrtoll: value seems to be too small";
+ *err = "strict_iecstrtoll: value seems to be too small";
return 0;
}
if (static_cast<promoted_t>(ll) >
static_cast<promoted_t>(std::numeric_limits<T>::max()) >> m) {
- *err = "strict_sistrtoll: value seems to be too large";
+ *err = "strict_iecstrtoll: value seems to be too large";
return 0;
}
return (ll << m);
}
-template int strict_si_cast<int>(const char *str, std::string *err);
-template long strict_si_cast<long>(const char *str, std::string *err);
-template long long strict_si_cast<long long>(const char *str, std::string *err);
-template uint64_t strict_si_cast<uint64_t>(const char *str, std::string *err);
-template uint32_t strict_si_cast<uint32_t>(const char *str, std::string *err);
+template int strict_iec_cast<int>(const boost::string_view str, std::string *err);
+template long strict_iec_cast<long>(const boost::string_view str, std::string *err);
+template long long strict_iec_cast<long long>(const boost::string_view str, std::string *err);
+template uint64_t strict_iec_cast<uint64_t>(const boost::string_view str, std::string *err);
+template uint32_t strict_iec_cast<uint32_t>(const boost::string_view str, std::string *err);
+
+uint64_t strict_iecstrtoll(const boost::string_view str, std::string *err)
+{
+ return strict_iec_cast<uint64_t>(str, err);
+}
+
+uint64_t strict_iecstrtoll(const char *str, std::string *err)
+{
+ return strict_iec_cast<uint64_t>(boost::string_view(str), err);
+}
+
+template<typename T>
+T strict_iec_cast(const char *str, std::string *err)
+{
+ return strict_iec_cast<T>(boost::string_view(str), err);
+}
+
+template int strict_iec_cast<int>(const char *str, std::string *err);
+template long strict_iec_cast<long>(const char *str, std::string *err);
+template long long strict_iec_cast<long long>(const char *str, std::string *err);
+template uint64_t strict_iec_cast<uint64_t>(const char *str, std::string *err);
+template uint32_t strict_iec_cast<uint32_t>(const char *str, std::string *err);
+
+template<typename T>
+T strict_si_cast(const boost::string_view str, std::string *err)
+{
+ if (str.empty()) {
+ *err = "strict_sistrtoll: value not specified";
+ return 0;
+ }
+ boost::string_view n = str;
+ int m = 0;
+ // deal with unit prefix if there is one
+ if (str.find_first_not_of("0123456789+-") != boost::string_view::npos) {
+ const char &u = str.back();
+ if (u == 'K')
+ m = 3;
+ else if (u == 'M')
+ m = 6;
+ else if (u == 'G')
+ m = 9;
+ else if (u == 'T')
+ m = 12;
+ else if (u == 'P')
+ m = 15;
+ else if (u == 'E')
+ m = 18;
+ else if (u != 'B') {
+ *err = "strict_si_cast: unit prefix not recognized";
+ return 0;
+ }
+
+ if (m >= 3)
+ n = str.substr(0, str.length() -1);
+ }
+
+ long long ll = strict_strtoll(n, 10, err);
+ if (ll < 0 && !std::numeric_limits<T>::is_signed) {
+ *err = "strict_sistrtoll: value should not be negative";
+ return 0;
+ }
+ using promoted_t = typename std::common_type<decltype(ll), T>::type;
+ if (static_cast<promoted_t>(ll) <
+ static_cast<promoted_t>(std::numeric_limits<T>::min()) / pow (10, m)) {
+ *err = "strict_sistrtoll: value seems to be too small";
+ return 0;
+ }
+ if (static_cast<promoted_t>(ll) >
+ static_cast<promoted_t>(std::numeric_limits<T>::max()) / pow (10, m)) {
+ *err = "strict_sistrtoll: value seems to be too large";
+ return 0;
+ }
+ return (ll * pow (10, m));
+}
+
+template int strict_si_cast<int>(const boost::string_view str, std::string *err);
+template long strict_si_cast<long>(const boost::string_view str, std::string *err);
+template long long strict_si_cast<long long>(const boost::string_view str, std::string *err);
+template uint64_t strict_si_cast<uint64_t>(const boost::string_view str, std::string *err);
+template uint32_t strict_si_cast<uint32_t>(const boost::string_view str, std::string *err);
+
+uint64_t strict_sistrtoll(const boost::string_view str, std::string *err)
+{
+ return strict_si_cast<uint64_t>(str, err);
+}
uint64_t strict_sistrtoll(const char *str, std::string *err)
{
return strict_si_cast<uint64_t>(str, err);
}
+
+template<typename T>
+T strict_si_cast(const char *str, std::string *err)
+{
+ return strict_si_cast<T>(boost::string_view(str), err);
+}
+
+template int strict_si_cast<int>(const char *str, std::string *err);
+template long strict_si_cast<long>(const char *str, std::string *err);
+template long long strict_si_cast<long long>(const char *str, std::string *err);
+template uint64_t strict_si_cast<uint64_t>(const char *str, std::string *err);
+template uint32_t strict_si_cast<uint32_t>(const char *str, std::string *err);
float strict_strtof(const char *str, std::string *err);
+uint64_t strict_iecstrtoll(const char *str, std::string *err);
+
+template<typename T>
+T strict_iec_cast(const char *str, std::string *err);
+
uint64_t strict_sistrtoll(const char *str, std::string *err);
template<typename T>
#include <stdio.h>
-int64_t unit_to_bytesize(string val, ostream *pss)
-{
- if (val.empty()) {
- if (pss)
- *pss << "value is empty!";
- return -EINVAL;
- }
-
- char c = val[val.length()-1];
- int modifier = 0;
- if (!::isdigit(c)) {
- if (val.length() < 2) {
- if (pss)
- *pss << "invalid value: " << val;
- return -EINVAL;
- }
- val = val.substr(0,val.length()-1);
- switch (c) {
- case 'B':
- break;
- case 'k':
- case 'K':
- modifier = 10;
- break;
- case 'M':
- modifier = 20;
- break;
- case 'G':
- modifier = 30;
- break;
- case 'T':
- modifier = 40;
- break;
- case 'P':
- modifier = 50;
- break;
- case 'E':
- modifier = 60;
- break;
- default:
- if (pss)
- *pss << "unrecognized modifier '" << c << "'" << std::endl;
- return -EINVAL;
- }
- }
-
- if (val[0] == '+' || val[0] == '-') {
- if (pss)
- *pss << "expected numerical value, got: " << val;
- return -EINVAL;
- }
-
- string err;
- int64_t r = strict_strtoll(val.c_str(), 10, &err);
- if ((r == 0) && !err.empty()) {
- if (pss)
- *pss << err;
- return -1;
- }
- if (r < 0) {
- if (pss)
- *pss << "unable to parse positive integer '" << val << "'";
- return -1;
- }
- return (r * (1LL << modifier));
-}
-
int get_fs_stats(ceph_data_stats_t &stats, const char *path)
{
if (!path)
add_custom_target(compressor_plugins DEPENDS
${ceph_compressor_libs})
-
-if(WITH_EMBEDDED)
- include(MergeStaticLibraries)
- add_library(cephd_compressor_base STATIC ${compressor_srcs})
- set_target_properties(cephd_compressor_base PROPERTIES COMPILE_DEFINITIONS BUILDING_FOR_EMBEDDED)
- set(cephd_compressor_libs
- cephd_compressor_base
- cephd_compressor_snappy
- cephd_compressor_zlib
- cephd_compressor_zstd)
- if (HAVE_LZ4)
- list(APPEND cephd_compressor_libs cephd_compressor_lz4)
- endif()
- merge_static_libraries(cephd_compressor ${cephd_compressor_libs})
-endif()
SOVERSION 2
INSTALL_RPATH "")
install(TARGETS ceph_lz4 DESTINATION ${compressor_plugin_dir})
-
-if(WITH_EMBEDDED)
- add_library(cephd_compressor_lz4 STATIC ${lz4_sources})
- set_target_properties(cephd_compressor_lz4 PROPERTIES COMPILE_DEFINITIONS BUILDING_FOR_EMBEDDED)
-endif()
#include "ceph_ver.h"
#include "CompressionPluginLZ4.h"
-#ifndef BUILDING_FOR_EMBEDDED
-
// -----------------------------------------------------------------------------
const char *__ceph_plugin_version()
return instance->add(type, name, new CompressionPluginLZ4(cct));
}
-
-#endif // !BUILDING_FOR_EMBEDDED
SOVERSION 2
INSTALL_RPATH "")
install(TARGETS ceph_snappy DESTINATION ${compressor_plugin_dir})
-
-if(WITH_EMBEDDED)
- add_library(cephd_compressor_snappy STATIC ${snappy_sources})
- set_target_properties(cephd_compressor_snappy PROPERTIES COMPILE_DEFINITIONS BUILDING_FOR_EMBEDDED)
-endif()
#include "ceph_ver.h"
#include "CompressionPluginSnappy.h"
-#ifndef BUILDING_FOR_EMBEDDED
-
// -----------------------------------------------------------------------------
const char *__ceph_plugin_version()
return instance->add(type, name, new CompressionPluginSnappy(cct));
}
-
-#endif // !BUILDING_FOR_EMBEDDED
SOVERSION 2
INSTALL_RPATH "")
install(TARGETS ceph_zlib DESTINATION ${compressor_plugin_dir})
-
-if(WITH_EMBEDDED)
- add_library(cephd_compressor_zlib STATIC ${zlib_sources})
- target_include_directories(cephd_compressor_zlib PRIVATE "${CMAKE_SOURCE_DIR}/src/isa-l/include")
- set_target_properties(cephd_compressor_zlib PROPERTIES COMPILE_DEFINITIONS BUILDING_FOR_EMBEDDED)
-endif()
#include "ceph_ver.h"
#include "CompressionPluginZlib.h"
-#ifndef BUILDING_FOR_EMBEDDED
// -----------------------------------------------------------------------------
const char *__ceph_plugin_version()
return instance->add(type, name, new CompressionPluginZlib(cct));
}
-
-#endif // !BUILDING_FOR_EMBEDDED
target_link_libraries(ceph_zstd zstd)
set_target_properties(ceph_zstd PROPERTIES VERSION 2.0.0 SOVERSION 2)
install(TARGETS ceph_zstd DESTINATION ${compressor_plugin_dir})
-
-if(WITH_EMBEDDED)
- add_library(cephd_compressor_zstd STATIC ${zstd_sources})
- set_target_properties(cephd_compressor_zstd PROPERTIES COMPILE_DEFINITIONS BUILDING_FOR_EMBEDDED)
-endif()
#include "ceph_ver.h"
#include "CompressionPluginZstd.h"
-#ifndef BUILDING_FOR_EMBEDDED
-
// -----------------------------------------------------------------------------
const char *__ceph_plugin_version()
return instance->add(type, name, new CompressionPluginZstd(cct));
}
-
-#endif // !BUILDING_FOR_EMBEDDED
if (HAVE_BETTER_YASM_ELF64)
add_subdirectory(isa)
set(EC_ISA_LIB ec_isa)
- set(EC_ISA_EMBEDDED_LIB cephd_ec_isa)
endif (HAVE_BETTER_YASM_ELF64)
add_library(erasure_code STATIC ErasureCodePlugin.cc)
ec_lrc
ec_jerasure
ec_shec)
-
-if(WITH_EMBEDDED)
- include(MergeStaticLibraries)
- add_library(cephd_ec_base STATIC $<TARGET_OBJECTS:erasure_code_objs>)
- set_target_properties(cephd_ec_base PROPERTIES COMPILE_DEFINITIONS BUILDING_FOR_EMBEDDED)
- merge_static_libraries(cephd_ec cephd_ec_base ${EC_ISA_EMBEDDED_LIB} cephd_ec_jerasure cephd_ec_lrc cephd_ec_shec)
-endif()
set_target_properties(ec_isa PROPERTIES
INSTALL_RPATH "")
install(TARGETS ec_isa DESTINATION ${erasure_plugin_dir})
-
-if(WITH_EMBEDDED)
- add_library(cephd_ec_isa STATIC ${isa_srcs})
- set_target_properties(cephd_ec_isa PROPERTIES COMPILE_DEFINITIONS BUILDING_FOR_EMBEDDED)
-endif()
return 0;
}
-#ifndef BUILDING_FOR_EMBEDDED
-
// -----------------------------------------------------------------------------
const char *__erasure_code_version()
return instance.add(plugin_name, new ErasureCodePluginIsa());
}
-
-#endif
install(TARGETS ${plugin_name} DESTINATION ${erasure_plugin_dir})
add_dependencies(ec_jerasure ${plugin_name})
endforeach()
-
-if(WITH_EMBEDDED)
- add_library(cephd_ec_jerasure STATIC
- $<TARGET_OBJECTS:gf-complete_objs>
- $<TARGET_OBJECTS:jerasure_objs>
- ${jerasure_utils_src})
- set_target_properties(cephd_ec_jerasure PROPERTIES COMPILE_DEFINITIONS BUILDING_FOR_EMBEDDED)
-endif()
return 0;
}
-#ifndef BUILDING_FOR_EMBEDDED
-
const char *__erasure_code_version() { return CEPH_GIT_NICE_VER; }
int __erasure_code_init(char *plugin_name, char *directory)
}
return instance.add(plugin_name, new ErasureCodePluginJerasure());
}
-
-#endif
\ No newline at end of file
INSTALL_RPATH "")
target_link_libraries(ec_lrc json_spirit)
install(TARGETS ec_lrc DESTINATION ${erasure_plugin_dir})
-
-if(WITH_EMBEDDED)
- add_library(cephd_ec_lrc STATIC ${lrc_srcs})
- set_target_properties(cephd_ec_lrc PROPERTIES COMPILE_DEFINITIONS BUILDING_FOR_EMBEDDED)
-endif()
return 0;
};
-#ifndef BUILDING_FOR_EMBEDDED
-
const char *__erasure_code_version() { return CEPH_GIT_NICE_VER; }
int __erasure_code_init(char *plugin_name, char *directory)
ErasureCodePluginRegistry &instance = ErasureCodePluginRegistry::instance();
return instance.add(plugin_name, new ErasureCodePluginLrc());
}
-
-#endif
install(TARGETS ${plugin_name} DESTINATION ${erasure_plugin_dir})
add_dependencies(ec_shec ${plugin_name})
endforeach()
-
-if(WITH_EMBEDDED)
- # note we rely on the fact this will always be statically linked with jerasure
- add_library(cephd_ec_shec STATIC ${shec_utils_srcs})
- set_target_properties(cephd_ec_shec PROPERTIES COMPILE_DEFINITIONS BUILDING_FOR_EMBEDDED)
-endif()
return 0;
}
-#ifndef BUILDING_FOR_EMBEDDED
-
const char *__erasure_code_version() { return CEPH_GIT_NICE_VER; }
int __erasure_code_init(char *plugin_name, char *directory = (char *)"")
}
return instance.add(plugin_name, new ErasureCodePluginShec());
}
-
-#endif
\ No newline at end of file
#endif
}
+int reopen_as_null(CephContext *cct, int fd)
+{
+ int newfd = open("/dev/null", O_RDONLY);
+ if (newfd < 0) {
+ int err = errno;
+ lderr(cct) << __func__ << " failed to open /dev/null: " << cpp_strerror(err)
+ << dendl;
+ return -1;
+ }
+ // atomically dup newfd to target fd. target fd is implicitly closed if
+ // open and atomically replaced; see man dup2
+ int r = dup2(newfd, fd);
+ if (r < 0) {
+ int err = errno;
+ lderr(cct) << __func__ << " failed to dup2 " << fd << ": "
+ << cpp_strerror(err) << dendl;
+ return -1;
+ }
+ // close newfd (we cloned it to target fd)
+ VOID_TEMP_FAILURE_RETRY(close(newfd));
+ return 0;
+}
+
void global_init_postfork_start(CephContext *cct)
{
// restart log thread
* guarantee that nobody ever writes to stdout, even though they're not
* supposed to.
*/
- VOID_TEMP_FAILURE_RETRY(close(STDIN_FILENO));
- if (open("/dev/null", O_RDONLY) < 0) {
- int err = errno;
- derr << "global_init_daemonize: open(/dev/null) failed: error "
- << err << dendl;
- exit(1);
- }
+ reopen_as_null(cct, STDIN_FILENO);
const md_config_t *conf = cct->_conf;
if (pidfile_write(conf) < 0)
}
}
- VOID_TEMP_FAILURE_RETRY(close(STDOUT_FILENO));
- if (open("/dev/null", O_RDONLY) < 0) {
- int err = errno;
- derr << "global_init_daemonize: open(/dev/null) failed: error "
- << err << dendl;
- exit(1);
- }
+ reopen_as_null(cct, STDOUT_FILENO);
ldout(cct, 1) << "finished global_init_daemonize" << dendl;
}
*/
int global_init_shutdown_stderr(CephContext *cct)
{
- VOID_TEMP_FAILURE_RETRY(close(STDERR_FILENO));
- if (open("/dev/null", O_RDONLY) < 0) {
- int err = errno;
- derr << "global_init_shutdown_stderr: open(/dev/null) failed: error "
- << err << dendl;
- return 1;
- }
+ reopen_as_null(cct, STDERR_FILENO);
cct->_log->set_stderr_level(-1, -1);
return 0;
}
CEPH_CAP_LINK_EXCL | \
CEPH_CAP_XATTR_EXCL | \
CEPH_CAP_FILE_EXCL)
+#define CEPH_CAP_ANY_FILE_RD (CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE | \
+ CEPH_CAP_FILE_SHARED)
#define CEPH_CAP_ANY_FILE_WR (CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER | \
CEPH_CAP_FILE_EXCL)
#define CEPH_CAP_ANY_WR (CEPH_CAP_ANY_EXCL | CEPH_CAP_ANY_FILE_WR)
+++ /dev/null
-#pragma once
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define LIBCEPHD_VER_MAJOR 0
-#define LIBCEPHD_VER_MINOR 1
-#define LIBCEPHD_VER_PATCH 0
-
-#define LIBCEPHFD_VERSION(maj, min, extra) ((maj << 16) + (min << 8) + extra)
-#define LIBCEPHFD_VERSION_CODE LIBCEPHD_VERSION(LIBCEPHD_VER_MAJOR, LIBCEPHD_VER_MINOR, LIBCEPHD_VER_PATCH)
-
-#define CEPH_LIBCEPHD_API __attribute__ ((visibility ("default")))
-
-/**
- * Get the API version of libcephd. We use semantic versioning
- * for the API:
- *
- * - incrementing major is for backwards-incompatible changes
- * - incrementing minor is for backwards-compatible changes
- * - incrementing extra is for bug fixes
- *
- * @param pmajor where to store the major version number
- * @param pminor where to store the minor version number
- * @param ppatch where to store the patch version number
- */
-CEPH_LIBCEPHD_API void cephd_version(int *pmajor, int *pminor, int *ppatch);
-
-/**
- * Gets the runtime version of ceph.
- *
- * @param pmajor where to store the major version number
- * @param pminor where to store the minor version number
- * @param ppatch where to store the patch version number
- */
-CEPH_LIBCEPHD_API const char *ceph_version(int *pmajor, int *pminor, int *ppatch);
-
-/**
- * Generates a new cluster id (fsid) and returns a hexadecimal string.
- *
- * @param context where to the store the handle
- * @param buf where to write the fsid
- * @param len the size of buf in bytes (should be at least 37)
- * @returns 0 on success, negative error code on failure
- * @returns -ERANGE if the buffer is too short to contain the key
- */
-CEPH_LIBCEPHD_API int cephd_generate_fsid(char *buf, size_t len);
-
-/**
- * Generates a new secret key and returns a base64 encoded string.
- *
- * @param context where to the store the handle
- * @param buf where to write the fsid
- * @param len the size of buf in bytes
- * @returns 0 on success, negative error code on failure
- * @returns -ERANGE if the buffer is too short to contain the key
- */
-CEPH_LIBCEPHD_API int cephd_generate_secret_key(char *buf, size_t len);
-
-/**
- * Runs ceph-mon passing in command line args
- *
- * @param argc number of parameters
- * @param argv array of string arguments
- * @returns 0 on success, negative error code on failure
- */
-CEPH_LIBCEPHD_API int cephd_run_mon(int argc, const char **argv);
-
-/**
- * Runs ceph-osd passing in command line args
- *
- * @param argc number of parameters
- * @param argv array of string arguments
- * @returns 0 on success, negative error code on failure
- */
-CEPH_LIBCEPHD_API int cephd_run_osd(int argc, const char **argv);
-
-/**
- * Runs ceph-mds passing in command line args
- *
- * @param argc number of parameters
- * @param argv array of string arguments
- * @returns 0 on success, negative error code on failure
- */
-CEPH_LIBCEPHD_API int cephd_run_mds(int argc, const char **argv);
-
-/**
- * Runs ceph-rgw passing in command line args
- *
- * @param argc number of parameters
- * @param argv array of string arguments
- * @returns 0 on success, negative error code on failure
- */
-CEPH_LIBCEPHD_API int cephd_run_rgw(int argc, const char **argv);
-
-/**
- * Runs radosgw-admin passing in command line args
- *
- * @param argc number of parameters
- * @param argv array of string arguments
- * @returns 0 on success, negative error code on failure
- */
-CEPH_LIBCEPHD_API int cephd_run_rgw_admin(int argc, const char **argv);
-
-#ifdef __cplusplus
-}
-#endif
/* ibverbs experimental conditional compilation */
#cmakedefine HAVE_IBV_EXP
-/* define if embedded enabled */
-#cmakedefine WITH_EMBEDDED
-
/* define if cephfs enabled */
#cmakedefine WITH_CEPHFS
extern "C" {
#endif
-#ifndef BUILDING_FOR_EMBEDDED
#define CLS_VER(maj,min) \
int __cls_ver__## maj ## _ ##min = 0; \
int __cls_ver_maj = maj; \
const char *__cls_name = #name;
#define CLS_INIT(name) \
void CEPH_CLS_API __cls_init()
-#else
-#define CLS_VER(maj,min)
-#define CLS_NAME(name)
-#define CLS_INIT(name) \
-void CEPH_CLS_API name##_cls_init()
-#endif
#define CLS_METHOD_RD 0x1 /// method executes read operations
#define CLS_METHOD_WR 0x2 /// method executes write operations
// --
-struct prettybyte_t {
- uint64_t v;
- // cppcheck-suppress noExplicitConstructor
- prettybyte_t(uint64_t _v) : v(_v) {}
-};
-
-inline ostream& operator<<(ostream& out, const prettybyte_t& b)
-{
- uint64_t bump_after = 100;
- if (b.v > bump_after << 60)
- return out << (b.v >> 60) << " EB";
- if (b.v > bump_after << 50)
- return out << (b.v >> 50) << " PB";
- if (b.v > bump_after << 40)
- return out << (b.v >> 40) << " TB";
- if (b.v > bump_after << 30)
- return out << (b.v >> 30) << " GB";
- if (b.v > bump_after << 20)
- return out << (b.v >> 20) << " MB";
- if (b.v > bump_after << 10)
- return out << (b.v >> 10) << " kB";
- return out << b.v << " bytes";
+namespace {
+ inline ostream& format_u(ostream& out, const uint64_t v, const uint64_t n,
+ const int index, const uint64_t mult, const char* u)
+ {
+ char buffer[32];
+
+ if (index == 0) {
+ (void) snprintf(buffer, sizeof(buffer), "%" PRId64 "%s", n, u);
+ } else if ((v % mult) == 0) {
+ // If this is an even multiple of the base, always display
+ // without any decimal fraction.
+ (void) snprintf(buffer, sizeof(buffer), "%" PRId64 "%s", n, u);
+ } else {
+ // We want to choose a precision that reflects the best choice
+ // for fitting in 5 characters. This can get rather tricky when
+ // we have numbers that are very close to an order of magnitude.
+ // For example, when displaying 10239 (which is really 9.999K),
+ // we want only a single place of precision for 10.0K. We could
+ // develop some complex heuristics for this, but it's much
+ // easier just to try each combination in turn.
+ int i;
+ for (i = 2; i >= 0; i--) {
+ if (snprintf(buffer, sizeof(buffer), "%.*f%s", i,
+ static_cast<double>(v) / mult, u) <= 7)
+ break;
+ }
+ }
+
+ return out << buffer;
+ }
}
-struct si_t {
+/*
+ * Use this struct to pretty print values that should be formatted with a
+ * decimal unit prefix (the classic SI units). No actual unit will be added.
+ */
+struct si_u_t {
uint64_t v;
- // cppcheck-suppress noExplicitConstructor
- si_t(uint64_t _v) : v(_v) {}
+ explicit si_u_t(uint64_t _v) : v(_v) {};
};
-inline ostream& operator<<(ostream& out, const si_t& b)
+inline ostream& operator<<(ostream& out, const si_u_t& b)
{
- uint64_t bump_after = 100;
- if (b.v > bump_after << 60)
- return out << (b.v >> 60) << "E";
- if (b.v > bump_after << 50)
- return out << (b.v >> 50) << "P";
- if (b.v > bump_after << 40)
- return out << (b.v >> 40) << "T";
- if (b.v > bump_after << 30)
- return out << (b.v >> 30) << "G";
- if (b.v > bump_after << 20)
- return out << (b.v >> 20) << "M";
- if (b.v > bump_after << 10)
- return out << (b.v >> 10) << "k";
- return out << b.v;
+ uint64_t n = b.v;
+ int index = 0;
+ uint64_t mult = 1;
+ const char* u[] = {"", "k", "M", "G", "T", "P", "E"};
+
+ while (n >= 1000 && index < 7) {
+ n /= 1000;
+ index++;
+ mult *= 1000;
+ }
+
+ return format_u(out, b.v, n, index, mult, u[index]);
}
-struct pretty_si_t {
+/*
+ * Use this struct to pretty print values that should be formatted with a
+ * binary unit prefix (IEC units). Since binary unit prefixes are to be used for
+ * "multiples of units in data processing, data transmission, and digital
+ * information" (so bits and bytes) and so far bits are not printed, the unit
+ * "B" for "byte" is added besides the multiplier.
+ */
+struct byte_u_t {
uint64_t v;
- // cppcheck-suppress noExplicitConstructor
- pretty_si_t(uint64_t _v) : v(_v) {}
+ explicit byte_u_t(uint64_t _v) : v(_v) {};
};
-inline ostream& operator<<(ostream& out, const pretty_si_t& b)
+inline ostream& operator<<(ostream& out, const byte_u_t& b)
{
- uint64_t bump_after = 100;
- if (b.v > bump_after << 60)
- return out << (b.v >> 60) << " E";
- if (b.v > bump_after << 50)
- return out << (b.v >> 50) << " P";
- if (b.v > bump_after << 40)
- return out << (b.v >> 40) << " T";
- if (b.v > bump_after << 30)
- return out << (b.v >> 30) << " G";
- if (b.v > bump_after << 20)
- return out << (b.v >> 20) << " M";
- if (b.v > bump_after << 10)
- return out << (b.v >> 10) << " k";
- return out << b.v << " ";
-}
+ uint64_t n = b.v;
+ int index = 0;
+ const char* u[] = {"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB"};
-struct kb_t {
- uint64_t v;
- // cppcheck-suppress noExplicitConstructor
- kb_t(uint64_t _v) : v(_v) {}
-};
+ while (n >= 1024 && index < 7) {
+ n /= 1024;
+ index++;
+ }
-inline ostream& operator<<(ostream& out, const kb_t& kb)
-{
- uint64_t bump_after = 100;
- if (kb.v > bump_after << 40)
- return out << (kb.v >> 40) << " PB";
- if (kb.v > bump_after << 30)
- return out << (kb.v >> 30) << " TB";
- if (kb.v > bump_after << 20)
- return out << (kb.v >> 20) << " GB";
- if (kb.v > bump_after << 10)
- return out << (kb.v >> 10) << " MB";
- return out << kb.v << " kB";
+ return format_u(out, b.v, n, index, 1ULL << (10 * index), u[index]);
}
inline ostream& operator<<(ostream& out, const ceph_mon_subscribe_item& i)
#include "common/Formatter.h"
#include "include/types.h"
-int64_t unit_to_bytesize(string val, ostream *pss);
-
std::string bytes2str(uint64_t count);
struct ceph_data_stats
SOVERSION "1"
INSTALL_RPATH "")
install(TARGETS cls_kvs DESTINATION ${CMAKE_INSTALL_LIBDIR}/rados-classes)
-
-if(WITH_EMBEDDED)
- add_library(cephd_cls_kvs STATIC ${kvs_srcs})
- set_target_properties(cephd_cls_kvs PROPERTIES COMPILE_DEFINITIONS BUILDING_FOR_EMBEDDED)
-endif()
{
if (key == "compaction_threads") {
std::string err;
- int f = strict_sistrtoll(val.c_str(), &err);
+ int f = strict_iecstrtoll(val.c_str(), &err);
if (!err.empty())
return -EINVAL;
//Low priority threadpool is used for compaction
opt.env->SetBackgroundThreads(f, rocksdb::Env::Priority::LOW);
} else if (key == "flusher_threads") {
std::string err;
- int f = strict_sistrtoll(val.c_str(), &err);
+ int f = strict_iecstrtoll(val.c_str(), &err);
if (!err.empty())
return -EINVAL;
//High priority threadpool is used for flusher
opt.table_factory.reset(rocksdb::NewBlockBasedTableFactory(bbt_opts));
dout(10) << __func__ << " block size " << g_conf->rocksdb_block_size
- << ", block_cache size " << prettybyte_t(block_cache_size)
- << ", row_cache size " << prettybyte_t(row_cache_size)
+ << ", block_cache size " << byte_u_t(block_cache_size)
+ << ", row_cache size " << byte_u_t(row_cache_size)
<< "; shards "
<< (1 << g_conf->rocksdb_cache_shard_bits)
<< ", type " << g_conf->rocksdb_cache_type
+++ /dev/null
-include(MergeStaticLibraries)
-
-add_library(cephd_base STATIC
- libcephd.cc
- ../ceph_mon.cc
- ../ceph_osd.cc
- ../ceph_mds.cc)
-
-set_target_properties(cephd_base PROPERTIES COMPILE_DEFINITIONS BUILDING_FOR_EMBEDDED)
-
-set(merge_libs
- cephd_base
- cephd_compressor
- cephd_ec
- cephd_cls
- cephd_cls_kvs
- cephd_rados
- common
- common_utf8
- erasure_code
- global
- json_spirit
- kv
- mds
- mon
- os
- osd
- osdc)
-
-if(NOT WITH_SYSTEM_ROCKSDB)
- list(APPEND merge_libs ${ROCKSDB_LIBRARIES})
-endif(NOT WITH_SYSTEM_ROCKSDB)
-
-if(WITH_RADOSGW)
- list(APPEND merge_libs cephd_rgw)
-endif(WITH_RADOSGW)
-
-if(WITH_RBD)
- list(APPEND merge_libs cephd_rbd)
-endif(WITH_RBD)
-
-if(HAVE_ARMV8_CRC)
- list(APPEND merge_libs common_crc_aarch64)
-endif(HAVE_ARMV8_CRC)
-
-merge_static_libraries(cephd ${merge_libs})
-
-# TODO: install these libraries and add them to rpm and deb packages
-#install(TARGETS cephd DESTINATION ${CMAKE_INSTALL_LIBDIR})
-#install(FILES ../include/cephd/libcephd.h
-# DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/cephd)
+++ /dev/null
-#include "acconfig.h"
-#include "auth/Auth.h"
-#include "auth/Crypto.h"
-#include "auth/KeyRing.h"
-#include "common/ceph_argparse.h"
-#include "common/version.h"
-#include "common/PluginRegistry.h"
-#include "compressor/snappy/CompressionPluginSnappy.h"
-#include "compressor/zlib/CompressionPluginZlib.h"
-#include "compressor/zstd/CompressionPluginZstd.h"
-#include "erasure-code/ErasureCodePlugin.h"
-#if __x86_64__ && defined(HAVE_BETTER_YASM_ELF64)
-#include "erasure-code/isa/ErasureCodePluginIsa.h"
-#endif
-#include "erasure-code/jerasure/ErasureCodePluginJerasure.h"
-#include "erasure-code/jerasure/jerasure_init.h"
-#include "erasure-code/lrc/ErasureCodePluginLrc.h"
-#include "erasure-code/shec/ErasureCodePluginShec.h"
-#include "include/cephd/libcephd.h"
-#include "global/global_context.h"
-#include "global/global_init.h"
-#include "objclass/objclass.h"
-#include "osd/OSD.h"
-#include "osd/ClassHandler.h"
-
-// forward declarations of RADOS class init functions
-CLS_INIT(cephfs);
-CLS_INIT(hello);
-CLS_INIT(journal);
-CLS_INIT(kvs);
-CLS_INIT(lock);
-CLS_INIT(log);
-CLS_INIT(lua);
-CLS_INIT(numops);
-CLS_INIT(rbd);
-CLS_INIT(refcount);
-CLS_INIT(replica_log);
-CLS_INIT(rgw);
-CLS_INIT(statelog);
-CLS_INIT(timeindex);
-CLS_INIT(user);
-CLS_INIT(version);
-
-extern "C" void cephd_version(int *pmajor, int *pminor, int *ppatch)
-{
- if (pmajor)
- *pmajor = LIBCEPHD_VER_MAJOR;
- if (pminor)
- *pminor = LIBCEPHD_VER_MINOR;
- if (ppatch)
- *ppatch = LIBCEPHD_VER_PATCH;
-}
-
-extern "C" const char *ceph_version(int *pmajor, int *pminor, int *ppatch)
-{
- int major, minor, patch;
- const char *v = ceph_version_to_str();
-
- int n = sscanf(v, "%d.%d.%d", &major, &minor, &patch);
- if (pmajor)
- *pmajor = (n >= 1) ? major : 0;
- if (pminor)
- *pminor = (n >= 2) ? minor : 0;
- if (ppatch)
- *ppatch = (n >= 3) ? patch : 0;
- return v;
-}
-
-extern "C" int cephd_generate_fsid(char *buf, size_t len)
-{
- if (len < sizeof("b06ad912-70d7-4263-a5ff-011462a5929a")) {
- return -ERANGE;
- }
-
- uuid_d fsid;
- fsid.generate_random();
- fsid.print(buf);
-
- return 0;
-}
-
-extern "C" int cephd_generate_secret_key(char *buf, size_t len)
-{
- CephInitParameters iparams(CEPH_ENTITY_TYPE_MON);
- CephContext *cct = common_preinit(iparams, CODE_ENVIRONMENT_LIBRARY, 0);
- cct->_conf->apply_changes(NULL);
- cct->init_crypto();
-
- CryptoKey key;
- key.create(cct, CEPH_CRYPTO_AES);
-
- cct->put();
-
- string keystr;
- key.encode_base64(keystr);
- if (keystr.length() >= len) {
- return -ERANGE;
- }
- strcpy(buf, keystr.c_str());
- return keystr.length();
-}
-
-// load the embedded plugins. This is safe to call multiple
-// times in the same process
-void cephd_preload_embedded_plugins()
-{
- int r;
-
- // load erasure coding plugins
- {
- ErasureCodePlugin* plugin;
- ErasureCodePluginRegistry& reg = ErasureCodePluginRegistry::instance();
- Mutex::Locker l(reg.lock);
- reg.disable_dlclose = true;
-
- // initialize jerasure (and gf-complete)
- int w[] = { 4, 8, 16, 32 };
- r = jerasure_init(4, w);
- assert(r == 0);
-
- plugin = new ErasureCodePluginJerasure();
- r = reg.add("jerasure", plugin);
- if (r == -EEXIST) {
- delete plugin;
- }
- assert(r == 0);
-
- plugin = new ErasureCodePluginLrc();
- r = reg.add("lrc", plugin);
- if (r == -EEXIST) {
- delete plugin;
- }
- assert(r == 0);
-
- plugin = new ErasureCodePluginShec();
- r = reg.add("shec", plugin);
- if (r == -EEXIST) {
- delete plugin;
- }
- assert(r == 0);
-
-#if __x86_64__ && defined(HAVE_BETTER_YASM_ELF64)
- plugin = new ErasureCodePluginIsa();
- r = reg.add("isa", plugin);
- if (r == -EEXIST) {
- delete plugin;
- }
- assert(r == 0);
-#endif
- }
-
- // now load the compression plugins
- {
- Plugin *plugin;
- PluginRegistry *reg = g_ceph_context->get_plugin_registry();
- Mutex::Locker l(reg->lock);
- reg->disable_dlclose = true;
-
- plugin = new CompressionPluginSnappy(g_ceph_context);
- r = reg->add("compressor", "snappy", plugin);
- if (r == -EEXIST) {
- delete plugin;
- }
- assert(r == 0);
-
- plugin = new CompressionPluginZlib(g_ceph_context);
- r = reg->add("compressor", "zlib", plugin);
- if (r == -EEXIST) {
- delete plugin;
- }
- assert(r == 0);
-
- plugin = new CompressionPluginZstd(g_ceph_context);
- r = reg->add("compressor", "zstd", plugin);
- if (r == -EEXIST) {
- delete plugin;
- }
- assert(r == 0);
- }
-}
-
-void cephd_preload_rados_classes(OSD *osd)
-{
- // intialize RADOS classes
- {
- ClassHandler *class_handler = osd->class_handler;
- Mutex::Locker l(class_handler->mutex);
-
-#ifdef WITH_CEPHFS
- class_handler->add_embedded_class("cephfs");
- cephfs_cls_init();
-#endif
- class_handler->add_embedded_class("hello");
- hello_cls_init();
- class_handler->add_embedded_class("journal");
- journal_cls_init();
-#ifdef WITH_KVS
- class_handler->add_embedded_class("kvs");
- kvs_cls_init();
-#endif
- class_handler->add_embedded_class("lock");
- lock_cls_init();
- class_handler->add_embedded_class("log");
- log_cls_init();
- class_handler->add_embedded_class("lua");
- lua_cls_init();
- class_handler->add_embedded_class("numops");
- numops_cls_init();
-#ifdef WITH_RBD
- class_handler->add_embedded_class("rbd");
- rbd_cls_init();
-#endif
- class_handler->add_embedded_class("refcount");
- refcount_cls_init();
- class_handler->add_embedded_class("replica_log");
- replica_log_cls_init();
-#ifdef WITH_RADOSGW
- class_handler->add_embedded_class("rgw");
- rgw_cls_init();
-#endif
- class_handler->add_embedded_class("statelog");
- statelog_cls_init();
- class_handler->add_embedded_class("timeindex");
- timeindex_cls_init();
- class_handler->add_embedded_class("user");
- user_cls_init();
- class_handler->add_embedded_class("version");
- version_cls_init();
- }
-}
-
-extern "C" int cephd_mon(int argc, const char **argv);
-extern "C" int cephd_osd(int argc, const char **argv);
-extern "C" int cephd_mds(int argc, const char **argv);
-extern "C" int cephd_rgw(int argc, const char **argv);
-extern "C" int cephd_rgw_admin(int argc, const char **argv);
-
-int cephd_run_mon(int argc, const char **argv)
-{
- return cephd_mon(argc, argv);
-}
-
-int cephd_run_osd(int argc, const char **argv)
-{
- return cephd_osd(argc, argv);
-}
-
-int cephd_run_mds(int argc, const char **argv)
-{
- return cephd_mds(argc, argv);
-}
-
-
-int cephd_run_rgw(int argc, const char **argv)
-{
- return cephd_rgw(argc, argv);
-}
-
-int cephd_run_rgw_admin(int argc, const char **argv)
-{
- return cephd_rgw_admin(argc, argv);
-}
struct inodeno_t ino,
Inode **inode)
{
- int r = (cmount->get_client())->lookup_ino(ino, cmount->default_perms, inode);
- if (r) {
- return r;
- }
-
- assert(inode != NULL);
- assert(*inode != NULL);
-
- // Request the parent inode, so that we can look up the name
- Inode *parent;
- r = (cmount->get_client())->lookup_parent(*inode, cmount->default_perms, &parent);
- if (r && r != -EINVAL) {
- // Unexpected error
- (cmount->get_client())->ll_forget(*inode, 1);
- return r;
- } else if (r == -EINVAL) {
- // EINVAL indicates node without parents (root), drop out now
- // and don't try to look up the non-existent dentry.
- return 0;
- }
- // FIXME: I don't think this works; lookup_parent() returns 0 if the parent
- // is already in cache
- assert(parent != NULL);
-
- // Finally, get the name (dentry) of the requested inode
- r = (cmount->get_client())->lookup_name(*inode, parent, cmount->default_perms);
- if (r) {
- // Unexpected error
- (cmount->get_client())->ll_forget(parent, 1);
- (cmount->get_client())->ll_forget(*inode, 1);
- return r;
- }
-
- (cmount->get_client())->ll_forget(parent, 1);
- return 0;
+ return (cmount->get_client())->ll_lookup_inode(ino, cmount->default_perms, inode);
}
extern "C" int ceph_ll_lookup(struct ceph_mount_info *cmount,
endif(ENABLE_SHARED)
install(TARGETS librados DESTINATION ${CMAKE_INSTALL_LIBDIR})
-if(WITH_EMBEDDED)
- add_library(cephd_rados STATIC
- $<TARGET_OBJECTS:librados_api_obj>
- $<TARGET_OBJECTS:librados_objs>)
-endif()
if(WITH_LTTNG AND WITH_EVENTTRACE)
add_dependencies(librados_api_obj eventtrace_tp)
endif()
c->cond.Signal();
if (r == 0 && c->blp && c->blp->length() > 0) {
- if (c->out_buf && !c->blp->is_provided_buffer(c->out_buf))
- c->blp->copy(0, c->blp->length(), c->out_buf);
- c->rval = c->blp->length();
+ if (c->out_buf && !c->blp->is_contiguous()) {
+ c->rval = -ERANGE;
+ } else {
+ c->rval = c->blp->length();
+ }
}
if (c->callback_complete ||
LINK_FLAGS "-Wl,--exclude-libs,ALL")
endif(ENABLE_SHARED)
install(TARGETS librbd DESTINATION ${CMAKE_INSTALL_LIBDIR})
-
-if(WITH_EMBEDDED)
- add_library(cephd_rbd_base STATIC librbd.cc ${CMAKE_SOURCE_DIR}/src/common/ContextCompletion.cc)
- merge_static_libraries(cephd_rbd cephd_rbd_base rbd_internal rbd_types journal)
-endif()
plb.add_u64_counter(l_librbd_rd, "rd", "Reads", "r", perf_prio);
plb.add_u64_counter(l_librbd_rd_bytes, "rd_bytes", "Data size in reads",
- "rb", perf_prio);
+ "rb", perf_prio, unit_t(BYTES));
plb.add_time_avg(l_librbd_rd_latency, "rd_latency", "Latency of reads",
"rl", perf_prio);
plb.add_u64_counter(l_librbd_wr, "wr", "Writes", "w", perf_prio);
plb.add_u64_counter(l_librbd_wr_bytes, "wr_bytes", "Written data",
- "wb", perf_prio);
+ "wb", perf_prio, unit_t(BYTES));
plb.add_time_avg(l_librbd_wr_latency, "wr_latency", "Write latency",
"wl", perf_prio);
plb.add_u64_counter(l_librbd_discard, "discard", "Discards");
- plb.add_u64_counter(l_librbd_discard_bytes, "discard_bytes", "Discarded data");
+ plb.add_u64_counter(l_librbd_discard_bytes, "discard_bytes", "Discarded data", NULL, 0, unit_t(BYTES));
plb.add_time_avg(l_librbd_discard_latency, "discard_latency", "Discard latency");
plb.add_u64_counter(l_librbd_flush, "flush", "Flushes");
plb.add_u64_counter(l_librbd_aio_flush, "aio_flush", "Async flushes");
plb.add_time_avg(l_librbd_aio_flush_latency, "aio_flush_latency", "Latency of async flushes");
plb.add_u64_counter(l_librbd_ws, "ws", "WriteSames");
- plb.add_u64_counter(l_librbd_ws_bytes, "ws_bytes", "WriteSame data");
+ plb.add_u64_counter(l_librbd_ws_bytes, "ws_bytes", "WriteSame data", NULL, 0, unit_t(BYTES));
plb.add_time_avg(l_librbd_ws_latency, "ws_latency", "WriteSame latency");
plb.add_u64_counter(l_librbd_cmp, "cmp", "CompareAndWrites");
- plb.add_u64_counter(l_librbd_cmp_bytes, "cmp_bytes", "Data size in cmps");
+ plb.add_u64_counter(l_librbd_cmp_bytes, "cmp_bytes", "Data size in cmps", NULL, 0, unit_t(BYTES));
plb.add_time_avg(l_librbd_cmp_latency, "cmp_latency", "Latency of cmps");
plb.add_u64_counter(l_librbd_snap_create, "snap_create", "Snap creations");
plb.add_u64_counter(l_librbd_snap_remove, "snap_remove", "Snap removals");
plb.add_u64_counter(l_librbd_notify, "notify", "Updated header notifications");
plb.add_u64_counter(l_librbd_resize, "resize", "Resizes");
plb.add_u64_counter(l_librbd_readahead, "readahead", "Read ahead");
- plb.add_u64_counter(l_librbd_readahead_bytes, "readahead_bytes", "Data size in read ahead");
+ plb.add_u64_counter(l_librbd_readahead_bytes, "readahead_bytes", "Data size in read ahead", NULL, 0, unit_t(BYTES));
plb.add_u64_counter(l_librbd_invalidate_cache, "invalidate_cache", "Cache invalidates");
plb.add_time(l_librbd_opened_time, "opened_time", "Opened time",
#include "librbd/ObjectMap.h"
#include "librbd/ExclusiveLock.h"
#include "librbd/MirroringWatcher.h"
+#include "librbd/journal/DisabledPolicy.h"
#include "librbd/journal/RemoveRequest.h"
#include "librbd/image/RemoveRequest.h"
#include "librbd/operation/TrimRequest.h"
void RemoveRequest<I>::acquire_exclusive_lock() {
ldout(m_cct, 20) << dendl;
+ // do not attempt to open the journal when removing the image in case
+ // it's corrupt
+ if (m_image_ctx->test_features(RBD_FEATURE_JOURNALING)) {
+ RWLock::WLocker snap_locker(m_image_ctx->snap_lock);
+ m_image_ctx->set_journal_policy(new journal::DisabledPolicy());
+ }
+
using klass = RemoveRequest<I>;
if (m_force) {
Context *ctx = create_context_callback<
Context *ctx = create_context_callback<
klass, &klass::handle_invalidate>(this);
InvalidateRequest<I> *req = InvalidateRequest<I>::create(
- m_image_ctx, m_snap_id, false, ctx);
+ m_image_ctx, m_snap_id, true, ctx);
RWLock::RLocker owner_locker(m_image_ctx.owner_lock);
RWLock::WLocker snap_locker(m_image_ctx.snap_lock);
Context *ctx = create_context_callback<
klass, &klass::handle_resize_invalidate>(this);
InvalidateRequest<I> *req = InvalidateRequest<I>::create(
- m_image_ctx, m_snap_id, false, ctx);
+ m_image_ctx, m_snap_id, true, ctx);
RWLock::RLocker owner_locker(m_image_ctx.owner_lock);
RWLock::WLocker snap_locker(m_image_ctx.snap_lock);
if (mdsmap->get_epoch() != epoch) {
epoch = mdsmap->get_epoch();
- compat = get_mdsmap_compat_set_default();
+ compat = MDSMap::get_compat_set_default();
compat.merge(mdsmap->compat);
}
}
CDir *f = new CDir(inode, *p, cache, is_auth());
f->state_set(state & (MASK_STATE_FRAGMENT_KEPT | STATE_COMPLETE));
f->get_replicas() = get_replicas();
- f->dir_auth = dir_auth;
- f->init_fragment_pins();
f->set_version(get_version());
-
f->pop_me = pop_me;
f->pop_me.scale(fac);
f->set_dir_auth(get_dir_auth());
f->prepare_new_fragment(replay);
+ f->init_fragment_pins();
}
// repartition dentries
// new subtree root?
if (!was_subtree && is_subtree_root()) {
dout(10) << " new subtree root, adjusting auth_pins" << dendl;
-
+
+ inode->num_subtree_roots++;
+
// adjust nested auth pins
if (get_cum_auth_pins())
inode->adjust_nested_auth_pins(-1, NULL);
}
if (was_subtree && !is_subtree_root()) {
dout(10) << " old subtree root, adjusting auth_pins" << dendl;
-
+
+ inode->num_subtree_roots--;
+
// adjust nested auth pins
if (get_cum_auth_pins())
inode->adjust_nested_auth_pins(1, NULL);
void assimilate_dirty_rstat_inodes();
void assimilate_dirty_rstat_inodes_finish(MutationRef& mut, EMetaBlob *blob);
+ void mark_exporting() {
+ state_set(CDir::STATE_EXPORTING);
+ inode->num_exporting_dirs++;
+ }
+ void clear_exporting() {
+ state_clear(CDir::STATE_EXPORTING);
+ inode->num_exporting_dirs--;
+ }
+
protected:
version_t projected_version;
mempool::mds_co::list<fnode_t> projected_fnode;
dir->state_clear(CDir::STATE_STICKY);
dir->put(CDir::PIN_STICKY);
}
+
+ if (dir->is_subtree_root())
+ num_subtree_roots--;
// dump any remaining dentries, for debugging purposes
for (const auto &p : dir->items)
bool CInode::has_subtree_root_dirfrag(int auth)
{
- for (const auto &p : dirfrags) {
- if (p.second->is_subtree_root() &&
- (auth == -1 || p.second->dir_auth.first == auth))
+ if (num_subtree_roots > 0) {
+ if (auth == -1)
return true;
+ for (const auto &p : dirfrags) {
+ if (p.second->is_subtree_root() &&
+ p.second->dir_auth.first == auth)
+ return true;
+ }
}
return false;
}
bool CInode::has_subtree_or_exporting_dirfrag()
{
- for (const auto &p : dirfrags) {
- if (p.second->is_subtree_root() ||
- p.second->state_test(CDir::STATE_EXPORTING))
- return true;
- }
+ if (num_subtree_roots > 0 || num_exporting_dirs > 0)
+ return true;
return false;
}
// -- cache infrastructure --
private:
mempool::mds_co::compact_map<frag_t,CDir*> dirfrags; // cached dir fragments under this Inode
+
+  // For quickly determining whether this inode has a subtree root or an exporting dirfrag
+ int num_subtree_roots = 0;
+ int num_exporting_dirs = 0;
+
int stickydir_ref = 0;
scrub_info_t *scrub_infop = nullptr;
clear_file_locks();
assert(num_projected_xattrs == 0);
assert(num_projected_srnodes == 0);
+ assert(num_subtree_roots == 0);
+ assert(num_exporting_dirs == 0);
}
if (ev >= 3)
::decode(legacy_mds_map.compat, p);
else
- legacy_mds_map.compat = get_mdsmap_compat_set_base();
+ legacy_mds_map.compat = MDSMap::get_compat_set_base();
if (ev < 5) {
__u32 n;
::decode(n, p);
*/
class Filesystem
{
- public:
- fs_cluster_id_t fscid;
- MDSMap mds_map;
-
+public:
void encode(bufferlist& bl, uint64_t features) const;
void decode(bufferlist::iterator& p);
- Filesystem()
- :
- fscid(FS_CLUSTER_ID_NONE)
- {
- }
-
void dump(Formatter *f) const;
void print(std::ostream& out) const;
return false;
}
+
+ fs_cluster_id_t fscid = FS_CLUSTER_ID_NONE;
+ MDSMap mds_map;
};
WRITE_CLASS_ENCODER_FEATURES(Filesystem)
class FSMap {
protected:
- epoch_t epoch;
- uint64_t next_filesystem_id;
- fs_cluster_id_t legacy_client_fscid;
+ epoch_t epoch = 0;
+ uint64_t next_filesystem_id = FS_CLUSTER_ID_ANONYMOUS + 1;
+ fs_cluster_id_t legacy_client_fscid = FS_CLUSTER_ID_NONE;
CompatSet compat;
- bool enable_multiple;
- bool ever_enabled_multiple; // < the cluster had multiple MDSes enabled once
+ bool enable_multiple = false;
+ bool ever_enabled_multiple = false; // < the cluster had multiple MDSes enabled once
std::map<fs_cluster_id_t, std::shared_ptr<Filesystem> > filesystems;
friend class MDSMonitor;
friend class PaxosFSMap;
- FSMap()
- : epoch(0),
- next_filesystem_id(FS_CLUSTER_ID_ANONYMOUS + 1),
- legacy_client_fscid(FS_CLUSTER_ID_NONE),
- compat(get_mdsmap_compat_set_default()),
- enable_multiple(false), ever_enabled_multiple(false)
- { }
+ FSMap() : compat(MDSMap::get_compat_set_default()) {}
FSMap(const FSMap &rhs)
:
size_t filesystem_count() const {return filesystems.size();}
bool filesystem_exists(fs_cluster_id_t fscid) const {return filesystems.count(fscid) > 0;}
+ const std::shared_ptr<Filesystem> &get_filesystem(fs_cluster_id_t fscid) {return filesystems.at(fscid);}
std::shared_ptr<const Filesystem> get_filesystem(fs_cluster_id_t fscid) const {return std::const_pointer_cast<const Filesystem>(filesystems.at(fscid));}
std::shared_ptr<const Filesystem> get_filesystem(void) const {return std::const_pointer_cast<const Filesystem>(filesystems.begin()->second);}
std::shared_ptr<const Filesystem> get_filesystem(boost::string_view name) const
mdcache->wait_replay_cap_reconnect(m->get_ino(), new C_MDS_RetryMessage(mds, m));
return;
}
- dout(1) << "handle_client_caps on unknown ino " << m->get_ino() << ", dropping" << dendl;
+
+  /*
+   * "handle_client_caps on unknown ino xxx" is normal after migrating a subtree
+   * The sequence of events that causes this is:
+   * - client sends caps message to mds.a
+   * - mds finishes subtree migration, sends cap export to client
+   * - mds trims its cache
+   * - mds receives cap messages from client
+   */
+ dout(7) << "handle_client_caps on unknown ino " << m->get_ino() << ", dropping" << dendl;
m->put();
return;
}
}
assert(subtrees.empty());
- if (myin)
+ if (myin) {
remove_inode(myin);
-
+ assert(!myin);
+ }
+
// done!
dout(2) << "shutdown done." << dendl;
return true;
{
set<mds_rank_t> all, active;
mds->mdsmap->get_mds_set(all);
- mds->mdsmap->get_clientreplay_or_active_or_stopping_mds_set(active);
if (mds->get_state() == MDSMap::STATE_REJOIN)
- mds->mdsmap->get_mds_set(active, MDSMap::STATE_REJOIN);
+ mds->mdsmap->get_mds_set_lower_bound(active, MDSMap::STATE_REJOIN);
+ else
+ mds->mdsmap->get_mds_set_lower_bound(active, MDSMap::STATE_CLIENTREPLAY);
dout(10) << "do_open_ino_peer " << ino << " active " << active
<< " all " << all << " checked " << info.checked << dendl;
{
set<mds_rank_t> all, active;
mds->mdsmap->get_mds_set(all);
- mds->mdsmap->get_clientreplay_or_active_or_stopping_mds_set(active);
+ mds->mdsmap->get_mds_set_lower_bound(active, MDSMap::STATE_CLIENTREPLAY);
dout(10) << "_do_find_ino_peer " << fip.tid << " " << fip.ino
<< " active " << active << " all " << all
}
show_subtrees(10);
-
- // dir has no PIN_SUBTREE; CDir::purge_stolen() drops it.
- dir->dir_auth = CDIR_AUTH_DEFAULT;
}
diri->close_dirfrag(dir->get_frag());
mdr->internal_op_finish = cs;
enqueue_scrub_work(mdr);
+
+ // since recursive scrub is asynchronous, dump minimal output
+ // to not upset cli tools.
+ if (recursive) {
+ f->open_object_section("results");
+ f->close_section(); // results
+ }
}
void MDCache::enqueue_scrub_work(MDRequestRef& mdr)
"mds_max_purge_ops_per_pg",
"mds_max_purge_files",
"mds_inject_migrator_session_race",
+ "mds_inject_migrator_message_loss",
"clog_to_graylog",
"clog_to_graylog_host",
"clog_to_graylog_port",
void MDSDaemon::handle_mds_map(MMDSMap *m)
{
version_t epoch = m->get_epoch();
- dout(5) << "handle_mds_map epoch " << epoch << " from " << m->get_source() << dendl;
// is it new?
if (epoch <= mdsmap->get_epoch()) {
- dout(5) << " old map epoch " << epoch << " <= " << mdsmap->get_epoch()
- << ", discarding" << dendl;
+ dout(5) << "handle_mds_map old map epoch " << epoch << " <= "
+ << mdsmap->get_epoch() << ", discarding" << dendl;
m->put();
return;
}
+ dout(1) << "Updating MDS map to version " << epoch << " from " << m->get_source() << dendl;
+
entity_addr_t addr;
// keep old map, for a moment
mds_rank_t whoami = mdsmap->get_rank_gid(mds_gid_t(monc->get_global_id()));
// verify compatset
- CompatSet mdsmap_compat(get_mdsmap_compat_set_all());
+ CompatSet mdsmap_compat(MDSMap::get_compat_set_all());
dout(10) << " my compat " << mdsmap_compat << dendl;
dout(10) << " mdsmap compat " << mdsmap->compat << dendl;
if (!mdsmap_compat.writeable(mdsmap->compat)) {
if (mds_gid_t existing = mdsmap->find_mds_gid_by_name(name)) {
const MDSMap::mds_info_t& i = mdsmap->get_info_gid(existing);
if (i.global_id > myid) {
- dout(1) << "map replaced me with another mds." << whoami
+ dout(1) << "Map replaced me with another mds." << whoami
<< " with gid (" << i.global_id << ") larger than myself ("
<< myid << "); quitting!" << dendl;
// Call suicide() rather than respawn() because if someone else
}
}
- dout(1) << "map removed me (mds." << whoami << " gid:"
+ dout(1) << "Map removed me (mds." << whoami << " gid:"
<< myid << ") from cluster due to lost contact; respawning" << dendl;
respawn();
}
// Normal rankless case, we're marked as standby
if (new_state == MDSMap::STATE_STANDBY) {
beacon.set_want_state(mdsmap, new_state);
- dout(1) << "handle_mds_map standby" << dendl;
+ dout(1) << "Map has assigned me to become a standby" << dendl;
return;
}
assert(stopping == false);
stopping = true;
- dout(1) << "suicide. wanted state "
+ dout(1) << "suicide! Wanted state "
<< ceph_mds_state_name(beacon.get_want_state()) << dendl;
if (tick_event) {
void MDSDaemon::respawn()
{
- dout(1) << "respawn" << dendl;
+ dout(1) << "respawn!" << dendl;
+
+ /* Dump recent in case the MDS was stuck doing something which caused it to
+ * be removed from the MDSMap leading to respawn. */
+ g_ceph_context->_log->dump_recent();
char *new_argv[orig_argc+1];
dout(1) << " e: '" << orig_argv[0] << "'" << dendl;
#include "MDSMap.h"
#include "MDSRank.h"
-#define CEPH_MDS_PROTOCOL 30 /* cluster internal */
+#define CEPH_MDS_PROTOCOL 31 /* cluster internal */
class AuthAuthorizeHandlerRegistry;
class Message;
#define dout_subsys ceph_subsys_
// features
-CompatSet get_mdsmap_compat_set_all() {
+CompatSet MDSMap::get_compat_set_all() {
CompatSet::FeatureSet feature_compat;
CompatSet::FeatureSet feature_ro_compat;
CompatSet::FeatureSet feature_incompat;
return CompatSet(feature_compat, feature_ro_compat, feature_incompat);
}
-CompatSet get_mdsmap_compat_set_default() {
+CompatSet MDSMap::get_compat_set_default() {
CompatSet::FeatureSet feature_compat;
CompatSet::FeatureSet feature_ro_compat;
CompatSet::FeatureSet feature_incompat;
}
// base (pre v0.20)
-CompatSet get_mdsmap_compat_set_base() {
+CompatSet MDSMap::get_compat_set_base() {
CompatSet::FeatureSet feature_compat_base;
CompatSet::FeatureSet feature_incompat_base;
feature_incompat_base.insert(MDS_FEATURE_INCOMPAT_BASE);
m->data_pools.push_back(0);
m->metadata_pool = 1;
m->cas_pool = 2;
- m->compat = get_mdsmap_compat_set_all();
+ m->compat = get_compat_set_all();
// these aren't the defaults, just in case anybody gets confused
m->session_timeout = 61;
if (ev >= 3)
::decode(compat, p);
else
- compat = get_mdsmap_compat_set_base();
+ compat = get_compat_set_base();
if (ev < 5) {
__u32 n;
::decode(n, p);
state_valid = false;
}
} else if (prev == MDSMap::STATE_REJOIN) {
- if (next != MDSMap::STATE_ACTIVE
- && next != MDSMap::STATE_CLIENTREPLAY
- && next != MDSMap::STATE_STOPPED) {
+ if (next != MDSMap::STATE_ACTIVE &&
+ next != MDSMap::STATE_CLIENTREPLAY &&
+ next != MDSMap::STATE_STOPPED) {
state_valid = false;
}
- } else if (prev >= MDSMap::STATE_RECONNECT && prev < MDSMap::STATE_ACTIVE) {
+ } else if (prev >= MDSMap::STATE_RESOLVE && prev < MDSMap::STATE_ACTIVE) {
// Once I have entered replay, the only allowable transitions are to
// the next next along in the sequence.
if (next != prev + 1) {
class CephContext;
class health_check_map_t;
-extern CompatSet get_mdsmap_compat_set_all();
-extern CompatSet get_mdsmap_compat_set_default();
-extern CompatSet get_mdsmap_compat_set_base(); // pre v0.20
-
#define MDS_FEATURE_INCOMPAT_BASE CompatSet::Feature(1, "base v0.20")
#define MDS_FEATURE_INCOMPAT_CLIENTRANGES CompatSet::Feature(2, "client writeable ranges")
#define MDS_FEATURE_INCOMPAT_FILELAYOUT CompatSet::Feature(3, "default file layouts on dirs")
void encode_unversioned(bufferlist& bl) const;
};
+ static CompatSet get_compat_set_all();
+ static CompatSet get_compat_set_default();
+ static CompatSet get_compat_set_base(); // pre v0.20
protected:
// base map
- epoch_t epoch;
- bool enabled;
- std::string fs_name;
- uint32_t flags; // flags
- epoch_t last_failure; // mds epoch of last failure
- epoch_t last_failure_osd_epoch; // osd epoch of last failure; any mds entering replay needs
+ epoch_t epoch = 0;
+ bool enabled = false;
+ std::string fs_name = MDS_FS_NAME_DEFAULT;
+ uint32_t flags = CEPH_MDSMAP_DEFAULTS; // flags
+ epoch_t last_failure = 0; // mds epoch of last failure
+ epoch_t last_failure_osd_epoch = 0; // osd epoch of last failure; any mds entering replay needs
// at least this osdmap to ensure the blacklist propagates.
- utime_t created, modified;
+ utime_t created;
+ utime_t modified;
- mds_rank_t tableserver; // which MDS has snaptable
- mds_rank_t root; // which MDS has root directory
+ mds_rank_t tableserver = 0; // which MDS has snaptable
+ mds_rank_t root = 0; // which MDS has root directory
- __u32 session_timeout;
- __u32 session_autoclose;
- uint64_t max_file_size;
+ __u32 session_timeout = 60;
+ __u32 session_autoclose = 300;
+ uint64_t max_file_size = 1ULL<<40; /* 1TB */
std::vector<int64_t> data_pools; // file data pools available to clients (via an ioctl). first is the default.
- int64_t cas_pool; // where CAS objects go
- int64_t metadata_pool; // where fs metadata objects go
+ int64_t cas_pool = -1; // where CAS objects go
+ int64_t metadata_pool = -1; // where fs metadata objects go
/*
* in: the set of logical mds #'s that define the cluster. this is the set
* @up + @failed = @in. @in * @stopped = {}.
*/
- mds_rank_t max_mds; /* The maximum number of active MDSes. Also, the maximum rank. */
- mds_rank_t standby_count_wanted;
+ mds_rank_t max_mds = 1; /* The maximum number of active MDSes. Also, the maximum rank. */
+ mds_rank_t standby_count_wanted = -1;
string balancer; /* The name/version of the mantle balancer (i.e. the rados obj name) */
std::set<mds_rank_t> in; // currently defined cluster
std::map<mds_rank_t, mds_gid_t> up; // who is in those roles
std::map<mds_gid_t, mds_info_t> mds_info;
- uint8_t ever_allowed_features; //< bitmap of features the cluster has allowed
- uint8_t explicitly_allowed_features; //< bitmap of features explicitly enabled
+ uint8_t ever_allowed_features = 0; //< bitmap of features the cluster has allowed
+ uint8_t explicitly_allowed_features = 0; //< bitmap of features explicitly enabled
- bool inline_data_enabled;
+ bool inline_data_enabled = false;
- uint64_t cached_up_features;
+ uint64_t cached_up_features = 0;
public:
CompatSet compat;
friend class FSMap;
public:
- MDSMap()
- : epoch(0), enabled(false), fs_name(MDS_FS_NAME_DEFAULT),
- flags(CEPH_MDSMAP_DEFAULTS), last_failure(0),
- last_failure_osd_epoch(0),
- tableserver(0), root(0),
- session_timeout(0),
- session_autoclose(0),
- max_file_size(0),
- cas_pool(-1),
- metadata_pool(-1),
- max_mds(0),
- standby_count_wanted(-1),
- ever_allowed_features(0),
- explicitly_allowed_features(0),
- inline_data_enabled(false),
- cached_up_features(0)
- { }
-
bool get_inline_data_enabled() const { return inline_data_enabled; }
void set_inline_data_enabled(bool enabled) { inline_data_enabled = enabled; }
s.insert(p.second.rank);
}
- void
- get_clientreplay_or_active_or_stopping_mds_set(std::set<mds_rank_t>& s) const {
+ void get_mds_set_lower_bound(std::set<mds_rank_t>& s, DaemonState first) const {
for (std::map<mds_gid_t, mds_info_t>::const_iterator p = mds_info.begin();
p != mds_info.end();
++p)
- if (p->second.state >= STATE_CLIENTREPLAY && p->second.state <= STATE_STOPPING)
+ if (p->second.state >= first && p->second.state <= STATE_STOPPING)
s.insert(p->second.rank);
}
void get_mds_set(std::set<mds_rank_t>& s, DaemonState state) const {
// kick snaptable (resent AGREEs)
if (mdsmap->get_tableserver() == whoami) {
set<mds_rank_t> active;
- mdsmap->get_clientreplay_or_active_or_stopping_mds_set(active);
+ mdsmap->get_mds_set_lower_bound(active, MDSMap::STATE_CLIENTREPLAY);
snapserver->finish_recovery(active);
}
if (g_conf->mds_dump_cache_on_map)
mdcache->dump_cache();
+ cluster_degraded = mdsmap->is_degraded();
+
+ // mdsmap and oldmap can be discontinuous. failover might happen in the missing mdsmap.
+ // the 'restart' set tracks ranks that have restarted since the old mdsmap
+ set<mds_rank_t> restart;
+ // replaying mds does not communicate with other ranks
+ if (state >= MDSMap::STATE_RESOLVE) {
+ // did someone fail?
+ // new down?
+ set<mds_rank_t> olddown, down;
+ oldmap->get_down_mds_set(&olddown);
+ mdsmap->get_down_mds_set(&down);
+ for (const auto& r : down) {
+ if (oldmap->have_inst(r) && olddown.count(r) == 0) {
+ messenger->mark_down(oldmap->get_inst(r).addr);
+ handle_mds_failure(r);
+ }
+ }
+
+ // did someone fail?
+ // did their addr/inst change?
+ set<mds_rank_t> up;
+ mdsmap->get_up_mds_set(up);
+ for (const auto& r : up) {
+ auto& info = mdsmap->get_info(r);
+ if (oldmap->have_inst(r)) {
+ auto& oldinfo = oldmap->get_info(r);
+ if (info.inc != oldinfo.inc) {
+ messenger->mark_down(oldinfo.addr);
+ if (info.state == MDSMap::STATE_REPLAY ||
+ info.state == MDSMap::STATE_RESOLVE) {
+ restart.insert(r);
+ handle_mds_failure(r);
+ } else {
+ assert(info.state == MDSMap::STATE_STARTING ||
+ info.state == MDSMap::STATE_ACTIVE);
+ // -> stopped (missing) -> starting -> active
+ restart.insert(r);
+ mdcache->migrator->handle_mds_failure_or_stop(r);
+ }
+ }
+ } else {
+ if (info.state == MDSMap::STATE_REPLAY ||
+ info.state == MDSMap::STATE_RESOLVE) {
+ // -> starting/creating (missing) -> active (missing) -> replay -> resolve
+ restart.insert(r);
+ handle_mds_failure(r);
+ } else {
+ assert(info.state == MDSMap::STATE_CREATING ||
+ info.state == MDSMap::STATE_STARTING ||
+ info.state == MDSMap::STATE_ACTIVE);
+ }
+ }
+ }
+ }
+
// did it change?
if (oldstate != state) {
dout(1) << "handle_mds_map state change "
// RESOLVE
// is someone else newly resolving?
- if (is_resolve() || is_reconnect() || is_rejoin() ||
- is_clientreplay() || is_active() || is_stopping()) {
- if (!oldmap->is_resolving() && mdsmap->is_resolving()) {
+ if (state >= MDSMap::STATE_RESOLVE) {
+ if ((!oldmap->is_resolving() || !restart.empty()) && mdsmap->is_resolving()) {
set<mds_rank_t> resolve;
mdsmap->get_mds_set(resolve, MDSMap::STATE_RESOLVE);
dout(10) << " resolve set is " << resolve << dendl;
// REJOIN
// is everybody finally rejoining?
- if (is_rejoin() || is_clientreplay() || is_active() || is_stopping()) {
+ if (state >= MDSMap::STATE_REJOIN) {
// did we start?
if (!oldmap->is_rejoining() && mdsmap->is_rejoining())
rejoin_joint_start();
oldstate == MDSMap::STATE_STARTING) {
// ACTIVE|CLIENTREPLAY|REJOIN => we can discover from them.
set<mds_rank_t> olddis, dis;
- oldmap->get_mds_set(olddis, MDSMap::STATE_ACTIVE);
- oldmap->get_mds_set(olddis, MDSMap::STATE_CLIENTREPLAY);
- oldmap->get_mds_set(olddis, MDSMap::STATE_REJOIN);
- mdsmap->get_mds_set(dis, MDSMap::STATE_ACTIVE);
- mdsmap->get_mds_set(dis, MDSMap::STATE_CLIENTREPLAY);
- mdsmap->get_mds_set(dis, MDSMap::STATE_REJOIN);
- for (set<mds_rank_t>::iterator p = dis.begin(); p != dis.end(); ++p)
- if (*p != whoami && // not me
- olddis.count(*p) == 0) { // newly so?
- mdcache->kick_discovers(*p);
- mdcache->kick_open_ino_peers(*p);
+ oldmap->get_mds_set_lower_bound(olddis, MDSMap::STATE_REJOIN);
+ mdsmap->get_mds_set_lower_bound(dis, MDSMap::STATE_REJOIN);
+ for (const auto& r : dis) {
+ if (r == whoami)
+ continue; // not me
+ if (!olddis.count(r) || restart.count(r)) { // newly so?
+ mdcache->kick_discovers(r);
+ mdcache->kick_open_ino_peers(r);
}
+ }
}
}
- cluster_degraded = mdsmap->is_degraded();
if (oldmap->is_degraded() && !cluster_degraded && state >= MDSMap::STATE_ACTIVE) {
dout(1) << "cluster recovered." << dendl;
auto it = waiting_for_active_peer.find(MDS_RANK_NONE);
}
// did someone go active?
- if (oldstate >= MDSMap::STATE_CLIENTREPLAY &&
- (is_clientreplay() || is_active() || is_stopping())) {
+ if (state >= MDSMap::STATE_CLIENTREPLAY &&
+ oldstate >= MDSMap::STATE_CLIENTREPLAY) {
set<mds_rank_t> oldactive, active;
- oldmap->get_mds_set(oldactive, MDSMap::STATE_ACTIVE);
- oldmap->get_mds_set(oldactive, MDSMap::STATE_CLIENTREPLAY);
- mdsmap->get_mds_set(active, MDSMap::STATE_ACTIVE);
- mdsmap->get_mds_set(active, MDSMap::STATE_CLIENTREPLAY);
- for (set<mds_rank_t>::iterator p = active.begin(); p != active.end(); ++p)
- if (*p != whoami && // not me
- oldactive.count(*p) == 0) // newly so?
- handle_mds_recovery(*p);
- }
-
- // did someone fail?
- // new down?
- {
- set<mds_rank_t> olddown, down;
- oldmap->get_down_mds_set(&olddown);
- mdsmap->get_down_mds_set(&down);
- for (set<mds_rank_t>::iterator p = down.begin(); p != down.end(); ++p) {
- if (oldmap->have_inst(*p) && olddown.count(*p) == 0) {
- messenger->mark_down(oldmap->get_inst(*p).addr);
- handle_mds_failure(*p);
- }
+ oldmap->get_mds_set_lower_bound(oldactive, MDSMap::STATE_CLIENTREPLAY);
+ mdsmap->get_mds_set_lower_bound(active, MDSMap::STATE_CLIENTREPLAY);
+ for (const auto& r : active) {
+ if (r == whoami)
+ continue; // not me
+ if (!oldactive.count(r) || restart.count(r)) // newly so?
+ handle_mds_recovery(r);
}
}
- // did someone fail?
- // did their addr/inst change?
- {
- set<mds_rank_t> up;
- mdsmap->get_up_mds_set(up);
- for (set<mds_rank_t>::iterator p = up.begin(); p != up.end(); ++p) {
- if (oldmap->have_inst(*p) &&
- oldmap->get_inst(*p) != mdsmap->get_inst(*p)) {
- messenger->mark_down(oldmap->get_inst(*p).addr);
- handle_mds_failure(*p);
- }
- }
- }
-
- if (is_clientreplay() || is_active() || is_stopping()) {
+ if (state >= MDSMap::STATE_CLIENTREPLAY) {
// did anyone stop?
set<mds_rank_t> oldstopped, stopped;
oldmap->get_stopped_mds_set(oldstopped);
mdsmap->get_stopped_mds_set(stopped);
- for (set<mds_rank_t>::iterator p = stopped.begin(); p != stopped.end(); ++p)
- if (oldstopped.count(*p) == 0) // newly so?
- mdcache->migrator->handle_mds_failure_or_stop(*p);
+ for (const auto& r : stopped)
+ if (oldstopped.count(r) == 0) // newly so?
+ mdcache->migrator->handle_mds_failure_or_stop(r);
}
{
/* This function DOES put the passed message before returning*/
void Migrator::dispatch(Message *m)
{
+ if (unlikely(inject_message_loss)) {
+ if (inject_message_loss == m->get_type() - MDS_PORT_MIGRATOR) {
+ dout(0) << "inject message loss " << *m << dendl;
+ m->put();
+ return;
+ }
+ }
+
switch (m->get_type()) {
// import
case MSG_MDS_EXPORTDIRDISCOVER:
case MSG_MDS_EXPORTCAPS:
handle_export_caps(static_cast<MExportCaps*>(m));
break;
+ case MSG_MDS_EXPORTCAPSACK:
+ handle_export_caps_ack(static_cast<MExportCapsAck*>(m));
+ break;
case MSG_MDS_GATHERCAPS:
handle_gather_caps(static_cast<MGatherCaps*>(m));
break;
if (it->second.state == EXPORT_CANCELLED) {
export_state.erase(it);
- dir->state_clear(CDir::STATE_EXPORTING);
+ dir->clear_exporting();
// send pending import_maps?
cache->maybe_send_pending_resolves();
}
void Migrator::export_cancel_finish(CDir *dir)
{
assert(dir->state_test(CDir::STATE_EXPORTING));
- dir->state_clear(CDir::STATE_EXPORTING);
+ dir->clear_exporting();
// pinned by Migrator::export_notify_abort()
dir->auth_unpin(this);
mds->hit_export_target(ceph_clock_now(), dest, -1);
dir->auth_pin(this);
- dir->state_set(CDir::STATE_EXPORTING);
+ dir->mark_exporting();
MDRequestRef mdr = mds->mdcache->request_start_internal(CEPH_MDS_OP_EXPORTDIR);
mdr->more()->export_dir = dir;
mds->send_message_mds(new MExportDirCancel(dir->dirfrag(), it->second.tid), it->second.peer);
export_state.erase(it);
- dir->state_clear(CDir::STATE_EXPORTING);
+ dir->clear_exporting();
cache->maybe_send_pending_resolves();
return;
}
MutationRef mut = it->second.mut;
// remove from exporting list, clean up state
export_state.erase(it);
- dir->state_clear(CDir::STATE_EXPORTING);
+ dir->clear_exporting();
cache->show_subtrees();
audit();
cap->mark_importing();
}
- Capability::Import& im = import_map[it.first];
- im.cap_id = cap->get_cap_id();
- im.mseq = auth_cap ? it.second.mseq : cap->get_mseq();
- im.issue_seq = cap->get_last_seq() + 1;
+ // Always ask exporter mds to send cap export messages for auth caps.
+ // For non-auth caps, ask exporter mds to send cap export messages to
+ // clients who haven't opened sessions. The cap export messages will
+ // make clients open sessions.
+ if (auth_cap || session->connection == nullptr) {
+ Capability::Import& im = import_map[it.first];
+ im.cap_id = cap->get_cap_id();
+ im.mseq = auth_cap ? it.second.mseq : cap->get_mseq();
+ im.issue_seq = cap->get_last_seq() + 1;
+ }
if (peer >= 0) {
cap->merge(it.second, auth_cap);
mds->send_message_mds(ex, dest);
}
+/* This function DOES put the passed message before returning*/
+void Migrator::handle_export_caps_ack(MExportCapsAck *ack)
+{
+ mds_rank_t from = ack->get_source().num();
+ CInode *in = cache->get_inode(ack->ino);
+ if (in) {
+ assert(!in->is_auth());
+
+ dout(10) << "handle_export_caps_ack " << *ack << " from "
+ << ack->get_source() << " on " << *in << dendl;
+
+ map<client_t,Capability::Import> imported_caps;
+ map<client_t,uint64_t> caps_ids;
+ auto blp = ack->cap_bl.begin();
+ ::decode(imported_caps, blp);
+ ::decode(caps_ids, blp);
+
+ for (auto& it : imported_caps) {
+ Capability *cap = in->get_client_cap(it.first);
+ if (!cap || cap->get_cap_id() != caps_ids.at(it.first))
+ continue;
+
+ dout(7) << __func__ << " telling client." << it.first
+ << " exported caps on " << *in << dendl;
+ MClientCaps *m = new MClientCaps(CEPH_CAP_OP_EXPORT, in->ino(), 0,
+ cap->get_cap_id(), cap->get_mseq(),
+ mds->get_osd_epoch_barrier());
+ m->set_cap_peer(it.second.cap_id, it.second.issue_seq, it.second.mseq, from, 0);
+ mds->send_message_client_counted(m, it.first);
+
+ in->remove_client_cap(it.first);
+ }
+
+ mds->locker->request_inode_file_caps(in);
+ mds->locker->try_eval(in, CEPH_CAP_LOCKS);
+ }
+
+ ack->put();
+}
+
void Migrator::handle_gather_caps(MGatherCaps *m)
{
CInode *in = cache->get_inode(m->ino);
-
if (!in)
goto out;
dout(10) << "handle_gather_caps " << *m << " from " << m->get_source()
- << " on " << *in
- << dendl;
+ << " on " << *in << dendl;
+
if (in->is_any_caps() &&
!in->is_auth() &&
!in->is_ambiguous_auth() &&
// force open client sessions and finish cap import
mds->server->finish_force_open_sessions(imported_session_map);
- map<client_t,Capability::Import> imported_caps;
-
auto it = peer_exports.find(in);
assert(it != peer_exports.end());
// clients will release caps from the exporter when they receive the cap import message.
+ map<client_t,Capability::Import> imported_caps;
finish_import_inode_caps(in, from, false, imported_session_map, it->second, imported_caps);
mds->locker->eval(in, CEPH_CAP_LOCKS, true);
+
+ if (!imported_caps.empty()) {
+ MExportCapsAck *ack = new MExportCapsAck(in->ino());
+ map<client_t,uint64_t> peer_caps_ids;
+ for (auto &p : imported_caps )
+ peer_caps_ids[p.first] = it->second.at(p.first).cap_id;
+
+ ::encode(imported_caps, ack->cap_bl);
+ ::encode(peer_caps_ids, ack->cap_bl);
+ mds->send_message_mds(ack, from);
+ }
+
in->auth_unpin(this);
}
inject_session_race = conf->get_val<bool>("mds_inject_migrator_session_race");
dout(0) << "mds_inject_migrator_session_race is " << inject_session_race << dendl;
}
+
+ if (changed.count("mds_inject_migrator_message_loss")) {
+ inject_message_loss = g_conf->get_val<int64_t>("mds_inject_migrator_message_loss");
+ dout(0) << "mds_inject_migrator_message_loss is " << inject_message_loss << dendl;
+ }
}
// -- cons --
Migrator(MDSRank *m, MDCache *c) : mds(m), cache(c) {
inject_session_race = g_conf->get_val<bool>("mds_inject_migrator_session_race");
+ inject_message_loss = g_conf->get_val<int64_t>("mds_inject_migrator_message_loss");
}
void handle_conf_change(const struct md_config_t *conf,
void handle_export_finish(MExportDirFinish *m);
void handle_export_caps(MExportCaps *m);
+ void handle_export_caps_ack(MExportCapsAck *m);
void logged_import_caps(CInode *in,
mds_rank_t from,
map<client_t,pair<Session*,uint64_t> >& imported_session_map,
MDSRank *mds;
MDCache *cache;
bool inject_session_race = false;
+ int inject_message_loss = 0;
};
#endif
delayed_flush = nullptr;
}
+ if (int r = journaler.get_error()) {
+ derr << "Error " << r << " recovering write_pos" << dendl;
+ on_error->complete(r);
+ return could_consume;
+ }
+
if (!journaler.is_readable()) {
dout(10) << " not readable right now" << dendl;
// Because we are the writer and the reader of the journal
Mutex::Locker l(lock);
if (r == 0) {
_consume();
+ } else if (r != -EAGAIN) {
+ on_error->complete(r);
}
}));
}
mdr->more()->flock_was_waiting = true;
mds->locker->drop_locks(mdr.get());
mdr->drop_local_auth_pins();
+ mdr->mark_event("failed to add lock, waiting");
+ mdr->mark_nowarn();
cur->add_waiter(CInode::WAIT_FLOCK, new C_MDS_RetryRequest(mdcache, mdr));
}
} else
if (features & CEPH_FEATURE_MDS_QUOTA)
::decode(quota, p);
else
- memset(&quota, 0, sizeof(quota));
+ quota = quota_info_t{};
if ((features & CEPH_FEATURE_FS_FILE_LAYOUT_V2))
::decode(layout.pool_ns, p);
class MExportCapsAck : public Message {
public:
inodeno_t ino;
+ bufferlist cap_bl;
MExportCapsAck() :
Message(MSG_MDS_EXPORTCAPSACK) {}
void encode_payload(uint64_t features) override {
::encode(ino, payload);
+ ::encode(cap_bl, payload);
}
void decode_payload() override {
- bufferlist::iterator p = payload.begin();
+ auto p = payload.begin();
::decode(ino, p);
+ ::decode(cap_bl, p);
}
-
};
#endif
// is "useful" so that mgr plugins filtering on prio will get some
// data (albeit probably more than they wanted)
uint8_t priority = PerfCountersBuilder::PRIO_USEFUL;
+ enum unit_t unit;
void encode(bufferlist &bl) const
{
// TODO: decide whether to drop the per-type
// encoding here, we could rely on the MgrReport
// verisoning instead.
- ENCODE_START(2, 1, bl);
+ ENCODE_START(3, 1, bl);
::encode(path, bl);
::encode(description, bl);
::encode(nick, bl);
static_assert(sizeof(type) == 1, "perfcounter_type_d must be one byte");
::encode((uint8_t)type, bl);
::encode(priority, bl);
+ ::encode((uint8_t)unit, bl);
ENCODE_FINISH(bl);
}
void decode(bufferlist::iterator &p)
{
- DECODE_START(2, p);
+ DECODE_START(3, p);
::decode(path, p);
::decode(description, p);
::decode(nick, p);
if (struct_v >= 2) {
::decode(priority, p);
}
+ if (struct_v >= 3) {
+ ::decode((uint8_t&)unit, p);
+ }
DECODE_FINISH(p);
}
};
MMonSubscribeAck() : Message(CEPH_MSG_MON_SUBSCRIBE_ACK),
interval(0) {
- memset(&fsid, 0, sizeof(fsid));
}
MMonSubscribeAck(uuid_d& f, int i) : Message(CEPH_MSG_MON_SUBSCRIBE_ACK),
interval(i), fsid(f) { }
}
});
return f.get();
- } else if (what == "config") {
+ } else if (what.substr(0, 6) == "config") {
PyFormatter f;
- g_conf->show_config(&f);
+ if (what == "config_options") {
+ g_conf->config_options(&f);
+ } else if (what == "config") {
+ g_conf->show_config(&f);
+ }
return f.get();
} else if (what == "mon_map") {
PyFormatter f;
}
f.dump_unsigned("type", type.type);
f.dump_unsigned("priority", type.priority);
+ f.dump_unsigned("units", type.unit);
f.close_section();
}
f.close_section();
auto p = pg_map.osd_stat.find(osd);
if (p == pg_map.osd_stat.end()) {
missing_stats.insert(osd);
- }
- if (p->second.num_pgs > 0) {
+ } else if (p->second.num_pgs > 0) {
stored_pgs.insert(osd);
}
}
type.nick = data.nick;
}
type.type = data.type;
- type.priority = perf_counters.get_adjusted_priority(data.prio);
+ type.priority = perf_counters.get_adjusted_priority(data.prio);
+ type.unit = data.unit;
report->declare_types.push_back(std::move(type));
session->declared.insert(path);
}
void MgrClient::update_osd_health(std::vector<OSDHealthMetric>&& metrics)
{
+ Mutex::Locker l(lock);
osd_health_metrics = std::move(metrics);
}
health_detail.append("; ");
stringstream ss;
ss << "store is getting too big! "
- << prettybyte_t(stats.store_stats.bytes_total)
- << " >= " << prettybyte_t(g_conf->mon_data_size_warn);
+ << byte_u_t(stats.store_stats.bytes_total)
+ << " >= " << byte_u_t(g_conf->mon_data_size_warn);
health_detail.append(ss.str());
}
return err;
}
dout(0) << __func__ << " avail " << ours.fs_stats.avail_percent << "%"
- << " total " << prettybyte_t(ours.fs_stats.byte_total)
- << ", used " << prettybyte_t(ours.fs_stats.byte_used)
- << ", avail " << prettybyte_t(ours.fs_stats.byte_avail) << dendl;
+ << " total " << byte_u_t(ours.fs_stats.byte_total)
+ << ", used " << byte_u_t(ours.fs_stats.byte_used)
+ << ", avail " << byte_u_t(ours.fs_stats.byte_avail) << dendl;
ours.last_update = ceph_clock_now();
return update_store_stats(ours);
for (const auto &gid : to_fail) {
// Standby replays don't write, so it isn't important to
// wait for an osdmap propose here: ignore return value.
- mon->mdsmon()->fail_mds_gid(gid);
+ mon->mdsmon()->fail_mds_gid(fsmap, gid);
}
fsmap.erase_filesystem(fs->fscid);
stats.store_stats.bytes_misc = extra["misc"];
stats.last_update = ceph_clock_now();
dout(10) << __func__ << " avail " << stats.fs_stats.avail_percent << "%"
- << " total " << prettybyte_t(stats.fs_stats.byte_total)
- << ", used " << prettybyte_t(stats.fs_stats.byte_used)
- << ", avail " << prettybyte_t(stats.fs_stats.byte_avail) << dendl;
+ << " total " << byte_u_t(stats.fs_stats.byte_total)
+ << ", used " << byte_u_t(stats.fs_stats.byte_used)
+ << ", avail " << byte_u_t(stats.fs_stats.byte_avail) << dendl;
// MON_DISK_{LOW,CRIT,BIG}
health_check_map_t next;
ss << "mon%plurals% %names% %isorare% using a lot of disk space";
auto& d = next.add("MON_DISK_BIG", HEALTH_WARN, ss.str());
ss2 << "mon." << mon->name << " is "
- << prettybyte_t(stats.store_stats.bytes_total)
+ << byte_u_t(stats.store_stats.bytes_total)
<< " >= mon_data_size_warn ("
- << prettybyte_t(g_conf->mon_data_size_warn) << ")";
+ << byte_u_t(g_conf->mon_data_size_warn) << ")";
d.detail.push_back(ss2.str());
}
quorum_checks[mon->rank] = next;
changed = true;
} else {
- // tell the leader
- mon->messenger->send_message(new MMonHealthChecks(next),
- mon->monmap->get_inst(mon->get_leader()));
+ // tell the leader, but only if the quorum is luminous
+ if (mon->quorum_mon_features.contains_all(
+ ceph::features::mon::FEATURE_LUMINOUS)) {
+ mon->messenger->send_message(new MMonHealthChecks(next),
+ mon->monmap->get_inst(mon->get_leader()));
+ }
}
return changed;
{
dout(10) << "create_initial -- creating initial map" << dendl;
LogEntry e;
- memset(&e.who, 0, sizeof(e.who));
e.name = g_conf->name;
e.stamp = ceph_clock_now();
e.prio = CLOG_INFO;
t->erase(MDS_HEALTH_PREFIX, stringify(*i));
}
pending_daemon_health_rm.clear();
- remove_from_metadata(t);
+ remove_from_metadata(pending, t);
// health
health_check_map_t new_checks;
version_t seq = m->get_seq();
dout(15) << "_note_beacon " << *m << " noting time" << dendl;
- last_beacon[gid].stamp = ceph_clock_now();
- last_beacon[gid].seq = seq;
+ auto &beacon = last_beacon[gid];
+ beacon.stamp = mono_clock::now();
+ beacon.seq = seq;
}
bool MDSMonitor::preprocess_beacon(MonOpRequestRef op)
MDSMap::mds_info_t info;
epoch_t effective_epoch = 0;
- const auto &fsmap = get_working_fsmap();
+ const auto &fsmap = get_fsmap();
// check privileges, ignore if fails
MonSession *session = m->get_session();
dout(7) << "mds_beacon " << *m << " is not in fsmap (state "
<< ceph_mds_state_name(state) << ")" << dendl;
+ /* We can't send an MDSMap this MDS was a part of because we no longer
+ * know which FS it was part of. Nor does this matter. Sending an empty
+ * MDSMap is sufficient for getting the MDS to respawn.
+ */
MDSMap null_map;
null_map.epoch = fsmap.epoch;
null_map.compat = fsmap.compat;
MMDSLoadTargets *m = static_cast<MMDSLoadTargets*>(op->get_req());
dout(10) << "preprocess_offload_targets " << *m << " from " << m->get_orig_source() << dendl;
- auto &fsmap = get_working_fsmap();
+ const auto &fsmap = get_fsmap();
// check privileges, ignore message if fails
MonSession *session = m->get_session();
if (!session)
- goto done;
+ goto ignore;
if (!session->is_capable("mds", MON_CAP_X)) {
dout(0) << "preprocess_offload_targets got MMDSLoadTargets from entity with insufficient caps "
<< session->caps << dendl;
- goto done;
+ goto ignore;
}
if (fsmap.gid_exists(m->global_id) &&
m->targets == fsmap.get_info_gid(m->global_id).export_targets)
- goto done;
+ goto ignore;
return false;
- done:
+ ignore:
+ mon->no_reply(op);
return true;
}
const MDSMap::mds_info_t &existing_info =
pending.get_info_gid(existing);
mon->clog->info() << existing_info.human_name() << " restarted";
- fail_mds_gid(existing);
+ fail_mds_gid(pending, existing);
failed_mds = true;
}
if (failed_mds) {
info.standby_for_name);
if (leaderinfo && (leaderinfo->rank >= 0)) {
const auto &fscid = pending.mds_roles.at(leaderinfo->global_id);
- const auto &fs = pending.get_filesystem(fscid);
pending.modify_daemon(gid, [fscid, leaderinfo](
MDSMap::mds_info_t *info) {
}
// initialize the beacon timer
- last_beacon[gid].stamp = ceph_clock_now();
- last_beacon[gid].seq = seq;
+ auto &beacon = last_beacon[gid];
+ beacon.stamp = mono_clock::now();
+ beacon.seq = seq;
// new incompat?
if (!pending.compat.writeable(m->get_compat())) {
return false;
}
- fail_mds_gid(gid);
+ fail_mds_gid(pending, gid);
assert(mon->osdmon()->is_writeable());
request_proposal(mon->osdmon());
stringstream ss, ds;
map<string, cmd_vartype> cmdmap;
- const auto &fsmap = get_working_fsmap();
+ const auto &fsmap = get_fsmap();
if (!cmdmap_from_json(m->cmd, &cmdmap, ss)) {
// ss has reason for failure
cmd_getval(g_ceph_context, cmdmap, "prefix", prefix);
string format;
cmd_getval(g_ceph_context, cmdmap, "format", format, string("plain"));
- boost::scoped_ptr<Formatter> f(Formatter::create(format));
+ std::unique_ptr<Formatter> f(Formatter::create(format));
MonSession *session = m->get_session();
if (!session) {
int64_t epocharg;
epoch_t epoch;
- const FSMap *fsmapp = &get_fsmap();
+ const FSMap *fsmapp = &fsmap;
FSMap dummy;
if (cmd_getval(g_ceph_context, cmdmap, "epoch", epocharg)) {
epoch = epocharg;
f->open_object_section("mds");
f->dump_string("name", info.name);
std::ostringstream get_err;
- r = dump_metadata(info.name, f.get(), get_err);
+ r = dump_metadata(fsmap, info.name, f.get(), get_err);
if (r == -EINVAL || r == -ENOENT) {
// Drop error, list what metadata we do have
dout(1) << get_err.str() << dendl;
} else {
// Dump a single daemon's metadata
f->open_object_section("mds_metadata");
- r = dump_metadata(who, f.get(), ss);
+ r = dump_metadata(fsmap, who, f.get(), ss);
f->close_section();
}
f->flush(ds);
} else if (prefix == "fs ls") {
if (f) {
f->open_array_section("filesystems");
- {
- for (const auto &p : fsmap.filesystems) {
- const auto &fs = p.second;
- f->open_object_section("filesystem");
- {
- const MDSMap &mds_map = fs->mds_map;
- f->dump_string("name", mds_map.fs_name);
- /* Output both the names and IDs of pools, for use by
- * humans and machines respectively */
- f->dump_string("metadata_pool", mon->osdmon()->osdmap.get_pool_name(
- mds_map.metadata_pool));
- f->dump_int("metadata_pool_id", mds_map.metadata_pool);
- f->open_array_section("data_pool_ids");
- {
- for (auto dpi = mds_map.data_pools.begin();
- dpi != mds_map.data_pools.end(); ++dpi) {
- f->dump_int("data_pool_id", *dpi);
- }
- }
- f->close_section();
-
- f->open_array_section("data_pools");
- {
- for (auto dpi = mds_map.data_pools.begin();
- dpi != mds_map.data_pools.end(); ++dpi) {
- const auto &name = mon->osdmon()->osdmap.get_pool_name(
- *dpi);
- f->dump_string("data_pool", name);
- }
- }
+ for (const auto &p : fsmap.filesystems) {
+ const auto &fs = p.second;
+ f->open_object_section("filesystem");
+ {
+ const MDSMap &mds_map = fs->mds_map;
+ f->dump_string("name", mds_map.fs_name);
+ /* Output both the names and IDs of pools, for use by
+ * humans and machines respectively */
+ f->dump_string("metadata_pool", mon->osdmon()->osdmap.get_pool_name(
+ mds_map.metadata_pool));
+ f->dump_int("metadata_pool_id", mds_map.metadata_pool);
+ f->open_array_section("data_pool_ids");
+ for (const auto &id : mds_map.data_pools) {
+ f->dump_int("data_pool_id", id);
+ }
+ f->close_section();
- f->close_section();
+ f->open_array_section("data_pools");
+ for (const auto &id : mds_map.data_pools) {
+ const auto &name = mon->osdmon()->osdmap.get_pool_name(id);
+ f->dump_string("data_pool", name);
}
f->close_section();
}
+ f->close_section();
}
f->close_section();
f->flush(ds);
ds << "name: " << mds_map.fs_name << ", metadata pool: "
<< md_pool_name << ", data pools: [";
- for (auto dpi : mds_map.data_pools) {
- const string &pool_name = mon->osdmon()->osdmap.get_pool_name(dpi);
+ for (const auto &id : mds_map.data_pools) {
+ const string &pool_name = mon->osdmon()->osdmap.get_pool_name(id);
ds << pool_name << " ";
}
ds << "]" << std::endl;
return false;
}
-bool MDSMonitor::fail_mds_gid(mds_gid_t gid)
+bool MDSMonitor::fail_mds_gid(FSMap &fsmap, mds_gid_t gid)
{
- auto &pending = get_pending_fsmap_writeable();
-
- const MDSMap::mds_info_t &info = pending.get_info_gid(gid);
+ const MDSMap::mds_info_t &info = fsmap.get_info_gid(gid);
dout(10) << "fail_mds_gid " << gid << " mds." << info.name << " role " << info.rank << dendl;
epoch_t blacklist_epoch = 0;
blacklist_epoch = mon->osdmon()->blacklist(info.addr, until);
}
- pending.erase(gid, blacklist_epoch);
+ fsmap.erase(gid, blacklist_epoch);
last_beacon.erase(gid);
if (pending_daemon_health.count(gid)) {
pending_daemon_health.erase(gid);
return blacklist_epoch != 0;
}
-mds_gid_t MDSMonitor::gid_from_arg(const std::string& arg, std::ostream &ss)
+mds_gid_t MDSMonitor::gid_from_arg(const FSMap &fsmap, const std::string &arg, std::ostream &ss)
{
- const auto &fsmap = get_working_fsmap();
-
// Try parsing as a role
mds_role_t role;
std::ostringstream ignore_err; // Don't spam 'ss' with parse_role errors
- int r = parse_role(arg, &role, ignore_err);
+ int r = fsmap.parse_role(arg, &role, ignore_err);
if (r == 0) {
// See if a GID is assigned to this role
const auto &fs = fsmap.get_filesystem(role.fscid);
return MDS_GID_NONE;
}
-int MDSMonitor::fail_mds(std::ostream &ss, const std::string &arg,
- MDSMap::mds_info_t *failed_info)
+int MDSMonitor::fail_mds(FSMap &fsmap, std::ostream &ss,
+ const std::string &arg, MDSMap::mds_info_t *failed_info)
{
assert(failed_info != nullptr);
- mds_gid_t gid = gid_from_arg(arg, ss);
+ mds_gid_t gid = gid_from_arg(fsmap, arg, ss);
if (gid == MDS_GID_NONE) {
return 0;
}
// Take a copy of the info before removing the MDS from the map,
// so that the caller knows which mds (if any) they ended up removing.
- *failed_info = get_pending_fsmap().get_info_gid(gid);
+ *failed_info = fsmap.get_info_gid(gid);
- fail_mds_gid(gid);
+ fail_mds_gid(fsmap, gid);
ss << "failed mds gid " << gid;
assert(mon->osdmon()->is_writeable());
request_proposal(mon->osdmon());
}
}
- r = filesystem_command(op, prefix, cmdmap, ss);
+ r = filesystem_command(pending, op, prefix, cmdmap, ss);
if (r >= 0) {
goto out;
} else if (r == -EAGAIN) {
goto out;
}
- r = legacy_filesystem_command(op, prefix, cmdmap, ss);
+ r = legacy_filesystem_command(pending, op, prefix, cmdmap, ss);
if (r == -ENOSYS && ss.str().empty()) {
ss << "unrecognized command";
}
}
-
-/**
- * Given one of the following forms:
- * <fs name>:<rank>
- * <fs id>:<rank>
- * <rank>
- *
- * Parse into a mds_role_t. The rank-only form is only valid
- * if legacy_client_ns is set.
- */
-int MDSMonitor::parse_role(
- const std::string &role_str,
- mds_role_t *role,
- std::ostream &ss)
-{
- return get_working_fsmap().parse_role(role_str, role, ss);
-}
-
int MDSMonitor::filesystem_command(
+ FSMap &fsmap,
MonOpRequestRef op,
std::string const &prefix,
map<string, cmd_vartype> &cmdmap,
string whostr;
cmd_getval(g_ceph_context, cmdmap, "who", whostr);
- auto &pending = get_pending_fsmap_writeable();
if (prefix == "mds stop" ||
prefix == "mds deactivate") {
mds_role_t role;
- r = parse_role(whostr, &role, ss);
+ r = fsmap.parse_role(whostr, &role, ss);
if (r < 0 ) {
return r;
}
- const auto &fs = pending.get_filesystem(role.fscid);
+ const auto &fs = fsmap.get_filesystem(role.fscid);
if (!fs->mds_map.is_active(role.rank)) {
r = -EEXIST;
r = 0;
mds_gid_t gid = fs->mds_map.up.at(role.rank);
ss << "telling mds." << role << " "
- << pending.get_info_gid(gid).addr << " to deactivate";
+ << fsmap.get_info_gid(gid).addr << " to deactivate";
- pending.modify_daemon(gid, [](MDSMap::mds_info_t *info) {
+ fsmap.modify_daemon(gid, [](MDSMap::mds_info_t *info) {
info->state = MDSMap::STATE_STOPPING;
});
}
<< cmd_vartype_stringify(cmdmap["state"]) << "'";
return -EINVAL;
}
- if (pending.gid_exists(gid)) {
- pending.modify_daemon(gid, [state](MDSMap::mds_info_t *info) {
+ if (fsmap.gid_exists(gid)) {
+ fsmap.modify_daemon(gid, [state](MDSMap::mds_info_t *info) {
info->state = state;
});
ss << "set mds gid " << gid << " to state " << state << " "
cmd_getval(g_ceph_context, cmdmap, "who", who);
MDSMap::mds_info_t failed_info;
- r = fail_mds(ss, who, &failed_info);
+ r = fail_mds(fsmap, ss, who, &failed_info);
if (r < 0 && r == -EAGAIN) {
mon->osdmon()->wait_for_writeable(op, new C_RetryMessage(this, op));
return -EAGAIN; // don't propose yet; wait for message to be retried
<< cmd_vartype_stringify(cmdmap["gid"]) << "'";
return -EINVAL;
}
- if (!pending.gid_exists(gid)) {
+ if (!fsmap.gid_exists(gid)) {
ss << "mds gid " << gid << " dne";
r = 0;
} else {
- const auto &info = pending.get_info_gid(gid);
+ const auto &info = fsmap.get_info_gid(gid);
MDSMap::DaemonState state = info.state;
if (state > 0) {
ss << "cannot remove active mds." << info.name
<< " rank " << info.rank;
return -EBUSY;
} else {
- pending.erase(gid, {});
+ fsmap.erase(gid, {});
ss << "removed mds gid " << gid;
return 0;
}
std::string role_str;
cmd_getval(g_ceph_context, cmdmap, "who", role_str);
mds_role_t role;
- int r = parse_role(role_str, &role, ss);
+ int r = fsmap.parse_role(role_str, &role, ss);
if (r < 0) {
ss << "invalid role '" << role_str << "'";
return -EINVAL;
}
- pending.modify_filesystem(
+ fsmap.modify_filesystem(
role.fscid,
[role](std::shared_ptr<Filesystem> fs)
{
<< cmd_vartype_stringify(cmdmap["feature"]) << "'";
return -EINVAL;
}
- if (pending.compat.compat.contains(f)) {
+ if (fsmap.compat.compat.contains(f)) {
ss << "removing compat feature " << f;
- CompatSet modified = pending.compat;
+ CompatSet modified = fsmap.compat;
modified.compat.remove(f);
- pending.update_compat(modified);
+ fsmap.update_compat(modified);
} else {
- ss << "compat feature " << f << " not present in " << pending.compat;
+ ss << "compat feature " << f << " not present in " << fsmap.compat;
}
r = 0;
} else if (prefix == "mds compat rm_incompat") {
<< cmd_vartype_stringify(cmdmap["feature"]) << "'";
return -EINVAL;
}
- if (pending.compat.incompat.contains(f)) {
+ if (fsmap.compat.incompat.contains(f)) {
ss << "removing incompat feature " << f;
- CompatSet modified = pending.compat;
+ CompatSet modified = fsmap.compat;
modified.incompat.remove(f);
- pending.update_compat(modified);
+ fsmap.update_compat(modified);
} else {
- ss << "incompat feature " << f << " not present in " << pending.compat;
+ ss << "incompat feature " << f << " not present in " << fsmap.compat;
}
r = 0;
} else if (prefix == "mds repaired") {
std::string role_str;
cmd_getval(g_ceph_context, cmdmap, "rank", role_str);
mds_role_t role;
- r = parse_role(role_str, &role, ss);
+ r = fsmap.parse_role(role_str, &role, ss);
if (r < 0) {
return r;
}
- bool modified = pending.undamaged(role.fscid, role.rank);
+ bool modified = fsmap.undamaged(role.fscid, role.rank);
if (modified) {
dout(4) << "repaired: restoring rank " << role << dendl;
} else {
/**
* Helper to legacy_filesystem_command
*/
-void MDSMonitor::modify_legacy_filesystem(
+void MDSMonitor::modify_legacy_filesystem(FSMap &fsmap,
std::function<void(std::shared_ptr<Filesystem> )> fn)
{
- auto &pending_fsmap = get_pending_fsmap_writeable();
- pending_fsmap.modify_filesystem(
- pending_fsmap.legacy_client_fscid,
+ fsmap.modify_filesystem(
+ fsmap.legacy_client_fscid,
fn
);
}
* @retval < 0 An error has occurred; **ss** may have been set.
*/
int MDSMonitor::legacy_filesystem_command(
+ FSMap &fsmap,
MonOpRequestRef op,
std::string const &prefix,
map<string, cmd_vartype> &cmdmap,
string whostr;
cmd_getval(g_ceph_context, cmdmap, "who", whostr);
- auto &pending_fsmap = get_pending_fsmap_writeable();
-
- assert (pending_fsmap.legacy_client_fscid != FS_CLUSTER_ID_NONE);
+ assert (fsmap.legacy_client_fscid != FS_CLUSTER_ID_NONE);
if (prefix == "mds set_max_mds") {
// NOTE: deprecated by "fs set max_mds"
}
const MDSMap& mdsmap =
- pending_fsmap.filesystems.at(pending_fsmap.legacy_client_fscid)->mds_map;
+ fsmap.filesystems.at(fsmap.legacy_client_fscid)->mds_map;
if (!mdsmap.allows_multimds() &&
maxmds > mdsmap.get_max_mds() &&
return -EINVAL;
}
- modify_legacy_filesystem(
+ modify_legacy_filesystem(fsmap,
[maxmds](std::shared_ptr<Filesystem> fs)
{
fs->mds_map.set_max_mds(maxmds);
ss << "max_mds = " << maxmds;
} else if (prefix == "mds cluster_down") {
// NOTE: deprecated by "fs set cluster_down"
- modify_legacy_filesystem(
+ modify_legacy_filesystem(fsmap,
[](std::shared_ptr<Filesystem> fs)
{
fs->mds_map.set_flag(CEPH_MDSMAP_DOWN);
r = 0;
} else if (prefix == "mds cluster_up") {
// NOTE: deprecated by "fs set cluster_up"
- modify_legacy_filesystem(
+ modify_legacy_filesystem(fsmap,
[](std::shared_ptr<Filesystem> fs)
{
fs->mds_map.clear_flag(CEPH_MDSMAP_DOWN);
if (is_mds) {
// What (if any) namespace are you assigned to?
auto mds_info = fsmap.get_mds_info();
- for (const auto &i : mds_info) {
- if (i.second.addr == sub->session->inst.addr) {
- mds_gid = i.first;
+ for (const auto &p : mds_info) {
+ if (p.second.addr == sub->session->inst.addr) {
+ mds_gid = p.first;
fscid = fsmap.mds_roles.at(mds_gid);
}
}
paxos->trigger_propose();
}
-void MDSMonitor::remove_from_metadata(MonitorDBStore::TransactionRef t)
+void MDSMonitor::remove_from_metadata(const FSMap &fsmap, MonitorDBStore::TransactionRef t)
{
bool update = false;
- for (map<mds_gid_t, Metadata>::iterator i = pending_metadata.begin();
- i != pending_metadata.end(); ) {
- if (!get_pending_fsmap().gid_exists(i->first)) {
- pending_metadata.erase(i++);
+ for (auto it = pending_metadata.begin(); it != pending_metadata.end(); ) {
+ if (!fsmap.gid_exists(it->first)) {
+ it = pending_metadata.erase(it);
update = true;
} else {
- ++i;
+ ++it;
}
}
if (!update)
return 0;
}
-void MDSMonitor::count_metadata(const string& field, map<string,int> *out)
+void MDSMonitor::count_metadata(const std::string &field, map<string,int> *out)
{
map<mds_gid_t,Metadata> meta;
load_metadata(meta);
}
}
-void MDSMonitor::count_metadata(const string& field, Formatter *f)
+void MDSMonitor::count_metadata(const std::string &field, Formatter *f)
{
map<string,int> by_val;
count_metadata(field, &by_val);
f->close_section();
}
-int MDSMonitor::dump_metadata(const std::string &who, Formatter *f, ostream& err)
+int MDSMonitor::dump_metadata(const FSMap& fsmap, const std::string &who,
+ Formatter *f, ostream& err)
{
assert(f);
- mds_gid_t gid = gid_from_arg(who, err);
+ mds_gid_t gid = gid_from_arg(fsmap, who, err);
if (gid == MDS_GID_NONE) {
return -EINVAL;
}
{
assert(f);
+ const auto &fsmap = get_fsmap();
+
map<mds_gid_t, Metadata> metadata;
if (int r = load_metadata(metadata)) {
return r;
}
map<string, list<int> > mdses; // hostname => rank
- for (map<mds_gid_t, Metadata>::iterator it = metadata.begin();
- it != metadata.end(); ++it) {
- const Metadata& m = it->second;
+ for (const auto &p : metadata) {
+ const mds_gid_t& gid = p.first;
+ const Metadata& m = p.second;
Metadata::const_iterator hostname = m.find("hostname");
if (hostname == m.end()) {
// not likely though
continue;
}
- const mds_gid_t gid = it->first;
- if (!get_fsmap().gid_exists(gid)) {
+ if (!fsmap.gid_exists(gid)) {
dout(5) << __func__ << ": GID " << gid << " not existent" << dendl;
continue;
}
- const MDSMap::mds_info_t& mds_info = get_fsmap().get_info_gid(gid);
+ const MDSMap::mds_info_t& mds_info = fsmap.get_info_gid(gid);
// FIXME: include filesystem name with rank here
mdses[hostname->second].push_back(mds_info.rank);
}
* If a cluster is undersized (with respect to max_mds), then
* attempt to find daemons to grow it.
*/
-bool MDSMonitor::maybe_expand_cluster(std::shared_ptr<Filesystem> &fs)
+bool MDSMonitor::maybe_expand_cluster(FSMap &fsmap, fs_cluster_id_t fscid)
{
- bool do_propose = false;
- auto &pending = get_pending_fsmap_writeable();
+ auto fs = fsmap.get_filesystem(fscid);
+ auto &mds_map = fs->mds_map;
if (fs->mds_map.test_flag(CEPH_MDSMAP_DOWN)) {
- return do_propose;
+ return false;
}
- while (fs->mds_map.get_num_in_mds() < size_t(fs->mds_map.get_max_mds()) &&
- !fs->mds_map.is_degraded()) {
+ int in = mds_map.get_num_in_mds();
+ int max = mds_map.get_max_mds();
+
+ dout(20) << __func__ << " in " << in << " max " << max << dendl;
+
+ if (in < max) {
mds_rank_t mds = mds_rank_t(0);
string name;
- while (fs->mds_map.is_in(mds)) {
+ while (mds_map.is_in(mds)) {
mds++;
}
- mds_gid_t newgid = pending.find_replacement_for({fs->fscid, mds},
+ mds_gid_t newgid = fsmap.find_replacement_for({fscid, mds},
name, g_conf->mon_force_standby_active);
if (newgid == MDS_GID_NONE) {
- break;
+ return false;
}
- const auto &new_info = pending.get_info_gid(newgid);
+ const auto &new_info = fsmap.get_info_gid(newgid);
dout(1) << "assigned standby " << new_info.addr
<< " as mds." << mds << dendl;
mon->clog->info() << new_info.human_name() << " assigned to "
- "filesystem " << fs->mds_map.fs_name << " as rank "
- << mds << " (now has " << fs->mds_map.get_num_in_mds() + 1
+ "filesystem " << mds_map.fs_name << " as rank "
+ << mds << " (now has " << mds_map.get_num_in_mds() + 1
<< " ranks)";
- pending.promote(newgid, fs, mds);
- do_propose = true;
+ fsmap.promote(newgid, fs, mds);
+ return true;
}
- return do_propose;
+ return false;
}
* is available, fail this daemon (remove from map) and pass its
* role to another daemon.
*/
-void MDSMonitor::maybe_replace_gid(mds_gid_t gid, const MDSMap::mds_info_t& info,
- bool *mds_propose, bool *osd_propose)
+void MDSMonitor::maybe_replace_gid(FSMap &fsmap, mds_gid_t gid,
+ const MDSMap::mds_info_t& info, bool *mds_propose, bool *osd_propose)
{
assert(mds_propose != nullptr);
assert(osd_propose != nullptr);
- auto &pending = get_pending_fsmap_writeable();
- const auto fscid = pending.mds_roles.at(gid);
+ const auto fscid = fsmap.mds_roles.at(gid);
// We will only take decisive action (replacing/removing a daemon)
// if we have some indicating that some other daemon(s) are successfully
// getting beacons through recently.
- utime_t latest_beacon;
- for (const auto & i : last_beacon) {
- latest_beacon = MAX(i.second.stamp, latest_beacon);
+ mono_time latest_beacon = mono_clock::zero();
+ for (const auto &p : last_beacon) {
+ latest_beacon = std::max(p.second.stamp, latest_beacon);
}
- const bool may_replace = latest_beacon >
- (ceph_clock_now() -
- MAX(g_conf->mds_beacon_interval, g_conf->mds_beacon_grace * 0.5));
+ mono_time now = mono_clock::now();
+ chrono::duration<double> since = now-latest_beacon;
+ const bool may_replace = since.count() <
+ std::max(g_conf->mds_beacon_interval, g_conf->mds_beacon_grace * 0.5);
// are we in?
// and is there a non-laggy standby that can take over for us?
info.state != MDSMap::STATE_STANDBY &&
info.state != MDSMap::STATE_STANDBY_REPLAY &&
may_replace &&
- !pending.get_filesystem(fscid)->mds_map.test_flag(CEPH_MDSMAP_DOWN) &&
- (sgid = pending.find_replacement_for({fscid, info.rank}, info.name,
+ !fsmap.get_filesystem(fscid)->mds_map.test_flag(CEPH_MDSMAP_DOWN) &&
+ (sgid = fsmap.find_replacement_for({fscid, info.rank}, info.name,
g_conf->mon_force_standby_active)) != MDS_GID_NONE)
{
- MDSMap::mds_info_t si = pending.get_info_gid(sgid);
+ MDSMap::mds_info_t si = fsmap.get_info_gid(sgid);
dout(10) << " replacing " << gid << " " << info.addr << " mds."
<< info.rank << "." << info.inc
<< " " << ceph_mds_state_name(info.state)
<< " with standby " << si.human_name();
// Remember what NS the old one was in
- const fs_cluster_id_t fscid = pending.mds_roles.at(gid);
+ const fs_cluster_id_t fscid = fsmap.mds_roles.at(gid);
// Remove the old one
- *osd_propose |= fail_mds_gid(gid);
+ *osd_propose |= fail_mds_gid(fsmap, gid);
// Promote the replacement
- auto fs = pending.filesystems.at(fscid);
- pending.promote(sgid, fs, info.rank);
+ auto fs = fsmap.filesystems.at(fscid);
+ fsmap.promote(sgid, fs, info.rank);
*mds_propose = true;
} else if ((info.state == MDSMap::STATE_STANDBY_REPLAY ||
<< dendl;
mon->clog->info() << "Standby " << info.human_name() << " is not "
"responding, dropping it";
- fail_mds_gid(gid);
+ fail_mds_gid(fsmap, gid);
*mds_propose = true;
} else if (!info.laggy()) {
dout(10) << " marking " << gid << " " << info.addr << " mds." << info.rank << "." << info.inc
<< " " << ceph_mds_state_name(info.state)
<< " laggy" << dendl;
- pending.modify_daemon(info.global_id, [](MDSMap::mds_info_t *info) {
+ fsmap.modify_daemon(info.global_id, [](MDSMap::mds_info_t *info) {
info->laggy_since = ceph_clock_now();
});
*mds_propose = true;
}
}
-bool MDSMonitor::maybe_promote_standby(std::shared_ptr<Filesystem> &fs)
+bool MDSMonitor::maybe_promote_standby(FSMap &fsmap, std::shared_ptr<Filesystem> &fs)
{
assert(!fs->mds_map.test_flag(CEPH_MDSMAP_DOWN));
- auto &pending = get_pending_fsmap_writeable();
-
bool do_propose = false;
// have a standby take over?
set<mds_rank_t>::iterator p = failed.begin();
while (p != failed.end()) {
mds_rank_t f = *p++;
- mds_gid_t sgid = pending.find_replacement_for({fs->fscid, f}, {},
+ mds_gid_t sgid = fsmap.find_replacement_for({fs->fscid, f}, {},
g_conf->mon_force_standby_active);
if (sgid) {
- const MDSMap::mds_info_t si = pending.get_info_gid(sgid);
+ const MDSMap::mds_info_t si = fsmap.get_info_gid(sgid);
dout(0) << " taking over failed mds." << f << " with " << sgid
<< "/" << si.name << " " << si.addr << dendl;
mon->clog->info() << "Standby " << si.human_name()
<< " assigned to filesystem " << fs->mds_map.fs_name
<< " as rank " << f;
- pending.promote(sgid, fs, f);
+ fsmap.promote(sgid, fs, f);
do_propose = true;
}
}
// them while perhaps-modifying standby_daemons during the loop
// (if we promote anyone they are removed from standby_daemons)
std::vector<mds_gid_t> standby_gids;
- for (const auto &j : pending.standby_daemons) {
+ for (const auto &j : fsmap.standby_daemons) {
standby_gids.push_back(j.first);
}
for (const auto &gid : standby_gids) {
- const auto &info = pending.standby_daemons.at(gid);
+ const auto &info = fsmap.standby_daemons.at(gid);
assert(info.state == MDSMap::STATE_STANDBY);
if (!info.standby_replay) {
// the standby_for_rank refers to: lookup via legacy_client_fscid
mds_role_t target_role = {
info.standby_for_fscid == FS_CLUSTER_ID_NONE ?
- pending.legacy_client_fscid : info.standby_for_fscid,
+ fsmap.legacy_client_fscid : info.standby_for_fscid,
info.standby_for_rank};
// It is possible that the map contains a standby_for_fscid
// that doesn't correspond to an existing filesystem, especially
// if we loaded from a version with a bug (#17466)
if (info.standby_for_fscid != FS_CLUSTER_ID_NONE
- && !pending.filesystem_exists(info.standby_for_fscid)) {
+ && !fsmap.filesystem_exists(info.standby_for_fscid)) {
derr << "gid " << gid << " has invalid standby_for_fscid "
<< info.standby_for_fscid << dendl;
continue;
// If we managed to resolve a full target role
if (target_role.fscid != FS_CLUSTER_ID_NONE) {
- const auto &fs = pending.get_filesystem(target_role.fscid);
+ const auto &fs = fsmap.get_filesystem(target_role.fscid);
if (fs->mds_map.is_followable(target_role.rank)) {
- do_propose |= try_standby_replay(
- info,
- *fs,
+ do_propose |= try_standby_replay(fsmap, info, *fs,
fs->mds_map.get_info(target_role.rank));
}
}
}
// check everyone
- for (const auto &p : pending.filesystems) {
+ for (const auto &p : fsmap.filesystems) {
if (info.standby_for_fscid != FS_CLUSTER_ID_NONE &&
info.standby_for_fscid != p.first)
continue;
continue; // we're supposed to follow someone else
}
- if (try_standby_replay(info, *fs, cand_info)) {
+ if (try_standby_replay(fsmap, info, *fs, cand_info)) {
assigned = true;
break;
}
// make sure mds's are still alive
// ...if i am an active leader
- if (!is_active()) return;
-
- dout(10) << get_working_fsmap() << dendl;
-
- if (!is_leader()) return;
+ if (!is_active() || !is_leader()) return;
auto &pending = get_pending_fsmap_writeable();
// expand mds cluster (add new nodes to @in)?
for (auto &p : pending.filesystems) {
- do_propose |= maybe_expand_cluster(p.second);
+ do_propose |= maybe_expand_cluster(pending, p.second->fscid);
}
- const auto now = ceph_clock_now();
- if (last_tick.is_zero()) {
+ mono_time now = mono_clock::now();
+ if (last_tick == decltype(last_tick)::min()) {
last_tick = now;
}
+ chrono::duration<double> since_last = now-last_tick;
- if (now - last_tick > (g_conf->mds_beacon_grace - g_conf->mds_beacon_interval)) {
+ if (since_last.count() >
+ (g_conf->mds_beacon_grace - g_conf->mds_beacon_interval)) {
// This case handles either local slowness (calls being delayed
// for whatever reason) or cluster election slowness (a long gap
// between calls while an election happened)
dout(4) << __func__ << ": resetting beacon timeouts due to mon delay "
"(slow election?) of " << now - last_tick << " seconds" << dendl;
- for (auto &i : last_beacon) {
- i.second.stamp = now;
+ for (auto &p : last_beacon) {
+ p.second.stamp = now;
}
}
last_tick = now;
- // check beacon timestamps
- utime_t cutoff = now;
- cutoff -= g_conf->mds_beacon_grace;
-
// make sure last_beacon is fully populated
for (auto &p : pending.mds_roles) {
auto &gid = p.first;
- if (last_beacon.count(gid) == 0) {
- last_beacon[gid].stamp = now;
- last_beacon[gid].seq = 0;
- }
+ last_beacon.emplace(std::piecewise_construct,
+ std::forward_as_tuple(gid),
+ std::forward_as_tuple(mono_clock::now(), 0));
}
+
+ // check beacon timestamps
bool propose_osdmap = false;
bool osdmap_writeable = mon->osdmon()->is_writeable();
- auto p = last_beacon.begin();
- while (p != last_beacon.end()) {
- mds_gid_t gid = p->first;
- auto beacon_info = p->second;
- ++p;
+ for (auto it = last_beacon.begin(); it != last_beacon.end(); ) {
+ mds_gid_t gid = it->first;
+ auto beacon_info = it->second;
+ chrono::duration<double> since_last = now-beacon_info.stamp;
if (!pending.gid_exists(gid)) {
// clean it out
- last_beacon.erase(gid);
+ it = last_beacon.erase(it);
continue;
}
- if (beacon_info.stamp < cutoff) {
+
+ if (since_last.count() >= g_conf->mds_beacon_grace) {
auto &info = pending.get_info_gid(gid);
dout(1) << "no beacon from mds." << info.rank << "." << info.inc
<< " (gid: " << gid << " addr: " << info.addr
<< " state: " << ceph_mds_state_name(info.state) << ")"
- << " since " << beacon_info.stamp << dendl;
+ << " since " << since_last.count() << "s" << dendl;
// If the OSDMap is writeable, we can blacklist things, so we can
// try failing any laggy MDS daemons. Consider each one for failure.
if (osdmap_writeable) {
- maybe_replace_gid(gid, info, &do_propose, &propose_osdmap);
+ maybe_replace_gid(pending, gid, info, &do_propose, &propose_osdmap);
}
}
+
+ ++it;
}
if (propose_osdmap) {
request_proposal(mon->osdmon());
for (auto &p : pending.filesystems) {
auto &fs = p.second;
if (!fs->mds_map.test_flag(CEPH_MDSMAP_DOWN)) {
- do_propose |= maybe_promote_standby(fs);
+ do_propose |= maybe_promote_standby(pending, fs);
}
}
* ainfo: the would-be leader
*/
bool MDSMonitor::try_standby_replay(
+ FSMap &fsmap,
const MDSMap::mds_info_t& finfo,
const Filesystem &leader_fs,
const MDSMap::mds_info_t& ainfo)
} else {
// Assign the new role to the standby
dout(10) << " setting to follow mds rank " << ainfo.rank << dendl;
- get_pending_fsmap_writeable().assign_standby_replay(finfo.global_id, leader_fs.fscid, ainfo.rank);
+ fsmap.assign_standby_replay(finfo.global_id, leader_fs.fscid, ainfo.rank);
return true;
}
}
void MDSMonitor::on_restart()
{
// Clear out the leader-specific state.
- last_tick = utime_t();
+ last_tick = mono_clock::now();
last_beacon.clear();
}
/**
* Return true if a blacklist was done (i.e. OSD propose needed)
*/
- bool fail_mds_gid(mds_gid_t gid);
+ bool fail_mds_gid(FSMap &fsmap, mds_gid_t gid);
bool is_leader() const override { return mon->is_leader(); }
void get_health(list<pair<health_status_t,string> >& summary,
list<pair<health_status_t,string> > *detail,
CephContext *cct) const override;
- int fail_mds(std::ostream &ss, const std::string &arg,
+ int fail_mds(FSMap &fsmap, std::ostream &ss,
+ const std::string &arg,
MDSMap::mds_info_t *failed_info);
bool preprocess_command(MonOpRequestRef op);
bool prepare_command(MonOpRequestRef op);
- int parse_role(
- const std::string &role_str,
- mds_role_t *role,
- std::ostream &ss);
-
void modify_legacy_filesystem(
+ FSMap &fsmap,
std::function<void(std::shared_ptr<Filesystem> )> fn);
int legacy_filesystem_command(
+ FSMap &fsmap,
MonOpRequestRef op,
std::string const &prefix,
map<string, cmd_vartype> &cmdmap,
std::stringstream &ss);
int filesystem_command(
+ FSMap &fsmap,
MonOpRequestRef op,
std::string const &prefix,
map<string, cmd_vartype> &cmdmap,
// beacons
struct beacon_info_t {
- utime_t stamp;
- uint64_t seq;
+ mono_time stamp = mono_clock::zero();
+ uint64_t seq = 0;
+ beacon_info_t() {}
+ beacon_info_t(mono_time stamp, uint64_t seq) : stamp(stamp), seq(seq) {}
};
map<mds_gid_t, beacon_info_t> last_beacon;
- bool try_standby_replay(
- const MDSMap::mds_info_t& finfo,
- const Filesystem &leader_fs,
- const MDSMap::mds_info_t& ainfo);
+ bool try_standby_replay(FSMap &fsmap, const MDSMap::mds_info_t& finfo,
+ const Filesystem &leader_fs, const MDSMap::mds_info_t& ainfo);
std::list<std::shared_ptr<FileSystemCommandHandler> > handlers;
- bool maybe_promote_standby(std::shared_ptr<Filesystem> &fs);
- bool maybe_expand_cluster(std::shared_ptr<Filesystem> &fs);
- void maybe_replace_gid(mds_gid_t gid, const MDSMap::mds_info_t& info,
- bool *mds_propose, bool *osd_propose);
+ bool maybe_promote_standby(FSMap &fsmap, std::shared_ptr<Filesystem> &fs);
+ bool maybe_expand_cluster(FSMap &fsmap, fs_cluster_id_t fscid);
+ void maybe_replace_gid(FSMap &fsmap, mds_gid_t gid,
+ const MDSMap::mds_info_t& info, bool *mds_propose, bool *osd_propose);
void tick() override; // check state, take actions
- int dump_metadata(const string& who, Formatter *f, ostream& err);
+ int dump_metadata(const FSMap &fsmap, const std::string &who, Formatter *f,
+ ostream& err);
void update_metadata(mds_gid_t gid, const Metadata& metadata);
- void remove_from_metadata(MonitorDBStore::TransactionRef t);
+ void remove_from_metadata(const FSMap &fsmap, MonitorDBStore::TransactionRef t);
int load_metadata(map<mds_gid_t, Metadata>& m);
- void count_metadata(const string& field, Formatter *f);
+ void count_metadata(const std::string& field, Formatter *f);
public:
- void count_metadata(const string& field, map<string,int> *out);
+ void count_metadata(const std::string& field, map<string,int> *out);
protected:
// MDS daemon GID to latest health state from that GID
std::map<uint64_t, MDSHealth> pending_daemon_health;
std::set<uint64_t> pending_daemon_health_rm;
-
map<mds_gid_t, Metadata> pending_metadata;
- mds_gid_t gid_from_arg(const std::string& arg, std::ostream& err);
+ mds_gid_t gid_from_arg(const FSMap &fsmap, const std::string &arg, std::ostream& err);
// When did the mon last call into our tick() method? Used for detecting
// when the mon was not updating us for some period (e.g. during slow
// election) to reset last_beacon timeouts
- utime_t last_tick;
+ mono_time last_tick = mono_clock::zero();
};
#endif
MonMap()
: epoch(0) {
- memset(&fsid, 0, sizeof(fsid));
}
uuid_d& get_fsid() { return fsid; }
pcb.add_u64(l_cluster_num_osd_up, "num_osd_up", "OSDs that are up");
pcb.add_u64(l_cluster_num_osd_in, "num_osd_in", "OSD in state \"in\" (they are in cluster)");
pcb.add_u64(l_cluster_osd_epoch, "osd_epoch", "Current epoch of OSD map");
- pcb.add_u64(l_cluster_osd_bytes, "osd_bytes", "Total capacity of cluster");
- pcb.add_u64(l_cluster_osd_bytes_used, "osd_bytes_used", "Used space");
- pcb.add_u64(l_cluster_osd_bytes_avail, "osd_bytes_avail", "Available space");
+ pcb.add_u64(l_cluster_osd_bytes, "osd_bytes", "Total capacity of cluster", NULL, 0, unit_t(BYTES));
+ pcb.add_u64(l_cluster_osd_bytes_used, "osd_bytes_used", "Used space", NULL, 0, unit_t(BYTES));
+ pcb.add_u64(l_cluster_osd_bytes_avail, "osd_bytes_avail", "Available space", NULL, 0, unit_t(BYTES));
pcb.add_u64(l_cluster_num_pool, "num_pool", "Pools");
pcb.add_u64(l_cluster_num_pg, "num_pg", "Placement groups");
pcb.add_u64(l_cluster_num_pg_active_clean, "num_pg_active_clean", "Placement groups in active+clean state");
if (p->quota_max_objects == 0)
rs << "N/A";
else
- rs << si_t(p->quota_max_objects) << " objects";
+ rs << si_u_t(p->quota_max_objects) << " objects";
rs << "\n"
<< " max bytes : ";
if (p->quota_max_bytes == 0)
rs << "N/A";
else
- rs << si_t(p->quota_max_bytes) << "B";
+ rs << byte_u_t(p->quota_max_bytes);
rdata.append(rs.str());
}
rdata.append("\n");
(uint64_t)sum.num_bytes >= pool.quota_max_bytes) {
mon->clog->warn() << "pool '" << pool_name << "' is full"
<< " (reached quota's max_bytes: "
- << si_t(pool.quota_max_bytes) << ")";
+ << byte_u_t(pool.quota_max_bytes) << ")";
}
if (pool.quota_max_objects > 0 &&
(uint64_t)sum.num_objects >= pool.quota_max_objects) {
auto it = profile.find("stripe_unit");
if (it != profile.end()) {
string err_str;
- uint32_t stripe_unit = strict_si_cast<uint32_t>(it->second.c_str(), &err_str);
+ uint32_t stripe_unit = strict_iecstrtoll(it->second.c_str(), &err_str);
if (!err_str.empty()) {
*ss << "could not parse stripe_unit '" << it->second
<< "': " << err_str << std::endl;
auto it = profile.find("stripe_unit");
if (it != profile.end()) {
string err_str;
- stripe_unit = strict_si_cast<uint32_t>(it->second.c_str(), &err_str);
+ stripe_unit = strict_iecstrtoll(it->second.c_str(), &err_str);
assert(err_str.empty());
}
*stripe_width = data_chunks *
// val could contain unit designations, so we treat as a string
string val;
cmd_getval(g_ceph_context, cmdmap, "val", val);
- stringstream tss;
- int64_t value = unit_to_bytesize(val, &tss);
- if (value < 0) {
- ss << "error parsing value '" << value << "': " << tss.str();
- err = value;
+ string tss;
+ int64_t value;
+ if (field == "max_objects") {
+ value = strict_sistrtoll(val.c_str(), &tss);
+ } else if (field == "max_bytes") {
+ value = strict_iecstrtoll(val.c_str(), &tss);
+ } else {
+ assert(0 == "unrecognized option");
+ }
+ if (!tss.empty()) {
+ ss << "error parsing value '" << val << "': " << tss;
+ err = -EINVAL;
goto reply;
}
} else {
*out << " pools: " << pg_pool_sum.size() << " pools, "
<< num_pg << " pgs\n";
- *out << " objects: " << si_t(pg_sum.stats.sum.num_objects) << " objects, "
- << prettybyte_t(pg_sum.stats.sum.num_bytes) << "\n";
+ *out << " objects: " << si_u_t(pg_sum.stats.sum.num_objects) << " objects, "
+ << byte_u_t(pg_sum.stats.sum.num_bytes) << "\n";
*out << " usage: "
- << kb_t(osd_sum.kb_used) << " used, "
- << kb_t(osd_sum.kb_avail) << " / "
- << kb_t(osd_sum.kb) << " avail\n";
+ << byte_u_t(osd_sum.kb_used << 10) << " used, "
+ << byte_u_t(osd_sum.kb_avail << 10) << " / "
+ << byte_u_t(osd_sum.kb << 10) << " avail\n";
*out << " pgs: ";
}
if (out)
*out << num_pg << " pgs: "
<< states << "; "
- << prettybyte_t(pg_sum.stats.sum.num_bytes) << " data, "
- << kb_t(osd_sum.kb_used) << " used, "
- << kb_t(osd_sum.kb_avail) << " / "
- << kb_t(osd_sum.kb) << " avail";
+ << byte_u_t(pg_sum.stats.sum.num_bytes) << " data, "
+ << byte_u_t(osd_sum.kb_used << 10) << " used, "
+ << byte_u_t(osd_sum.kb_avail << 10) << " / "
+ << byte_u_t(osd_sum.kb << 10) << " avail";
if (f) {
f->dump_unsigned("num_pgs", num_pg);
f->dump_unsigned("num_bytes", pg_sum.stats.sum.num_bytes);
if (pos_delta.stats.sum.num_rd) {
int64_t rd = (pos_delta.stats.sum.num_rd_kb << 10) / (double)stamp_delta;
if (out)
- *out << pretty_si_t(rd) << "B/s rd, ";
+ *out << byte_u_t(rd) << "/s rd, ";
if (f)
f->dump_unsigned("read_bytes_sec", rd);
}
if (pos_delta.stats.sum.num_wr) {
int64_t wr = (pos_delta.stats.sum.num_wr_kb << 10) / (double)stamp_delta;
if (out)
- *out << pretty_si_t(wr) << "B/s wr, ";
+ *out << byte_u_t(wr) << "/s wr, ";
if (f)
f->dump_unsigned("write_bytes_sec", wr);
}
int64_t iops = (pos_delta.stats.sum.num_rd + pos_delta.stats.sum.num_wr) / (double)stamp_delta;
if (out)
- *out << pretty_si_t(iops) << "op/s";
+ *out << si_u_t(iops) << "op/s";
if (f)
f->dump_unsigned("io_sec", iops);
}
f->dump_int("num_bytes_recovered", pos_delta.stats.sum.num_bytes_recovered);
f->dump_int("num_keys_recovered", pos_delta.stats.sum.num_keys_recovered);
} else {
- *out << pretty_si_t(bps) << "B/s";
+ *out << byte_u_t(bps) << "/s";
if (pos_delta.stats.sum.num_keys_recovered)
- *out << ", " << pretty_si_t(kps) << "keys/s";
- *out << ", " << pretty_si_t(objps) << "objects/s";
+ *out << ", " << si_u_t(kps) << "keys/s";
+ *out << ", " << si_u_t(objps) << "objects/s";
}
}
}
if (f) {
f->dump_int("read_bytes_sec", rd);
} else {
- *out << pretty_si_t(rd) << "B/s rd, ";
+ *out << byte_u_t(rd) << "/s rd, ";
}
}
if (pos_delta.stats.sum.num_wr) {
if (f) {
f->dump_int("write_bytes_sec", wr);
} else {
- *out << pretty_si_t(wr) << "B/s wr, ";
+ *out << byte_u_t(wr) << "/s wr, ";
}
}
int64_t iops_rd = pos_delta.stats.sum.num_rd / (double)delta_stamp;
f->dump_int("read_op_per_sec", iops_rd);
f->dump_int("write_op_per_sec", iops_wr);
} else {
- *out << pretty_si_t(iops_rd) << "op/s rd, " << pretty_si_t(iops_wr) << "op/s wr";
+ *out << si_u_t(iops_rd) << "op/s rd, " << si_u_t(iops_wr) << "op/s wr";
}
}
}
if (f) {
f->dump_int("flush_bytes_sec", flush);
} else {
- *out << pretty_si_t(flush) << "B/s flush";
+ *out << byte_u_t(flush) << "/s flush";
have_output = true;
}
}
} else {
if (have_output)
*out << ", ";
- *out << pretty_si_t(evict) << "B/s evict";
+ *out << byte_u_t(evict) << "/s evict";
have_output = true;
}
}
} else {
if (have_output)
*out << ", ";
- *out << pretty_si_t(promote) << "op/s promote";
+ *out << si_u_t(promote) << "op/s promote";
have_output = true;
}
}
} else {
if (have_output)
*out << ", ";
- *out << pretty_si_t(pos_delta.stats.sum.num_flush_mode_low) << "PG(s) flushing";
+ *out << si_u_t(pos_delta.stats.sum.num_flush_mode_low) << "PG(s) flushing";
have_output = true;
}
}
} else {
if (have_output)
*out << ", ";
- *out << pretty_si_t(pos_delta.stats.sum.num_flush_mode_high) << "PG(s) flushing (high)";
+ *out << si_u_t(pos_delta.stats.sum.num_flush_mode_high) << "PG(s) flushing (high)";
have_output = true;
}
}
} else {
if (have_output)
*out << ", ";
- *out << pretty_si_t(pos_delta.stats.sum.num_evict_mode_some) << "PG(s) evicting";
+ *out << si_u_t(pos_delta.stats.sum.num_evict_mode_some) << "PG(s) evicting";
have_output = true;
}
}
} else {
if (have_output)
*out << ", ";
- *out << pretty_si_t(pos_delta.stats.sum.num_evict_mode_full) << "PG(s) evicting (full)";
+ *out << si_u_t(pos_delta.stats.sum.num_evict_mode_full) << "PG(s) evicting (full)";
}
}
}
if (pool->quota_max_objects == 0)
tbl << "N/A";
else
- tbl << si_t(pool->quota_max_objects);
+ tbl << si_u_t(pool->quota_max_objects);
if (pool->quota_max_bytes == 0)
tbl << "N/A";
else
- tbl << si_t(pool->quota_max_bytes);
+ tbl << byte_u_t(pool->quota_max_bytes);
}
}
if (verbose) {
tbl.define_column("OBJECTS", TextTable::LEFT, TextTable::RIGHT);
}
- tbl << stringify(si_t(osd_sum.kb*1024))
- << stringify(si_t(osd_sum.kb_avail*1024))
- << stringify(si_t(osd_sum.kb_used*1024));
+ tbl << stringify(byte_u_t(osd_sum.kb*1024))
+ << stringify(byte_u_t(osd_sum.kb_avail*1024))
+ << stringify(byte_u_t(osd_sum.kb_used*1024));
float used = 0.0;
if (osd_sum.kb > 0) {
used = ((float)osd_sum.kb_used / osd_sum.kb);
}
tbl << percentify(used*100);
if (verbose) {
- tbl << stringify(si_t(pg_sum.stats.sum.num_objects));
+ tbl << stringify(si_u_t(pg_sum.stats.sum.num_objects));
}
tbl << TextTable::endrow;
f->dump_int("raw_bytes_used", sum.num_bytes * raw_used_rate * curr_object_copies_rate);
}
} else {
- tbl << stringify(si_t(sum.num_bytes));
+ tbl << stringify(byte_u_t(sum.num_bytes));
tbl << percentify(used*100);
- tbl << si_t(avail / raw_used_rate);
+ tbl << byte_u_t(avail / raw_used_rate);
tbl << sum.num_objects;
if (verbose) {
- tbl << stringify(si_t(sum.num_objects_dirty))
- << stringify(si_t(sum.num_rd))
- << stringify(si_t(sum.num_wr))
- << stringify(si_t(sum.num_bytes * raw_used_rate * curr_object_copies_rate));
+ tbl << stringify(si_u_t(sum.num_objects_dirty))
+ << stringify(byte_u_t(sum.num_rd))
+ << stringify(byte_u_t(sum.num_wr))
+ << stringify(byte_u_t(sum.num_bytes * raw_used_rate * curr_object_copies_rate));
}
}
}
p != osd_stat.end();
++p) {
tab << p->first
- << si_t(p->second.kb_used << 10)
- << si_t(p->second.kb_avail << 10)
- << si_t(p->second.kb << 10)
+ << byte_u_t(p->second.kb_used << 10)
+ << byte_u_t(p->second.kb_avail << 10)
+ << byte_u_t(p->second.kb << 10)
<< p->second.hb_peers
<< get_num_pg_by_osd(p->first)
<< get_num_primary_pg_by_osd(p->first)
}
tab << "sum"
- << si_t(osd_sum.kb_used << 10)
- << si_t(osd_sum.kb_avail << 10)
- << si_t(osd_sum.kb << 10)
+ << byte_u_t(osd_sum.kb_used << 10)
+ << byte_u_t(osd_sum.kb_avail << 10)
+ << byte_u_t(osd_sum.kb << 10)
<< TextTable::endrow;
ss << tab;
tab.define_column("TOTAL", TextTable::LEFT, TextTable::RIGHT);
tab << "sum"
- << si_t(osd_sum.kb_used << 10)
- << si_t(osd_sum.kb_avail << 10)
- << si_t(osd_sum.kb << 10)
+ << byte_u_t(osd_sum.kb_used << 10)
+ << byte_u_t(osd_sum.kb_avail << 10)
+ << byte_u_t(osd_sum.kb << 10)
<< TextTable::endrow;
ss << tab;
p.second.target_max_objects * (ratio / 1000000.0)) {
ostringstream ss;
ss << "cache pool '" << name << "' with "
- << si_t(st.stats.sum.num_objects)
+ << si_u_t(st.stats.sum.num_objects)
<< " objects at/near target max "
- << si_t(p.second.target_max_objects) << " objects";
+ << si_u_t(p.second.target_max_objects) << " objects";
detail.push_back(ss.str());
nearfull = true;
}
p.second.target_max_bytes * (ratio / 1000000.0)) {
ostringstream ss;
ss << "cache pool '" << name
- << "' with " << si_t(st.stats.sum.num_bytes)
- << "B at/near target max "
- << si_t(p.second.target_max_bytes) << "B";
+ << "' with " << byte_u_t(st.stats.sum.num_bytes)
+ << " at/near target max "
+ << byte_u_t(p.second.target_max_bytes);
detail.push_back(ss.str());
nearfull = true;
}
} else if (crit_threshold > 0 &&
sum.num_bytes >= pool.quota_max_bytes*crit_threshold) {
ss << "pool '" << pool_name
- << "' has " << si_t(sum.num_bytes) << " bytes"
- << " (max " << si_t(pool.quota_max_bytes) << ")";
+ << "' has " << byte_u_t(sum.num_bytes)
+ << " (max " << byte_u_t(pool.quota_max_bytes) << ")";
full_detail.push_back(ss.str());
full = true;
} else if (warn_threshold > 0 &&
sum.num_bytes >= pool.quota_max_bytes*warn_threshold) {
ss << "pool '" << pool_name
- << "' has " << si_t(sum.num_bytes) << " bytes"
- << " (max " << si_t(pool.quota_max_bytes) << ")";
+ << "' has " << byte_u_t(sum.num_bytes)
+ << " (max " << byte_u_t(pool.quota_max_bytes) << ")";
nearfull_detail.push_back(ss.str());
nearfull = true;
}
if (detail) {
ostringstream ss;
ss << "cache pool '" << name << "' with "
- << si_t(st.stats.sum.num_objects)
+ << si_u_t(st.stats.sum.num_objects)
<< " objects at/near target max "
- << si_t(p->second.target_max_objects) << " objects";
+ << si_u_t(p->second.target_max_objects) << " objects";
detail->push_back(make_pair(HEALTH_WARN, ss.str()));
}
}
if (detail) {
ostringstream ss;
ss << "cache pool '" << name
- << "' with " << si_t(st.stats.sum.num_bytes)
+ << "' with " << byte_u_t(st.stats.sum.num_bytes)
<< "B at/near target max "
- << si_t(p->second.target_max_bytes) << "B";
+ << byte_u_t(p->second.target_max_bytes) << "B";
detail->push_back(make_pair(HEALTH_WARN, ss.str()));
}
}
} else if (crit_threshold > 0 &&
sum.num_bytes >= pool.quota_max_bytes*crit_threshold) {
ss << "pool '" << pool_name
- << "' has " << si_t(sum.num_bytes) << " bytes"
- << " (max " << si_t(pool.quota_max_bytes) << ")";
+ << "' has " << byte_u_t(sum.num_bytes) << " bytes"
+ << " (max " << byte_u_t(pool.quota_max_bytes) << ")";
status = HEALTH_ERR;
} else if (warn_threshold > 0 &&
sum.num_bytes >= pool.quota_max_bytes*warn_threshold) {
ss << "pool '" << pool_name
- << "' has " << si_t(sum.num_bytes) << " bytes"
- << " (max " << si_t(pool.quota_max_bytes) << ")";
+ << "' has " << byte_u_t(sum.num_bytes) << " bytes"
+ << " (max " << byte_u_t(pool.quota_max_bytes) << ")";
status = HEALTH_WARN;
}
if (status != HEALTH_OK) {
pcb.add_time_avg(l_paxos_refresh_latency, "refresh_latency", "Refresh latency");
pcb.add_u64_counter(l_paxos_begin, "begin", "Started and handled begins");
pcb.add_u64_avg(l_paxos_begin_keys, "begin_keys", "Keys in transaction on begin");
- pcb.add_u64_avg(l_paxos_begin_bytes, "begin_bytes", "Data in transaction on begin");
+ pcb.add_u64_avg(l_paxos_begin_bytes, "begin_bytes", "Data in transaction on begin", NULL, 0, unit_t(BYTES));
pcb.add_time_avg(l_paxos_begin_latency, "begin_latency", "Latency of begin operation");
pcb.add_u64_counter(l_paxos_commit, "commit",
"Commits", "cmt");
pcb.add_u64_avg(l_paxos_commit_keys, "commit_keys", "Keys in transaction on commit");
- pcb.add_u64_avg(l_paxos_commit_bytes, "commit_bytes", "Data in transaction on commit");
+ pcb.add_u64_avg(l_paxos_commit_bytes, "commit_bytes", "Data in transaction on commit", NULL, 0, unit_t(BYTES));
pcb.add_time_avg(l_paxos_commit_latency, "commit_latency",
"Commit latency", "clat");
pcb.add_u64_counter(l_paxos_collect, "collect", "Peon collects");
pcb.add_u64_avg(l_paxos_collect_keys, "collect_keys", "Keys in transaction on peon collect");
- pcb.add_u64_avg(l_paxos_collect_bytes, "collect_bytes", "Data in transaction on peon collect");
+ pcb.add_u64_avg(l_paxos_collect_bytes, "collect_bytes", "Data in transaction on peon collect", NULL, 0, unit_t(BYTES));
pcb.add_time_avg(l_paxos_collect_latency, "collect_latency", "Peon collect latency");
pcb.add_u64_counter(l_paxos_collect_uncommitted, "collect_uncommitted", "Uncommitted values in started and handled collects");
pcb.add_u64_counter(l_paxos_collect_timeout, "collect_timeout", "Collect timeouts");
pcb.add_u64_counter(l_paxos_lease_timeout, "lease_timeout", "Lease timeouts");
pcb.add_u64_counter(l_paxos_store_state, "store_state", "Store a shared state on disk");
pcb.add_u64_avg(l_paxos_store_state_keys, "store_state_keys", "Keys in transaction in stored state");
- pcb.add_u64_avg(l_paxos_store_state_bytes, "store_state_bytes", "Data in transaction in stored state");
+ pcb.add_u64_avg(l_paxos_store_state_bytes, "store_state_bytes", "Data in transaction in stored state", NULL, 0, unit_t(BYTES));
pcb.add_time_avg(l_paxos_store_state_latency, "store_state_latency", "Storing state latency");
pcb.add_u64_counter(l_paxos_share_state, "share_state", "Sharings of state");
pcb.add_u64_avg(l_paxos_share_state_keys, "share_state_keys", "Keys in shared state");
- pcb.add_u64_avg(l_paxos_share_state_bytes, "share_state_bytes", "Data in shared state");
+ pcb.add_u64_avg(l_paxos_share_state_bytes, "share_state_bytes", "Data in shared state", NULL, 0, unit_t(BYTES));
pcb.add_u64_counter(l_paxos_new_pn, "new_pn", "New proposal number queries");
pcb.add_time_avg(l_paxos_new_pn_latency, "new_pn_latency", "New proposal number getting latency");
logger = pcb.create_perf_counters();
protected:
FSMap &get_pending_fsmap_writeable() { assert(is_leader()); return pending_fsmap; }
- /* get_working_fsmap returns the "relevant" version of the fsmap (see MDSMonitor.cc history)
- * used depending in helper methods of MDSMonitor.cc.
- *
- * This is technically evil and will be removed in the future.
- *
- * See discussion: https://github.com/ceph/ceph/pull/21458#discussion_r182081366
- */
- const FSMap &get_working_fsmap() const { return is_leader() ? pending_fsmap : fsmap; }
-
FSMap &create_pending() {
assert(is_leader());
pending_fsmap = fsmap;
inline ostream& operator<<(ostream& out, const MonSession& s)
{
- out << "MonSession(" << s.inst << " is "
- << (s.closed ? "closed" : "open");
- out << " " << s.caps << ")";
+ out << "MonSession(" << s.inst << " is " << (s.closed ? "closed" : "open")
+ << " " << s.caps << ", features 0x" << std::hex << s.con_features << std::dec
+ << " (" << ceph_release_name(ceph_release_from_features(s.con_features))
+ << "))";
return out;
}
plb.add_u64_counter(l_msgr_recv_messages, "msgr_recv_messages", "Network received messages");
plb.add_u64_counter(l_msgr_send_messages, "msgr_send_messages", "Network sent messages");
- plb.add_u64_counter(l_msgr_recv_bytes, "msgr_recv_bytes", "Network received bytes");
- plb.add_u64_counter(l_msgr_send_bytes, "msgr_send_bytes", "Network sent bytes");
+ plb.add_u64_counter(l_msgr_recv_bytes, "msgr_recv_bytes", "Network received bytes", NULL, 0, unit_t(BYTES));
+ plb.add_u64_counter(l_msgr_send_bytes, "msgr_send_bytes", "Network sent bytes", NULL, 0, unit_t(BYTES));
plb.add_u64_counter(l_msgr_active_connections, "msgr_active_connections", "Active connection number");
plb.add_u64_counter(l_msgr_created_connections, "msgr_created_connections", "Created connection number");
plb.add_u64_counter(l_dpdk_qp_tx_packets, "dpdk_send_packets", "DPDK sendd packets");
plb.add_u64_counter(l_dpdk_qp_rx_bad_checksum_errors, "dpdk_receive_bad_checksum_errors", "DPDK received bad checksum packets");
plb.add_u64_counter(l_dpdk_qp_rx_no_memory_errors, "dpdk_receive_no_memory_errors", "DPDK received no memory packets");
- plb.add_u64_counter(l_dpdk_qp_rx_bytes, "dpdk_receive_bytes", "DPDK received bytes");
- plb.add_u64_counter(l_dpdk_qp_tx_bytes, "dpdk_send_bytes", "DPDK sendd bytes");
+ plb.add_u64_counter(l_dpdk_qp_rx_bytes, "dpdk_receive_bytes", "DPDK received bytes", NULL, 0, unit_t(BYTES));
+ plb.add_u64_counter(l_dpdk_qp_tx_bytes, "dpdk_send_bytes", "DPDK sendd bytes", NULL, 0, unit_t(BYTES));
plb.add_u64_counter(l_dpdk_qp_rx_last_bunch, "dpdk_receive_last_bunch", "DPDK last received bunch");
plb.add_u64_counter(l_dpdk_qp_tx_last_bunch, "dpdk_send_last_bunch", "DPDK last send bunch");
plb.add_u64_counter(l_dpdk_qp_rx_fragments, "dpdk_receive_fragments", "DPDK received total fragments");
plb.add_u64_counter(l_dpdk_qp_tx_fragments, "dpdk_send_fragments", "DPDK sendd total fragments");
plb.add_u64_counter(l_dpdk_qp_rx_copy_ops, "dpdk_receive_copy_ops", "DPDK received copy operations");
plb.add_u64_counter(l_dpdk_qp_tx_copy_ops, "dpdk_send_copy_ops", "DPDK sendd copy operations");
- plb.add_u64_counter(l_dpdk_qp_rx_copy_bytes, "dpdk_receive_copy_bytes", "DPDK received copy bytes");
- plb.add_u64_counter(l_dpdk_qp_tx_copy_bytes, "dpdk_send_copy_bytes", "DPDK send copy bytes");
+ plb.add_u64_counter(l_dpdk_qp_rx_copy_bytes, "dpdk_receive_copy_bytes", "DPDK received copy bytes", NULL, 0, unit_t(BYTES));
+ plb.add_u64_counter(l_dpdk_qp_tx_copy_bytes, "dpdk_send_copy_bytes", "DPDK send copy bytes", NULL, 0, unit_t(BYTES));
plb.add_u64_counter(l_dpdk_qp_rx_linearize_ops, "dpdk_receive_linearize_ops", "DPDK received linearize operations");
plb.add_u64_counter(l_dpdk_qp_tx_linearize_ops, "dpdk_send_linearize_ops", "DPDK send linearize operations");
plb.add_u64_counter(l_dpdk_qp_tx_queue_length, "dpdk_send_queue_length", "DPDK send queue length");
plb.add_u64_counter(l_msgr_rdma_rx_no_registered_mem, "rx_no_registered_mem", "The count of no registered buffer when receiving");
plb.add_u64_counter(l_msgr_rdma_tx_chunks, "tx_chunks", "The number of tx chunks transmitted");
- plb.add_u64_counter(l_msgr_rdma_tx_bytes, "tx_bytes", "The bytes of tx chunks transmitted");
+ plb.add_u64_counter(l_msgr_rdma_tx_bytes, "tx_bytes", "The bytes of tx chunks transmitted", NULL, 0, unit_t(BYTES));
plb.add_u64_counter(l_msgr_rdma_rx_chunks, "rx_chunks", "The number of rx chunks transmitted");
- plb.add_u64_counter(l_msgr_rdma_rx_bytes, "rx_bytes", "The bytes of rx chunks transmitted");
+ plb.add_u64_counter(l_msgr_rdma_rx_bytes, "rx_bytes", "The bytes of rx chunks transmitted", NULL, 0, unit_t(BYTES));
plb.add_u64_counter(l_msgr_rdma_pending_sent_conns, "pending_sent_conns", "The count of pending sent conns");
perf_logger = plb.create_perf_counters();
PerfCountersBuilder b(cct, "bluefs",
l_bluefs_first, l_bluefs_last);
b.add_u64_counter(l_bluefs_gift_bytes, "gift_bytes",
- "Bytes gifted from BlueStore");
+ "Bytes gifted from BlueStore", NULL, 0, unit_t(BYTES));
b.add_u64_counter(l_bluefs_reclaim_bytes, "reclaim_bytes",
- "Bytes reclaimed by BlueStore");
+ "Bytes reclaimed by BlueStore", NULL, 0, unit_t(BYTES));
b.add_u64(l_bluefs_db_total_bytes, "db_total_bytes",
"Total bytes (main db device)",
- "b", PerfCountersBuilder::PRIO_USEFUL);
+ "b", PerfCountersBuilder::PRIO_USEFUL, unit_t(BYTES));
b.add_u64(l_bluefs_db_used_bytes, "db_used_bytes",
"Used bytes (main db device)",
- "u", PerfCountersBuilder::PRIO_USEFUL);
+ "u", PerfCountersBuilder::PRIO_USEFUL, unit_t(BYTES));
b.add_u64(l_bluefs_wal_total_bytes, "wal_total_bytes",
"Total bytes (wal device)",
- "walb", PerfCountersBuilder::PRIO_USEFUL);
+ "walb", PerfCountersBuilder::PRIO_USEFUL, unit_t(BYTES));
b.add_u64(l_bluefs_wal_used_bytes, "wal_used_bytes",
"Used bytes (wal device)",
- "walu", PerfCountersBuilder::PRIO_USEFUL);
+ "walu", PerfCountersBuilder::PRIO_USEFUL, unit_t(BYTES));
b.add_u64(l_bluefs_slow_total_bytes, "slow_total_bytes",
"Total bytes (slow device)",
- "slob", PerfCountersBuilder::PRIO_USEFUL);
+ "slob", PerfCountersBuilder::PRIO_USEFUL, unit_t(BYTES));
b.add_u64(l_bluefs_slow_used_bytes, "slow_used_bytes",
"Used bytes (slow device)",
- "slou", PerfCountersBuilder::PRIO_USEFUL);
+ "slou", PerfCountersBuilder::PRIO_USEFUL, unit_t(BYTES));
b.add_u64(l_bluefs_num_files, "num_files", "File count",
"f", PerfCountersBuilder::PRIO_USEFUL);
b.add_u64(l_bluefs_log_bytes, "log_bytes", "Size of the metadata log",
- "jlen", PerfCountersBuilder::PRIO_INTERESTING);
+ "jlen", PerfCountersBuilder::PRIO_INTERESTING, unit_t(BYTES));
b.add_u64_counter(l_bluefs_log_compactions, "log_compactions",
"Compactions of the metadata log");
b.add_u64_counter(l_bluefs_logged_bytes, "logged_bytes",
"Bytes written to the metadata log", "j",
- PerfCountersBuilder::PRIO_CRITICAL);
+ PerfCountersBuilder::PRIO_CRITICAL, unit_t(BYTES));
b.add_u64_counter(l_bluefs_files_written_wal, "files_written_wal",
"Files written to WAL");
b.add_u64_counter(l_bluefs_files_written_sst, "files_written_sst",
PerfCountersBuilder::PRIO_CRITICAL);
b.add_u64_counter(l_bluefs_bytes_written_sst, "bytes_written_sst",
"Bytes written to SSTs", "sst",
- PerfCountersBuilder::PRIO_CRITICAL);
+ PerfCountersBuilder::PRIO_CRITICAL, unit_t(BYTES));
logger = b.create_perf_counters();
cct->get_perfcounters_collection()->add(logger);
}
return r;
}
dout(1) << __func__ << " bdev " << id << " path " << path
- << " size " << pretty_si_t(b->get_size()) << "B" << dendl;
+ << " size " << byte_u_t(b->get_size()) << dendl;
bdev[id] = b;
ioc[id] = new IOContext(cct, NULL);
return 0;
(block_total[id] - (*usage)[id].first) * 100 / block_total[id];
dout(10) << __func__ << " bdev " << id
<< " free " << (*usage)[id].first
- << " (" << pretty_si_t((*usage)[id].first) << "B)"
+ << " (" << byte_u_t((*usage)[id].first) << ")"
<< " / " << (*usage)[id].second
- << " (" << pretty_si_t((*usage)[id].second) << "B)"
+ << " (" << byte_u_t((*usage)[id].second) << ")"
<< ", used " << used << "%"
<< dendl;
}
target_buffer = min(target_bytes - target_meta, target_buffer);
if (current <= target_bytes) {
- dout(10) << __func__
- << " shard target " << pretty_si_t(target_bytes)
+ dout(30) << __func__
+ << " shard target " << byte_u_t(target_bytes)
<< " meta/data ratios " << target_meta_ratio
<< " + " << target_data_ratio << " ("
- << pretty_si_t(target_meta) << " + "
- << pretty_si_t(target_buffer) << "), "
- << " current " << pretty_si_t(current) << " ("
- << pretty_si_t(current_meta) << " + "
- << pretty_si_t(current_buffer) << ")"
+ << byte_u_t(target_meta) << " + "
+ << byte_u_t(target_buffer) << "), "
+ << " current " << byte_u_t(current) << " ("
+ << byte_u_t(current_meta) << " + "
+ << byte_u_t(current_buffer) << ")"
<< dendl;
return;
}
uint64_t max_meta = current_meta - free_meta;
uint64_t max_onodes = max_meta / bytes_per_onode;
- dout(10) << __func__
- << " shard target " << pretty_si_t(target_bytes)
+ dout(20) << __func__
+ << " shard target " << byte_u_t(target_bytes)
<< " ratio " << target_meta_ratio << " ("
- << pretty_si_t(target_meta) << " + "
- << pretty_si_t(target_buffer) << "), "
- << " current " << pretty_si_t(current) << " ("
- << pretty_si_t(current_meta) << " + "
- << pretty_si_t(current_buffer) << "),"
- << " need_to_free " << pretty_si_t(need_to_free) << " ("
- << pretty_si_t(free_meta) << " + "
- << pretty_si_t(free_buffer) << ")"
+ << byte_u_t(target_meta) << " + "
+ << byte_u_t(target_buffer) << "), "
+ << " current " << byte_u_t(current) << " ("
+ << byte_u_t(current_meta) << " + "
+ << byte_u_t(current_buffer) << "),"
+ << " need_to_free " << byte_u_t(need_to_free) << " ("
+ << byte_u_t(free_meta) << " + "
+ << byte_u_t(free_buffer) << ")"
<< " -> max " << max_onodes << " onodes + "
<< max_buffer << " buffer"
<< dendl;
}
if (evicted > 0) {
- dout(20) << __func__ << " evicted " << prettybyte_t(evicted)
+ dout(20) << __func__ << " evicted " << byte_u_t(evicted)
<< " from warm_in list, done evicting warm_in buffers"
<< dendl;
}
}
if (evicted > 0) {
- dout(20) << __func__ << " evicted " << prettybyte_t(evicted)
+ dout(20) << __func__ << " evicted " << byte_u_t(evicted)
<< " from hot list, done evicting hot buffers"
<< dendl;
}
BlueStore::SharedBlob::~SharedBlob()
{
- if (get_cache()) { // the dummy instances have a nullptr
- std::lock_guard<std::recursive_mutex> l(get_cache()->lock);
- bc._clear(get_cache());
- get_cache()->rm_blob();
- }
if (loaded && persistent) {
delete persistent;
}
ldout(coll->store->cct, 20) << __func__ << " " << this
<< " removing self from set " << get_parent()
<< dendl;
- if (get_parent()) {
- get_parent()->remove(this);
+ again:
+ auto coll_snap = coll;
+ if (coll_snap) {
+ std::lock_guard<std::recursive_mutex> l(coll_snap->cache->lock);
+ if (coll_snap != coll) {
+ goto again;
+ }
+ coll_snap->shared_blob_set.remove(this);
+
+ bc._clear(coll_snap->cache);
+ coll_snap->cache->rm_blob();
}
delete this;
}
<< std::dec << dendl;
}
+void BlueStore::_set_finisher_num()
+{
+ if (cct->_conf->bluestore_shard_finishers) {
+ if (cct->_conf->osd_op_num_shards) {
+ m_finisher_num = cct->_conf->osd_op_num_shards;
+ } else {
+ assert(bdev);
+ if (bdev->is_rotational()) {
+ m_finisher_num = cct->_conf->osd_op_num_shards_hdd;
+ } else {
+ m_finisher_num = cct->_conf->osd_op_num_shards_ssd;
+ }
+ }
+ }
+ assert(m_finisher_num != 0);
+}
+
int BlueStore::_set_cache_sizes()
{
assert(bdev);
b.add_u64_counter(l_bluestore_compress_rejected_count, "compress_rejected_count",
"Sum for compress ops rejected due to low net gain of space");
b.add_u64_counter(l_bluestore_write_pad_bytes, "write_pad_bytes",
- "Sum for write-op padded bytes");
+ "Sum for write-op padded bytes", NULL, 0, unit_t(BYTES));
b.add_u64_counter(l_bluestore_deferred_write_ops, "deferred_write_ops",
"Sum for deferred write op");
b.add_u64_counter(l_bluestore_deferred_write_bytes, "deferred_write_bytes",
- "Sum for deferred write bytes", "def");
+ "Sum for deferred write bytes", "def", 0, unit_t(BYTES));
b.add_u64_counter(l_bluestore_write_penalty_read_ops, "write_penalty_read_ops",
"Sum for write penalty read ops");
b.add_u64(l_bluestore_allocated, "bluestore_allocated",
b.add_u64(l_bluestore_buffers, "bluestore_buffers",
"Number of buffers in cache");
b.add_u64(l_bluestore_buffer_bytes, "bluestore_buffer_bytes",
- "Number of buffer bytes in cache");
+ "Number of buffer bytes in cache", NULL, 0, unit_t(BYTES));
b.add_u64(l_bluestore_buffer_hit_bytes, "bluestore_buffer_hit_bytes",
- "Sum for bytes of read hit in the cache");
+ "Sum for bytes of read hit in the cache", NULL, 0, unit_t(BYTES));
b.add_u64(l_bluestore_buffer_miss_bytes, "bluestore_buffer_miss_bytes",
- "Sum for bytes of read missed in the cache");
+ "Sum for bytes of read missed in the cache", NULL, 0, unit_t(BYTES));
b.add_u64_counter(l_bluestore_write_big, "bluestore_write_big",
"Large aligned writes into fresh blobs");
b.add_u64_counter(l_bluestore_write_big_bytes, "bluestore_write_big_bytes",
- "Large aligned writes into fresh blobs (bytes)");
+ "Large aligned writes into fresh blobs (bytes)", NULL, 0, unit_t(BYTES));
b.add_u64_counter(l_bluestore_write_big_blobs, "bluestore_write_big_blobs",
"Large aligned writes into fresh blobs (blobs)");
b.add_u64_counter(l_bluestore_write_small, "bluestore_write_small",
"Small writes into existing or sparse small blobs");
b.add_u64_counter(l_bluestore_write_small_bytes, "bluestore_write_small_bytes",
- "Small writes into existing or sparse small blobs (bytes)");
+ "Small writes into existing or sparse small blobs (bytes)", NULL, 0, unit_t(BYTES));
b.add_u64_counter(l_bluestore_write_small_unused,
"bluestore_write_small_unused",
"Small writes into unused portion of existing blob");
bytes += length;
}
fm->enumerate_reset();
- dout(1) << __func__ << " loaded " << pretty_si_t(bytes)
+ dout(1) << __func__ << " loaded " << byte_u_t(bytes)
<< " in " << num << " extents"
<< dendl;
float bluefs_ratio = (float)bluefs_free / (float)total_free;
dout(10) << __func__
- << " bluefs " << pretty_si_t(bluefs_free)
+ << " bluefs " << byte_u_t(bluefs_free)
<< " free (" << bluefs_free_ratio
- << ") bluestore " << pretty_si_t(my_free)
+ << ") bluestore " << byte_u_t(my_free)
<< " free (" << my_free_ratio
<< "), bluefs_ratio " << bluefs_ratio
<< dendl;
gift = cct->_conf->bluestore_bluefs_gift_ratio * total_free;
dout(10) << __func__ << " bluefs_ratio " << bluefs_ratio
<< " < min_ratio " << cct->_conf->bluestore_bluefs_min_ratio
- << ", should gift " << pretty_si_t(gift) << dendl;
+ << ", should gift " << byte_u_t(gift) << dendl;
} else if (bluefs_ratio > cct->_conf->bluestore_bluefs_max_ratio) {
reclaim = cct->_conf->bluestore_bluefs_reclaim_ratio * total_free;
if (bluefs_total - reclaim < cct->_conf->bluestore_bluefs_min)
reclaim = bluefs_total - cct->_conf->bluestore_bluefs_min;
dout(10) << __func__ << " bluefs_ratio " << bluefs_ratio
<< " > max_ratio " << cct->_conf->bluestore_bluefs_max_ratio
- << ", should reclaim " << pretty_si_t(reclaim) << dendl;
+ << ", should reclaim " << byte_u_t(reclaim) << dendl;
}
// don't take over too much of the freespace
uint64_t g = cct->_conf->bluestore_bluefs_min - bluefs_total;
dout(10) << __func__ << " bluefs_total " << bluefs_total
<< " < min " << cct->_conf->bluestore_bluefs_min
- << ", should gift " << pretty_si_t(g) << dendl;
+ << ", should gift " << byte_u_t(g) << dendl;
if (g > gift)
gift = g;
reclaim = 0;
uint64_t g = min_free - bluefs_free;
dout(10) << __func__ << " bluefs_free " << bluefs_total
<< " < min " << min_free
- << ", should gift " << pretty_si_t(g) << dendl;
+ << ", should gift " << byte_u_t(g) << dendl;
if (g > gift)
gift = g;
reclaim = 0;
// hard cap to fit into 32 bits
gift = MIN(gift, 1ull<<31);
dout(10) << __func__ << " gifting " << gift
- << " (" << pretty_si_t(gift) << ")" << dendl;
+ << " (" << byte_u_t(gift) << ")" << dendl;
// fixme: just do one allocation to start...
int r = alloc->reserve(gift);
// hard cap to fit into 32 bits
reclaim = MIN(reclaim, 1ull<<31);
dout(10) << __func__ << " reclaiming " << reclaim
- << " (" << pretty_si_t(reclaim) << ")" << dendl;
+ << " (" << byte_u_t(reclaim) << ")" << dendl;
while (reclaim > 0) {
// NOTE: this will block and do IO.
}
}
dout(1) << __func__ << " resized " << name << " file to "
- << pretty_si_t(size) << "B" << dendl;
+ << byte_u_t(size) << dendl;
}
VOID_TEMP_FAILURE_RETRY(::close(fd));
} else {
_set_compression();
_set_blob_size();
+ _set_finisher_num();
+
return 0;
}
}
return;
case TransContext::STATE_KV_SUBMITTED:
- txc->log_state_latency(logger, l_bluestore_state_kv_committing_lat);
- txc->state = TransContext::STATE_KV_DONE;
_txc_committed_kv(txc);
// ** fall-thru **
txc->onreadable = NULL;
}
- if (!txc->oncommits.empty()) {
- finishers[n]->queue(txc->oncommits);
+ {
+ std::lock_guard<std::mutex> l(txc->osr->qlock);
+ txc->state = TransContext::STATE_KV_DONE;
+ if (!txc->oncommits.empty()) {
+ finishers[n]->queue(txc->oncommits);
+ }
}
+ txc->log_state_latency(logger, l_bluestore_state_kv_committing_lat);
}
void BlueStore::_txc_finish(TransContext *txc)
{
dout(10) << __func__ << dendl;
- if (cct->_conf->bluestore_shard_finishers) {
- if (cct->_conf->osd_op_num_shards) {
- m_finisher_num = cct->_conf->osd_op_num_shards;
- } else {
- assert(bdev);
- if (bdev->is_rotational()) {
- m_finisher_num = cct->_conf->osd_op_num_shards_hdd;
- } else {
- m_finisher_num = cct->_conf->osd_op_num_shards_ssd;
- }
- }
- }
-
- assert(m_finisher_num != 0);
-
for (int i = 0; i < m_finisher_num; ++i) {
ostringstream oss;
oss << "finisher-" << i;
{
auto& extents_to_collect = gc.get_extents_to_collect();
+ bool dirty_range_updated = false;
WriteContext wctx_gc;
wctx_gc.fork(wctx); // make a clone for garbage collection
if (*dirty_start > it->offset) {
*dirty_start = it->offset;
+ dirty_range_updated = true;
}
if (*dirty_end < it->offset + it->length) {
*dirty_end = it->offset + it->length;
+ dirty_range_updated = true;
}
}
+ if (dirty_range_updated) {
+ o->extent_map.fault_range(db, *dirty_start, *dirty_end);
+ }
dout(30) << __func__ << " alloc write" << dendl;
int r = _do_alloc_write(txc, c, o, &wctx_gc);
<< dendl;
goto out;
}
+ dout(20)<<__func__<<" gc range is " << std::hex << dirty_start
+ << "~" << dirty_end - dirty_start << std::dec << dendl;
}
}
-
o->extent_map.compress_extent_map(dirty_start, dirty_end - dirty_start);
o->extent_map.dirty_range(dirty_start, dirty_end - dirty_start);
void _close_fsid();
void _set_alloc_sizes();
void _set_blob_size();
+ void _set_finisher_num();
int _open_bdev(bool create);
void _close_bdev();
dout(1) << __func__
<< " size " << size
<< " (0x" << std::hex << size << std::dec << ", "
- << pretty_si_t(size) << "B)"
+ << byte_u_t(size) << ")"
<< " block_size " << block_size
- << " (" << pretty_si_t(block_size) << "B)"
+ << " (" << byte_u_t(block_size) << ")"
<< " " << (rotational ? "rotational" : "non-rotational")
<< dendl;
return 0;
// round size down to an even block
size &= ~(block_size - 1);
- dout(1) << __func__ << " size " << size << " (" << pretty_si_t(size) << "B)"
- << " block_size " << block_size << " (" << pretty_si_t(block_size)
- << "B)" << dendl;
+ dout(1) << __func__ << " size " << size << " (" << byte_u_t(size) << ")"
+ << " block_size " << block_size << " (" << byte_u_t(block_size)
+ << ")" << dendl;
return 0;
}
{
dout(1) << __func__ << dendl;
+ delete queue_t;
+ queue_t = nullptr;
name.clear();
driver->remove_device(this);
dout(1) << __func__
<< " size " << size
- << " (" << pretty_si_t(size) << "B)"
+ << " (" << byte_u_t(size) << ")"
<< " block_size " << block_size
- << " (" << pretty_si_t(block_size) << "B)"
+ << " (" << byte_u_t(block_size) << ")"
<< dendl;
return 0;
uint64_t expected_num_objs ///< [in] expected number of objects this collection has
) { ceph_abort(); return 0; }
- virtual int apply_layout_settings() { ceph_abort(); return 0; }
+ virtual int apply_layout_settings(int target_level) { ceph_abort(); return 0; }
/// Read index-wide settings (should be called after construction)
virtual int read_settings() { return 0; }
return res;
}
-int FileStore::apply_layout_settings(const coll_t &cid)
+int FileStore::apply_layout_settings(const coll_t &cid, int target_level)
{
- dout(20) << __FUNC__ << ": " << cid << dendl;
+ dout(20) << __FUNC__ << ": " << cid << " target level: "
+ << target_level << dendl;
Index index;
int r = get_index(cid, &index);
if (r < 0) {
return r;
}
- return index->apply_layout_settings();
+ return index->apply_layout_settings(target_level);
}
void dump_stop();
void dump_transactions(vector<Transaction>& ls, uint64_t seq, OpSequencer *osr);
- virtual int apply_layout_settings(const coll_t &cid);
+ virtual int apply_layout_settings(const coll_t &cid, int target_level);
private:
void _inject_failure();
&mkdirred);
}
-int HashIndex::split_dirs(const vector<string> &path) {
- dout(20) << __func__ << " " << path << dendl;
+int HashIndex::split_dirs(const vector<string> &path, int target_level) {
+ dout(20) << __func__ << " " << path << " target level: "
+ << target_level << dendl;
subdir_info_s info;
int r = get_info(path, &info);
if (r < 0) {
return r;
}
- if (must_split(info)) {
+ if (must_split(info, target_level)) {
dout(1) << __func__ << " " << path << " has " << info.objs
- << " objects, starting split." << dendl;
+ << " objects, " << info.hash_level
+ << " level, starting split in pg " << coll() << "." << dendl;
r = initiate_split(path, info);
if (r < 0) {
dout(10) << "error initiating split on " << path << ": "
}
r = complete_split(path, info);
- dout(1) << __func__ << " " << path << " split completed."
+ dout(1) << __func__ << " " << path << " split completed in pg " << coll() << "."
<< dendl;
if (r < 0) {
dout(10) << "error completing split on " << path << ": "
it != subdirs.end(); ++it) {
vector<string> subdir_path(path);
subdir_path.push_back(*it);
- r = split_dirs(subdir_path);
+ r = split_dirs(subdir_path, target_level);
if (r < 0) {
return r;
}
return r;
}
-int HashIndex::apply_layout_settings() {
+int HashIndex::apply_layout_settings(int target_level) {
vector<string> path;
dout(10) << __func__ << " split multiple = " << split_multiplier
<< " merge threshold = " << merge_threshold
<< " split rand factor = " << cct->_conf->filestore_split_rand_factor
+ << " target level = " << target_level
<< dendl;
int r = write_settings();
if (r < 0)
return r;
- return split_dirs(path);
+ return split_dirs(path, target_level);
}
int HashIndex::_init() {
if (must_split(info)) {
dout(1) << __func__ << " " << path << " has " << info.objs
- << " objects, starting split." << dendl;
+ << " objects, starting split in pg " << coll() << "." << dendl;
int r = initiate_split(path, info);
if (r < 0)
return r;
r = complete_split(path, info);
- dout(1) << __func__ << " " << path << " split completed."
+ dout(1) << __func__ << " " << path << " split completed in pg " << coll() << "."
<< dendl;
return r;
} else {
info.subdirs == 0);
}
-bool HashIndex::must_split(const subdir_info_s &info) {
+bool HashIndex::must_split(const subdir_info_s &info, int target_level) {
+ // target_level is used for ceph-objectstore-tool to split dirs offline.
+ // if it is set (default is 0) and current hash level < target_level,
+ // this dir would be split no matter how many objects it has.
return (info.hash_level < (unsigned)MAX_HASH_LEVEL &&
- info.objs > ((unsigned)(abs(merge_threshold) * split_multiplier + settings.split_rand_factor) * 16));
-
+ ((target_level > 0 && info.hash_level < (unsigned)target_level) ||
+ (info.objs > ((unsigned)(abs(merge_threshold) * split_multiplier + settings.split_rand_factor) * 16))));
}
int HashIndex::initiate_merge(const vector<string> &path, subdir_info_s info) {
) override;
/// @see CollectionIndex
- int apply_layout_settings() override;
+ int apply_layout_settings(int target_level) override;
protected:
int _init() override;
/// Encapsulates logic for when to merge.
bool must_split(
- const subdir_info_s &info ///< [in] Info to check
+ const subdir_info_s &info, ///< [in] Info to check
+ int target_level = 0
); /// @return True if info must be split, False otherwise
/// Initiates merge
int recursive_create_path(vector<string>& path, int level);
/// split each dir below the given path
- int split_dirs(const vector<string> &path);
+ int split_dirs(const vector<string> &path, int target_level = 0);
int write_settings();
};
PerfCountersBuilder b(
cct, string("WBThrottle"),
l_wbthrottle_first, l_wbthrottle_last);
- b.add_u64(l_wbthrottle_bytes_dirtied, "bytes_dirtied", "Dirty data");
- b.add_u64(l_wbthrottle_bytes_wb, "bytes_wb", "Written data");
+ b.add_u64(l_wbthrottle_bytes_dirtied, "bytes_dirtied", "Dirty data", NULL, 0, unit_t(BYTES));
+ b.add_u64(l_wbthrottle_bytes_wb, "bytes_wb", "Written data", NULL, 0, unit_t(BYTES));
b.add_u64(l_wbthrottle_ios_dirtied, "ios_dirtied", "Dirty operations");
b.add_u64(l_wbthrottle_ios_wb, "ios_wb", "Written operations");
b.add_u64(l_wbthrottle_inodes_dirtied, "inodes_dirtied", "Entries waiting for write");
uint64_t attempts, obj, bytes;
promote_counter.sample_and_attenuate(&attempts, &obj, &bytes);
dout(10) << __func__ << " " << attempts << " attempts, promoted "
- << obj << " objects and " << pretty_si_t(bytes) << " bytes; target "
+ << obj << " objects and " << byte_u_t(bytes) << "; target "
<< target_obj_sec << " obj/sec or "
- << pretty_si_t(target_bytes_sec) << " bytes/sec"
+ << byte_u_t(target_bytes_sec) << "/sec"
<< dendl;
// calculate what the probability *should* be, given the targets
osd_plb.add_u64_counter(
l_osd_op_inb, "op_in_bytes",
"Client operations total write size",
- "wr", PerfCountersBuilder::PRIO_INTERESTING);
+ "wr", PerfCountersBuilder::PRIO_INTERESTING, unit_t(BYTES));
osd_plb.add_u64_counter(
l_osd_op_outb, "op_out_bytes",
"Client operations total read size",
- "rd", PerfCountersBuilder::PRIO_INTERESTING);
+ "rd", PerfCountersBuilder::PRIO_INTERESTING, unit_t(BYTES));
osd_plb.add_time_avg(
l_osd_op_lat, "op_latency",
"Latency of client operations (including queue time)",
osd_plb.add_u64_counter(
l_osd_op_r, "op_r", "Client read operations");
osd_plb.add_u64_counter(
- l_osd_op_r_outb, "op_r_out_bytes", "Client data read");
+ l_osd_op_r_outb, "op_r_out_bytes", "Client data read", NULL, PerfCountersBuilder::PRIO_USEFUL, unit_t(BYTES));
osd_plb.add_time_avg(
l_osd_op_r_lat, "op_r_latency",
"Latency of read operation (including queue time)");
"Client read-modify-write operations");
osd_plb.add_u64_counter(
l_osd_op_rw_inb, "op_rw_in_bytes",
- "Client read-modify-write operations write in");
+ "Client read-modify-write operations write in", NULL, PerfCountersBuilder::PRIO_USEFUL, unit_t(BYTES));
osd_plb.add_u64_counter(
l_osd_op_rw_outb,"op_rw_out_bytes",
- "Client read-modify-write operations read out ");
+ "Client read-modify-write operations read out ", NULL, PerfCountersBuilder::PRIO_USEFUL, unit_t(BYTES));
osd_plb.add_time_avg(
l_osd_op_rw_lat, "op_rw_latency",
"Latency of read-modify-write operation (including queue time)");
osd_plb.add_u64_counter(
l_osd_sop, "subop", "Suboperations");
osd_plb.add_u64_counter(
- l_osd_sop_inb, "subop_in_bytes", "Suboperations total size");
+ l_osd_sop_inb, "subop_in_bytes", "Suboperations total size", NULL, 0, unit_t(BYTES));
osd_plb.add_time_avg(l_osd_sop_lat, "subop_latency", "Suboperations latency");
osd_plb.add_u64_counter(l_osd_sop_w, "subop_w", "Replicated writes");
osd_plb.add_u64_counter(
- l_osd_sop_w_inb, "subop_w_in_bytes", "Replicated written data size");
+ l_osd_sop_w_inb, "subop_w_in_bytes", "Replicated written data size", NULL, 0, unit_t(BYTES));
osd_plb.add_time_avg(
l_osd_sop_w_lat, "subop_w_latency", "Replicated writes latency");
osd_plb.add_u64_counter(
osd_plb.add_u64_counter(
l_osd_sop_push, "subop_push", "Suboperations push messages");
osd_plb.add_u64_counter(
- l_osd_sop_push_inb, "subop_push_in_bytes", "Suboperations pushed size");
+ l_osd_sop_push_inb, "subop_push_in_bytes", "Suboperations pushed size", NULL, 0, unit_t(BYTES));
osd_plb.add_time_avg(
l_osd_sop_push_lat, "subop_push_latency", "Suboperations push latency");
osd_plb.add_u64_counter(l_osd_pull, "pull", "Pull requests sent");
osd_plb.add_u64_counter(l_osd_push, "push", "Push messages sent");
- osd_plb.add_u64_counter(l_osd_push_outb, "push_out_bytes", "Pushed size");
+ osd_plb.add_u64_counter(l_osd_push_outb, "push_out_bytes", "Pushed size", NULL, 0, unit_t(BYTES));
osd_plb.add_u64_counter(
l_osd_rop, "recovery_ops",
"rop", PerfCountersBuilder::PRIO_INTERESTING);
osd_plb.add_u64(l_osd_loadavg, "loadavg", "CPU load");
- osd_plb.add_u64(l_osd_buf, "buffer_bytes", "Total allocated buffer size");
- osd_plb.add_u64(l_osd_history_alloc_bytes, "history_alloc_Mbytes");
+ osd_plb.add_u64(l_osd_buf, "buffer_bytes", "Total allocated buffer size", NULL, 0, unit_t(BYTES));
+ osd_plb.add_u64(l_osd_history_alloc_bytes, "history_alloc_Mbytes", NULL, 0, unit_t(BYTES));
osd_plb.add_u64(l_osd_history_alloc_num, "history_alloc_num");
osd_plb.add_u64(
l_osd_cached_crc, "cached_crc", "Total number getting crc from crc_cache");
osd_plb.add_u64(
l_osd_stat_bytes, "stat_bytes", "OSD size", "size",
- PerfCountersBuilder::PRIO_USEFUL);
+ PerfCountersBuilder::PRIO_USEFUL, unit_t(BYTES));
osd_plb.add_u64(
l_osd_stat_bytes_used, "stat_bytes_used", "Used space", "used",
- PerfCountersBuilder::PRIO_USEFUL);
- osd_plb.add_u64(l_osd_stat_bytes_avail, "stat_bytes_avail", "Available space");
+ PerfCountersBuilder::PRIO_USEFUL, unit_t(BYTES));
+ osd_plb.add_u64(l_osd_stat_bytes_avail, "stat_bytes_avail", "Available space", NULL, 0, unit_t(BYTES));
osd_plb.add_u64_counter(
l_osd_copyfrom, "copyfrom", "Rados \"copy-from\" operations");
bool is_primary = osdmap->get_pg_acting_rank(pgid.pgid, whoami) == 0;
pending_creates_from_osd.emplace(pgid.pgid, is_primary);
}
- dout(5) << __func__ << " withhold creation of pg " << pgid
+ dout(1) << __func__ << " withhold creation of pg " << pgid
<< ": " << pg_map.size() << " >= "<< max_pgs_per_osd << dendl;
return true;
}
// having a sane value. If we allow any block size to be set things
// can still go sideways.
ss << "block 'size' values are capped at "
- << prettybyte_t(cct->_conf->osd_bench_max_block_size) << ". If you wish to use"
+ << byte_u_t(cct->_conf->osd_bench_max_block_size) << ". If you wish to use"
<< " a higher value, please adjust 'osd_bench_max_block_size'";
r = -EINVAL;
goto out;
bsize * duration * cct->_conf->osd_bench_small_size_max_iops;
if (count > max_count) {
ss << "'count' values greater than " << max_count
- << " for a block size of " << prettybyte_t(bsize) << ", assuming "
+ << " for a block size of " << byte_u_t(bsize) << ", assuming "
<< cct->_conf->osd_bench_small_size_max_iops << " IOPS,"
<< " for " << duration << " seconds,"
<< " can cause ill effects on osd. "
cct->_conf->osd_bench_large_size_max_throughput * duration;
if (count > max_count) {
ss << "'count' values greater than " << max_count
- << " for a block size of " << prettybyte_t(bsize) << ", assuming "
- << prettybyte_t(cct->_conf->osd_bench_large_size_max_throughput) << "/s,"
+ << " for a block size of " << byte_u_t(bsize) << ", assuming "
+ << byte_u_t(cct->_conf->osd_bench_large_size_max_throughput) << "/s,"
<< " for " << duration << " seconds,"
<< " can cause ill effects on osd. "
<< " Please adjust 'osd_bench_large_size_max_throughput'"
bsize = osize;
dout(1) << " bench count " << count
- << " bsize " << prettybyte_t(bsize) << dendl;
+ << " bsize " << byte_u_t(bsize) << dendl;
ObjectStore::Transaction cleanupt;
f->close_section();
f->flush(ss);
} else {
- ss << "bench: wrote " << prettybyte_t(count)
- << " in blocks of " << prettybyte_t(bsize) << " in "
- << (end-start) << " sec at " << prettybyte_t(rate) << "/sec";
+ ss << "bench: wrote " << byte_u_t(count)
+ << " in blocks of " << byte_u_t(bsize) << " in "
+ << (end-start) << " sec at " << byte_u_t(rate) << "/sec";
}
}
}
if (require_osd_release < CEPH_RELEASE_KRAKEN) {
f &= ~(CEPH_FEATURE_SERVER_KRAKEN |
- CEPH_FEATURE_MSG_ADDR2 |
- CEPH_FEATURE_CRUSH_TUNABLES5);
+ CEPH_FEATURE_MSG_ADDR2);
}
if (require_osd_release < CEPH_RELEASE_JEWEL) {
f &= ~(CEPH_FEATURE_SERVER_JEWEL |
- CEPH_FEATURE_NEW_OSDOP_ENCODING);
+ CEPH_FEATURE_NEW_OSDOP_ENCODING |
+ CEPH_FEATURE_CRUSH_TUNABLES5);
}
return f;
}
*tbl << ""
<< ""
<< "" << "TOTAL"
- << si_t(pgs->get_osd_sum().kb << 10)
- << si_t(pgs->get_osd_sum().kb_used << 10)
- << si_t(pgs->get_osd_sum().kb_avail << 10)
+ << byte_u_t(pgs->get_osd_sum().kb << 10)
+ << byte_u_t(pgs->get_osd_sum().kb_used << 10)
+ << byte_u_t(pgs->get_osd_sum().kb_avail << 10)
<< lowprecision_t(average_util)
<< ""
<< TextTable::endrow;
<< c
<< weightf_t(qi.weight)
<< weightf_t(reweight)
- << si_t(kb << 10)
- << si_t(kb_used << 10)
- << si_t(kb_avail << 10)
+ << byte_u_t(kb << 10)
+ << byte_u_t(kb_used << 10)
+ << byte_u_t(kb_avail << 10)
<< lowprecision_t(util)
<< lowprecision_t(var);
encode_features(0),
epoch(e), new_pool_max(-1), new_flags(-1), new_max_osd(-1),
have_crc(false), full_crc(0), inc_crc(0) {
- memset(&fsid, 0, sizeof(fsid));
}
explicit Incremental(bufferlist &bl) {
bufferlist::iterator p = bl.begin();
cached_up_osd_features(0),
crc_defined(false), crc(0),
crush(std::make_shared<CrushWrapper>()) {
- memset(&fsid, 0, sizeof(fsid));
}
// no copying
from, oinfo, omissing, ctx->handle);
if (found_missing && num_unfound_before != missing_loc.num_unfound())
publish_stats_to_osd();
+ // avoid doing this if the peer is empty. This is a bit of paranoia
+ // to avoid doing something rash if add_source_info() above
+ // incorrectly decided we found something new. (if the peer has
+ // last_update=0'0 that's impossible.)
if (found_missing &&
(get_osdmap()->get_features(CEPH_ENTITY_TYPE_OSD, NULL) &
- CEPH_FEATURE_OSD_ERASURE_CODES)) {
+ CEPH_FEATURE_OSD_ERASURE_CODES) &&
+ oinfo.last_update != eversion_t()) {
pg_info_t tinfo(oinfo);
tinfo.pgid.shard = pg_whoami.shard;
(*(ctx->info_map))[from.osd].push_back(
if (p->second.is_delete()) {
ldout(pg->cct, 10) << __func__ << " " << soid
<< " delete, ignoring source" << dendl;
- found_missing = true;
continue;
}
if (oinfo.last_update < need) {
osd->send_message_osd_cluster(mlog, con.get());
}
+void PG::fulfill_query(const MQuery& query, RecoveryCtx *rctx)
+{
+ if (query.query.type == pg_query_t::INFO) {
+ pair<pg_shard_t, pg_info_t> notify_info;
+ update_history(query.query.history);
+ fulfill_info(query.from, query.query, notify_info);
+ rctx->send_notify(
+ notify_info.first,
+ pg_notify_t(
+ notify_info.first.shard, pg_whoami.shard,
+ query.query_epoch,
+ get_osdmap()->get_epoch(),
+ notify_info.second),
+ past_intervals);
+ } else {
+ update_history(query.query.history);
+ fulfill_log(query.from, query.query, query.query_epoch);
+ }
+}
+
void PG::check_full_transition(OSDMapRef lastmap, OSDMapRef osdmap)
{
bool changed = false;
dout(20) << "new interval newup " << newup
<< " newacting " << newacting << dendl;
return true;
- } else {
- return false;
}
+ if (!lastmap->is_up(osd->whoami) && osdmap->is_up(osd->whoami)) {
+ dout(10) << __func__ << " osd transitioned from down -> up" << dendl;
+ return true;
+ }
+ return false;
}
bool PG::old_peering_msg(epoch_t reply_epoch, epoch_t query_epoch)
return discard_event();
}
-boost::statechart::result PG::RecoveryState::ReplicaActive::react(const MQuery& query)
+boost::statechart::result PG::RecoveryState::ReplicaActive::react(
+ const MQuery& query)
{
PG *pg = context< RecoveryMachine >().pg;
- if (query.query.type == pg_query_t::MISSING) {
- pg->update_history(query.query.history);
- pg->fulfill_log(query.from, query.query, query.query_epoch);
- } // else: from prior to activation, safe to ignore
+ pg->fulfill_query(query, context<RecoveryMachine>().get_recovery_ctx());
return discard_event();
}
boost::statechart::result PG::RecoveryState::Stray::react(const MQuery& query)
{
PG *pg = context< RecoveryMachine >().pg;
- if (query.query.type == pg_query_t::INFO) {
- pair<pg_shard_t, pg_info_t> notify_info;
- pg->update_history(query.query.history);
- pg->fulfill_info(query.from, query.query, notify_info);
- context< RecoveryMachine >().send_notify(
- notify_info.first,
- pg_notify_t(
- notify_info.first.shard, pg->pg_whoami.shard,
- query.query_epoch,
- pg->get_osdmap()->get_epoch(),
- notify_info.second),
- pg->past_intervals);
- } else {
- pg->fulfill_log(query.from, query.query, query.query_epoch);
- }
+ pg->fulfill_query(query, context<RecoveryMachine>().get_recovery_ctx());
return discard_event();
}
ovec.insert(ovec.end(), i->second.begin(), i->second.end());
}
}
+
+ void send_notify(pg_shard_t to,
+ const pg_notify_t &info, const PastIntervals &pi) {
+ assert(notify_list);
+ (*notify_list)[to.osd].push_back(make_pair(info, pi));
+ }
};
void send_notify(pg_shard_t to,
const pg_notify_t &info, const PastIntervals &pi) {
assert(state->rctx);
- assert(state->rctx->notify_list);
- (*state->rctx->notify_list)[to.osd].push_back(make_pair(info, pi));
+ state->rctx->send_notify(to, info, pi);
}
};
friend class RecoveryMachine;
void fulfill_info(pg_shard_t from, const pg_query_t &query,
pair<pg_shard_t, pg_info_t> ¬ify_info);
void fulfill_log(pg_shard_t from, const pg_query_t &query, epoch_t query_epoch);
-
+ void fulfill_query(const MQuery& q, RecoveryCtx *rctx);
void check_full_transition(OSDMapRef lastmap, OSDMapRef osdmap);
bool should_restart_peering(
return error == FOUND_ERROR;
}
-static int dcount(const object_info_t &oi)
+static int dcount(const object_info_t &oi, bool prioritize)
{
int count = 0;
+ // Prioritize bluestore objects when osd_distrust_data_digest is set
+ if (prioritize)
+ count += 1000;
if (oi.is_data_digest())
count++;
if (oi.is_omap_digest())
const map<pg_shard_t,ScrubMap*> &maps,
object_info_t *auth_oi,
map<pg_shard_t, shard_info_wrapper> &shard_map,
- inconsistent_obj_wrapper &object_error)
+ inconsistent_obj_wrapper &object_error,
+ bool &digest_match)
{
eversion_t auth_version;
+ bool auth_prio = false;
bufferlist first_oi_bl, first_ss_bl, first_hk_bl;
// Create list of shards with primary first so it will be auth copy all
shards.push_front(get_parent()->whoami_shard());
map<pg_shard_t, ScrubMap *>::const_iterator auth = maps.end();
+ digest_match = true;
for (auto &l : shards) {
+ bool oi_prio = false;
map<pg_shard_t, ScrubMap *>::const_iterator j = maps.find(l);
map<hobject_t, ScrubMap::object>::iterator i =
j->second->objects.find(obj);
error_string += " obj_size_info_mismatch";
}
+ // digest_match will only be true if computed digests are the same
+ if (auth_version != eversion_t()
+ && auth->second->objects[obj].digest_present
+ && i->second.digest_present
+ && auth->second->objects[obj].digest != i->second.digest) {
+ digest_match = false;
+ dout(10) << __func__ << " digest_match = false, " << obj << " data_digest 0x" << std::hex << i->second.digest
+ << " != data_digest 0x" << auth->second->objects[obj].digest << std::dec
+ << dendl;
+ }
+
// Don't use this particular shard due to previous errors
// XXX: For now we can't pick one shard for repair and another's object info or snapset
if (shard_info.errors)
goto out;
+ // XXX: Do I want replicated only?
+ if (parent->get_pool().is_replicated() && cct->_conf->osd_distrust_data_digest) {
+ // This is a boost::optional<bool> so see if option set AND it has the value true
+ // We give priority to a replica where the ObjectStore like BlueStore has builtin checksum
+ if (j->second->has_builtin_csum && j->second->has_builtin_csum == true) {
+ oi_prio = true;
+ }
+ }
+
if (auth_version == eversion_t() || oi.version > auth_version ||
- (oi.version == auth_version && dcount(oi) > dcount(*auth_oi))) {
+ (oi.version == auth_version && dcount(oi, oi_prio) > dcount(*auth_oi, auth_prio))) {
auth = j;
*auth_oi = oi;
auth_version = oi.version;
+ auth_prio = oi_prio;
}
out:
inconsistent_obj_wrapper object_error{*k};
+ bool digest_match;
map<pg_shard_t, ScrubMap *>::const_iterator auth =
- be_select_auth_object(*k, maps, &auth_oi, shard_map, object_error);
+ be_select_auth_object(*k, maps, &auth_oi, shard_map, object_error,
+ digest_match);
list<pg_shard_t> auth_list;
set<pg_shard_t> object_errors;
ScrubMap::object& auth_object = auth->second->objects[*k];
set<pg_shard_t> cur_missing;
set<pg_shard_t> cur_inconsistent;
+ bool fix_digest = false;
for (auto j = maps.cbegin(); j != maps.cend(); ++j) {
if (j == auth)
shard_map[j->first],
object_error,
ss);
+
+ dout(20) << __func__ << (repair ? " repair " : " ") << (parent->get_pool().is_replicated() ? "replicated " : "")
+ << (j == auth ? "auth " : "") << "shards " << shard_map.size() << (digest_match ? " digest_match " : " ")
+ << (shard_map[j->first].has_data_digest_mismatch_info() ? "info_mismatch " : "")
+ << (shard_map[j->first].only_data_digest_mismatch_info() ? "only" : "")
+ << dendl;
+
+ if (cct->_conf->osd_distrust_data_digest) {
+ if (digest_match && parent->get_pool().is_replicated()
+ && shard_map[j->first].has_data_digest_mismatch_info()) {
+ fix_digest = true;
+ }
+ shard_map[j->first].clear_data_digest_mismatch_info();
+ // If all replicas match, but they don't match object_info we can
+ // repair it by using missing_digest mechanism
+ } else if (repair && parent->get_pool().is_replicated() && j == auth && shard_map.size() > 1
+ && digest_match && shard_map[j->first].only_data_digest_mismatch_info()
+ && auth_object.digest_present) {
+ // Set in missing_digests
+ fix_digest = true;
+ // Clear the error
+ shard_map[j->first].clear_data_digest_mismatch_info();
+ errorstream << pgid << " : soid " << *k << " repairing object info data_digest" << "\n";
+ }
// Some errors might have already been set in be_select_auth_object()
if (shard_map[j->first].errors != 0) {
cur_inconsistent.insert(j->first);
if (found)
errorstream << pgid << " shard " << j->first << ": soid " << *k
<< " " << ss.str() << "\n";
- } else if (found) {
+ } else if (object_error.errors != 0) {
// Track possible shard to use as authoritative, if needed
// There are errors, without identifying the shard
object_errors.insert(j->first);
- errorstream << pgid << " : soid " << *k << " " << ss.str() << "\n";
+ if (found)
+ errorstream << pgid << " : soid " << *k << " " << ss.str() << "\n";
} else {
// XXX: The auth shard might get here that we don't know
// that it has the "correct" data.
if (!cur_inconsistent.empty()) {
inconsistent[*k] = cur_inconsistent;
}
+
+ if (fix_digest) {
+ boost::optional<uint32_t> data_digest, omap_digest;
+ assert(auth_object.digest_present);
+ data_digest = auth_object.digest;
+ if (auth_object.omap_digest_present) {
+ omap_digest = auth_object.omap_digest;
+ }
+ missing_digest[*k] = make_pair(data_digest, omap_digest);
+ }
+ // Special handling of this particular type of inconsistency
+ // This can over-ride a data_digest or set an omap_digest
+ // when all replicas match but the object info is wrong.
if (!cur_inconsistent.empty() || !cur_missing.empty()) {
authoritative[*k] = auth_list;
- } else if (parent->get_pool().is_replicated()) {
+ } else if (!fix_digest && parent->get_pool().is_replicated()) {
enum {
NO = 0,
MAYBE = 1,
// recorded digest != actual digest?
if (auth_oi.is_data_digest() && auth_object.digest_present &&
auth_oi.data_digest != auth_object.digest) {
- assert(shard_map[auth->first].has_data_digest_mismatch_info());
+ assert(cct->_conf->osd_distrust_data_digest
+ || shard_map[auth->first].has_data_digest_mismatch_info());
errorstream << pgid << " recorded data digest 0x"
<< std::hex << auth_oi.data_digest << " != on disk 0x"
<< auth_object.digest << std::dec << " on " << auth_oi.soid
const map<pg_shard_t,ScrubMap*> &maps,
object_info_t *auth_oi,
map<pg_shard_t, shard_info_wrapper> &shard_map,
- inconsistent_obj_wrapper &object_error);
+ inconsistent_obj_wrapper &object_error,
+ bool &digest_match);
void be_compare_scrubmaps(
const map<pg_shard_t,ScrubMap*> &maps,
const set<hobject_t> &master_set,
set<snapid_t> snaps;
dout(20) << " snapset " << recovery_info.ss
<< " legacy_snaps " << recovery_info.oi.legacy_snaps << dendl;
+ bool error = false;
if (recovery_info.ss.is_legacy() ||
recovery_info.ss.seq == 0 /* jewel osd doesn't populate this */) {
assert(recovery_info.oi.legacy_snaps.size());
recovery_info.oi.legacy_snaps.end());
} else {
auto p = recovery_info.ss.clone_snaps.find(hoid.snap);
- assert(p != recovery_info.ss.clone_snaps.end()); // hmm, should we warn?
- snaps.insert(p->second.begin(), p->second.end());
+ if (p != recovery_info.ss.clone_snaps.end()) {
+ snaps.insert(p->second.begin(), p->second.end());
+ } else {
+ derr << __func__ << " " << hoid << " had no clone_snaps" << dendl;
+ error = true;
+ }
+ }
+ if (!error) {
+ dout(20) << " snaps " << snaps << dendl;
+ snap_mapper.add_oid(
+ recovery_info.soid,
+ snaps,
+ &_t);
}
- dout(20) << " snaps " << snaps << dendl;
- snap_mapper.add_oid(
- recovery_info.soid,
- snaps,
- &_t);
}
if (!is_delete && pg_log.get_missing().is_missing(recovery_info.soid) &&
pg_log.get_missing().get_items().find(recovery_info.soid)->second.need > recovery_info.version) {
} else {
int r = pgbackend->objects_read_sync(
soid, op.extent.offset, op.extent.length, op.flags, &osd_op.outdata);
+ // whole object? can we verify the checksum?
+ if (!skip_data_digest && r >= 0 && op.extent.offset == 0 &&
+ (uint64_t)r == oi.size && oi.is_data_digest()) {
+ uint32_t crc = osd_op.outdata.crc32c(-1);
+ if (oi.data_digest != crc) {
+ osd->clog->error() << info.pgid << std::hex
+ << " full-object read crc 0x" << crc
+ << " != expected 0x" << oi.data_digest
+ << std::dec << " on " << soid;
+ r = -EIO; // try repair later
+ }
+ }
if (r == -EIO) {
r = rep_repair_primary_object(soid, ctx->op);
}
}
dout(10) << " read got " << r << " / " << op.extent.length
<< " bytes from obj " << soid << dendl;
-
- // whole object? can we verify the checksum?
- if (!skip_data_digest &&
- op.extent.length == oi.size && oi.is_data_digest()) {
- uint32_t crc = osd_op.outdata.crc32c(-1);
- if (oi.data_digest != crc) {
- osd->clog->error() << info.pgid << std::hex
- << " full-object read crc 0x" << crc
- << " != expected 0x" << oi.data_digest
- << std::dec << " on " << soid;
- // FIXME fall back to replica or something?
- result = -EIO;
- }
- }
}
// XXX the op.extent.length is the requested length for async read
<< " full-object read crc 0x" << crc
<< " != expected 0x" << oi.data_digest
<< std::dec << " on " << soid;
- // FIXME fall back to replica or something?
- return -EIO;
+ r = rep_repair_primary_object(soid, ctx->op);
+ if (r < 0) {
+ return r;
+ }
}
}
object_info_t& oi = obs.oi;
const hobject_t& soid = oi.soid;
bool skip_data_digest =
- (osd->store->has_builtin_csum() && g_conf->osd_skip_data_digest) ||
- g_conf->osd_distrust_data_digest;
+ osd->store->has_builtin_csum() && g_conf->osd_skip_data_digest;
PGTransaction* t = ctx->op_t.get();
}
auto p = i->second.lower_bound(oid);
if (p != i->second.begin() &&
- p->first > oid) {
+ (p == i->second.end() || p->first > oid)) {
--p;
}
if (p != i->second.end()) {
inline ostream& operator<<(ostream& out, const osd_stat_t& s) {
- return out << "osd_stat(" << kb_t(s.kb_used) << " used, "
- << kb_t(s.kb_avail) << " avail, "
- << kb_t(s.kb) << " total, "
+ return out << "osd_stat(" << byte_u_t(s.kb_used << 10) << " used, "
+ << byte_u_t(s.kb_avail << 10) << " avail, "
+ << byte_u_t(s.kb << 10) << " total, "
<< "peers " << s.hb_peers
<< " op hist " << s.op_queue_age_hist.h
<< ")";
eversion_t valid_through;
eversion_t incr_since;
bool has_large_omap_object_errors:1;
+ boost::optional<bool> has_builtin_csum;
void merge_incr(const ScrubMap &l);
void clear_from(const hobject_t& start) {
plb.add_u64_counter(l_objectcacher_cache_ops_miss,
"cache_ops_miss", "Miss operations");
plb.add_u64_counter(l_objectcacher_cache_bytes_hit,
- "cache_bytes_hit", "Hit data");
+ "cache_bytes_hit", "Hit data", NULL, 0, unit_t(BYTES));
plb.add_u64_counter(l_objectcacher_cache_bytes_miss,
- "cache_bytes_miss", "Miss data");
+ "cache_bytes_miss", "Miss data", NULL, 0, unit_t(BYTES));
plb.add_u64_counter(l_objectcacher_data_read,
"data_read", "Read data");
plb.add_u64_counter(l_objectcacher_data_written,
"Write operations, delayed due to dirty limits");
plb.add_u64_counter(l_objectcacher_write_bytes_blocked,
"write_bytes_blocked",
- "Write data blocked on dirty limit");
+ "Write data blocked on dirty limit", NULL, 0, unit_t(BYTES));
plb.add_time(l_objectcacher_write_time_blocked, "write_time_blocked",
"Time spent blocking a write due to dirty limits");
ldout(cct, 10) << "writex writing " << f_it->first << "~"
<< f_it->second << " into " << *bh << " at " << opos
<< dendl;
- uint64_t bhoff = bh->start() - opos;
+ uint64_t bhoff = opos - bh->start();
assert(f_it->second <= bh->length() - bhoff);
// get the frag we're mapping in
PerfCountersBuilder::PRIO_CRITICAL);
pcb.add_u64(l_osdc_op_laggy, "op_laggy", "Laggy operations");
pcb.add_u64_counter(l_osdc_op_send, "op_send", "Sent operations");
- pcb.add_u64_counter(l_osdc_op_send_bytes, "op_send_bytes", "Sent data");
+ pcb.add_u64_counter(l_osdc_op_send_bytes, "op_send_bytes", "Sent data", NULL, 0, unit_t(BYTES));
pcb.add_u64_counter(l_osdc_op_resend, "op_resend", "Resent operations");
pcb.add_u64_counter(l_osdc_op_reply, "op_reply", "Operation reply");
import json
import math
import random
+import six
import time
from mgr_module import MgrModule, CommandResult
from threading import Event
self.pg_up_by_poolid = {}
for poolid in self.poolids:
self.pg_up_by_poolid[poolid] = osdmap.map_pool_pgs_up(poolid)
- for a,b in self.pg_up_by_poolid[poolid].iteritems():
+ for a,b in six.iteritems(self.pg_up_by_poolid[poolid]):
self.pg_up[a] = b
def calc_misplaced_from(self, other_ms):
num = len(other_ms.pg_up)
misplaced = 0
- for pgid, before in other_ms.pg_up.iteritems():
+ for pgid, before in six.iteritems(other_ms.pg_up):
if before != self.pg_up.get(pgid, []):
misplaced += 1
if num > 0:
if len(self.compat_ws) and \
'-1' not in self.initial.crush_dump.get('choose_args', {}):
ls.append('ceph osd crush weight-set create-compat')
- for osd, weight in self.compat_ws.iteritems():
+ for osd, weight in six.iteritems(self.compat_ws):
ls.append('ceph osd crush weight-set reweight-compat %s %f' %
(osd, weight))
- for osd, weight in self.osd_weights.iteritems():
+ for osd, weight in six.iteritems(self.osd_weights):
ls.append('ceph osd reweight osd.%d %f' % (osd, weight))
incdump = self.inc.dump()
for pgid in incdump.get('old_pg_upmap_items', []):
score = 0.0
sum_weight = 0.0
- for k, v in count[t].iteritems():
+ for k, v in six.iteritems(count[t]):
# adjust/normalize by weight
if target[k]:
adjusted = float(v) / target[k] / float(num)
weight_map = ms.crush.get_take_weight_osd_map(rootid)
adjusted_map = {
osd: cw * osd_weight[osd]
- for osd,cw in weight_map.iteritems() if osd in osd_weight and cw > 0
+ for osd,cw in six.iteritems(weight_map) if osd in osd_weight and cw > 0
}
sum_w = sum(adjusted_map.values())
assert len(adjusted_map) == 0 or sum_w > 0
pe.target_by_root[root] = { osd: w / sum_w
- for osd,w in adjusted_map.iteritems() }
+ for osd,w in six.iteritems(adjusted_map) }
actual_by_root[root] = {
'pgs': {},
'objects': {},
self.log.debug('target_by_root %s' % pe.target_by_root)
# pool and root actual
- for pool, pi in pool_info.iteritems():
+ for pool, pi in six.iteritems(pool_info):
poolid = pi['pool']
pm = ms.pg_up_by_poolid[poolid]
pgs = 0
pgs_by_osd[osd] = 0
objects_by_osd[osd] = 0
bytes_by_osd[osd] = 0
- for pgid, up in pm.iteritems():
+ for pgid, up in six.iteritems(pm):
for osd in [int(osd) for osd in up]:
if osd == CRUSHMap.ITEM_NONE:
continue
pe.count_by_pool[pool] = {
'pgs': {
k: v
- for k, v in pgs_by_osd.iteritems()
+ for k, v in six.iteritems(pgs_by_osd)
},
'objects': {
k: v
- for k, v in objects_by_osd.iteritems()
+ for k, v in six.iteritems(objects_by_osd)
},
'bytes': {
k: v
- for k, v in bytes_by_osd.iteritems()
+ for k, v in six.iteritems(bytes_by_osd)
},
}
pe.actual_by_pool[pool] = {
'pgs': {
k: float(v) / float(max(pgs, 1))
- for k, v in pgs_by_osd.iteritems()
+ for k, v in six.iteritems(pgs_by_osd)
},
'objects': {
k: float(v) / float(max(objects, 1))
- for k, v in objects_by_osd.iteritems()
+ for k, v in six.iteritems(objects_by_osd)
},
'bytes': {
k: float(v) / float(max(bytes, 1))
- for k, v in bytes_by_osd.iteritems()
+ for k, v in six.iteritems(bytes_by_osd)
},
}
pe.total_by_pool[pool] = {
pe.count_by_root[root] = {
'pgs': {
k: float(v)
- for k, v in actual_by_root[root]['pgs'].iteritems()
+ for k, v in six.iteritems(actual_by_root[root]['pgs'])
},
'objects': {
k: float(v)
- for k, v in actual_by_root[root]['objects'].iteritems()
+ for k, v in six.iteritems(actual_by_root[root]['objects'])
},
'bytes': {
k: float(v)
- for k, v in actual_by_root[root]['bytes'].iteritems()
+ for k, v in six.iteritems(actual_by_root[root]['bytes'])
},
}
pe.actual_by_root[root] = {
'pgs': {
k: float(v) / float(max(pe.total_by_root[root]['pgs'], 1))
- for k, v in actual_by_root[root]['pgs'].iteritems()
+ for k, v in six.iteritems(actual_by_root[root]['pgs'])
},
'objects': {
k: float(v) / float(max(pe.total_by_root[root]['objects'], 1))
- for k, v in actual_by_root[root]['objects'].iteritems()
+ for k, v in six.iteritems(actual_by_root[root]['objects'])
},
'bytes': {
k: float(v) / float(max(pe.total_by_root[root]['bytes'], 1))
- for k, v in actual_by_root[root]['bytes'].iteritems()
+ for k, v in six.iteritems(actual_by_root[root]['bytes'])
},
}
self.log.debug('actual_by_pool %s' % pe.actual_by_pool)
b,
pe.target_by_root[a],
pe.total_by_root[a]
- ) for a, b in pe.count_by_root.iteritems()
+ ) for a, b in six.iteritems(pe.count_by_root)
}
self.log.debug('stats_by_root %s' % pe.stats_by_root)
# total score is just average of normalized stddevs
pe.score = 0.0
- for r, vs in pe.score_by_root.iteritems():
- for k, v in vs.iteritems():
+ for r, vs in six.iteritems(pe.score_by_root):
+ for k, v in six.iteritems(vs):
pe.score += v
pe.score /= 3 * len(roots)
return pe
# get current osd reweights
orig_osd_weight = { a['osd']: a['weight']
for a in ms.osdmap_dump.get('osds',[]) }
- reweighted_osds = [ a for a,b in orig_osd_weight.iteritems()
+ reweighted_osds = [ a for a,b in six.iteritems(orig_osd_weight)
if b < 1.0 and b > 0.0 ]
# get current compat weight-set weights
orig_ws = self.get_compat_weight_set_weights(ms)
if not orig_ws:
return -errno.EAGAIN, 'compat weight-set not available'
- orig_ws = { a: b for a, b in orig_ws.iteritems() if a >= 0 }
+ orig_ws = { a: b for a, b in six.iteritems(orig_ws) if a >= 0 }
# Make sure roots don't overlap their devices. If so, we
# can't proceed.
visited = {}
overlap = {}
root_ids = {}
- for root, wm in pe.target_by_root.iteritems():
+ for root, wm in six.iteritems(pe.target_by_root):
for osd in wm.iterkeys():
if osd in visited:
overlap[osd] = 1
# normalize weights under this root
root_weight = crush.get_item_weight(pe.root_ids[root])
- root_sum = sum(b for a,b in next_ws.iteritems()
+ root_sum = sum(b for a,b in six.iteritems(next_ws)
if a in target.keys())
if root_sum > 0 and root_weight > 0:
factor = root_sum / root_weight
if best_pe.score < pe.score + fudge:
self.log.info('Success, score %f -> %f', pe.score, best_pe.score)
plan.compat_ws = best_ws
- for osd, w in best_ow.iteritems():
+ for osd, w in six.iteritems(best_ow):
if w != orig_osd_weight[osd]:
self.log.debug('osd.%d reweight %f', osd, w)
plan.osd_weights[osd] = w
self.log.error('Error creating compat weight-set')
return r, outs
- for osd, weight in plan.compat_ws.iteritems():
+ for osd, weight in six.iteritems(plan.compat_ws):
self.log.info('ceph osd crush weight-set reweight-compat osd.%d %f',
osd, weight)
result = CommandResult('')
# new_weight
reweightn = {}
- for osd, weight in plan.osd_weights.iteritems():
+ for osd, weight in six.iteritems(plan.osd_weights):
reweightn[str(osd)] = str(int(weight * float(0x10000)))
if len(reweightn):
self.log.info('ceph osd reweightn %s', reweightn)
<i class="fa fa-heartbeat" rv-style="health_status | health_color"></i>
<span>Cluster health</span></a>
</li>
- <li class="treeview{%if path_info.startswith(('/server', '/osd'))%} active{%endif%}">
+ <li class="treeview{%if path_info.startswith(('/server', '/osd','/config_options'))%} active{%endif%}">
<a href="#"><i class="fa fa-server"></i> <span>Cluster</span>
<span class="pull-right-container">
<i class="fa fa-angle-left pull-right"></i>
<li>
<a href="{{ url_prefix }}/osd">OSDs</a>
</li>
+ <li>
+ <a href="{{ url_prefix }}/config_options">Configuration</a>
+ </li>
</ul>
</li>
<li class="treeview{%if path_info.startswith('/rbd')%} active{%endif%}">
--- /dev/null
+
+{% extends "base.html" %}
+
+{% block content %}
+
+
+<script>
+ $(document).ready(function(){
+ // Pre-populated initial data at page load
+ var content_data = {{ content_data }};
+
+ rivets.formatters.display_arrays = function(arr) {
+ result = arr.join().replace(/,/g, "<br/>");
+ return "<div style='width:90px;word-break:break-all'>" + result + "</div>";
+ };
+
+ str_to_level = function(str) {
+ if (str == "basic")
+ return "0"
+ else if (str == "advanced")
+ return "1"
+ else if (str == "developer")
+ return "2"
+ };
+
+ apply_filters = function() {
+ content_data.options_list = [];
+ var selection = "#" + content_data.service;
+ $(selection).attr('selected','selected');
+ var level = $("#level").val();
+ var service = $("#service").val();
+ if (level == "developer" && service == "any") {
+ content_data.options_list = content_data.options.options;
+ }
+ for (var opt of content_data.options.options) {
+ if (service == "any" && str_to_level(opt.level) <= level) {
+ content_data.options_list.push(opt);
+ } else if (opt.services.includes(service) && str_to_level(opt.level) <= level) {
+ content_data.options_list.push(opt);
+ }
+ }
+
+ };
+
+ rivets.bind($("#content"), content_data);
+ apply_filters();
+
+ });
+</script>
+
+<!-- Page Header -->
+<section class="content-header">
+ <h1 style="font-weight:bold">
+ Configuration Options
+ <div class="pull-right" style="font-size:17px">
+ <label>Services:</label>
+ <select id="service" style="color:grey" onchange="apply_filters()">
+ <option id="mon">mon</option>
+ <option id="mgr">mgr</option>
+ <option id="osd">osd</option>
+ <option id="mds">mds</option>
+ <option id="common">common</option>
+ <option id="mds_client">mds_client</option>
+ <option id="rgw">rgw</option>
+ <option id="any">any</option>
+ </select>
+ <label>Level:</label>
+ <select id="level" style="color:grey" onchange="apply_filters()">
+ <option value="0">basic</option>
+ <option value="1">advanced</option>
+ <option value="2">developer</option>
+ </select>
+ </div>
+ </h1>
+</section>
+
+<!-- Main content -->
+<section class="content">
+ <div class="box" style="overflow:auto">
+ <div class="box-body">
+ <table class="table table-bordered">
+ <thead>
+ <tr>
+ <th>Name</th>
+ <th>Description</th>
+ <th>Long description</th>
+ <th>Type</th>
+ <th>Level</th>
+ <th>Default</th>
+ <th>Daemon default</th>
+ <th>Tags</th>
+ <th>Services</th>
+ <th>See_also</th>
+ <th>Max</th>
+ <th>Min</th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr rv-each-opt="options_list">
+ <td><div style="width:120px;word-break:break-all">{opt.name}</div></td>
+ <td><div style="width:80px;word-break:break-all">{opt.desc}</div></td>
+ <td><div style="width:120px;word-break:break-all">{opt.long_desc}</div></td>
+ <td><div style="width:70px;word-break:break-all">{opt.type}</div></td>
+ <td>{opt.level}</td>
+ <td><div style="width:80px;word-break:break-all">{opt.default}</div></td>
+ <td><div style="width:120px;word-break:break-all">{opt.daemon_default}</div></td>
+ <td rv-html="opt.tags | display_arrays"></td>
+ <td rv-html="opt.services | display_arrays"></td>
+ <td rv-html="opt.see_also | display_arrays"></td>
+ <td>{opt.max}</td>
+ <td>{opt.min}</td>
+ </tr>
+ </tbody>
+ </table>
+ </div>
+ </div>
+</section>
+
+
+{% endblock %}
toplevel_data=json.dumps(self._toplevel_data(), indent=2),
content_data=json.dumps(self._servers(), indent=2)
)
+
+ @cherrypy.expose
+ def config_options(self, service="any"):
+ template = env.get_template("config_options.html")
+ return template.render(
+ url_prefix = global_instance().url_prefix,
+ ceph_version=global_instance().version,
+ path_info=cherrypy.request.path_info,
+ toplevel_data=json.dumps(self._toplevel_data(), indent=2),
+ content_data=json.dumps(self.config_options_data(service), indent=2)
+ )
+
+ @cherrypy.expose
+ @cherrypy.tools.json_out()
+ def config_options_data(self, service):
+ options = {}
+ options = global_instance().get("config_options")
+
+ return {
+ 'options': options,
+ 'service': service,
+ }
def _servers(self):
return {
def servers_data(self):
return self._servers()
+ @cherrypy.expose
+ def perf_counters(self, service_type, service_id):
+ template = env.get_template("perf_counters.html")
+ toplevel_data = self._toplevel_data()
+
+ return template.render(
+ url_prefix = global_instance().url_prefix,
+ ceph_version=global_instance().version,
+ path_info=cherrypy.request.path_info,
+ toplevel_data=json.dumps(toplevel_data, indent=2),
+ content_data=json.dumps(self.perf_counters_data(service_type, service_id), indent=2)
+ )
+
+ @cherrypy.expose
+ @cherrypy.tools.json_out()
+ def perf_counters_data(self, service_type, service_id):
+ schema = global_instance().get_perf_schema(service_type, str(service_id)).values()[0]
+ counters = []
+
+ for key, value in sorted(schema.items()):
+ counter = dict()
+ counter["name"] = str(key)
+ counter["description"] = value["description"]
+ if global_instance()._stattype_to_str(value["type"]) == 'counter':
+ counter["value"] = global_instance().get_rate(service_type, service_id, key)
+ counter["unit"] = global_instance()._unit_to_str(value["units"])
+ else:
+ counter["value"] = global_instance().get_latest(service_type, service_id, key)
+ counter["unit"] = ""
+ counters.append(counter)
+
+ return {
+ 'service_type': service_type,
+ 'service_id': service_id,
+ 'counters': counters,
+ }
+
def _health(self):
# Fuse osdmap with pg_summary to get description of pools
# including their PG states
<section class="content-header">
<h1>
osd.{osd.osd}
+ <button class="pull-right btn btn-default" style="margin-right:5px"><a href="{{url_prefix}}/config_options/osd">Configuration</a></button>
</h1>
</section>
from threading import Event
import json
import errno
+import six
import time
from mgr_module import MgrModule
now = datetime.utcnow().isoformat() + 'Z'
- for daemon, counters in self.get_all_perf_counters().iteritems():
+ for daemon, counters in six.iteritems(self.get_all_perf_counters()):
svc_type, svc_id = daemon.split(".", 1)
metadata = self.get_metadata(svc_type, svc_id)
import json
import logging
+import six
import threading
from collections import defaultdict
def get_take_weight_osd_map(self, root):
uglymap = self._get_take_weight_osd_map(root)
- return { int(k): v for k, v in uglymap.get('weights', {}).iteritems() }
+ return { int(k): v for k, v in six.iteritems(uglymap.get('weights', {})) }
class MgrStandbyModule(ceph_module.BaseMgrStandbyModule):
"""
PERFCOUNTER_HISTOGRAM = 0x10
PERFCOUNTER_TYPE_MASK = ~3
+ # units supported
+ BYTES = 0
+ NONE = 1
+
def __init__(self, module_name, py_modules_ptr, this_ptr):
self.module_name = module_name
else:
return value
+ def _unit_to_str(self, unit):
+ if unit == self.NONE:
+ return "/s"
+ elif unit == self.BYTES:
+ return "B/s"
+
def get_server(self, hostname):
"""
Called by the plugin to load information about a particular
('quota_max_objects', 'max_objects'),
]
-POOL_ARGS = POOL_PROPERTIES + map(
- lambda x: x[0],
- POOL_QUOTA_PROPERTIES
-)
+POOL_ARGS = POOL_PROPERTIES + [x for x,_ in POOL_QUOTA_PROPERTIES]
# Transform command to a human readable form
import tempfile
import threading
import traceback
+import six
import socket
import common
from hooks import ErrorHook
from mgr_module import MgrModule, CommandResult
+
# Global instance to share
instance = None
self.id = str(id(self))
# Filter out empty sub-requests
- commands_arrays = filter(
- lambda x: len(x) != 0,
- commands_arrays,
- )
+ commands_arrays = [x for x in commands_arrays
+ if len(x) != 0]
self.running = []
self.waiting = commands_arrays[1:]
self._serve()
self.server.socket.close()
except CannotServe as cs:
- self.log.warn("server not running: {0}".format(cs.message))
+ self.log.warn("server not running: %s", cs)
except:
self.log.error(str(traceback.format_exc()))
def refresh_keys(self):
self.keys = {}
rawkeys = self.get_config_prefix('keys/') or {}
- for k, v in rawkeys.iteritems():
+ for k, v in six.iteritems(rawkeys):
self.keys[k[5:]] = v # strip of keys/ prefix
def _serve(self):
cert = self.get_localized_config("crt")
if cert is not None:
cert_tmp = tempfile.NamedTemporaryFile()
- cert_tmp.write(cert)
+ cert_tmp.write(cert.encode('utf-8'))
cert_tmp.flush()
cert_fname = cert_tmp.name
else:
pkey = self.get_localized_config("key")
if pkey is not None:
pkey_tmp = tempfile.NamedTemporaryFile()
- pkey_tmp.write(pkey)
+ pkey_tmp.write(pkey.encode('utf-8'))
pkey_tmp.flush()
pkey_fname = pkey_tmp.name
else:
if tag == 'seq':
return
- request = filter(
- lambda x: x.is_running(tag),
- self.requests)
-
+ request = [x for x in self.requests if x.is_running(tag)]
if len(request) != 1:
self.log.warn("Unknown request '%s'" % str(tag))
return
elif command['prefix'] == "restful create-self-signed-cert":
cert, pkey = self.create_self_signed_cert()
-
- self.set_config(self.get_mgr_id() + '/crt', cert)
- self.set_config(self.get_mgr_id() + '/key', pkey)
+ self.set_config(self.get_mgr_id() + '/crt', cert.decode('utf-8'))
+ self.set_config(self.get_mgr_id() + '/key', pkey.decode('utf-8'))
self.restart()
return (
# Filter by osd ids
if ids is not None:
- osds = filter(
- lambda x: str(x['osd']) in ids,
- osds
- )
+ osds = [x for x in osds if str(x['osd']) in ids]
# Get list of pools per osd node
pools_map = self.get_osd_pools()
# Filter by pool
if pool_id:
pool_id = int(pool_id)
- osds = filter(
- lambda x: pool_id in x['pools'],
- osds
- )
+ osds = [x for x in osds if pool_id in x['pools']]
return osds
def get_osd_by_id(self, osd_id):
- osd = filter(
- lambda x: x['osd'] == osd_id,
- self.get('osd_map')['osds']
- )
+ osd = [x for x in self.get('osd_map')['osds']
+ if x['osd'] == osd_id]
if len(osd) != 1:
return None
def get_pool_by_id(self, pool_id):
- pool = filter(
- lambda x: x['pool'] == pool_id,
- self.get('osd_map')['pools'],
- )
+ pool = [x for x in self.get('osd_map')['pools']
+ if x['pool'] == pool_id]
if len(pool) != 1:
return None
from collections import defaultdict
from prettytable import PrettyTable
-import prettytable
-import fnmatch
import errno
+import fnmatch
+import prettytable
+import six
from mgr_module import MgrModule
])
# Find the standby replays
- for gid_str, daemon_info in mdsmap['info'].iteritems():
+ for gid_str, daemon_info in six.iteritems(mdsmap['info']):
if daemon_info['state'] != "up:standby-replay":
continue
output += "MDS version: {0}".format(mds_versions.keys()[0])
else:
version_table = PrettyTable(["version", "daemons"])
- for version, daemons in mds_versions.iteritems():
+ for version, daemons in six.iteritems(mds_versions):
version_table.add_row([
version,
", ".join(daemons)
class Error(Exception):
""" `Error` class, derived from `Exception` """
- pass
-
-
-class InvalidArgumentError(Error):
- pass
-
-
-class OSError(Error):
- """ `OSError` class, derived from `Error` """
def __init__(self, message, errno=None):
- super(OSError, self).__init__(message)
+ super(Exception, self).__init__(message)
self.errno = errno
def __str__(self):
- msg = super(OSError, self).__str__()
+ msg = super(Exception, self).__str__()
if self.errno is None:
return msg
return '[errno {0}] {1}'.format(self.errno, msg)
def __reduce__(self):
return (self.__class__, (self.message, self.errno))
+class InvalidArgumentError(Error):
+ pass
+
+class OSError(Error):
+ """ `OSError` class, derived from `Error` """
+ pass
+
class InterruptedOrTimeoutError(OSError):
""" `InterruptedOrTimeoutError` class, derived from `OSError` """
pass
set_target_properties(rgw PROPERTIES OUTPUT_NAME rgw VERSION 2.0.0
SOVERSION 2)
install(TARGETS rgw DESTINATION ${CMAKE_INSTALL_LIBDIR})
-
-if(WITH_EMBEDDED)
- include(MergeStaticLibraries)
- add_library(cephd_rgw_base STATIC rgw_main.cc ${radosgw_admin_srcs})
- if(WITH_RADOSGW_FCGI_FRONTEND)
- target_include_directories(cephd_rgw_base PUBLIC ${FCGI_INCLUDE_DIR})
- endif()
- set_target_properties(cephd_rgw_base PROPERTIES COMPILE_DEFINITIONS BUILDING_FOR_EMBEDDED)
- merge_static_libraries(cephd_rgw cephd_rgw_base rgw_a radosgw_a)
- if(WITH_RADOSGW_FCGI_FRONTEND)
- target_link_libraries(cephd_rgw ${FCGI_LIBRARY})
- endif()
-endif()
}
-#ifdef BUILDING_FOR_EMBEDDED
-extern "C" int cephd_rgw_admin(int argc, const char **argv)
-#else
int main(int argc, const char **argv)
-#endif
{
vector<const char*> args;
argv_to_vec(argc, (const char **)argv, args);
return EINVAL;
}
} else if (ceph_argparse_witharg(args, i, &val, "--max-size", (char*)NULL)) {
- max_size = strict_si_cast<int64_t>(val.c_str(), &err);
+ max_size = strict_iecstrtoll(val.c_str(), &err);
if (!err.empty()) {
cerr << "ERROR: failed to parse max size: " << err << std::endl;
return EINVAL;
{
RGWRealm realm(realm_id, realm_name);
int ret = realm.init(g_ceph_context, store);
- if (ret < 0) {
+ bool default_realm_not_exist = (ret == -ENOENT && realm_id.empty() && realm_name.empty());
+
+ if (ret < 0 && !default_realm_not_exist ) {
cerr << "failed to init realm: " << cpp_strerror(-ret) << std::endl;
return -ret;
}
if (ret < 0) {
return 1;
}
- if (zonegroup.realm_id.empty()) {
+ if (zonegroup.realm_id.empty() && !default_realm_not_exist) {
zonegroup.realm_id = realm.get_id();
}
ret = zonegroup.create();
}
if (opt_cmd == OPT_OBJECTS_EXPIRE) {
- int ret = store->process_expire_objects();
- if (ret < 0) {
- cerr << "ERROR: process_expire_objects() processing returned error: " << cpp_strerror(-ret) << std::endl;
+ if (!store->process_expire_objects()) {
+ cerr << "ERROR: process_expire_objects() processing returned error." << std::endl;
return 1;
}
}
formatter->open_array_section("objects");
while (is_truncated) {
map<string, rgw_bucket_dir_entry> result;
- int r = store->cls_bucket_list(bucket_info, RGW_NO_SHARD, marker, prefix, 1000, true,
- result, &is_truncated, &marker,
- bucket_object_check_filter);
+ int r =
+ store->cls_bucket_list_ordered(bucket_info, RGW_NO_SHARD, marker,
+ prefix, 1000, true,
+ result, &is_truncated, &marker,
+ bucket_object_check_filter);
if (r < 0 && r != -ENOENT) {
cerr << "ERROR: failed operation r=" << r << std::endl;
}
if (opt_cmd == OPT_ORPHANS_FIND) {
+ if (!yes_i_really_mean_it) {
+ cerr << "accidental removal of active objects can not be reversed; "
+ << "do you really mean it? (requires --yes-i-really-mean-it)"
+ << std::endl;
+ return EINVAL;
+ }
+
RGWOrphanSearch search(store, max_concurrent_ios, orphan_stale_secs);
if (job_id.empty()) {
}
public:
+ using engine_map_t = std::map <std::string, std::reference_wrapper<const Engine>>;
+ void add_engines(const std::vector <std::string>& auth_order,
+ engine_map_t eng_map)
+ {
+ auto ctrl_flag = Control::SUFFICIENT;
+ for (const auto &eng : auth_order) {
+ // fallback to the last engine, in case of multiple engines, since ctrl
+ // flag is sufficient for others, error from earlier engine is returned
+ if (&eng == &auth_order.back() && eng_map.size() > 1) {
+ ctrl_flag = Control::FALLBACK;
+ }
+ const auto kv = eng_map.find(eng);
+ if (kv != eng_map.end()) {
+ add_engine(ctrl_flag, kv->second);
+ }
+ }
+ }
+
+ std::vector<std::string> parse_auth_order(CephContext* const cct)
+ {
+ std::vector <std::string> result;
+
+ const std::set <boost::string_view> allowed_auth = { "external", "local" };
+ std::vector <std::string> default_order = { "external", "local"};
+ // supplied strings may contain a space, so let's bypass that
+ boost::split(result, cct->_conf->rgw_s3_auth_order,
+ boost::is_any_of(", "), boost::token_compress_on);
+
+ if (std::any_of(result.begin(), result.end(),
+ [allowed_auth](boost::string_view s)
+ { return allowed_auth.find(s) == allowed_auth.end();})){
+ return default_order;
+ }
+ return result;
+ }
+
AWSAuthStrategy(CephContext* const cct,
rgw::auth::ImplicitTenants& implicit_tenant_context,
RGWRados* const store)
add_engine(Control::SUFFICIENT, anonymous_engine);
}
+ auto auth_order = parse_auth_order(cct);
+ engine_map_t engine_map;
/* The external auth. */
- Control local_engine_mode;
if (! external_engines.is_empty()) {
- add_engine(Control::SUFFICIENT, external_engines);
-
- local_engine_mode = Control::FALLBACK;
- } else {
- local_engine_mode = Control::SUFFICIENT;
+ engine_map.insert(std::make_pair("external", std::cref(external_engines)));
}
-
/* The local auth. */
if (cct->_conf->rgw_s3_auth_use_rados) {
- add_engine(local_engine_mode, local_engine);
+ engine_map.insert(std::make_pair("local", std::cref(local_engine)));
}
+ add_engines(auth_order, engine_map);
}
const char* get_name() const noexcept override {
while (is_truncated) {
map<string, rgw_bucket_dir_entry> result;
- int r = store->cls_bucket_list(bucket_info, RGW_NO_SHARD, marker, prefix, 1000, true,
- result, &is_truncated, &marker,
- bucket_object_check_filter);
+ int r = store->cls_bucket_list_ordered(bucket_info, RGW_NO_SHARD,
+ marker, prefix, 1000, true,
+ result, &is_truncated, &marker,
+ bucket_object_check_filter);
if (r == -ENOENT) {
break;
} else if (r < 0 && r != -ENOENT) {
set_err_msg(err_msg, "ERROR: failed operation r=" + cpp_strerror(-r));
}
-
dump_bucket_index(result, formatter);
flusher.flush();
-
}
formatter->close_section();
}
auto iter = cache_map.find(name);
- if (iter == cache_map.end() ||
- (expiry.count() &&
- (ceph::coarse_mono_clock::now() - iter->second.info.time_added) > expiry)) {
+ if (iter == cache_map.end()) {
ldout(cct, 10) << "cache get: name=" << name << " : miss" << dendl;
if (perfcounter)
perfcounter->inc(l_rgw_cache_miss);
return -ENOENT;
}
+ if (expiry.count() &&
+ (ceph::coarse_mono_clock::now() - iter->second.info.time_added) > expiry) {
+ ldout(cct, 10) << "cache get: name=" << name << " : expiry miss" << dendl;
+ lock.unlock();
+ lock.get_write();
+ // check that wasn't already removed by other thread
+ iter = cache_map.find(name);
+ if (iter != cache_map.end()) {
+ for (auto &kv : iter->second.chained_entries)
+ kv.first->invalidate(kv.second);
+ remove_lru(name, iter->second.lru_iter);
+ cache_map.erase(iter);
+ }
+ if(perfcounter)
+ perfcounter->inc(l_rgw_cache_miss);
+ return -ENOENT;
+ }
ObjectCacheEntry *entry = &iter->second;
return false;
}
-static ssize_t unescape_str(const string& s, ssize_t ofs, char esc_char, char special_char, string *dest)
-{
- const char *src = s.c_str();
- char dest_buf[s.size() + 1];
- char *destp = dest_buf;
- bool esc = false;
-
- dest_buf[0] = '\0';
-
- for (size_t i = ofs; i < s.size(); i++) {
- char c = src[i];
- if (!esc && c == esc_char) {
- esc = true;
- continue;
- }
- if (!esc && c == special_char) {
- *destp = '\0';
- *dest = dest_buf;
- return (ssize_t)i + 1;
- }
- *destp++ = c;
- esc = false;
- }
- *destp = '\0';
- *dest = dest_buf;
- return string::npos;
-}
-
-static void escape_str(const string& s, char esc_char, char special_char, string *dest)
-{
- const char *src = s.c_str();
- char dest_buf[s.size() * 2 + 1];
- char *destp = dest_buf;
-
- for (size_t i = 0; i < s.size(); i++) {
- char c = src[i];
- if (c == esc_char || c == special_char) {
- *destp++ = esc_char;
- }
- *destp++ = c;
- }
- *destp++ = '\0';
- *dest = dest_buf;
-}
-
void rgw_pool::from_str(const string& s)
{
- size_t pos = unescape_str(s, 0, '\\', ':', &name);
+ size_t pos = rgw_unescape_str(s, 0, '\\', ':', &name);
if (pos != string::npos) {
- pos = unescape_str(s, pos, '\\', ':', &ns);
+ pos = rgw_unescape_str(s, pos, '\\', ':', &ns);
/* ignore return; if pos != string::npos it means that we had a colon
* in the middle of ns that wasn't escaped, we're going to stop there
*/
string rgw_pool::to_str() const
{
string esc_name;
- escape_str(name, '\\', ':', &esc_name);
+ rgw_escape_str(name, '\\', ':', &esc_name);
if (ns.empty()) {
return esc_name;
}
string esc_ns;
- escape_str(ns, '\\', ':', &esc_ns);
+ rgw_escape_str(ns, '\\', ':', &esc_ns);
return esc_name + ":" + esc_ns;
}
};
/** Stores the XML arguments associated with the HTTP request in req_state*/
-class RGWHTTPArgs
-{
+class RGWHTTPArgs {
string str, empty_str;
map<string, string> val_map;
map<string, string> sys_val_map;
const string& get_str() {
return str;
}
-};
+}; // RGWHTTPArgs
const char *rgw_conf_get(const map<string, string, ltstr_nocase>& conf_map, const char *name, const char *def_val);
int rgw_conf_get_int(const map<string, string, ltstr_nocase>& conf_map, const char *name, int def_val);
}
}
-struct RGWBucketInfo
-{
+struct RGWBucketInfo {
enum BIShardsHashType {
MOD = 0
};
void rgw_setup_saved_curl_handles();
void rgw_release_all_curl_handles();
+static inline void rgw_escape_str(const string& s, char esc_char,
+ char special_char, string *dest)
+{
+ const char *src = s.c_str();
+ char dest_buf[s.size() * 2 + 1];
+ char *destp = dest_buf;
+
+ for (size_t i = 0; i < s.size(); i++) {
+ char c = src[i];
+ if (c == esc_char || c == special_char) {
+ *destp++ = esc_char;
+ }
+ *destp++ = c;
+ }
+ *destp++ = '\0';
+ *dest = dest_buf;
+}
+
+static inline ssize_t rgw_unescape_str(const string& s, ssize_t ofs,
+ char esc_char, char special_char,
+ string *dest)
+{
+ const char *src = s.c_str();
+ char dest_buf[s.size() + 1];
+ char *destp = dest_buf;
+ bool esc = false;
+
+ dest_buf[0] = '\0';
+
+ for (size_t i = ofs; i < s.size(); i++) {
+ char c = src[i];
+ if (!esc && c == esc_char) {
+ esc = true;
+ continue;
+ }
+ if (!esc && c == special_char) {
+ *destp = '\0';
+ *dest = dest_buf;
+ return (ssize_t)i + 1;
+ }
+ *destp++ = c;
+ esc = false;
+ }
+ *destp = '\0';
+ *dest = dest_buf;
+ return string::npos;
+}
+
#endif
/* we have reported this error */
}
}
+ if (sync_status != 0)
+ break;
+ }
+ if (sync_status != 0) {
+ /* get error, stop */
+ break;
}
if (!marker_tracker.index_key_to_marker(key, cur_id)) {
set_status() << "can't do op, sync already in progress for object";
int RGWGC::tag_index(const string& tag)
{
- return rgw_shards_hash(tag, max_objs);
+ return rgw_shard_id(tag, max_objs);
}
void RGWGC::add_chain(ObjectWriteOperation& op, cls_rgw_obj_chain& chain, const string& tag)
curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, simple_receive_http_data);
curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *)this);
curl_easy_setopt(curl_handle, CURLOPT_ERRORBUFFER, (void *)error_buf);
+ curl_easy_setopt(curl_handle, CURLOPT_LOW_SPEED_TIME, cct->_conf->rgw_curl_low_speed_time);
+ curl_easy_setopt(curl_handle, CURLOPT_LOW_SPEED_LIMIT, cct->_conf->rgw_curl_low_speed_limit);
if (h) {
curl_easy_setopt(curl_handle, CURLOPT_HTTPHEADER, (void *)h);
}
curl_easy_setopt(easy_handle, CURLOPT_WRITEFUNCTION, receive_http_data);
curl_easy_setopt(easy_handle, CURLOPT_WRITEDATA, (void *)req_data);
curl_easy_setopt(easy_handle, CURLOPT_ERRORBUFFER, (void *)req_data->error_buf);
+ curl_easy_setopt(easy_handle, CURLOPT_LOW_SPEED_TIME, cct->_conf->rgw_curl_low_speed_time);
+ curl_easy_setopt(easy_handle, CURLOPT_LOW_SPEED_LIMIT, cct->_conf->rgw_curl_low_speed_limit);
if (h) {
curl_easy_setopt(easy_handle, CURLOPT_HTTPHEADER, (void *)h);
}
switch (result) {
case CURLE_OK:
break;
+ case CURLE_OPERATION_TIMEDOUT:
+ dout(0) << "WARNING: curl operation timed out, network average transfer speed less than "
+ << cct->_conf->rgw_curl_low_speed_limit << " Bytes per second during " << cct->_conf->rgw_curl_low_speed_time << " seconds." << dendl;
+ // fall through: also log the generic curl result details below
default:
dout(20) << "ERROR: msg->data.result=" << result << " req_data->id=" << id << " http_status=" << http_status << dendl;
break;
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
#include <string.h>
#include <iostream>
#include <map>
continue;
}
if (prefix_iter != prefix_map.begin() &&
- (prefix_iter->first.compare(0, prev(prefix_iter)->first.length(), prev(prefix_iter)->first) == 0)) {
- list_op.next_marker = pre_marker;
+ (prefix_iter->first.compare(0, prev(prefix_iter)->first.length(), prev(prefix_iter)->first) == 0)) {
+ list_op.get_next_marker() = pre_marker;
} else {
- pre_marker = list_op.get_next_marker();
+ pre_marker = list_op.get_next_marker();
}
list_op.params.prefix = prefix_iter->first;
rgw_bucket_dir_entry pre_obj;
if ((obj_iter + 1)==objs.end()) {
if (is_truncated) {
//deal with it in next round because we can't judge whether this marker is the only version
- list_op.next_marker = obj_iter->key;
+ list_op.get_next_marker() = obj_iter->key;
break;
}
} else if (obj_iter->key.name.compare((obj_iter + 1)->key.name) == 0) { //*obj_iter is delete marker and isn't the only version, do nothing.
/*
* start up the RADOS connection and then handle HTTP messages as they come in
*/
-#ifdef BUILDING_FOR_EMBEDDED
-extern "C" int cephd_rgw(int argc, const char **argv)
-#else
int main(int argc, const char **argv)
-#endif
{
// dout() messages will be sent to stderr, but FCGX wants messages on stdout
// Redirect stderr to stdout.
else if (strcmp(req_meth, "DELETE") == 0) flags = RGW_CORS_DELETE;
else if (strcmp(req_meth, "HEAD") == 0) flags = RGW_CORS_HEAD;
- if ((rule->get_allowed_methods() & flags) == flags) {
+ if (rule->get_allowed_methods() & flags) {
dout(10) << "Method " << req_meth << " is supported" << dendl;
} else {
dout(5) << "Method " << req_meth << " is not supported" << dendl;
return;
}
+ if (allow_unordered && !delimiter.empty()) {
+ ldout(s->cct, 0) <<
+ "ERROR: unordered bucket listing requested with a delimiter" << dendl;
+ op_ret = -EINVAL;
+ return;
+ }
+
if (need_container_stats()) {
map<string, RGWBucketEnt> m;
m[s->bucket.name] = RGWBucketEnt();
list_op.params.marker = marker;
list_op.params.end_marker = end_marker;
list_op.params.list_versions = list_versions;
+ list_op.params.allow_unordered = allow_unordered;
op_ret = list_op.list_objects(max, &objs, &common_prefixes, &is_truncated);
if (op_ret >= 0) {
op_ret = put_data_and_throttle(filter, data, ofs, need_to_wait);
if (op_ret < 0) {
- if (!need_to_wait || op_ret != -EEXIST) {
+ if (op_ret != -EEXIST) {
ldout(s->cct, 20) << "processor->thottle_data() returned ret="
<< op_ret << dendl;
goto done;
const string name() override { return "list_buckets"; }
RGWOpType get_type() override { return RGW_OP_LIST_BUCKETS; }
uint32_t op_mask() override { return RGW_OP_TYPE_READ; }
-};
+}; // class RGWListBuckets
class RGWGetUsage : public RGWOp {
protected:
int default_max;
bool is_truncated;
+ bool allow_unordered;
int shard_id;
public:
RGWListBucket() : list_versions(false), max(0),
- default_max(0), is_truncated(false), shard_id(-1) {}
+ default_max(0), is_truncated(false),
+ allow_unordered(false), shard_id(-1) {}
int verify_permission() override;
void pre_exec() override;
void execute() override;
uint64_t interval_msec() override {
return cct->_conf->rgw_md_notify_interval_msec;
}
+ void stop_process() override {
+ notify_mgr.stop();
+ }
public:
RGWMetaNotifier(RGWRados *_store, RGWMetadataLog* log)
: RGWRadosThread(_store, "meta-notifier"), notify_mgr(_store), log(log) {}
uint64_t interval_msec() override {
return cct->_conf->get_val<int64_t>("rgw_data_notify_interval_msec");
}
+ void stop_process() override {
+ notify_mgr.stop();
+ }
public:
RGWDataNotifier(RGWRados *_store) : RGWRadosThread(_store, "data-notifier"), notify_mgr(_store) {}
int RGWRados::key_to_shard_id(const string& key, int max_shards)
{
- return rgw_shards_hash(key, max_shards);
+ return rgw_shard_id(key, max_shards);
}
void RGWRados::shard_name(const string& prefix, unsigned max_shards, const string& key, string& name, int *shard_id)
{
string obj_key = key.name + key.instance;
int num_shards = cct->_conf->rgw_objexp_hints_num_shards;
- uint32_t sid = ceph_str_hash_linux(obj_key.c_str(), obj_key.size());
- uint32_t sid2 = sid ^ ((sid & 0xFF) << 24);
- sid = rgw_shards_mod(sid2, num_shards);
- return sid;
+ return rgw_bucket_shard_index(obj_key, num_shards);
}
static string objexp_hint_get_keyext(const string& tenant_name,
return 0;
}
-/**
- * get listing of the objects in a bucket.
+
+/**
+ * Get ordered listing of the objects in a bucket.
*
* max: maximum number of results to return
* bucket: bucket to list contents of
* common_prefixes: if delim is filled in, any matching prefixes are placed here.
* is_truncated: if number of objects in the bucket is bigger than max, then truncated.
*/
-int RGWRados::Bucket::List::list_objects(int64_t max,
- vector<rgw_bucket_dir_entry> *result,
- map<string, bool> *common_prefixes,
- bool *is_truncated)
+int RGWRados::Bucket::List::list_objects_ordered(int64_t max,
+ vector<rgw_bucket_dir_entry> *result,
+ map<string, bool> *common_prefixes,
+ bool *is_truncated)
{
RGWRados *store = target->get_store();
CephContext *cct = store->ctx();
string bigger_than_delim;
if (!params.delim.empty()) {
- unsigned long val = decode_utf8((unsigned char *)params.delim.c_str(), params.delim.size());
+ unsigned long val = decode_utf8((unsigned char *)params.delim.c_str(),
+ params.delim.size());
char buf[params.delim.size() + 16];
int r = encode_utf8(val + 1, (unsigned char *)buf);
if (r < 0) {
cur_marker = s;
}
}
-
+
string skip_after_delim;
while (truncated && count <= max) {
if (skip_after_delim > cur_marker.name) {
ldout(cct, 20) << "setting cur_marker=" << cur_marker.name << "[" << cur_marker.instance << "]" << dendl;
}
std::map<string, rgw_bucket_dir_entry> ent_map;
- int r = store->cls_bucket_list(target->get_bucket_info(), shard_id, cur_marker, cur_prefix,
- read_ahead + 1 - count, params.list_versions, ent_map,
- &truncated, &cur_marker);
+ int r = store->cls_bucket_list_ordered(target->get_bucket_info(),
+ shard_id,
+ cur_marker,
+ cur_prefix,
+ read_ahead + 1 - count,
+ params.list_versions,
+ ent_map,
+ &truncated,
+ &cur_marker);
if (r < 0)
return r;
- std::map<string, rgw_bucket_dir_entry>::iterator eiter;
- for (eiter = ent_map.begin(); eiter != ent_map.end(); ++eiter) {
+ for (auto eiter = ent_map.begin(); eiter != ent_map.end(); ++eiter) {
rgw_bucket_dir_entry& entry = eiter->second;
rgw_obj_index_key index_key = entry.key;
rgw_obj_key obj(index_key);
- /* note that parse_raw_oid() here will not set the correct object's instance, as
- * rgw_obj_index_key encodes that separately. We don't need to set the instance because it's
- * not needed for the checks here and we end up using the raw entry for the return vector
+ /* note that parse_raw_oid() here will not set the correct
+ * object's instance, as rgw_obj_index_key encodes that
+ * separately. We don't need to set the instance because it's
+ * not needed for the checks here and we end up using the raw
+ * entry for the return vector
*/
bool valid = rgw_obj_key::parse_raw_oid(index_key.name, &obj);
if (!valid) {
if (params.filter && !params.filter->filter(obj.name, index_key.name))
continue;
- if (params.prefix.size() && (obj.name.compare(0, params.prefix.size(), params.prefix) != 0))
+ if (params.prefix.size() &&
+ (obj.name.compare(0, params.prefix.size(), params.prefix) != 0))
continue;
if (!params.delim.empty()) {
result->emplace_back(std::move(entry));
count++;
}
-
- // Either the back-end telling us truncated, or we don't consume all
- // items returned per the amount caller request
- truncated = (truncated || eiter != ent_map.end());
}
done:
*is_truncated = truncated;
return 0;
-}
+} // list_objects_ordered
+
+
+/**
+ * Get listing of the objects in a bucket and allow the results to be out
+ * of order.
+ *
+ * Even though there are key differences with the ordered counterpart,
+ * the parameters are the same to maintain some compatibility.
+ *
+ * max: maximum number of results to return
+ * bucket: bucket to list contents of
+ * prefix: only return results that match this prefix
+ * delim: should not be set; if it is, we should have indicated an error
+ * marker: if filled in, begin the listing with this object.
+ * end_marker: if filled in, end the listing with this object.
+ * result: the objects are put in here.
+ * common_prefixes: this is never filled with an unordered list; the param
+ * is maintained for compatibility
+ * is_truncated: set to true if the number of objects in the bucket
+ * is bigger than max.
+ */
+int RGWRados::Bucket::List::list_objects_unordered(int64_t max,
+ vector<rgw_bucket_dir_entry> *result,
+ map<string, bool> *common_prefixes,
+ bool *is_truncated)
+{
+ RGWRados *store = target->get_store();
+ CephContext *cct = store->ctx();
+ int shard_id = target->get_shard_id();
+
+ int count = 0;
+ bool truncated = true;
+
+ // read a few extra in each call to cls_bucket_list_unordered in
+ // case some are filtered out due to namespace matching, versioning,
+ // filtering, etc.
+ const int64_t max_read_ahead = 100;
+ const uint32_t read_ahead = uint32_t(max + std::min(max, max_read_ahead));
+
+ result->clear();
+
+ rgw_obj_key marker_obj(params.marker.name, params.marker.instance, params.ns);
+ rgw_obj_index_key cur_marker;
+ marker_obj.get_index_key(&cur_marker);
+
+ rgw_obj_key end_marker_obj(params.end_marker.name, params.end_marker.instance,
+ params.ns);
+ rgw_obj_index_key cur_end_marker;
+ end_marker_obj.get_index_key(&cur_end_marker);
+ const bool cur_end_marker_valid = !params.end_marker.empty();
+
+ rgw_obj_key prefix_obj(params.prefix);
+ prefix_obj.ns = params.ns;
+ string cur_prefix = prefix_obj.get_index_key_name();
+
+ while (truncated && count <= max) {
+ std::vector<rgw_bucket_dir_entry> ent_list;
+ int r = store->cls_bucket_list_unordered(target->get_bucket_info(),
+ shard_id,
+ cur_marker,
+ cur_prefix,
+ read_ahead,
+ params.list_versions,
+ ent_list,
+ &truncated,
+ &cur_marker);
+ if (r < 0)
+ return r;
+
+ // NB: while regions of ent_list will be sorted, we have no
+ // guarantee that all items will be sorted since they can cross
+ // shard boundaries
+
+ for (auto& entry : ent_list) {
+ rgw_obj_index_key index_key = entry.key;
+ rgw_obj_key obj(index_key);
+
+ /* note that parse_raw_oid() here will not set the correct
+ * object's instance, as rgw_obj_index_key encodes that
+ * separately. We don't need to set the instance because it's
+ * not needed for the checks here and we end up using the raw
+ * entry for the return vector
+ */
+ bool valid = rgw_obj_key::parse_raw_oid(index_key.name, &obj);
+ if (!valid) {
+ ldout(cct, 0) << "ERROR: could not parse object name: " <<
+ obj.name << dendl;
+ continue;
+ }
+
+ if (!params.list_versions && !entry.is_visible()) {
+ continue;
+ }
+
+ if (params.enforce_ns && obj.ns != params.ns) {
+ continue;
+ }
+
+ if (cur_end_marker_valid && cur_end_marker <= index_key) {
+ // we're not guaranteed items will come in order, so we have
+ // to loop through all
+ continue;
+ }
+
+ if (count < max) {
+ params.marker = index_key;
+ next_marker = index_key;
+ }
+
+ if (params.filter && !params.filter->filter(obj.name, index_key.name))
+ continue;
+
+ if (params.prefix.size() &&
+ (0 != obj.name.compare(0, params.prefix.size(), params.prefix)))
+ continue;
+
+ if (count >= max) {
+ truncated = true;
+ goto done;
+ }
+
+ result->emplace_back(std::move(entry));
+ count++;
+ } // for (auto& entry : ent_list)
+ } // while (truncated && count <= max)
+
+done:
+ if (is_truncated)
+ *is_truncated = truncated;
+
+ return 0;
+} // list_objects_unordered
+
/**
* create a rados pool, associated meta info
int RGWRados::check_bucket_empty(RGWBucketInfo& bucket_info)
{
- std::map<string, rgw_bucket_dir_entry> ent_map;
+ std::vector<rgw_bucket_dir_entry> ent_list;
rgw_obj_index_key marker;
string prefix;
bool is_truncated;
do {
-#define NUM_ENTRIES 1000
- int r = cls_bucket_list(bucket_info, RGW_NO_SHARD, marker, prefix, NUM_ENTRIES, true, ent_map,
- &is_truncated, &marker);
+ constexpr uint NUM_ENTRIES = 1000u;
+ int r = cls_bucket_list_unordered(bucket_info,
+ RGW_NO_SHARD,
+ marker,
+ prefix,
+ NUM_ENTRIES,
+ true,
+ ent_list,
+ &is_truncated,
+ &marker);
if (r < 0)
return r;
string ns;
- std::map<string, rgw_bucket_dir_entry>::iterator eiter;
- for (eiter = ent_map.begin(); eiter != ent_map.end(); ++eiter) {
+ for (auto const& dirent : ent_list) {
rgw_obj_key obj;
- if (rgw_obj_key::oid_to_key_in_ns(eiter->second.key.name, &obj, ns))
+ if (rgw_obj_key::oid_to_key_in_ns(dirent.key.name, &obj, ns))
return -ENOTEMPTY;
}
} while (is_truncated);
+
return 0;
}
return gc->send_chain(chain, tag, sync);
}
-int RGWRados::open_bucket_index(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx, string& bucket_oid)
+int RGWRados::open_bucket_index(const RGWBucketInfo& bucket_info,
+ librados::IoCtx& index_ctx,
+ string& bucket_oid)
{
const rgw_bucket& bucket = bucket_info.bucket;
int r = open_bucket_index_ctx(bucket_info, index_ctx);
return 0;
}
-int RGWRados::open_bucket_index_base(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
- string& bucket_oid_base) {
+int RGWRados::open_bucket_index_base(const RGWBucketInfo& bucket_info,
+ librados::IoCtx& index_ctx,
+ string& bucket_oid_base) {
const rgw_bucket& bucket = bucket_info.bucket;
int r = open_bucket_index_ctx(bucket_info, index_ctx);
if (r < 0)
}
-int RGWRados::open_bucket_index(const RGWBucketInfo& bucket_info, librados::IoCtx& index_ctx,
- map<int, string>& bucket_objs, int shard_id, map<int, string> *bucket_instance_ids) {
+int RGWRados::open_bucket_index(const RGWBucketInfo& bucket_info,
+ librados::IoCtx& index_ctx,
+ map<int, string>& bucket_objs,
+ int shard_id,
+ map<int, string> *bucket_instance_ids) {
string bucket_oid_base;
int ret = open_bucket_index_base(bucket_info, index_ctx, bucket_oid_base);
if (ret < 0) {
rgw_zone_set zones_trace;
if (_zones_trace) {
zones_trace = *_zones_trace;
- } else {
- zones_trace.insert(get_zone().id);
}
+ zones_trace.insert(get_zone().id);
BucketShard bs(this);
return lc->process();
}
-int RGWRados::process_expire_objects()
+bool RGWRados::process_expire_objects()
{
- obj_expirer->inspect_all_shards(utime_t(), ceph_clock_now());
- return 0;
+ return obj_expirer->inspect_all_shards(utime_t(), ceph_clock_now());
}
int RGWRados::cls_rgw_init_index(librados::IoCtx& index_ctx, librados::ObjectWriteOperation& op, string& oid)
if (_zones_trace) {
zones_trace = *_zones_trace;
}
- else {
- zones_trace.insert(get_zone().id);
- }
-
+ zones_trace.insert(get_zone().id);
+
ObjectWriteOperation o;
cls_rgw_obj_key key(obj.key.get_index_key_name(), obj.key.instance);
cls_rgw_guard_bucket_resharding(o, -ERR_BUSY_RESHARDING);
dir_meta = ent.meta;
dir_meta.category = category;
+ rgw_zone_set zones_trace;
+ if (_zones_trace) {
+ zones_trace = *_zones_trace;
+ }
+ zones_trace.insert(get_zone().id);
+
rgw_bucket_entry_ver ver;
ver.pool = pool;
ver.epoch = epoch;
cls_rgw_obj_key key(ent.key.name, ent.key.instance);
cls_rgw_guard_bucket_resharding(o, -ERR_BUSY_RESHARDING);
cls_rgw_bucket_complete_op(o, op, tag, ver, key, dir_meta, remove_objs,
- get_zone().log_data, bilog_flags, _zones_trace);
+ get_zone().log_data, bilog_flags, &zones_trace);
complete_op_data *arg;
index_completion_manager->create_completion(obj, op, tag, ver, key, dir_meta, remove_objs,
- get_zone().log_data, bilog_flags, _zones_trace, &arg);
+ get_zone().log_data, bilog_flags, &zones_trace, &arg);
librados::AioCompletion *completion = arg->rados_completion;
int ret = bs.index_ctx.aio_operate(bs.bucket_obj, arg->rados_completion, &o);
completion->release(); /* can't reference arg here, as it might have already been released */
return CLSRGWIssueSetTagTimeout(index_ctx, bucket_objs, cct->_conf->rgw_bucket_index_max_aio, timeout)();
}
-int RGWRados::cls_bucket_list(RGWBucketInfo& bucket_info, int shard_id, rgw_obj_index_key& start, const string& prefix,
- uint32_t num_entries, bool list_versions, map<string, rgw_bucket_dir_entry>& m,
- bool *is_truncated, rgw_obj_index_key *last_entry,
- bool (*force_check_filter)(const string& name))
+
+int RGWRados::cls_bucket_list_ordered(RGWBucketInfo& bucket_info,
+ int shard_id,
+ rgw_obj_index_key& start,
+ const string& prefix,
+ uint32_t num_entries,
+ bool list_versions,
+ map<string, rgw_bucket_dir_entry>& m,
+ bool *is_truncated,
+ rgw_obj_index_key *last_entry,
+ bool (*force_check_filter)(const string& name))
{
- ldout(cct, 10) << "cls_bucket_list " << bucket_info.bucket << " start " << start.name << "[" << start.instance << "] num_entries " << num_entries << dendl;
+ ldout(cct, 10) << "cls_bucket_list_ordered " << bucket_info.bucket <<
+ " start " << start.name << "[" << start.instance << "] num_entries " <<
+ num_entries << dendl;
librados::IoCtx index_ctx;
// key - oid (for different shards if there is any)
- // value - list result for the corresponding oid (shard), it is filled by the AIO callback
+ // value - list result for the corresponding oid (shard), it is filled by
+ // the AIO callback
map<int, string> oids;
map<int, struct rgw_cls_list_ret> list_results;
int r = open_bucket_index(bucket_info, index_ctx, oids, shard_id);
return r;
cls_rgw_obj_key start_key(start.name, start.instance);
- r = CLSRGWIssueBucketList(index_ctx, start_key, prefix, num_entries, list_versions,
- oids, list_results, cct->_conf->rgw_bucket_index_max_aio)();
+ r = CLSRGWIssueBucketList(index_ctx, start_key, prefix, num_entries,
+ list_versions, oids, list_results,
+ cct->_conf->rgw_bucket_index_max_aio)();
if (r < 0)
return r;
* and if the tags are old we need to do cleanup as well. */
librados::IoCtx sub_ctx;
sub_ctx.dup(index_ctx);
- r = check_disk_state(sub_ctx, bucket_info, dirent, dirent, updates[vnames[pos]]);
+ r = check_disk_state(sub_ctx, bucket_info, dirent, dirent,
+ updates[vnames[pos]]);
if (r < 0 && r != -ENOENT) {
return r;
}
}
if (r >= 0) {
- ldout(cct, 10) << "RGWRados::cls_bucket_list: got " << dirent.key.name << "[" << dirent.key.instance << "]" << dendl;
+ ldout(cct, 10) << "RGWRados::cls_bucket_list_ordered: got " <<
+ dirent.key.name << "[" << dirent.key.instance << "]" << dendl;
m[name] = std::move(dirent);
++count;
}
// we don't care if we lose suggested updates, send them off blindly
AioCompletion *c = librados::Rados::aio_create_completion(NULL, NULL, NULL);
index_ctx.aio_operate(miter->first, c, &o);
- c->release();
+ c->release();
}
}
// Check if all the returned entries are consumed or not
for (size_t i = 0; i < vcurrents.size(); ++i) {
- if (vcurrents[i] != vends[i])
+ if (vcurrents[i] != vends[i]) {
*is_truncated = true;
+ break;
+ }
}
if (!m.empty())
*last_entry = m.rbegin()->first;
return 0;
}
-int RGWRados::cls_obj_usage_log_add(const string& oid, rgw_usage_log_info& info)
+
+int RGWRados::cls_bucket_list_unordered(RGWBucketInfo& bucket_info,
+ int shard_id,
+ rgw_obj_index_key& start,
+ const string& prefix,
+ uint32_t num_entries,
+ bool list_versions,
+ std::vector<rgw_bucket_dir_entry>& ent_list,
+ bool *is_truncated,
+ rgw_obj_index_key *last_entry,
+ bool (*force_check_filter)(const string& name)) {
+ ldout(cct, 10) << "cls_bucket_list_unordered " << bucket_info.bucket <<
+ " start " << start.name << "[" << start.instance <<
+ "] num_entries " << num_entries << dendl;
+
+ *is_truncated = false;
+ librados::IoCtx index_ctx;
+
+ rgw_obj_index_key my_start = start;
+
+ map<int, string> oids;
+ int r = open_bucket_index(bucket_info, index_ctx, oids, shard_id);
+ if (r < 0)
+ return r;
+ const uint32_t num_shards = oids.size();
+
+ uint32_t current_shard;
+ if (shard_id >= 0) {
+ current_shard = shard_id;
+ } else if (my_start.empty()) {
+ current_shard = 0u;
+ } else {
+ current_shard =
+ rgw_bucket_shard_index(my_start.name, num_shards);
+ }
+
+ uint32_t count = 0u;
+ map<string, bufferlist> updates;
+ std::string last_added_entry;
+ while (count <= num_entries &&
+ ((shard_id >= 0 && current_shard == uint32_t(shard_id)) ||
+ current_shard < num_shards)) {
+ // key - oid (for different shards if there is any)
+ // value - list result for the corresponding oid (shard), it is filled by
+ // the AIO callback
+ map<int, struct rgw_cls_list_ret> list_results;
+ r = CLSRGWIssueBucketList(index_ctx, my_start, prefix, num_entries,
+ list_versions, oids, list_results,
+ cct->_conf->rgw_bucket_index_max_aio)();
+ if (r < 0)
+ return r;
+
+ const std::string& oid = oids[current_shard];
+ assert(list_results.find(current_shard) != list_results.end());
+ auto& result = list_results[current_shard];
+ for (auto& entry : result.dir.m) {
+ rgw_bucket_dir_entry& dirent = entry.second;
+
+ bool force_check = force_check_filter &&
+ force_check_filter(dirent.key.name);
+ if ((!dirent.exists && !dirent.is_delete_marker()) ||
+ !dirent.pending_map.empty() ||
+ force_check) {
+ /* there are uncommitted ops. We need to check the current state,
+ * and if the tags are old we need to do cleanup as well. */
+ librados::IoCtx sub_ctx;
+ sub_ctx.dup(index_ctx);
+ r = check_disk_state(sub_ctx, bucket_info, dirent, dirent, updates[oid]);
+ if (r < 0 && r != -ENOENT) {
+ return r;
+ }
+ }
+
+ // at this point either r >=0 or r == -ENOENT
+ if (r >= 0) { // i.e., if r != -ENOENT
+ ldout(cct, 10) << "RGWRados::cls_bucket_list_unordered: got " <<
+ dirent.key.name << "[" << dirent.key.instance << "]" << dendl;
+
+ if (count < num_entries) {
+ last_added_entry = entry.first;
+ my_start = dirent.key;
+ ent_list.emplace_back(std::move(dirent));
+ ++count;
+ } else {
+ *is_truncated = true;
+ goto check_updates;
+ }
+ } else { // r == -ENOENT
+ // in the case of -ENOENT, make sure we're advancing marker
+ // for possible next call to CLSRGWIssueBucketList
+ my_start = dirent.key;
+ }
+ } // entry for loop
+
+ if (!result.is_truncated) {
+ // if we reached the end of the shard read next shard
+ ++current_shard;
+ my_start = rgw_obj_index_key();
+ }
+ } // shard loop
+
+check_updates:
+ // suggest updates if there is any
+ map<string, bufferlist>::iterator miter = updates.begin();
+ for (; miter != updates.end(); ++miter) {
+ if (miter->second.length()) {
+ ObjectWriteOperation o;
+ cls_rgw_suggest_changes(o, miter->second);
+ // we don't care if we lose suggested updates, send them off blindly
+ AioCompletion *c = librados::Rados::aio_create_completion(NULL, NULL, NULL);
+ index_ctx.aio_operate(miter->first, c, &o);
+ c->release();
+ }
+ }
+
+ if (last_entry && !ent_list.empty()) {
+ *last_entry = last_added_entry;
+ }
+
+ return 0;
+}
+
+
+int RGWRados::cls_obj_usage_log_add(const string& oid,
+ rgw_usage_log_info& info)
{
rgw_raw_obj obj(get_zone_params().usage_log_pool, oid);
}
void RGWRados::get_bucket_index_objects(const string& bucket_oid_base,
- uint32_t num_shards, map<int, string>& bucket_objects, int shard_id)
-{
+ uint32_t num_shards,
+ map<int, string>& bucket_objects,
+ int shard_id) {
if (!num_shards) {
bucket_objects[0] = bucket_oid_base;
} else {
*shard_id = -1;
}
} else {
- uint32_t sid = ceph_str_hash_linux(obj_key.c_str(), obj_key.size());
- uint32_t sid2 = sid ^ ((sid & 0xFF) << 24);
- sid = rgw_shards_mod(sid2, bucket_info.num_shards);
+ uint32_t sid = rgw_bucket_shard_index(obj_key, bucket_info.num_shards);
if (shard_id) {
*shard_id = (int)sid;
}
*shard_id = -1;
}
} else {
- uint32_t sid = ceph_str_hash_linux(obj_key.c_str(), obj_key.size());
- uint32_t sid2 = sid ^ ((sid & 0xFF) << 24);
- sid = rgw_shards_mod(sid2, num_shards);
+ uint32_t sid = rgw_bucket_shard_index(obj_key, num_shards);
char buf[bucket_oid_base.size() + 32];
snprintf(buf, sizeof(buf), "%s.%d", bucket_oid_base.c_str(), sid);
(*bucket_obj) = buf;
#define RGW_SHARDS_PRIME_0 7877
#define RGW_SHARDS_PRIME_1 65521
+// only called by rgw_shard_id and rgw_bucket_shard_index
static inline int rgw_shards_mod(unsigned hval, int max_shards)
{
if (max_shards <= RGW_SHARDS_PRIME_0) {
return hval % RGW_SHARDS_PRIME_1 % max_shards;
}
-static inline int rgw_shards_hash(const string& key, int max_shards)
+// used for logging and tagging
+static inline int rgw_shard_id(const string& key, int max_shards)
{
- return rgw_shards_mod(ceph_str_hash_linux(key.c_str(), key.size()), max_shards);
+ return rgw_shards_mod(ceph_str_hash_linux(key.c_str(), key.size()),
+ max_shards);
+}
+
+// used for bucket indices
+static inline uint32_t rgw_bucket_shard_index(const std::string& key,
+ int num_shards) {
+ uint32_t sid = ceph_str_hash_linux(key.c_str(), key.size());
+ uint32_t sid2 = sid ^ ((sid & 0xFF) << 24);
+ return rgw_shards_mod(sid2, num_shards);
}
static inline int rgw_shards_max()
const string *get_optag() { return &optag; }
bool is_prepared() { return prepared; }
- };
+ }; // class UpdateIndex
+
+ class List {
+ protected:
- struct List {
RGWRados::Bucket *target;
rgw_obj_key next_marker;
+ int list_objects_ordered(int64_t max,
+ vector<rgw_bucket_dir_entry> *result,
+ map<string, bool> *common_prefixes,
+ bool *is_truncated);
+ int list_objects_unordered(int64_t max,
+ vector<rgw_bucket_dir_entry> *result,
+ map<string, bool> *common_prefixes,
+ bool *is_truncated);
+
+ public:
+
struct Params {
string prefix;
string delim;
bool enforce_ns;
RGWAccessListFilter *filter;
bool list_versions;
-
- Params() : enforce_ns(true), filter(NULL), list_versions(false) {}
+ bool allow_unordered;
+
+ Params() :
+ enforce_ns(true),
+ filter(NULL),
+ list_versions(false),
+ allow_unordered(false)
+ {}
} params;
- public:
explicit List(RGWRados::Bucket *_target) : target(_target) {}
- int list_objects(int64_t max, vector<rgw_bucket_dir_entry> *result, map<string, bool> *common_prefixes, bool *is_truncated);
+ int list_objects(int64_t max,
+ vector<rgw_bucket_dir_entry> *result,
+ map<string, bool> *common_prefixes,
+ bool *is_truncated) {
+ if (params.allow_unordered) {
+ return list_objects_unordered(max, result, common_prefixes,
+ is_truncated);
+ } else {
+ return list_objects_ordered(max, result, common_prefixes,
+ is_truncated);
+ }
+ }
rgw_obj_key& get_next_marker() {
return next_marker;
}
- };
- };
+ }; // class List
+ }; // class Bucket
/** Write/overwrite an object to the bucket storage. */
virtual int put_system_obj_impl(rgw_raw_obj& obj, uint64_t size, ceph::real_time *mtime,
ceph::real_time& removed_mtime, list<rgw_obj_index_key> *remove_objs, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
int cls_obj_complete_cancel(BucketShard& bs, string& tag, rgw_obj& obj, uint16_t bilog_flags, rgw_zone_set *zones_trace = nullptr);
int cls_obj_set_bucket_tag_timeout(RGWBucketInfo& bucket_info, uint64_t timeout);
- int cls_bucket_list(RGWBucketInfo& bucket_info, int shard_id, rgw_obj_index_key& start, const string& prefix,
- uint32_t num_entries, bool list_versions, map<string, rgw_bucket_dir_entry>& m,
- bool *is_truncated, rgw_obj_index_key *last_entry,
- bool (*force_check_filter)(const string& name) = NULL);
+ int cls_bucket_list_ordered(RGWBucketInfo& bucket_info, int shard_id,
+ rgw_obj_index_key& start, const string& prefix,
+ uint32_t num_entries, bool list_versions,
+ map<string, rgw_bucket_dir_entry>& m,
+ bool *is_truncated,
+ rgw_obj_index_key *last_entry,
+ bool (*force_check_filter)(const string& name) = nullptr);
+ int cls_bucket_list_unordered(RGWBucketInfo& bucket_info, int shard_id,
+ rgw_obj_index_key& start, const string& prefix,
+ uint32_t num_entries, bool list_versions,
+ vector<rgw_bucket_dir_entry>& ent_list,
+ bool *is_truncated, rgw_obj_index_key *last_entry,
+ bool (*force_check_filter)(const string& name) = nullptr);
int cls_bucket_head(const RGWBucketInfo& bucket_info, int shard_id, map<string, struct rgw_bucket_dir_header>& headers, map<int, string> *bucket_instance_ids = NULL);
int cls_bucket_head_async(const RGWBucketInfo& bucket_info, int shard_id, RGWGetDirHeader_CB *ctx, int *num_aio);
int list_bi_log_entries(RGWBucketInfo& bucket_info, int shard_id, string& marker, uint32_t max, std::list<rgw_bi_log_entry>& result, bool *truncated);
int list_gc_objs(int *index, string& marker, uint32_t max, bool expired_only, std::list<cls_rgw_gc_obj_info>& result, bool *truncated);
int process_gc();
- int process_expire_objects();
+ bool process_expire_objects();
int defer_gc(void *ctx, const RGWBucketInfo& bucket_info, const rgw_obj& obj);
int process_lc();
uint32_t sid = ceph_str_hash_linux(key.c_str(), key.size());
uint32_t sid2 = sid ^ ((sid & 0xFF) << 24);
sid = sid2 % MAX_RESHARD_LOGSHARDS_PRIME % num_logshards;
- int logshard = sid % num_logshards;
- get_logshard_oid(logshard, oid);
+ get_logshard_oid(int(sid), oid);
}
int RGWReshard::add(cls_rgw_reshard_entry& entry)
#include "rgw_client_io.h"
#include "common/errno.h"
#include "common/strtol.h"
+#include "rgw/rgw_b64.h"
#include "include/assert.h"
#define dout_context g_ceph_context
}
void RGWOp_Metadata_List::execute() {
- string marker = s->info.args.get("marker");
+ string marker;
+ ldout(s->cct, 16) << __func__
+ << " raw marker " << s->info.args.get("marker")
+ << dendl;
+
+ try {
+ marker = s->info.args.get("marker");
+ if (!marker.empty()) {
+ marker = rgw::from_base64(marker);
+ }
+ ldout(s->cct, 16) << __func__
+ << " marker " << marker << dendl;
+ } catch (...) {
+ marker = std::string("");
+ }
+
bool max_entries_specified;
- string max_entries_str = s->info.args.get("max-entries", &max_entries_specified);
+ string max_entries_str =
+ s->info.args.get("max-entries", &max_entries_specified);
bool extended_response = (max_entries_specified); /* for backward compatibility, if max-entries is not specified
we will send the old response format */
void *handle;
int max = 1000;
+ /* example markers:
+ marker = "3:b55a9110:root::bu_9:head";
+ marker = "3:b9a8b2a6:root::sorry_janefonda_890:head";
+ marker = "3:bf885d8f:root::sorry_janefonda_665:head";
+ */
+
http_ret = store->meta_mgr->list_keys_init(metadata_key, marker, &handle);
if (http_ret < 0) {
dout(5) << "ERROR: can't get key: " << cpp_strerror(http_ret) << dendl;
encode_json("truncated", truncated, s->formatter);
encode_json("count", count, s->formatter);
if (truncated) {
- encode_json("marker", store->meta_mgr->get_marker(handle), s->formatter);
+ string esc_marker =
+ rgw::to_base64(store->meta_mgr->get_marker(handle));
+ encode_json("marker", esc_marker, s->formatter);
}
s->formatter->close_section();
}
marker.name = s->info.args.get("key-marker");
marker.instance = s->info.args.get("version-id-marker");
}
+
+ // non-standard
+ s->info.args.get_bool("allow-unordered", &allow_unordered, false);
+
+ delimiter = s->info.args.get("delimiter");
+
max_keys = s->info.args.get("max-keys");
op_ret = parse_max_keys();
if (op_ret < 0) {
return op_ret;
}
- delimiter = s->info.args.get("delimiter");
+
encoding_type = s->info.args.get("encoding-type");
if (s->system_request) {
s->info.args.get_bool("objs-container", &objs_container, false);
shard_id = s->bucket_instance_shard_id;
}
}
+
return 0;
}
marker = s->info.args.get("marker");
end_marker = s->info.args.get("end_marker");
max_keys = s->info.args.get("limit");
+
+ // non-standard
+ s->info.args.get_bool("allow_unordered", &allow_unordered, false);
+
+ delimiter = s->info.args.get("delimiter");
+
op_ret = parse_max_keys();
if (op_ret < 0) {
return op_ret;
if (max > default_max)
return -ERR_PRECONDITION_FAILED;
- delimiter = s->info.args.get("delimiter");
-
string path_args;
if (s->info.args.exists("path")) { // should handle empty path
path_args = s->info.args.get("path");
dump_container_metadata(s, bucket, bucket_quota,
s->bucket_info.website_conf);
- s->formatter->open_array_section_with_attrs("container", FormatterAttrs("name", s->bucket.name.c_str(), NULL));
+ s->formatter->open_array_section_with_attrs("container",
+ FormatterAttrs("name",
+ s->bucket.name.c_str(),
+ NULL));
while (iter != objs.end() || pref_iter != common_prefixes.end()) {
bool do_pref = false;
else
do_pref = true;
- if (do_objs && (marker.empty() || marker < key)) {
+ if (do_objs && (allow_unordered || marker.empty() || marker < key)) {
if (key.name.compare(path) == 0)
goto next;
}
rgw_flush_formatter_and_reset(s, s->formatter);
-}
+} // RGWListBucket_ObjStore_SWIFT::send_response
static void dump_container_metadata(struct req_state *s,
const RGWBucketEnt& bucket,
log_error() << "failed to send http operation: " << http_op->to_str() << " ret=" << ret << std::endl;
http_op->put();
http_op = NULL;
- return ret;
+ return set_cr_error(ret);
}
return io_block(0);
# Type: Integer
# Required: No
# Default: 10
- ;filestore merge threshold = 10
+ ;filestore merge threshold = -10
# filestore_split_multiple * abs(filestore_merge_threshold) * 16 is the maximum number of files in a subdirectory before splitting into child directories.
# Type: Integer
add_subdirectory(filestore)
add_subdirectory(fs)
add_subdirectory(journal)
-if(WITH_EMBEDDED)
- add_subdirectory(libcephd)
-endif(WITH_EMBEDDED)
add_subdirectory(libcephfs)
add_subdirectory(librados)
add_subdirectory(librados_test_stub)
For now, use a more inclusive regex.
$ rbd info foo
rbd image 'foo':
- \tsize 1024 MB in 256 objects (esc)
- \torder 22 (4096 kB objects) (esc)
+ \tsize 1GiB in 256 objects (esc)
+ \torder 22 (4MiB objects) (esc)
[^^]+ (re)
\tformat: 1 (esc)
$ rbd info foo --format json | python -mjson.tool | sed 's/,$/, /'
</image>
$ rbd info foo@snap
rbd image 'foo':
- \tsize 1024 MB in 256 objects (esc)
- \torder 22 (4096 kB objects) (esc)
+ \tsize 1GiB in 256 objects (esc)
+ \torder 22 (4MiB objects) (esc)
[^^]+ (re)
\tformat: 1 (esc)
\tprotected: False (esc)
</image>
$ rbd info bar
rbd image 'bar':
- \tsize 1024 MB in 256 objects (esc)
- \torder 22 (4096 kB objects) (esc)
+ \tsize 1GiB in 256 objects (esc)
+ \torder 22 (4MiB objects) (esc)
[^^]+ (re)
\tformat: 2 (esc)
\tfeatures: layering, exclusive-lock, object-map, fast-diff, deep-flatten (esc)
</image>
$ rbd info bar@snap
rbd image 'bar':
- \tsize 512 MB in 128 objects (esc)
- \torder 22 (4096 kB objects) (esc)
+ \tsize 512MiB in 128 objects (esc)
+ \torder 22 (4MiB objects) (esc)
[^^]+ (re)
\tformat: 2 (esc)
\tfeatures: layering, exclusive-lock, object-map, fast-diff, deep-flatten (esc)
</image>
$ rbd info bar@snap2
rbd image 'bar':
- \tsize 1024 MB in 256 objects (esc)
- \torder 22 (4096 kB objects) (esc)
+ \tsize 1GiB in 256 objects (esc)
+ \torder 22 (4MiB objects) (esc)
[^^]+ (re)
\tformat: 2 (esc)
\tfeatures: layering, exclusive-lock, object-map, fast-diff, deep-flatten (esc)
</image>
$ rbd info baz
rbd image 'baz':
- \tsize 2048 MB in 512 objects (esc)
- \torder 22 (4096 kB objects) (esc)
+ \tsize 2GiB in 512 objects (esc)
+ \torder 22 (4MiB objects) (esc)
[^^]+ (re)
\tformat: 2 (esc)
\tfeatures: layering (esc)
</image>
$ rbd info quux
rbd image 'quux':
- \tsize 1024 kB in 1 objects (esc)
- \torder 22 (4096 kB objects) (esc)
+ \tsize 1MiB in 1 objects (esc)
+ \torder 22 (4MiB objects) (esc)
[^^]+ (re)
\tformat: 1 (esc)
$ rbd info quux --format json | python -mjson.tool | sed 's/,$/, /'
</image>
$ rbd info rbd_other/child
rbd image 'child':
- \tsize 512 MB in 128 objects (esc)
- \torder 22 (4096 kB objects) (esc)
+ \tsize 512MiB in 128 objects (esc)
+ \torder 22 (4MiB objects) (esc)
[^^]+ (re)
\tformat: 2 (esc)
\tfeatures: layering, exclusive-lock, object-map, fast-diff (esc)
</image>
$ rbd info rbd_other/child@snap
rbd image 'child':
- \tsize 512 MB in 128 objects (esc)
- \torder 22 (4096 kB objects) (esc)
+ \tsize 512MiB in 128 objects (esc)
+ \torder 22 (4MiB objects) (esc)
[^^]+ (re)
\tformat: 2 (esc)
\tfeatures: layering, exclusive-lock, object-map, fast-diff (esc)
\tcreate_timestamp:* (glob)
\tprotected: False (esc)
\tparent: rbd/bar@snap (esc)
- \toverlap: 512 MB (esc)
+ \toverlap: 512MiB (esc)
$ rbd info rbd_other/child@snap --format json | python -mjson.tool | sed 's/,$/, /'
{
"block_name_prefix": "rbd_data.*", (glob)
</image>
$ rbd info rbd_other/deep-flatten-child
rbd image 'deep-flatten-child':
- \tsize 512 MB in 128 objects (esc)
- \torder 22 (4096 kB objects) (esc)
+ \tsize 512MiB in 128 objects (esc)
+ \torder 22 (4MiB objects) (esc)
[^^]+ (re)
\tformat: 2 (esc)
\tfeatures: layering, exclusive-lock, object-map, fast-diff, deep-flatten (esc)
</image>
$ rbd info rbd_other/deep-flatten-child@snap
rbd image 'deep-flatten-child':
- \tsize 512 MB in 128 objects (esc)
- \torder 22 (4096 kB objects) (esc)
+ \tsize 512MiB in 128 objects (esc)
+ \torder 22 (4MiB objects) (esc)
[^^]+ (re)
\tformat: 2 (esc)
\tfeatures: layering, exclusive-lock, object-map, fast-diff, deep-flatten (esc)
<name>quuy</name>
</images>
$ rbd list -l
- NAME SIZE PARENT FMT PROT LOCK
- foo 1024M 1
- foo@snap 1024M 1
- quux 1024k 1 excl
- bar 1024M 2
- bar@snap 512M 2 yes
- bar@snap2 1024M 2
- baz 2048M 2 shr
- quuy 2048M 2
+ NAME SIZE PARENT FMT PROT LOCK
+ foo 1GiB 1
+ foo@snap 1GiB 1
+ quux 1MiB 1 excl
+ bar 1GiB 2
+ bar@snap 512MiB 2 yes
+ bar@snap2 1GiB 2
+ baz 2GiB 2 shr
+ quuy 2GiB 2
$ rbd list -l --format json | python -mjson.tool | sed 's/,$/, /'
[
{
<name>deep-flatten-child</name>
</images>
$ rbd list rbd_other -l
- NAME SIZE PARENT FMT PROT LOCK
- child 512M 2
- child@snap 512M rbd/bar@snap 2
- deep-flatten-child 512M 2
- deep-flatten-child@snap 512M 2
+ NAME SIZE PARENT FMT PROT LOCK
+ child 512MiB 2
+ child@snap 512MiB rbd/bar@snap 2
+ deep-flatten-child 512MiB 2
+ deep-flatten-child@snap 512MiB 2
$ rbd list rbd_other -l --format json | python -mjson.tool | sed 's/,$/, /'
[
{
</id*> (glob)
</locks>
$ rbd snap list foo
- SNAPID NAME SIZE TIMESTAMP
- *snap*1024*MB* (glob)
+ SNAPID NAME SIZE TIMESTAMP
+ *snap*1GiB* (glob)
$ rbd snap list foo --format json | python -mjson.tool | sed 's/,$/, /'
[
{
</snapshot>
</snapshots>
$ rbd snap list bar
- SNAPID NAME SIZE TIMESTAMP
- *snap*512*MB* (glob)
- *snap2*1024*MB* (glob)
+ SNAPID NAME SIZE TIMESTAMP
+ *snap*512MiB* (glob)
+ *snap2*1GiB* (glob)
$ rbd snap list bar --format json | python -mjson.tool | sed 's/,$/, /'
[
{
<snapshots></snapshots>
$ rbd snap list rbd_other/child
SNAPID NAME SIZE TIMESTAMP
- *snap*512*MB* (glob)
+ *snap*512MiB* (glob)
$ rbd snap list rbd_other/child --format json | python -mjson.tool | sed 's/,$/, /'
[
{
</snapshot>
</snapshots>
$ rbd disk-usage --pool rbd_other 2>/dev/null
- NAME PROVISIONED USED
- child@snap 512M 0
- child 512M 4096k
- deep-flatten-child@snap 512M 0
- deep-flatten-child 512M 0
- <TOTAL> 1024M 4096k
+ NAME PROVISIONED USED
+ child@snap 512MiB 0B
+ child 512MiB 4MiB
+ deep-flatten-child@snap 512MiB 0B
+ deep-flatten-child 512MiB 0B
+ <TOTAL> 1GiB 4MiB
$ rbd disk-usage --pool rbd_other --format json | python -mjson.tool | sed 's/,$/, /'
{
"images": [
#include "gtest/gtest.h"
#include "test/librados/test.h"
+#include "global/global_context.h"
#include <errno.h>
#include <string>
ASSERT_EQ(num_entries, entries);
}
-void index_prepare(OpMgr& mgr, librados::IoCtx& ioctx, string& oid, RGWModifyOp index_op, string& tag, string& obj, string& loc)
+void index_prepare(OpMgr& mgr, librados::IoCtx& ioctx, string& oid, RGWModifyOp index_op, string& tag,
+ string& obj, string& loc, uint16_t bi_flags = 0)
{
ObjectWriteOperation *op = mgr.write_op();
cls_rgw_obj_key key(obj, string());
rgw_zone_set zones_trace;
- cls_rgw_bucket_prepare_op(*op, index_op, tag, key, loc, true, 0, zones_trace);
+ cls_rgw_bucket_prepare_op(*op, index_op, tag, key, loc, true, bi_flags, zones_trace);
ASSERT_EQ(0, ioctx.operate(oid, op));
}
-void index_complete(OpMgr& mgr, librados::IoCtx& ioctx, string& oid, RGWModifyOp index_op, string& tag, int epoch, string& obj, rgw_bucket_dir_entry_meta& meta)
+void index_complete(OpMgr& mgr, librados::IoCtx& ioctx, string& oid, RGWModifyOp index_op, string& tag,
+ int epoch, string& obj, rgw_bucket_dir_entry_meta& meta, uint16_t bi_flags = 0)
{
ObjectWriteOperation *op = mgr.write_op();
cls_rgw_obj_key key(obj, string());
ver.pool = ioctx.get_id();
ver.epoch = epoch;
meta.accounted_size = meta.size;
- cls_rgw_bucket_complete_op(*op, index_op, tag, ver, key, meta, nullptr, true, 0, nullptr);
+ cls_rgw_bucket_complete_op(*op, index_op, tag, ver, key, meta, nullptr, true, bi_flags, nullptr);
ASSERT_EQ(0, ioctx.operate(oid, op));
}
test_stats(ioctx, bucket_oid, 0, num_objs / 2, total_size);
}
+/*
+ * This case is used to test whether get_obj_vals will
+ * return all valid utf8 objnames and filter out those
+ * in BI_PREFIX_CHAR private namespace.
+ */
+TEST(cls_rgw, index_list)
+{
+ string bucket_oid = str_int("bucket", 4);
+
+ OpMgr mgr;
+
+ ObjectWriteOperation *op = mgr.write_op();
+ cls_rgw_bucket_init(*op);
+ ASSERT_EQ(0, ioctx.operate(bucket_oid, op));
+
+ uint64_t epoch = 1;
+ uint64_t obj_size = 1024;
+ const int num_objs = 5;
+ const string keys[num_objs] = {
+ /* single byte utf8 character */
+ { static_cast<char>(0x41) },
+ /* double byte utf8 character */
+ { static_cast<char>(0xCF), static_cast<char>(0x8F) },
+    /* three-byte utf8 character */
+ { static_cast<char>(0xDF), static_cast<char>(0x8F), static_cast<char>(0x8F) },
+    /* four-byte utf8 character */
+ { static_cast<char>(0xF7), static_cast<char>(0x8F), static_cast<char>(0x8F), static_cast<char>(0x8F) },
+ /* BI_PREFIX_CHAR private namespace, for test only */
+ { static_cast<char>(0x80), static_cast<char>(0x41) }
+ };
+
+ for (int i = 0; i < num_objs; i++) {
+ string obj = keys[i];
+ string tag = str_int("tag", i);
+ string loc = str_int("loc", i);
+
+ index_prepare(mgr, ioctx, bucket_oid, CLS_RGW_OP_ADD, tag, obj, loc);
+
+ op = mgr.write_op();
+ rgw_bucket_dir_entry_meta meta;
+ meta.category = 0;
+ meta.size = obj_size;
+ index_complete(mgr, ioctx, bucket_oid, CLS_RGW_OP_ADD, tag, epoch, obj, meta);
+ }
+
+ test_stats(ioctx, bucket_oid, 0, num_objs, obj_size * num_objs);
+
+ map<int, string> oids = { {0, bucket_oid} };
+ map<int, struct rgw_cls_list_ret> list_results;
+ cls_rgw_obj_key start_key("", "");
+ int r = CLSRGWIssueBucketList(ioctx, start_key, "", 1000, true, oids, list_results, 1)();
+
+ ASSERT_EQ(r, 0);
+ ASSERT_EQ(1, list_results.size());
+
+ auto it = list_results.begin();
+ auto m = (it->second).dir.m;
+
+ ASSERT_EQ(4, m.size());
+ int i = 0;
+ for(auto it2 = m.begin(); it2 != m.end(); it2++, i++)
+ ASSERT_EQ(it2->first.compare(keys[i]), 0);
+}
+
+
+TEST(cls_rgw, bi_list)
+{
+ string bucket_oid = str_int("bucket", 5);
+
+ CephContext *cct = reinterpret_cast<CephContext *>(ioctx.cct());
+
+ OpMgr mgr;
+
+ ObjectWriteOperation *op = mgr.write_op();
+ cls_rgw_bucket_init(*op);
+ ASSERT_EQ(0, ioctx.operate(bucket_oid, op));
+
+ string name;
+ string marker;
+ uint64_t max = 10;
+ list<rgw_cls_bi_entry> entries;
+ bool is_truncated;
+
+ int ret = cls_rgw_bi_list(ioctx, bucket_oid, name, marker, max, &entries,
+ &is_truncated);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(entries.size(), 0);
+ ASSERT_EQ(is_truncated, false);
+
+ uint64_t epoch = 1;
+ uint64_t obj_size = 1024;
+ uint64_t num_objs = 35;
+
+ for (uint64_t i = 0; i < num_objs; i++) {
+ string obj = str_int("obj", i);
+ string tag = str_int("tag", i);
+ string loc = str_int("loc", i);
+ index_prepare(mgr, ioctx, bucket_oid, CLS_RGW_OP_ADD, tag, obj, loc, RGW_BILOG_FLAG_VERSIONED_OP);
+ op = mgr.write_op();
+ rgw_bucket_dir_entry_meta meta;
+ meta.category = 0;
+ meta.size = obj_size;
+ index_complete(mgr, ioctx, bucket_oid, CLS_RGW_OP_ADD, tag, epoch, obj, meta, RGW_BILOG_FLAG_VERSIONED_OP);
+ }
+
+ ret = cls_rgw_bi_list(ioctx, bucket_oid, name, marker, num_objs + 10, &entries,
+ &is_truncated);
+ ASSERT_EQ(ret, 0);
+ if (cct->_conf->osd_max_omap_entries_per_request < num_objs) {
+ ASSERT_EQ(entries.size(), cct->_conf->osd_max_omap_entries_per_request);
+ } else {
+ ASSERT_EQ(entries.size(), num_objs);
+ }
+
+ uint64_t num_entries = 0;
+
+ is_truncated = true;
+ while(is_truncated) {
+ ret = cls_rgw_bi_list(ioctx, bucket_oid, name, marker, max, &entries,
+ &is_truncated);
+ ASSERT_EQ(ret, 0);
+ if (is_truncated) {
+ ASSERT_EQ(entries.size(), std::min(max, cct->_conf->osd_max_omap_entries_per_request));
+ } else {
+ ASSERT_EQ(entries.size(), num_objs - num_entries);
+ }
+ num_entries += entries.size();
+ marker = entries.back().idx;
+ }
+
+ ret = cls_rgw_bi_list(ioctx, bucket_oid, name, marker, max, &entries,
+ &is_truncated);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(entries.size(), 0);
+ ASSERT_EQ(is_truncated, false);
+
+ if (cct->_conf->osd_max_omap_entries_per_request < 15) {
+ num_entries = 0;
+ max = 15;
+ is_truncated = true;
+ marker.clear();
+ while(is_truncated) {
+ ret = cls_rgw_bi_list(ioctx, bucket_oid, name, marker, max, &entries,
+ &is_truncated);
+ ASSERT_EQ(ret, 0);
+ if (is_truncated) {
+ ASSERT_EQ(entries.size(), cct->_conf->osd_max_omap_entries_per_request);
+ } else {
+ ASSERT_EQ(entries.size(), num_objs - num_entries);
+ }
+ num_entries += entries.size();
+ marker = entries.back().idx;
+ }
+ }
+
+ ret = cls_rgw_bi_list(ioctx, bucket_oid, name, marker, max, &entries,
+ &is_truncated);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(entries.size(), 0);
+ ASSERT_EQ(is_truncated, false);
+}
+
/* test garbage collection */
static void create_obj(cls_rgw_obj& obj, int i, int j)
{
#include <sstream>
-TEST(util, unit_to_bytesize)
-{
- ASSERT_EQ(1234ll, unit_to_bytesize("1234", &cerr));
- ASSERT_EQ(1024ll, unit_to_bytesize("1K", &cerr));
- ASSERT_EQ(1024ll, unit_to_bytesize("1k", &cerr));
- ASSERT_EQ(1048576ll, unit_to_bytesize("1M", &cerr));
- ASSERT_EQ(1073741824ll, unit_to_bytesize("1G", &cerr));
- ASSERT_EQ(1099511627776ll, unit_to_bytesize("1T", &cerr));
- ASSERT_EQ(1125899906842624ll, unit_to_bytesize("1P", &cerr));
- ASSERT_EQ(1152921504606846976ll, unit_to_bytesize("1E", &cerr));
-
- ASSERT_EQ(65536ll, unit_to_bytesize(" 64K", &cerr));
-}
-
#if defined(__linux__)
TEST(util, collect_sys_info)
{
+++ /dev/null
-# cephdtest
-set(cephdtest_srcs
- test.cc)
-add_library(cephdtest STATIC ${cephdtest_srcs})
-set_target_properties(cephdtest PROPERTIES COMPILE_FLAGS ${UNITTEST_CXX_FLAGS})
-
-#Enable spdk
-if(HAVE_SPDK)
-link_directories("${CMAKE_SOURCE_DIR}/src/spdk/build/lib/")
-endif(HAVE_SPDK)
-
-# ceph_test_cephd_api_misc
-add_executable(ceph_test_cephd_api_misc
- misc.cc
- )
-set_target_properties(ceph_test_cephd_api_misc PROPERTIES COMPILE_FLAGS
- ${UNITTEST_CXX_FLAGS})
-target_link_libraries(ceph_test_cephd_api_misc
- cephd global ${UNITTEST_LIBS} cephdtest z snappy ceph_zstd)
-
-install(TARGETS
- ceph_test_cephd_api_misc
- DESTINATION ${CMAKE_INSTALL_BINDIR})
+++ /dev/null
-#include "gtest/gtest.h"
-#include "include/cephd/libcephd.h"
-
-TEST(LibCephdMiscVersion, Version) {
- int major, minor, extra;
- cephd_version(&major, &minor, &extra);
-}
+++ /dev/null
-void doNothing() {
-}
#include "librbd/image/RemoveRequest.cc"
template class librbd::image::RemoveRequest<librbd::MockImageCtx>;
+ACTION_P(TestFeatures, image_ctx) {
+ return ((image_ctx->features & arg0) != 0);
+}
+
+ACTION_P(ShutDownExclusiveLock, image_ctx) {
+ // shutting down exclusive lock will close object map and journal
+ image_ctx->exclusive_lock = nullptr;
+ image_ctx->object_map = nullptr;
+ image_ctx->journal = nullptr;
+}
+
namespace librbd {
namespace image {
_, _, _))
.WillOnce(Return(r));
}
+
+ void expect_test_features(MockImageCtx &mock_image_ctx) {
+ if (m_mock_imctx->exclusive_lock != nullptr) {
+ EXPECT_CALL(mock_image_ctx, test_features(_))
+ .WillRepeatedly(TestFeatures(&mock_image_ctx));
+ }
+ }
+
+ void expect_set_journal_policy(MockImageCtx &mock_image_ctx) {
+ if (m_test_imctx->test_features(RBD_FEATURE_JOURNALING)) {
+ EXPECT_CALL(mock_image_ctx, set_journal_policy(_))
+ .WillOnce(Invoke([](journal::Policy* policy) {
+ ASSERT_TRUE(policy->journal_disabled());
+ delete policy;
+ }));
+ }
+ }
+
+ void expect_shut_down_exclusive_lock(MockImageCtx &mock_image_ctx,
+ MockExclusiveLock &mock_exclusive_lock,
+ int r) {
+ if (m_mock_imctx->exclusive_lock != nullptr) {
+ EXPECT_CALL(mock_exclusive_lock, shut_down(_))
+ .WillOnce(DoAll(ShutDownExclusiveLock(&mock_image_ctx),
+ CompleteContext(r, mock_image_ctx.image_ctx->op_work_queue)));
+ }
+ }
+
};
TEST_F(TestMockImageRemoveRequest, SuccessV1) {
REQUIRE_FEATURE(RBD_FEATURE_JOURNALING);
TestImageRemoveSetUp();
+ MockExclusiveLock *mock_exclusive_lock = nullptr;
+ if (m_test_imctx->test_features(RBD_FEATURE_EXCLUSIVE_LOCK)) {
+ mock_exclusive_lock = new MockExclusiveLock();
+ m_mock_imctx->exclusive_lock = mock_exclusive_lock;
+ }
+
C_SaferCond ctx;
librbd::NoOpProgressContext no_op;
ContextWQ op_work_queue;
InSequence seq;
expect_state_open(*m_mock_imctx, 0);
+
+ expect_test_features(*m_mock_imctx);
+ expect_set_journal_policy(*m_mock_imctx);
+ expect_shut_down_exclusive_lock(*m_mock_imctx, *mock_exclusive_lock, 0);
+
expect_mirror_image_get(*m_mock_imctx, 0);
expect_get_group(*m_mock_imctx, 0);
expect_trim(*m_mock_imctx, mock_trim_request, 0);
expect_remove_mirror_image(m_ioctx, 0);
expect_dir_remove_image(m_ioctx, 0);
- MockRemoveRequest *req = MockRemoveRequest::create(m_ioctx, m_image_name, "",
- true, false, no_op, &op_work_queue, &ctx);
+ MockRemoveRequest *req = MockRemoveRequest::create(
+ m_ioctx, m_image_name, "", true, false, no_op, &op_work_queue, &ctx);
req->send();
ASSERT_EQ(0, ctx.wait());
REQUIRE_FEATURE(RBD_FEATURE_JOURNALING);
TestImageRemoveSetUp();
+ MockExclusiveLock *mock_exclusive_lock = nullptr;
+ if (m_test_imctx->test_features(RBD_FEATURE_EXCLUSIVE_LOCK)) {
+ mock_exclusive_lock = new MockExclusiveLock();
+ m_mock_imctx->exclusive_lock = mock_exclusive_lock;
+ }
+
C_SaferCond ctx;
librbd::NoOpProgressContext no_op;
ContextWQ op_work_queue;
InSequence seq;
expect_state_open(*m_mock_imctx, 0);
+
+ expect_test_features(*m_mock_imctx);
+ expect_set_journal_policy(*m_mock_imctx);
+ expect_shut_down_exclusive_lock(*m_mock_imctx, *mock_exclusive_lock, 0);
+
expect_mirror_image_get(*m_mock_imctx, 0);
expect_get_group(*m_mock_imctx, 0);
expect_trim(*m_mock_imctx, mock_trim_request, 0);
expect_remove_mirror_image(m_ioctx, 0);
expect_dir_remove_image(m_ioctx, -ENOENT);
- MockRemoveRequest *req = MockRemoveRequest::create(m_ioctx, m_image_name, "",
- true, false, no_op, &op_work_queue, &ctx);
+ MockRemoveRequest *req = MockRemoveRequest::create(
+ m_ioctx, m_image_name, "", true, false, no_op, &op_work_queue, &ctx);
req->send();
ASSERT_EQ(-ENOENT, ctx.wait());
#include "test/librbd/test_support.h"
#include "librbd/ExclusiveLock.h"
#include "librbd/ImageCtx.h"
+#include "librbd/ImageState.h"
#include "librbd/ImageWatcher.h"
#include "librbd/internal.h"
#include "librbd/ObjectMap.h"
ASSERT_TRUE(flags_set);
}
-TEST_F(TestObjectMap, InvalidateFlagInMemoryOnly) {
+TEST_F(TestObjectMap, AcquireLockInvalidatesWhenTooSmall) {
REQUIRE_FEATURE(RBD_FEATURE_OBJECT_MAP);
librbd::ImageCtx *ictx;
ASSERT_EQ(0, ictx->test_flags(RBD_FLAG_OBJECT_MAP_INVALID, &flags_set));
ASSERT_FALSE(flags_set);
+ librados::ObjectWriteOperation op;
+ librbd::cls_client::object_map_resize(&op, 0, OBJECT_NONEXISTENT);
+
std::string oid = librbd::ObjectMap<>::object_map_name(ictx->id, CEPH_NOSNAP);
- bufferlist valid_bl;
- ASSERT_LT(0, ictx->md_ctx.read(oid, valid_bl, 0, 0));
+ ASSERT_EQ(0, ictx->md_ctx.operate(oid, &op));
- bufferlist corrupt_bl;
- corrupt_bl.append("corrupt");
- ASSERT_EQ(0, ictx->md_ctx.write_full(oid, corrupt_bl));
+ C_SaferCond lock_ctx;
+ {
+ RWLock::WLocker owner_locker(ictx->owner_lock);
+ ictx->exclusive_lock->try_acquire_lock(&lock_ctx);
+ }
+ ASSERT_EQ(0, lock_ctx.wait());
- ASSERT_EQ(0, when_open_object_map(ictx));
ASSERT_EQ(0, ictx->test_flags(RBD_FLAG_OBJECT_MAP_INVALID, &flags_set));
ASSERT_TRUE(flags_set);
- ASSERT_EQ(0, ictx->md_ctx.write_full(oid, valid_bl));
- ASSERT_EQ(0, open_image(m_image_name, &ictx));
- ASSERT_EQ(0, ictx->test_flags(RBD_FLAG_OBJECT_MAP_INVALID, &flags_set));
- ASSERT_FALSE(flags_set);
+ // Test the flag is stored on disk
+ ASSERT_EQ(0, ictx->state->refresh());
+ ASSERT_EQ(0, ictx->test_flags(RBD_FLAG_OBJECT_MAP_INVALID,
+ &flags_set));
+ ASSERT_TRUE(flags_set);
}
-
pool.type = pg_pool_t::TYPE_REPLICATED;
PGMap::dump_object_stat_sum(tbl, nullptr, sum, avail,
pool.get_size(), verbose, &pool);
- ASSERT_EQ(stringify(si_t(sum.num_bytes)), tbl.get(0, 0));
+ ASSERT_EQ(stringify(byte_u_t(sum.num_bytes)), tbl.get(0, 0));
float copies_rate =
(static_cast<float>(sum.num_object_copies - sum.num_objects_degraded) /
sum.num_object_copies);
float used_bytes = sum.num_bytes * copies_rate * pool.get_size();
float used_percent = used_bytes / (used_bytes + avail) * 100;
unsigned col = 0;
- ASSERT_EQ(stringify(si_t(sum.num_bytes)), tbl.get(0, col++));
+ ASSERT_EQ(stringify(byte_u_t(sum.num_bytes)), tbl.get(0, col++));
ASSERT_EQ(percentify(used_percent), tbl.get(0, col++));
- ASSERT_EQ(stringify(si_t(avail/pool.size)), tbl.get(0, col++));
+ ASSERT_EQ(stringify(byte_u_t(avail/pool.size)), tbl.get(0, col++));
ASSERT_EQ(stringify(sum.num_objects), tbl.get(0, col++));
- ASSERT_EQ(stringify(si_t(sum.num_objects_dirty)), tbl.get(0, col++));
- ASSERT_EQ(stringify(si_t(sum.num_rd)), tbl.get(0, col++));
- ASSERT_EQ(stringify(si_t(sum.num_wr)), tbl.get(0, col++));
+ ASSERT_EQ(stringify(si_u_t(sum.num_objects_dirty)), tbl.get(0, col++));
+ ASSERT_EQ(stringify(byte_u_t(sum.num_rd)), tbl.get(0, col++));
+ ASSERT_EQ(stringify(byte_u_t(sum.num_wr)), tbl.get(0, col++));
// we can use pool.size for raw_used_rate if it is a replica pool
uint64_t raw_bytes_used = sum.num_bytes * pool.get_size() * copies_rate;
- ASSERT_EQ(stringify(si_t(raw_bytes_used)), tbl.get(0, col++));
+ ASSERT_EQ(stringify(byte_u_t(raw_bytes_used)), tbl.get(0, col++));
}
// with table, without formatter, verbose = true, empty, avail > 0
pool.type = pg_pool_t::TYPE_REPLICATED;
PGMap::dump_object_stat_sum(tbl, nullptr, sum, avail,
pool.get_size(), verbose, &pool);
- ASSERT_EQ(stringify(si_t(0)), tbl.get(0, 0));
+ ASSERT_EQ(stringify(byte_u_t(0)), tbl.get(0, 0));
unsigned col = 0;
- ASSERT_EQ(stringify(si_t(0)), tbl.get(0, col++));
+ ASSERT_EQ(stringify(byte_u_t(0)), tbl.get(0, col++));
ASSERT_EQ(percentify(0), tbl.get(0, col++));
- ASSERT_EQ(stringify(si_t(avail/pool.size)), tbl.get(0, col++));
+ ASSERT_EQ(stringify(byte_u_t(avail/pool.size)), tbl.get(0, col++));
ASSERT_EQ(stringify(0), tbl.get(0, col++));
- ASSERT_EQ(stringify(si_t(0)), tbl.get(0, col++));
- ASSERT_EQ(stringify(si_t(0)), tbl.get(0, col++));
- ASSERT_EQ(stringify(si_t(0)), tbl.get(0, col++));
- ASSERT_EQ(stringify(si_t(0)), tbl.get(0, col++));
+ ASSERT_EQ(stringify(si_u_t(0)), tbl.get(0, col++));
+ ASSERT_EQ(stringify(byte_u_t(0)), tbl.get(0, col++));
+ ASSERT_EQ(stringify(byte_u_t(0)), tbl.get(0, col++));
+ ASSERT_EQ(stringify(byte_u_t(0)), tbl.get(0, col++));
}
// with table, without formatter, verbose = false, empty, avail = 0
PGMap::dump_object_stat_sum(tbl, nullptr, sum, avail,
pool.get_size(), verbose, &pool);
- ASSERT_EQ(stringify(si_t(0)), tbl.get(0, 0));
+ ASSERT_EQ(stringify(byte_u_t(0)), tbl.get(0, 0));
unsigned col = 0;
- ASSERT_EQ(stringify(si_t(0)), tbl.get(0, col++));
+ ASSERT_EQ(stringify(byte_u_t(0)), tbl.get(0, col++));
ASSERT_EQ(percentify(0), tbl.get(0, col++));
- ASSERT_EQ(stringify(si_t(avail/pool.size)), tbl.get(0, col++));
+ ASSERT_EQ(stringify(byte_u_t(avail/pool.size)), tbl.get(0, col++));
ASSERT_EQ(stringify(0), tbl.get(0, col++));
}
dout(0) << __func__
<< " written: " << m_stats_total_written
<< " duration: " << duration << " sec"
- << " bandwidth: " << prettybyte_t(throughput) << "/s"
+ << " bandwidth: " << byte_u_t(throughput) << "/s"
<< " iops: " << tx_throughput << "/s"
<< dendl;
bool byte_units::parse(const std::string &val, std::string *err)
{
- v = strict_sistrtoll(val.c_str(), err);
+ v = strict_iecstrtoll(val.c_str(), err);
return err->empty();
}
ASSERT_EQ(sd("{\"test_perfcounter_1\":{\"element1\":13,\"element2\":0.000000000,"
"\"element3\":{\"avgcount\":0,\"sum\":0.000000000,\"avgtime\":0.000000000}}}"), msg);
ASSERT_EQ("", client.do_request("{ \"prefix\": \"perf schema\", \"format\": \"json\" }", &msg));
- ASSERT_EQ(sd("{\"test_perfcounter_1\":{\"element1\":{\"type\":2,\"metric_type\":\"gauge\",\"value_type\":\"integer\",\"description\":\"\",\"nick\":\"\",\"priority\":0},\"element2\":{\"type\":1,\"metric_type\":\"gauge\",\"value_type\":\"real\",\"description\":\"\",\"nick\":\"\",\"priority\":0},\"element3\":{\"type\":5,\"metric_type\":\"gauge\",\"value_type\":\"real-integer-pair\",\"description\":\"\",\"nick\":\"\",\"priority\":0}}}"), msg);
+ ASSERT_EQ(sd("{\"test_perfcounter_1\":{\"element1\":{\"type\":2,\"metric_type\":\"gauge\",\"value_type\":\"integer\",\"description\":\"\",\"nick\":\"\",\"priority\":0,\"units\":\"none\"},\"element2\":{\"type\":1,\"metric_type\":\"gauge\",\"value_type\":\"real\",\"description\":\"\",\"nick\":\"\",\"priority\":0,\"units\":\"none\"},\"element3\":{\"type\":5,\"metric_type\":\"gauge\",\"value_type\":\"real-integer-pair\",\"description\":\"\",\"nick\":\"\",\"priority\":0,\"units\":\"none\"}}}"), msg);
coll->clear();
ASSERT_EQ("", client.do_request("{ \"prefix\": \"perf dump\", \"format\": \"json\" }", &msg));
ASSERT_EQ("{}", msg);
import re
import sys
import json
-from StringIO import StringIO
+try:
+ from StringIO import StringIO
+except ImportError:
+ from io import StringIO
def get_command_descriptions(what):
CEPH_BIN = os.environ['CEPH_BIN']
*/
#include "common/strtol.h"
+#include <math.h>
#include <string>
#include <map>
#include "gtest/gtest.h"
-static void test_strict_strtoll(const char *str, long long expected)
+static void test_strict_strtoll(const char *str, long long expected, int base)
{
std::string err;
- long long val = strict_strtoll(str, 10, &err);
+ long long val = strict_strtoll(str, base, &err);
if (!err.empty()) {
ASSERT_EQ(err, "");
}
}
TEST(StrToL, Simple1) {
- test_strict_strtoll("123", 123);
- test_strict_strtoll("0", 0);
- test_strict_strtoll("-123", -123);
- test_strict_strtoll("8796093022208", 8796093022208LL);
- test_strict_strtoll("-8796093022208", -8796093022208LL);
+ test_strict_strtoll("123", 123, 10);
+ test_strict_strtoll("0", 0, 10);
+ test_strict_strtoll("-123", -123, 10);
+ test_strict_strtoll("8796093022208", 8796093022208LL, 10);
+ test_strict_strtoll("-8796093022208", -8796093022208LL, 10);
+ test_strict_strtoll("123", 123, 0);
+ test_strict_strtoll("0x7b", 123, 0);
+ test_strict_strtoll("4d2", 1234, 16);
test_strict_strtol("208", 208);
test_strict_strtol("-4", -4);
test_strict_strtoll_err("604462909807314587353088"); // overflow
test_strict_strtoll_err("aw shucks"); // invalid
test_strict_strtoll_err("343245 aw shucks"); // invalid chars at end
+ test_strict_strtoll_err("-"); // invalid
test_strict_strtol_err("35 aw shucks"); // invalid chars at end
test_strict_strtol_err("--0");
+ test_strict_strtol_err("-");
test_strict_strtod_err("345345.0-");
test_strict_strtod_err("34.0 garbo");
}
+static void test_strict_iecstrtoll(const char *str)
+{
+ std::string err;
+ strict_iecstrtoll(str, &err);
+ ASSERT_EQ(err, "");
+}
+
+static void test_strict_iecstrtoll_units(const std::string& foo,
+ std::string u, const int m)
+{
+ std::string s(foo);
+ s.append(u);
+ const char *str = s.c_str();
+ std::string err;
+ uint64_t r = strict_iecstrtoll(str, &err);
+ ASSERT_EQ(err, "");
+
+ str = foo.c_str();
+ std::string err2;
+ long long tmp = strict_strtoll(str, 10, &err2);
+ ASSERT_EQ(err2, "");
+ tmp = (tmp << m);
+ ASSERT_EQ(tmp, (long long)r);
+}
+
+TEST(IECStrToLL, WithUnits) {
+ std::map<std::string,int> units;
+ units["B"] = 0;
+ units["K"] = 10;
+ units["M"] = 20;
+ units["G"] = 30;
+ units["T"] = 40;
+ units["P"] = 50;
+ units["E"] = 60;
+ units["Ki"] = 10;
+ units["Mi"] = 20;
+ units["Gi"] = 30;
+ units["Ti"] = 40;
+ units["Pi"] = 50;
+ units["Ei"] = 60;
+
+ for (std::map<std::string,int>::iterator p = units.begin();
+ p != units.end(); ++p) {
+ // the upper bound of uint64_t is 2^64 = 4E
+ test_strict_iecstrtoll_units("4", p->first, p->second);
+ test_strict_iecstrtoll_units("1", p->first, p->second);
+ test_strict_iecstrtoll_units("0", p->first, p->second);
+ }
+}
+
+TEST(IECStrToLL, WithoutUnits) {
+ test_strict_iecstrtoll("1024");
+ test_strict_iecstrtoll("1152921504606846976");
+ test_strict_iecstrtoll("0");
+}
+
+static void test_strict_iecstrtoll_err(const char *str)
+{
+ std::string err;
+ strict_iecstrtoll(str, &err);
+ ASSERT_NE(err, "");
+}
+
+TEST(IECStrToLL, Error) {
+ test_strict_iecstrtoll_err("1024F");
+ test_strict_iecstrtoll_err("QDDSA");
+ test_strict_iecstrtoll_err("1b");
+ test_strict_iecstrtoll_err("100k");
+ test_strict_iecstrtoll_err("1000m");
+ test_strict_iecstrtoll_err("1g");
+ test_strict_iecstrtoll_err("20t");
+ test_strict_iecstrtoll_err("100p");
+ test_strict_iecstrtoll_err("1000e");
+ test_strict_iecstrtoll_err("B");
+ test_strict_iecstrtoll_err("M");
+ test_strict_iecstrtoll_err("BM");
+ test_strict_iecstrtoll_err("B0wef");
+ test_strict_iecstrtoll_err("0m");
+ test_strict_iecstrtoll_err("-1"); // it returns uint64_t
+ test_strict_iecstrtoll_err("-1K");
+ test_strict_iecstrtoll_err("1Bi");
+ test_strict_iecstrtoll_err("Bi");
+ test_strict_iecstrtoll_err("bi");
+ test_strict_iecstrtoll_err("gi");
+ test_strict_iecstrtoll_err("100ki");
+ test_strict_iecstrtoll_err("1000mi");
+ test_strict_iecstrtoll_err("1gi");
+ test_strict_iecstrtoll_err("20ti");
+ test_strict_iecstrtoll_err("100pi");
+ test_strict_iecstrtoll_err("1000ei");
+ // the upper bound of uint64_t is 2^64 = 4E, so 1024E overflows
+ test_strict_iecstrtoll_err("1024E"); // overflows after adding the suffix
+}
+
+// since strict_iecstrtoll is an alias of strict_iec_cast<uint64_t>(), quite a few
+// of cases are covered by existing test cases of strict_iecstrtoll already.
+TEST(StrictIECCast, Error) {
+ {
+ std::string err;
+ // the SI prefix is way too large for `int`.
+ (void)strict_iec_cast<int>("2E", &err);
+ ASSERT_NE(err, "");
+ }
+ {
+ std::string err;
+ (void)strict_iec_cast<int>("-2E", &err);
+ ASSERT_NE(err, "");
+ }
+ {
+ std::string err;
+ (void)strict_iec_cast<int>("1T", &err);
+ ASSERT_NE(err, "");
+ }
+ {
+ std::string err;
+ (void)strict_iec_cast<int64_t>("2E", &err);
+ ASSERT_EQ(err, "");
+ }
+ {
+ std::string err;
+ (void)strict_iec_cast<int64_t>("-2E", &err);
+ ASSERT_EQ(err, "");
+ }
+ {
+ std::string err;
+ (void)strict_iec_cast<int64_t>("1T", &err);
+ ASSERT_EQ(err, "");
+ }
+}
+
+
static void test_strict_sistrtoll(const char *str)
{
std::string err;
}
static void test_strict_sistrtoll_units(const std::string& foo,
- char u, const int m)
+ std::string u, const long long m)
{
std::string s(foo);
- s.push_back(u);
+ s.append(u);
const char *str = s.c_str();
std::string err;
uint64_t r = strict_sistrtoll(str, &err);
std::string err2;
long long tmp = strict_strtoll(str, 10, &err2);
ASSERT_EQ(err2, "");
- tmp = (tmp << m);
+ tmp = (tmp * m);
ASSERT_EQ(tmp, (long long)r);
}
TEST(SIStrToLL, WithUnits) {
- std::map<char,int> units;
- units['B'] = 0;
- units['K'] = 10;
- units['M'] = 20;
- units['G'] = 30;
- units['T'] = 40;
- units['P'] = 50;
- units['E'] = 60;
-
- for (std::map<char,int>::iterator p = units.begin();
+ std::map<std::string,long long> units;
+ units["K"] = pow(10, 3);
+ units["M"] = pow(10, 6);
+ units["G"] = pow(10, 9);
+ units["T"] = pow(10, 12);
+ units["P"] = pow(10, 15);
+ units["E"] = pow(10, 18);
+
+ for (std::map<std::string,long long>::iterator p = units.begin();
p != units.end(); ++p) {
// the upper bound of uint64_t is 2^64 = 4E
test_strict_sistrtoll_units("4", p->first, p->second);
test_strict_sistrtoll_err("0m");
test_strict_sistrtoll_err("-1"); // it returns uint64_t
test_strict_sistrtoll_err("-1K");
+ test_strict_sistrtoll_err("1Bi");
+ test_strict_sistrtoll_err("Bi");
+ test_strict_sistrtoll_err("bi");
+ test_strict_sistrtoll_err("gi");
+ test_strict_sistrtoll_err("100ki");
+ test_strict_sistrtoll_err("1000mi");
+ test_strict_sistrtoll_err("1gi");
+ test_strict_sistrtoll_err("20ti");
+ test_strict_sistrtoll_err("100pi");
+ test_strict_sistrtoll_err("1000ei");
+ test_strict_sistrtoll_err("1B");
// the upper bound of uint64_t is 2^64 = 4E, so 1024E overflows
test_strict_sistrtoll_err("1024E"); // overflows after adding the suffix
}
utime_t cur_duration = ceph_clock_now() - started_at;
std::cout << "ts = " << cur_duration << "s, copied " << total_keys
- << " keys so far (" << stringify(si_t(total_size)) << ")"
+ << " keys so far (" << stringify(si_u_t(total_size)) << ")"
<< std::endl;
} while (it->valid());
std::cout << "summary:" << std::endl;
std::cout << " copied " << total_keys << " keys" << std::endl;
std::cout << " used " << total_txs << " transactions" << std::endl;
- std::cout << " total size " << stringify(si_t(total_size)) << std::endl;
+ std::cout << " total size " << stringify(si_u_t(total_size)) << std::endl;
std::cout << " from '" << store_path << "' to '" << other_path << "'"
<< std::endl;
std::cout << " duration " << time_taken << " seconds" << std::endl;
return 1;
}
std::cout << "(" << url_escape(prefix) << "," << url_escape(key)
- << ") size " << si_t(bl.length()) << std::endl;
+ << ") size " << si_u_t(bl.length()) << std::endl;
} else if (cmd == "set") {
if (argc < 8) {
out_store.apply_transaction(tx);
std::cout << "copied " << total_keys << " keys so far ("
- << stringify(si_t(total_size)) << ")" << std::endl;
+ << stringify(byte_u_t(total_size)) << ")" << std::endl;
} while (it->valid());
out_store.close();
std::cout << "summary: copied " << total_keys << " keys, using "
<< total_tx << " transactions, totalling "
- << stringify(si_t(total_size)) << std::endl;
+ << stringify(byte_u_t(total_size)) << std::endl;
std::cout << "from '" << store_path << "' to '" << out_path << "'"
<< std::endl;
} else if (cmd == "rewrite-crush") {
return r;
}
+int corrupt_info(ObjectStore *store, coll_t coll, ghobject_t &ghobj, Formatter* formatter,
+ ObjectStore::Sequencer &osr)
+{
+ bufferlist attr;
+ int r = store->getattr(coll, ghobj, OI_ATTR, attr);
+ if (r < 0) {
+ cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
+ << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ object_info_t oi;
+ bufferlist::iterator bp = attr.begin();
+ try {
+ ::decode(oi, bp);
+ } catch (...) {
+ r = -EINVAL;
+ cerr << "Error getting attr on : " << make_pair(coll, ghobj) << ", "
+ << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ cout << "Corrupting info" << std::endl;
+ if (!dry_run) {
+ attr.clear();
+ oi.alloc_hint_flags += 0xff;
+ ObjectStore::Transaction t;
+ ::encode(oi, attr, -1); /* fixme: using full features */
+ t.setattr(coll, ghobj, OI_ATTR, attr);
+ r = store->apply_transaction(&osr, std::move(t));
+ if (r < 0) {
+ cerr << "Error writing object info: " << make_pair(coll, ghobj) << ", "
+ << cpp_strerror(r) << std::endl;
+ return r;
+ }
+ }
+ return 0;
+}
+
int set_size(ObjectStore *store, coll_t coll, ghobject_t &ghobj, uint64_t setsize, Formatter* formatter,
ObjectStore::Sequencer &osr, bool corrupt)
{
}
int apply_layout_settings(ObjectStore *os, const OSDSuperblock &superblock,
- const string &pool_name, const spg_t &pgid, bool dry_run)
+ const string &pool_name, const spg_t &pgid, bool dry_run,
+ int target_level)
{
int r = 0;
cerr << "Would apply layout settings to " << coll << std::endl;
} else {
cerr << "Finished " << done << "/" << total << " collections" << "\r";
- r = fs->apply_layout_settings(coll);
+ r = fs->apply_layout_settings(coll, target_level);
if (r < 0) {
cerr << "Error applying layout settings to " << coll << std::endl;
return r;
positional.add_options()
("object", po::value<string>(&object), "'' for pgmeta_oid, object name or ghobject in json")
("objcmd", po::value<string>(&objcmd), "command [(get|set)-bytes, (get|set|rm)-(attr|omap), (get|set)-omaphdr, list-attrs, list-omap, remove]")
- ("arg1", po::value<string>(&arg1), "arg1 based on cmd")
+ ("arg1", po::value<string>(&arg1), "arg1 based on cmd, "
+ "for apply-layout-settings: target hash level split to")
("arg2", po::value<string>(&arg2), "arg2 based on cmd")
;
}
if (op == "apply-layout-settings") {
- ret = apply_layout_settings(fs, superblock, pool, pgid, dry_run);
+ int target_level = 0;
+ if (vm.count("arg1") && isdigit(arg1[0])) {
+ target_level = atoi(arg1.c_str());
+ }
+ ret = apply_layout_settings(fs, superblock, pool, pgid, dry_run, target_level);
goto out;
}
}
ret = print_obj_info(fs, coll, ghobj, formatter);
goto out;
+ } else if (objcmd == "corrupt-info") { // Undocumented testing feature
+ // There should not be any other arguments
+ if (vm.count("arg1") || vm.count("arg2")) {
+ usage(desc);
+ ret = 1;
+ goto out;
+ }
+ ret = corrupt_info(fs, coll, ghobj, formatter, *osr);
+ goto out;
} else if (objcmd == "set-size" || objcmd == "corrupt-size") {
// Undocumented testing feature
bool corrupt = (objcmd == "corrupt-size");
{
uint32_t const object_size = h.layout.object_size;
assert(object_size > 0);
- uint64_t const last_obj = h.write_pos / object_size;
- uint64_t const purge_count = 2;
+ uint64_t last_obj = h.write_pos / object_size;
+ uint64_t purge_count = 2;
+ /* When the length is zero, the last_obj should be zeroed
+ * from the offset determined by the new write_pos instead of being purged.
+ */
+ if (!len) {
+ purge_count = 1;
+ ++last_obj;
+ }
C_SaferCond purge_cond;
cout << "Purging " << purge_count << " objects from " << last_obj << std::endl;
lock.Lock();
lock.Unlock();
purge_cond.wait();
}
+ /* When the length is zero, zero the last object
+ * from the offset determined by the new write_pos.
+ */
+ if (!len) {
+ uint64_t offset_in_obj = h.write_pos % h.layout.object_size;
+ uint64_t len = h.layout.object_size - offset_in_obj;
+ C_SaferCond zero_cond;
+ cout << "Zeroing " << len << " bytes in the last object." << std::endl;
+
+ lock.Lock();
+ filer.zero(ino, &h.layout, snapc, h.write_pos, len, ceph::real_clock::now(), 0, &zero_cond);
+ lock.Unlock();
+ zero_cond.wait();
+ }
// Stream from `fd` to `filer`
uint64_t pos = start;
template <typename I, typename T>
static int rados_sistrtoll(I &i, T *val) {
std::string err;
- *val = strict_sistrtoll(i->second.c_str(), &err);
+ *val = strict_iecstrtoll(i->second.c_str(), &err);
if (err != "") {
cerr << "Invalid value for " << i->first << ": " << err << std::endl;
return -EINVAL;
librados::pool_stat_t& s = i->second;
if (!formatter) {
tab << pool_name
- << si_t(s.num_bytes)
+ << byte_u_t(s.num_bytes)
<< s.num_objects
<< s.num_object_clones
<< s.num_object_copies
<< s.num_objects_unfound
<< s.num_objects_degraded
<< s.num_rd
- << si_t(s.num_rd_kb << 10)
+ << byte_u_t(s.num_rd_kb << 10)
<< s.num_wr
- << si_t(s.num_wr_kb << 10)
+ << byte_u_t(s.num_wr_kb << 10)
<< TextTable::endrow;
} else {
formatter->open_object_section("pool");
cout << std::endl;
cout << "total_objects " << tstats.num_objects
<< std::endl;
- cout << "total_used " << si_t(tstats.kb_used << 10)
+ cout << "total_used " << byte_u_t(tstats.kb_used << 10)
<< std::endl;
- cout << "total_avail " << si_t(tstats.kb_avail << 10)
+ cout << "total_avail " << byte_u_t(tstats.kb_avail << 10)
<< std::endl;
- cout << "total_space " << si_t(tstats.kb << 10)
+ cout << "total_space " << byte_u_t(tstats.kb << 10)
<< std::endl;
} else {
formatter->close_section();
const std::string &s = po::validators::get_single_string(values);
std::string parse_error;
- uint64_t size = strict_sistrtoll(s.c_str(), &parse_error);
+ uint64_t size = strict_iecstrtoll(s.c_str(), &parse_error);
if (!parse_error.empty()) {
throw po::validation_error(po::validation_error::invalid_option_value);
}
const std::string &s = po::validators::get_single_string(values);
std::string parse_error;
- uint64_t objectsize = strict_sistrtoll(s.c_str(), &parse_error);
+ uint64_t objectsize = strict_iecstrtoll(s.c_str(), &parse_error);
if (!parse_error.empty()) {
throw po::validation_error(po::validation_error::invalid_option_value);
}
const std::string &s = po::validators::get_single_string(values);
std::string parse_error;
- uint64_t size = strict_sistrtoll(s.c_str(), &parse_error);
+ uint64_t size = strict_iecstrtoll(s.c_str(), &parse_error);
if (parse_error.empty() && (size >= (1 << 12))) {
v = boost::any(size);
return;
const std::string &s = po::validators::get_single_string(values);
std::string parse_error;
- uint64_t format = strict_sistrtoll(s.c_str(), &parse_error);
+ uint64_t format = strict_iecstrtoll(s.c_str(), &parse_error);
if (!parse_error.empty() || (format != 1 && format != 2)) {
throw po::validation_error(po::validation_error::invalid_option_value);
}
const std::string &s = po::validators::get_single_string(values);
std::string parse_error;
- uint64_t size = strict_sistrtoll(s.c_str(), &parse_error);
+ uint64_t size = strict_iecstrtoll(s.c_str(), &parse_error);
if (!parse_error.empty()) {
throw po::validation_error(po::validation_error::invalid_option_value);
}
uint64_t size = 0;
image.size(&size);
if (io_size > size) {
- std::cerr << "rbd: io-size " << prettybyte_t(io_size) << " "
- << "larger than image size " << prettybyte_t(size) << std::endl;
+ std::cerr << "rbd: io-size " << byte_u_t(io_size) << " "
+ << "larger than image size " << byte_u_t(size) << std::endl;
return -EINVAL;
}
full_name += "@" + snap_name;
}
tbl << full_name
- << stringify(si_t(size))
- << stringify(si_t(*used_size))
+ << stringify(byte_u_t(size))
+ << stringify(byte_u_t(*used_size))
<< TextTable::endrow;
}
return 0;
} else {
if (count > 1) {
tbl << "<TOTAL>"
- << stringify(si_t(total_prov))
- << stringify(si_t(total_used))
+ << stringify(byte_u_t(total_prov))
+ << stringify(byte_u_t(total_used))
<< TextTable::endrow;
}
std::cout << tbl;
f->dump_int("format", (old_format ? 1 : 2));
} else {
std::cout << "rbd image '" << (imgname.empty() ? imgid : imgname) << "':\n"
- << "\tsize " << prettybyte_t(info.size) << " in "
+ << "\tsize " << byte_u_t(info.size) << " in "
<< info.num_objs << " objects"
<< std::endl
<< "\torder " << info.order
- << " (" << prettybyte_t(info.obj_size) << " objects)"
+ << " (" << byte_u_t(info.obj_size) << " objects)"
<< std::endl;
if (!data_pool.empty()) {
std::cout << "\tdata_pool: " << data_pool << std::endl;
std::cout << " (trash " << parent_id << ")";
}
std::cout << std::endl;
- std::cout << "\toverlap: " << prettybyte_t(overlap) << std::endl;
+ std::cout << "\toverlap: " << byte_u_t(overlap) << std::endl;
}
}
f->dump_unsigned("stripe_unit", image.get_stripe_unit());
f->dump_unsigned("stripe_count", image.get_stripe_count());
} else {
- std::cout << "\tstripe unit: " << prettybyte_t(image.get_stripe_unit())
+ std::cout << "\tstripe unit: " << byte_u_t(image.get_stripe_unit())
<< std::endl
<< "\tstripe count: " << image.get_stripe_count() << std::endl;
}
std::cout << "\theader_oid: " << header_oid << std::endl;
std::cout << "\tobject_oid_prefix: " << object_oid_prefix << std::endl;
std::cout << "\torder: " << static_cast<int>(order) << " ("
- << prettybyte_t(1ull << order) << " objects)"<< std::endl;
+ << byte_u_t(1ull << order) << " objects)"<< std::endl;
std::cout << "\tsplay_width: " << static_cast<int>(splay_width) << std::endl;
if (!object_pool_name.empty()) {
std::cout << "\tobject_pool: " << object_pool_name << std::endl;
f->close_section();
} else {
tbl << w->name
- << stringify(si_t(info.size))
+ << stringify(byte_u_t(info.size))
<< parent
<< ((old_format) ? '1' : '2')
<< "" // protect doesn't apply to images
f->close_section();
} else {
tbl << w->name + "@" + s->name
- << stringify(si_t(s->size))
+ << stringify(byte_u_t(s->size))
<< parent
<< ((old_format) ? '1' : '2')
<< (is_protected ? "yes" : "")
f->dump_string("timestamp", tt_str);
f->close_section();
} else {
- t << s->id << s->name << stringify(prettybyte_t(s->size)) << tt_str
+ t << s->id << s->name << stringify(byte_u_t(s->size)) << tt_str
<< TextTable::endrow;
}
}
if (info.size > ULONG_MAX) {
r = -EFBIG;
- cerr << "rbd-nbd: image is too large (" << prettybyte_t(info.size)
- << ", max is " << prettybyte_t(ULONG_MAX) << ")" << std::endl;
+ cerr << "rbd-nbd: image is too large (" << byte_u_t(info.size)
+ << ", max is " << byte_u_t(ULONG_MAX) << ")" << std::endl;
goto close_nbd;
}
[Unit]
Description=Map RBD devices
-After=network-online.target local-fs.target
-Wants=network-online.target local-fs.target
+After=network-online.target
+Before=remote-fs-pre.target
+Wants=network-online.target remote-fs-pre.target
[Service]
EnvironmentFile=-/etc/sysconfig/ceph