// ceph/src/boost/libs/uuid/include/boost/uuid/detail/uuid_x86.hpp
1 /*
2 * Copyright Andrey Semashev 2013.
3 * Distributed under the Boost Software License, Version 1.0.
4 * (See accompanying file LICENSE_1_0.txt or copy at
5 * http://www.boost.org/LICENSE_1_0.txt)
6 */
7 /*!
8 * \file uuid/detail/uuid_x86.hpp
9 *
10 * \brief This header contains optimized SSE implementation of \c boost::uuid operations.
11 */
12
13 #ifndef BOOST_UUID_DETAIL_UUID_X86_HPP_INCLUDED_
14 #define BOOST_UUID_DETAIL_UUID_X86_HPP_INCLUDED_
15
16 // MSVC does not always have immintrin.h (at least, not up to MSVC 10), so include the appropriate header for each instruction set
17 #if defined(BOOST_UUID_USE_SSE41)
18 #include <smmintrin.h>
19 #elif defined(BOOST_UUID_USE_SSE3)
20 #include <pmmintrin.h>
21 #else
22 #include <emmintrin.h>
23 #endif
24
25 #if defined(BOOST_MSVC) && defined(_M_X64) && !defined(BOOST_UUID_USE_SSE3) && (BOOST_MSVC < 1900 /* Fixed in Visual Studio 2015 */ )
26 // At least MSVC 9 (VS2008) and 12 (VS2013) have an optimizer bug that sometimes results in incorrect SIMD code
27 // generated in Release x64 mode. In particular, it affects operator==, where the compiler sometimes generates
28 // pcmpeqd with a memory operand instead of movdqu followed by pcmpeqd. The problem is that uuid can be
29 // not aligned to 16 bytes and pcmpeqd causes alignment violation in this case. We cannot be sure that other
30 // MSVC versions are not affected so we apply the workaround for all versions, except VS2015 on up where
31 // the bug has been fixed.
32 //
33 // https://svn.boost.org/trac/boost/ticket/8509#comment:3
34 // https://connect.microsoft.com/VisualStudio/feedbackdetail/view/981648#tabs
35 #define BOOST_UUID_DETAIL_MSVC_BUG981648
36 #if BOOST_MSVC >= 1600
37 extern "C" void _ReadWriteBarrier(void);
38 #pragma intrinsic(_ReadWriteBarrier)
39 #endif
40 #endif
41
42 namespace boost {
43 namespace uuids {
44 namespace detail {
45
46 BOOST_FORCEINLINE __m128i load_unaligned_si128(const uint8_t* p) BOOST_NOEXCEPT
47 {
48 #if defined(BOOST_UUID_USE_SSE3)
49 return _mm_lddqu_si128(reinterpret_cast< const __m128i* >(p));
50 #elif !defined(BOOST_UUID_DETAIL_MSVC_BUG981648)
51 return _mm_loadu_si128(reinterpret_cast< const __m128i* >(p));
52 #elif defined(BOOST_MSVC) && BOOST_MSVC >= 1600
53 __m128i mm = _mm_loadu_si128(reinterpret_cast< const __m128i* >(p));
54 // Make sure this load doesn't get merged with the subsequent instructions
55 _ReadWriteBarrier();
56 return mm;
57 #else
58 // VS2008 x64 doesn't respect _ReadWriteBarrier above, so we have to generate this crippled code to load unaligned data
59 return _mm_unpacklo_epi64(_mm_loadl_epi64(reinterpret_cast< const __m128i* >(p)), _mm_loadl_epi64(reinterpret_cast< const __m128i* >(p + 8)));
60 #endif
61 }
62
63 } // namespace detail
64
65 inline bool uuid::is_nil() const BOOST_NOEXCEPT
66 {
67 __m128i mm = uuids::detail::load_unaligned_si128(data);
68 #if defined(BOOST_UUID_USE_SSE41)
69 return _mm_test_all_zeros(mm, mm) != 0;
70 #else
71 mm = _mm_cmpeq_epi32(mm, _mm_setzero_si128());
72 return _mm_movemask_epi8(mm) == 0xFFFF;
73 #endif
74 }
75
76 inline void uuid::swap(uuid& rhs) BOOST_NOEXCEPT
77 {
78 __m128i mm_this = uuids::detail::load_unaligned_si128(data);
79 __m128i mm_rhs = uuids::detail::load_unaligned_si128(rhs.data);
80 _mm_storeu_si128(reinterpret_cast< __m128i* >(rhs.data), mm_this);
81 _mm_storeu_si128(reinterpret_cast< __m128i* >(data), mm_rhs);
82 }
83
84 inline bool operator== (uuid const& lhs, uuid const& rhs) BOOST_NOEXCEPT
85 {
86 __m128i mm_left = uuids::detail::load_unaligned_si128(lhs.data);
87 __m128i mm_right = uuids::detail::load_unaligned_si128(rhs.data);
88
89 #if defined(BOOST_UUID_USE_SSE41)
90 __m128i mm = _mm_xor_si128(mm_left, mm_right);
91 return _mm_test_all_zeros(mm, mm) != 0;
92 #else
93 __m128i mm_cmp = _mm_cmpeq_epi32(mm_left, mm_right);
94 return _mm_movemask_epi8(mm_cmp) == 0xFFFF;
95 #endif
96 }
97
98 inline bool operator< (uuid const& lhs, uuid const& rhs) BOOST_NOEXCEPT
99 {
100 __m128i mm_left = uuids::detail::load_unaligned_si128(lhs.data);
101 __m128i mm_right = uuids::detail::load_unaligned_si128(rhs.data);
102
103 // To emulate lexicographical_compare behavior we have to perform two comparisons - the forward and reverse one.
104 // Then we know which bytes are equivalent and which ones are different, and for those different the comparison results
105 // will be opposite. Then we'll be able to find the first differing comparison result (for both forward and reverse ways),
106 // and depending on which way it is for, this will be the result of the operation. There are a few notes to consider:
107 //
108 // 1. Due to little endian byte order the first bytes go into the lower part of the xmm registers,
109 // so the comparison results in the least significant bits will actually be the most signigicant for the final operation result.
110 // This means we have to determine which of the comparison results have the least significant bit on, and this is achieved with
111 // the "(x - 1) ^ x" trick.
112 // 2. Because there is only signed comparison in SSE/AVX, we have to invert byte comparison results whenever signs of the corresponding
113 // bytes are different. I.e. in signed comparison it's -1 < 1, but in unsigned it is the opposite (255 > 1). To do that we XOR left and right,
114 // making the most significant bit of each byte 1 if the signs are different, and later apply this mask with another XOR to the comparison results.
115 // 3. pcmpgtw compares for "greater" relation, so we swap the arguments to get what we need.
116
117 const __m128i mm_signs_mask = _mm_xor_si128(mm_left, mm_right);
118
119 __m128i mm_cmp = _mm_cmpgt_epi8(mm_right, mm_left), mm_rcmp = _mm_cmpgt_epi8(mm_left, mm_right);
120
121 mm_cmp = _mm_xor_si128(mm_signs_mask, mm_cmp);
122 mm_rcmp = _mm_xor_si128(mm_signs_mask, mm_rcmp);
123
124 uint32_t cmp = static_cast< uint32_t >(_mm_movemask_epi8(mm_cmp)), rcmp = static_cast< uint32_t >(_mm_movemask_epi8(mm_rcmp));
125
126 cmp = (cmp - 1u) ^ cmp;
127 rcmp = (rcmp - 1u) ^ rcmp;
128
129 return cmp < rcmp;
130 }
131
132 } // namespace uuids
133 } // namespace boost
134
135 #endif // BOOST_UUID_DETAIL_UUID_X86_HPP_INCLUDED_