[ceph.git] / ceph / src / boost / boost / fiber / detail / spinlock_ttas_futex.hpp


//          Copyright Oliver Kowalke 2016.
// Distributed under the Boost Software License, Version 1.0.
//    (See accompanying file LICENSE_1_0.txt or copy at
//          http://www.boost.org/LICENSE_1_0.txt)

#ifndef BOOST_FIBERS_spinlock_ttas_futex_FUTEX_H
#define BOOST_FIBERS_spinlock_ttas_futex_FUTEX_H

#include <algorithm>
#include <atomic>
#include <chrono>
#include <cmath>
#include <cstdint>
#include <random>
#include <thread>

#include <boost/fiber/detail/config.hpp>
#include <boost/fiber/detail/cpu_relax.hpp>
#include <boost/fiber/detail/futex.hpp>

// based on information from:
// https://software.intel.com/en-us/articles/benefitting-power-and-performance-sleep-loops
// https://software.intel.com/en-us/articles/long-duration-spin-wait-loops-on-hyper-threading-technology-enabled-intel-processors

namespace boost {
namespace fibers {
namespace detail {

// Spinlock that first spins (test-and-test-and-set with randomized binary
// exponential backoff) and, once the retry budget is used up, suspends the
// calling thread via futex_wait().
//
// 'value_' encodes the lock state:
//   0 -> unlocked
//   1 -> locked, nobody has announced that it is going to sleep
//   2 -> locked, at least one thread may be sleeping in futex_wait()
//
// The type is neither copyable nor copy-assignable.
class spinlock_ttas_futex {
private:
    template< typename FBSplk >
    friend class spinlock_rtm;

    std::atomic< std::int32_t >                 value_{ 0 };

public:
    spinlock_ttas_futex() = default;

    spinlock_ttas_futex( spinlock_ttas_futex const&) = delete;
    spinlock_ttas_futex & operator=( spinlock_ttas_futex const&) = delete;

    // Acquires the lock; returns only after the calling thread owns it.
    // Phase 1: spin up to BOOST_FIBERS_RETRY_THRESHOLD iterations.
    // Phase 2: mark the lock as contended (value 2) and block in futex_wait().
    void lock() noexcept {
        // per-thread random source used for the backoff delay
        static thread_local std::minstd_rand generator{ std::random_device{}() };
        std::int32_t collisions = 0, retries = 0, expected = 0;
        // after max. spins or collisions suspend via futex
        while ( retries++ < BOOST_FIBERS_RETRY_THRESHOLD) {
            // avoid using multiple pause instructions for a delay of a specific cycle count
            // the delay of cpu_relax() (pause on Intel) depends on the processor family
            // the cycle count can not be guaranteed from one system to the next
            // -> check the shared variable 'value_' in between each cpu_relax() to prevent
            //    unnecessarily long delays on some systems
            // test shared variable 'value_'
            // first access to 'value_' -> cache miss
            // successive access to 'value_' -> cache hit
            // if 'value_' was released by other fiber
            // cached 'value_' is invalidated -> cache miss
            if ( 0 != ( expected = value_.load( std::memory_order_relaxed) ) ) {
#if !defined(BOOST_FIBERS_SPIN_SINGLE_CORE)
                if ( BOOST_FIBERS_SPIN_BEFORE_SLEEP0 > retries) {
                    // give CPU a hint that this thread is in a "spin-wait" loop
                    // delays the next instruction's execution for a finite period of time (depends on processor family)
                    // the CPU is not under demand, parts of the pipeline are no longer being used
                    // -> reduces the power consumed by the CPU
                    // -> prevent pipeline stalls
                    cpu_relax();
                } else if ( BOOST_FIBERS_SPIN_BEFORE_YIELD > retries) {
                    // std::this_thread::sleep_for( 0us) has a fairly long instruction path length,
                    // combined with an expensive ring3 to ring 0 transition costing about 1000 cycles
                    // std::this_thread::sleep_for( 0us) lets this_thread give up the remaining part of its time slice
                    // if and only if a thread of equal or greater priority is ready to run
                    static constexpr std::chrono::microseconds us0{ 0 };
                    std::this_thread::sleep_for( us0);
                } else {
                    // std::this_thread::yield() allows this_thread to give up the remaining part of its time slice,
                    // but only to another thread on the same processor
                    // instead of constant checking, a thread only checks if no other useful work is pending
                    std::this_thread::yield();
                }
#else
                // std::this_thread::yield() allows this_thread to give up the remaining part of its time slice,
                // but only to another thread on the same processor
                // instead of constant checking, a thread only checks if no other useful work is pending
                std::this_thread::yield();
#endif
            } else if ( ! value_.compare_exchange_strong( expected, 1, std::memory_order_acquire) ) {
                // the lock looked free but another thread took it first;
                // 'expected' now holds the value that was actually observed
                // spinlock now contended
                // utilize 'Binary Exponential Backoff' algorithm
                // linear_congruential_engine is a random number engine based on Linear congruential generator (LCG)
                // the upper bound doubles with every collision until the
                // exponent reaches BOOST_FIBERS_CONTENTION_WINDOW_THRESHOLD
                std::uniform_int_distribution< std::int32_t > distribution{
                    0, static_cast< std::int32_t >( 1) << (std::min)(collisions, static_cast< std::int32_t >( BOOST_FIBERS_CONTENTION_WINDOW_THRESHOLD)) };
                const std::int32_t z = distribution( generator);
                ++collisions;
                for ( std::int32_t i = 0; i < z; ++i) {
                    // -> reduces the power consumed by the CPU
                    // -> prevent pipeline stalls
                    cpu_relax();
                }
            } else {
                // success, lock acquired
                return;
            }
        }
        // failure, lock not acquired
        // pause via futex
        // if the last observed value was not already 2, mark the lock as
        // contended; exchange() returns the previous value, so 0 means the
        // lock was free and is now owned by this thread (left at value 2)
        if ( 2 != expected) {
            expected = value_.exchange( 2, std::memory_order_acquire);
        }
        while ( 0 != expected) {
            // NOTE(review): futex_wait() is presumably a no-op when 'value_' != 2
            // and otherwise blocks until futex_wake() - confirm in futex.hpp
            futex_wait( & value_, 2);
            // re-try; always write 2 because other sleepers may still exist
            expected = value_.exchange( 2, std::memory_order_acquire);
        }
    }

    // Attempts to take the lock without spinning or blocking.
    // Returns true if 'value_' was 0 and has been set to 1, false otherwise.
    bool try_lock() noexcept {
        std::int32_t expected = 0;
        return value_.compare_exchange_strong( expected, 1, std::memory_order_acquire);
    }

    // Releases the lock and wakes a sleeper if the lock was marked contended.
    void unlock() noexcept {
        // The decrement must carry release semantics: on the uncontended path
        // (1 -> 0) it is the only operation that publishes the unlock, and the
        // acquire CAS/exchange of the next owner has to synchronize with it so
        // that writes made inside the critical section become visible.
        if ( 1 != value_.fetch_sub( 1, std::memory_order_release) ) {
            // previous value was 2: sleepers may exist, fully release and wake
            value_.store( 0, std::memory_order_release);
            futex_wake( & value_);
        }
    }
};

}}}

#endif // BOOST_FIBERS_spinlock_ttas_futex_FUTEX_H
Commit	Line	Data
b32b8144 FG	1
	2	// Copyright Oliver Kowalke 2016.
	3	// Distributed under the Boost Software License, Version 1.0.
	4	// (See accompanying file LICENSE_1_0.txt or copy at
	5	// http://www.boost.org/LICENSE_1_0.txt)
	6
	7	#ifndef BOOST_FIBERS_spinlock_ttas_futex_FUTEX_H
	8	#define BOOST_FIBERS_spinlock_ttas_futex_FUTEX_H
	9
	10	#include <atomic>
	11	#include <cmath>
	12	#include <random>
	13	#include <thread>
	14
	15	#include <boost/fiber/detail/config.hpp>
	16	#include <boost/fiber/detail/cpu_relax.hpp>
	17	#include <boost/fiber/detail/futex.hpp>
	18
	19	// based on information from:
	20	// https://software.intel.com/en-us/articles/benefitting-power-and-performance-sleep-loops
	21	// https://software.intel.com/en-us/articles/long-duration-spin-wait-loops-on-hyper-threading-technology-enabled-intel-processors
	22
	23	namespace boost {
	24	namespace fibers {
	25	namespace detail {
	26
	27	class spinlock_ttas_futex {
	28	private:
	29	template< typename FBSplk >
	30	friend class spinlock_rtm;
	31
	32	std::atomic< std::int32_t > value_{ 0 };
	33
	34	public:
	35	spinlock_ttas_futex() = default;
	36
	37	spinlock_ttas_futex( spinlock_ttas_futex const&) = delete;
	38	spinlock_ttas_futex & operator=( spinlock_ttas_futex const&) = delete;
	39
	40	void lock() noexcept {
	41	static thread_local std::minstd_rand generator{ std::random_device{}() };
	42	std::int32_t collisions = 0, retries = 0, expected = 0;
	43	// after max. spins or collisions suspend via futex
	44	while ( retries++ < BOOST_FIBERS_RETRY_THRESHOLD) {
	45	// avoid using multiple pause instructions for a delay of a specific cycle count
	46	// the delay of cpu_relax() (pause on Intel) depends on the processor family
	47	// the cycle count can not guaranteed from one system to the next
	48	// -> check the shared variable 'value_' in between each cpu_relax() to prevent
	49	// unnecessarily long delays on some systems
	50	// test shared variable 'value_'
	51	// first access to 'value_' -> cache miss
	52	// successive access to 'value_' -> cache hit
	53	// if 'value_' was released by other fiber
	54	// cached 'value_' is invalidated -> cache miss
	55	if ( 0 != ( expected = value_.load( std::memory_order_relaxed) ) ) {
	56	#if !defined(BOOST_FIBERS_SPIN_SINGLE_CORE)
	57	if ( BOOST_FIBERS_SPIN_BEFORE_SLEEP0 > retries) {
	58	// give CPU a hint that this thread is in a "spin-wait" loop
	59	// delays the next instruction's execution for a finite period of time (depends on processor family)
	60	// the CPU is not under demand, parts of the pipeline are no longer being used
	61	// -> reduces the power consumed by the CPU
	62	// -> prevent pipeline stalls
	63	cpu_relax();
	64	} else if ( BOOST_FIBERS_SPIN_BEFORE_YIELD > retries) {
65	// std::this_thread::sleep_for( 0us) has a fairly long instruction path length,
66	// combined with an expensive ring3 to ring 0 transition costing about 1000 cycles
67	// std::this_thread::sleep_for( 0us) lets give up this_thread the remaining part of its time slice
68	// if and only if a thread of equal or greater priority is ready to run
69	static constexpr std::chrono::microseconds us0{ 0 };
70	std::this_thread::sleep_for( us0);
71	} else {
72	// std::this_thread::yield() allows this_thread to give up the remaining part of its time slice,
73	// but only to another thread on the same processor
74	// instead of constant checking, a thread only checks if no other useful work is pending
75	std::this_thread::yield();
76	}
77	#else
78	// std::this_thread::yield() allows this_thread to give up the remaining part of its time slice,
79	// but only to another thread on the same processor
80	// instead of constant checking, a thread only checks if no other useful work is pending
81	std::this_thread::yield();
82	#endif
83	} else if ( ! value_.compare_exchange_strong( expected, 1, std::memory_order_acquire) ) {
84	// spinlock now contended
85	// utilize 'Binary Exponential Backoff' algorithm
86	// linear_congruential_engine is a random number engine based on Linear congruential generator (LCG)
87	std::uniform_int_distribution< std::int32_t > distribution{
88	0, static_cast< std::int32_t >( 1) << (std::min)(collisions, static_cast< std::int32_t >( BOOST_FIBERS_CONTENTION_WINDOW_THRESHOLD)) };
89	const std::int32_t z = distribution( generator);
90	++collisions;
91	for ( std::int32_t i = 0; i < z; ++i) {
92	// -> reduces the power consumed by the CPU
93	// -> prevent pipeline stalls
94	cpu_relax();
95	}
96	} else {
97	// success, lock acquired
98	return;
99	}
100	}
101	// failure, lock not acquired
102	// pause via futex
103	if ( 2 != expected) {
104	expected = value_.exchange( 2, std::memory_order_acquire);
105	}
106	while ( 0 != expected) {
107	futex_wait( & value_, 2);
108	expected = value_.exchange( 2, std::memory_order_acquire);
109	}
110	}
111
112	bool try_lock() noexcept {
113	std::int32_t expected = 0;
114	return value_.compare_exchange_strong( expected, 1, std::memory_order_acquire);
115	}
116
117	void unlock() noexcept {
118	if ( 1 != value_.fetch_sub( 1, std::memory_order_acquire) ) {
119	value_.store( 0, std::memory_order_release);
120	futex_wake( & value_);
121	}
122	}
123	};
124
125	}}}
126
127	#endif // BOOST_FIBERS_spinlock_ttas_futex_FUTEX_H