[ceph.git] / ceph / src / common / Throttle.h

// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab

#ifndef CEPH_THROTTLE_H
#define CEPH_THROTTLE_H

#include <atomic>
#include <chrono>
#include <iostream>
#include <list>
#include <map>

#include "common/ceph_mutex.h"
#include "include/Context.h"
#include "common/ThrottleInterface.h"
#include "common/Timer.h"
#include "common/convenience.h"
#include "common/perf_counters_collection.h"

/**
 * @class Throttle
 * Throttles the maximum number of active requests.
 *
 * This class defines the maximum number of slots that may be taken at any
 * one time. Requests that would take more than that are delayed until some
 * slots are put back, so that @p get_current() drops below the limit after
 * the requests are fulfilled.
 */
class Throttle final : public ThrottleInterface {
  CephContext *cct;
  const std::string name;
  PerfCountersRef logger;
  /// number of slots currently taken
  std::atomic<int64_t> count{0};
  /// slot limit; _should_wait() never reports a wait while this is 0
  std::atomic<int64_t> max{0};
  std::mutex lock;
  std::list<std::condition_variable> conds;
  const bool use_perf;

public:
  Throttle(CephContext *cct, const std::string& n, int64_t m = 0,
           bool _use_perf = true);
  ~Throttle() override;

private:
  void _reset_max(int64_t m);

  /**
   * decide whether a request for @p c slots has to wait
   *
   * Both atomics are sampled once, so the answer is based on one snapshot
   * of each value.
   *
   * @param c number of slots being requested
   * @returns false whenever max is 0; otherwise true if @p c fits under max
   * but count + c would exceed it, or if @p c is at least max and count is
   * already above max
   */
  bool _should_wait(int64_t c) const {
    const int64_t limit = max;
    const int64_t taken = count;
    if (limit == 0)
      return false;
    if (c <= limit && taken + c > limit)
      return true;                        // normally stay under max
    return c >= limit && taken > limit;   // except for large c
  }

  bool _wait(int64_t c, std::unique_lock<std::mutex>& l);

public:
  /**
   * gets the number of currently taken slots
   * @returns the number of taken slots
   */
  int64_t get_current() const {
    return count.load();
  }

  /**
   * get the max number of slots
   * @returns the max number of slots
   */
  int64_t get_max() const {
    return max.load();
  }

  /**
   * return true if past midpoint, i.e. the number of taken slots is at
   * least half of the max
   */
  bool past_midpoint() const {
    return get_current() >= get_max() / 2;
  }

  /**
   * set the new max number, and wait until the number of taken slots drains
   * and drops below this limit.
   *
   * @param m the new max number
   * @returns true if this method is blocked, false if it returns immediately
   */
  bool wait(int64_t m = 0);

  /**
   * take the specified number of slots from the stock regardless of the
   * throttling
   * @param c number of slots to take
   * @returns the total number of taken slots
   */
  int64_t take(int64_t c = 1) override;

  /**
   * get the specified amount of slots from the stock, but will wait if the
   * total number taken by consumer would exceed the maximum number.
   * @param c number of slots to get
   * @param m new maximum number to set, ignored if it is 0
   * @returns true if this request is blocked due to the throttling, false
   * otherwise
   */
  bool get(int64_t c = 1, int64_t m = 0);

  /**
   * the non-blocking version of @p get()
   * @returns true if it successfully got the requested amount,
   * or false if it would block.
   */
  bool get_or_fail(int64_t c = 1);

  /**
   * put slots back to the stock
   * @param c number of slots to return
   * @returns number of requests still being held after this
   */
  int64_t put(int64_t c = 1) override;

  /**
   * reset the stock
   * (presumably zeroes the number of taken slots -- the definition is not
   * in this header, confirm against the implementation)
   */
  void reset();

  /**
   * public wrapper around _should_wait()
   * @param c number of slots being requested
   * @returns whatever _should_wait(c) returns
   */
  bool should_wait(int64_t c) const {
    return _should_wait(c);
  }

  /**
   * change the max number of slots; holds @p lock while calling
   * _reset_max()
   * @param m the new max number
   */
  void reset_max(int64_t m) {
    const std::lock_guard<std::mutex> guard(lock);
    _reset_max(m);
  }
};

/**
 * BackoffThrottle
 *
 * Creates a throttle which gradually induces delays when get() is called
 * based on params low_threshold, high_threshold, expected_throughput,
 * high_multiple, and max_multiple.
 *
 * In [0, low_threshold), we want no delay.
 *
 * In [low_threshold, high_threshold), delays should be injected based
 * on a line from 0 at low_threshold to
 * high_multiple * (1/expected_throughput) at high_threshold.
 *
 * In [high_threshold, 1), we want delays injected based on a line from
 * (high_multiple * (1/expected_throughput)) at high_threshold to
 * (max_multiple * (1/expected_throughput)) at 1.
 *
 * Let the current throttle ratio (current/max) be r, low_threshold be l,
 * high_threshold be h, high_delay (high_multiple / expected_throughput) be e,
 * and max_delay (max_multiple / expected_throughput) be m.
 *
 * delay = 0, r \in [0, l)
 * delay = (r - l) * (e / (h - l)), r \in [l, h)
 * delay = e + (r - h) * ((m - e) / (1 - h)), r \in [h, 1)
 */
class BackoffThrottle {
  CephContext *cct;
  const std::string name;
  PerfCountersRef logger;

  std::mutex lock;
  using locker = std::unique_lock<std::mutex>;

  /// index into conds of the slot the next _push_waiter() call hands out
  unsigned next_cond = 0;

  /// allocated once to avoid constantly allocating new ones
  std::vector<std::condition_variable> conds;

  const bool use_perf;

  /// pointers into conds
  std::list<std::condition_variable*> waiters;

  /**
   * Appends the condition variable at conds[next_cond] to the back of
   * waiters and advances next_cond, wrapping to 0 once it reaches
   * conds.size().
   *
   * @returns iterator to the entry just added to waiters
   */
  auto _push_waiter() -> std::list<std::condition_variable*>::iterator {
    std::condition_variable *const slot = &conds[next_cond];
    next_cond = (next_cond + 1 == conds.size()) ? 0 : next_cond + 1;
    return waiters.insert(waiters.end(), slot);
  }

  /// Notifies the condition variable at the front of waiters, if any.
  void _kick_waiters() {
    if (waiters.empty())
      return;
    waiters.front()->notify_all();
  }

  /// thresholds from the class description, values are in [0, 1].
  double low_threshold = 0.0;
  double high_threshold = 1.0;

  /// delays from the class description, values are in seconds
  double high_delay_per_count = 0.0;
  double max_delay_per_count = 0.0;

  /// Slopes of the two delay lines, filled in by set_params
  double s0 = 0.0; ///< e / (h - l), l != h, 0 otherwise
  double s1 = 0.0; ///< (m - e)/(1 - h), 1 != h, 0 otherwise

  /// max
  uint64_t max = 0;
  /// NOTE(review): presumably the amount currently held; the current/max
  /// ratio is what the class description calls r -- confirm in the .cc
  uint64_t current = 0;

  ceph::timespan _get_delay(uint64_t c) const;

public:
  /**
   * set_params
   *
   * Sets params.  If the params are invalid, returns false
   * and populates errstream (if non-null) with a user comprehensible
   * explanation.
   */
  bool set_params(
    double _low_threshold,
    double _high_threshold,
    double expected_throughput,
    double high_multiple,
    double max_multiple,
    uint64_t throttle_max,
    std::ostream *errstream);

  ceph::timespan get(uint64_t c = 1);

  /// Shorthand for get(0).
  ceph::timespan wait() {
    return get(0);
  }

  uint64_t put(uint64_t c = 1);
  uint64_t take(uint64_t c = 1);
  uint64_t get_current();
  uint64_t get_max();

  BackoffThrottle(CephContext *cct, const std::string& n,
    unsigned expected_concurrency, ///< [in] determines size of conds
    bool _use_perf = true);
  ~BackoffThrottle();
};


/**
 * @class SimpleThrottle
 * This is a simple way to bound the number of concurrent operations.
 *
 * It tracks the first error encountered, and makes it available
 * when all requests are complete. wait_for_ret() should be called
 * before the instance is destroyed.
 *
 * Re-using the same instance isn't safe if you want to check each set
 * of operations for errors, since the return value is not reset.
 */
class SimpleThrottle {
public:
  /// @param max bound on concurrent operations (presumably stored in m_max)
  /// @param ignore_enoent presumably makes -ENOENT results not count as
  ///        errors -- confirm against the implementation
  SimpleThrottle(uint64_t max, bool ignore_enoent);
  ~SimpleThrottle();
  /// Registers the start of one operation.
  /// NOTE(review): presumably blocks while the bound is reached -- the
  /// definition is not in this header.
  void start_op();
  /// Registers the completion of one operation.
  /// @param r result code of the finished operation
  void end_op(int r);
  /// NOTE(review): presumably reports whether an error has been recorded.
  bool pending_error() const;
  /// Per the class description: makes the tracked error available once all
  /// requests are complete; call it before destroying the instance.
  int wait_for_ret();
private:
  // mutable so that const members such as pending_error() can lock it
  mutable std::mutex m_lock;
  std::condition_variable m_cond;
  // not default-initialized here; expected to be set by the constructor
  uint64_t m_max;
  uint64_t m_current = 0;
  // tracked result; starts at 0
  int m_ret = 0;
  // not default-initialized here; expected to be set by the constructor
  bool m_ignore_enoent;
  uint32_t waiters = 0;
};


class OrderedThrottle;

class C_OrderedThrottle : public Context {
public:
  C_OrderedThrottle(OrderedThrottle *ordered_throttle, uint64_t tid)
    : m_ordered_throttle(ordered_throttle), m_tid(tid) {
  }

protected:
  void finish(int r) override;

private:
  OrderedThrottle *m_ordered_throttle;
  uint64_t m_tid;
};

/**
 * @class OrderedThrottle
 * Throttles the maximum number of active requests and completes them in order
 *
 * Operations can complete out-of-order but their associated Context callbacks
 * will be completed in-order during invocation of start_op() and wait_for_ret()
 */
class OrderedThrottle {
public:
  /// @param max bound on active requests (presumably stored in m_max)
  /// @param ignore_enoent presumably makes -ENOENT results not count as
  ///        errors -- confirm against the implementation
  OrderedThrottle(uint64_t max, bool ignore_enoent);
  ~OrderedThrottle();

  /// Starts an operation whose completion callback is @p on_finish.
  /// @returns the context associated with the new operation
  C_OrderedThrottle *start_op(Context *on_finish);
  void end_op(int r);

  bool pending_error() const;
  int wait_for_ret();

protected:
  // C_OrderedThrottle needs access to the protected finish_op()
  friend class C_OrderedThrottle;

  /// Records the result @p r for the operation identified by @p tid.
  void finish_op(uint64_t tid, int r);

private:
  /// Per-operation bookkeeping kept in m_tid_result.
  struct Result {
    bool finished = false;          ///< false until the op is marked finished
    int ret_val = 0;                ///< result code of the op
    Context *on_finish = nullptr;   ///< callback for the op, may be null

    // Deliberately not explicit and default-constructible: std::map's
    // operator[] needs a default constructor, and existing code may rely on
    // the implicit conversion from Context*.
    Result(Context *_on_finish = nullptr)
      : on_finish(_on_finish) {
    }
  };

  /// Results keyed by tid; std::map keeps them ordered by tid.
  using TidResult = std::map<uint64_t, Result>;

  // mutable so that const members such as pending_error() can lock it
  mutable std::mutex m_lock;
  std::condition_variable m_cond;
  uint64_t m_max;
  uint64_t m_current = 0;
  int m_ret_val = 0;
  bool m_ignore_enoent;

  uint64_t m_next_tid = 0;
  uint64_t m_complete_tid = 0;

  TidResult m_tid_result;

  void complete_pending_ops(std::unique_lock<std::mutex>& l);
  uint32_t waiters = 0;
};


/**
 * @class TokenBucketThrottle
 * Throttle that hands out tokens from a bucket; requests that cannot be
 * satisfied immediately are queued as blockers in arrival order.
 */
class TokenBucketThrottle {
  /// The token bucket itself; starts out full (remain == max).
  struct Bucket {
    CephContext *cct;
    const std::string name;

    uint64_t remain;
    uint64_t max;

    Bucket(CephContext *cct, const std::string &name, uint64_t m)
      : cct(cct), name(name), remain(m), max(m) {}

    uint64_t get(uint64_t c);
    uint64_t put(uint64_t c);
    void set_max(uint64_t m);
  };

  /// A queued request: the tokens it still needs and the context to run.
  struct Blocker {
    uint64_t tokens_requested;
    Context *ctx;

    Blocker(uint64_t _tokens_requested, Context* _ctx)
      : tokens_requested(_tokens_requested), ctx(_ctx) {}
  };

  CephContext *m_cct;
  const std::string m_name;
  Bucket m_throttle;
  uint64_t m_avg = 0;
  uint64_t m_burst = 0;
  SafeTimer *m_timer;
  ceph::mutex *m_timer_lock;
  Context *m_token_ctx = nullptr;
  std::list<Blocker> m_blockers;
  ceph::mutex m_lock;

  // minimum of the filling period.
  uint64_t m_tick_min = 50;
  // tokens filling period, its unit is millisecond.
  uint64_t m_tick = 0;
  /**
   * These variables are used to calculate how many tokens need to be put into
   * the bucket within each tick.
   *
   * In actual use, the tokens to be put per tick(m_avg / m_ticks_per_second)
   * may be a floating point number, but we need an 'uint64_t' to put into the
   * bucket.
   *
   * For example, we set the value of rate to be 950, means 950 iops(or bps).
   *
   * In this case, the filling period(m_tick) should be 1000 / 950 = 1.052,
   * which is too small for the SafeTimer. So we should set the period(m_tick)
   * to be 50(m_tick_min), and 20 ticks in one second(m_ticks_per_second).
   * The tokens filled in bucket per tick is 950 / 20 = 47.5, not an integer.
   *
   * To resolve this, we use a method called tokens_filled(m_current_tick) to
   * calculate how many tokens will be put so far(until m_current_tick):
   *
   *   tokens_filled = m_current_tick / m_ticks_per_second * m_avg
   *
   * And the difference between two ticks will be the result we expect.
   *   tokens in tick 0: (1 / 20 * 950) - (0 / 20 * 950) =  47 -   0 = 47
   *   tokens in tick 1: (2 / 20 * 950) - (1 / 20 * 950) =  95 -  47 = 48
   *   tokens in tick 2: (3 / 20 * 950) - (2 / 20 * 950) = 142 -  95 = 47
   *
   * As a result, the tokens filled in one second will be shown as this:
   *   tick    | 1| 2| 3| 4| 5| 6| 7| 8| 9|10|11|12|13|14|15|16|17|18|19|20|
   *   tokens  |47|48|47|48|47|48|47|48|47|48|47|48|47|48|47|48|47|48|47|48|
   */
  uint64_t m_ticks_per_second = 0;
  uint64_t m_current_tick = 0;

  // period for the bucket filling tokens, its unit is seconds.
  double m_schedule_tick = 1.0;

public:
  TokenBucketThrottle(CephContext *cct, const std::string &name,
                      uint64_t capacity, uint64_t avg,
                      SafeTimer *timer, ceph::mutex *timer_lock);

  ~TokenBucketThrottle();

  /// @returns the name given at construction; callable on const instances.
  const std::string &get_name() const {
    return m_name;
  }

  /**
   * Queue a blocked request.
   *
   * Wraps the call (handler->*MF)(r, item, flag) in a newly allocated
   * LambdaContext and appends it, together with @p c, to m_blockers.
   * This function does not take m_lock itself; get() calls it with m_lock
   * already held.
   *
   * @param c number of tokens the request is still waiting for
   * @param handler object whose member function MF is invoked later
   * @param item forwarded unchanged to MF
   * @param flag forwarded unchanged to MF
   */
  template <typename T, typename I, void(T::*MF)(int, I*, uint64_t)>
  void add_blocker(uint64_t c, T *handler, I *item, uint64_t flag) {
    Context *ctx = new LambdaContext([handler, item, flag](int r) {
      (handler->*MF)(r, item, flag);
      });
    m_blockers.emplace_back(c, ctx);
  }

  /**
   * Request @p c tokens.
   *
   * If other requests are already blocked, this one is queued behind them
   * without touching the bucket, so request order is kept. Otherwise, when
   * throttling is disabled (bucket max or m_avg is 0) the request passes
   * straight through; else tokens are taken from the bucket and, if fewer
   * than @p c were available, a blocker is queued for the remainder.
   *
   * @param c number of tokens requested
   * @param handler object whose member function MF is invoked if blocked
   * @param item forwarded unchanged to MF
   * @param flag forwarded unchanged to MF
   * @returns true if the request was queued as a blocker, false if it may
   * proceed immediately
   */
  template <typename T, typename I, void(T::*MF)(int, I*, uint64_t)>
  bool get(uint64_t c, T *handler, I *item, uint64_t flag) {
    std::lock_guard lock(m_lock);
    uint64_t got = 0;
    if (m_blockers.empty()) {
      // Nobody is queued ahead of us, so we may try the bucket directly.
      if (0 == m_throttle.max || 0 == m_avg)
        return false;

      got = m_throttle.get(c);
      if (got >= c)
        return false;
      // Not enough tokens, fall through and add a blocker for the rest.
    }
    // Either earlier requests are still blocked (got == 0, keep the order
    // of requests) or the bucket ran short; queue what is still missing.
    add_blocker<T, I, MF>(c - got, handler, item, flag);
    return true;
  }

  int set_limit(uint64_t average, uint64_t burst);
  void set_schedule_tick_min(uint64_t tick);

private:
  uint64_t tokens_filled(double tick);
  uint64_t tokens_this_tick();
  void add_tokens();
  void schedule_timer();
  void cancel_timer();
};

#endif
Commit	Line	Data
7c673cae FG	1	// -- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t --
	2	// vim: ts=8 sw=2 smarttab
	3
	4	#ifndef CEPH_THROTTLE_H
	5	#define CEPH_THROTTLE_H
	6
31f18b77	7	#include <atomic>
11fdf7f2	8	#include <chrono>
7c673cae	9	#include <iostream>
11fdf7f2 TL	10	#include <list>
11fdf7f2 TL	11	#include <map>
31f18b77	12
11fdf7f2	13	#include "common/ceph_mutex.h"
7c673cae	14	#include "include/Context.h"
11fdf7f2 TL	15	#include "common/ThrottleInterface.h"
	16	#include "common/Timer.h"
	17	#include "common/convenience.h"
	18	#include "common/perf_counters_collection.h"
7c673cae FG	19
	20	/**
	21	* @class Throttle
	22	* Throttles the maximum number of active requests.
	23	*
	24	* This class defines the maximum number of slots currently taken away. The
	25	* excessive requests for more of them are delayed, until some slots are put
	26	* back, so @p get_current() drops below the limit after fulfills the requests.
	27	*/
11fdf7f2	28	class Throttle final : public ThrottleInterface {
7c673cae FG	29	CephContext *cct;
7c673cae FG	30	const std::string name;
11fdf7f2	31	PerfCountersRef logger;
b32b8144	32	std::atomic<int64_t> count = { 0 }, max = { 0 };
11fdf7f2 TL	33	std::mutex lock;
11fdf7f2 TL	34	std::list<std::condition_variable> conds;
7c673cae FG	35	const bool use_perf;
	36
	37	public:
	38	Throttle(CephContext *cct, const std::string& n, int64_t m = 0, bool _use_perf = true);
11fdf7f2	39	~Throttle() override;
7c673cae FG	40
	41	private:
	42	void _reset_max(int64_t m);
	43	bool _should_wait(int64_t c) const {
31f18b77 FG	44	int64_t m = max;
31f18b77 FG	45	int64_t cur = count;
7c673cae FG	46	return
	47	m &&
	48	((c <= m && cur + c > m) \|\| // normally stay under max
	49	(c >= m && cur > m)); // except for large c
	50	}
	51
11fdf7f2	52	bool _wait(int64_t c, std::unique_lock<std::mutex>& l);
7c673cae FG	53
	54	public:
	55	/**
	56	* gets the number of currently taken slots
	57	* @returns the number of taken slots
	58	*/
	59	int64_t get_current() const {
31f18b77	60	return count;
7c673cae FG	61	}
	62
	63	/**
	64	* get the max number of slots
	65	* @returns the max number of slots
	66	*/
31f18b77	67	int64_t get_max() const { return max; }
7c673cae FG	68
	69	/**
	70	* return true if past midpoint
	71	*/
	72	bool past_midpoint() const {
31f18b77	73	return count >= max / 2;
7c673cae FG	74	}
	75
	76	/**
	77	* set the new max number, and wait until the number of taken slots drains
	78	* and drops below this limit.
	79	*
	80	* @param m the new max number
	81	* @returns true if this method is blocked, false it it returns immediately
	82	*/
	83	bool wait(int64_t m = 0);
	84
	85	/**
	86	* take the specified number of slots from the stock regardless the throttling
	87	* @param c number of slots to take
	88	* @returns the total number of taken slots
	89	*/
11fdf7f2	90	int64_t take(int64_t c = 1) override;
7c673cae FG	91
	92	/**
	93	* get the specified amount of slots from the stock, but will wait if the
	94	* total number taken by consumer would exceed the maximum number.
	95	* @param c number of slots to get
	96	* @param m new maximum number to set, ignored if it is 0
	97	* @returns true if this request is blocked due to the throttling, false
	98	* otherwise
	99	*/
	100	bool get(int64_t c = 1, int64_t m = 0);
	101
	102	/**
	103	* the unblocked version of @p get()
	104	* @returns true if it successfully got the requested amount,
	105	* or false if it would block.
	106	*/
	107	bool get_or_fail(int64_t c = 1);
	108
	109	/**
	110	* put slots back to the stock
	111	* @param c number of slots to return
	112	* @returns number of requests being hold after this
	113	*/
11fdf7f2	114	int64_t put(int64_t c = 1) override;
7c673cae FG	115	/**
	116	* reset the zero to the stock
	117	*/
	118	void reset();
	119
	120	bool should_wait(int64_t c) const {
	121	return _should_wait(c);
	122	}
	123	void reset_max(int64_t m) {
11fdf7f2	124	std::lock_guard l(lock);
7c673cae FG	125	_reset_max(m);
	126	}
	127	};
	128
	129	/**
	130	* BackoffThrottle
	131	*
	132	* Creates a throttle which gradually induces delays when get() is called
11fdf7f2	133	* based on params low_threshold, high_threshold, expected_throughput,
7c673cae FG	134	* high_multiple, and max_multiple.
7c673cae FG	135	*
11fdf7f2	136	* In [0, low_threshold), we want no delay.
7c673cae	137	*
11fdf7f2 TL	138	* In [low_threshold, high_threshold), delays should be injected based
	139	* on a line from 0 at low_threshold to
	140	* high_multiple * (1/expected_throughput) at high_threshold.
7c673cae	141	*
11fdf7f2 TL	142	* In [high_threshold, 1), we want delays injected based on a line from
11fdf7f2 TL	143	* (high_multiple * (1/expected_throughput)) at high_threshold to
7c673cae FG	144	* (high_multiple * (1/expected_throughput)) +
	145	* (max_multiple * (1/expected_throughput)) at 1.
	146	*
11fdf7f2 TL	147	* Let the current throttle ratio (current/max) be r, low_threshold be l,
	148	* high_threshold be h, high_delay (high_multiple / expected_throughput) be e,
	149	* and max_delay (max_multiple / expected_throughput) be m.
7c673cae FG	150	*
	151	* delay = 0, r \in [0, l)
	152	* delay = (r - l) * (e / (h - l)), r \in [l, h)
	153	* delay = e + (r - h)((m - e)/(1 - h))
	154	*/
	155	class BackoffThrottle {
	156	CephContext *cct;
	157	const std::string name;
11fdf7f2	158	PerfCountersRef logger;
7c673cae FG	159
	160	std::mutex lock;
	161	using locker = std::unique_lock<std::mutex>;
	162
	163	unsigned next_cond = 0;
	164
	165	/// allocated once to avoid constantly allocating new ones
9f95a23c	166	std::vector<std::condition_variable> conds;
7c673cae FG	167
	168	const bool use_perf;
	169
	170	/// pointers into conds
9f95a23c	171	std::list<std::condition_variable*> waiters;
7c673cae FG	172
	173	std::list<std::condition_variable*>::iterator _push_waiter() {
	174	unsigned next = next_cond++;
	175	if (next_cond == conds.size())
	176	next_cond = 0;
	177	return waiters.insert(waiters.end(), &(conds[next]));
	178	}
	179
	180	void _kick_waiters() {
	181	if (!waiters.empty())
	182	waiters.front()->notify_all();
	183	}
	184
	185	/// see above, values are in [0, 1].
11fdf7f2 TL	186	double low_threshold = 0;
11fdf7f2 TL	187	double high_threshold = 1;
7c673cae FG	188
	189	/// see above, values are in seconds
	190	double high_delay_per_count = 0;
	191	double max_delay_per_count = 0;
	192
	193	/// Filled in in set_params
	194	double s0 = 0; ///< e / (h - l), l != h, 0 otherwise
	195	double s1 = 0; ///< (m - e)/(1 - h), 1 != h, 0 otherwise
	196
	197	/// max
	198	uint64_t max = 0;
	199	uint64_t current = 0;
	200
11fdf7f2	201	ceph::timespan _get_delay(uint64_t c) const;
7c673cae FG	202
	203	public:
	204	/**
	205	* set_params
	206	*
	207	* Sets params. If the params are invalid, returns false
11fdf7f2	208	* and populates errstream (if non-null) with a user comprehensible
7c673cae FG	209	* explanation.
	210	*/
	211	bool set_params(
11fdf7f2 TL	212	double _low_threshold,
11fdf7f2 TL	213	double _high_threshold,
7c673cae FG	214	double expected_throughput,
	215	double high_multiple,
	216	double max_multiple,
	217	uint64_t throttle_max,
9f95a23c	218	std::ostream *errstream);
7c673cae	219
11fdf7f2 TL	220	ceph::timespan get(uint64_t c = 1);
11fdf7f2 TL	221	ceph::timespan wait() {
7c673cae FG	222	return get(0);
	223	}
	224	uint64_t put(uint64_t c = 1);
	225	uint64_t take(uint64_t c = 1);
	226	uint64_t get_current();
	227	uint64_t get_max();
	228
	229	BackoffThrottle(CephContext *cct, const std::string& n,
	230	unsigned expected_concurrency, ///< [in] determines size of conds
	231	bool _use_perf = true);
	232	~BackoffThrottle();
	233	};
	234
	235
	236	/**
	237	* @class SimpleThrottle
	238	* This is a simple way to bound the number of concurrent operations.
	239	*
	240	* It tracks the first error encountered, and makes it available
	241	* when all requests are complete. wait_for_ret() should be called
	242	* before the instance is destroyed.
	243	*
	244	* Re-using the same instance isn't safe if you want to check each set
	245	* of operations for errors, since the return value is not reset.
	246	*/
	247	class SimpleThrottle {
	248	public:
	249	SimpleThrottle(uint64_t max, bool ignore_enoent);
	250	~SimpleThrottle();
	251	void start_op();
	252	void end_op(int r);
	253	bool pending_error() const;
	254	int wait_for_ret();
	255	private:
11fdf7f2 TL	256	mutable std::mutex m_lock;
11fdf7f2 TL	257	std::condition_variable m_cond;
7c673cae	258	uint64_t m_max;
11fdf7f2 TL	259	uint64_t m_current = 0;
11fdf7f2 TL	260	int m_ret = 0;
7c673cae	261	bool m_ignore_enoent;
c07f9fc5	262	uint32_t waiters = 0;
7c673cae FG	263	};
	264
	265
	266	class OrderedThrottle;
	267
	268	class C_OrderedThrottle : public Context {
	269	public:
	270	C_OrderedThrottle(OrderedThrottle *ordered_throttle, uint64_t tid)
	271	: m_ordered_throttle(ordered_throttle), m_tid(tid) {
	272	}
	273
	274	protected:
	275	void finish(int r) override;
	276
	277	private:
	278	OrderedThrottle *m_ordered_throttle;
	279	uint64_t m_tid;
	280	};
	281
	282	/**
	283	* @class OrderedThrottle
	284	* Throttles the maximum number of active requests and completes them in order
	285	*
	286	* Operations can complete out-of-order but their associated Context callback
11fdf7f2	287	* will completed in-order during invocation of start_op() and wait_for_ret()
7c673cae FG	288	*/
	289	class OrderedThrottle {
	290	public:
	291	OrderedThrottle(uint64_t max, bool ignore_enoent);
c07f9fc5	292	~OrderedThrottle();
7c673cae FG	293
	294	C_OrderedThrottle start_op(Context on_finish);
	295	void end_op(int r);
	296
	297	bool pending_error() const;
	298	int wait_for_ret();
	299
	300	protected:
	301	friend class C_OrderedThrottle;
	302
	303	void finish_op(uint64_t tid, int r);
	304
	305	private:
	306	struct Result {
	307	bool finished;
	308	int ret_val;
	309	Context *on_finish;
	310
	311	Result(Context *_on_finish = NULL)
	312	: finished(false), ret_val(0), on_finish(_on_finish) {
	313	}
	314	};
	315
	316	typedef std::map<uint64_t, Result> TidResult;
	317
11fdf7f2 TL	318	mutable std::mutex m_lock;
11fdf7f2 TL	319	std::condition_variable m_cond;
7c673cae	320	uint64_t m_max;
11fdf7f2 TL	321	uint64_t m_current = 0;
11fdf7f2 TL	322	int m_ret_val = 0;
7c673cae FG	323	bool m_ignore_enoent;
7c673cae FG	324
11fdf7f2 TL	325	uint64_t m_next_tid = 0;
11fdf7f2 TL	326	uint64_t m_complete_tid = 0;
7c673cae FG	327
	328	TidResult m_tid_result;
	329
11fdf7f2	330	void complete_pending_ops(std::unique_lock<std::mutex>& l);
c07f9fc5	331	uint32_t waiters = 0;
7c673cae FG	332	};
7c673cae FG	333
11fdf7f2 TL	334
	335	class TokenBucketThrottle {
	336	struct Bucket {
	337	CephContext *cct;
	338	const std::string name;
	339
	340	uint64_t remain;
	341	uint64_t max;
	342
	343	Bucket(CephContext *cct, const std::string &name, uint64_t m)
	344	: cct(cct), name(name), remain(m), max(m) {}
	345
	346	uint64_t get(uint64_t c);
	347	uint64_t put(uint64_t c);
	348	void set_max(uint64_t m);
	349	};
	350
	351	struct Blocker {
	352	uint64_t tokens_requested;
	353	Context *ctx;
	354
	355	Blocker(uint64_t _tokens_requested, Context* _ctx)
	356	: tokens_requested(_tokens_requested), ctx(_ctx) {}
	357	};
	358
	359	CephContext *m_cct;
	360	const std::string m_name;
	361	Bucket m_throttle;
	362	uint64_t m_avg = 0;
	363	uint64_t m_burst = 0;
	364	SafeTimer *m_timer;
9f95a23c TL	365	ceph::mutex *m_timer_lock;
	366	Context *m_token_ctx = nullptr;
	367	std::list<Blocker> m_blockers;
	368	ceph::mutex m_lock;
11fdf7f2 TL	369
	370	// minimum of the filling period.
	371	uint64_t m_tick_min = 50;
	372	// tokens filling period, its unit is millisecond.
	373	uint64_t m_tick = 0;
	374	/**
	375	* These variables are used to calculate how many tokens need to be put into
	376	* the bucket within each tick.
	377	*
	378	* In actual use, the tokens to be put per tick(m_avg / m_ticks_per_second)
	379	* may be a floating point number, but we need an 'uint64_t' to put into the
	380	* bucket.
	381	*
	382	* For example, we set the value of rate to be 950, means 950 iops(or bps).
	383	*
	384	* In this case, the filling period(m_tick) should be 1000 / 950 = 1.052,
	385	* which is too small for the SafeTimer. So we should set the period(m_tick)
	386	* to be 50(m_tick_min), and 20 ticks in one second(m_ticks_per_second).
	387	* The tokens filled in bucket per tick is 950 / 20 = 47.5, not an integer.
	388	*
	389	* To resolve this, we use a method called tokens_filled(m_current_tick) to
	390	* calculate how many tokens will be put so far(until m_current_tick):
	391	*
	392	* tokens_filled = m_current_tick / m_ticks_per_second * m_avg
	393	*
	394	* And the difference between two ticks will be the result we expect.
	395	* tokens in tick 0: (1 / 20 * 950) - (0 / 20 * 950) = 47 - 0 = 47
	396	* tokens in tick 1: (2 / 20 * 950) - (1 / 20 * 950) = 95 - 47 = 48
	397	* tokens in tick 2: (3 / 20 * 950) - (2 / 20 * 950) = 142 - 95 = 47
	398	*
	399	* As a result, the tokens filled in one second will shown as this:
	400	* tick \| 1\| 2\| 3\| 4\| 5\| 6\| 7\| 8\| 9\|10\|11\|12\|13\|14\|15\|16\|17\|18\|19\|20\|
	401	* tokens \|47\|48\|47\|48\|47\|48\|47\|48\|47\|48\|47\|48\|47\|48\|47\|48\|47\|48\|47\|48\|
	402	*/
	403	uint64_t m_ticks_per_second = 0;
	404	uint64_t m_current_tick = 0;
	405
	406	// period for the bucket filling tokens, its unit is seconds.
	407	double m_schedule_tick = 1.0;
	408
	409	public:
	410	TokenBucketThrottle(CephContext *cct, const std::string &name,
	411	uint64_t capacity, uint64_t avg,
9f95a23c	412	SafeTimer timer, ceph::mutex timer_lock);
81eedcae	413
11fdf7f2 TL	414	~TokenBucketThrottle();
	415
	416	const std::string &get_name() {
	417	return m_name;
	418	}
	419
	420	template <typename T, typename I, void(T::MF)(int, I, uint64_t)>
	421	void add_blocker(uint64_t c, T handler, I item, uint64_t flag) {
9f95a23c	422	Context *ctx = new LambdaContext([handler, item, flag](int r) {
11fdf7f2 TL	423	(handler->*MF)(r, item, flag);
	424	});
	425	m_blockers.emplace_back(c, ctx);
	426	}
81eedcae	427
11fdf7f2 TL	428	template <typename T, typename I, void(T::MF)(int, I, uint64_t)>
11fdf7f2 TL	429	bool get(uint64_t c, T handler, I item, uint64_t flag) {
11fdf7f2 TL	430	bool wait = false;
	431	uint64_t got = 0;
	432	std::lock_guard lock(m_lock);
	433	if (!m_blockers.empty()) {
	434	// Keep the order of requests, add item after previous blocked requests.
	435	wait = true;
	436	} else {
	437	if (0 == m_throttle.max \|\| 0 == m_avg)
	438	return false;
81eedcae	439
11fdf7f2 TL	440	got = m_throttle.get(c);
	441	if (got < c) {
	442	// Not enough tokens, add a blocker for it.
	443	wait = true;
	444	}
	445	}
	446
	447	if (wait)
	448	add_blocker<T, I, MF>(c - got, handler, item, flag);
	449
	450	return wait;
	451	}
81eedcae	452
11fdf7f2 TL	453	int set_limit(uint64_t average, uint64_t burst);
	454	void set_schedule_tick_min(uint64_t tick);
	455
	456	private:
	457	uint64_t tokens_filled(double tick);
	458	uint64_t tokens_this_tick();
	459	void add_tokens();
	460	void schedule_timer();
	461	void cancel_timer();
	462	};
	463
7c673cae	464	#endif