1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2016 Red Hat Inc.
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
22 #include "boost/variant.hpp"
24 #include "dmclock/src/dmclock_server.h"
26 #include "osd/scheduler/OpScheduler.h"
27 #include "common/config.h"
28 #include "include/cmp.h"
29 #include "common/ceph_context.h"
30 #include "common/mClockPriorityQueue.h"
31 #include "osd/scheduler/OpSchedulerItem.h"
34 namespace ceph::osd::scheduler
{
// Compile-time bounds: default_min is 1, default_max is 999999.
// NOTE(review): their use is not visible in this header — presumably the
// fallback minimum/maximum for mclock allocation values; confirm in the .cc.
36 constexpr uint64_t default_min
= 1;
37 constexpr uint64_t default_max
= 999999;
// Client and profile identifiers are both plain 64-bit unsigned integers.
39 using client_id_t
= uint64_t;
40 using profile_id_t
= uint64_t;
// Identifies a client by a (client_id, profile_id) pair.
42 struct client_profile_id_t
{
43 client_id_t client_id
;
44 profile_id_t profile_id
;
// Streams the pair as " client_id: <id> profile_id: <id>".
46 friend std::ostream
& operator<<(std::ostream
& out
,
47 const client_profile_id_t
& client_profile
) {
48 out
<< " client_id: " << client_profile
.client_id
49 << " profile_id: " << client_profile
.profile_id
;
// Comparison operators over the fields client_id and profile_id.
// NOTE(review): the macro definitions are not shown here (presumably from
// include/cmp.h) — assumed to generate ==/!= and the ordering operators,
// comparing the fields in the listed order; confirm.
54 WRITE_EQ_OPERATORS_2(client_profile_id_t
, client_id
, profile_id
)
55 WRITE_CMP_OPERATORS_2(client_profile_id_t
, client_id
, profile_id
)
// Identifies a scheduling entity: an op_scheduler_class together with the
// client_profile_id_t of the client.
58 struct scheduler_id_t
{
59 op_scheduler_class class_id
;
60 client_profile_id_t client_profile_id
;
// Streams "{ class_id: <class>" followed by the client_profile_id's own
// stream output; the remainder of the body is not visible in this excerpt.
62 friend std::ostream
& operator<<(std::ostream
& out
,
63 const scheduler_id_t
& sched_id
) {
64 out
<< "{ class_id: " << sched_id
.class_id
65 << sched_id
.client_profile_id
;
// Comparison operators over the fields class_id and client_profile_id.
// NOTE(review): the macro definitions are not shown here (presumably from
// include/cmp.h) — assumed to compare the fields in the listed order; confirm.
70 WRITE_EQ_OPERATORS_2(scheduler_id_t
, class_id
, client_profile_id
)
71 WRITE_CMP_OPERATORS_2(scheduler_id_t
, class_id
, client_profile_id
)
74 * Scheduler implementation based on mclock.
76 * TODO: explain configs
// OpScheduler implementation that also derives from md_config_obs_t; it
// declares get_tracked_conf_keys() and handle_conf_change() further down.
78 class mClockScheduler
: public OpScheduler
, md_config_obs_t
{
// Shard count; const, so it cannot change after construction.
81 const uint32_t num_shards
;
// Max OSD capacity (in iops, per the comment on set_max_osd_capacity()).
83 double max_osd_capacity
;
// Cost-per-io and cost-per-byte values.
// NOTE(review): presumably written by the matching
// set_osd_mclock_cost_per_io()/set_osd_mclock_cost_per_byte() — confirm.
84 double osd_mclock_cost_per_io
;
85 double osd_mclock_cost_per_byte
;
// Name of the mclock profile; initialised to "high_client_ops".
86 std::string mclock_profile
= "high_client_ops";
// Constructor: forwards its three arguments unchanged to update().
// NOTE(review): _res/_wgt/_lim presumably stand for reservation, weight and
// limit — confirm against the member declarations.
92 ClientAllocs(uint64_t _res
, uint64_t _wgt
, uint64_t _lim
) {
93 update(_res
, _wgt
, _lim
);
// Takes the same three values as the constructor (which calls this).
// The body is not visible in this excerpt.
// NOTE(review): presumably stores the values into the struct's members — confirm.
96 inline void update(uint64_t _res
, uint64_t _wgt
, uint64_t _lim
) {
// Size expression: the numeric value of op_scheduler_class::client plus one.
// Four ClientAllocs entries follow, each labelled with its scheduler class.
104 static_cast<size_t>(op_scheduler_class::client
) + 1
106 // Placeholder, gets replaced with configured values
107 ClientAllocs(1, 1, 1), // background_recovery
108 ClientAllocs(1, 1, 1), // background_best_effort
109 ClientAllocs(1, 1, 1), // immediate (not used)
110 ClientAllocs(1, 1, 1) // client
// Holds the crimson::dmclock::ClientInfo records; get_info() returns a
// pointer to one of them for a given scheduler_id_t.
112 class ClientRegistry
{
// Fixed-size table sized by the numeric value of
// op_scheduler_class::immediate; two placeholder entries are listed.
// NOTE(review): presumably one entry per internal (background) scheduler
// class — confirm.
114 crimson::dmclock::ClientInfo
,
115 static_cast<size_t>(op_scheduler_class::immediate
)
116 > internal_client_infos
= {
117 // Placeholder, gets replaced with configured values
118 crimson::dmclock::ClientInfo(1, 1, 1),
119 crimson::dmclock::ClientInfo(1, 1, 1)
// ClientInfo initialised to {1, 1, 1}.
// NOTE(review): presumably the fallback for external clients that have no
// entry in external_client_infos — confirm.
122 crimson::dmclock::ClientInfo default_external_client_info
= {1, 1, 1};
// ClientInfo records keyed by client_profile_id_t.
123 std::map
<client_profile_id_t
,
124 crimson::dmclock::ClientInfo
> external_client_infos
;
// Looks up the ClientInfo for an external client; returns a pointer to const.
// NOTE(review): behaviour for an unknown client is not visible here — confirm.
125 const crimson::dmclock::ClientInfo
*get_external_client(
126 const client_profile_id_t
&client
) const;
// Refreshes the registry from the given configuration proxy.
128 void update_from_config(const ConfigProxy
&conf
);
// Returns a pointer to the ClientInfo that applies to the given scheduler id.
129 const crimson::dmclock::ClientInfo
*get_info(
130 const scheduler_id_t
&id
) const;
// Alias for a crimson::dmclock::PullPriorityQueue instantiation; the template
// arguments are not visible in this excerpt.
133 using mclock_queue_t
= crimson::dmclock::PullPriorityQueue
<
// The mclock queue instance; empty() checks it alongside `immediate`.
139 mclock_queue_t scheduler
;
// Separate list of items kept outside the mclock queue.
// NOTE(review): presumably holds items of the `immediate` scheduler class,
// which bypass mclock ordering — confirm in enqueue()/dequeue().
140 std::list
<OpSchedulerItem
> immediate
;
// Builds the scheduler_id_t for an item. The first initializer is the item's
// scheduler class; the rest of the initializer is not visible in this excerpt.
142 static scheduler_id_t
get_scheduler_id(const OpSchedulerItem
&item
) {
143 return scheduler_id_t
{
144 item
.get_scheduler_class(),
// Construct from a CephContext pointer, the shard count and an is_rotational
// flag.
// NOTE(review): how is_rotational is used is not visible in this header —
// confirm in the definition.
153 mClockScheduler(CephContext
*cct
, uint32_t num_shards
, bool is_rotational
);
// Destructor; marked override, so it overrides a virtual base-class
// destructor. Defined out of line.
154 ~mClockScheduler() override
;
156 // Set the max osd capacity in iops.
// NOTE(review): takes no argument, so the value presumably comes from the
// configuration — confirm in the definition.
157 void set_max_osd_capacity();
159 // Set the cost per io for the osd.
// NOTE(review): takes no argument; the source of the value is not visible
// here — confirm in the definition.
160 void set_osd_mclock_cost_per_io();
162 // Set the cost per byte for the osd.
// NOTE(review): takes no argument; the source of the value is not visible
// here — confirm in the definition.
163 void set_osd_mclock_cost_per_byte();
165 // Set the mclock profile type to enable.
// NOTE(review): takes no argument, so the profile name presumably comes from
// the configuration — confirm in the definition.
166 void set_mclock_profile();
168 // Get the active mclock profile, returned by value as a std::string.
169 std::string
get_mclock_profile();
171 // Set the allocations for the "balanced" profile.
172 void set_balanced_profile_allocations();
174 // Set the allocations for the "high_recovery_ops" profile.
175 void set_high_recovery_ops_profile_allocations();
177 // Set the allocations for the "high_client_ops" profile.
178 void set_high_client_ops_profile_allocations();
180 // Set the mclock-related config params based on the profile.
181 void enable_mclock_profile_settings();
183 // Set the mclock config parameters based on the allocations.
184 void set_profile_config();
186 // Calculate the scaled cost for an item from its given cost.
// NOTE(review): the scaling formula is not visible in this header — confirm
// in the definition.
187 int calc_scaled_cost(int cost
);
189 // Helper that returns a string describing the mclock queues.
190 std::string
display_queues() const;
192 // Enqueue the op at the back of the regular queue; the item is taken by
// rvalue reference.
193 void enqueue(OpSchedulerItem
&&item
) final
;
195 // Enqueue the op at the front of the regular queue; the item is taken by
// rvalue reference.
196 void enqueue_front(OpSchedulerItem
&&item
) final
;
198 // Return the next op to be dispatched, as a WorkItem.
199 WorkItem
dequeue() final
;
201 // Returns true only when both the immediate list and the mclock
// scheduler queue are empty.
202 bool empty() const final
{
203 return immediate
.empty() && scheduler
.empty();
206 // Formatted output of the queue, written through the given ceph::Formatter.
207 void dump(ceph::Formatter
&f
) const final
;
// Writes the fixed name "mClockScheduler" to the given stream.
209 void print(std::ostream
&ostream
) const final
{
210 ostream
<< "mClockScheduler";
213 // Update the data associated with the modified mclock config key(s).
214 void update_configuration() final
;
// Returns the config keys this object tracks, as an array of C strings.
// NOTE(review): presumably the md_config_obs_t observer hook and a
// null-terminated array — confirm against that interface.
216 const char** get_tracked_conf_keys() const final
;
// Receives the config proxy and the set of names of the keys that changed.
// NOTE(review): presumably the md_config_obs_t change callback, invoked for
// keys returned by get_tracked_conf_keys() — confirm against that interface.
217 void handle_conf_change(const ConfigProxy
& conf
,
218 const std::set
<std::string
> &changed
) final
;