]>
Commit | Line | Data |
---|---|---|
9f95a23c TL |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2016 Red Hat Inc. | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | ||
16 | #pragma once | |
17 | ||
18 | #include <ostream> | |
19 | #include <map> | |
20 | #include <vector> | |
21 | ||
22 | #include "boost/variant.hpp" | |
23 | ||
24 | #include "dmclock/src/dmclock_server.h" | |
25 | ||
26 | #include "osd/scheduler/OpScheduler.h" | |
27 | #include "common/config.h" | |
28 | #include "include/cmp.h" | |
29 | #include "common/ceph_context.h" | |
30 | #include "common/mClockPriorityQueue.h" | |
31 | #include "osd/scheduler/OpSchedulerItem.h" | |
32 | ||
33 | ||
34 | namespace ceph::osd::scheduler { | |
35 | ||
f67539c2 TL |
// Default minimum and maximum values. Their consumers are outside this
// header (presumably bounds for the mclock allocation settings -- TODO
// confirm against the implementation file).
constexpr uint64_t default_min = 1;
constexpr uint64_t default_max = 999999;
38 | ||
9f95a23c TL |
// Integer identifier types used to build the scheduler keys declared in
// this header (client_profile_id_t and scheduler_id_t).
using client_id_t = uint64_t;
using profile_id_t = uint64_t;
41 | ||
42 | struct client_profile_id_t { | |
43 | client_id_t client_id; | |
44 | profile_id_t profile_id; | |
45 | }; | |
46 | ||
47 | WRITE_EQ_OPERATORS_2(client_profile_id_t, client_id, profile_id) | |
48 | WRITE_CMP_OPERATORS_2(client_profile_id_t, client_id, profile_id) | |
49 | ||
50 | ||
51 | struct scheduler_id_t { | |
52 | op_scheduler_class class_id; | |
53 | client_profile_id_t client_profile_id; | |
54 | }; | |
55 | ||
56 | WRITE_EQ_OPERATORS_2(scheduler_id_t, class_id, client_profile_id) | |
57 | WRITE_CMP_OPERATORS_2(scheduler_id_t, class_id, client_profile_id) | |
58 | ||
59 | /** | |
60 | * Scheduler implementation based on mclock. | |
61 | * | |
62 | * TODO: explain configs | |
63 | */ | |
64 | class mClockScheduler : public OpScheduler, md_config_obs_t { | |
65 | ||
f67539c2 TL |
66 | CephContext *cct; |
67 | const uint32_t num_shards; | |
68 | bool is_rotational; | |
69 | double max_osd_capacity; | |
70 | double osd_mclock_cost_per_io; | |
71 | double osd_mclock_cost_per_byte; | |
72 | std::string mclock_profile = "high_client_ops"; | |
73 | struct ClientAllocs { | |
74 | uint64_t res; | |
75 | uint64_t wgt; | |
76 | uint64_t lim; | |
77 | ||
78 | ClientAllocs(uint64_t _res, uint64_t _wgt, uint64_t _lim) { | |
79 | update(_res, _wgt, _lim); | |
80 | } | |
81 | ||
82 | inline void update(uint64_t _res, uint64_t _wgt, uint64_t _lim) { | |
83 | res = _res; | |
84 | wgt = _wgt; | |
85 | lim = _lim; | |
86 | } | |
87 | }; | |
88 | std::array< | |
89 | ClientAllocs, | |
90 | static_cast<size_t>(op_scheduler_class::client) + 1 | |
91 | > client_allocs = { | |
92 | // Placeholder, get replaced with configured values | |
93 | ClientAllocs(1, 1, 1), // background_recovery | |
94 | ClientAllocs(1, 1, 1), // background_best_effort | |
95 | ClientAllocs(1, 1, 1), // immediate (not used) | |
96 | ClientAllocs(1, 1, 1) // client | |
97 | }; | |
9f95a23c TL |
98 | class ClientRegistry { |
99 | std::array< | |
100 | crimson::dmclock::ClientInfo, | |
101 | static_cast<size_t>(op_scheduler_class::immediate) | |
102 | > internal_client_infos = { | |
103 | // Placeholder, gets replaced with configured values | |
104 | crimson::dmclock::ClientInfo(1, 1, 1), | |
105 | crimson::dmclock::ClientInfo(1, 1, 1) | |
106 | }; | |
107 | ||
108 | crimson::dmclock::ClientInfo default_external_client_info = {1, 1, 1}; | |
109 | std::map<client_profile_id_t, | |
110 | crimson::dmclock::ClientInfo> external_client_infos; | |
111 | const crimson::dmclock::ClientInfo *get_external_client( | |
112 | const client_profile_id_t &client) const; | |
113 | public: | |
114 | void update_from_config(const ConfigProxy &conf); | |
115 | const crimson::dmclock::ClientInfo *get_info( | |
116 | const scheduler_id_t &id) const; | |
117 | } client_registry; | |
118 | ||
119 | using mclock_queue_t = crimson::dmclock::PullPriorityQueue< | |
120 | scheduler_id_t, | |
121 | OpSchedulerItem, | |
122 | true, | |
123 | true, | |
124 | 2>; | |
125 | mclock_queue_t scheduler; | |
126 | std::list<OpSchedulerItem> immediate; | |
127 | ||
128 | static scheduler_id_t get_scheduler_id(const OpSchedulerItem &item) { | |
129 | return scheduler_id_t{ | |
130 | item.get_scheduler_class(), | |
131 | client_profile_id_t{ | |
132 | item.get_owner(), | |
133 | 0 | |
134 | } | |
135 | }; | |
136 | } | |
137 | ||
138 | public: | |
f67539c2 TL |
139 | mClockScheduler(CephContext *cct, uint32_t num_shards, bool is_rotational); |
140 | ~mClockScheduler() override; | |
141 | ||
142 | // Set the max osd capacity in iops | |
143 | void set_max_osd_capacity(); | |
144 | ||
145 | // Set the cost per io for the osd | |
146 | void set_osd_mclock_cost_per_io(); | |
147 | ||
148 | // Set the cost per byte for the osd | |
149 | void set_osd_mclock_cost_per_byte(); | |
150 | ||
151 | // Set the mclock profile type to enable | |
152 | void set_mclock_profile(); | |
153 | ||
154 | // Get the active mclock profile | |
155 | std::string get_mclock_profile(); | |
156 | ||
157 | // Set "balanced" profile allocations | |
158 | void set_balanced_profile_allocations(); | |
159 | ||
160 | // Set "high_recovery_ops" profile allocations | |
161 | void set_high_recovery_ops_profile_allocations(); | |
162 | ||
163 | // Set "high_client_ops" profile allocations | |
164 | void set_high_client_ops_profile_allocations(); | |
165 | ||
166 | // Set the mclock related config params based on the profile | |
167 | void enable_mclock_profile_settings(); | |
168 | ||
169 | // Set mclock config parameter based on allocations | |
170 | void set_profile_config(); | |
171 | ||
f67539c2 TL |
172 | // Calculate scale cost per item |
173 | int calc_scaled_cost(int cost); | |
9f95a23c TL |
174 | |
175 | // Enqueue op in the back of the regular queue | |
176 | void enqueue(OpSchedulerItem &&item) final; | |
177 | ||
178 | // Enqueue the op in the front of the regular queue | |
179 | void enqueue_front(OpSchedulerItem &&item) final; | |
180 | ||
181 | // Return an op to be dispatch | |
f67539c2 | 182 | WorkItem dequeue() final; |
9f95a23c TL |
183 | |
184 | // Returns if the queue is empty | |
185 | bool empty() const final { | |
186 | return immediate.empty() && scheduler.empty(); | |
187 | } | |
188 | ||
189 | // Formatted output of the queue | |
190 | void dump(ceph::Formatter &f) const final; | |
191 | ||
192 | void print(std::ostream &ostream) const final { | |
193 | ostream << "mClockScheduler"; | |
194 | } | |
195 | ||
a4b75251 TL |
196 | // Update data associated with the modified mclock config key(s) |
197 | void update_configuration() final; | |
198 | ||
9f95a23c TL |
199 | const char** get_tracked_conf_keys() const final; |
200 | void handle_conf_change(const ConfigProxy& conf, | |
201 | const std::set<std::string> &changed) final; | |
202 | }; | |
203 | ||
204 | } |