]> git.proxmox.com Git - ceph.git/blob - ceph/src/seastar/include/seastar/core/io_queue.hh
import 15.2.0 Octopus source
[ceph.git] / ceph / src / seastar / include / seastar / core / io_queue.hh
1 /*
2 * This file is open source software, licensed to you under the terms
3 * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
4 * distributed with this work for additional information regarding copyright
5 * ownership. You may not use this file except in compliance with the License.
6 *
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing,
12 * software distributed under the License is distributed on an
13 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 * KIND, either express or implied. See the License for the
15 * specific language governing permissions and limitations
16 * under the License.
17 */
18 /*
19 * Copyright 2019 ScyllaDB
20 */
21
22 #pragma once
23
24 #include <seastar/core/sstring.hh>
25 #include <seastar/core/fair_queue.hh>
26 #include <seastar/core/metrics_registration.hh>
27 #include <seastar/core/shared_ptr.hh>
28 #include <seastar/core/future.hh>
29 #include <seastar/core/internal/io_request.hh>
30 #include <mutex>
31 #include <array>
32
33 namespace seastar {
34
35 /// Renames an io priority class
36 ///
37 /// Renames an \ref io_priority_class previously created with register_one_priority_class().
38 ///
39 /// The operation is global and affects all shards.
40 /// The operation affects the exported statistics labels.
41 ///
42 /// \param pc The io priority class to be renamed
43 /// \param new_name The new name for the io priority class
44 /// \return a future that is ready when the io priority class have been renamed
45 future<>
46 rename_priority_class(io_priority_class pc, sstring new_name);
47
namespace internal {
namespace linux_abi {

// Forward declarations only; neither type is defined in this header.
struct iocb;
struct io_event;

} // namespace linux_abi
} // namespace internal
56
// Forward declaration; the definition is not in the visible part of this header.
class io_priority_class;

// Shard identifiers are plain unsigned integers.
using shard_id = unsigned;
60
61 class io_queue {
62 private:
63 struct priority_class_data {
64 priority_class_ptr ptr;
65 size_t bytes;
66 uint64_t ops;
67 uint32_t nr_queued;
68 std::chrono::duration<double> queue_time;
69 metrics::metric_groups _metric_groups;
70 priority_class_data(sstring name, sstring mountpoint, priority_class_ptr ptr, shard_id owner);
71 void rename(sstring new_name, sstring mountpoint, shard_id owner);
72 private:
73 void register_stats(sstring name, sstring mountpoint, shard_id owner);
74 };
75
76 std::vector<std::vector<lw_shared_ptr<priority_class_data>>> _priority_classes;
77 fair_queue _fq;
78
79 static constexpr unsigned _max_classes = 2048;
80 static std::mutex _register_lock;
81 static std::array<uint32_t, _max_classes> _registered_shares;
82 static std::array<sstring, _max_classes> _registered_names;
83
84 static io_priority_class register_one_priority_class(sstring name, uint32_t shares);
85
86 priority_class_data& find_or_create_class(const io_priority_class& pc, shard_id owner);
87 friend class smp;
88 public:
89 // We want to represent the fact that write requests are (maybe) more expensive
90 // than read requests. To avoid dealing with floating point math we will scale one
91 // read request to be counted by this amount.
92 //
93 // A write request that is 30% more expensive than a read will be accounted as
94 // (read_request_base_count * 130) / 100.
95 // It is also technically possible for reads to be the expensive ones, in which case
96 // writes will have an integer value lower than read_request_base_count.
97 static constexpr unsigned read_request_base_count = 128;
98
99 struct config {
100 shard_id coordinator;
101 std::vector<shard_id> io_topology;
102 unsigned capacity = std::numeric_limits<unsigned>::max();
103 unsigned max_req_count = std::numeric_limits<unsigned>::max();
104 unsigned max_bytes_count = std::numeric_limits<unsigned>::max();
105 unsigned disk_req_write_to_read_multiplier = read_request_base_count;
106 unsigned disk_bytes_write_to_read_multiplier = read_request_base_count;
107 sstring mountpoint = "undefined";
108 };
109
110 io_queue(config cfg);
111 ~io_queue();
112
113 future<size_t>
114 queue_request(const io_priority_class& pc, size_t len, internal::io_request req);
115
116 size_t capacity() const {
117 return _config.capacity;
118 }
119
120 size_t queued_requests() const {
121 return _fq.waiters();
122 }
123
124 // How many requests are sent to disk but not yet returned.
125 size_t requests_currently_executing() const {
126 return _fq.requests_currently_executing();
127 }
128
129 // Inform the underlying queue about the fact that some of our requests finished
130 void notify_requests_finished(fair_queue_request_descriptor& desc) {
131 _fq.notify_requests_finished(desc);
132 }
133
134 // Dispatch requests that are pending in the I/O queue
135 void poll_io_queue() {
136 _fq.dispatch_requests();
137 }
138
139 sstring mountpoint() const {
140 return _config.mountpoint;
141 }
142
143 shard_id coordinator() const {
144 return _config.coordinator;
145 }
146 shard_id coordinator_of_shard(shard_id shard) const {
147 return _config.io_topology[shard];
148 }
149
150 future<> update_shares_for_class(io_priority_class pc, size_t new_shares);
151 void rename_priority_class(io_priority_class pc, sstring new_name);
152
153 friend class reactor;
154 private:
155 config _config;
156 static fair_queue::config make_fair_queue_config(config cfg);
157 };
158
159 }