ceph/src/seastar/include/seastar/core/io_queue.hh

   1 /*
   2  * This file is open source software, licensed to you under the terms
   3  * of the Apache License, Version 2.0 (the "License").  See the NOTICE file
   4  * distributed with this work for additional information regarding copyright
   5  * ownership.  You may not use this file except in compliance with the License.
   6  *
   7  * You may obtain a copy of the License at
   8  *
   9  *   http://www.apache.org/licenses/LICENSE-2.0
  10  *
  11  * Unless required by applicable law or agreed to in writing,
  12  * software distributed under the License is distributed on an
  13  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  14  * KIND, either express or implied.  See the License for the
  15  * specific language governing permissions and limitations
  16  * under the License.
  17  */
  18 /*
  19  * Copyright 2019 ScyllaDB
  20  */
  21
  22 #pragma once
  23
  24 #include <seastar/core/sstring.hh>
  25 #include <seastar/core/fair_queue.hh>
  26 #include <seastar/core/metrics_registration.hh>
  27 #include <seastar/core/shared_ptr.hh>
  28 #include <seastar/core/future.hh>
  29 #include <seastar/core/internal/io_request.hh>
  30 #include <mutex>
  31 #include <array>
  32
  33 namespace seastar {
  34
  35 /// Renames an io priority class
  36 ///
  37 /// Renames an \ref io_priority_class previously created with register_one_priority_class().
  38 ///
  39 /// The operation is global and affects all shards.
  40 /// The operation affects the exported statistics labels.
  41 ///
  42 /// \param pc The io priority class to be renamed
  43 /// \param new_name The new name for the io priority class
  44 /// \return a future that is ready when the io priority class have been renamed
  45 future<>
  46 rename_priority_class(io_priority_class pc, sstring new_name);
  47
  48 namespace internal {
  49 namespace linux_abi {
  50
  51 struct io_event;
  52 struct iocb;
  53
  54 }
  55 }
  56
  57 using shard_id = unsigned;
  58
  59 class io_priority_class;
  60
  61 class io_queue {
  62 private:
  63     struct priority_class_data {
  64         priority_class_ptr ptr;
  65         size_t bytes;
  66         uint64_t ops;
  67         uint32_t nr_queued;
  68         std::chrono::duration<double> queue_time;
  69         metrics::metric_groups _metric_groups;
  70         priority_class_data(sstring name, sstring mountpoint, priority_class_ptr ptr, shard_id owner);
  71         void rename(sstring new_name, sstring mountpoint, shard_id owner);
  72     private:
  73         void register_stats(sstring name, sstring mountpoint, shard_id owner);
  74     };
  75
  76     std::vector<std::vector<lw_shared_ptr<priority_class_data>>> _priority_classes;
  77     fair_queue _fq;
  78
  79     static constexpr unsigned _max_classes = 2048;
  80     static std::mutex _register_lock;
  81     static std::array<uint32_t, _max_classes> _registered_shares;
  82     static std::array<sstring, _max_classes> _registered_names;
  83
  84     static io_priority_class register_one_priority_class(sstring name, uint32_t shares);
  85
  86     priority_class_data& find_or_create_class(const io_priority_class& pc, shard_id owner);
  87     friend class smp;
  88 public:
  89     // We want to represent the fact that write requests are (maybe) more expensive
  90     // than read requests. To avoid dealing with floating point math we will scale one
  91     // read request to be counted by this amount.
  92     //
  93     // A write request that is 30% more expensive than a read will be accounted as
  94     // (read_request_base_count * 130) / 100.
  95     // It is also technically possible for reads to be the expensive ones, in which case
  96     // writes will have an integer value lower than read_request_base_count.
  97     static constexpr unsigned read_request_base_count = 128;
  98
  99     struct config {
 100         shard_id coordinator;
 101         std::vector<shard_id> io_topology;
 102         unsigned capacity = std::numeric_limits<unsigned>::max();
 103         unsigned max_req_count = std::numeric_limits<unsigned>::max();
 104         unsigned max_bytes_count = std::numeric_limits<unsigned>::max();
 105         unsigned disk_req_write_to_read_multiplier = read_request_base_count;
 106         unsigned disk_bytes_write_to_read_multiplier = read_request_base_count;
 107         sstring mountpoint = "undefined";
 108     };
 109
 110     io_queue(config cfg);
 111     ~io_queue();
 112
 113     future<size_t>
 114     queue_request(const io_priority_class& pc, size_t len, internal::io_request req);
 115
 116     size_t capacity() const {
 117         return _config.capacity;
 118     }
 119
 120     size_t queued_requests() const {
 121         return _fq.waiters();
 122     }
 123
 124     // How many requests are sent to disk but not yet returned.
 125     size_t requests_currently_executing() const {
 126         return _fq.requests_currently_executing();
 127     }
 128
 129     // Inform the underlying queue about the fact that some of our requests finished
 130     void notify_requests_finished(fair_queue_request_descriptor& desc) {
 131         _fq.notify_requests_finished(desc);
 132     }
 133
 134     // Dispatch requests that are pending in the I/O queue
 135     void poll_io_queue() {
 136         _fq.dispatch_requests();
 137     }
 138
 139     sstring mountpoint() const {
 140         return _config.mountpoint;
 141     }
 142
 143     shard_id coordinator() const {
 144         return _config.coordinator;
 145     }
 146     shard_id coordinator_of_shard(shard_id shard) const {
 147         return _config.io_topology[shard];
 148     }
 149
 150     future<> update_shares_for_class(io_priority_class pc, size_t new_shares);
 151     void rename_priority_class(io_priority_class pc, sstring new_name);
 152
 153     friend class reactor;
 154 private:
 155     config _config;
 156     static fair_queue::config make_fair_queue_config(config cfg);
 157 };
 158
 159 }