]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | #ifndef CEPH_LIBRBD_IMAGECTX_H | |
4 | #define CEPH_LIBRBD_IMAGECTX_H | |
5 | ||
6 | #include "include/int_types.h" | |
7 | ||
8 | #include <list> | |
9 | #include <map> | |
10 | #include <string> | |
11 | #include <vector> | |
12 | ||
13 | #include "common/event_socket.h" | |
14 | #include "common/Mutex.h" | |
15 | #include "common/Readahead.h" | |
16 | #include "common/RWLock.h" | |
17 | #include "common/snap_types.h" | |
31f18b77 | 18 | #include "common/zipkin_trace.h" |
7c673cae FG |
19 | |
20 | #include "include/buffer_fwd.h" | |
21 | #include "include/rbd/librbd.hpp" | |
22 | #include "include/rbd_types.h" | |
23 | #include "include/types.h" | |
24 | #include "include/xlist.h" | |
25 | #include "osdc/ObjectCacher.h" | |
26 | ||
27 | #include "cls/rbd/cls_rbd_types.h" | |
28 | #include "cls/rbd/cls_rbd_client.h" | |
29 | #include "librbd/AsyncRequest.h" | |
30 | #include "librbd/Types.h" | |
31 | ||
32 | class CephContext; | |
33 | class ContextWQ; | |
34 | class Finisher; | |
35 | class PerfCounters; | |
36 | class ThreadPool; | |
37 | class SafeTimer; | |
38 | ||
39 | namespace librbd { | |
40 | ||
7c673cae FG |
41 | template <typename> class ExclusiveLock; |
42 | template <typename> class ImageState; | |
43 | template <typename> class ImageWatcher; | |
44 | template <typename> class Journal; | |
45 | class LibrbdAdminSocketHook; | |
46 | template <typename> class ObjectMap; | |
47 | template <typename> class Operations; | |
48 | class LibrbdWriteback; | |
49 | ||
50 | namespace cache { struct ImageCache; } | |
51 | namespace exclusive_lock { struct Policy; } | |
52 | namespace io { | |
53 | class AioCompletion; | |
31f18b77 | 54 | class AsyncOperation; |
b32b8144 | 55 | template <typename> class CopyupRequest; |
224ce89b | 56 | template <typename> class ImageRequestWQ; |
7c673cae FG |
57 | } |
58 | namespace journal { struct Policy; } | |
59 | ||
60 | namespace operation { | |
61 | template <typename> class ResizeRequest; | |
62 | } | |
63 | ||
64 | struct ImageCtx { | |
65 | CephContext *cct; | |
66 | PerfCounters *perfcounter; | |
67 | struct rbd_obj_header_ondisk header; | |
68 | ::SnapContext snapc; | |
69 | std::vector<librados::snap_t> snaps; // this mirrors snapc.snaps, but is in | |
70 | // a format librados can understand | |
71 | std::map<librados::snap_t, SnapInfo> snap_info; | |
72 | std::map<std::pair<cls::rbd::SnapshotNamespace, std::string>, librados::snap_t> snap_ids; | |
73 | uint64_t snap_id; | |
74 | bool snap_exists; // false if our snap_id was deleted | |
75 | // whether the image was opened read-only. cannot be changed after opening | |
76 | bool read_only; | |
77 | bool flush_encountered; | |
78 | ||
79 | std::map<rados::cls::lock::locker_id_t, | |
80 | rados::cls::lock::locker_info_t> lockers; | |
81 | bool exclusive_locked; | |
82 | std::string lock_tag; | |
83 | ||
84 | std::string name; | |
85 | cls::rbd::SnapshotNamespace snap_namespace; | |
86 | std::string snap_name; | |
87 | IoCtx data_ctx, md_ctx; | |
88 | ImageWatcher<ImageCtx> *image_watcher; | |
89 | Journal<ImageCtx> *journal; | |
90 | ||
91 | /** | |
92 | * Lock ordering: | |
93 | * | |
94 | * owner_lock, md_lock, cache_lock, snap_lock, parent_lock, | |
95 | * object_map_lock, async_ops_lock | |
96 | */ | |
97 | RWLock owner_lock; // protects exclusive lock leadership updates | |
98 | RWLock md_lock; // protects access to the mutable image metadata that | |
99 | // isn't guarded by other locks below, and blocks writes | |
100 | // when held exclusively, so snapshots can be consistent. | |
101 | // Fields guarded include: | |
102 | // flush_encountered | |
103 | // total_bytes_read | |
104 | // exclusive_locked | |
105 | // lock_tag | |
106 | // lockers | |
107 | Mutex cache_lock; // used as client_lock for the ObjectCacher | |
108 | RWLock snap_lock; // protects snapshot-related member variables, | |
109 | // features (and associated helper classes), and flags | |
110 | RWLock parent_lock; // protects parent_md and parent | |
111 | RWLock object_map_lock; // protects object map updates and object_map itself | |
112 | Mutex async_ops_lock; // protects async_ops and async_requests | |
113 | Mutex copyup_list_lock; // protects copyup_list | |
114 | Mutex completed_reqs_lock; // protects completed_reqs | |
115 | ||
116 | unsigned extra_read_flags; | |
117 | ||
118 | bool old_format; | |
119 | uint8_t order; | |
120 | uint64_t size; | |
121 | uint64_t features; | |
122 | std::string object_prefix; | |
123 | char *format_string; | |
124 | std::string header_oid; | |
125 | std::string id; // only used for new-format images | |
126 | ParentInfo parent_md; | |
127 | ImageCtx *parent; | |
b32b8144 | 128 | ImageCtx *child = nullptr; |
7c673cae FG |
129 | cls::rbd::GroupSpec group_spec; |
130 | uint64_t stripe_unit, stripe_count; | |
131 | uint64_t flags; | |
31f18b77 | 132 | utime_t create_timestamp; |
7c673cae FG |
133 | |
134 | file_layout_t layout; | |
135 | ||
136 | cache::ImageCache *image_cache = nullptr; | |
137 | ObjectCacher *object_cacher; | |
138 | LibrbdWriteback *writeback_handler; | |
139 | ObjectCacher::ObjectSet *object_set; | |
140 | ||
141 | Readahead readahead; | |
142 | uint64_t total_bytes_read; | |
143 | ||
b32b8144 | 144 | std::map<uint64_t, io::CopyupRequest<ImageCtx>*> copyup_list; |
7c673cae | 145 | |
31f18b77 | 146 | xlist<io::AsyncOperation*> async_ops; |
7c673cae FG |
147 | xlist<AsyncRequest<>*> async_requests; |
148 | std::list<Context*> async_requests_waiters; | |
149 | ||
150 | ImageState<ImageCtx> *state; | |
151 | Operations<ImageCtx> *operations; | |
152 | ||
153 | ExclusiveLock<ImageCtx> *exclusive_lock; | |
154 | ObjectMap<ImageCtx> *object_map; | |
155 | ||
156 | xlist<operation::ResizeRequest<ImageCtx>*> resize_reqs; | |
157 | ||
224ce89b | 158 | io::ImageRequestWQ<ImageCtx> *io_work_queue; |
7c673cae FG |
159 | xlist<io::AioCompletion*> completed_reqs; |
160 | EventSocket event_socket; | |
161 | ||
162 | ContextWQ *op_work_queue; | |
163 | ||
164 | // Configuration | |
165 | static const string METADATA_CONF_PREFIX; | |
166 | bool non_blocking_aio; | |
167 | bool cache; | |
168 | bool cache_writethrough_until_flush; | |
169 | uint64_t cache_size; | |
170 | uint64_t cache_max_dirty; | |
171 | uint64_t cache_target_dirty; | |
172 | double cache_max_dirty_age; | |
173 | uint32_t cache_max_dirty_object; | |
174 | bool cache_block_writes_upfront; | |
175 | uint32_t concurrent_management_ops; | |
176 | bool balance_snap_reads; | |
177 | bool localize_snap_reads; | |
178 | bool balance_parent_reads; | |
179 | bool localize_parent_reads; | |
b32b8144 | 180 | uint64_t sparse_read_threshold_bytes; |
7c673cae FG |
181 | uint32_t readahead_trigger_requests; |
182 | uint64_t readahead_max_bytes; | |
183 | uint64_t readahead_disable_after_bytes; | |
184 | bool clone_copy_on_read; | |
185 | bool blacklist_on_break_lock; | |
186 | uint32_t blacklist_expire_seconds; | |
187 | uint32_t request_timed_out_seconds; | |
188 | bool enable_alloc_hint; | |
189 | uint8_t journal_order; | |
190 | uint8_t journal_splay_width; | |
191 | double journal_commit_age; | |
192 | int journal_object_flush_interval; | |
193 | uint64_t journal_object_flush_bytes; | |
194 | double journal_object_flush_age; | |
195 | std::string journal_pool; | |
196 | uint32_t journal_max_payload_bytes; | |
197 | int journal_max_concurrent_object_sets; | |
198 | bool mirroring_resync_after_disconnect; | |
199 | int mirroring_replay_delay; | |
200 | bool skip_partial_discard; | |
181888fb | 201 | bool blkin_trace_all; |
7c673cae FG |
202 | |
203 | LibrbdAdminSocketHook *asok_hook; | |
204 | ||
205 | exclusive_lock::Policy *exclusive_lock_policy = nullptr; | |
206 | journal::Policy *journal_policy = nullptr; | |
207 | ||
31f18b77 FG |
208 | ZTracer::Endpoint trace_endpoint; |
209 | ||
7c673cae FG |
210 | static bool _filter_metadata_confs(const string &prefix, std::map<string, bool> &configs, |
211 | const map<string, bufferlist> &pairs, map<string, bufferlist> *res); | |
212 | ||
213 | // unit test mock helpers | |
214 | static ImageCtx* create(const std::string &image_name, | |
215 | const std::string &image_id, | |
216 | const char *snap, IoCtx& p, bool read_only) { | |
217 | return new ImageCtx(image_name, image_id, snap, p, read_only); | |
218 | } | |
219 | void destroy() { | |
220 | delete this; | |
221 | } | |
222 | ||
223 | /** | |
224 | * Either image_name or image_id must be set. | |
225 | * If id is not known, pass the empty std::string, | |
226 | * and init() will look it up. | |
227 | */ | |
228 | ImageCtx(const std::string &image_name, const std::string &image_id, | |
229 | const char *snap, IoCtx& p, bool read_only); | |
230 | ~ImageCtx(); | |
231 | void init(); | |
232 | void shutdown(); | |
233 | void init_layout(); | |
234 | void perf_start(std::string name); | |
235 | void perf_stop(); | |
236 | void set_read_flag(unsigned flag); | |
237 | int get_read_flags(librados::snap_t snap_id); | |
238 | int snap_set(cls::rbd::SnapshotNamespace in_snap_namespace, | |
239 | std::string in_snap_name); | |
240 | void snap_unset(); | |
241 | librados::snap_t get_snap_id(cls::rbd::SnapshotNamespace in_snap_namespace, | |
242 | std::string in_snap_name) const; | |
243 | const SnapInfo* get_snap_info(librados::snap_t in_snap_id) const; | |
244 | int get_snap_name(librados::snap_t in_snap_id, | |
245 | std::string *out_snap_name) const; | |
246 | int get_snap_namespace(librados::snap_t in_snap_id, | |
247 | cls::rbd::SnapshotNamespace *out_snap_namespace) const; | |
248 | int get_parent_spec(librados::snap_t in_snap_id, | |
249 | ParentSpec *pspec) const; | |
250 | int is_snap_protected(librados::snap_t in_snap_id, | |
251 | bool *is_protected) const; | |
252 | int is_snap_unprotected(librados::snap_t in_snap_id, | |
253 | bool *is_unprotected) const; | |
254 | ||
255 | uint64_t get_current_size() const; | |
256 | uint64_t get_object_size() const; | |
257 | string get_object_name(uint64_t num) const; | |
258 | uint64_t get_stripe_unit() const; | |
259 | uint64_t get_stripe_count() const; | |
260 | uint64_t get_stripe_period() const; | |
31f18b77 | 261 | utime_t get_create_timestamp() const; |
7c673cae FG |
262 | |
263 | void add_snap(cls::rbd::SnapshotNamespace in_snap_namespace, | |
264 | std::string in_snap_name, | |
265 | librados::snap_t id, | |
266 | uint64_t in_size, const ParentInfo &parent, | |
267 | uint8_t protection_status, uint64_t flags, utime_t timestamp); | |
268 | void rm_snap(cls::rbd::SnapshotNamespace in_snap_namespace, | |
269 | std::string in_snap_name, | |
270 | librados::snap_t id); | |
271 | uint64_t get_image_size(librados::snap_t in_snap_id) const; | |
272 | uint64_t get_object_count(librados::snap_t in_snap_id) const; | |
273 | bool test_features(uint64_t test_features) const; | |
274 | bool test_features(uint64_t test_features, | |
275 | const RWLock &in_snap_lock) const; | |
276 | int get_flags(librados::snap_t in_snap_id, uint64_t *flags) const; | |
31f18b77 FG |
277 | int test_flags(uint64_t test_flags, bool *flags_set) const; |
278 | int test_flags(uint64_t test_flags, const RWLock &in_snap_lock, | |
279 | bool *flags_set) const; | |
7c673cae FG |
280 | int update_flags(librados::snap_t in_snap_id, uint64_t flag, bool enabled); |
281 | ||
282 | const ParentInfo* get_parent_info(librados::snap_t in_snap_id) const; | |
283 | int64_t get_parent_pool_id(librados::snap_t in_snap_id) const; | |
284 | std::string get_parent_image_id(librados::snap_t in_snap_id) const; | |
285 | uint64_t get_parent_snap_id(librados::snap_t in_snap_id) const; | |
286 | int get_parent_overlap(librados::snap_t in_snap_id, | |
287 | uint64_t *overlap) const; | |
288 | void aio_read_from_cache(object_t o, uint64_t object_no, bufferlist *bl, | |
289 | size_t len, uint64_t off, Context *onfinish, | |
31f18b77 | 290 | int fadvise_flags, ZTracer::Trace *trace); |
7c673cae FG |
291 | void write_to_cache(object_t o, const bufferlist& bl, size_t len, |
292 | uint64_t off, Context *onfinish, int fadvise_flags, | |
31f18b77 | 293 | uint64_t journal_tid, ZTracer::Trace *trace); |
7c673cae FG |
294 | void user_flushed(); |
295 | void flush_cache(Context *onfinish); | |
296 | void shut_down_cache(Context *on_finish); | |
297 | int invalidate_cache(bool purge_on_error); | |
298 | void invalidate_cache(bool purge_on_error, Context *on_finish); | |
299 | void clear_nonexistence_cache(); | |
300 | bool is_cache_empty(); | |
301 | void register_watch(Context *on_finish); | |
302 | uint64_t prune_parent_extents(vector<pair<uint64_t,uint64_t> >& objectx, | |
303 | uint64_t overlap); | |
304 | ||
305 | void flush_async_operations(); | |
306 | void flush_async_operations(Context *on_finish); | |
307 | ||
308 | int flush(); | |
309 | void flush(Context *on_safe); | |
310 | ||
311 | void cancel_async_requests(); | |
312 | void cancel_async_requests(Context *on_finish); | |
313 | ||
b32b8144 FG |
314 | void apply_metadata(const std::map<std::string, bufferlist> &meta, |
315 | bool thread_safe); | |
7c673cae FG |
316 | |
317 | ExclusiveLock<ImageCtx> *create_exclusive_lock(); | |
318 | ObjectMap<ImageCtx> *create_object_map(uint64_t snap_id); | |
319 | Journal<ImageCtx> *create_journal(); | |
320 | ||
321 | void clear_pending_completions(); | |
322 | ||
323 | void set_image_name(const std::string &name); | |
324 | ||
325 | void notify_update(); | |
326 | void notify_update(Context *on_finish); | |
327 | ||
328 | exclusive_lock::Policy *get_exclusive_lock_policy() const; | |
329 | void set_exclusive_lock_policy(exclusive_lock::Policy *policy); | |
330 | ||
331 | journal::Policy *get_journal_policy() const; | |
332 | void set_journal_policy(journal::Policy *policy); | |
333 | ||
334 | static void get_thread_pool_instance(CephContext *cct, | |
335 | ThreadPool **thread_pool, | |
336 | ContextWQ **op_work_queue); | |
337 | static void get_timer_instance(CephContext *cct, SafeTimer **timer, | |
338 | Mutex **timer_lock); | |
339 | }; | |
340 | } | |
341 | ||
342 | #endif |