]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | #ifndef CEPH_OBJECTSTORE_H | |
15 | #define CEPH_OBJECTSTORE_H | |
16 | ||
f67539c2 | 17 | #include "include/buffer.h" |
9f95a23c | 18 | #include "include/common_fwd.h" |
7c673cae | 19 | #include "include/Context.h" |
f67539c2 | 20 | #include "include/interval_set.h" |
11fdf7f2 | 21 | #include "include/stringify.h" |
f67539c2 TL |
22 | #include "include/types.h" |
23 | ||
7c673cae FG |
24 | #include "osd/osd_types.h" |
25 | #include "common/TrackedOp.h" | |
26 | #include "common/WorkQueue.h" | |
27 | #include "ObjectMap.h" | |
9f95a23c | 28 | #include "os/Transaction.h" |
7c673cae FG |
29 | |
30 | #include <errno.h> | |
31 | #include <sys/stat.h> | |
7c673cae | 32 | #include <map> |
20effc67 TL |
33 | #include <memory> |
34 | #include <vector> | |
7c673cae | 35 | |
f67539c2 | 36 | #if defined(__APPLE__) || defined(__FreeBSD__) || defined(__sun) || defined(_WIN32) |
7c673cae FG |
37 | #include <sys/statvfs.h> |
38 | #else | |
39 | #include <sys/vfs.h> /* or <sys/statfs.h> */ | |
11fdf7f2 | 40 | #endif |
7c673cae | 41 | |
7c673cae FG |
42 | namespace ceph { |
43 | class Formatter; | |
44 | } | |
45 | ||
46 | /* | |
47 | * low-level interface to the local OSD file system | |
48 | */ | |
49 | ||
50 | class Logger; | |
11fdf7f2 | 51 | class ContextQueue; |
7c673cae | 52 | |
9f95a23c TL |
53 | static inline void encode(const std::map<std::string,ceph::buffer::ptr> *attrset, ceph::buffer::list &bl) { |
54 | using ceph::encode; | |
11fdf7f2 | 55 | encode(*attrset, bl); |
7c673cae FG |
56 | } |
57 | ||
7c673cae FG |
58 | // Flag bits |
/// Bitmask type used for the ObjectStore creation flags below.
using osflagbits_t = uint32_t;
/// Flag bit 0 (value 1).
constexpr osflagbits_t SKIP_JOURNAL_REPLAY = 1u << 0;
/// Flag bit 1 (value 2).
constexpr osflagbits_t SKIP_MOUNT_OMAP = 1u << 1;
62 | ||
63 | class ObjectStore { | |
64 | protected: | |
9f95a23c | 65 | std::string path; |
7c673cae FG |
66 | |
67 | public: | |
9f95a23c TL |
68 | using Transaction = ceph::os::Transaction; |
69 | ||
7c673cae FG |
70 | CephContext* cct; |
71 | /** | |
72 | * create - create an ObjectStore instance. | |
73 | * | |
74 | * This is invoked once at initialization time. | |
75 | * | |
9f95a23c | 76 | * @param type type of store. This is a std::string from the configuration file. |
7c673cae FG |
77 | * @param data path (or other descriptor) for data |
78 | * @param journal path (or other descriptor) for journal (optional) | |
79 | * @param flags which filestores should check if applicable | |
80 | */ | |
20effc67 TL |
81 | #ifndef WITH_SEASTAR |
82 | static std::unique_ptr<ObjectStore> create( | |
83 | CephContext *cct, | |
84 | const std::string& type, | |
85 | const std::string& data, | |
86 | const std::string& journal, | |
87 | osflagbits_t flags = 0); | |
88 | #endif | |
89 | static std::unique_ptr<ObjectStore> create( | |
90 | CephContext *cct, | |
91 | const std::string& type, | |
92 | const std::string& data); | |
7c673cae FG |
93 | |
94 | /** | |
95 | * probe a block device to learn the uuid of the owning OSD | |
96 | * | |
97 | * @param cct cct | |
98 | * @param path path to device | |
99 | * @param fsid [out] osd uuid | |
100 | */ | |
101 | static int probe_block_device_fsid( | |
102 | CephContext *cct, | |
9f95a23c | 103 | const std::string& path, |
7c673cae FG |
104 | uuid_d *fsid); |
105 | ||
106 | /** | |
107 | * Fetch Object Store statistics. | |
108 | * | |
109 | * Currently only latency of write and apply times are measured. | |
110 | * | |
111 | * This appears to be called with nothing locked. | |
112 | */ | |
113 | virtual objectstore_perf_stat_t get_cur_stats() = 0; | |
114 | ||
115 | /** | |
116 | * Fetch Object Store performance counters. | |
117 | * | |
118 | * | |
119 | * This appears to be called with nothing locked. | |
120 | */ | |
121 | virtual const PerfCounters* get_perf_counters() const = 0; | |
122 | ||
123 | /** | |
11fdf7f2 | 124 | * a collection also orders transactions |
7c673cae | 125 | * |
11fdf7f2 TL |
126 | * Any transactions queued under a given collection will be applied in |
127 | * sequence. Transactions queued under different collections may run | |
7c673cae FG |
128 | * in parallel. |
129 | * | |
9f95a23c | 130 | * ObjectStore users may get collection handles with open_collection() (or, |
11fdf7f2 | 131 | * for bootstrapping a new collection, create_new_collection()). |
7c673cae | 132 | */ |
11fdf7f2 TL |
133 | struct CollectionImpl : public RefCountedObject { |
134 | const coll_t cid; | |
7c673cae | 135 | |
11fdf7f2 | 136 | /// wait for any queued transactions to apply |
7c673cae FG |
137 | // block until any previous transactions are visible. specifically, |
138 | // collection_list and collection_empty need to reflect prior operations. | |
139 | virtual void flush() = 0; | |
140 | ||
7c673cae FG |
141 | /** |
142 | * Async flush_commit | |
143 | * | |
144 | * There are two cases: | |
11fdf7f2 | 145 | * 1) collection is currently idle: the method returns true. c is |
7c673cae | 146 | * not touched. |
11fdf7f2 TL |
147 | * 2) collection is not idle: the method returns false and c is |
148 | * called asynchronously with a value of 0 once all transactions | |
149 | * queued on this collection prior to the call have been applied | |
7c673cae FG |
150 | * and committed. |
151 | */ | |
11fdf7f2 | 152 | virtual bool flush_commit(Context *c) = 0; |
7c673cae | 153 | |
11fdf7f2 TL |
154 | const coll_t &get_cid() { |
155 | return cid; | |
7c673cae | 156 | } |
9f95a23c TL |
157 | protected: |
158 | CollectionImpl() = delete; | |
159 | CollectionImpl(CephContext* cct, const coll_t& c) : RefCountedObject(cct), cid(c) {} | |
160 | ~CollectionImpl() = default; | |
7c673cae | 161 | }; |
9f95a23c | 162 | using CollectionHandle = ceph::ref_t<CollectionImpl>; |
7c673cae | 163 | |
7c673cae FG |
164 | |
165 | /********************************* | |
166 | * | |
167 | * Object Contents and semantics | |
168 | * | |
169 | * All ObjectStore objects are identified as a named object | |
170 | * (ghobject_t and hobject_t) in a named collection (coll_t). | |
171 | * ObjectStore operations support the creation, mutation, deletion | |
172 | * and enumeration of objects within a collection. Enumeration is | |
173 | * in sorted key order (where keys are sorted by hash). Object names | |
174 | * are globally unique. | |
175 | * | |
176 | * Each object has four distinct parts: byte data, xattrs, omap_header | |
177 | * and omap entries. | |
178 | * | |
179 | * The data portion of an object is conceptually equivalent to a | |
180 | * file in a file system. Random and Partial access for both read | |
181 | * and write operations is required. The ability to have a sparse | |
182 | * implementation of the data portion of an object is beneficial for | |
183 | * some workloads, but not required. There is a system-wide limit on | |
184 | * the maximum size of an object, which is typically around 100 MB. | |
185 | * | |
186 | * Xattrs are equivalent to the extended attributes of file | |
9f95a23c TL |
187 | * systems. Xattrs are a set of key/value pairs. Sub-value access |
188 | * is not required. It is possible to enumerate the set of xattrs in |
7c673cae FG |
189 | * key order. At the implementation level, xattrs are used |
190 | * exclusively internal to Ceph and the implementer can expect the | |
191 | * total size of all of the xattrs on an object to be relatively | |
192 | * small, i.e., less than 64KB. Much of Ceph assumes that accessing | |
193 | * xattrs on temporally adjacent object accesses (recent past or | |
194 | * near future) is inexpensive. | |
195 | * | |
196 | * omap_header is a single blob of data. It can be read or written | |
197 | * in total. | |
198 | * | |
199 | * Omap entries are conceptually the same as xattrs | |
200 | * but in a different address space. In other words, you can have | |
201 | * the same key as an xattr and an omap entry and they have distinct | |
202 | * values. Enumeration of xattrs doesn't include omap entries and | |
203 | * vice versa. The size and access characteristics of omap entries | |
204 | * are very different from xattrs. In particular, the value portion | |
205 | * of an omap entry can be quite large (MBs). More importantly, the | |
206 | * interface must support efficient range queries on omap entries even | |
207 | * when there are a large number of entries. | |
208 | * | |
209 | *********************************/ | |
210 | ||
211 | /******************************* | |
212 | * | |
213 | * Collections | |
214 | * | |
215 | * A collection is simply a grouping of objects. Collections have | |
216 | * names (coll_t) and can be enumerated in order. Like an | |
9f95a23c | 217 | * individual object, a collection also has a set of xattrs. |
7c673cae | 218 | * |
7c673cae FG |
219 | * |
220 | */ | |
7c673cae | 221 | |
7c673cae | 222 | |
11fdf7f2 TL |
223 | int queue_transaction(CollectionHandle& ch, |
224 | Transaction&& t, | |
225 | TrackedOpRef op = TrackedOpRef(), | |
226 | ThreadPool::TPHandle *handle = NULL) { | |
9f95a23c | 227 | std::vector<Transaction> tls; |
7c673cae | 228 | tls.push_back(std::move(t)); |
11fdf7f2 | 229 | return queue_transactions(ch, tls, op, handle); |
7c673cae FG |
230 | } |
231 | ||
232 | virtual int queue_transactions( | |
9f95a23c | 233 | CollectionHandle& ch, std::vector<Transaction>& tls, |
7c673cae FG |
234 | TrackedOpRef op = TrackedOpRef(), |
235 | ThreadPool::TPHandle *handle = NULL) = 0; | |
236 | ||
237 | ||
7c673cae FG |
238 | public: |
239 | ObjectStore(CephContext* cct, | |
240 | const std::string& path_) : path(path_), cct(cct) {} | |
241 | virtual ~ObjectStore() {} | |
242 | ||
243 | // no copying | |
244 | explicit ObjectStore(const ObjectStore& o) = delete; | |
245 | const ObjectStore& operator=(const ObjectStore& o) = delete; | |
246 | ||
247 | // versioning | |
248 | virtual int upgrade() { | |
249 | return 0; | |
250 | } | |
251 | ||
9f95a23c TL |
252 | virtual void get_db_statistics(ceph::Formatter *f) { } |
253 | virtual void generate_db_histogram(ceph::Formatter *f) { } | |
254 | virtual int flush_cache(std::ostream *os = NULL) { return -1; } | |
255 | virtual void dump_perf_counters(ceph::Formatter *f) {} | |
256 | virtual void dump_cache_stats(ceph::Formatter *f) {} | |
257 | virtual void dump_cache_stats(std::ostream& os) {} | |
7c673cae | 258 | |
9f95a23c | 259 | virtual std::string get_type() = 0; |
7c673cae FG |
260 | |
261 | // mgmt | |
262 | virtual bool test_mount_in_use() = 0; | |
263 | virtual int mount() = 0; | |
264 | virtual int umount() = 0; | |
265 | virtual int fsck(bool deep) { | |
266 | return -EOPNOTSUPP; | |
267 | } | |
3efd9988 FG |
268 | virtual int repair(bool deep) { |
269 | return -EOPNOTSUPP; | |
270 | } | |
eafe8130 TL |
271 | virtual int quick_fix() { |
272 | return -EOPNOTSUPP; | |
273 | } | |
7c673cae FG |
274 | |
275 | virtual void set_cache_shards(unsigned num) { } | |
276 | ||
277 | /** | |
278 | * Returns 0 if the hobject is valid, -error otherwise | |
279 | * | |
280 | * Errors: | |
281 | * -ENAMETOOLONG: locator/namespace/name too large | |
282 | */ | |
283 | virtual int validate_hobject_key(const hobject_t &obj) const = 0; | |
284 | ||
285 | virtual unsigned get_max_attr_name_length() = 0; | |
286 | virtual int mkfs() = 0; // wipe | |
287 | virtual int mkjournal() = 0; // journal only | |
288 | virtual bool needs_journal() = 0; //< requires a journal | |
289 | virtual bool wants_journal() = 0; //< prefers a journal | |
290 | virtual bool allows_journal() = 0; //< allows a journal | |
291 | ||
9f95a23c TL |
292 | // return store min allocation size, if applicable |
293 | virtual uint64_t get_min_alloc_size() const { | |
294 | return 0; | |
295 | } | |
296 | ||
11fdf7f2 | 297 | /// enumerate hardware devices (by 'devname', e.g., 'sda' as in /sys/block/sda) |
9f95a23c | 298 | virtual int get_devices(std::set<std::string> *devls) { |
11fdf7f2 TL |
299 | return -EOPNOTSUPP; |
300 | } | |
301 | ||
302 | /// true if a txn is readable immediately after it is queued. | |
303 | virtual bool is_sync_onreadable() const { | |
304 | return true; | |
305 | } | |
306 | ||
31f18b77 FG |
307 | /** |
308 | * is_rotational | |
309 | * | |
310 | * Check whether store is backed by a rotational (HDD) or non-rotational | |
311 | * (SSD) device. | |
312 | * | |
313 | * This must be usable *before* the store is mounted. | |
314 | * | |
315 | * @return true for HDD, false for SSD | |
316 | */ | |
317 | virtual bool is_rotational() { | |
318 | return true; | |
319 | } | |
320 | ||
d2e6a577 FG |
321 | /** |
322 | * is_journal_rotational | |
323 | * | |
324 | * Check whether journal is backed by a rotational (HDD) or non-rotational | |
325 | * (SSD) device. | |
326 | * | |
327 | * | |
328 | * @return true for HDD, false for SSD | |
329 | */ | |
330 | virtual bool is_journal_rotational() { | |
331 | return true; | |
332 | } | |
333 | ||
9f95a23c | 334 | virtual std::string get_default_device_class() { |
224ce89b WB |
335 | return is_rotational() ? "hdd" : "ssd"; |
336 | } | |
337 | ||
11fdf7f2 TL |
338 | virtual int get_numa_node( |
339 | int *numa_node, | |
9f95a23c TL |
340 | std::set<int> *nodes, |
341 | std::set<std::string> *failed) { | |
11fdf7f2 TL |
342 | return -EOPNOTSUPP; |
343 | } | |
344 | ||
345 | ||
7c673cae FG |
346 | virtual bool can_sort_nibblewise() { |
347 | return false; // assume a backend cannot, unless it says otherwise | |
348 | } | |
349 | ||
11fdf7f2 TL |
350 | virtual int statfs(struct store_statfs_t *buf, |
351 | osd_alert_list_t* alerts = nullptr) = 0; | |
9f95a23c TL |
352 | virtual int pool_statfs(uint64_t pool_id, struct store_statfs_t *buf, |
353 | bool *per_pool_omap) = 0; | |
7c673cae | 354 | |
f67539c2 | 355 | virtual void collect_metadata(std::map<std::string,std::string> *pm) { } |
7c673cae FG |
356 | |
357 | /** | |
358 | * write_meta - write a simple configuration key out-of-band | |
359 | * | |
360 | * Write a simple key/value pair for basic store configuration | |
361 | * (e.g., a uuid or magic number) to an unopened/unmounted store. | |
362 | * The default implementation writes this to a plaintext file in the | |
363 | * path. | |
364 | * | |
365 | * A newline is appended. | |
366 | * | |
367 | * @param key key name (e.g., "fsid") | |
9f95a23c | 368 | * @param value value (e.g., a uuid rendered as a std::string) |
7c673cae FG |
369 | * @returns 0 for success, or an error code |
370 | */ | |
371 | virtual int write_meta(const std::string& key, | |
372 | const std::string& value); | |
373 | ||
374 | /** | |
375 | * read_meta - read a simple configuration key out-of-band | |
376 | * | |
377 | * Read a simple key value from an unopened/unmounted store. | |
378 | * | |
379 | * Trailing whitespace is stripped off. | |
380 | * | |
381 | * @param key key name | |
9f95a23c | 382 | * @param value pointer to value std::string |
7c673cae FG |
383 | * @returns 0 for success, or an error code |
384 | */ | |
385 | virtual int read_meta(const std::string& key, | |
386 | std::string *value); | |
387 | ||
388 | /** | |
389 | * get ideal max value for collection_list() | |
390 | * | |
391 | * default to some arbitrary values; the implementation will override. | |
392 | */ | |
393 | virtual int get_ideal_list_max() { return 64; } | |
394 | ||
395 | ||
396 | /** | |
397 | * get a collection handle | |
398 | * | |
399 | * Provide a trivial handle as a default to avoid converting legacy | |
400 | * implementations. | |
401 | */ | |
11fdf7f2 TL |
402 | virtual CollectionHandle open_collection(const coll_t &cid) = 0; |
403 | ||
404 | /** | |
405 | * get a collection handle for a soon-to-be-created collection | |
406 | * | |
407 | * This handle must be used by queue_transaction that includes a | |
408 | * create_collection call in order to become valid. It will become the | |
409 | * reference to the created collection. | |
410 | */ | |
411 | virtual CollectionHandle create_new_collection(const coll_t &cid) = 0; | |
7c673cae | 412 | |
11fdf7f2 | 413 | /** |
9f95a23c | 414 | * set the ContextQueue for a collection |
11fdf7f2 TL |
415 | * |
416 | * After that, oncommits of Transaction will queue into commit_queue. | |
417 | * And osd ShardThread will call oncommits. | |
418 | */ | |
419 | virtual void set_collection_commit_queue(const coll_t &cid, ContextQueue *commit_queue) = 0; | |
7c673cae FG |
420 | |
421 | /** | |
422 | * Synchronous read operations | |
423 | */ | |
424 | ||
425 | /** | |
20effc67 | 426 | * exists -- Test for existence of object |
7c673cae FG |
427 | * |
428 | * @param cid collection for object | |
429 | * @param oid oid of object | |
430 | * @returns true if object exists, false otherwise | |
431 | */ | |
11fdf7f2 | 432 | virtual bool exists(CollectionHandle& c, const ghobject_t& oid) = 0; |
7c673cae | 433 | /** |
9f95a23c | 434 | * set_collection_opts -- set pool options for a collection |
7c673cae FG |
435 | * |
436 | * @param cid collection | |
437 | * @param opts new collection options | |
438 | * @returns 0 on success, negative error code on failure. | |
439 | */ | |
440 | virtual int set_collection_opts( | |
11fdf7f2 | 441 | CollectionHandle& c, |
7c673cae FG |
442 | const pool_opts_t& opts) = 0; |
443 | ||
444 | /** | |
445 | * stat -- get information for an object | |
446 | * | |
447 | * @param cid collection for object | |
448 | * @param oid oid of object | |
449 | * @param st output information for the object | |
450 | * @param allow_eio if false, assert on -EIO operation failure | |
451 | * @returns 0 on success, negative error code on failure. | |
452 | */ | |
7c673cae FG |
453 | virtual int stat( |
454 | CollectionHandle &c, | |
455 | const ghobject_t& oid, | |
456 | struct stat *st, | |
11fdf7f2 | 457 | bool allow_eio = false) = 0; |
7c673cae FG |
458 | /** |
459 | * read -- read a byte range of data from an object | |
460 | * | |
461 | * Note: if reading from an offset past the end of the object, we | |
462 | * return 0 (not, say, -EINVAL). | |
463 | * | |
464 | * @param cid collection for object | |
465 | * @param oid oid of object | |
466 | * @param offset location offset of first byte to be read | |
467 | * @param len number of bytes to be read | |
9f95a23c | 468 | * @param bl output ceph::buffer::list |
7c673cae | 469 | * @param op_flags is CEPH_OSD_OP_FLAG_* |
7c673cae FG |
470 | * @returns number of bytes read on success, or negative error code on failure. |
471 | */ | |
7c673cae FG |
472 | virtual int read( |
473 | CollectionHandle &c, | |
474 | const ghobject_t& oid, | |
475 | uint64_t offset, | |
476 | size_t len, | |
9f95a23c | 477 | ceph::buffer::list& bl, |
11fdf7f2 | 478 | uint32_t op_flags = 0) = 0; |
7c673cae FG |
479 | |
480 | /** | |
9f95a23c | 481 | * fiemap -- get extent map of data of an object |
7c673cae | 482 | * |
9f95a23c TL |
483 | * Returns an encoded map of the extents of an object's data portion |
484 | * (std::map<offset,size>). | |
7c673cae FG |
485 | * |
486 | * A non-enlightened implementation is free to return the extent (offset, len) | |
487 | * as the sole extent. | |
488 | * | |
489 | * @param cid collection for object | |
490 | * @param oid oid of object | |
491 | * @param offset location offset of first byte to be read | |
492 | * @param len number of bytes to be read | |
9f95a23c | 493 | * @param bl output ceph::buffer::list for extent map information. |
7c673cae FG |
494 | * @returns 0 on success, negative error code on failure. |
495 | */ | |
7c673cae | 496 | virtual int fiemap(CollectionHandle& c, const ghobject_t& oid, |
9f95a23c | 497 | uint64_t offset, size_t len, ceph::buffer::list& bl) = 0; |
7c673cae | 498 | virtual int fiemap(CollectionHandle& c, const ghobject_t& oid, |
9f95a23c TL |
499 | uint64_t offset, size_t len, std::map<uint64_t, uint64_t>& destmap) = 0; |
500 | ||
501 | /** | |
502 | * readv -- read specific intervals from an object; |
503 | * caller must call fiemap to fill in the extent-map first. | |
504 | * | |
505 | * Note: if reading from an offset past the end of the object, we | |
506 | * return 0 (not, say, -EINVAL). Also the default version of readv | |
507 | * reads each extent separately synchronously, which can become horribly | |
508 | * inefficient if the physical layout of the pushing object gets massively |
509 | * fragmented and hence should be overridden by any real os that |
510 | * cares about performance. |
511 | * | |
512 | * @param cid collection for object | |
513 | * @param oid oid of object | |
514 | * @param m intervals to be read | |
515 | * @param bl output ceph::buffer::list | |
516 | * @param op_flags is CEPH_OSD_OP_FLAG_* | |
517 | * @returns number of bytes read on success, or negative error code on failure. | |
518 | */ | |
519 | virtual int readv( | |
520 | CollectionHandle &c, | |
521 | const ghobject_t& oid, | |
522 | interval_set<uint64_t>& m, | |
523 | ceph::buffer::list& bl, | |
524 | uint32_t op_flags = 0) { | |
525 | int total = 0; | |
526 | for (auto p = m.begin(); p != m.end(); p++) { | |
f67539c2 | 527 | ceph::buffer::list t; |
9f95a23c TL |
528 | int r = read(c, oid, p.get_start(), p.get_len(), t, op_flags); |
529 | if (r < 0) | |
530 | return r; | |
531 | total += r; | |
532 | // prune fiemap, if necessary | |
533 | if (p.get_len() != t.length()) { | |
534 | auto save = p++; | |
535 | if (t.length() == 0) { | |
536 | m.erase(save); // Remove this empty interval | |
537 | } else { | |
538 | save.set_len(t.length()); // fix interval length | |
539 | bl.claim_append(t); | |
540 | } | |
541 | // Remove any other follow-up intervals present too | |
542 | while (p != m.end()) { | |
543 | save = p++; | |
544 | m.erase(save); | |
545 | } | |
546 | break; | |
547 | } | |
548 | bl.claim_append(t); | |
549 | } | |
550 | return total; | |
551 | } | |
552 | ||
553 | /** | |
554 | * dump_onode -- dumps onode metadata in human readable form, | |
555 | * intended primarily for debugging |
556 | * | |
557 | * @param cid collection for object | |
558 | * @param oid oid of object | |
559 | * @param section_name section name to create and print under | |
560 | * @param f Formatter class instance to print to | |
561 | * @returns 0 on success, negative error code on failure. | |
562 | */ | |
563 | virtual int dump_onode( | |
564 | CollectionHandle &c, | |
565 | const ghobject_t& oid, | |
f67539c2 TL |
566 | const std::string& section_name, |
567 | ceph::Formatter *f) { | |
9f95a23c TL |
568 | return -ENOTSUP; |
569 | } | |
7c673cae FG |
570 | |
571 | /** | |
572 | * getattr -- get an xattr of an object | |
573 | * | |
574 | * @param cid collection for object | |
575 | * @param oid oid of object | |
576 | * @param name name of attr to read | |
577 | * @param value place to put output result. | |
578 | * @returns 0 on success, negative error code on failure. | |
579 | */ | |
7c673cae | 580 | virtual int getattr(CollectionHandle &c, const ghobject_t& oid, |
9f95a23c | 581 | const char *name, ceph::buffer::ptr& value) = 0; |
7c673cae FG |
582 | |
583 | /** | |
584 | * getattr -- get an xattr of an object | |
585 | * | |
586 | * @param cid collection for object | |
587 | * @param oid oid of object | |
588 | * @param name name of attr to read | |
589 | * @param value place to put output result. | |
590 | * @returns 0 on success, negative error code on failure. | |
591 | */ | |
7c673cae FG |
592 | int getattr( |
593 | CollectionHandle &c, const ghobject_t& oid, | |
9f95a23c TL |
594 | const std::string& name, ceph::buffer::list& value) { |
595 | ceph::buffer::ptr bp; | |
7c673cae FG |
596 | int r = getattr(c, oid, name.c_str(), bp); |
597 | value.push_back(bp); | |
598 | return r; | |
599 | } | |
600 | ||
601 | /** | |
602 | * getattrs -- get all of the xattrs of an object | |
603 | * | |
604 | * @param cid collection for object | |
605 | * @param oid oid of object | |
606 | * @param aset place to put output result. | |
607 | * @returns 0 on success, negative error code on failure. | |
608 | */ | |
7c673cae | 609 | virtual int getattrs(CollectionHandle &c, const ghobject_t& oid, |
20effc67 | 610 | std::map<std::string,ceph::buffer::ptr, std::less<>>& aset) = 0; |
7c673cae FG |
611 | |
612 | /** | |
613 | * getattrs -- get all of the xattrs of an object | |
614 | * | |
615 | * @param cid collection for object | |
616 | * @param oid oid of object | |
617 | * @param aset place to put output result. | |
618 | * @returns 0 on success, negative error code on failure. | |
619 | */ | |
7c673cae | 620 | int getattrs(CollectionHandle &c, const ghobject_t& oid, |
20effc67 TL |
621 | std::map<std::string,ceph::buffer::list,std::less<>>& aset) { |
622 | std::map<std::string,ceph::buffer::ptr,std::less<>> bmap; | |
7c673cae | 623 | int r = getattrs(c, oid, bmap); |
9f95a23c | 624 | for (auto i = bmap.begin(); i != bmap.end(); ++i) { |
7c673cae FG |
625 | aset[i->first].append(i->second); |
626 | } | |
627 | return r; | |
628 | } | |
629 | ||
630 | ||
631 | // collections | |
632 | ||
633 | /** | |
634 | * list_collections -- get all of the collections known to this ObjectStore | |
635 | * | |
9f95a23c | 636 | * @param ls list of the collections in sorted order. |
7c673cae FG |
637 | * @returns 0 on success, negative error code on failure. |
638 | */ | |
9f95a23c | 639 | virtual int list_collections(std::vector<coll_t>& ls) = 0; |
7c673cae FG |
640 | |
641 | /** | |
642 | * does a collection exist? | |
643 | * | |
644 | * @param c collection | |
645 | * @returns true if it exists, false otherwise | |
646 | */ | |
647 | virtual bool collection_exists(const coll_t& c) = 0; | |
648 | ||
649 | /** | |
650 | * is a collection empty? | |
651 | * | |
652 | * @param c collection | |
653 | * @param empty true if the specified collection is empty, false otherwise | |
654 | * @returns 0 on success, negative error code on failure. | |
655 | */ | |
11fdf7f2 | 656 | virtual int collection_empty(CollectionHandle& c, bool *empty) = 0; |
7c673cae FG |
657 | |
658 | /** | |
659 | * return the number of significant bits of the coll_t::pgid. | |
660 | * | |
661 | * This should return what the last create_collection or split_collection | |
9f95a23c | 662 | * set. A legacy backend may return -EAGAIN if the value is unavailable |
7c673cae FG |
663 | * (because we upgraded from an older version, e.g., FileStore). |
664 | */ | |
11fdf7f2 | 665 | virtual int collection_bits(CollectionHandle& c) = 0; |
7c673cae FG |
666 | |
667 | ||
668 | /** | |
9f95a23c | 669 | * list contents of a collection that fall in the range [start, end), returning no more than a specified number of results |
7c673cae FG |
670 | * |
671 | * @param c collection | |
672 | * @param start list objects that sort >= this value | |
673 | * @param end list objects that sort < this value | |
674 | * @param max return no more than this many results | |
675 | * @param seq return no objects with snap < seq | |
676 | * @param ls [out] result | |
677 | * @param next [out] next item sorts >= this value | |
678 | * @return zero on success, or negative error | |
679 | */ | |
7c673cae FG |
680 | virtual int collection_list(CollectionHandle &c, |
681 | const ghobject_t& start, const ghobject_t& end, | |
682 | int max, | |
9f95a23c | 683 | std::vector<ghobject_t> *ls, ghobject_t *next) = 0; |
7c673cae | 684 | |
f91f0fd5 TL |
685 | virtual int collection_list_legacy(CollectionHandle &c, |
686 | const ghobject_t& start, | |
687 | const ghobject_t& end, int max, | |
688 | std::vector<ghobject_t> *ls, | |
689 | ghobject_t *next) { | |
690 | return collection_list(c, start, end, max, ls, next); | |
691 | } | |
7c673cae FG |
692 | |
693 | /// OMAP | |
694 | /// Get omap contents | |
7c673cae FG |
695 | virtual int omap_get( |
696 | CollectionHandle &c, ///< [in] Collection containing oid | |
697 | const ghobject_t &oid, ///< [in] Object containing omap | |
9f95a23c TL |
698 | ceph::buffer::list *header, ///< [out] omap header |
699 | std::map<std::string, ceph::buffer::list> *out /// < [out] Key to value std::map | |
11fdf7f2 | 700 | ) = 0; |
7c673cae FG |
701 | |
702 | /// Get omap header | |
7c673cae FG |
703 | virtual int omap_get_header( |
704 | CollectionHandle &c, ///< [in] Collection containing oid | |
705 | const ghobject_t &oid, ///< [in] Object containing omap | |
9f95a23c | 706 | ceph::buffer::list *header, ///< [out] omap header |
7c673cae | 707 | bool allow_eio = false ///< [in] don't assert on eio |
11fdf7f2 | 708 | ) = 0; |
7c673cae FG |
709 | |
710 | /// Get keys defined on oid | |
7c673cae FG |
711 | virtual int omap_get_keys( |
712 | CollectionHandle &c, ///< [in] Collection containing oid | |
713 | const ghobject_t &oid, ///< [in] Object containing omap | |
9f95a23c | 714 | std::set<std::string> *keys ///< [out] Keys defined on oid |
11fdf7f2 | 715 | ) = 0; |
7c673cae FG |
716 | |
717 | /// Get key values | |
7c673cae FG |
718 | virtual int omap_get_values( |
719 | CollectionHandle &c, ///< [in] Collection containing oid | |
720 | const ghobject_t &oid, ///< [in] Object containing omap | |
9f95a23c TL |
721 | const std::set<std::string> &keys, ///< [in] Keys to get |
722 | std::map<std::string, ceph::buffer::list> *out ///< [out] Returned keys and values | |
11fdf7f2 | 723 | ) = 0; |
7c673cae | 724 | |
9f95a23c TL |
725 | #ifdef WITH_SEASTAR |
726 | virtual int omap_get_values( | |
727 | CollectionHandle &c, ///< [in] Collection containing oid | |
728 | const ghobject_t &oid, ///< [in] Object containing omap | |
729 | const std::optional<std::string> &start_after, ///< [in] Keys to get | |
730 | std::map<std::string, ceph::buffer::list> *out ///< [out] Returned keys and values | |
731 | ) = 0; | |
732 | #endif | |
733 | ||
7c673cae | 734 | /// Filters keys into out which are defined on oid |
7c673cae FG |
735 | virtual int omap_check_keys( |
736 | CollectionHandle &c, ///< [in] Collection containing oid | |
737 | const ghobject_t &oid, ///< [in] Object containing omap | |
9f95a23c TL |
738 | const std::set<std::string> &keys, ///< [in] Keys to check |
739 | std::set<std::string> *out ///< [out] Subset of keys defined on oid | |
11fdf7f2 | 740 | ) = 0; |
7c673cae FG |
741 | |
742 | /** | |
743 | * Returns an object map iterator | |
744 | * | |
745 | * Warning! The returned iterator is an implicit lock on filestore | |
746 | * operations in c. Do not use filestore methods on c while the returned | |
747 | * iterator is live. (Filling in a transaction is no problem). | |
748 | * | |
749 | * @return iterator, null on error | |
750 | */ | |
7c673cae FG |
751 | virtual ObjectMap::ObjectMapIterator get_omap_iterator( |
752 | CollectionHandle &c, ///< [in] collection | |
753 | const ghobject_t &oid ///< [in] object | |
11fdf7f2 | 754 | ) = 0; |
7c673cae FG |
755 | |
756 | virtual int flush_journal() { return -EOPNOTSUPP; } | |
757 | ||
9f95a23c | 758 | virtual int dump_journal(std::ostream& out) { return -EOPNOTSUPP; } |
7c673cae | 759 | |
9f95a23c | 760 | virtual int snapshot(const std::string& name) { return -EOPNOTSUPP; } |
7c673cae FG |
761 | |
762 | /** | |
763 | * Set and get internal fsid for this instance. No external data is modified | |
764 | */ | |
765 | virtual void set_fsid(uuid_d u) = 0; | |
766 | virtual uuid_d get_fsid() = 0; | |
767 | ||
768 | /** | |
769 | * Estimates additional disk space used by the specified amount of objects and caused by file allocation granularity and metadata store | |
770 | * - num objects - total (including whiteouts) object count to measure used space for. | |
771 | */ | |
772 | virtual uint64_t estimate_objects_overhead(uint64_t num_objects) = 0; | |
773 | ||
774 | ||
775 | // DEBUG | |
776 | virtual void inject_data_error(const ghobject_t &oid) {} | |
777 | virtual void inject_mdata_error(const ghobject_t &oid) {} | |
224ce89b WB |
778 | |
779 | virtual void compact() {} | |
28e407b8 AA |
780 | virtual bool has_builtin_csum() const { |
781 | return false; | |
782 | } | |
7c673cae | 783 | }; |
7c673cae FG |
784 | |
785 | #endif |