]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | #ifndef CEPH_OBJECTSTORE_H | |
15 | #define CEPH_OBJECTSTORE_H | |
16 | ||
f67539c2 | 17 | #include "include/buffer.h" |
9f95a23c | 18 | #include "include/common_fwd.h" |
7c673cae | 19 | #include "include/Context.h" |
f67539c2 | 20 | #include "include/interval_set.h" |
11fdf7f2 | 21 | #include "include/stringify.h" |
f67539c2 TL |
22 | #include "include/types.h" |
23 | ||
7c673cae FG |
24 | #include "osd/osd_types.h" |
25 | #include "common/TrackedOp.h" | |
26 | #include "common/WorkQueue.h" | |
27 | #include "ObjectMap.h" | |
9f95a23c | 28 | #include "os/Transaction.h" |
7c673cae FG |
29 | |
30 | #include <errno.h> | |
31 | #include <sys/stat.h> | |
7c673cae | 32 | #include <map> |
20effc67 TL |
33 | #include <memory> |
34 | #include <vector> | |
7c673cae | 35 | |
f67539c2 | 36 | #if defined(__APPLE__) || defined(__FreeBSD__) || defined(__sun) || defined(_WIN32) |
7c673cae FG |
37 | #include <sys/statvfs.h> |
38 | #else | |
39 | #include <sys/vfs.h> /* or <sys/statfs.h> */ | |
11fdf7f2 | 40 | #endif |
7c673cae | 41 | |
7c673cae FG |
42 | namespace ceph { |
43 | class Formatter; | |
44 | } | |
45 | ||
46 | /* | |
47 | * low-level interface to the local OSD file system | |
48 | */ | |
49 | ||
50 | class Logger; | |
11fdf7f2 | 51 | class ContextQueue; |
7c673cae | 52 | |
9f95a23c TL |
53 | static inline void encode(const std::map<std::string,ceph::buffer::ptr> *attrset, ceph::buffer::list &bl) { |
54 | using ceph::encode; | |
11fdf7f2 | 55 | encode(*attrset, bl); |
7c673cae FG |
56 | } |
57 | ||
7c673cae FG |
// Flag bits.  osflagbits_t is the type of the `flags` argument taken by
// ObjectStore::create(); each SKIP_* constant below occupies one bit.
using osflagbits_t = uint32_t;
constexpr int SKIP_JOURNAL_REPLAY = 1 << 0;
constexpr int SKIP_MOUNT_OMAP = 1 << 1;
62 | ||
63 | class ObjectStore { | |
64 | protected: | |
9f95a23c | 65 | std::string path; |
7c673cae FG |
66 | |
67 | public: | |
9f95a23c TL |
68 | using Transaction = ceph::os::Transaction; |
69 | ||
7c673cae FG |
70 | CephContext* cct; |
71 | /** | |
72 | * create - create an ObjectStore instance. | |
73 | * | |
74 | * This is invoked once at initialization time. | |
75 | * | |
9f95a23c | 76 | * @param type type of store. This is a std::string from the configuration file. |
7c673cae FG |
77 | * @param data path (or other descriptor) for data |
78 | * @param journal path (or other descriptor) for journal (optional) | |
79 | * @param flags which filestores should check if applicable | |
80 | */ | |
20effc67 TL |
81 | #ifndef WITH_SEASTAR |
82 | static std::unique_ptr<ObjectStore> create( | |
83 | CephContext *cct, | |
84 | const std::string& type, | |
85 | const std::string& data, | |
86 | const std::string& journal, | |
87 | osflagbits_t flags = 0); | |
88 | #endif | |
89 | static std::unique_ptr<ObjectStore> create( | |
90 | CephContext *cct, | |
91 | const std::string& type, | |
92 | const std::string& data); | |
7c673cae FG |
93 | |
94 | /** | |
95 | * probe a block device to learn the uuid of the owning OSD | |
96 | * | |
97 | * @param cct cct | |
98 | * @param path path to device | |
99 | * @param fsid [out] osd uuid | |
100 | */ | |
101 | static int probe_block_device_fsid( | |
102 | CephContext *cct, | |
9f95a23c | 103 | const std::string& path, |
7c673cae FG |
104 | uuid_d *fsid); |
105 | ||
106 | /** | |
107 | * Fetch Object Store statistics. | |
108 | * | |
109 | * Currently only latency of write and apply times are measured. | |
110 | * | |
111 | * This appears to be called with nothing locked. | |
112 | */ | |
113 | virtual objectstore_perf_stat_t get_cur_stats() = 0; | |
114 | ||
115 | /** | |
116 | * Fetch Object Store performance counters. | |
117 | * | |
118 | * | |
119 | * This appears to be called with nothing locked. | |
120 | */ | |
121 | virtual const PerfCounters* get_perf_counters() const = 0; | |
122 | ||
123 | /** | |
11fdf7f2 | 124 | * a collection also orders transactions |
7c673cae | 125 | * |
11fdf7f2 TL |
126 | * Any transactions queued under a given collection will be applied in |
127 | * sequence. Transactions queued under different collections may run | |
7c673cae FG |
128 | * in parallel. |
129 | * | |
9f95a23c | 130 | * ObjectStore users may get collection handles with open_collection() (or, |
11fdf7f2 | 131 | * for bootstrapping a new collection, create_new_collection()). |
7c673cae | 132 | */ |
11fdf7f2 TL |
133 | struct CollectionImpl : public RefCountedObject { |
134 | const coll_t cid; | |
7c673cae | 135 | |
11fdf7f2 | 136 | /// wait for any queued transactions to apply |
7c673cae FG |
137 | // block until any previous transactions are visible. specifically, |
138 | // collection_list and collection_empty need to reflect prior operations. | |
139 | virtual void flush() = 0; | |
140 | ||
7c673cae FG |
141 | /** |
142 | * Async flush_commit | |
143 | * | |
144 | * There are two cases: | |
11fdf7f2 | 145 | * 1) collection is currently idle: the method returns true. c is |
7c673cae | 146 | * not touched. |
11fdf7f2 TL |
147 | * 2) collection is not idle: the method returns false and c is |
148 | * called asynchronously with a value of 0 once all transactions | |
149 | * queued on this collection prior to the call have been applied | |
7c673cae FG |
150 | * and committed. |
151 | */ | |
11fdf7f2 | 152 | virtual bool flush_commit(Context *c) = 0; |
7c673cae | 153 | |
11fdf7f2 TL |
154 | const coll_t &get_cid() { |
155 | return cid; | |
7c673cae | 156 | } |
9f95a23c TL |
157 | protected: |
158 | CollectionImpl() = delete; | |
159 | CollectionImpl(CephContext* cct, const coll_t& c) : RefCountedObject(cct), cid(c) {} | |
160 | ~CollectionImpl() = default; | |
7c673cae | 161 | }; |
9f95a23c | 162 | using CollectionHandle = ceph::ref_t<CollectionImpl>; |
7c673cae | 163 | |
7c673cae FG |
164 | |
165 | /********************************* | |
166 | * | |
167 | * Object Contents and semantics | |
168 | * | |
169 | * All ObjectStore objects are identified as a named object | |
170 | * (ghobject_t and hobject_t) in a named collection (coll_t). | |
171 | * ObjectStore operations support the creation, mutation, deletion | |
172 | * and enumeration of objects within a collection. Enumeration is | |
173 | * in sorted key order (where keys are sorted by hash). Object names | |
174 | * are globally unique. | |
175 | * | |
176 | * Each object has four distinct parts: byte data, xattrs, omap_header | |
177 | * and omap entries. | |
178 | * | |
179 | * The data portion of an object is conceptually equivalent to a | |
180 | * file in a file system. Random and Partial access for both read | |
181 | * and write operations is required. The ability to have a sparse | |
182 | * implementation of the data portion of an object is beneficial for | |
183 | * some workloads, but not required. There is a system-wide limit on | |
184 | * the maximum size of an object, which is typically around 100 MB. | |
185 | * | |
186 | * Xattrs are equivalent to the extended attributes of file | |
9f95a23c TL |
187 | * systems. Xattrs are a set of key/value pairs. Sub-value access |
188 | * is not required. It is possible to enumerate the set of xattrs in | |
7c673cae FG |
189 | * key order. At the implementation level, xattrs are used |
190 | * exclusively internal to Ceph and the implementer can expect the | |
191 | * total size of all of the xattrs on an object to be relatively | |
192 | * small, i.e., less than 64KB. Much of Ceph assumes that accessing | |
193 | * xattrs on temporally adjacent object accesses (recent past or | |
194 | * near future) is inexpensive. | |
195 | * | |
196 | * omap_header is a single blob of data. It can be read or written | |
197 | * in total. | |
198 | * | |
199 | * Omap entries are conceptually the same as xattrs | |
200 | * but in a different address space. In other words, you can have | |
201 | * the same key as an xattr and an omap entry and they have distinct | |
202 | * values. Enumeration of xattrs doesn't include omap entries and | |
203 | * vice versa. The size and access characteristics of omap entries | |
204 | * are very different from xattrs. In particular, the value portion | |
205 | * of an omap entry can be quite large (MBs). More importantly, the | |
206 | * interface must support efficient range queries on omap entries even | |
207 | * when there is a large number of entries. | |
208 | * | |
209 | *********************************/ | |
210 | ||
211 | /******************************* | |
212 | * | |
213 | * Collections | |
214 | * | |
215 | * A collection is simply a grouping of objects. Collections have | |
216 | * names (coll_t) and can be enumerated in order. Like an | |
9f95a23c | 217 | * individual object, a collection also has a set of xattrs. |
7c673cae | 218 | * |
7c673cae FG |
219 | * |
220 | */ | |
7c673cae | 221 | |
7c673cae | 222 | |
11fdf7f2 TL |
223 | int queue_transaction(CollectionHandle& ch, |
224 | Transaction&& t, | |
225 | TrackedOpRef op = TrackedOpRef(), | |
226 | ThreadPool::TPHandle *handle = NULL) { | |
9f95a23c | 227 | std::vector<Transaction> tls; |
7c673cae | 228 | tls.push_back(std::move(t)); |
11fdf7f2 | 229 | return queue_transactions(ch, tls, op, handle); |
7c673cae FG |
230 | } |
231 | ||
232 | virtual int queue_transactions( | |
9f95a23c | 233 | CollectionHandle& ch, std::vector<Transaction>& tls, |
7c673cae FG |
234 | TrackedOpRef op = TrackedOpRef(), |
235 | ThreadPool::TPHandle *handle = NULL) = 0; | |
236 | ||
237 | ||
7c673cae FG |
238 | public: |
239 | ObjectStore(CephContext* cct, | |
240 | const std::string& path_) : path(path_), cct(cct) {} | |
241 | virtual ~ObjectStore() {} | |
242 | ||
243 | // no copying | |
244 | explicit ObjectStore(const ObjectStore& o) = delete; | |
245 | const ObjectStore& operator=(const ObjectStore& o) = delete; | |
246 | ||
247 | // versioning | |
248 | virtual int upgrade() { | |
249 | return 0; | |
250 | } | |
251 | ||
9f95a23c TL |
252 | virtual void get_db_statistics(ceph::Formatter *f) { } |
253 | virtual void generate_db_histogram(ceph::Formatter *f) { } | |
254 | virtual int flush_cache(std::ostream *os = NULL) { return -1; } | |
255 | virtual void dump_perf_counters(ceph::Formatter *f) {} | |
256 | virtual void dump_cache_stats(ceph::Formatter *f) {} | |
257 | virtual void dump_cache_stats(std::ostream& os) {} | |
7c673cae | 258 | |
9f95a23c | 259 | virtual std::string get_type() = 0; |
7c673cae FG |
260 | |
261 | // mgmt | |
262 | virtual bool test_mount_in_use() = 0; | |
263 | virtual int mount() = 0; | |
264 | virtual int umount() = 0; | |
265 | virtual int fsck(bool deep) { | |
266 | return -EOPNOTSUPP; | |
267 | } | |
3efd9988 FG |
268 | virtual int repair(bool deep) { |
269 | return -EOPNOTSUPP; | |
270 | } | |
eafe8130 TL |
271 | virtual int quick_fix() { |
272 | return -EOPNOTSUPP; | |
273 | } | |
7c673cae FG |
274 | |
275 | virtual void set_cache_shards(unsigned num) { } | |
276 | ||
277 | /** | |
278 | * Returns 0 if the hobject is valid, -error otherwise | |
279 | * | |
280 | * Errors: | |
281 | * -ENAMETOOLONG: locator/namespace/name too large | |
282 | */ | |
283 | virtual int validate_hobject_key(const hobject_t &obj) const = 0; | |
284 | ||
285 | virtual unsigned get_max_attr_name_length() = 0; | |
286 | virtual int mkfs() = 0; // wipe | |
287 | virtual int mkjournal() = 0; // journal only | |
288 | virtual bool needs_journal() = 0; //< requires a journal | |
289 | virtual bool wants_journal() = 0; //< prefers a journal | |
290 | virtual bool allows_journal() = 0; //< allows a journal | |
1d09f67e | 291 | virtual void prepare_for_fast_shutdown() {} |
39ae355f | 292 | virtual bool has_null_manager() const { return false; } |
9f95a23c TL |
293 | // return store min allocation size, if applicable |
294 | virtual uint64_t get_min_alloc_size() const { | |
295 | return 0; | |
296 | } | |
297 | ||
11fdf7f2 | 298 | /// enumerate hardware devices (by 'devname', e.g., 'sda' as in /sys/block/sda) |
9f95a23c | 299 | virtual int get_devices(std::set<std::string> *devls) { |
11fdf7f2 TL |
300 | return -EOPNOTSUPP; |
301 | } | |
302 | ||
303 | /// true if a txn is readable immediately after it is queued. | |
304 | virtual bool is_sync_onreadable() const { | |
305 | return true; | |
306 | } | |
307 | ||
31f18b77 FG |
308 | /** |
309 | * is_rotational | |
310 | * | |
311 | * Check whether store is backed by a rotational (HDD) or non-rotational | |
312 | * (SSD) device. | |
313 | * | |
314 | * This must be usable *before* the store is mounted. | |
315 | * | |
316 | * @return true for HDD, false for SSD | |
317 | */ | |
318 | virtual bool is_rotational() { | |
319 | return true; | |
320 | } | |
321 | ||
d2e6a577 FG |
322 | /** |
323 | * is_journal_rotational | |
324 | * | |
325 | * Check whether journal is backed by a rotational (HDD) or non-rotational | |
326 | * (SSD) device. | |
327 | * | |
328 | * | |
329 | * @return true for HDD, false for SSD | |
330 | */ | |
331 | virtual bool is_journal_rotational() { | |
332 | return true; | |
333 | } | |
334 | ||
9f95a23c | 335 | virtual std::string get_default_device_class() { |
224ce89b WB |
336 | return is_rotational() ? "hdd" : "ssd"; |
337 | } | |
338 | ||
11fdf7f2 TL |
339 | virtual int get_numa_node( |
340 | int *numa_node, | |
9f95a23c TL |
341 | std::set<int> *nodes, |
342 | std::set<std::string> *failed) { | |
11fdf7f2 TL |
343 | return -EOPNOTSUPP; |
344 | } | |
345 | ||
346 | ||
7c673cae FG |
347 | virtual bool can_sort_nibblewise() { |
348 | return false; // assume a backend cannot, unless it says otherwise | |
349 | } | |
350 | ||
11fdf7f2 TL |
351 | virtual int statfs(struct store_statfs_t *buf, |
352 | osd_alert_list_t* alerts = nullptr) = 0; | |
9f95a23c TL |
353 | virtual int pool_statfs(uint64_t pool_id, struct store_statfs_t *buf, |
354 | bool *per_pool_omap) = 0; | |
7c673cae | 355 | |
f67539c2 | 356 | virtual void collect_metadata(std::map<std::string,std::string> *pm) { } |
7c673cae FG |
357 | |
358 | /** | |
359 | * write_meta - write a simple configuration key out-of-band | |
360 | * | |
361 | * Write a simple key/value pair for basic store configuration | |
362 | * (e.g., a uuid or magic number) to an unopened/unmounted store. | |
363 | * The default implementation writes this to a plaintext file in the | |
364 | * path. | |
365 | * | |
366 | * A newline is appended. | |
367 | * | |
368 | * @param key key name (e.g., "fsid") | |
9f95a23c | 369 | * @param value value (e.g., a uuid rendered as a std::string) |
7c673cae FG |
370 | * @returns 0 for success, or an error code |
371 | */ | |
372 | virtual int write_meta(const std::string& key, | |
373 | const std::string& value); | |
374 | ||
375 | /** | |
376 | * read_meta - read a simple configuration key out-of-band | |
377 | * | |
378 | * Read a simple key/value pair from an unopened/unmounted store. | |
379 | * | |
380 | * Trailing whitespace is stripped off. | |
381 | * | |
382 | * @param key key name | |
9f95a23c | 383 | * @param value pointer to value std::string |
7c673cae FG |
384 | * @returns 0 for success, or an error code |
385 | */ | |
386 | virtual int read_meta(const std::string& key, | |
387 | std::string *value); | |
388 | ||
389 | /** | |
390 | * get ideal max value for collection_list() | |
391 | * | |
392 | * default to some arbitrary values; the implementation will override. | |
393 | */ | |
394 | virtual int get_ideal_list_max() { return 64; } | |
395 | ||
396 | ||
397 | /** | |
398 | * get a collection handle | |
399 | * | |
400 | * Provide a trivial handle as a default to avoid converting legacy | |
401 | * implementations. | |
402 | */ | |
11fdf7f2 TL |
403 | virtual CollectionHandle open_collection(const coll_t &cid) = 0; |
404 | ||
405 | /** | |
406 | * get a collection handle for a soon-to-be-created collection | |
407 | * | |
408 | * This handle must be used by queue_transaction that includes a | |
409 | * create_collection call in order to become valid. It will become the | |
410 | * reference to the created collection. | |
411 | */ | |
412 | virtual CollectionHandle create_new_collection(const coll_t &cid) = 0; | |
7c673cae | 413 | |
11fdf7f2 | 414 | /** |
9f95a23c | 415 | * set ContextQueue for a collection |
11fdf7f2 TL |
416 | * |
417 | * After that, oncommits of Transaction will queue into commit_queue. | |
418 | * And osd ShardThread will call oncommits. | |
419 | */ | |
420 | virtual void set_collection_commit_queue(const coll_t &cid, ContextQueue *commit_queue) = 0; | |
7c673cae FG |
421 | |
422 | /** | |
423 | * Synchronous read operations | |
424 | */ | |
425 | ||
426 | /** | |
20effc67 | 427 | * exists -- Test for existence of object |
7c673cae FG |
428 | * |
429 | * @param cid collection for object | |
430 | * @param oid oid of object | |
431 | * @returns true if object exists, false otherwise | |
432 | */ | |
11fdf7f2 | 433 | virtual bool exists(CollectionHandle& c, const ghobject_t& oid) = 0; |
7c673cae | 434 | /** |
9f95a23c | 435 | * set_collection_opts -- set pool options for a collection |
7c673cae FG |
436 | * |
437 | * @param cid collection | |
438 | * @param opts new collection options | |
439 | * @returns 0 on success, negative error code on failure. | |
440 | */ | |
441 | virtual int set_collection_opts( | |
11fdf7f2 | 442 | CollectionHandle& c, |
7c673cae FG |
443 | const pool_opts_t& opts) = 0; |
444 | ||
445 | /** | |
446 | * stat -- get information for an object | |
447 | * | |
448 | * @param cid collection for object | |
449 | * @param oid oid of object | |
450 | * @param st output information for the object | |
451 | * @param allow_eio if false, assert on -EIO operation failure | |
452 | * @returns 0 on success, negative error code on failure. | |
453 | */ | |
7c673cae FG |
454 | virtual int stat( |
455 | CollectionHandle &c, | |
456 | const ghobject_t& oid, | |
457 | struct stat *st, | |
11fdf7f2 | 458 | bool allow_eio = false) = 0; |
7c673cae FG |
459 | /** |
460 | * read -- read a byte range of data from an object | |
461 | * | |
462 | * Note: if reading from an offset past the end of the object, we | |
463 | * return 0 (not, say, -EINVAL). | |
464 | * | |
465 | * @param cid collection for object | |
466 | * @param oid oid of object | |
467 | * @param offset location offset of first byte to be read | |
468 | * @param len number of bytes to be read | |
9f95a23c | 469 | * @param bl output ceph::buffer::list |
7c673cae | 470 | * @param op_flags is CEPH_OSD_OP_FLAG_* |
7c673cae FG |
471 | * @returns number of bytes read on success, or negative error code on failure. |
472 | */ | |
7c673cae FG |
473 | virtual int read( |
474 | CollectionHandle &c, | |
475 | const ghobject_t& oid, | |
476 | uint64_t offset, | |
477 | size_t len, | |
9f95a23c | 478 | ceph::buffer::list& bl, |
11fdf7f2 | 479 | uint32_t op_flags = 0) = 0; |
7c673cae FG |
480 | |
481 | /** | |
9f95a23c | 482 | * fiemap -- get extent map of data of an object |
7c673cae | 483 | * |
9f95a23c TL |
484 | * Returns an encoded map of the extents of an object's data portion |
485 | * (std::map<offset,size>). | |
7c673cae FG |
486 | * |
487 | * A non-enlightened implementation is free to return the extent (offset, len) | |
488 | * as the sole extent. | |
489 | * | |
490 | * @param cid collection for object | |
491 | * @param oid oid of object | |
492 | * @param offset location offset of first byte to be read | |
493 | * @param len number of bytes to be read | |
9f95a23c | 494 | * @param bl output ceph::buffer::list for extent map information. |
7c673cae FG |
495 | * @returns 0 on success, negative error code on failure. |
496 | */ | |
7c673cae | 497 | virtual int fiemap(CollectionHandle& c, const ghobject_t& oid, |
9f95a23c | 498 | uint64_t offset, size_t len, ceph::buffer::list& bl) = 0; |
7c673cae | 499 | virtual int fiemap(CollectionHandle& c, const ghobject_t& oid, |
9f95a23c TL |
500 | uint64_t offset, size_t len, std::map<uint64_t, uint64_t>& destmap) = 0; |
501 | ||
502 | /** | |
503 | * readv -- read specfic intervals from an object; | |
504 | * caller must call fiemap to fill in the extent-map first. | |
505 | * | |
506 | * Note: if reading from an offset past the end of the object, we | |
507 | * return 0 (not, say, -EINVAL). Also the default version of readv | |
508 | * reads each extent separately synchronously, which can become horribly | |
509 | * inefficient if the physical layout of the pushing object get massively | |
510 | * fragmented and hence should be overridden by any real os that | |
511 | * cares about the performance.. | |
512 | * | |
513 | * @param cid collection for object | |
514 | * @param oid oid of object | |
515 | * @param m intervals to be read | |
516 | * @param bl output ceph::buffer::list | |
517 | * @param op_flags is CEPH_OSD_OP_FLAG_* | |
518 | * @returns number of bytes read on success, or negative error code on failure. | |
519 | */ | |
520 | virtual int readv( | |
521 | CollectionHandle &c, | |
522 | const ghobject_t& oid, | |
523 | interval_set<uint64_t>& m, | |
524 | ceph::buffer::list& bl, | |
525 | uint32_t op_flags = 0) { | |
526 | int total = 0; | |
527 | for (auto p = m.begin(); p != m.end(); p++) { | |
f67539c2 | 528 | ceph::buffer::list t; |
9f95a23c TL |
529 | int r = read(c, oid, p.get_start(), p.get_len(), t, op_flags); |
530 | if (r < 0) | |
531 | return r; | |
532 | total += r; | |
533 | // prune fiemap, if necessary | |
534 | if (p.get_len() != t.length()) { | |
535 | auto save = p++; | |
536 | if (t.length() == 0) { | |
537 | m.erase(save); // Remove this empty interval | |
538 | } else { | |
539 | save.set_len(t.length()); // fix interval length | |
540 | bl.claim_append(t); | |
541 | } | |
542 | // Remove any other follow-up intervals present too | |
543 | while (p != m.end()) { | |
544 | save = p++; | |
545 | m.erase(save); | |
546 | } | |
547 | break; | |
548 | } | |
549 | bl.claim_append(t); | |
550 | } | |
551 | return total; | |
552 | } | |
553 | ||
554 | /** | |
555 | * dump_onode -- dumps onode metadata in human readable form, | |
556 | intended primarily for debugging | |
557 | * | |
558 | * @param cid collection for object | |
559 | * @param oid oid of object | |
560 | * @param section_name section name to create and print under | |
561 | * @param f Formatter class instance to print to | |
562 | * @returns 0 on success, negative error code on failure. | |
563 | */ | |
564 | virtual int dump_onode( | |
565 | CollectionHandle &c, | |
566 | const ghobject_t& oid, | |
f67539c2 TL |
567 | const std::string& section_name, |
568 | ceph::Formatter *f) { | |
9f95a23c TL |
569 | return -ENOTSUP; |
570 | } | |
7c673cae FG |
571 | |
572 | /** | |
573 | * getattr -- get an xattr of an object | |
574 | * | |
575 | * @param cid collection for object | |
576 | * @param oid oid of object | |
577 | * @param name name of attr to read | |
578 | * @param value place to put output result. | |
579 | * @returns 0 on success, negative error code on failure. | |
580 | */ | |
7c673cae | 581 | virtual int getattr(CollectionHandle &c, const ghobject_t& oid, |
9f95a23c | 582 | const char *name, ceph::buffer::ptr& value) = 0; |
7c673cae FG |
583 | |
584 | /** | |
585 | * getattr -- get an xattr of an object | |
586 | * | |
587 | * @param cid collection for object | |
588 | * @param oid oid of object | |
589 | * @param name name of attr to read | |
590 | * @param value place to put output result. | |
591 | * @returns 0 on success, negative error code on failure. | |
592 | */ | |
7c673cae FG |
593 | int getattr( |
594 | CollectionHandle &c, const ghobject_t& oid, | |
9f95a23c TL |
595 | const std::string& name, ceph::buffer::list& value) { |
596 | ceph::buffer::ptr bp; | |
7c673cae FG |
597 | int r = getattr(c, oid, name.c_str(), bp); |
598 | value.push_back(bp); | |
599 | return r; | |
600 | } | |
601 | ||
602 | /** | |
603 | * getattrs -- get all of the xattrs of an object | |
604 | * | |
605 | * @param cid collection for object | |
606 | * @param oid oid of object | |
607 | * @param aset place to put output result. | |
608 | * @returns 0 on success, negative error code on failure. | |
609 | */ | |
7c673cae | 610 | virtual int getattrs(CollectionHandle &c, const ghobject_t& oid, |
20effc67 | 611 | std::map<std::string,ceph::buffer::ptr, std::less<>>& aset) = 0; |
7c673cae FG |
612 | |
613 | /** | |
614 | * getattrs -- get all of the xattrs of an object | |
615 | * | |
616 | * @param cid collection for object | |
617 | * @param oid oid of object | |
618 | * @param aset place to put output result. | |
619 | * @returns 0 on success, negative error code on failure. | |
620 | */ | |
7c673cae | 621 | int getattrs(CollectionHandle &c, const ghobject_t& oid, |
20effc67 TL |
622 | std::map<std::string,ceph::buffer::list,std::less<>>& aset) { |
623 | std::map<std::string,ceph::buffer::ptr,std::less<>> bmap; | |
7c673cae | 624 | int r = getattrs(c, oid, bmap); |
9f95a23c | 625 | for (auto i = bmap.begin(); i != bmap.end(); ++i) { |
7c673cae FG |
626 | aset[i->first].append(i->second); |
627 | } | |
628 | return r; | |
629 | } | |
630 | ||
631 | ||
632 | // collections | |
633 | ||
634 | /** | |
635 | * list_collections -- get all of the collections known to this ObjectStore | |
636 | * | |
9f95a23c | 637 | * @param ls [out] list of the collections in sorted order. |
7c673cae FG |
638 | * @returns 0 on success, negative error code on failure. |
639 | */ | |
9f95a23c | 640 | virtual int list_collections(std::vector<coll_t>& ls) = 0; |
7c673cae FG |
641 | |
642 | /** | |
643 | * does a collection exist? | |
644 | * | |
645 | * @param c collection | |
646 | * @returns true if it exists, false otherwise | |
647 | */ | |
648 | virtual bool collection_exists(const coll_t& c) = 0; | |
649 | ||
650 | /** | |
651 | * is a collection empty? | |
652 | * | |
653 | * @param c collection | |
654 | * @param empty true if the specified collection is empty, false otherwise | |
655 | * @returns 0 on success, negative error code on failure. | |
656 | */ | |
11fdf7f2 | 657 | virtual int collection_empty(CollectionHandle& c, bool *empty) = 0; |
7c673cae FG |
658 | |
659 | /** | |
660 | * return the number of significant bits of the coll_t::pgid. | |
661 | * | |
662 | * This should return what the last create_collection or split_collection | |
9f95a23c | 663 | * set. A legacy backend may return -EAGAIN if the value is unavailable |
7c673cae FG |
664 | * (because we upgraded from an older version, e.g., FileStore). |
665 | */ | |
11fdf7f2 | 666 | virtual int collection_bits(CollectionHandle& c) = 0; |
7c673cae FG |
667 | |
668 | ||
669 | /** | |
9f95a23c | 670 | * list contents of a collection that fall in the range [start, end), returning no more than a specified number of results |
7c673cae FG |
671 | * |
672 | * @param c collection | |
673 | * @param start list object that sort >= this value | |
674 | * @param end list objects that sort < this value | |
675 | * @param max return no more than this many results | |
676 | * @param seq return no objects with snap < seq | |
677 | * @param ls [out] result | |
678 | * @param next [out] next item sorts >= this value | |
679 | * @return zero on success, or negative error | |
680 | */ | |
7c673cae FG |
681 | virtual int collection_list(CollectionHandle &c, |
682 | const ghobject_t& start, const ghobject_t& end, | |
683 | int max, | |
9f95a23c | 684 | std::vector<ghobject_t> *ls, ghobject_t *next) = 0; |
7c673cae | 685 | |
f91f0fd5 TL |
686 | virtual int collection_list_legacy(CollectionHandle &c, |
687 | const ghobject_t& start, | |
688 | const ghobject_t& end, int max, | |
689 | std::vector<ghobject_t> *ls, | |
690 | ghobject_t *next) { | |
691 | return collection_list(c, start, end, max, ls, next); | |
692 | } | |
7c673cae FG |
693 | |
694 | /// OMAP | |
695 | /// Get omap contents | |
7c673cae FG |
696 | virtual int omap_get( |
697 | CollectionHandle &c, ///< [in] Collection containing oid | |
698 | const ghobject_t &oid, ///< [in] Object containing omap | |
9f95a23c TL |
699 | ceph::buffer::list *header, ///< [out] omap header |
700 | std::map<std::string, ceph::buffer::list> *out /// < [out] Key to value std::map | |
11fdf7f2 | 701 | ) = 0; |
7c673cae FG |
702 | |
703 | /// Get omap header | |
7c673cae FG |
704 | virtual int omap_get_header( |
705 | CollectionHandle &c, ///< [in] Collection containing oid | |
706 | const ghobject_t &oid, ///< [in] Object containing omap | |
9f95a23c | 707 | ceph::buffer::list *header, ///< [out] omap header |
7c673cae | 708 | bool allow_eio = false ///< [in] don't assert on eio |
11fdf7f2 | 709 | ) = 0; |
7c673cae FG |
710 | |
711 | /// Get keys defined on oid | |
7c673cae FG |
712 | virtual int omap_get_keys( |
713 | CollectionHandle &c, ///< [in] Collection containing oid | |
714 | const ghobject_t &oid, ///< [in] Object containing omap | |
9f95a23c | 715 | std::set<std::string> *keys ///< [out] Keys defined on oid |
11fdf7f2 | 716 | ) = 0; |
7c673cae FG |
717 | |
718 | /// Get key values | |
7c673cae FG |
719 | virtual int omap_get_values( |
720 | CollectionHandle &c, ///< [in] Collection containing oid | |
721 | const ghobject_t &oid, ///< [in] Object containing omap | |
9f95a23c TL |
722 | const std::set<std::string> &keys, ///< [in] Keys to get |
723 | std::map<std::string, ceph::buffer::list> *out ///< [out] Returned keys and values | |
11fdf7f2 | 724 | ) = 0; |
7c673cae | 725 | |
9f95a23c TL |
726 | #ifdef WITH_SEASTAR |
727 | virtual int omap_get_values( | |
728 | CollectionHandle &c, ///< [in] Collection containing oid | |
729 | const ghobject_t &oid, ///< [in] Object containing omap | |
730 | const std::optional<std::string> &start_after, ///< [in] Keys to get | |
731 | std::map<std::string, ceph::buffer::list> *out ///< [out] Returned keys and values | |
732 | ) = 0; | |
733 | #endif | |
734 | ||
7c673cae | 735 | /// Filters keys into out which are defined on oid |
7c673cae FG |
736 | virtual int omap_check_keys( |
737 | CollectionHandle &c, ///< [in] Collection containing oid | |
738 | const ghobject_t &oid, ///< [in] Object containing omap | |
9f95a23c TL |
739 | const std::set<std::string> &keys, ///< [in] Keys to check |
740 | std::set<std::string> *out ///< [out] Subset of keys defined on oid | |
11fdf7f2 | 741 | ) = 0; |
7c673cae FG |
742 | |
743 | /** | |
744 | * Returns an object map iterator | |
745 | * | |
746 | * Warning! The returned iterator is an implicit lock on filestore | |
747 | * operations in c. Do not use filestore methods on c while the returned | |
748 | * iterator is live. (Filling in a transaction is no problem). | |
749 | * | |
750 | * @return iterator, null on error | |
751 | */ | |
7c673cae FG |
752 | virtual ObjectMap::ObjectMapIterator get_omap_iterator( |
753 | CollectionHandle &c, ///< [in] collection | |
754 | const ghobject_t &oid ///< [in] object | |
11fdf7f2 | 755 | ) = 0; |
7c673cae FG |
756 | |
757 | virtual int flush_journal() { return -EOPNOTSUPP; } | |
758 | ||
9f95a23c | 759 | virtual int dump_journal(std::ostream& out) { return -EOPNOTSUPP; } |
7c673cae | 760 | |
9f95a23c | 761 | virtual int snapshot(const std::string& name) { return -EOPNOTSUPP; } |
7c673cae FG |
762 | |
763 | /** | |
764 | * Set and get internal fsid for this instance. No external data is modified | |
765 | */ | |
766 | virtual void set_fsid(uuid_d u) = 0; | |
767 | virtual uuid_d get_fsid() = 0; | |
768 | ||
769 | /** | |
770 | * Estimates additional disk space used by the specified amount of objects and caused by file allocation granularity and metadata store | |
771 | * - num objects - total (including whiteouts) object count to measure used space for. | |
772 | */ | |
773 | virtual uint64_t estimate_objects_overhead(uint64_t num_objects) = 0; | |
774 | ||
775 | ||
776 | // DEBUG | |
777 | virtual void inject_data_error(const ghobject_t &oid) {} | |
778 | virtual void inject_mdata_error(const ghobject_t &oid) {} | |
224ce89b WB |
779 | |
780 | virtual void compact() {} | |
28e407b8 AA |
781 | virtual bool has_builtin_csum() const { |
782 | return false; | |
783 | } | |
7c673cae | 784 | }; |
7c673cae FG |
785 | |
786 | #endif |