]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | #ifndef CEPH_OBJECTSTORE_H | |
15 | #define CEPH_OBJECTSTORE_H | |
16 | ||
f67539c2 | 17 | #include "include/buffer.h" |
9f95a23c | 18 | #include "include/common_fwd.h" |
7c673cae | 19 | #include "include/Context.h" |
f67539c2 | 20 | #include "include/interval_set.h" |
11fdf7f2 | 21 | #include "include/stringify.h" |
f67539c2 TL |
22 | #include "include/types.h" |
23 | ||
7c673cae FG |
24 | #include "osd/osd_types.h" |
25 | #include "common/TrackedOp.h" | |
26 | #include "common/WorkQueue.h" | |
27 | #include "ObjectMap.h" | |
9f95a23c | 28 | #include "os/Transaction.h" |
7c673cae FG |
29 | |
30 | #include <errno.h> | |
31 | #include <sys/stat.h> | |
32 | #include <vector> | |
33 | #include <map> | |
34 | ||
f67539c2 | 35 | #if defined(__APPLE__) || defined(__FreeBSD__) || defined(__sun) || defined(_WIN32) |
7c673cae FG |
36 | #include <sys/statvfs.h> |
37 | #else | |
38 | #include <sys/vfs.h> /* or <sys/statfs.h> */ | |
11fdf7f2 | 39 | #endif |
7c673cae | 40 | |
7c673cae FG |
41 | namespace ceph { |
42 | class Formatter; | |
43 | } | |
44 | ||
45 | /* | |
46 | * low-level interface to the local OSD file system | |
47 | */ | |
48 | ||
49 | class Logger; | |
11fdf7f2 | 50 | class ContextQueue; |
7c673cae | 51 | |
9f95a23c TL |
52 | static inline void encode(const std::map<std::string,ceph::buffer::ptr> *attrset, ceph::buffer::list &bl) { |
53 | using ceph::encode; | |
11fdf7f2 | 54 | encode(*attrset, bl); |
7c673cae FG |
55 | } |
56 | ||
7c673cae FG |
// Flag bits.  osflagbits_t is the type of the `flags` argument taken by
// ObjectStore::create(); each constant below occupies a distinct bit.
using osflagbits_t = uint32_t;
constexpr int SKIP_JOURNAL_REPLAY = 0x1;  // bit 0
constexpr int SKIP_MOUNT_OMAP = 0x2;      // bit 1
61 | ||
62 | class ObjectStore { | |
63 | protected: | |
9f95a23c | 64 | std::string path; |
7c673cae FG |
65 | |
66 | public: | |
9f95a23c TL |
67 | using Transaction = ceph::os::Transaction; |
68 | ||
7c673cae FG |
69 | CephContext* cct; |
70 | /** | |
71 | * create - create an ObjectStore instance. | |
72 | * | |
73 | * This is invoked once at initialization time. | |
74 | * | |
9f95a23c | 75 | * @param type type of store. This is a std::string from the configuration file. |
7c673cae FG |
76 | * @param data path (or other descriptor) for data |
77 | * @param journal path (or other descriptor) for journal (optional) | |
78 | * @param flags which filestores should check if applicable | |
79 | */ | |
80 | static ObjectStore *create(CephContext *cct, | |
9f95a23c TL |
81 | const std::string& type, |
82 | const std::string& data, | |
83 | const std::string& journal, | |
7c673cae FG |
84 | osflagbits_t flags = 0); |
85 | ||
86 | /** | |
87 | * probe a block device to learn the uuid of the owning OSD | |
88 | * | |
89 | * @param cct cct | |
90 | * @param path path to device | |
91 | * @param fsid [out] osd uuid | |
92 | */ | |
93 | static int probe_block_device_fsid( | |
94 | CephContext *cct, | |
9f95a23c | 95 | const std::string& path, |
7c673cae FG |
96 | uuid_d *fsid); |
97 | ||
98 | /** | |
99 | * Fetch Object Store statistics. | |
100 | * | |
101 | * Currently only latency of write and apply times are measured. | |
102 | * | |
103 | * This appears to be called with nothing locked. | |
104 | */ | |
105 | virtual objectstore_perf_stat_t get_cur_stats() = 0; | |
106 | ||
107 | /** | |
108 | * Fetch Object Store performance counters. | |
109 | * | |
110 | * | |
111 | * This appears to be called with nothing locked. | |
112 | */ | |
113 | virtual const PerfCounters* get_perf_counters() const = 0; | |
114 | ||
115 | /** | |
11fdf7f2 | 116 | * a collection also orders transactions |
7c673cae | 117 | * |
11fdf7f2 TL |
118 | * Any transactions queued under a given collection will be applied in |
119 | * sequence. Transactions queued under different collections may run | |
7c673cae FG |
120 | * in parallel. |
121 | * | |
9f95a23c | 122 | * ObjectStore users may get collection handles with open_collection() (or, |
11fdf7f2 | 123 | * for bootstrapping a new collection, create_new_collection()). |
7c673cae | 124 | */ |
11fdf7f2 TL |
125 | struct CollectionImpl : public RefCountedObject { |
126 | const coll_t cid; | |
7c673cae | 127 | |
11fdf7f2 | 128 | /// wait for any queued transactions to apply |
7c673cae FG |
129 | // block until any previous transactions are visible. specifically, |
130 | // collection_list and collection_empty need to reflect prior operations. | |
131 | virtual void flush() = 0; | |
132 | ||
7c673cae FG |
133 | /** |
134 | * Async flush_commit | |
135 | * | |
136 | * There are two cases: | |
11fdf7f2 | 137 | * 1) collection is currently idle: the method returns true. c is |
7c673cae | 138 | * not touched. |
11fdf7f2 TL |
139 | * 2) collection is not idle: the method returns false and c is |
140 | * called asynchronously with a value of 0 once all transactions | |
141 | * queued on this collection prior to the call have been applied | |
7c673cae FG |
142 | * and committed. |
143 | */ | |
11fdf7f2 | 144 | virtual bool flush_commit(Context *c) = 0; |
7c673cae | 145 | |
11fdf7f2 TL |
146 | const coll_t &get_cid() { |
147 | return cid; | |
7c673cae | 148 | } |
9f95a23c TL |
149 | protected: |
150 | CollectionImpl() = delete; | |
151 | CollectionImpl(CephContext* cct, const coll_t& c) : RefCountedObject(cct), cid(c) {} | |
152 | ~CollectionImpl() = default; | |
7c673cae | 153 | }; |
9f95a23c | 154 | using CollectionHandle = ceph::ref_t<CollectionImpl>; |
7c673cae | 155 | |
7c673cae FG |
156 | |
157 | /********************************* | |
158 | * | |
159 | * Object Contents and semantics | |
160 | * | |
161 | * All ObjectStore objects are identified as a named object | |
162 | * (ghobject_t and hobject_t) in a named collection (coll_t). | |
163 | * ObjectStore operations support the creation, mutation, deletion | |
164 | * and enumeration of objects within a collection. Enumeration is | |
165 | * in sorted key order (where keys are sorted by hash). Object names | |
166 | * are globally unique. | |
167 | * | |
168 | * Each object has four distinct parts: byte data, xattrs, omap_header | |
169 | * and omap entries. | |
170 | * | |
171 | * The data portion of an object is conceptually equivalent to a | |
172 | * file in a file system. Random and Partial access for both read | |
173 | * and write operations is required. The ability to have a sparse | |
174 | * implementation of the data portion of an object is beneficial for | |
175 | * some workloads, but not required. There is a system-wide limit on | |
176 | * the maximum size of an object, which is typically around 100 MB. | |
177 | * | |
178 | * Xattrs are equivalent to the extended attributes of file | |
9f95a23c TL |
179 | * systems. Xattrs are a set of key/value pairs. Sub-value access |
180 | * is not required. It is possible to enumerate the set of xattrs in |
7c673cae FG |
181 | * key order. At the implementation level, xattrs are used |
182 | * exclusively internal to Ceph and the implementer can expect the | |
183 | * total size of all of the xattrs on an object to be relatively | |
184 | * small, i.e., less than 64KB. Much of Ceph assumes that accessing | |
185 | * xattrs on temporally adjacent object accesses (recent past or | |
186 | * near future) is inexpensive. | |
187 | * | |
188 | * omap_header is a single blob of data. It can be read or written | |
189 | * in total. | |
190 | * | |
191 | * Omap entries are conceptually the same as xattrs | |
192 | * but in a different address space. In other words, you can have | |
193 | * the same key as an xattr and an omap entry and they have distinct | |
194 | * values. Enumeration of xattrs doesn't include omap entries and | |
195 | * vice versa. The size and access characteristics of omap entries | |
196 | * are very different from xattrs. In particular, the value portion | |
197 | * of an omap entry can be quite large (MBs). More importantly, the | |
198 | * interface must support efficient range queries on omap entries even | |
199 | * when there is a large number of entries. | |
200 | * | |
201 | *********************************/ | |
202 | ||
203 | /******************************* | |
204 | * | |
205 | * Collections | |
206 | * | |
207 | * A collection is simply a grouping of objects. Collections have | |
208 | * names (coll_t) and can be enumerated in order. Like an | |
9f95a23c | 209 | * individual object, a collection also has a set of xattrs. |
7c673cae | 210 | * |
7c673cae FG |
211 | * |
212 | */ | |
7c673cae | 213 | |
7c673cae | 214 | |
11fdf7f2 TL |
215 | int queue_transaction(CollectionHandle& ch, |
216 | Transaction&& t, | |
217 | TrackedOpRef op = TrackedOpRef(), | |
218 | ThreadPool::TPHandle *handle = NULL) { | |
9f95a23c | 219 | std::vector<Transaction> tls; |
7c673cae | 220 | tls.push_back(std::move(t)); |
11fdf7f2 | 221 | return queue_transactions(ch, tls, op, handle); |
7c673cae FG |
222 | } |
223 | ||
224 | virtual int queue_transactions( | |
9f95a23c | 225 | CollectionHandle& ch, std::vector<Transaction>& tls, |
7c673cae FG |
226 | TrackedOpRef op = TrackedOpRef(), |
227 | ThreadPool::TPHandle *handle = NULL) = 0; | |
228 | ||
229 | ||
7c673cae FG |
230 | public: |
231 | ObjectStore(CephContext* cct, | |
232 | const std::string& path_) : path(path_), cct(cct) {} | |
233 | virtual ~ObjectStore() {} | |
234 | ||
235 | // no copying | |
236 | explicit ObjectStore(const ObjectStore& o) = delete; | |
237 | const ObjectStore& operator=(const ObjectStore& o) = delete; | |
238 | ||
239 | // versioning | |
240 | virtual int upgrade() { | |
241 | return 0; | |
242 | } | |
243 | ||
9f95a23c TL |
244 | virtual void get_db_statistics(ceph::Formatter *f) { } |
245 | virtual void generate_db_histogram(ceph::Formatter *f) { } | |
246 | virtual int flush_cache(std::ostream *os = NULL) { return -1; } | |
247 | virtual void dump_perf_counters(ceph::Formatter *f) {} | |
248 | virtual void dump_cache_stats(ceph::Formatter *f) {} | |
249 | virtual void dump_cache_stats(std::ostream& os) {} | |
7c673cae | 250 | |
9f95a23c | 251 | virtual std::string get_type() = 0; |
7c673cae FG |
252 | |
253 | // mgmt | |
254 | virtual bool test_mount_in_use() = 0; | |
255 | virtual int mount() = 0; | |
256 | virtual int umount() = 0; | |
257 | virtual int fsck(bool deep) { | |
258 | return -EOPNOTSUPP; | |
259 | } | |
3efd9988 FG |
260 | virtual int repair(bool deep) { |
261 | return -EOPNOTSUPP; | |
262 | } | |
eafe8130 TL |
263 | virtual int quick_fix() { |
264 | return -EOPNOTSUPP; | |
265 | } | |
7c673cae FG |
266 | |
267 | virtual void set_cache_shards(unsigned num) { } | |
268 | ||
269 | /** | |
270 | * Returns 0 if the hobject is valid, -error otherwise | |
271 | * | |
272 | * Errors: | |
273 | * -ENAMETOOLONG: locator/namespace/name too large | |
274 | */ | |
275 | virtual int validate_hobject_key(const hobject_t &obj) const = 0; | |
276 | ||
277 | virtual unsigned get_max_attr_name_length() = 0; | |
278 | virtual int mkfs() = 0; // wipe | |
279 | virtual int mkjournal() = 0; // journal only | |
280 | virtual bool needs_journal() = 0; //< requires a journal | |
281 | virtual bool wants_journal() = 0; //< prefers a journal | |
282 | virtual bool allows_journal() = 0; //< allows a journal | |
283 | ||
9f95a23c TL |
284 | // return store min allocation size, if applicable |
285 | virtual uint64_t get_min_alloc_size() const { | |
286 | return 0; | |
287 | } | |
288 | ||
11fdf7f2 | 289 | /// enumerate hardware devices (by 'devname', e.g., 'sda' as in /sys/block/sda) |
9f95a23c | 290 | virtual int get_devices(std::set<std::string> *devls) { |
11fdf7f2 TL |
291 | return -EOPNOTSUPP; |
292 | } | |
293 | ||
294 | /// true if a txn is readable immediately after it is queued. | |
295 | virtual bool is_sync_onreadable() const { | |
296 | return true; | |
297 | } | |
298 | ||
31f18b77 FG |
299 | /** |
300 | * is_rotational | |
301 | * | |
302 | * Check whether store is backed by a rotational (HDD) or non-rotational | |
303 | * (SSD) device. | |
304 | * | |
305 | * This must be usable *before* the store is mounted. | |
306 | * | |
307 | * @return true for HDD, false for SSD | |
308 | */ | |
309 | virtual bool is_rotational() { | |
310 | return true; | |
311 | } | |
312 | ||
d2e6a577 FG |
313 | /** |
314 | * is_journal_rotational | |
315 | * | |
316 | * Check whether journal is backed by a rotational (HDD) or non-rotational | |
317 | * (SSD) device. | |
318 | * | |
319 | * | |
320 | * @return true for HDD, false for SSD | |
321 | */ | |
322 | virtual bool is_journal_rotational() { | |
323 | return true; | |
324 | } | |
325 | ||
9f95a23c | 326 | virtual std::string get_default_device_class() { |
224ce89b WB |
327 | return is_rotational() ? "hdd" : "ssd"; |
328 | } | |
329 | ||
11fdf7f2 TL |
330 | virtual int get_numa_node( |
331 | int *numa_node, | |
9f95a23c TL |
332 | std::set<int> *nodes, |
333 | std::set<std::string> *failed) { | |
11fdf7f2 TL |
334 | return -EOPNOTSUPP; |
335 | } | |
336 | ||
337 | ||
7c673cae FG |
338 | virtual bool can_sort_nibblewise() { |
339 | return false; // assume a backend cannot, unless it says otherwise | |
340 | } | |
341 | ||
11fdf7f2 TL |
342 | virtual int statfs(struct store_statfs_t *buf, |
343 | osd_alert_list_t* alerts = nullptr) = 0; | |
9f95a23c TL |
344 | virtual int pool_statfs(uint64_t pool_id, struct store_statfs_t *buf, |
345 | bool *per_pool_omap) = 0; | |
7c673cae | 346 | |
f67539c2 | 347 | virtual void collect_metadata(std::map<std::string,std::string> *pm) { } |
7c673cae FG |
348 | |
349 | /** | |
350 | * write_meta - write a simple configuration key out-of-band | |
351 | * | |
352 | * Write a simple key/value pair for basic store configuration | |
353 | * (e.g., a uuid or magic number) to an unopened/unmounted store. | |
354 | * The default implementation writes this to a plaintext file in the | |
355 | * path. | |
356 | * | |
357 | * A newline is appended. | |
358 | * | |
359 | * @param key key name (e.g., "fsid") | |
9f95a23c | 360 | * @param value value (e.g., a uuid rendered as a std::string) |
7c673cae FG |
361 | * @returns 0 for success, or an error code |
362 | */ | |
363 | virtual int write_meta(const std::string& key, | |
364 | const std::string& value); | |
365 | ||
366 | /** | |
367 | * read_meta - read a simple configuration key out-of-band | |
368 | * | |
369 | * Read a simple key value from an unopened/mounted store. | |
370 | * | |
371 | * Trailing whitespace is stripped off. | |
372 | * | |
373 | * @param key key name | |
9f95a23c | 374 | * @param value pointer to value std::string |
7c673cae FG |
375 | * @returns 0 for success, or an error code |
376 | */ | |
377 | virtual int read_meta(const std::string& key, | |
378 | std::string *value); | |
379 | ||
380 | /** | |
381 | * get ideal max value for collection_list() | |
382 | * | |
383 | * default to some arbitrary values; the implementation will override. | |
384 | */ | |
385 | virtual int get_ideal_list_max() { return 64; } | |
386 | ||
387 | ||
388 | /** | |
389 | * get a collection handle | |
390 | * | |
391 | * Provide a trivial handle as a default to avoid converting legacy | |
392 | * implementations. | |
393 | */ | |
11fdf7f2 TL |
394 | virtual CollectionHandle open_collection(const coll_t &cid) = 0; |
395 | ||
396 | /** | |
397 | * get a collection handle for a soon-to-be-created collection | |
398 | * | |
399 | * This handle must be used by queue_transaction that includes a | |
400 | * create_collection call in order to become valid. It will become the | |
401 | * reference to the created collection. | |
402 | */ | |
403 | virtual CollectionHandle create_new_collection(const coll_t &cid) = 0; | |
7c673cae | 404 | |
11fdf7f2 | 405 | /** |
9f95a23c | 406 | * set the ContextQueue for a collection |
11fdf7f2 TL |
407 | * |
408 | * After that, oncommits of Transaction will queue into commit_queue. | |
409 | * And osd ShardThread will call oncommits. | |
410 | */ | |
411 | virtual void set_collection_commit_queue(const coll_t &cid, ContextQueue *commit_queue) = 0; | |
7c673cae FG |
412 | |
413 | /** | |
414 | * Synchronous read operations | |
415 | */ | |
416 | ||
417 | /** | |
418 | * exists -- Test for existence of object | |
419 | * | |
420 | * @param cid collection for object | |
421 | * @param oid oid of object | |
422 | * @returns true if object exists, false otherwise | |
423 | */ | |
11fdf7f2 | 424 | virtual bool exists(CollectionHandle& c, const ghobject_t& oid) = 0; |
7c673cae | 425 | /** |
9f95a23c | 426 | * set_collection_opts -- set pool options for a collection |
7c673cae FG |
427 | * |
428 | * @param cid collection | |
429 | * @param opts new collection options | |
430 | * @returns 0 on success, negative error code on failure. | |
431 | */ | |
432 | virtual int set_collection_opts( | |
11fdf7f2 | 433 | CollectionHandle& c, |
7c673cae FG |
434 | const pool_opts_t& opts) = 0; |
435 | ||
436 | /** | |
437 | * stat -- get information for an object | |
438 | * | |
439 | * @param cid collection for object | |
440 | * @param oid oid of object | |
441 | * @param st output information for the object | |
442 | * @param allow_eio if false, assert on -EIO operation failure | |
443 | * @returns 0 on success, negative error code on failure. | |
444 | */ | |
7c673cae FG |
445 | virtual int stat( |
446 | CollectionHandle &c, | |
447 | const ghobject_t& oid, | |
448 | struct stat *st, | |
11fdf7f2 | 449 | bool allow_eio = false) = 0; |
7c673cae FG |
450 | /** |
451 | * read -- read a byte range of data from an object | |
452 | * | |
453 | * Note: if reading from an offset past the end of the object, we | |
454 | * return 0 (not, say, -EINVAL). | |
455 | * | |
456 | * @param cid collection for object | |
457 | * @param oid oid of object | |
458 | * @param offset location offset of first byte to be read | |
459 | * @param len number of bytes to be read | |
9f95a23c | 460 | * @param bl output ceph::buffer::list |
7c673cae | 461 | * @param op_flags is CEPH_OSD_OP_FLAG_* |
7c673cae FG |
462 | * @returns number of bytes read on success, or negative error code on failure. |
463 | */ | |
7c673cae FG |
464 | virtual int read( |
465 | CollectionHandle &c, | |
466 | const ghobject_t& oid, | |
467 | uint64_t offset, | |
468 | size_t len, | |
9f95a23c | 469 | ceph::buffer::list& bl, |
11fdf7f2 | 470 | uint32_t op_flags = 0) = 0; |
7c673cae FG |
471 | |
472 | /** | |
9f95a23c | 473 | * fiemap -- get extent map of data of an object |
7c673cae | 474 | * |
9f95a23c TL |
475 | * Returns an encoded map of the extents of an object's data portion |
476 | * (std::map<offset,size>). | |
7c673cae FG |
477 | * |
478 | * A non-enlightened implementation is free to return the extent (offset, len) | |
479 | * as the sole extent. | |
480 | * | |
481 | * @param cid collection for object | |
482 | * @param oid oid of object | |
483 | * @param offset location offset of first byte to be read | |
484 | * @param len number of bytes to be read | |
9f95a23c | 485 | * @param bl output ceph::buffer::list for extent map information. |
7c673cae FG |
486 | * @returns 0 on success, negative error code on failure. |
487 | */ | |
7c673cae | 488 | virtual int fiemap(CollectionHandle& c, const ghobject_t& oid, |
9f95a23c | 489 | uint64_t offset, size_t len, ceph::buffer::list& bl) = 0; |
7c673cae | 490 | virtual int fiemap(CollectionHandle& c, const ghobject_t& oid, |
9f95a23c TL |
491 | uint64_t offset, size_t len, std::map<uint64_t, uint64_t>& destmap) = 0; |
492 | ||
493 | /** | |
494 | * readv -- read specific intervals from an object; | |
495 | * caller must call fiemap to fill in the extent-map first. | |
496 | * | |
497 | * Note: if reading from an offset past the end of the object, we | |
498 | * return 0 (not, say, -EINVAL). Also the default version of readv | |
499 | * reads each extent separately synchronously, which can become horribly | |
500 | * inefficient if the physical layout of the pushing object gets massively | |
501 | * fragmented and hence should be overridden by any real os that | |
502 | * cares about performance. | |
503 | * | |
504 | * @param cid collection for object | |
505 | * @param oid oid of object | |
506 | * @param m intervals to be read | |
507 | * @param bl output ceph::buffer::list | |
508 | * @param op_flags is CEPH_OSD_OP_FLAG_* | |
509 | * @returns number of bytes read on success, or negative error code on failure. | |
510 | */ | |
511 | virtual int readv( | |
512 | CollectionHandle &c, | |
513 | const ghobject_t& oid, | |
514 | interval_set<uint64_t>& m, | |
515 | ceph::buffer::list& bl, | |
516 | uint32_t op_flags = 0) { | |
517 | int total = 0; | |
518 | for (auto p = m.begin(); p != m.end(); p++) { | |
f67539c2 | 519 | ceph::buffer::list t; |
9f95a23c TL |
520 | int r = read(c, oid, p.get_start(), p.get_len(), t, op_flags); |
521 | if (r < 0) | |
522 | return r; | |
523 | total += r; | |
524 | // prune fiemap, if necessary | |
525 | if (p.get_len() != t.length()) { | |
526 | auto save = p++; | |
527 | if (t.length() == 0) { | |
528 | m.erase(save); // Remove this empty interval | |
529 | } else { | |
530 | save.set_len(t.length()); // fix interval length | |
531 | bl.claim_append(t); | |
532 | } | |
533 | // Remove any other follow-up intervals present too | |
534 | while (p != m.end()) { | |
535 | save = p++; | |
536 | m.erase(save); | |
537 | } | |
538 | break; | |
539 | } | |
540 | bl.claim_append(t); | |
541 | } | |
542 | return total; | |
543 | } | |
544 | ||
545 | /** | |
546 | * dump_onode -- dumps onode metadata in human readable form, | |
547 | * intended primarily for debugging | |
548 | * | |
549 | * @param cid collection for object | |
550 | * @param oid oid of object | |
551 | * @param section_name section name to create and print under | |
552 | * @param f Formatter class instance to print to | |
553 | * @returns 0 on success, negative error code on failure. | |
554 | */ | |
555 | virtual int dump_onode( | |
556 | CollectionHandle &c, | |
557 | const ghobject_t& oid, | |
f67539c2 TL |
558 | const std::string& section_name, |
559 | ceph::Formatter *f) { | |
9f95a23c TL |
560 | return -ENOTSUP; |
561 | } | |
7c673cae FG |
562 | |
563 | /** | |
564 | * getattr -- get an xattr of an object | |
565 | * | |
566 | * @param cid collection for object | |
567 | * @param oid oid of object | |
568 | * @param name name of attr to read | |
569 | * @param value place to put output result. | |
570 | * @returns 0 on success, negative error code on failure. | |
571 | */ | |
7c673cae | 572 | virtual int getattr(CollectionHandle &c, const ghobject_t& oid, |
9f95a23c | 573 | const char *name, ceph::buffer::ptr& value) = 0; |
7c673cae FG |
574 | |
575 | /** | |
576 | * getattr -- get an xattr of an object | |
577 | * | |
578 | * @param cid collection for object | |
579 | * @param oid oid of object | |
580 | * @param name name of attr to read | |
581 | * @param value place to put output result. | |
582 | * @returns 0 on success, negative error code on failure. | |
583 | */ | |
7c673cae FG |
584 | int getattr( |
585 | CollectionHandle &c, const ghobject_t& oid, | |
9f95a23c TL |
586 | const std::string& name, ceph::buffer::list& value) { |
587 | ceph::buffer::ptr bp; | |
7c673cae FG |
588 | int r = getattr(c, oid, name.c_str(), bp); |
589 | value.push_back(bp); | |
590 | return r; | |
591 | } | |
592 | ||
593 | /** | |
594 | * getattrs -- get all of the xattrs of an object | |
595 | * | |
596 | * @param cid collection for object | |
597 | * @param oid oid of object | |
598 | * @param aset place to put output result. | |
599 | * @returns 0 on success, negative error code on failure. | |
600 | */ | |
7c673cae | 601 | virtual int getattrs(CollectionHandle &c, const ghobject_t& oid, |
9f95a23c | 602 | std::map<std::string,ceph::buffer::ptr>& aset) = 0; |
7c673cae FG |
603 | |
604 | /** | |
605 | * getattrs -- get all of the xattrs of an object | |
606 | * | |
607 | * @param cid collection for object | |
608 | * @param oid oid of object | |
609 | * @param aset place to put output result. | |
610 | * @returns 0 on success, negative error code on failure. | |
611 | */ | |
7c673cae | 612 | int getattrs(CollectionHandle &c, const ghobject_t& oid, |
9f95a23c TL |
613 | std::map<std::string,ceph::buffer::list>& aset) { |
614 | std::map<std::string,ceph::buffer::ptr> bmap; | |
7c673cae | 615 | int r = getattrs(c, oid, bmap); |
9f95a23c | 616 | for (auto i = bmap.begin(); i != bmap.end(); ++i) { |
7c673cae FG |
617 | aset[i->first].append(i->second); |
618 | } | |
619 | return r; | |
620 | } | |
621 | ||
622 | ||
623 | // collections | |
624 | ||
625 | /** | |
626 | * list_collections -- get all of the collections known to this ObjectStore | |
627 | * | |
9f95a23c | 628 | * @param ls list of the collections in sorted order. |
7c673cae FG |
629 | * @returns 0 on success, negative error code on failure. |
630 | */ | |
9f95a23c | 631 | virtual int list_collections(std::vector<coll_t>& ls) = 0; |
7c673cae FG |
632 | |
633 | /** | |
634 | * does a collection exist? | |
635 | * | |
636 | * @param c collection | |
637 | * @returns true if it exists, false otherwise | |
638 | */ | |
639 | virtual bool collection_exists(const coll_t& c) = 0; | |
640 | ||
641 | /** | |
642 | * is a collection empty? | |
643 | * | |
644 | * @param c collection | |
645 | * @param empty true if the specified collection is empty, false otherwise | |
646 | * @returns 0 on success, negative error code on failure. | |
647 | */ | |
11fdf7f2 | 648 | virtual int collection_empty(CollectionHandle& c, bool *empty) = 0; |
7c673cae FG |
649 | |
650 | /** | |
651 | * return the number of significant bits of the coll_t::pgid. | |
652 | * | |
653 | * This should return what the last create_collection or split_collection | |
9f95a23c | 654 | * set. A legacy backend may return -EAGAIN if the value is unavailable |
7c673cae FG |
655 | * (because we upgraded from an older version, e.g., FileStore). |
656 | */ | |
11fdf7f2 | 657 | virtual int collection_bits(CollectionHandle& c) = 0; |
7c673cae FG |
658 | |
659 | ||
660 | /** | |
9f95a23c | 661 | * list contents of a collection that fall in the range [start, end), returning no more than a specified number of results |
7c673cae FG |
662 | * |
663 | * @param c collection | |
664 | * @param start list objects that sort >= this value | |
665 | * @param end list objects that sort < this value | |
666 | * @param max return no more than this many results | |
667 | * @param seq return no objects with snap < seq | |
668 | * @param ls [out] result | |
669 | * @param next [out] next item sorts >= this value | |
670 | * @return zero on success, or negative error | |
671 | */ | |
7c673cae FG |
672 | virtual int collection_list(CollectionHandle &c, |
673 | const ghobject_t& start, const ghobject_t& end, | |
674 | int max, | |
9f95a23c | 675 | std::vector<ghobject_t> *ls, ghobject_t *next) = 0; |
7c673cae | 676 | |
f91f0fd5 TL |
677 | virtual int collection_list_legacy(CollectionHandle &c, |
678 | const ghobject_t& start, | |
679 | const ghobject_t& end, int max, | |
680 | std::vector<ghobject_t> *ls, | |
681 | ghobject_t *next) { | |
682 | return collection_list(c, start, end, max, ls, next); | |
683 | } | |
7c673cae FG |
684 | |
685 | /// OMAP | |
686 | /// Get omap contents | |
7c673cae FG |
687 | virtual int omap_get( |
688 | CollectionHandle &c, ///< [in] Collection containing oid | |
689 | const ghobject_t &oid, ///< [in] Object containing omap | |
9f95a23c TL |
690 | ceph::buffer::list *header, ///< [out] omap header |
691 | std::map<std::string, ceph::buffer::list> *out /// < [out] Key to value std::map | |
11fdf7f2 | 692 | ) = 0; |
7c673cae FG |
693 | |
694 | /// Get omap header | |
7c673cae FG |
695 | virtual int omap_get_header( |
696 | CollectionHandle &c, ///< [in] Collection containing oid | |
697 | const ghobject_t &oid, ///< [in] Object containing omap | |
9f95a23c | 698 | ceph::buffer::list *header, ///< [out] omap header |
7c673cae | 699 | bool allow_eio = false ///< [in] don't assert on eio |
11fdf7f2 | 700 | ) = 0; |
7c673cae FG |
701 | |
702 | /// Get keys defined on oid | |
7c673cae FG |
703 | virtual int omap_get_keys( |
704 | CollectionHandle &c, ///< [in] Collection containing oid | |
705 | const ghobject_t &oid, ///< [in] Object containing omap | |
9f95a23c | 706 | std::set<std::string> *keys ///< [out] Keys defined on oid |
11fdf7f2 | 707 | ) = 0; |
7c673cae FG |
708 | |
709 | /// Get key values | |
7c673cae FG |
710 | virtual int omap_get_values( |
711 | CollectionHandle &c, ///< [in] Collection containing oid | |
712 | const ghobject_t &oid, ///< [in] Object containing omap | |
9f95a23c TL |
713 | const std::set<std::string> &keys, ///< [in] Keys to get |
714 | std::map<std::string, ceph::buffer::list> *out ///< [out] Returned keys and values | |
11fdf7f2 | 715 | ) = 0; |
7c673cae | 716 | |
9f95a23c TL |
717 | #ifdef WITH_SEASTAR |
718 | virtual int omap_get_values( | |
719 | CollectionHandle &c, ///< [in] Collection containing oid | |
720 | const ghobject_t &oid, ///< [in] Object containing omap | |
721 | const std::optional<std::string> &start_after, ///< [in] Keys to get | |
722 | std::map<std::string, ceph::buffer::list> *out ///< [out] Returned keys and values | |
723 | ) = 0; | |
724 | #endif | |
725 | ||
7c673cae | 726 | /// Filters keys into out which are defined on oid |
7c673cae FG |
727 | virtual int omap_check_keys( |
728 | CollectionHandle &c, ///< [in] Collection containing oid | |
729 | const ghobject_t &oid, ///< [in] Object containing omap | |
9f95a23c TL |
730 | const std::set<std::string> &keys, ///< [in] Keys to check |
731 | std::set<std::string> *out ///< [out] Subset of keys defined on oid | |
11fdf7f2 | 732 | ) = 0; |
7c673cae FG |
733 | |
734 | /** | |
735 | * Returns an object map iterator | |
736 | * | |
737 | * Warning! The returned iterator is an implicit lock on filestore | |
738 | * operations in c. Do not use filestore methods on c while the returned | |
739 | * iterator is live. (Filling in a transaction is no problem). | |
740 | * | |
741 | * @return iterator, null on error | |
742 | */ | |
7c673cae FG |
743 | virtual ObjectMap::ObjectMapIterator get_omap_iterator( |
744 | CollectionHandle &c, ///< [in] collection | |
745 | const ghobject_t &oid ///< [in] object | |
11fdf7f2 | 746 | ) = 0; |
7c673cae FG |
747 | |
748 | virtual int flush_journal() { return -EOPNOTSUPP; } | |
749 | ||
9f95a23c | 750 | virtual int dump_journal(std::ostream& out) { return -EOPNOTSUPP; } |
7c673cae | 751 | |
9f95a23c | 752 | virtual int snapshot(const std::string& name) { return -EOPNOTSUPP; } |
7c673cae FG |
753 | |
754 | /** | |
755 | * Set and get internal fsid for this instance. No external data is modified | |
756 | */ | |
757 | virtual void set_fsid(uuid_d u) = 0; | |
758 | virtual uuid_d get_fsid() = 0; | |
759 | ||
760 | /** | |
761 | * Estimates additional disk space used by the specified amount of objects and caused by file allocation granularity and metadata store | |
762 | * - num_objects - total (including whiteouts) object count to measure used space for. | |
763 | */ | |
764 | virtual uint64_t estimate_objects_overhead(uint64_t num_objects) = 0; | |
765 | ||
766 | ||
767 | // DEBUG | |
768 | virtual void inject_data_error(const ghobject_t &oid) {} | |
769 | virtual void inject_mdata_error(const ghobject_t &oid) {} | |
224ce89b WB |
770 | |
771 | virtual void compact() {} | |
28e407b8 AA |
772 | virtual bool has_builtin_csum() const { |
773 | return false; | |
774 | } | |
7c673cae | 775 | }; |
7c673cae FG |
776 | |
777 | #endif |