]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | #ifndef CEPH_OBJECTSTORE_H | |
15 | #define CEPH_OBJECTSTORE_H | |
16 | ||
9f95a23c | 17 | #include "include/common_fwd.h" |
7c673cae FG |
18 | #include "include/Context.h" |
19 | #include "include/buffer.h" | |
20 | #include "include/types.h" | |
11fdf7f2 | 21 | #include "include/stringify.h" |
7c673cae FG |
22 | #include "osd/osd_types.h" |
23 | #include "common/TrackedOp.h" | |
24 | #include "common/WorkQueue.h" | |
25 | #include "ObjectMap.h" | |
9f95a23c | 26 | #include "os/Transaction.h" |
7c673cae FG |
27 | |
28 | #include <errno.h> | |
29 | #include <sys/stat.h> | |
30 | #include <vector> | |
31 | #include <map> | |
32 | ||
11fdf7f2 | 33 | #if defined(__APPLE__) || defined(__FreeBSD__) || defined(__sun) |
7c673cae FG |
34 | #include <sys/statvfs.h> |
35 | #else | |
36 | #include <sys/vfs.h> /* or <sys/statfs.h> */ | |
11fdf7f2 | 37 | #endif |
7c673cae | 38 | |
7c673cae FG |
39 | namespace ceph { |
40 | class Formatter; | |
41 | } | |
42 | ||
43 | /* | |
44 | * low-level interface to the local OSD file system | |
45 | */ | |
46 | ||
47 | class Logger; | |
11fdf7f2 | 48 | class ContextQueue; |
7c673cae | 49 | |
9f95a23c TL |
50 | static inline void encode(const std::map<std::string,ceph::buffer::ptr> *attrset, ceph::buffer::list &bl) { |
51 | using ceph::encode; | |
11fdf7f2 | 52 | encode(*attrset, bl); |
7c673cae FG |
53 | } |
54 | ||
7c673cae FG |
// Flag bits (osflagbits_t is the integer type used to carry them)
using osflagbits_t = uint32_t;
constexpr int SKIP_JOURNAL_REPLAY = 0x1;  // bit 0
constexpr int SKIP_MOUNT_OMAP     = 0x2;  // bit 1
59 | ||
60 | class ObjectStore { | |
61 | protected: | |
9f95a23c | 62 | std::string path; |
7c673cae FG |
63 | |
64 | public: | |
9f95a23c TL |
65 | using Transaction = ceph::os::Transaction; |
66 | ||
7c673cae FG |
67 | CephContext* cct; |
68 | /** | |
69 | * create - create an ObjectStore instance. | |
70 | * | |
71 | * This is invoked once at initialization time. | |
72 | * | |
9f95a23c | 73 | * @param type type of store. This is a std::string from the configuration file. |
7c673cae FG |
74 | * @param data path (or other descriptor) for data |
75 | * @param journal path (or other descriptor) for journal (optional) | |
76 | * @param flags which filestores should check if applicable | |
77 | */ | |
78 | static ObjectStore *create(CephContext *cct, | |
9f95a23c TL |
79 | const std::string& type, |
80 | const std::string& data, | |
81 | const std::string& journal, | |
7c673cae FG |
82 | osflagbits_t flags = 0); |
83 | ||
84 | /** | |
85 | * probe a block device to learn the uuid of the owning OSD | |
86 | * | |
87 | * @param cct cct | |
88 | * @param path path to device | |
89 | * @param fsid [out] osd uuid | |
90 | */ | |
91 | static int probe_block_device_fsid( | |
92 | CephContext *cct, | |
9f95a23c | 93 | const std::string& path, |
7c673cae FG |
94 | uuid_d *fsid); |
95 | ||
96 | /** | |
97 | * Fetch Object Store statistics. | |
98 | * | |
99 | * Currently only latency of write and apply times are measured. | |
100 | * | |
101 | * This appears to be called with nothing locked. | |
102 | */ | |
103 | virtual objectstore_perf_stat_t get_cur_stats() = 0; | |
104 | ||
105 | /** | |
106 | * Fetch Object Store performance counters. | |
107 | * | |
108 | * | |
109 | * This appears to be called with nothing locked. | |
110 | */ | |
111 | virtual const PerfCounters* get_perf_counters() const = 0; | |
112 | ||
113 | /** | |
11fdf7f2 | 114 | * a collection also orders transactions |
7c673cae | 115 | * |
11fdf7f2 TL |
116 | * Any transactions queued under a given collection will be applied in |
117 | * sequence. Transactions queued under different collections may run | |
7c673cae FG |
118 | * in parallel. |
119 | * | |
9f95a23c | 120 | * ObjectStore users may get collection handles with open_collection() (or, |
11fdf7f2 | 121 | * for bootstrapping a new collection, create_new_collection()). |
7c673cae | 122 | */ |
11fdf7f2 TL |
123 | struct CollectionImpl : public RefCountedObject { |
124 | const coll_t cid; | |
7c673cae | 125 | |
11fdf7f2 | 126 | /// wait for any queued transactions to apply |
7c673cae FG |
127 | // block until any previous transactions are visible. specifically, |
128 | // collection_list and collection_empty need to reflect prior operations. | |
129 | virtual void flush() = 0; | |
130 | ||
7c673cae FG |
131 | /** |
132 | * Async flush_commit | |
133 | * | |
134 | * There are two cases: | |
11fdf7f2 | 135 | * 1) collection is currently idle: the method returns true. c is |
7c673cae | 136 | * not touched. |
11fdf7f2 TL |
137 | * 2) collection is not idle: the method returns false and c is |
138 | * called asynchronously with a value of 0 once all transactions | |
139 | * queued on this collection prior to the call have been applied | |
7c673cae FG |
140 | * and committed. |
141 | */ | |
11fdf7f2 | 142 | virtual bool flush_commit(Context *c) = 0; |
7c673cae | 143 | |
11fdf7f2 TL |
144 | const coll_t &get_cid() { |
145 | return cid; | |
7c673cae | 146 | } |
9f95a23c TL |
147 | protected: |
148 | CollectionImpl() = delete; | |
149 | CollectionImpl(CephContext* cct, const coll_t& c) : RefCountedObject(cct), cid(c) {} | |
150 | ~CollectionImpl() = default; | |
7c673cae | 151 | }; |
9f95a23c | 152 | using CollectionHandle = ceph::ref_t<CollectionImpl>; |
7c673cae | 153 | |
7c673cae FG |
154 | |
155 | /********************************* | |
156 | * | |
157 | * Object Contents and semantics | |
158 | * | |
159 | * All ObjectStore objects are identified as a named object | |
160 | * (ghobject_t and hobject_t) in a named collection (coll_t). | |
161 | * ObjectStore operations support the creation, mutation, deletion | |
162 | * and enumeration of objects within a collection. Enumeration is | |
163 | * in sorted key order (where keys are sorted by hash). Object names | |
164 | * are globally unique. | |
165 | * | |
166 | * Each object has four distinct parts: byte data, xattrs, omap_header | |
167 | * and omap entries. | |
168 | * | |
169 | * The data portion of an object is conceptually equivalent to a | |
170 | * file in a file system. Random and Partial access for both read | |
171 | * and write operations is required. The ability to have a sparse | |
172 | * implementation of the data portion of an object is beneficial for | |
173 | * some workloads, but not required. There is a system-wide limit on | |
174 | * the maximum size of an object, which is typically around 100 MB. | |
175 | * | |
176 | * Xattrs are equivalent to the extended attributes of file | |
9f95a23c TL |
177 | * systems. Xattrs are a set of key/value pairs. Sub-value access |
178 | * is not required. It is possible to enumerate the set of xattrs in | |
7c673cae FG |
179 | * key order. At the implementation level, xattrs are used |
180 | * exclusively internal to Ceph and the implementer can expect the | |
181 | * total size of all of the xattrs on an object to be relatively | |
182 | * small, i.e., less than 64KB. Much of Ceph assumes that accessing | |
183 | * xattrs on temporally adjacent object accesses (recent past or | |
184 | * near future) is inexpensive. | |
185 | * | |
186 | * omap_header is a single blob of data. It can be read or written | |
187 | * in total. | |
188 | * | |
189 | * Omap entries are conceptually the same as xattrs | |
190 | * but in a different address space. In other words, you can have | |
191 | * the same key as an xattr and an omap entry and they have distinct | |
192 | * values. Enumeration of xattrs doesn't include omap entries and | |
193 | * vice versa. The size and access characteristics of omap entries | |
194 | * are very different from xattrs. In particular, the value portion | |
195 | * of an omap entry can be quite large (MBs). More importantly, the | |
196 | * interface must support efficient range queries on omap entries even | |
197 | * when there are large numbers of entries. | |
198 | * | |
199 | *********************************/ | |
200 | ||
201 | /******************************* | |
202 | * | |
203 | * Collections | |
204 | * | |
205 | * A collection is simply a grouping of objects. Collections have | |
206 | * names (coll_t) and can be enumerated in order. Like an | |
9f95a23c | 207 | * individual object, a collection also has a set of xattrs. |
7c673cae | 208 | * |
7c673cae FG |
209 | * |
210 | */ | |
7c673cae | 211 | |
7c673cae | 212 | |
11fdf7f2 TL |
213 | int queue_transaction(CollectionHandle& ch, |
214 | Transaction&& t, | |
215 | TrackedOpRef op = TrackedOpRef(), | |
216 | ThreadPool::TPHandle *handle = NULL) { | |
9f95a23c | 217 | std::vector<Transaction> tls; |
7c673cae | 218 | tls.push_back(std::move(t)); |
11fdf7f2 | 219 | return queue_transactions(ch, tls, op, handle); |
7c673cae FG |
220 | } |
221 | ||
222 | virtual int queue_transactions( | |
9f95a23c | 223 | CollectionHandle& ch, std::vector<Transaction>& tls, |
7c673cae FG |
224 | TrackedOpRef op = TrackedOpRef(), |
225 | ThreadPool::TPHandle *handle = NULL) = 0; | |
226 | ||
227 | ||
7c673cae FG |
228 | public: |
229 | ObjectStore(CephContext* cct, | |
230 | const std::string& path_) : path(path_), cct(cct) {} | |
231 | virtual ~ObjectStore() {} | |
232 | ||
233 | // no copying | |
234 | explicit ObjectStore(const ObjectStore& o) = delete; | |
235 | const ObjectStore& operator=(const ObjectStore& o) = delete; | |
236 | ||
237 | // versioning | |
238 | virtual int upgrade() { | |
239 | return 0; | |
240 | } | |
241 | ||
9f95a23c TL |
242 | virtual void get_db_statistics(ceph::Formatter *f) { } |
243 | virtual void generate_db_histogram(ceph::Formatter *f) { } | |
244 | virtual int flush_cache(std::ostream *os = NULL) { return -1; } | |
245 | virtual void dump_perf_counters(ceph::Formatter *f) {} | |
246 | virtual void dump_cache_stats(ceph::Formatter *f) {} | |
247 | virtual void dump_cache_stats(std::ostream& os) {} | |
7c673cae | 248 | |
9f95a23c | 249 | virtual std::string get_type() = 0; |
7c673cae FG |
250 | |
251 | // mgmt | |
252 | virtual bool test_mount_in_use() = 0; | |
253 | virtual int mount() = 0; | |
254 | virtual int umount() = 0; | |
255 | virtual int fsck(bool deep) { | |
256 | return -EOPNOTSUPP; | |
257 | } | |
3efd9988 FG |
258 | virtual int repair(bool deep) { |
259 | return -EOPNOTSUPP; | |
260 | } | |
eafe8130 TL |
261 | virtual int quick_fix() { |
262 | return -EOPNOTSUPP; | |
263 | } | |
7c673cae FG |
264 | |
265 | virtual void set_cache_shards(unsigned num) { } | |
266 | ||
267 | /** | |
268 | * Returns 0 if the hobject is valid, -error otherwise | |
269 | * | |
270 | * Errors: | |
271 | * -ENAMETOOLONG: locator/namespace/name too large | |
272 | */ | |
273 | virtual int validate_hobject_key(const hobject_t &obj) const = 0; | |
274 | ||
275 | virtual unsigned get_max_attr_name_length() = 0; | |
276 | virtual int mkfs() = 0; // wipe | |
277 | virtual int mkjournal() = 0; // journal only | |
278 | virtual bool needs_journal() = 0; //< requires a journal | |
279 | virtual bool wants_journal() = 0; //< prefers a journal | |
280 | virtual bool allows_journal() = 0; //< allows a journal | |
281 | ||
9f95a23c TL |
282 | // return store min allocation size, if applicable |
283 | virtual uint64_t get_min_alloc_size() const { | |
284 | return 0; | |
285 | } | |
286 | ||
11fdf7f2 | 287 | /// enumerate hardware devices (by 'devname', e.g., 'sda' as in /sys/block/sda) |
9f95a23c | 288 | virtual int get_devices(std::set<std::string> *devls) { |
11fdf7f2 TL |
289 | return -EOPNOTSUPP; |
290 | } | |
291 | ||
292 | /// true if a txn is readable immediately after it is queued. | |
293 | virtual bool is_sync_onreadable() const { | |
294 | return true; | |
295 | } | |
296 | ||
31f18b77 FG |
297 | /** |
298 | * is_rotational | |
299 | * | |
300 | * Check whether store is backed by a rotational (HDD) or non-rotational | |
301 | * (SSD) device. | |
302 | * | |
303 | * This must be usable *before* the store is mounted. | |
304 | * | |
305 | * @return true for HDD, false for SSD | |
306 | */ | |
307 | virtual bool is_rotational() { | |
308 | return true; | |
309 | } | |
310 | ||
d2e6a577 FG |
311 | /** |
312 | * is_journal_rotational | |
313 | * | |
314 | * Check whether journal is backed by a rotational (HDD) or non-rotational | |
315 | * (SSD) device. | |
316 | * | |
317 | * | |
318 | * @return true for HDD, false for SSD | |
319 | */ | |
320 | virtual bool is_journal_rotational() { | |
321 | return true; | |
322 | } | |
323 | ||
9f95a23c | 324 | virtual std::string get_default_device_class() { |
224ce89b WB |
325 | return is_rotational() ? "hdd" : "ssd"; |
326 | } | |
327 | ||
11fdf7f2 TL |
328 | virtual int get_numa_node( |
329 | int *numa_node, | |
9f95a23c TL |
330 | std::set<int> *nodes, |
331 | std::set<std::string> *failed) { | |
11fdf7f2 TL |
332 | return -EOPNOTSUPP; |
333 | } | |
334 | ||
335 | ||
7c673cae FG |
336 | virtual bool can_sort_nibblewise() { |
337 | return false; // assume a backend cannot, unless it says otherwise | |
338 | } | |
339 | ||
11fdf7f2 TL |
340 | virtual int statfs(struct store_statfs_t *buf, |
341 | osd_alert_list_t* alerts = nullptr) = 0; | |
9f95a23c TL |
342 | virtual int pool_statfs(uint64_t pool_id, struct store_statfs_t *buf, |
343 | bool *per_pool_omap) = 0; | |
7c673cae | 344 | |
9f95a23c | 345 | virtual void collect_metadata(std::map<std::string,string> *pm) { } |
7c673cae FG |
346 | |
347 | /** | |
348 | * write_meta - write a simple configuration key out-of-band | |
349 | * | |
350 | * Write a simple key/value pair for basic store configuration | |
351 | * (e.g., a uuid or magic number) to an unopened/unmounted store. | |
352 | * The default implementation writes this to a plaintext file in the | |
353 | * path. | |
354 | * | |
355 | * A newline is appended. | |
356 | * | |
357 | * @param key key name (e.g., "fsid") | |
9f95a23c | 358 | * @param value value (e.g., a uuid rendered as a std::string) |
7c673cae FG |
359 | * @returns 0 for success, or an error code |
360 | */ | |
361 | virtual int write_meta(const std::string& key, | |
362 | const std::string& value); | |
363 | ||
364 | /** | |
365 | * read_meta - read a simple configuration key out-of-band | |
366 | * | |
367 | * Read a simple key value from an unopened/unmounted store. | |
368 | * | |
369 | * Trailing whitespace is stripped off. | |
370 | * | |
371 | * @param key key name | |
9f95a23c | 372 | * @param value pointer to value std::string |
7c673cae FG |
373 | * @returns 0 for success, or an error code |
374 | */ | |
375 | virtual int read_meta(const std::string& key, | |
376 | std::string *value); | |
377 | ||
378 | /** | |
379 | * get ideal max value for collection_list() | |
380 | * | |
381 | * default to some arbitrary values; the implementation will override. | |
382 | */ | |
383 | virtual int get_ideal_list_max() { return 64; } | |
384 | ||
385 | ||
386 | /** | |
387 | * get a collection handle | |
388 | * | |
389 | * Provide a trivial handle as a default to avoid converting legacy | |
390 | * implementations. | |
391 | */ | |
11fdf7f2 TL |
392 | virtual CollectionHandle open_collection(const coll_t &cid) = 0; |
393 | ||
394 | /** | |
395 | * get a collection handle for a soon-to-be-created collection | |
396 | * | |
397 | * This handle must be used by queue_transaction that includes a | |
398 | * create_collection call in order to become valid. It will become the | |
399 | * reference to the created collection. | |
400 | */ | |
401 | virtual CollectionHandle create_new_collection(const coll_t &cid) = 0; | |
7c673cae | 402 | |
11fdf7f2 | 403 | /** |
9f95a23c | 404 | * set ContextQueue for a collection |
11fdf7f2 TL |
405 | * |
406 | * After that, oncommits of Transaction will queue into commit_queue. | |
407 | * And osd ShardThread will call oncommits. | |
408 | */ | |
409 | virtual void set_collection_commit_queue(const coll_t &cid, ContextQueue *commit_queue) = 0; | |
7c673cae FG |
410 | |
411 | /** | |
412 | * Synchronous read operations | |
413 | */ | |
414 | ||
415 | /** | |
416 | * exists -- Test for existence of object | |
417 | * | |
418 | * @param cid collection for object | |
419 | * @param oid oid of object | |
420 | * @returns true if object exists, false otherwise | |
421 | */ | |
11fdf7f2 | 422 | virtual bool exists(CollectionHandle& c, const ghobject_t& oid) = 0; |
7c673cae | 423 | /** |
9f95a23c | 424 | * set_collection_opts -- set pool options for a collection |
7c673cae FG |
425 | * |
426 | * @param cid collection | |
427 | * @param opts new collection options | |
428 | * @returns 0 on success, negative error code on failure. | |
429 | */ | |
430 | virtual int set_collection_opts( | |
11fdf7f2 | 431 | CollectionHandle& c, |
7c673cae FG |
432 | const pool_opts_t& opts) = 0; |
433 | ||
434 | /** | |
435 | * stat -- get information for an object | |
436 | * | |
437 | * @param cid collection for object | |
438 | * @param oid oid of object | |
439 | * @param st output information for the object | |
440 | * @param allow_eio if false, assert on -EIO operation failure | |
441 | * @returns 0 on success, negative error code on failure. | |
442 | */ | |
7c673cae FG |
443 | virtual int stat( |
444 | CollectionHandle &c, | |
445 | const ghobject_t& oid, | |
446 | struct stat *st, | |
11fdf7f2 | 447 | bool allow_eio = false) = 0; |
7c673cae FG |
448 | /** |
449 | * read -- read a byte range of data from an object | |
450 | * | |
451 | * Note: if reading from an offset past the end of the object, we | |
452 | * return 0 (not, say, -EINVAL). | |
453 | * | |
454 | * @param cid collection for object | |
455 | * @param oid oid of object | |
456 | * @param offset location offset of first byte to be read | |
457 | * @param len number of bytes to be read | |
9f95a23c | 458 | * @param bl output ceph::buffer::list |
7c673cae | 459 | * @param op_flags is CEPH_OSD_OP_FLAG_* |
7c673cae FG |
460 | * @returns number of bytes read on success, or negative error code on failure. |
461 | */ | |
7c673cae FG |
462 | virtual int read( |
463 | CollectionHandle &c, | |
464 | const ghobject_t& oid, | |
465 | uint64_t offset, | |
466 | size_t len, | |
9f95a23c | 467 | ceph::buffer::list& bl, |
11fdf7f2 | 468 | uint32_t op_flags = 0) = 0; |
7c673cae FG |
469 | |
470 | /** | |
9f95a23c | 471 | * fiemap -- get extent map of data of an object |
7c673cae | 472 | * |
9f95a23c TL |
473 | * Returns an encoded map of the extents of an object's data portion |
474 | * (std::map<offset,size>). | |
7c673cae FG |
475 | * |
476 | * A non-enlightened implementation is free to return the extent (offset, len) | |
477 | * as the sole extent. | |
478 | * | |
479 | * @param cid collection for object | |
480 | * @param oid oid of object | |
481 | * @param offset location offset of first byte to be read | |
482 | * @param len number of bytes to be read | |
9f95a23c | 483 | * @param bl output ceph::buffer::list for extent map information. |
7c673cae FG |
484 | * @returns 0 on success, negative error code on failure. |
485 | */ | |
7c673cae | 486 | virtual int fiemap(CollectionHandle& c, const ghobject_t& oid, |
9f95a23c | 487 | uint64_t offset, size_t len, ceph::buffer::list& bl) = 0; |
7c673cae | 488 | virtual int fiemap(CollectionHandle& c, const ghobject_t& oid, |
9f95a23c TL |
489 | uint64_t offset, size_t len, std::map<uint64_t, uint64_t>& destmap) = 0; |
490 | ||
491 | /** | |
492 | * readv -- read specific intervals from an object; | |
493 | * caller must call fiemap to fill in the extent-map first. | |
494 | * | |
495 | * Note: if reading from an offset past the end of the object, we | |
496 | * return 0 (not, say, -EINVAL). Also the default version of readv | |
497 | * reads each extent separately synchronously, which can become horribly | |
498 | * inefficient if the physical layout of the pushing object get massively | |
499 | * fragmented and hence should be overridden by any real os that | |
500 | * cares about the performance.. | |
501 | * | |
502 | * @param cid collection for object | |
503 | * @param oid oid of object | |
504 | * @param m intervals to be read | |
505 | * @param bl output ceph::buffer::list | |
506 | * @param op_flags is CEPH_OSD_OP_FLAG_* | |
507 | * @returns number of bytes read on success, or negative error code on failure. | |
508 | */ | |
509 | virtual int readv( | |
510 | CollectionHandle &c, | |
511 | const ghobject_t& oid, | |
512 | interval_set<uint64_t>& m, | |
513 | ceph::buffer::list& bl, | |
514 | uint32_t op_flags = 0) { | |
515 | int total = 0; | |
516 | for (auto p = m.begin(); p != m.end(); p++) { | |
517 | bufferlist t; | |
518 | int r = read(c, oid, p.get_start(), p.get_len(), t, op_flags); | |
519 | if (r < 0) | |
520 | return r; | |
521 | total += r; | |
522 | // prune fiemap, if necessary | |
523 | if (p.get_len() != t.length()) { | |
524 | auto save = p++; | |
525 | if (t.length() == 0) { | |
526 | m.erase(save); // Remove this empty interval | |
527 | } else { | |
528 | save.set_len(t.length()); // fix interval length | |
529 | bl.claim_append(t); | |
530 | } | |
531 | // Remove any other follow-up intervals present too | |
532 | while (p != m.end()) { | |
533 | save = p++; | |
534 | m.erase(save); | |
535 | } | |
536 | break; | |
537 | } | |
538 | bl.claim_append(t); | |
539 | } | |
540 | return total; | |
541 | } | |
542 | ||
543 | /** | |
544 | * dump_onode -- dumps onode metadata in human readable form, | |
545 | intended primarily for debugging | |
546 | * | |
547 | * @param cid collection for object | |
548 | * @param oid oid of object | |
549 | * @param section_name section name to create and print under | |
550 | * @param f Formatter class instance to print to | |
551 | * @returns 0 on success, negative error code on failure. | |
552 | */ | |
553 | virtual int dump_onode( | |
554 | CollectionHandle &c, | |
555 | const ghobject_t& oid, | |
556 | const string& section_name, | |
557 | Formatter *f) { | |
558 | return -ENOTSUP; | |
559 | } | |
7c673cae FG |
560 | |
561 | /** | |
562 | * getattr -- get an xattr of an object | |
563 | * | |
564 | * @param cid collection for object | |
565 | * @param oid oid of object | |
566 | * @param name name of attr to read | |
567 | * @param value place to put output result. | |
568 | * @returns 0 on success, negative error code on failure. | |
569 | */ | |
7c673cae | 570 | virtual int getattr(CollectionHandle &c, const ghobject_t& oid, |
9f95a23c | 571 | const char *name, ceph::buffer::ptr& value) = 0; |
7c673cae FG |
572 | |
573 | /** | |
574 | * getattr -- get an xattr of an object | |
575 | * | |
576 | * @param cid collection for object | |
577 | * @param oid oid of object | |
578 | * @param name name of attr to read | |
579 | * @param value place to put output result. | |
580 | * @returns 0 on success, negative error code on failure. | |
581 | */ | |
7c673cae FG |
582 | int getattr( |
583 | CollectionHandle &c, const ghobject_t& oid, | |
9f95a23c TL |
584 | const std::string& name, ceph::buffer::list& value) { |
585 | ceph::buffer::ptr bp; | |
7c673cae FG |
586 | int r = getattr(c, oid, name.c_str(), bp); |
587 | value.push_back(bp); | |
588 | return r; | |
589 | } | |
590 | ||
591 | /** | |
592 | * getattrs -- get all of the xattrs of an object | |
593 | * | |
594 | * @param cid collection for object | |
595 | * @param oid oid of object | |
596 | * @param aset place to put output result. | |
597 | * @returns 0 on success, negative error code on failure. | |
598 | */ | |
7c673cae | 599 | virtual int getattrs(CollectionHandle &c, const ghobject_t& oid, |
9f95a23c | 600 | std::map<std::string,ceph::buffer::ptr>& aset) = 0; |
7c673cae FG |
601 | |
602 | /** | |
603 | * getattrs -- get all of the xattrs of an object | |
604 | * | |
605 | * @param cid collection for object | |
606 | * @param oid oid of object | |
607 | * @param aset place to put output result. | |
608 | * @returns 0 on success, negative error code on failure. | |
609 | */ | |
7c673cae | 610 | int getattrs(CollectionHandle &c, const ghobject_t& oid, |
9f95a23c TL |
611 | std::map<std::string,ceph::buffer::list>& aset) { |
612 | std::map<std::string,ceph::buffer::ptr> bmap; | |
7c673cae | 613 | int r = getattrs(c, oid, bmap); |
9f95a23c | 614 | for (auto i = bmap.begin(); i != bmap.end(); ++i) { |
7c673cae FG |
615 | aset[i->first].append(i->second); |
616 | } | |
617 | return r; | |
618 | } | |
619 | ||
620 | ||
621 | // collections | |
622 | ||
623 | /** | |
624 | * list_collections -- get all of the collections known to this ObjectStore | |
625 | * | |
9f95a23c | 626 | * @param ls list of the collections in sorted order. |
7c673cae FG |
627 | * @returns 0 on success, negative error code on failure. |
628 | */ | |
9f95a23c | 629 | virtual int list_collections(std::vector<coll_t>& ls) = 0; |
7c673cae FG |
630 | |
631 | /** | |
632 | * does a collection exist? | |
633 | * | |
634 | * @param c collection | |
635 | * @returns true if it exists, false otherwise | |
636 | */ | |
637 | virtual bool collection_exists(const coll_t& c) = 0; | |
638 | ||
639 | /** | |
640 | * is a collection empty? | |
641 | * | |
642 | * @param c collection | |
643 | * @param empty true if the specified collection is empty, false otherwise | |
644 | * @returns 0 on success, negative error code on failure. | |
645 | */ | |
11fdf7f2 | 646 | virtual int collection_empty(CollectionHandle& c, bool *empty) = 0; |
7c673cae FG |
647 | |
648 | /** | |
649 | * return the number of significant bits of the coll_t::pgid. | |
650 | * | |
651 | * This should return what the last create_collection or split_collection | |
9f95a23c | 652 | * set. A legacy backend may return -EAGAIN if the value is unavailable |
7c673cae FG |
653 | * (because we upgraded from an older version, e.g., FileStore). |
654 | */ | |
11fdf7f2 | 655 | virtual int collection_bits(CollectionHandle& c) = 0; |
7c673cae FG |
656 | |
657 | ||
658 | /** | |
9f95a23c | 659 | * list contents of a collection that fall in the range [start, end), returning no more than a specified number of results |
7c673cae FG |
660 | * |
661 | * @param c collection | |
662 | * @param start list objects that sort >= this value | |
663 | * @param end list objects that sort < this value | |
664 | * @param max return no more than this many results | |
665 | * @param seq return no objects with snap < seq | |
666 | * @param ls [out] result | |
667 | * @param next [out] next item sorts >= this value | |
668 | * @return zero on success, or negative error | |
669 | */ | |
7c673cae FG |
670 | virtual int collection_list(CollectionHandle &c, |
671 | const ghobject_t& start, const ghobject_t& end, | |
672 | int max, | |
9f95a23c | 673 | std::vector<ghobject_t> *ls, ghobject_t *next) = 0; |
7c673cae | 674 | |
f91f0fd5 TL |
675 | virtual int collection_list_legacy(CollectionHandle &c, |
676 | const ghobject_t& start, | |
677 | const ghobject_t& end, int max, | |
678 | std::vector<ghobject_t> *ls, | |
679 | ghobject_t *next) { | |
680 | return collection_list(c, start, end, max, ls, next); | |
681 | } | |
7c673cae FG |
682 | |
683 | /// OMAP | |
684 | /// Get omap contents | |
7c673cae FG |
685 | virtual int omap_get( |
686 | CollectionHandle &c, ///< [in] Collection containing oid | |
687 | const ghobject_t &oid, ///< [in] Object containing omap | |
9f95a23c TL |
688 | ceph::buffer::list *header, ///< [out] omap header |
689 | std::map<std::string, ceph::buffer::list> *out /// < [out] Key to value std::map | |
11fdf7f2 | 690 | ) = 0; |
7c673cae FG |
691 | |
692 | /// Get omap header | |
7c673cae FG |
693 | virtual int omap_get_header( |
694 | CollectionHandle &c, ///< [in] Collection containing oid | |
695 | const ghobject_t &oid, ///< [in] Object containing omap | |
9f95a23c | 696 | ceph::buffer::list *header, ///< [out] omap header |
7c673cae | 697 | bool allow_eio = false ///< [in] don't assert on eio |
11fdf7f2 | 698 | ) = 0; |
7c673cae FG |
699 | |
700 | /// Get keys defined on oid | |
7c673cae FG |
701 | virtual int omap_get_keys( |
702 | CollectionHandle &c, ///< [in] Collection containing oid | |
703 | const ghobject_t &oid, ///< [in] Object containing omap | |
9f95a23c | 704 | std::set<std::string> *keys ///< [out] Keys defined on oid |
11fdf7f2 | 705 | ) = 0; |
7c673cae FG |
706 | |
707 | /// Get key values | |
7c673cae FG |
708 | virtual int omap_get_values( |
709 | CollectionHandle &c, ///< [in] Collection containing oid | |
710 | const ghobject_t &oid, ///< [in] Object containing omap | |
9f95a23c TL |
711 | const std::set<std::string> &keys, ///< [in] Keys to get |
712 | std::map<std::string, ceph::buffer::list> *out ///< [out] Returned keys and values | |
11fdf7f2 | 713 | ) = 0; |
7c673cae | 714 | |
9f95a23c TL |
715 | #ifdef WITH_SEASTAR |
716 | virtual int omap_get_values( | |
717 | CollectionHandle &c, ///< [in] Collection containing oid | |
718 | const ghobject_t &oid, ///< [in] Object containing omap | |
719 | const std::optional<std::string> &start_after, ///< [in] Keys to get | |
720 | std::map<std::string, ceph::buffer::list> *out ///< [out] Returned keys and values | |
721 | ) = 0; | |
722 | #endif | |
723 | ||
7c673cae | 724 | /// Filters keys into out which are defined on oid |
7c673cae FG |
725 | virtual int omap_check_keys( |
726 | CollectionHandle &c, ///< [in] Collection containing oid | |
727 | const ghobject_t &oid, ///< [in] Object containing omap | |
9f95a23c TL |
728 | const std::set<std::string> &keys, ///< [in] Keys to check |
729 | std::set<std::string> *out ///< [out] Subset of keys defined on oid | |
11fdf7f2 | 730 | ) = 0; |
7c673cae FG |
731 | |
732 | /** | |
733 | * Returns an object map iterator | |
734 | * | |
735 | * Warning! The returned iterator is an implicit lock on filestore | |
736 | * operations in c. Do not use filestore methods on c while the returned | |
737 | * iterator is live. (Filling in a transaction is no problem). | |
738 | * | |
739 | * @return iterator, null on error | |
740 | */ | |
7c673cae FG |
741 | virtual ObjectMap::ObjectMapIterator get_omap_iterator( |
742 | CollectionHandle &c, ///< [in] collection | |
743 | const ghobject_t &oid ///< [in] object | |
11fdf7f2 | 744 | ) = 0; |
7c673cae FG |
745 | |
746 | virtual int flush_journal() { return -EOPNOTSUPP; } | |
747 | ||
9f95a23c | 748 | virtual int dump_journal(std::ostream& out) { return -EOPNOTSUPP; } |
7c673cae | 749 | |
9f95a23c | 750 | virtual int snapshot(const std::string& name) { return -EOPNOTSUPP; } |
7c673cae FG |
751 | |
752 | /** | |
753 | * Set and get internal fsid for this instance. No external data is modified | |
754 | */ | |
755 | virtual void set_fsid(uuid_d u) = 0; | |
756 | virtual uuid_d get_fsid() = 0; | |
757 | ||
758 | /** | |
759 | * Estimates additional disk space used by the specified amount of objects and caused by file allocation granularity and metadata store | |
760 | * - num objects - total (including whiteouts) object count to measure used space for. | |
761 | */ | |
762 | virtual uint64_t estimate_objects_overhead(uint64_t num_objects) = 0; | |
763 | ||
764 | ||
765 | // DEBUG | |
766 | virtual void inject_data_error(const ghobject_t &oid) {} | |
767 | virtual void inject_mdata_error(const ghobject_t &oid) {} | |
224ce89b WB |
768 | |
769 | virtual void compact() {} | |
28e407b8 AA |
770 | virtual bool has_builtin_csum() const { |
771 | return false; | |
772 | } | |
7c673cae | 773 | }; |
7c673cae FG |
774 | |
775 | #endif |