]> git.proxmox.com Git - ceph.git/blame - ceph/src/os/ObjectStore.h
bump version to 15.2.11-pve1
[ceph.git] / ceph / src / os / ObjectStore.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14#ifndef CEPH_OBJECTSTORE_H
15#define CEPH_OBJECTSTORE_H
16
9f95a23c 17#include "include/common_fwd.h"
7c673cae
FG
18#include "include/Context.h"
19#include "include/buffer.h"
20#include "include/types.h"
11fdf7f2 21#include "include/stringify.h"
7c673cae
FG
22#include "osd/osd_types.h"
23#include "common/TrackedOp.h"
24#include "common/WorkQueue.h"
25#include "ObjectMap.h"
9f95a23c 26#include "os/Transaction.h"
7c673cae
FG
27
28#include <errno.h>
29#include <sys/stat.h>
30#include <vector>
31#include <map>
32
11fdf7f2 33#if defined(__APPLE__) || defined(__FreeBSD__) || defined(__sun)
7c673cae
FG
34#include <sys/statvfs.h>
35#else
36#include <sys/vfs.h> /* or <sys/statfs.h> */
11fdf7f2 37#endif
7c673cae 38
7c673cae
FG
39namespace ceph {
40 class Formatter;
41}
42
43/*
44 * low-level interface to the local OSD file system
45 */
46
47class Logger;
11fdf7f2 48class ContextQueue;
7c673cae 49
9f95a23c
TL
50static inline void encode(const std::map<std::string,ceph::buffer::ptr> *attrset, ceph::buffer::list &bl) {
51 using ceph::encode;
11fdf7f2 52 encode(*attrset, bl);
7c673cae
FG
53}
54
7c673cae
FG
55// Flag bits
using osflagbits_t = uint32_t;               // bitmask type taken by ObjectStore::create()
constexpr int SKIP_JOURNAL_REPLAY = 1 << 0;  // bit 0
constexpr int SKIP_MOUNT_OMAP = 1 << 1;      // bit 1
59
60class ObjectStore {
61protected:
9f95a23c 62 std::string path;
7c673cae
FG
63
64public:
9f95a23c
TL
65 using Transaction = ceph::os::Transaction;
66
7c673cae
FG
67 CephContext* cct;
68 /**
69 * create - create an ObjectStore instance.
70 *
71 * This is invoked once at initialization time.
72 *
9f95a23c 73 * @param type type of store. This is a std::string from the configuration file.
7c673cae
FG
74 * @param data path (or other descriptor) for data
75 * @param journal path (or other descriptor) for journal (optional)
76 * @param flags which filestores should check if applicable
77 */
78 static ObjectStore *create(CephContext *cct,
9f95a23c
TL
79 const std::string& type,
80 const std::string& data,
81 const std::string& journal,
7c673cae
FG
82 osflagbits_t flags = 0);
83
84 /**
85 * probe a block device to learn the uuid of the owning OSD
86 *
87 * @param cct cct
88 * @param path path to device
89 * @param fsid [out] osd uuid
90 */
91 static int probe_block_device_fsid(
92 CephContext *cct,
9f95a23c 93 const std::string& path,
7c673cae
FG
94 uuid_d *fsid);
95
96 /**
97 * Fetch Object Store statistics.
98 *
99 * Currently only latency of write and apply times are measured.
100 *
101 * This appears to be called with nothing locked.
102 */
103 virtual objectstore_perf_stat_t get_cur_stats() = 0;
104
105 /**
106 * Fetch Object Store performance counters.
107 *
108 *
109 * This appears to be called with nothing locked.
110 */
111 virtual const PerfCounters* get_perf_counters() const = 0;
112
113 /**
11fdf7f2 114 * a collection also orders transactions
7c673cae 115 *
11fdf7f2
TL
116 * Any transactions queued under a given collection will be applied in
117 * sequence. Transactions queued under different collections may run
7c673cae
FG
118 * in parallel.
119 *
9f95a23c 120 * ObjectStore users may get collection handles with open_collection() (or,
11fdf7f2 121 * for bootstrapping a new collection, create_new_collection()).
7c673cae 122 */
11fdf7f2
TL
123 struct CollectionImpl : public RefCountedObject {
124 const coll_t cid;
7c673cae 125
11fdf7f2 126 /// wait for any queued transactions to apply
7c673cae
FG
127 // block until any previous transactions are visible. specifically,
128 // collection_list and collection_empty need to reflect prior operations.
129 virtual void flush() = 0;
130
7c673cae
FG
131 /**
132 * Async flush_commit
133 *
134 * There are two cases:
11fdf7f2 135 * 1) collection is currently idle: the method returns true. c is
7c673cae 136 * not touched.
11fdf7f2
TL
137 * 2) collection is not idle: the method returns false and c is
138 * called asynchronously with a value of 0 once all transactions
139 * queued on this collection prior to the call have been applied
7c673cae
FG
140 * and committed.
141 */
11fdf7f2 142 virtual bool flush_commit(Context *c) = 0;
7c673cae 143
11fdf7f2
TL
144 const coll_t &get_cid() {
145 return cid;
7c673cae 146 }
9f95a23c
TL
147 protected:
148 CollectionImpl() = delete;
149 CollectionImpl(CephContext* cct, const coll_t& c) : RefCountedObject(cct), cid(c) {}
150 ~CollectionImpl() = default;
7c673cae 151 };
9f95a23c 152 using CollectionHandle = ceph::ref_t<CollectionImpl>;
7c673cae 153
7c673cae
FG
154
155 /*********************************
156 *
157 * Object Contents and semantics
158 *
159 * All ObjectStore objects are identified as a named object
160 * (ghobject_t and hobject_t) in a named collection (coll_t).
161 * ObjectStore operations support the creation, mutation, deletion
162 * and enumeration of objects within a collection. Enumeration is
163 * in sorted key order (where keys are sorted by hash). Object names
164 * are globally unique.
165 *
166 * Each object has four distinct parts: byte data, xattrs, omap_header
167 * and omap entries.
168 *
169 * The data portion of an object is conceptually equivalent to a
170 * file in a file system. Random and Partial access for both read
171 * and write operations is required. The ability to have a sparse
172 * implementation of the data portion of an object is beneficial for
173 * some workloads, but not required. There is a system-wide limit on
174 * the maximum size of an object, which is typically around 100 MB.
175 *
176 * Xattrs are equivalent to the extended attributes of file
9f95a23c
TL
177 * systems. Xattrs are a set of key/value pairs. Sub-value access
178 * is not required. It is possible to enumerate the set of xattrs in
7c673cae
FG
179 * key order. At the implementation level, xattrs are used
180 * exclusively internal to Ceph and the implementer can expect the
181 * total size of all of the xattrs on an object to be relatively
182 * small, i.e., less than 64KB. Much of Ceph assumes that accessing
183 * xattrs on temporally adjacent object accesses (recent past or
184 * near future) is inexpensive.
185 *
186 * omap_header is a single blob of data. It can be read or written
187 * in total.
188 *
189 * Omap entries are conceptually the same as xattrs
190 * but in a different address space. In other words, you can have
191 * the same key as an xattr and an omap entry and they have distinct
192 * values. Enumeration of xattrs doesn't include omap entries and
193 * vice versa. The size and access characteristics of omap entries
194 * are very different from xattrs. In particular, the value portion
195 * of an omap entry can be quite large (MBs). More importantly, the
196 * interface must support efficient range queries on omap entries even
197 * when there is a large number of entries.
198 *
199 *********************************/
200
201 /*******************************
202 *
203 * Collections
204 *
205 * A collection is simply a grouping of objects. Collections have
206 * names (coll_t) and can be enumerated in order. Like an
9f95a23c 207 * individual object, a collection also has a set of xattrs.
7c673cae 208 *
7c673cae
FG
209 *
210 */
7c673cae 211
7c673cae 212
11fdf7f2
TL
213 int queue_transaction(CollectionHandle& ch,
214 Transaction&& t,
215 TrackedOpRef op = TrackedOpRef(),
216 ThreadPool::TPHandle *handle = NULL) {
9f95a23c 217 std::vector<Transaction> tls;
7c673cae 218 tls.push_back(std::move(t));
11fdf7f2 219 return queue_transactions(ch, tls, op, handle);
7c673cae
FG
220 }
221
222 virtual int queue_transactions(
9f95a23c 223 CollectionHandle& ch, std::vector<Transaction>& tls,
7c673cae
FG
224 TrackedOpRef op = TrackedOpRef(),
225 ThreadPool::TPHandle *handle = NULL) = 0;
226
227
7c673cae
FG
228 public:
229 ObjectStore(CephContext* cct,
230 const std::string& path_) : path(path_), cct(cct) {}
231 virtual ~ObjectStore() {}
232
233 // no copying
234 explicit ObjectStore(const ObjectStore& o) = delete;
235 const ObjectStore& operator=(const ObjectStore& o) = delete;
236
237 // versioning
238 virtual int upgrade() {
239 return 0;
240 }
241
9f95a23c
TL
242 virtual void get_db_statistics(ceph::Formatter *f) { }
243 virtual void generate_db_histogram(ceph::Formatter *f) { }
244 virtual int flush_cache(std::ostream *os = NULL) { return -1; }
245 virtual void dump_perf_counters(ceph::Formatter *f) {}
246 virtual void dump_cache_stats(ceph::Formatter *f) {}
247 virtual void dump_cache_stats(std::ostream& os) {}
7c673cae 248
9f95a23c 249 virtual std::string get_type() = 0;
7c673cae
FG
250
251 // mgmt
252 virtual bool test_mount_in_use() = 0;
253 virtual int mount() = 0;
254 virtual int umount() = 0;
255 virtual int fsck(bool deep) {
256 return -EOPNOTSUPP;
257 }
3efd9988
FG
258 virtual int repair(bool deep) {
259 return -EOPNOTSUPP;
260 }
eafe8130
TL
261 virtual int quick_fix() {
262 return -EOPNOTSUPP;
263 }
7c673cae
FG
264
265 virtual void set_cache_shards(unsigned num) { }
266
267 /**
268 * Returns 0 if the hobject is valid, -error otherwise
269 *
270 * Errors:
271 * -ENAMETOOLONG: locator/namespace/name too large
272 */
273 virtual int validate_hobject_key(const hobject_t &obj) const = 0;
274
275 virtual unsigned get_max_attr_name_length() = 0;
276 virtual int mkfs() = 0; // wipe
277 virtual int mkjournal() = 0; // journal only
278 virtual bool needs_journal() = 0; //< requires a journal
279 virtual bool wants_journal() = 0; //< prefers a journal
280 virtual bool allows_journal() = 0; //< allows a journal
281
9f95a23c
TL
282 // return store min allocation size, if applicable
283 virtual uint64_t get_min_alloc_size() const {
284 return 0;
285 }
286
11fdf7f2 287 /// enumerate hardware devices (by 'devname', e.g., 'sda' as in /sys/block/sda)
9f95a23c 288 virtual int get_devices(std::set<std::string> *devls) {
11fdf7f2
TL
289 return -EOPNOTSUPP;
290 }
291
292 /// true if a txn is readable immediately after it is queued.
293 virtual bool is_sync_onreadable() const {
294 return true;
295 }
296
31f18b77
FG
297 /**
298 * is_rotational
299 *
300 * Check whether store is backed by a rotational (HDD) or non-rotational
301 * (SSD) device.
302 *
303 * This must be usable *before* the store is mounted.
304 *
305 * @return true for HDD, false for SSD
306 */
307 virtual bool is_rotational() {
308 return true;
309 }
310
d2e6a577
FG
311 /**
312 * is_journal_rotational
313 *
314 * Check whether journal is backed by a rotational (HDD) or non-rotational
315 * (SSD) device.
316 *
317 *
318 * @return true for HDD, false for SSD
319 */
320 virtual bool is_journal_rotational() {
321 return true;
322 }
323
9f95a23c 324 virtual std::string get_default_device_class() {
224ce89b
WB
325 return is_rotational() ? "hdd" : "ssd";
326 }
327
11fdf7f2
TL
328 virtual int get_numa_node(
329 int *numa_node,
9f95a23c
TL
330 std::set<int> *nodes,
331 std::set<std::string> *failed) {
11fdf7f2
TL
332 return -EOPNOTSUPP;
333 }
334
335
7c673cae
FG
336 virtual bool can_sort_nibblewise() {
337 return false; // assume a backend cannot, unless it says otherwise
338 }
339
11fdf7f2
TL
340 virtual int statfs(struct store_statfs_t *buf,
341 osd_alert_list_t* alerts = nullptr) = 0;
9f95a23c
TL
342 virtual int pool_statfs(uint64_t pool_id, struct store_statfs_t *buf,
343 bool *per_pool_omap) = 0;
7c673cae 344
9f95a23c 345 virtual void collect_metadata(std::map<std::string,string> *pm) { }
7c673cae
FG
346
347 /**
348 * write_meta - write a simple configuration key out-of-band
349 *
350 * Write a simple key/value pair for basic store configuration
351 * (e.g., a uuid or magic number) to an unopened/unmounted store.
352 * The default implementation writes this to a plaintext file in the
353 * path.
354 *
355 * A newline is appended.
356 *
357 * @param key key name (e.g., "fsid")
9f95a23c 358 * @param value value (e.g., a uuid rendered as a std::string)
7c673cae
FG
359 * @returns 0 for success, or an error code
360 */
361 virtual int write_meta(const std::string& key,
362 const std::string& value);
363
364 /**
365 * read_meta - read a simple configuration key out-of-band
366 *
367 * Read a simple key value from an unopened/mounted store.
368 *
369 * Trailing whitespace is stripped off.
370 *
371 * @param key key name
9f95a23c 372 * @param value pointer to value std::string
7c673cae
FG
373 * @returns 0 for success, or an error code
374 */
375 virtual int read_meta(const std::string& key,
376 std::string *value);
377
378 /**
379 * get ideal max value for collection_list()
380 *
381 * default to some arbitrary values; the implementation will override.
382 */
383 virtual int get_ideal_list_max() { return 64; }
384
385
386 /**
387 * get a collection handle
388 *
389 * Provide a trivial handle as a default to avoid converting legacy
390 * implementations.
391 */
11fdf7f2
TL
392 virtual CollectionHandle open_collection(const coll_t &cid) = 0;
393
394 /**
395 * get a collection handle for a soon-to-be-created collection
396 *
397 * This handle must be used by queue_transaction that includes a
398 * create_collection call in order to become valid. It will become the
399 * reference to the created collection.
400 */
401 virtual CollectionHandle create_new_collection(const coll_t &cid) = 0;
7c673cae 402
11fdf7f2 403 /**
9f95a23c 404 * set ContextQueue for a collection
11fdf7f2
TL
405 *
406 * After that, oncommits of Transaction will queue into commit_queue.
407 * And osd ShardThread will call oncommits.
408 */
409 virtual void set_collection_commit_queue(const coll_t &cid, ContextQueue *commit_queue) = 0;
7c673cae
FG
410
411 /**
412 * Synchronous read operations
413 */
414
415 /**
416 * exists -- Test for existence of object
417 *
418 * @param cid collection for object
419 * @param oid oid of object
420 * @returns true if object exists, false otherwise
421 */
11fdf7f2 422 virtual bool exists(CollectionHandle& c, const ghobject_t& oid) = 0;
7c673cae 423 /**
9f95a23c 424 * set_collection_opts -- set pool options for a collection
7c673cae
FG
425 *
426 * @param cid collection
427 * @param opts new collection options
428 * @returns 0 on success, negative error code on failure.
429 */
430 virtual int set_collection_opts(
11fdf7f2 431 CollectionHandle& c,
7c673cae
FG
432 const pool_opts_t& opts) = 0;
433
434 /**
435 * stat -- get information for an object
436 *
437 * @param cid collection for object
438 * @param oid oid of object
439 * @param st output information for the object
440 * @param allow_eio if false, assert on -EIO operation failure
441 * @returns 0 on success, negative error code on failure.
442 */
7c673cae
FG
443 virtual int stat(
444 CollectionHandle &c,
445 const ghobject_t& oid,
446 struct stat *st,
11fdf7f2 447 bool allow_eio = false) = 0;
7c673cae
FG
448 /**
449 * read -- read a byte range of data from an object
450 *
451 * Note: if reading from an offset past the end of the object, we
452 * return 0 (not, say, -EINVAL).
453 *
454 * @param cid collection for object
455 * @param oid oid of object
456 * @param offset location offset of first byte to be read
457 * @param len number of bytes to be read
9f95a23c 458 * @param bl output ceph::buffer::list
7c673cae 459 * @param op_flags is CEPH_OSD_OP_FLAG_*
7c673cae
FG
460 * @returns number of bytes read on success, or negative error code on failure.
461 */
7c673cae
FG
462 virtual int read(
463 CollectionHandle &c,
464 const ghobject_t& oid,
465 uint64_t offset,
466 size_t len,
9f95a23c 467 ceph::buffer::list& bl,
11fdf7f2 468 uint32_t op_flags = 0) = 0;
7c673cae
FG
469
470 /**
9f95a23c 471 * fiemap -- get extent map of data of an object
7c673cae 472 *
9f95a23c
TL
473 * Returns an encoded std::map of the extents of an object's data portion
474 * (std::map<offset,size>).
7c673cae
FG
475 *
476 * A non-enlightened implementation is free to return the extent (offset, len)
477 * as the sole extent.
478 *
479 * @param cid collection for object
480 * @param oid oid of object
481 * @param offset location offset of first byte to be read
482 * @param len number of bytes to be read
9f95a23c 483 * @param bl output ceph::buffer::list for extent map information.
7c673cae
FG
484 * @returns 0 on success, negative error code on failure.
485 */
7c673cae 486 virtual int fiemap(CollectionHandle& c, const ghobject_t& oid,
9f95a23c 487 uint64_t offset, size_t len, ceph::buffer::list& bl) = 0;
7c673cae 488 virtual int fiemap(CollectionHandle& c, const ghobject_t& oid,
9f95a23c
TL
489 uint64_t offset, size_t len, std::map<uint64_t, uint64_t>& destmap) = 0;
490
491 /**
492 * readv -- read specific intervals from an object;
493 * caller must call fiemap to fill in the extent-map first.
494 *
495 * Note: if reading from an offset past the end of the object, we
496 * return 0 (not, say, -EINVAL). Also the default version of readv
497 * reads each extent separately synchronously, which can become horribly
498 * inefficient if the physical layout of the pushing object gets massively
499 * fragmented, and hence should be overridden by any real os that
500 * cares about performance.
501 *
502 * @param cid collection for object
503 * @param oid oid of object
504 * @param m intervals to be read
505 * @param bl output ceph::buffer::list
506 * @param op_flags is CEPH_OSD_OP_FLAG_*
507 * @returns number of bytes read on success, or negative error code on failure.
508 */
509 virtual int readv(
510 CollectionHandle &c,
511 const ghobject_t& oid,
512 interval_set<uint64_t>& m,
513 ceph::buffer::list& bl,
514 uint32_t op_flags = 0) {
515 int total = 0;
516 for (auto p = m.begin(); p != m.end(); p++) {
517 bufferlist t;
518 int r = read(c, oid, p.get_start(), p.get_len(), t, op_flags);
519 if (r < 0)
520 return r;
521 total += r;
522 // prune fiemap, if necessary
523 if (p.get_len() != t.length()) {
524 auto save = p++;
525 if (t.length() == 0) {
526 m.erase(save); // Remove this empty interval
527 } else {
528 save.set_len(t.length()); // fix interval length
529 bl.claim_append(t);
530 }
531 // Remove any other follow-up intervals present too
532 while (p != m.end()) {
533 save = p++;
534 m.erase(save);
535 }
536 break;
537 }
538 bl.claim_append(t);
539 }
540 return total;
541 }
542
543 /**
544 * dump_onode -- dumps onode metadata in human readable form,
545 * intended primarily for debugging
546 *
547 * @param cid collection for object
548 * @param oid oid of object
549 * @param section_name section name to create and print under
550 * @param f Formatter class instance to print to
551 * @returns 0 on success, negative error code on failure.
552 */
553 virtual int dump_onode(
554 CollectionHandle &c,
555 const ghobject_t& oid,
556 const string& section_name,
557 Formatter *f) {
558 return -ENOTSUP;
559 }
7c673cae
FG
560
561 /**
562 * getattr -- get an xattr of an object
563 *
564 * @param cid collection for object
565 * @param oid oid of object
566 * @param name name of attr to read
567 * @param value place to put output result.
568 * @returns 0 on success, negative error code on failure.
569 */
7c673cae 570 virtual int getattr(CollectionHandle &c, const ghobject_t& oid,
9f95a23c 571 const char *name, ceph::buffer::ptr& value) = 0;
7c673cae
FG
572
573 /**
574 * getattr -- get an xattr of an object
575 *
576 * @param cid collection for object
577 * @param oid oid of object
578 * @param name name of attr to read
579 * @param value place to put output result.
580 * @returns 0 on success, negative error code on failure.
581 */
7c673cae
FG
582 int getattr(
583 CollectionHandle &c, const ghobject_t& oid,
9f95a23c
TL
584 const std::string& name, ceph::buffer::list& value) {
585 ceph::buffer::ptr bp;
7c673cae
FG
586 int r = getattr(c, oid, name.c_str(), bp);
587 value.push_back(bp);
588 return r;
589 }
590
591 /**
592 * getattrs -- get all of the xattrs of an object
593 *
594 * @param cid collection for object
595 * @param oid oid of object
596 * @param aset place to put output result.
597 * @returns 0 on success, negative error code on failure.
598 */
7c673cae 599 virtual int getattrs(CollectionHandle &c, const ghobject_t& oid,
9f95a23c 600 std::map<std::string,ceph::buffer::ptr>& aset) = 0;
7c673cae
FG
601
602 /**
603 * getattrs -- get all of the xattrs of an object
604 *
605 * @param cid collection for object
606 * @param oid oid of object
607 * @param aset place to put output result.
608 * @returns 0 on success, negative error code on failure.
609 */
7c673cae 610 int getattrs(CollectionHandle &c, const ghobject_t& oid,
9f95a23c
TL
611 std::map<std::string,ceph::buffer::list>& aset) {
612 std::map<std::string,ceph::buffer::ptr> bmap;
7c673cae 613 int r = getattrs(c, oid, bmap);
9f95a23c 614 for (auto i = bmap.begin(); i != bmap.end(); ++i) {
7c673cae
FG
615 aset[i->first].append(i->second);
616 }
617 return r;
618 }
619
620
621 // collections
622
623 /**
624 * list_collections -- get all of the collections known to this ObjectStore
625 *
9f95a23c 626 * @param ls list of the collections in sorted order.
7c673cae
FG
627 * @returns 0 on success, negative error code on failure.
628 */
9f95a23c 629 virtual int list_collections(std::vector<coll_t>& ls) = 0;
7c673cae
FG
630
631 /**
632 * does a collection exist?
633 *
634 * @param c collection
635 * @returns true if it exists, false otherwise
636 */
637 virtual bool collection_exists(const coll_t& c) = 0;
638
639 /**
640 * is a collection empty?
641 *
642 * @param c collection
643 * @param empty true if the specified collection is empty, false otherwise
644 * @returns 0 on success, negative error code on failure.
645 */
11fdf7f2 646 virtual int collection_empty(CollectionHandle& c, bool *empty) = 0;
7c673cae
FG
647
648 /**
649 * return the number of significant bits of the coll_t::pgid.
650 *
651 * This should return what the last create_collection or split_collection
9f95a23c 652 * set. A legacy backend may return -EAGAIN if the value is unavailable
7c673cae
FG
653 * (because we upgraded from an older version, e.g., FileStore).
654 */
11fdf7f2 655 virtual int collection_bits(CollectionHandle& c) = 0;
7c673cae
FG
656
657
658 /**
9f95a23c 659 * list contents of a collection that fall in the range [start, end), returning no more than a specified number of results
7c673cae
FG
660 *
661 * @param c collection
662 * @param start list object that sort >= this value
663 * @param end list objects that sort < this value
664 * @param max return no more than this many results
665 * @param seq return no objects with snap < seq
666 * @param ls [out] result
667 * @param next [out] next item sorts >= this value
668 * @return zero on success, or negative error
669 */
7c673cae
FG
670 virtual int collection_list(CollectionHandle &c,
671 const ghobject_t& start, const ghobject_t& end,
672 int max,
9f95a23c 673 std::vector<ghobject_t> *ls, ghobject_t *next) = 0;
7c673cae 674
f91f0fd5
TL
675 virtual int collection_list_legacy(CollectionHandle &c,
676 const ghobject_t& start,
677 const ghobject_t& end, int max,
678 std::vector<ghobject_t> *ls,
679 ghobject_t *next) {
680 return collection_list(c, start, end, max, ls, next);
681 }
7c673cae
FG
682
683 /// OMAP
684 /// Get omap contents
7c673cae
FG
685 virtual int omap_get(
686 CollectionHandle &c, ///< [in] Collection containing oid
687 const ghobject_t &oid, ///< [in] Object containing omap
9f95a23c
TL
688 ceph::buffer::list *header, ///< [out] omap header
689 std::map<std::string, ceph::buffer::list> *out /// < [out] Key to value std::map
11fdf7f2 690 ) = 0;
7c673cae
FG
691
692 /// Get omap header
7c673cae
FG
693 virtual int omap_get_header(
694 CollectionHandle &c, ///< [in] Collection containing oid
695 const ghobject_t &oid, ///< [in] Object containing omap
9f95a23c 696 ceph::buffer::list *header, ///< [out] omap header
7c673cae 697 bool allow_eio = false ///< [in] don't assert on eio
11fdf7f2 698 ) = 0;
7c673cae
FG
699
700 /// Get keys defined on oid
7c673cae
FG
701 virtual int omap_get_keys(
702 CollectionHandle &c, ///< [in] Collection containing oid
703 const ghobject_t &oid, ///< [in] Object containing omap
9f95a23c 704 std::set<std::string> *keys ///< [out] Keys defined on oid
11fdf7f2 705 ) = 0;
7c673cae
FG
706
707 /// Get key values
7c673cae
FG
708 virtual int omap_get_values(
709 CollectionHandle &c, ///< [in] Collection containing oid
710 const ghobject_t &oid, ///< [in] Object containing omap
9f95a23c
TL
711 const std::set<std::string> &keys, ///< [in] Keys to get
712 std::map<std::string, ceph::buffer::list> *out ///< [out] Returned keys and values
11fdf7f2 713 ) = 0;
7c673cae 714
9f95a23c
TL
715#ifdef WITH_SEASTAR
716 virtual int omap_get_values(
717 CollectionHandle &c, ///< [in] Collection containing oid
718 const ghobject_t &oid, ///< [in] Object containing omap
719 const std::optional<std::string> &start_after, ///< [in] Keys to get
720 std::map<std::string, ceph::buffer::list> *out ///< [out] Returned keys and values
721 ) = 0;
722#endif
723
7c673cae 724 /// Filters keys into out which are defined on oid
7c673cae
FG
725 virtual int omap_check_keys(
726 CollectionHandle &c, ///< [in] Collection containing oid
727 const ghobject_t &oid, ///< [in] Object containing omap
9f95a23c
TL
728 const std::set<std::string> &keys, ///< [in] Keys to check
729 std::set<std::string> *out ///< [out] Subset of keys defined on oid
11fdf7f2 730 ) = 0;
7c673cae
FG
731
732 /**
733 * Returns an object map iterator
734 *
735 * Warning! The returned iterator is an implicit lock on filestore
736 * operations in c. Do not use filestore methods on c while the returned
737 * iterator is live. (Filling in a transaction is no problem).
738 *
739 * @return iterator, null on error
740 */
7c673cae
FG
741 virtual ObjectMap::ObjectMapIterator get_omap_iterator(
742 CollectionHandle &c, ///< [in] collection
743 const ghobject_t &oid ///< [in] object
11fdf7f2 744 ) = 0;
7c673cae
FG
745
746 virtual int flush_journal() { return -EOPNOTSUPP; }
747
9f95a23c 748 virtual int dump_journal(std::ostream& out) { return -EOPNOTSUPP; }
7c673cae 749
9f95a23c 750 virtual int snapshot(const std::string& name) { return -EOPNOTSUPP; }
7c673cae
FG
751
752 /**
753 * Set and get internal fsid for this instance. No external data is modified
754 */
755 virtual void set_fsid(uuid_d u) = 0;
756 virtual uuid_d get_fsid() = 0;
757
758 /**
759 * Estimates additional disk space used by the specified amount of objects and caused by file allocation granularity and metadata store
760 * - num objects - total (including whiteouts) object count to measure used space for.
761 */
762 virtual uint64_t estimate_objects_overhead(uint64_t num_objects) = 0;
763
764
765 // DEBUG
766 virtual void inject_data_error(const ghobject_t &oid) {}
767 virtual void inject_mdata_error(const ghobject_t &oid) {}
224ce89b
WB
768
769 virtual void compact() {}
28e407b8
AA
770 virtual bool has_builtin_csum() const {
771 return false;
772 }
7c673cae 773};
7c673cae
FG
774
775#endif