]> git.proxmox.com Git - ceph.git/blame - ceph/src/os/ObjectStore.h
buildsys: switch source download to quincy
[ceph.git] / ceph / src / os / ObjectStore.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14#ifndef CEPH_OBJECTSTORE_H
15#define CEPH_OBJECTSTORE_H
16
f67539c2 17#include "include/buffer.h"
9f95a23c 18#include "include/common_fwd.h"
7c673cae 19#include "include/Context.h"
f67539c2 20#include "include/interval_set.h"
11fdf7f2 21#include "include/stringify.h"
f67539c2
TL
22#include "include/types.h"
23
7c673cae
FG
24#include "osd/osd_types.h"
25#include "common/TrackedOp.h"
26#include "common/WorkQueue.h"
27#include "ObjectMap.h"
9f95a23c 28#include "os/Transaction.h"
7c673cae
FG
29
30#include <errno.h>
31#include <sys/stat.h>
32#include <vector>
33#include <map>
34
f67539c2 35#if defined(__APPLE__) || defined(__FreeBSD__) || defined(__sun) || defined(_WIN32)
7c673cae
FG
36#include <sys/statvfs.h>
37#else
38#include <sys/vfs.h> /* or <sys/statfs.h> */
11fdf7f2 39#endif
7c673cae 40
7c673cae
FG
41namespace ceph {
42 class Formatter;
43}
44
45/*
46 * low-level interface to the local OSD file system
47 */
48
49class Logger;
11fdf7f2 50class ContextQueue;
7c673cae 51
9f95a23c
TL
52static inline void encode(const std::map<std::string,ceph::buffer::ptr> *attrset, ceph::buffer::list &bl) {
53 using ceph::encode;
11fdf7f2 54 encode(*attrset, bl);
7c673cae
FG
55}
56
7c673cae
FG
// Flag bits (combined into an osflagbits_t mask)
using osflagbits_t = uint32_t;
constexpr int SKIP_JOURNAL_REPLAY = 0x1;  // bit 0
constexpr int SKIP_MOUNT_OMAP = 0x2;      // bit 1
61
62class ObjectStore {
63protected:
9f95a23c 64 std::string path;
7c673cae
FG
65
66public:
9f95a23c
TL
67 using Transaction = ceph::os::Transaction;
68
7c673cae
FG
69 CephContext* cct;
70 /**
71 * create - create an ObjectStore instance.
72 *
73 * This is invoked once at initialization time.
74 *
9f95a23c 75 * @param type type of store. This is a std::string from the configuration file.
7c673cae
FG
76 * @param data path (or other descriptor) for data
77 * @param journal path (or other descriptor) for journal (optional)
78 * @param flags which filestores should check if applicable
79 */
80 static ObjectStore *create(CephContext *cct,
9f95a23c
TL
81 const std::string& type,
82 const std::string& data,
83 const std::string& journal,
7c673cae
FG
84 osflagbits_t flags = 0);
85
86 /**
87 * probe a block device to learn the uuid of the owning OSD
88 *
89 * @param cct cct
90 * @param path path to device
91 * @param fsid [out] osd uuid
92 */
93 static int probe_block_device_fsid(
94 CephContext *cct,
9f95a23c 95 const std::string& path,
7c673cae
FG
96 uuid_d *fsid);
97
98 /**
99 * Fetch Object Store statistics.
100 *
101 * Currently only latency of write and apply times are measured.
102 *
103 * This appears to be called with nothing locked.
104 */
105 virtual objectstore_perf_stat_t get_cur_stats() = 0;
106
107 /**
108 * Fetch Object Store performance counters.
109 *
110 *
111 * This appears to be called with nothing locked.
112 */
113 virtual const PerfCounters* get_perf_counters() const = 0;
114
115 /**
11fdf7f2 116 * a collection also orders transactions
7c673cae 117 *
11fdf7f2
TL
118 * Any transactions queued under a given collection will be applied in
119 * sequence. Transactions queued under different collections may run
7c673cae
FG
120 * in parallel.
121 *
9f95a23c 122 * ObjectStore users may get collection handles with open_collection() (or,
11fdf7f2 123 * for bootstrapping a new collection, create_new_collection()).
7c673cae 124 */
11fdf7f2
TL
125 struct CollectionImpl : public RefCountedObject {
126 const coll_t cid;
7c673cae 127
11fdf7f2 128 /// wait for any queued transactions to apply
7c673cae
FG
129 // block until any previous transactions are visible. specifically,
130 // collection_list and collection_empty need to reflect prior operations.
131 virtual void flush() = 0;
132
7c673cae
FG
133 /**
134 * Async flush_commit
135 *
136 * There are two cases:
11fdf7f2 137 * 1) collection is currently idle: the method returns true. c is
7c673cae 138 * not touched.
11fdf7f2
TL
139 * 2) collection is not idle: the method returns false and c is
140 * called asynchronously with a value of 0 once all transactions
141 * queued on this collection prior to the call have been applied
7c673cae
FG
142 * and committed.
143 */
11fdf7f2 144 virtual bool flush_commit(Context *c) = 0;
7c673cae 145
11fdf7f2
TL
146 const coll_t &get_cid() {
147 return cid;
7c673cae 148 }
9f95a23c
TL
149 protected:
150 CollectionImpl() = delete;
151 CollectionImpl(CephContext* cct, const coll_t& c) : RefCountedObject(cct), cid(c) {}
152 ~CollectionImpl() = default;
7c673cae 153 };
9f95a23c 154 using CollectionHandle = ceph::ref_t<CollectionImpl>;
7c673cae 155
7c673cae
FG
156
157 /*********************************
158 *
159 * Object Contents and semantics
160 *
161 * All ObjectStore objects are identified as a named object
162 * (ghobject_t and hobject_t) in a named collection (coll_t).
163 * ObjectStore operations support the creation, mutation, deletion
164 * and enumeration of objects within a collection. Enumeration is
165 * in sorted key order (where keys are sorted by hash). Object names
166 * are globally unique.
167 *
168 * Each object has four distinct parts: byte data, xattrs, omap_header
169 * and omap entries.
170 *
171 * The data portion of an object is conceptually equivalent to a
172 * file in a file system. Random and Partial access for both read
173 * and write operations is required. The ability to have a sparse
174 * implementation of the data portion of an object is beneficial for
175 * some workloads, but not required. There is a system-wide limit on
176 * the maximum size of an object, which is typically around 100 MB.
177 *
178 * Xattrs are equivalent to the extended attributes of file
9f95a23c
TL
179 * systems. Xattrs are a set of key/value pairs. Sub-value access
180 * is not required. It is possible to enumerate the set of xattrs in
7c673cae
FG
181 * key order. At the implementation level, xattrs are used
182 * exclusively internal to Ceph and the implementer can expect the
183 * total size of all of the xattrs on an object to be relatively
184 * small, i.e., less than 64KB. Much of Ceph assumes that accessing
185 * xattrs on temporally adjacent object accesses (recent past or
186 * near future) is inexpensive.
187 *
188 * omap_header is a single blob of data. It can be read or written
189 * in total.
190 *
191 * Omap entries are conceptually the same as xattrs
192 * but in a different address space. In other words, you can have
193 * the same key as an xattr and an omap entry and they have distinct
194 * values. Enumeration of xattrs doesn't include omap entries and
195 * vice versa. The size and access characteristics of omap entries
196 * are very different from xattrs. In particular, the value portion
197 * of an omap entry can be quite large (MBs). More importantly, the
198 * interface must support efficient range queries on omap entries even
199 * when there is a large number of entries.
200 *
201 *********************************/
202
203 /*******************************
204 *
205 * Collections
206 *
207 * A collection is simply a grouping of objects. Collections have
208 * names (coll_t) and can be enumerated in order. Like an
9f95a23c 209 * individual object, a collection also has a set of xattrs.
7c673cae 210 *
7c673cae
FG
211 *
212 */
7c673cae 213
7c673cae 214
11fdf7f2
TL
215 int queue_transaction(CollectionHandle& ch,
216 Transaction&& t,
217 TrackedOpRef op = TrackedOpRef(),
218 ThreadPool::TPHandle *handle = NULL) {
9f95a23c 219 std::vector<Transaction> tls;
7c673cae 220 tls.push_back(std::move(t));
11fdf7f2 221 return queue_transactions(ch, tls, op, handle);
7c673cae
FG
222 }
223
224 virtual int queue_transactions(
9f95a23c 225 CollectionHandle& ch, std::vector<Transaction>& tls,
7c673cae
FG
226 TrackedOpRef op = TrackedOpRef(),
227 ThreadPool::TPHandle *handle = NULL) = 0;
228
229
7c673cae
FG
230 public:
231 ObjectStore(CephContext* cct,
232 const std::string& path_) : path(path_), cct(cct) {}
233 virtual ~ObjectStore() {}
234
235 // no copying
236 explicit ObjectStore(const ObjectStore& o) = delete;
237 const ObjectStore& operator=(const ObjectStore& o) = delete;
238
239 // versioning
240 virtual int upgrade() {
241 return 0;
242 }
243
9f95a23c
TL
244 virtual void get_db_statistics(ceph::Formatter *f) { }
245 virtual void generate_db_histogram(ceph::Formatter *f) { }
246 virtual int flush_cache(std::ostream *os = NULL) { return -1; }
247 virtual void dump_perf_counters(ceph::Formatter *f) {}
248 virtual void dump_cache_stats(ceph::Formatter *f) {}
249 virtual void dump_cache_stats(std::ostream& os) {}
7c673cae 250
9f95a23c 251 virtual std::string get_type() = 0;
7c673cae
FG
252
253 // mgmt
254 virtual bool test_mount_in_use() = 0;
255 virtual int mount() = 0;
256 virtual int umount() = 0;
257 virtual int fsck(bool deep) {
258 return -EOPNOTSUPP;
259 }
3efd9988
FG
260 virtual int repair(bool deep) {
261 return -EOPNOTSUPP;
262 }
eafe8130
TL
263 virtual int quick_fix() {
264 return -EOPNOTSUPP;
265 }
7c673cae
FG
266
267 virtual void set_cache_shards(unsigned num) { }
268
269 /**
270 * Returns 0 if the hobject is valid, -error otherwise
271 *
272 * Errors:
273 * -ENAMETOOLONG: locator/namespace/name too large
274 */
275 virtual int validate_hobject_key(const hobject_t &obj) const = 0;
276
277 virtual unsigned get_max_attr_name_length() = 0;
278 virtual int mkfs() = 0; // wipe
279 virtual int mkjournal() = 0; // journal only
280 virtual bool needs_journal() = 0; //< requires a journal
281 virtual bool wants_journal() = 0; //< prefers a journal
282 virtual bool allows_journal() = 0; //< allows a journal
283
9f95a23c
TL
284 // return store min allocation size, if applicable
285 virtual uint64_t get_min_alloc_size() const {
286 return 0;
287 }
288
11fdf7f2 289 /// enumerate hardware devices (by 'devname', e.g., 'sda' as in /sys/block/sda)
9f95a23c 290 virtual int get_devices(std::set<std::string> *devls) {
11fdf7f2
TL
291 return -EOPNOTSUPP;
292 }
293
294 /// true if a txn is readable immediately after it is queued.
295 virtual bool is_sync_onreadable() const {
296 return true;
297 }
298
31f18b77
FG
299 /**
300 * is_rotational
301 *
302 * Check whether store is backed by a rotational (HDD) or non-rotational
303 * (SSD) device.
304 *
305 * This must be usable *before* the store is mounted.
306 *
307 * @return true for HDD, false for SSD
308 */
309 virtual bool is_rotational() {
310 return true;
311 }
312
d2e6a577
FG
313 /**
314 * is_journal_rotational
315 *
316 * Check whether journal is backed by a rotational (HDD) or non-rotational
317 * (SSD) device.
318 *
319 *
320 * @return true for HDD, false for SSD
321 */
322 virtual bool is_journal_rotational() {
323 return true;
324 }
325
9f95a23c 326 virtual std::string get_default_device_class() {
224ce89b
WB
327 return is_rotational() ? "hdd" : "ssd";
328 }
329
11fdf7f2
TL
330 virtual int get_numa_node(
331 int *numa_node,
9f95a23c
TL
332 std::set<int> *nodes,
333 std::set<std::string> *failed) {
11fdf7f2
TL
334 return -EOPNOTSUPP;
335 }
336
337
7c673cae
FG
338 virtual bool can_sort_nibblewise() {
339 return false; // assume a backend cannot, unless it says otherwise
340 }
341
11fdf7f2
TL
342 virtual int statfs(struct store_statfs_t *buf,
343 osd_alert_list_t* alerts = nullptr) = 0;
9f95a23c
TL
344 virtual int pool_statfs(uint64_t pool_id, struct store_statfs_t *buf,
345 bool *per_pool_omap) = 0;
7c673cae 346
f67539c2 347 virtual void collect_metadata(std::map<std::string,std::string> *pm) { }
7c673cae
FG
348
349 /**
350 * write_meta - write a simple configuration key out-of-band
351 *
352 * Write a simple key/value pair for basic store configuration
353 * (e.g., a uuid or magic number) to an unopened/unmounted store.
354 * The default implementation writes this to a plaintext file in the
355 * path.
356 *
357 * A newline is appended.
358 *
359 * @param key key name (e.g., "fsid")
9f95a23c 360 * @param value value (e.g., a uuid rendered as a std::string)
7c673cae
FG
361 * @returns 0 for success, or an error code
362 */
363 virtual int write_meta(const std::string& key,
364 const std::string& value);
365
366 /**
367 * read_meta - read a simple configuration key out-of-band
368 *
369 * Read a simple key value from an unopened/unmounted store.
370 *
371 * Trailing whitespace is stripped off.
372 *
373 * @param key key name
9f95a23c 374 * @param value pointer to value std::string
7c673cae
FG
375 * @returns 0 for success, or an error code
376 */
377 virtual int read_meta(const std::string& key,
378 std::string *value);
379
380 /**
381 * get ideal max value for collection_list()
382 *
383 * default to some arbitrary values; the implementation will override.
384 */
385 virtual int get_ideal_list_max() { return 64; }
386
387
388 /**
389 * get a collection handle
390 *
391 * Provide a trivial handle as a default to avoid converting legacy
392 * implementations.
393 */
11fdf7f2
TL
394 virtual CollectionHandle open_collection(const coll_t &cid) = 0;
395
396 /**
397 * get a collection handle for a soon-to-be-created collection
398 *
399 * This handle must be used by queue_transaction that includes a
400 * create_collection call in order to become valid. It will become the
401 * reference to the created collection.
402 */
403 virtual CollectionHandle create_new_collection(const coll_t &cid) = 0;
7c673cae 404
11fdf7f2 405 /**
9f95a23c 406 * set ContextQueue for a collection
11fdf7f2
TL
407 *
408 * After that, oncommits of Transaction will queue into commit_queue.
409 * And osd ShardThread will call oncommits.
410 */
411 virtual void set_collection_commit_queue(const coll_t &cid, ContextQueue *commit_queue) = 0;
7c673cae
FG
412
413 /**
414 * Synchronous read operations
415 */
416
417 /**
418 * exists -- Test for existence of object
419 *
420 * @param cid collection for object
421 * @param oid oid of object
422 * @returns true if object exists, false otherwise
423 */
11fdf7f2 424 virtual bool exists(CollectionHandle& c, const ghobject_t& oid) = 0;
7c673cae 425 /**
9f95a23c 426 * set_collection_opts -- set pool options for a collection
7c673cae
FG
427 *
428 * @param cid collection
429 * @param opts new collection options
430 * @returns 0 on success, negative error code on failure.
431 */
432 virtual int set_collection_opts(
11fdf7f2 433 CollectionHandle& c,
7c673cae
FG
434 const pool_opts_t& opts) = 0;
435
436 /**
437 * stat -- get information for an object
438 *
439 * @param cid collection for object
440 * @param oid oid of object
441 * @param st output information for the object
442 * @param allow_eio if false, assert on -EIO operation failure
443 * @returns 0 on success, negative error code on failure.
444 */
7c673cae
FG
445 virtual int stat(
446 CollectionHandle &c,
447 const ghobject_t& oid,
448 struct stat *st,
11fdf7f2 449 bool allow_eio = false) = 0;
7c673cae
FG
450 /**
451 * read -- read a byte range of data from an object
452 *
453 * Note: if reading from an offset past the end of the object, we
454 * return 0 (not, say, -EINVAL).
455 *
456 * @param cid collection for object
457 * @param oid oid of object
458 * @param offset location offset of first byte to be read
459 * @param len number of bytes to be read
9f95a23c 460 * @param bl output ceph::buffer::list
7c673cae 461 * @param op_flags is CEPH_OSD_OP_FLAG_*
7c673cae
FG
462 * @returns number of bytes read on success, or negative error code on failure.
463 */
7c673cae
FG
464 virtual int read(
465 CollectionHandle &c,
466 const ghobject_t& oid,
467 uint64_t offset,
468 size_t len,
9f95a23c 469 ceph::buffer::list& bl,
11fdf7f2 470 uint32_t op_flags = 0) = 0;
7c673cae
FG
471
472 /**
9f95a23c 473 * fiemap -- get extent map of data of an object
7c673cae 474 *
9f95a23c
TL
475 * Returns an encoded map of the extents of an object's data portion
476 * (std::map<offset,size>).
7c673cae
FG
477 *
478 * A non-enlightened implementation is free to return the extent (offset, len)
479 * as the sole extent.
480 *
481 * @param cid collection for object
482 * @param oid oid of object
483 * @param offset location offset of first byte to be read
484 * @param len number of bytes to be read
9f95a23c 485 * @param bl output ceph::buffer::list for extent map information.
7c673cae
FG
486 * @returns 0 on success, negative error code on failure.
487 */
7c673cae 488 virtual int fiemap(CollectionHandle& c, const ghobject_t& oid,
9f95a23c 489 uint64_t offset, size_t len, ceph::buffer::list& bl) = 0;
7c673cae 490 virtual int fiemap(CollectionHandle& c, const ghobject_t& oid,
9f95a23c
TL
491 uint64_t offset, size_t len, std::map<uint64_t, uint64_t>& destmap) = 0;
492
493 /**
494 * readv -- read specfic intervals from an object;
495 * caller must call fiemap to fill in the extent-map first.
496 *
497 * Note: if reading from an offset past the end of the object, we
498 * return 0 (not, say, -EINVAL). Also the default version of readv
499 * reads each extent separately synchronously, which can become horribly
500 * inefficient if the physical layout of the pushing object get massively
501 * fragmented and hence should be overridden by any real os that
502 * cares about the performance..
503 *
504 * @param cid collection for object
505 * @param oid oid of object
506 * @param m intervals to be read
507 * @param bl output ceph::buffer::list
508 * @param op_flags is CEPH_OSD_OP_FLAG_*
509 * @returns number of bytes read on success, or negative error code on failure.
510 */
511 virtual int readv(
512 CollectionHandle &c,
513 const ghobject_t& oid,
514 interval_set<uint64_t>& m,
515 ceph::buffer::list& bl,
516 uint32_t op_flags = 0) {
517 int total = 0;
518 for (auto p = m.begin(); p != m.end(); p++) {
f67539c2 519 ceph::buffer::list t;
9f95a23c
TL
520 int r = read(c, oid, p.get_start(), p.get_len(), t, op_flags);
521 if (r < 0)
522 return r;
523 total += r;
524 // prune fiemap, if necessary
525 if (p.get_len() != t.length()) {
526 auto save = p++;
527 if (t.length() == 0) {
528 m.erase(save); // Remove this empty interval
529 } else {
530 save.set_len(t.length()); // fix interval length
531 bl.claim_append(t);
532 }
533 // Remove any other follow-up intervals present too
534 while (p != m.end()) {
535 save = p++;
536 m.erase(save);
537 }
538 break;
539 }
540 bl.claim_append(t);
541 }
542 return total;
543 }
544
545 /**
546 * dump_onode -- dumps onode metadata in human readable form,
547 intended primiarily for debugging
548 *
549 * @param cid collection for object
550 * @param oid oid of object
551 * @param section_name section name to create and print under
552 * @param f Formatter class instance to print to
553 * @returns 0 on success, negative error code on failure.
554 */
555 virtual int dump_onode(
556 CollectionHandle &c,
557 const ghobject_t& oid,
f67539c2
TL
558 const std::string& section_name,
559 ceph::Formatter *f) {
9f95a23c
TL
560 return -ENOTSUP;
561 }
7c673cae
FG
562
563 /**
564 * getattr -- get an xattr of an object
565 *
566 * @param cid collection for object
567 * @param oid oid of object
568 * @param name name of attr to read
569 * @param value place to put output result.
570 * @returns 0 on success, negative error code on failure.
571 */
7c673cae 572 virtual int getattr(CollectionHandle &c, const ghobject_t& oid,
9f95a23c 573 const char *name, ceph::buffer::ptr& value) = 0;
7c673cae
FG
574
575 /**
576 * getattr -- get an xattr of an object
577 *
578 * @param cid collection for object
579 * @param oid oid of object
580 * @param name name of attr to read
581 * @param value place to put output result.
582 * @returns 0 on success, negative error code on failure.
583 */
7c673cae
FG
584 int getattr(
585 CollectionHandle &c, const ghobject_t& oid,
9f95a23c
TL
586 const std::string& name, ceph::buffer::list& value) {
587 ceph::buffer::ptr bp;
7c673cae
FG
588 int r = getattr(c, oid, name.c_str(), bp);
589 value.push_back(bp);
590 return r;
591 }
592
593 /**
594 * getattrs -- get all of the xattrs of an object
595 *
596 * @param cid collection for object
597 * @param oid oid of object
598 * @param aset place to put output result.
599 * @returns 0 on success, negative error code on failure.
600 */
7c673cae 601 virtual int getattrs(CollectionHandle &c, const ghobject_t& oid,
9f95a23c 602 std::map<std::string,ceph::buffer::ptr>& aset) = 0;
7c673cae
FG
603
604 /**
605 * getattrs -- get all of the xattrs of an object
606 *
607 * @param cid collection for object
608 * @param oid oid of object
609 * @param aset place to put output result.
610 * @returns 0 on success, negative error code on failure.
611 */
7c673cae 612 int getattrs(CollectionHandle &c, const ghobject_t& oid,
9f95a23c
TL
613 std::map<std::string,ceph::buffer::list>& aset) {
614 std::map<std::string,ceph::buffer::ptr> bmap;
7c673cae 615 int r = getattrs(c, oid, bmap);
9f95a23c 616 for (auto i = bmap.begin(); i != bmap.end(); ++i) {
7c673cae
FG
617 aset[i->first].append(i->second);
618 }
619 return r;
620 }
621
622
623 // collections
624
625 /**
626 * list_collections -- get all of the collections known to this ObjectStore
627 *
9f95a23c 628 * @param ls list of the collections in sorted order.
7c673cae
FG
629 * @returns 0 on success, negative error code on failure.
630 */
9f95a23c 631 virtual int list_collections(std::vector<coll_t>& ls) = 0;
7c673cae
FG
632
633 /**
634 * does a collection exist?
635 *
636 * @param c collection
637 * @returns true if it exists, false otherwise
638 */
639 virtual bool collection_exists(const coll_t& c) = 0;
640
641 /**
642 * is a collection empty?
643 *
644 * @param c collection
645 * @param empty true if the specified collection is empty, false otherwise
646 * @returns 0 on success, negative error code on failure.
647 */
11fdf7f2 648 virtual int collection_empty(CollectionHandle& c, bool *empty) = 0;
7c673cae
FG
649
650 /**
651 * return the number of significant bits of the coll_t::pgid.
652 *
653 * This should return what the last create_collection or split_collection
9f95a23c 654 * set. A legacy backend may return -EAGAIN if the value is unavailable
7c673cae
FG
655 * (because we upgraded from an older version, e.g., FileStore).
656 */
11fdf7f2 657 virtual int collection_bits(CollectionHandle& c) = 0;
7c673cae
FG
658
659
660 /**
9f95a23c 661 * list contents of a collection that fall in the range [start, end), returning no more than a specified number of results
7c673cae
FG
662 *
663 * @param c collection
664 * @param start list object that sort >= this value
665 * @param end list objects that sort < this value
666 * @param max return no more than this many results
667 * @param seq return no objects with snap < seq
668 * @param ls [out] result
669 * @param next [out] next item sorts >= this value
670 * @return zero on success, or negative error
671 */
7c673cae
FG
672 virtual int collection_list(CollectionHandle &c,
673 const ghobject_t& start, const ghobject_t& end,
674 int max,
9f95a23c 675 std::vector<ghobject_t> *ls, ghobject_t *next) = 0;
7c673cae 676
f91f0fd5
TL
677 virtual int collection_list_legacy(CollectionHandle &c,
678 const ghobject_t& start,
679 const ghobject_t& end, int max,
680 std::vector<ghobject_t> *ls,
681 ghobject_t *next) {
682 return collection_list(c, start, end, max, ls, next);
683 }
7c673cae
FG
684
685 /// OMAP
686 /// Get omap contents
7c673cae
FG
687 virtual int omap_get(
688 CollectionHandle &c, ///< [in] Collection containing oid
689 const ghobject_t &oid, ///< [in] Object containing omap
9f95a23c
TL
690 ceph::buffer::list *header, ///< [out] omap header
691 std::map<std::string, ceph::buffer::list> *out /// < [out] Key to value std::map
11fdf7f2 692 ) = 0;
7c673cae
FG
693
694 /// Get omap header
7c673cae
FG
695 virtual int omap_get_header(
696 CollectionHandle &c, ///< [in] Collection containing oid
697 const ghobject_t &oid, ///< [in] Object containing omap
9f95a23c 698 ceph::buffer::list *header, ///< [out] omap header
7c673cae 699 bool allow_eio = false ///< [in] don't assert on eio
11fdf7f2 700 ) = 0;
7c673cae
FG
701
702 /// Get keys defined on oid
7c673cae
FG
703 virtual int omap_get_keys(
704 CollectionHandle &c, ///< [in] Collection containing oid
705 const ghobject_t &oid, ///< [in] Object containing omap
9f95a23c 706 std::set<std::string> *keys ///< [out] Keys defined on oid
11fdf7f2 707 ) = 0;
7c673cae
FG
708
709 /// Get key values
7c673cae
FG
710 virtual int omap_get_values(
711 CollectionHandle &c, ///< [in] Collection containing oid
712 const ghobject_t &oid, ///< [in] Object containing omap
9f95a23c
TL
713 const std::set<std::string> &keys, ///< [in] Keys to get
714 std::map<std::string, ceph::buffer::list> *out ///< [out] Returned keys and values
11fdf7f2 715 ) = 0;
7c673cae 716
9f95a23c
TL
717#ifdef WITH_SEASTAR
718 virtual int omap_get_values(
719 CollectionHandle &c, ///< [in] Collection containing oid
720 const ghobject_t &oid, ///< [in] Object containing omap
721 const std::optional<std::string> &start_after, ///< [in] Keys to get
722 std::map<std::string, ceph::buffer::list> *out ///< [out] Returned keys and values
723 ) = 0;
724#endif
725
7c673cae 726 /// Filters keys into out which are defined on oid
7c673cae
FG
727 virtual int omap_check_keys(
728 CollectionHandle &c, ///< [in] Collection containing oid
729 const ghobject_t &oid, ///< [in] Object containing omap
9f95a23c
TL
730 const std::set<std::string> &keys, ///< [in] Keys to check
731 std::set<std::string> *out ///< [out] Subset of keys defined on oid
11fdf7f2 732 ) = 0;
7c673cae
FG
733
734 /**
735 * Returns an object map iterator
736 *
737 * Warning! The returned iterator is an implicit lock on filestore
738 * operations in c. Do not use filestore methods on c while the returned
739 * iterator is live. (Filling in a transaction is no problem).
740 *
741 * @return iterator, null on error
742 */
7c673cae
FG
743 virtual ObjectMap::ObjectMapIterator get_omap_iterator(
744 CollectionHandle &c, ///< [in] collection
745 const ghobject_t &oid ///< [in] object
11fdf7f2 746 ) = 0;
7c673cae
FG
747
748 virtual int flush_journal() { return -EOPNOTSUPP; }
749
9f95a23c 750 virtual int dump_journal(std::ostream& out) { return -EOPNOTSUPP; }
7c673cae 751
9f95a23c 752 virtual int snapshot(const std::string& name) { return -EOPNOTSUPP; }
7c673cae
FG
753
754 /**
755 * Set and get internal fsid for this instance. No external data is modified
756 */
757 virtual void set_fsid(uuid_d u) = 0;
758 virtual uuid_d get_fsid() = 0;
759
760 /**
761 * Estimates additional disk space used by the specified amount of objects and caused by file allocation granularity and metadata store
762 * - num objects - total (including whiteouts) object count to measure used space for.
763 */
764 virtual uint64_t estimate_objects_overhead(uint64_t num_objects) = 0;
765
766
767 // DEBUG
768 virtual void inject_data_error(const ghobject_t &oid) {}
769 virtual void inject_mdata_error(const ghobject_t &oid) {}
224ce89b
WB
770
771 virtual void compact() {}
28e407b8
AA
772 virtual bool has_builtin_csum() const {
773 return false;
774 }
7c673cae 775};
7c673cae
FG
776
777#endif