]> git.proxmox.com Git - ceph.git/blame - ceph/src/os/ObjectStore.h
import ceph quincy 17.2.6
[ceph.git] / ceph / src / os / ObjectStore.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14#ifndef CEPH_OBJECTSTORE_H
15#define CEPH_OBJECTSTORE_H
16
f67539c2 17#include "include/buffer.h"
9f95a23c 18#include "include/common_fwd.h"
7c673cae 19#include "include/Context.h"
f67539c2 20#include "include/interval_set.h"
11fdf7f2 21#include "include/stringify.h"
f67539c2
TL
22#include "include/types.h"
23
7c673cae
FG
24#include "osd/osd_types.h"
25#include "common/TrackedOp.h"
26#include "common/WorkQueue.h"
27#include "ObjectMap.h"
9f95a23c 28#include "os/Transaction.h"
7c673cae
FG
29
30#include <errno.h>
31#include <sys/stat.h>
7c673cae 32#include <map>
20effc67
TL
33#include <memory>
34#include <vector>
7c673cae 35
f67539c2 36#if defined(__APPLE__) || defined(__FreeBSD__) || defined(__sun) || defined(_WIN32)
7c673cae
FG
37#include <sys/statvfs.h>
38#else
39#include <sys/vfs.h> /* or <sys/statfs.h> */
11fdf7f2 40#endif
7c673cae 41
7c673cae
FG
42namespace ceph {
43 class Formatter;
44}
45
46/*
47 * low-level interface to the local OSD file system
48 */
49
50class Logger;
11fdf7f2 51class ContextQueue;
7c673cae 52
9f95a23c
TL
53static inline void encode(const std::map<std::string,ceph::buffer::ptr> *attrset, ceph::buffer::list &bl) {
54 using ceph::encode;
11fdf7f2 55 encode(*attrset, bl);
7c673cae
FG
56}
57
7c673cae
FG
// Flag bits
// osflagbits_t is the integer type used to carry a set of these flags
// (presumably the `flags` argument of ObjectStore::create -- confirm
// against callers).
using osflagbits_t = uint32_t;
const int SKIP_JOURNAL_REPLAY = 0x1;  // bit 0
const int SKIP_MOUNT_OMAP = 0x2;      // bit 1
62
63class ObjectStore {
64protected:
9f95a23c 65 std::string path;
7c673cae
FG
66
67public:
9f95a23c
TL
68 using Transaction = ceph::os::Transaction;
69
7c673cae
FG
70 CephContext* cct;
71 /**
72 * create - create an ObjectStore instance.
73 *
74 * This is invoked once at initialization time.
75 *
9f95a23c 76 * @param type type of store. This is a std::string from the configuration file.
7c673cae
FG
77 * @param data path (or other descriptor) for data
78 * @param journal path (or other descriptor) for journal (optional)
79 * @param flags which filestores should check if applicable
80 */
20effc67
TL
81#ifndef WITH_SEASTAR
82 static std::unique_ptr<ObjectStore> create(
83 CephContext *cct,
84 const std::string& type,
85 const std::string& data,
86 const std::string& journal,
87 osflagbits_t flags = 0);
88#endif
89 static std::unique_ptr<ObjectStore> create(
90 CephContext *cct,
91 const std::string& type,
92 const std::string& data);
7c673cae
FG
93
94 /**
95 * probe a block device to learn the uuid of the owning OSD
96 *
97 * @param cct cct
98 * @param path path to device
99 * @param fsid [out] osd uuid
100 */
101 static int probe_block_device_fsid(
102 CephContext *cct,
9f95a23c 103 const std::string& path,
7c673cae
FG
104 uuid_d *fsid);
105
106 /**
107 * Fetch Object Store statistics.
108 *
109 * Currently only latency of write and apply times are measured.
110 *
111 * This appears to be called with nothing locked.
112 */
113 virtual objectstore_perf_stat_t get_cur_stats() = 0;
114
115 /**
116 * Fetch Object Store performance counters.
117 *
118 *
119 * This appears to be called with nothing locked.
120 */
121 virtual const PerfCounters* get_perf_counters() const = 0;
122
123 /**
11fdf7f2 124 * a collection also orders transactions
7c673cae 125 *
11fdf7f2
TL
126 * Any transactions queued under a given collection will be applied in
127 * sequence. Transactions queued under different collections may run
7c673cae
FG
128 * in parallel.
129 *
9f95a23c 130 * ObjectStore users may get collection handles with open_collection() (or,
11fdf7f2 131 * for bootstrapping a new collection, create_new_collection()).
7c673cae 132 */
11fdf7f2
TL
133 struct CollectionImpl : public RefCountedObject {
134 const coll_t cid;
7c673cae 135
11fdf7f2 136 /// wait for any queued transactions to apply
7c673cae
FG
137 // block until any previous transactions are visible. specifically,
138 // collection_list and collection_empty need to reflect prior operations.
139 virtual void flush() = 0;
140
7c673cae
FG
141 /**
142 * Async flush_commit
143 *
144 * There are two cases:
11fdf7f2 145 * 1) collection is currently idle: the method returns true. c is
7c673cae 146 * not touched.
11fdf7f2
TL
147 * 2) collection is not idle: the method returns false and c is
148 * called asynchronously with a value of 0 once all transactions
149 * queued on this collection prior to the call have been applied
7c673cae
FG
150 * and committed.
151 */
11fdf7f2 152 virtual bool flush_commit(Context *c) = 0;
7c673cae 153
11fdf7f2
TL
154 const coll_t &get_cid() {
155 return cid;
7c673cae 156 }
9f95a23c
TL
157 protected:
158 CollectionImpl() = delete;
159 CollectionImpl(CephContext* cct, const coll_t& c) : RefCountedObject(cct), cid(c) {}
160 ~CollectionImpl() = default;
7c673cae 161 };
9f95a23c 162 using CollectionHandle = ceph::ref_t<CollectionImpl>;
7c673cae 163
7c673cae
FG
164
165 /*********************************
166 *
167 * Object Contents and semantics
168 *
169 * All ObjectStore objects are identified as a named object
170 * (ghobject_t and hobject_t) in a named collection (coll_t).
171 * ObjectStore operations support the creation, mutation, deletion
172 * and enumeration of objects within a collection. Enumeration is
173 * in sorted key order (where keys are sorted by hash). Object names
174 * are globally unique.
175 *
176 * Each object has four distinct parts: byte data, xattrs, omap_header
177 * and omap entries.
178 *
179 * The data portion of an object is conceptually equivalent to a
180 * file in a file system. Random and Partial access for both read
181 * and write operations is required. The ability to have a sparse
182 * implementation of the data portion of an object is beneficial for
183 * some workloads, but not required. There is a system-wide limit on
184 * the maximum size of an object, which is typically around 100 MB.
185 *
186 * Xattrs are equivalent to the extended attributes of file
9f95a23c
TL
187 * systems. Xattrs are a set of key/value pairs. Sub-value access
188 * is not required. It is possible to enumerate the set of xattrs in
7c673cae
FG
189 * key order. At the implementation level, xattrs are used
190 * exclusively internal to Ceph and the implementer can expect the
191 * total size of all of the xattrs on an object to be relatively
192 * small, i.e., less than 64KB. Much of Ceph assumes that accessing
193 * xattrs on temporally adjacent object accesses (recent past or
194 * near future) is inexpensive.
195 *
196 * omap_header is a single blob of data. It can be read or written
197 * in total.
198 *
199 * Omap entries are conceptually the same as xattrs
200 * but in a different address space. In other words, you can have
201 * the same key as an xattr and an omap entry and they have distinct
202 * values. Enumeration of xattrs doesn't include omap entries and
203 * vice versa. The size and access characteristics of omap entries
204 * are very different from xattrs. In particular, the value portion
205 * of an omap entry can be quite large (MBs). More importantly, the
206 * interface must support efficient range queries on omap entries even
207 * when there are a large number of entries.
208 *
209 *********************************/
210
211 /*******************************
212 *
213 * Collections
214 *
215 * A collection is simply a grouping of objects. Collections have
216 * names (coll_t) and can be enumerated in order. Like an
9f95a23c 217 * individual object, a collection also has a set of xattrs.
7c673cae 218 *
7c673cae
FG
219 *
220 */
7c673cae 221
7c673cae 222
11fdf7f2
TL
223 int queue_transaction(CollectionHandle& ch,
224 Transaction&& t,
225 TrackedOpRef op = TrackedOpRef(),
226 ThreadPool::TPHandle *handle = NULL) {
9f95a23c 227 std::vector<Transaction> tls;
7c673cae 228 tls.push_back(std::move(t));
11fdf7f2 229 return queue_transactions(ch, tls, op, handle);
7c673cae
FG
230 }
231
232 virtual int queue_transactions(
9f95a23c 233 CollectionHandle& ch, std::vector<Transaction>& tls,
7c673cae
FG
234 TrackedOpRef op = TrackedOpRef(),
235 ThreadPool::TPHandle *handle = NULL) = 0;
236
237
7c673cae
FG
238 public:
239 ObjectStore(CephContext* cct,
240 const std::string& path_) : path(path_), cct(cct) {}
241 virtual ~ObjectStore() {}
242
243 // no copying
244 explicit ObjectStore(const ObjectStore& o) = delete;
245 const ObjectStore& operator=(const ObjectStore& o) = delete;
246
247 // versioning
248 virtual int upgrade() {
249 return 0;
250 }
251
9f95a23c
TL
252 virtual void get_db_statistics(ceph::Formatter *f) { }
253 virtual void generate_db_histogram(ceph::Formatter *f) { }
254 virtual int flush_cache(std::ostream *os = NULL) { return -1; }
255 virtual void dump_perf_counters(ceph::Formatter *f) {}
256 virtual void dump_cache_stats(ceph::Formatter *f) {}
257 virtual void dump_cache_stats(std::ostream& os) {}
7c673cae 258
9f95a23c 259 virtual std::string get_type() = 0;
7c673cae
FG
260
261 // mgmt
262 virtual bool test_mount_in_use() = 0;
263 virtual int mount() = 0;
264 virtual int umount() = 0;
265 virtual int fsck(bool deep) {
266 return -EOPNOTSUPP;
267 }
3efd9988
FG
268 virtual int repair(bool deep) {
269 return -EOPNOTSUPP;
270 }
eafe8130
TL
271 virtual int quick_fix() {
272 return -EOPNOTSUPP;
273 }
7c673cae
FG
274
275 virtual void set_cache_shards(unsigned num) { }
276
277 /**
278 * Returns 0 if the hobject is valid, -error otherwise
279 *
280 * Errors:
281 * -ENAMETOOLONG: locator/namespace/name too large
282 */
283 virtual int validate_hobject_key(const hobject_t &obj) const = 0;
284
285 virtual unsigned get_max_attr_name_length() = 0;
286 virtual int mkfs() = 0; // wipe
287 virtual int mkjournal() = 0; // journal only
288 virtual bool needs_journal() = 0; //< requires a journal
289 virtual bool wants_journal() = 0; //< prefers a journal
290 virtual bool allows_journal() = 0; //< allows a journal
1d09f67e 291 virtual void prepare_for_fast_shutdown() {}
39ae355f 292 virtual bool has_null_manager() const { return false; }
9f95a23c
TL
293 // return store min allocation size, if applicable
294 virtual uint64_t get_min_alloc_size() const {
295 return 0;
296 }
297
11fdf7f2 298 /// enumerate hardware devices (by 'devname', e.g., 'sda' as in /sys/block/sda)
9f95a23c 299 virtual int get_devices(std::set<std::string> *devls) {
11fdf7f2
TL
300 return -EOPNOTSUPP;
301 }
302
303 /// true if a txn is readable immediately after it is queued.
304 virtual bool is_sync_onreadable() const {
305 return true;
306 }
307
31f18b77
FG
308 /**
309 * is_rotational
310 *
311 * Check whether store is backed by a rotational (HDD) or non-rotational
312 * (SSD) device.
313 *
314 * This must be usable *before* the store is mounted.
315 *
316 * @return true for HDD, false for SSD
317 */
318 virtual bool is_rotational() {
319 return true;
320 }
321
d2e6a577
FG
322 /**
323 * is_journal_rotational
324 *
325 * Check whether journal is backed by a rotational (HDD) or non-rotational
326 * (SSD) device.
327 *
328 *
329 * @return true for HDD, false for SSD
330 */
331 virtual bool is_journal_rotational() {
332 return true;
333 }
334
9f95a23c 335 virtual std::string get_default_device_class() {
224ce89b
WB
336 return is_rotational() ? "hdd" : "ssd";
337 }
338
11fdf7f2
TL
339 virtual int get_numa_node(
340 int *numa_node,
9f95a23c
TL
341 std::set<int> *nodes,
342 std::set<std::string> *failed) {
11fdf7f2
TL
343 return -EOPNOTSUPP;
344 }
345
346
7c673cae
FG
347 virtual bool can_sort_nibblewise() {
348 return false; // assume a backend cannot, unless it says otherwise
349 }
350
11fdf7f2
TL
351 virtual int statfs(struct store_statfs_t *buf,
352 osd_alert_list_t* alerts = nullptr) = 0;
9f95a23c
TL
353 virtual int pool_statfs(uint64_t pool_id, struct store_statfs_t *buf,
354 bool *per_pool_omap) = 0;
7c673cae 355
f67539c2 356 virtual void collect_metadata(std::map<std::string,std::string> *pm) { }
7c673cae
FG
357
358 /**
359 * write_meta - write a simple configuration key out-of-band
360 *
361 * Write a simple key/value pair for basic store configuration
362 * (e.g., a uuid or magic number) to an unopened/unmounted store.
363 * The default implementation writes this to a plaintext file in the
364 * path.
365 *
366 * A newline is appended.
367 *
368 * @param key key name (e.g., "fsid")
9f95a23c 369 * @param value value (e.g., a uuid rendered as a std::string)
7c673cae
FG
370 * @returns 0 for success, or an error code
371 */
372 virtual int write_meta(const std::string& key,
373 const std::string& value);
374
375 /**
376 * read_meta - read a simple configuration key out-of-band
377 *
378 * Read a simple key value from an unopened/mounted store.
379 *
380 * Trailing whitespace is stripped off.
381 *
382 * @param key key name
9f95a23c 383 * @param value pointer to value std::string
7c673cae
FG
384 * @returns 0 for success, or an error code
385 */
386 virtual int read_meta(const std::string& key,
387 std::string *value);
388
389 /**
390 * get ideal max value for collection_list()
391 *
392 * default to some arbitrary values; the implementation will override.
393 */
394 virtual int get_ideal_list_max() { return 64; }
395
396
397 /**
398 * get a collection handle
399 *
400 * Provide a trivial handle as a default to avoid converting legacy
401 * implementations.
402 */
11fdf7f2
TL
403 virtual CollectionHandle open_collection(const coll_t &cid) = 0;
404
405 /**
406 * get a collection handle for a soon-to-be-created collection
407 *
408 * This handle must be used by queue_transaction that includes a
409 * create_collection call in order to become valid. It will become the
410 * reference to the created collection.
411 */
412 virtual CollectionHandle create_new_collection(const coll_t &cid) = 0;
7c673cae 413
11fdf7f2 414 /**
9f95a23c 415 * set ContextQueue for a collection
11fdf7f2
TL
416 *
417 * After that, oncommits of Transaction will queue into commit_queue.
418 * And osd ShardThread will call oncommits.
419 */
420 virtual void set_collection_commit_queue(const coll_t &cid, ContextQueue *commit_queue) = 0;
7c673cae
FG
421
422 /**
423 * Synchronous read operations
424 */
425
426 /**
20effc67 427 * exists -- Test for existence of object
7c673cae
FG
428 *
429 * @param cid collection for object
430 * @param oid oid of object
431 * @returns true if object exists, false otherwise
432 */
11fdf7f2 433 virtual bool exists(CollectionHandle& c, const ghobject_t& oid) = 0;
7c673cae 434 /**
9f95a23c 435 * set_collection_opts -- set pool options for a collection
7c673cae
FG
436 *
437 * @param cid collection
438 * @param opts new collection options
439 * @returns 0 on success, negative error code on failure.
440 */
441 virtual int set_collection_opts(
11fdf7f2 442 CollectionHandle& c,
7c673cae
FG
443 const pool_opts_t& opts) = 0;
444
445 /**
446 * stat -- get information for an object
447 *
448 * @param cid collection for object
449 * @param oid oid of object
450 * @param st output information for the object
451 * @param allow_eio if false, assert on -EIO operation failure
452 * @returns 0 on success, negative error code on failure.
453 */
7c673cae
FG
454 virtual int stat(
455 CollectionHandle &c,
456 const ghobject_t& oid,
457 struct stat *st,
11fdf7f2 458 bool allow_eio = false) = 0;
7c673cae
FG
459 /**
460 * read -- read a byte range of data from an object
461 *
462 * Note: if reading from an offset past the end of the object, we
463 * return 0 (not, say, -EINVAL).
464 *
465 * @param cid collection for object
466 * @param oid oid of object
467 * @param offset location offset of first byte to be read
468 * @param len number of bytes to be read
9f95a23c 469 * @param bl output ceph::buffer::list
7c673cae 470 * @param op_flags is CEPH_OSD_OP_FLAG_*
7c673cae
FG
471 * @returns number of bytes read on success, or negative error code on failure.
472 */
7c673cae
FG
473 virtual int read(
474 CollectionHandle &c,
475 const ghobject_t& oid,
476 uint64_t offset,
477 size_t len,
9f95a23c 478 ceph::buffer::list& bl,
11fdf7f2 479 uint32_t op_flags = 0) = 0;
7c673cae
FG
480
481 /**
9f95a23c 482 * fiemap -- get extent map of data of an object
7c673cae 483 *
9f95a23c
TL
484 * Returns an encoded map of the extents of an object's data portion
485 * (std::map<offset,size>).
7c673cae
FG
486 *
487 * A non-enlightened implementation is free to return the extent (offset, len)
488 * as the sole extent.
489 *
490 * @param cid collection for object
491 * @param oid oid of object
492 * @param offset location offset of first byte to be read
493 * @param len number of bytes to be read
9f95a23c 494 * @param bl output ceph::buffer::list for extent map information.
7c673cae
FG
495 * @returns 0 on success, negative error code on failure.
496 */
7c673cae 497 virtual int fiemap(CollectionHandle& c, const ghobject_t& oid,
9f95a23c 498 uint64_t offset, size_t len, ceph::buffer::list& bl) = 0;
7c673cae 499 virtual int fiemap(CollectionHandle& c, const ghobject_t& oid,
9f95a23c
TL
500 uint64_t offset, size_t len, std::map<uint64_t, uint64_t>& destmap) = 0;
501
502 /**
503 * readv -- read specific intervals from an object;
504 * caller must call fiemap to fill in the extent-map first.
505 *
506 * Note: if reading from an offset past the end of the object, we
507 * return 0 (not, say, -EINVAL). Also the default version of readv
508 * reads each extent separately synchronously, which can become horribly
509 * inefficient if the physical layout of the pushing object gets massively
510 * fragmented and hence should be overridden by any real os that
511 * cares about the performance.
512 *
513 * @param cid collection for object
514 * @param oid oid of object
515 * @param m intervals to be read
516 * @param bl output ceph::buffer::list
517 * @param op_flags is CEPH_OSD_OP_FLAG_*
518 * @returns number of bytes read on success, or negative error code on failure.
519 */
520 virtual int readv(
521 CollectionHandle &c,
522 const ghobject_t& oid,
523 interval_set<uint64_t>& m,
524 ceph::buffer::list& bl,
525 uint32_t op_flags = 0) {
526 int total = 0;
527 for (auto p = m.begin(); p != m.end(); p++) {
f67539c2 528 ceph::buffer::list t;
9f95a23c
TL
529 int r = read(c, oid, p.get_start(), p.get_len(), t, op_flags);
530 if (r < 0)
531 return r;
532 total += r;
533 // prune fiemap, if necessary
534 if (p.get_len() != t.length()) {
535 auto save = p++;
536 if (t.length() == 0) {
537 m.erase(save); // Remove this empty interval
538 } else {
539 save.set_len(t.length()); // fix interval length
540 bl.claim_append(t);
541 }
542 // Remove any other follow-up intervals present too
543 while (p != m.end()) {
544 save = p++;
545 m.erase(save);
546 }
547 break;
548 }
549 bl.claim_append(t);
550 }
551 return total;
552 }
553
554 /**
555 * dump_onode -- dumps onode metadata in human readable form,
556 * intended primarily for debugging
557 *
558 * @param cid collection for object
559 * @param oid oid of object
560 * @param section_name section name to create and print under
561 * @param f Formatter class instance to print to
562 * @returns 0 on success, negative error code on failure.
563 */
564 virtual int dump_onode(
565 CollectionHandle &c,
566 const ghobject_t& oid,
f67539c2
TL
567 const std::string& section_name,
568 ceph::Formatter *f) {
9f95a23c
TL
569 return -ENOTSUP;
570 }
7c673cae
FG
571
572 /**
573 * getattr -- get an xattr of an object
574 *
575 * @param cid collection for object
576 * @param oid oid of object
577 * @param name name of attr to read
578 * @param value place to put output result.
579 * @returns 0 on success, negative error code on failure.
580 */
7c673cae 581 virtual int getattr(CollectionHandle &c, const ghobject_t& oid,
9f95a23c 582 const char *name, ceph::buffer::ptr& value) = 0;
7c673cae
FG
583
584 /**
585 * getattr -- get an xattr of an object
586 *
587 * @param cid collection for object
588 * @param oid oid of object
589 * @param name name of attr to read
590 * @param value place to put output result.
591 * @returns 0 on success, negative error code on failure.
592 */
7c673cae
FG
593 int getattr(
594 CollectionHandle &c, const ghobject_t& oid,
9f95a23c
TL
595 const std::string& name, ceph::buffer::list& value) {
596 ceph::buffer::ptr bp;
7c673cae
FG
597 int r = getattr(c, oid, name.c_str(), bp);
598 value.push_back(bp);
599 return r;
600 }
601
602 /**
603 * getattrs -- get all of the xattrs of an object
604 *
605 * @param cid collection for object
606 * @param oid oid of object
607 * @param aset place to put output result.
608 * @returns 0 on success, negative error code on failure.
609 */
7c673cae 610 virtual int getattrs(CollectionHandle &c, const ghobject_t& oid,
20effc67 611 std::map<std::string,ceph::buffer::ptr, std::less<>>& aset) = 0;
7c673cae
FG
612
613 /**
614 * getattrs -- get all of the xattrs of an object
615 *
616 * @param cid collection for object
617 * @param oid oid of object
618 * @param aset place to put output result.
619 * @returns 0 on success, negative error code on failure.
620 */
7c673cae 621 int getattrs(CollectionHandle &c, const ghobject_t& oid,
20effc67
TL
622 std::map<std::string,ceph::buffer::list,std::less<>>& aset) {
623 std::map<std::string,ceph::buffer::ptr,std::less<>> bmap;
7c673cae 624 int r = getattrs(c, oid, bmap);
9f95a23c 625 for (auto i = bmap.begin(); i != bmap.end(); ++i) {
7c673cae
FG
626 aset[i->first].append(i->second);
627 }
628 return r;
629 }
630
631
632 // collections
633
634 /**
635 * list_collections -- get all of the collections known to this ObjectStore
636 *
9f95a23c 637 * @param ls list of the collections in sorted order.
7c673cae
FG
638 * @returns 0 on success, negative error code on failure.
639 */
9f95a23c 640 virtual int list_collections(std::vector<coll_t>& ls) = 0;
7c673cae
FG
641
642 /**
643 * does a collection exist?
644 *
645 * @param c collection
646 * @returns true if it exists, false otherwise
647 */
648 virtual bool collection_exists(const coll_t& c) = 0;
649
650 /**
651 * is a collection empty?
652 *
653 * @param c collection
654 * @param empty true if the specified collection is empty, false otherwise
655 * @returns 0 on success, negative error code on failure.
656 */
11fdf7f2 657 virtual int collection_empty(CollectionHandle& c, bool *empty) = 0;
7c673cae
FG
658
659 /**
660 * return the number of significant bits of the coll_t::pgid.
661 *
662 * This should return what the last create_collection or split_collection
9f95a23c 663 * set. A legacy backend may return -EAGAIN if the value is unavailable
7c673cae
FG
664 * (because we upgraded from an older version, e.g., FileStore).
665 */
11fdf7f2 666 virtual int collection_bits(CollectionHandle& c) = 0;
7c673cae
FG
667
668
669 /**
9f95a23c 670 * list contents of a collection that fall in the range [start, end), returning no more than a specified number of results
7c673cae
FG
671 *
672 * @param c collection
673 * @param start list object that sort >= this value
674 * @param end list objects that sort < this value
675 * @param max return no more than this many results
676 * @param seq return no objects with snap < seq
677 * @param ls [out] result
678 * @param next [out] next item sorts >= this value
679 * @return zero on success, or negative error
680 */
7c673cae
FG
681 virtual int collection_list(CollectionHandle &c,
682 const ghobject_t& start, const ghobject_t& end,
683 int max,
9f95a23c 684 std::vector<ghobject_t> *ls, ghobject_t *next) = 0;
7c673cae 685
f91f0fd5
TL
686 virtual int collection_list_legacy(CollectionHandle &c,
687 const ghobject_t& start,
688 const ghobject_t& end, int max,
689 std::vector<ghobject_t> *ls,
690 ghobject_t *next) {
691 return collection_list(c, start, end, max, ls, next);
692 }
7c673cae
FG
693
694 /// OMAP
695 /// Get omap contents
7c673cae
FG
696 virtual int omap_get(
697 CollectionHandle &c, ///< [in] Collection containing oid
698 const ghobject_t &oid, ///< [in] Object containing omap
9f95a23c
TL
699 ceph::buffer::list *header, ///< [out] omap header
700 std::map<std::string, ceph::buffer::list> *out /// < [out] Key to value std::map
11fdf7f2 701 ) = 0;
7c673cae
FG
702
703 /// Get omap header
7c673cae
FG
704 virtual int omap_get_header(
705 CollectionHandle &c, ///< [in] Collection containing oid
706 const ghobject_t &oid, ///< [in] Object containing omap
9f95a23c 707 ceph::buffer::list *header, ///< [out] omap header
7c673cae 708 bool allow_eio = false ///< [in] don't assert on eio
11fdf7f2 709 ) = 0;
7c673cae
FG
710
711 /// Get keys defined on oid
7c673cae
FG
712 virtual int omap_get_keys(
713 CollectionHandle &c, ///< [in] Collection containing oid
714 const ghobject_t &oid, ///< [in] Object containing omap
9f95a23c 715 std::set<std::string> *keys ///< [out] Keys defined on oid
11fdf7f2 716 ) = 0;
7c673cae
FG
717
718 /// Get key values
7c673cae
FG
719 virtual int omap_get_values(
720 CollectionHandle &c, ///< [in] Collection containing oid
721 const ghobject_t &oid, ///< [in] Object containing omap
9f95a23c
TL
722 const std::set<std::string> &keys, ///< [in] Keys to get
723 std::map<std::string, ceph::buffer::list> *out ///< [out] Returned keys and values
11fdf7f2 724 ) = 0;
7c673cae 725
9f95a23c
TL
726#ifdef WITH_SEASTAR
727 virtual int omap_get_values(
728 CollectionHandle &c, ///< [in] Collection containing oid
729 const ghobject_t &oid, ///< [in] Object containing omap
730 const std::optional<std::string> &start_after, ///< [in] Keys to get
731 std::map<std::string, ceph::buffer::list> *out ///< [out] Returned keys and values
732 ) = 0;
733#endif
734
7c673cae 735 /// Filters keys into out which are defined on oid
7c673cae
FG
736 virtual int omap_check_keys(
737 CollectionHandle &c, ///< [in] Collection containing oid
738 const ghobject_t &oid, ///< [in] Object containing omap
9f95a23c
TL
739 const std::set<std::string> &keys, ///< [in] Keys to check
740 std::set<std::string> *out ///< [out] Subset of keys defined on oid
11fdf7f2 741 ) = 0;
7c673cae
FG
742
743 /**
744 * Returns an object map iterator
745 *
746 * Warning! The returned iterator is an implicit lock on filestore
747 * operations in c. Do not use filestore methods on c while the returned
748 * iterator is live. (Filling in a transaction is no problem).
749 *
750 * @return iterator, null on error
751 */
7c673cae
FG
752 virtual ObjectMap::ObjectMapIterator get_omap_iterator(
753 CollectionHandle &c, ///< [in] collection
754 const ghobject_t &oid ///< [in] object
11fdf7f2 755 ) = 0;
7c673cae
FG
756
757 virtual int flush_journal() { return -EOPNOTSUPP; }
758
9f95a23c 759 virtual int dump_journal(std::ostream& out) { return -EOPNOTSUPP; }
7c673cae 760
9f95a23c 761 virtual int snapshot(const std::string& name) { return -EOPNOTSUPP; }
7c673cae
FG
762
763 /**
764 * Set and get internal fsid for this instance. No external data is modified
765 */
766 virtual void set_fsid(uuid_d u) = 0;
767 virtual uuid_d get_fsid() = 0;
768
769 /**
770 * Estimates additional disk space used by the specified amount of objects and caused by file allocation granularity and metadata store
771 * - num objects - total (including whiteouts) object count to measure used space for.
772 */
773 virtual uint64_t estimate_objects_overhead(uint64_t num_objects) = 0;
774
775
776 // DEBUG
777 virtual void inject_data_error(const ghobject_t &oid) {}
778 virtual void inject_mdata_error(const ghobject_t &oid) {}
224ce89b
WB
779
780 virtual void compact() {}
28e407b8
AA
781 virtual bool has_builtin_csum() const {
782 return false;
783 }
7c673cae 784};
7c673cae
FG
785
786#endif