]> git.proxmox.com Git - ceph.git/blob - ceph/src/include/rados/librados.h
update sources to v12.1.2
[ceph.git] / ceph / src / include / rados / librados.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2012 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15 #ifndef CEPH_LIBRADOS_H
16 #define CEPH_LIBRADOS_H
17
18 #ifdef __cplusplus
19 extern "C" {
20 #endif
21
22 #include <netinet/in.h>
23 #if defined(__linux__)
24 #include <linux/types.h>
25 #elif defined(__FreeBSD__)
26 #include <sys/types.h>
27 #endif
28 #include <unistd.h>
29 #include <string.h>
30 #include "rados_types.h"
31
32 #include <sys/time.h>
33
34 #ifndef CEPH_OSD_TMAP_SET
35 /* These are also defined in rados.h and objclass.h. Keep them in sync! */
36 #define CEPH_OSD_TMAP_HDR 'h'
37 #define CEPH_OSD_TMAP_SET 's'
38 #define CEPH_OSD_TMAP_CREATE 'c'
39 #define CEPH_OSD_TMAP_RM 'r'
40 #endif
41
42 #define LIBRADOS_VER_MAJOR 0
43 #define LIBRADOS_VER_MINOR 69
44 #define LIBRADOS_VER_EXTRA 1
45
/*
 * Pack a (major, minor, extra) version triple into a single integer:
 * major is shifted left by 16 bits, minor by 8 bits, and extra is added.
 * Every argument is parenthesized so that expression arguments
 * (e.g. `a | b`) expand with the intended precedence.
 */
#define LIBRADOS_VERSION(maj, min, extra) (((maj) << 16) + ((min) << 8) + (extra))
47
48 #define LIBRADOS_VERSION_CODE LIBRADOS_VERSION(LIBRADOS_VER_MAJOR, LIBRADOS_VER_MINOR, LIBRADOS_VER_EXTRA)
49
50 #define LIBRADOS_SUPPORTS_WATCH 1
51 #define LIBRADOS_SUPPORTS_SERVICES 1
52
53 /* RADOS lock flags
54 * They are also defined in cls_lock_types.h. Keep them in sync!
55 */
56 #define LIBRADOS_LOCK_FLAG_RENEW 0x1
57
58 /*
59 * Constants for rados_write_op_create().
60 */
61 #define LIBRADOS_CREATE_EXCLUSIVE 1
62 #define LIBRADOS_CREATE_IDEMPOTENT 0
63
64 /*
65 * Flags that can be set on a per-op basis via
66 * rados_read_op_set_flags() and rados_write_op_set_flags().
67 */
68 enum {
69 // fail a create operation if the object already exists
70 LIBRADOS_OP_FLAG_EXCL = 0x1,
71 // allow the transaction to succeed even if the flagged op fails
72 LIBRADOS_OP_FLAG_FAILOK = 0x2,
73 // indicate that the read/write op is random
74 LIBRADOS_OP_FLAG_FADVISE_RANDOM = 0x4,
75 // indicate that the read/write op is sequential
76 LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL = 0x8,
77 // indicate read/write data will be accessed in the near future (by someone)
78 LIBRADOS_OP_FLAG_FADVISE_WILLNEED = 0x10,
79 // indicate read/write data will not be accessed in the near future (by anyone)
80 LIBRADOS_OP_FLAG_FADVISE_DONTNEED = 0x20,
81 // indicate read/write data will not be accessed again (by *this* client)
82 LIBRADOS_OP_FLAG_FADVISE_NOCACHE = 0x40,
83 };
84
85 #if __GNUC__ >= 4
86 #define CEPH_RADOS_API __attribute__ ((visibility ("default")))
87 #else
88 #define CEPH_RADOS_API
89 #endif
90
91 /**
92 * @name xattr comparison operations
93 * Operators for comparing xattrs on objects, and aborting the
94 * rados_read_op or rados_write_op transaction if the comparison
95 * fails.
96 *
97 * @{
98 */
99 enum {
100 LIBRADOS_CMPXATTR_OP_EQ = 1,
101 LIBRADOS_CMPXATTR_OP_NE = 2,
102 LIBRADOS_CMPXATTR_OP_GT = 3,
103 LIBRADOS_CMPXATTR_OP_GTE = 4,
104 LIBRADOS_CMPXATTR_OP_LT = 5,
105 LIBRADOS_CMPXATTR_OP_LTE = 6
106 };
107 /** @} */
108
109 /**
110 * @name Operation Flags
111 * Flags for rados_read_op_operate(), rados_write_op_operate(),
112 * rados_aio_read_op_operate(), and rados_aio_write_op_operate().
113 * See librados.hpp for details.
114 * @{
115 */
116 enum {
117 LIBRADOS_OPERATION_NOFLAG = 0,
118 LIBRADOS_OPERATION_BALANCE_READS = 1,
119 LIBRADOS_OPERATION_LOCALIZE_READS = 2,
120 LIBRADOS_OPERATION_ORDER_READS_WRITES = 4,
121 LIBRADOS_OPERATION_IGNORE_CACHE = 8,
122 LIBRADOS_OPERATION_SKIPRWLOCKS = 16,
123 LIBRADOS_OPERATION_IGNORE_OVERLAY = 32,
124 /* send requests to the cluster despite the cluster or pool being marked
125 full; ops will either succeed (e.g., delete) or return EDQUOT or
126 ENOSPC. */
127 LIBRADOS_OPERATION_FULL_TRY = 64,
128 /*
129 * Mainly for delete op
130 */
131 LIBRADOS_OPERATION_FULL_FORCE = 128,
132 LIBRADOS_OPERATION_IGNORE_REDIRECT = 256,
133 };
134 /** @} */
135
136 /**
137 * @name Alloc hint flags
138 * Flags for rados_write_op_alloc_hint2() and rados_set_alloc_hint2()
139 * indicating future IO patterns.
140 * @{
141 */
142 enum {
143 LIBRADOS_ALLOC_HINT_FLAG_SEQUENTIAL_WRITE = 1,
144 LIBRADOS_ALLOC_HINT_FLAG_RANDOM_WRITE = 2,
145 LIBRADOS_ALLOC_HINT_FLAG_SEQUENTIAL_READ = 4,
146 LIBRADOS_ALLOC_HINT_FLAG_RANDOM_READ = 8,
147 LIBRADOS_ALLOC_HINT_FLAG_APPEND_ONLY = 16,
148 LIBRADOS_ALLOC_HINT_FLAG_IMMUTABLE = 32,
149 LIBRADOS_ALLOC_HINT_FLAG_SHORTLIVED = 64,
150 LIBRADOS_ALLOC_HINT_FLAG_LONGLIVED = 128,
151 LIBRADOS_ALLOC_HINT_FLAG_COMPRESSIBLE = 256,
152 LIBRADOS_ALLOC_HINT_FLAG_INCOMPRESSIBLE = 512,
153 };
154 /** @} */
155
156 typedef enum {
157 LIBRADOS_CHECKSUM_TYPE_XXHASH32 = 0,
158 LIBRADOS_CHECKSUM_TYPE_XXHASH64 = 1,
159 LIBRADOS_CHECKSUM_TYPE_CRC32C = 2
160 } rados_checksum_type_t;
161
162 /*
163 * snap id constants
164 */
165 #define LIBRADOS_SNAP_HEAD ((uint64_t)(-2))
166 #define LIBRADOS_SNAP_DIR ((uint64_t)(-1))
167
168 /**
169 * @typedef rados_t
170 *
171 * A handle for interacting with a RADOS cluster. It encapsulates all
172 * RADOS client configuration, including username, key for
173 * authentication, logging, and debugging. Talking to different clusters
174 * -- or to the same cluster with different users -- requires
175 * different cluster handles.
176 */
177 #ifndef VOIDPTR_RADOS_T
178 #define VOIDPTR_RADOS_T
179 typedef void *rados_t;
180 #endif //VOIDPTR_RADOS_T
181
182 /**
183 * @typedef rados_config_t
184 *
185 * A handle for the ceph configuration context for the rados_t cluster
186 * instance. This can be used to share configuration context/state
187 * (e.g., logging configuration) between librados instances.
188 *
189 * @warning The config context does not have independent reference
190 * counting. As such, a rados_config_t handle retrieved from a given
191 * rados_t is only valid as long as that rados_t.
192 */
193 typedef void *rados_config_t;
194
195 /**
196 * @typedef rados_ioctx_t
197 *
198 * An io context encapsulates a few settings for all I/O operations
199 * done on it:
200 * - pool - set when the io context is created (see rados_ioctx_create())
201 * - snapshot context for writes (see
202 * rados_ioctx_selfmanaged_snap_set_write_ctx())
203 * - snapshot id to read from (see rados_ioctx_snap_set_read())
204 * - object locator for all single-object operations (see
205 * rados_ioctx_locator_set_key())
206 * - namespace for all single-object operations (see
207 * rados_ioctx_set_namespace()). Set to LIBRADOS_ALL_NSPACES
208 * before rados_nobjects_list_open() will list all objects in all
209 * namespaces.
210 *
211 * @warning Changing any of these settings is not thread-safe -
212 * librados users must synchronize any of these changes on their own,
213 * or use separate io contexts for each thread
214 */
215 typedef void *rados_ioctx_t;
216
217 /**
218 * @typedef rados_list_ctx_t
219 *
220 * An iterator for listing the objects in a pool.
221 * Used with rados_nobjects_list_open(),
222 * rados_nobjects_list_next(), and
223 * rados_nobjects_list_close().
224 */
225 typedef void *rados_list_ctx_t;
226
227 /**
228 * @typedef rados_object_list_cursor
229 *
230 * The cursor used with rados_enumerate_objects
231 * and accompanying methods.
232 */
233 typedef void * rados_object_list_cursor;
234
235 typedef struct rados_object_list_item {
236 size_t oid_length;
237 char *oid;
238
239 size_t nspace_length;
240 char *nspace;
241
242 size_t locator_length;
243 char *locator;
244 } rados_object_list_item;
245
246 /**
247 * @typedef rados_snap_t
248 * The id of a snapshot.
249 */
250 typedef uint64_t rados_snap_t;
251
252 /**
253 * @typedef rados_xattrs_iter_t
254 * An iterator for listing extended attributes on an object.
255 * Used with rados_getxattrs(), rados_getxattrs_next(), and
256 * rados_getxattrs_end().
257 */
258 typedef void *rados_xattrs_iter_t;
259
260 /**
261 * @typedef rados_omap_iter_t
262 * An iterator for listing omap key/value pairs on an object.
263 * Used with rados_read_op_omap_get_keys(), rados_read_op_omap_get_vals(),
264 * rados_read_op_omap_get_vals_by_keys(), rados_omap_get_next(), and
265 * rados_omap_get_end().
266 */
267 typedef void *rados_omap_iter_t;
268
269 /**
270 * @struct rados_pool_stat_t
271 * Usage information for a pool.
272 */
273 struct rados_pool_stat_t {
274 /// space used in bytes
275 uint64_t num_bytes;
276 /// space used in KB
277 uint64_t num_kb;
278 /// number of objects in the pool
279 uint64_t num_objects;
280 /// number of clones of objects
281 uint64_t num_object_clones;
282 /// num_objects * num_replicas
283 uint64_t num_object_copies;
284 uint64_t num_objects_missing_on_primary;
285 /// number of objects found on no OSDs
286 uint64_t num_objects_unfound;
287 /// number of objects replicated fewer times than they should be
288 /// (but found on at least one OSD)
289 uint64_t num_objects_degraded;
290 uint64_t num_rd;
291 uint64_t num_rd_kb;
292 uint64_t num_wr;
293 uint64_t num_wr_kb;
294 };
295
296 /**
297 * @struct rados_cluster_stat_t
298 * Cluster-wide usage information
299 */
300 struct rados_cluster_stat_t {
301 uint64_t kb, kb_used, kb_avail;
302 uint64_t num_objects;
303 };
304
305 /**
306 * @typedef rados_write_op_t
307 *
308 * An object write operation stores a number of operations which can be
309 * executed atomically. For usage, see:
310 * - Creation and deletion: rados_create_write_op() rados_release_write_op()
311 * - Extended attribute manipulation: rados_write_op_cmpxattr(),
312 * rados_write_op_setxattr(),
313 * rados_write_op_rmxattr()
314 * - Object map key/value pairs: rados_write_op_omap_set(),
315 * rados_write_op_omap_rm_keys(), rados_write_op_omap_clear(),
316 * rados_write_op_omap_cmp()
317 * - Object properties: rados_write_op_assert_exists(),
318 * rados_write_op_assert_version()
319 * - Creating objects: rados_write_op_create()
320 * - IO on objects: rados_write_op_append(), rados_write_op_write(),
321 * rados_write_op_write_full(), rados_write_op_writesame(), rados_write_op_remove(),
322 * rados_write_op_truncate(), rados_write_op_zero(), rados_write_op_cmpext()
323 * - Hints: rados_write_op_set_alloc_hint()
324 * - Performing the operation: rados_write_op_operate(), rados_aio_write_op_operate()
325 */
326 typedef void *rados_write_op_t;
327
328 /**
329 * @typedef rados_read_op_t
330 *
331 * An object read operation stores a number of operations which can be
332 * executed atomically. For usage, see:
333 * - Creation and deletion: rados_create_read_op() rados_release_read_op()
334 * - Extended attribute manipulation: rados_read_op_cmpxattr(),
335 * rados_read_op_getxattr(), rados_read_op_getxattrs()
336 * - Object map key/value pairs: rados_read_op_omap_get_vals(),
337 * rados_read_op_omap_get_keys(), rados_read_op_omap_get_vals_by_keys(),
338 * rados_read_op_omap_cmp()
339 * - Object properties: rados_read_op_stat(), rados_read_op_assert_exists(),
340 * rados_read_op_assert_version()
341 * - IO on objects: rados_read_op_read(), rados_read_op_checksum(),
342 * rados_read_op_cmpext()
343 * - Custom operations: rados_read_op_exec(), rados_read_op_exec_user_buf()
344 * - Request properties: rados_read_op_set_flags()
345 * - Performing the operation: rados_read_op_operate(),
346 * rados_aio_read_op_operate()
347 */
348 typedef void *rados_read_op_t;
349
350 /**
351 * @typedef rados_completion_t
352 * Represents the state of an asynchronous operation - it contains the
353 * return value once the operation completes, and can be used to block
354 * until the operation is complete or safe.
355 */
356 typedef void *rados_completion_t;
357
358 /**
359 * @struct blkin_trace_info
360 * blkin trace information for Zipkin tracing
361 */
362 struct blkin_trace_info;
363
364 /**
365 * Get the version of librados.
366 *
367 * The version number is major.minor.extra. Note that this is
368 * unrelated to the Ceph version number.
369 *
370 * TODO: define version semantics, i.e.:
371 * - incrementing major is for backwards-incompatible changes
372 * - incrementing minor is for backwards-compatible changes
373 * - incrementing extra is for bug fixes
374 *
375 * @param major where to store the major version number
376 * @param minor where to store the minor version number
377 * @param extra where to store the extra version number
378 */
379 CEPH_RADOS_API void rados_version(int *major, int *minor, int *extra);
380
381 /**
382 * @name Setup and Teardown
383 * These are the first and last functions that should be called
384 * when using librados.
385 *
386 * @{
387 */
388
389 /**
390 * Create a handle for communicating with a RADOS cluster.
391 *
392 * Ceph environment variables are read when this is called, so if
393 * $CEPH_ARGS specifies everything you need to connect, no further
394 * configuration is necessary.
395 *
396 * @param cluster where to store the handle
397 * @param id the user to connect as (i.e. admin, not client.admin)
398 * @returns 0 on success, negative error code on failure
399 */
400 CEPH_RADOS_API int rados_create(rados_t *cluster, const char * const id);
401
402 /**
403 * Extended version of rados_create.
404 *
405 * Like rados_create, but
406 * 1) don't assume 'client\.'+id; allow full specification of name
407 * 2) allow specification of cluster name
408 * 3) flags for future expansion
409 */
410 CEPH_RADOS_API int rados_create2(rados_t *pcluster,
411 const char *const clustername,
412 const char * const name, uint64_t flags);
413
414 /**
415 * Initialize a cluster handle from an existing configuration.
416 *
417 * Share configuration state with another rados_t instance.
418 *
419 * @param cluster where to store the handle
420 * @param cct the existing configuration to use
421 * @returns 0 on success, negative error code on failure
422 */
423 CEPH_RADOS_API int rados_create_with_context(rados_t *cluster,
424 rados_config_t cct);
425
426 /**
427 * Ping the monitor with ID mon_id, storing the resulting reply in
428 * outstr (if specified) and the reply's length in outstrlen.
429 *
430 * The result buffer is allocated on the heap; the caller is
431 * expected to release that memory with rados_buffer_free(). The
432 * buffer and length pointers can be NULL, in which case they are
433 * not filled in.
434 *
435 * @param cluster cluster handle
436 * @param[in] mon_id ID of the monitor to ping
437 * @param[out] outstr double pointer with the resulting reply
438 * @param[out] outstrlen pointer with the size of the reply in outstr
439 */
440 CEPH_RADOS_API int rados_ping_monitor(rados_t cluster, const char *mon_id,
441 char **outstr, size_t *outstrlen);
442
443 /**
444 * Connect to the cluster.
445 *
446 * @note BUG: Before calling this, calling a function that communicates with the
447 * cluster will crash.
448 *
449 * @pre The cluster handle is configured with at least a monitor
450 * address. If cephx is enabled, a client name and secret must also be
451 * set.
452 *
453 * @post If this succeeds, any function in librados may be used
454 *
455 * @param cluster The cluster to connect to.
456 * @returns 0 on success, negative error code on failure
457 */
458 CEPH_RADOS_API int rados_connect(rados_t cluster);
459
460 /**
461 * Disconnects from the cluster.
462 *
463 * For clean up, this is only necessary after rados_connect() has
464 * succeeded.
465 *
466 * @warning This does not guarantee any asynchronous writes have
467 * completed. To do that, you must call rados_aio_flush() on all open
468 * io contexts.
469 *
470 * @warning We implicitly call rados_watch_flush() on shutdown. If
471 * there are watches being used, this should be done explicitly before
472 * destroying the relevant IoCtx. We do it here as a safety measure.
473 *
474 * @post the cluster handle cannot be used again
475 *
476 * @param cluster the cluster to shutdown
477 */
478 CEPH_RADOS_API void rados_shutdown(rados_t cluster);
479
480 /** @} init */
481
482 /**
483 * @name Configuration
484 * These functions read and update Ceph configuration for a cluster
485 * handle. Any configuration changes must be done before connecting to
486 * the cluster.
487 *
488 * Options that librados users might want to set include:
489 * - mon_host
490 * - auth_supported
491 * - key, keyfile, or keyring when using cephx
492 * - log_file, log_to_stderr, err_to_stderr, and log_to_syslog
493 * - debug_rados, debug_objecter, debug_monc, debug_auth, or debug_ms
494 *
495 * See docs.ceph.com for information about available configuration options.
496 *
497 * @{
498 */
499
500 /**
501 * Configure the cluster handle using a Ceph config file
502 *
503 * If path is NULL, the default locations are searched, and the first
504 * found is used. The locations are:
505 * - $CEPH_CONF (environment variable)
506 * - /etc/ceph/ceph.conf
507 * - ~/.ceph/config
508 * - ceph.conf (in the current working directory)
509 *
510 * @pre rados_connect() has not been called on the cluster handle
511 *
512 * @param cluster cluster handle to configure
513 * @param path path to a Ceph configuration file
514 * @returns 0 on success, negative error code on failure
515 */
516 CEPH_RADOS_API int rados_conf_read_file(rados_t cluster, const char *path);
517
518 /**
519 * Configure the cluster handle with command line arguments
520 *
521 * argv can contain any common Ceph command line option, including any
522 * configuration parameter prefixed by '--' and replacing spaces with
523 * dashes or underscores. For example, the following options are equivalent:
524 * - --mon-host 10.0.0.1:6789
525 * - --mon_host 10.0.0.1:6789
526 * - -m 10.0.0.1:6789
527 *
528 * @pre rados_connect() has not been called on the cluster handle
529 *
530 * @param cluster cluster handle to configure
531 * @param argc number of arguments in argv
532 * @param argv arguments to parse
533 * @returns 0 on success, negative error code on failure
534 */
535 CEPH_RADOS_API int rados_conf_parse_argv(rados_t cluster, int argc,
536 const char **argv);
537
538
539 /**
540 * Configure the cluster handle with command line arguments, returning
541 * any remainders. Same as rados_conf_parse_argv(), except for an extra
542 * remargv argument that holds the returned unrecognized arguments.
543 *
544 * @pre rados_connect() has not been called on the cluster handle
545 *
546 * @param cluster cluster handle to configure
547 * @param argc number of arguments in argv
548 * @param argv arguments to parse
549 * @param remargv char* array for returned unrecognized arguments
550 * @returns 0 on success, negative error code on failure
551 */
552 CEPH_RADOS_API int rados_conf_parse_argv_remainder(rados_t cluster, int argc,
553 const char **argv,
554 const char **remargv);
555 /**
556 * Configure the cluster handle based on an environment variable
557 *
558 * The contents of the environment variable are parsed as if they were
559 * Ceph command line options. If var is NULL, the CEPH_ARGS
560 * environment variable is used.
561 *
562 * @pre rados_connect() has not been called on the cluster handle
563 *
564 * @note BUG: this is not threadsafe - it uses a static buffer
565 *
566 * @param cluster cluster handle to configure
567 * @param var name of the environment variable to read
568 * @returns 0 on success, negative error code on failure
569 */
570 CEPH_RADOS_API int rados_conf_parse_env(rados_t cluster, const char *var);
571
572 /**
573 * Set a configuration option
574 *
575 * @pre rados_connect() has not been called on the cluster handle
576 *
577 * @param cluster cluster handle to configure
578 * @param option option to set
579 * @param value value of the option
580 * @returns 0 on success, negative error code on failure
581 * @returns -ENOENT when the option is not a Ceph configuration option
582 */
583 CEPH_RADOS_API int rados_conf_set(rados_t cluster, const char *option,
584 const char *value);
585
586 /**
587 * Get the value of a configuration option
588 *
589 * @param cluster configuration to read
590 * @param option which option to read
591 * @param buf where to write the configuration value
592 * @param len the size of buf in bytes
593 * @returns 0 on success, negative error code on failure
594 * @returns -ENAMETOOLONG if the buffer is too short to contain the
595 * requested value
596 */
597 CEPH_RADOS_API int rados_conf_get(rados_t cluster, const char *option,
598 char *buf, size_t len);
599
600 /** @} config */
601
602 /**
603 * Read usage info about the cluster
604 *
605 * This tells you total space, space used, space available, and number
606 * of objects. These are not updated immediately when data is written,
607 * they are eventually consistent.
608 *
609 * @param cluster cluster to query
610 * @param result where to store the results
611 * @returns 0 on success, negative error code on failure
612 */
613 CEPH_RADOS_API int rados_cluster_stat(rados_t cluster,
614 struct rados_cluster_stat_t *result);
615
616 /**
617 * Get the fsid of the cluster as a hexadecimal string.
618 *
619 * The fsid is a unique id of an entire Ceph cluster.
620 *
621 * @param cluster where to get the fsid
622 * @param buf where to write the fsid
623 * @param len the size of buf in bytes (should be 37)
624 * @returns 0 on success, negative error code on failure
625 * @returns -ERANGE if the buffer is too short to contain the
626 * fsid
627 */
628 CEPH_RADOS_API int rados_cluster_fsid(rados_t cluster, char *buf, size_t len);
629
630 /**
631 * Get/wait for the most recent osdmap
632 *
633 * @param cluster cluster handle
634 * @returns 0 on success, negative error code on failure
635 */
636 CEPH_RADOS_API int rados_wait_for_latest_osdmap(rados_t cluster);
637
638 /**
639 * @name Pools
640 *
641 * RADOS pools are separate namespaces for objects. Pools may have
642 * different crush rules associated with them, so they could have
643 * differing replication levels or placement strategies. RADOS
644 * permissions are also tied to pools - users can have different read,
645 * write, and execute permissions on a per-pool basis.
646 *
647 * @{
648 */
649
650 /**
651 * List pools
652 *
653 * Gets a list of pool names as NULL-terminated strings. The pool
654 * names will be placed in the supplied buffer one after another.
655 * After the last pool name, there will be two 0 bytes in a row.
656 *
657 * If len is too short to fit all the pool name entries we need, we will fill
658 * as much as we can.
659 *
660 * Buf may be null to determine the buffer size needed to list all pools.
661 *
662 * @param cluster cluster handle
663 * @param buf output buffer
664 * @param len output buffer length
665 * @returns length of the buffer we would need to list all pools
666 */
667 CEPH_RADOS_API int rados_pool_list(rados_t cluster, char *buf, size_t len);
668
669 /**
670 * List inconsistent placement groups of the given pool
671 *
672 * Gets a list of inconsistent placement groups as NULL-terminated strings.
673 * The placement group names will be placed in the supplied buffer one after
674 * another. After the last name, there will be two 0 bytes in a row.
675 *
676 * If len is too short to fit all the placement group entries we need, we will
677 * fill as much as we can.
678 *
679 * @param cluster cluster handle
680 * @param pool pool ID
681 * @param buf output buffer
682 * @param len output buffer length
683 * @returns length of the buffer we would need to list all inconsistent placement groups
684 */
685 CEPH_RADOS_API int rados_inconsistent_pg_list(rados_t cluster, int64_t pool,
686 char *buf, size_t len);
687
688 /**
689 * Get a configuration handle for a rados cluster handle
690 *
691 * This handle is valid only as long as the cluster handle is valid.
692 *
693 * @param cluster cluster handle
694 * @returns config handle for this cluster
695 */
696 CEPH_RADOS_API rados_config_t rados_cct(rados_t cluster);
697
698 /**
699 * Get a global id for current instance
700 *
701 * This id is a unique representation of current connection to the cluster
702 *
703 * @param cluster cluster handle
704 * @returns instance global id
705 */
706 CEPH_RADOS_API uint64_t rados_get_instance_id(rados_t cluster);
707
708 /**
709 * Create an io context
710 *
711 * The io context allows you to perform operations within a particular
712 * pool. For more details see rados_ioctx_t.
713 *
714 * @param cluster which cluster the pool is in
715 * @param pool_name name of the pool
716 * @param ioctx where to store the io context
717 * @returns 0 on success, negative error code on failure
718 */
719 CEPH_RADOS_API int rados_ioctx_create(rados_t cluster, const char *pool_name,
720 rados_ioctx_t *ioctx);
721 CEPH_RADOS_API int rados_ioctx_create2(rados_t cluster, int64_t pool_id,
722 rados_ioctx_t *ioctx);
723
724 /**
725 * The opposite of rados_ioctx_create
726 *
727 * This just tells librados that you no longer need to use the io context.
728 * It may not be freed immediately if there are pending asynchronous
729 * requests on it, but you should not use an io context again after
730 * calling this function on it.
731 *
732 * @warning This does not guarantee any asynchronous
733 * writes have completed. You must call rados_aio_flush()
734 * on the io context before destroying it to do that.
735 *
736 * @warning If this ioctx is used by rados_watch, the caller needs to
737 * be sure that all registered watches are disconnected via
738 * rados_unwatch() and that rados_watch_flush() is called. This
739 * ensures that a racing watch callback does not make use of a
740 * destroyed ioctx.
741 *
742 * @param io the io context to dispose of
743 */
744 CEPH_RADOS_API void rados_ioctx_destroy(rados_ioctx_t io);
745
746 /**
747 * Get configuration handle for a pool handle
748 *
749 * @param io pool handle
750 * @returns rados_config_t for this cluster
751 */
752 CEPH_RADOS_API rados_config_t rados_ioctx_cct(rados_ioctx_t io);
753
754 /**
755 * Get the cluster handle used by this rados_ioctx_t
756 * Note that this is a weak reference, and should not
757 * be destroyed via rados_shutdown().
758 *
759 * @param io the io context
760 * @returns the cluster handle for this io context
761 */
762 CEPH_RADOS_API rados_t rados_ioctx_get_cluster(rados_ioctx_t io);
763
764 /**
765 * Get pool usage statistics
766 *
767 * Fills in a rados_pool_stat_t after querying the cluster.
768 *
769 * @param io determines which pool to query
770 * @param stats where to store the results
771 * @returns 0 on success, negative error code on failure
772 */
773 CEPH_RADOS_API int rados_ioctx_pool_stat(rados_ioctx_t io,
774 struct rados_pool_stat_t *stats);
775
776 /**
777 * Get the id of a pool
778 *
779 * @param cluster which cluster the pool is in
780 * @param pool_name which pool to look up
781 * @returns id of the pool
782 * @returns -ENOENT if the pool is not found
783 */
784 CEPH_RADOS_API int64_t rados_pool_lookup(rados_t cluster,
785 const char *pool_name);
786
787 /**
788 * Get the name of a pool
789 *
790 * @param cluster which cluster the pool is in
791 * @param id the id of the pool
792 * @param buf where to store the pool name
793 * @param maxlen size of buffer where name will be stored
794 * @returns length of string stored, or -ERANGE if buffer too small
795 */
796 CEPH_RADOS_API int rados_pool_reverse_lookup(rados_t cluster, int64_t id,
797 char *buf, size_t maxlen);
798
799 /**
800 * Create a pool with default settings
801 *
802 * The default owner is the admin user (auid 0).
803 * The default crush rule is rule 0.
804 *
805 * @param cluster the cluster in which the pool will be created
806 * @param pool_name the name of the new pool
807 * @returns 0 on success, negative error code on failure
808 */
809 CEPH_RADOS_API int rados_pool_create(rados_t cluster, const char *pool_name);
810
811 /**
812 * Create a pool owned by a specific auid
813 *
814 * The auid is the authenticated user id to give ownership of the pool.
815 * TODO: document auid and the rest of the auth system
816 *
817 * @param cluster the cluster in which the pool will be created
818 * @param pool_name the name of the new pool
819 * @param auid the id of the owner of the new pool
820 * @returns 0 on success, negative error code on failure
821 */
822 CEPH_RADOS_API int rados_pool_create_with_auid(rados_t cluster,
823 const char *pool_name,
824 uint64_t auid);
825
826 /**
827 * Create a pool with a specific CRUSH rule
828 *
829 * @param cluster the cluster in which the pool will be created
830 * @param pool_name the name of the new pool
831 * @param crush_rule_num which rule to use for placement in the new pool
832 * @returns 0 on success, negative error code on failure
833 */
834 CEPH_RADOS_API int rados_pool_create_with_crush_rule(rados_t cluster,
835 const char *pool_name,
836 uint8_t crush_rule_num);
837
838 /**
839 * Create a pool with a specific CRUSH rule and auid
840 *
841 * This is a combination of rados_pool_create_with_crush_rule() and
842 * rados_pool_create_with_auid().
843 *
844 * @param cluster the cluster in which the pool will be created
845 * @param pool_name the name of the new pool
846 * @param crush_rule_num which rule to use for placement in the new pool
847 * @param auid the id of the owner of the new pool
848 * @returns 0 on success, negative error code on failure
849 */
850 CEPH_RADOS_API int rados_pool_create_with_all(rados_t cluster,
851 const char *pool_name,
852 uint64_t auid,
853 uint8_t crush_rule_num);
854
855 /**
856 * Returns the pool that is the base tier for this pool.
857 *
858 * The return value is the ID of the pool that should be used to read from/write to.
859 * If tiering is not set up for the pool, returns \c pool.
860 *
861 * @param cluster the cluster the pool is in
862 * @param pool ID of the pool to query
863 * @param[out] base_tier base tier, or \c pool if tiering is not configured
864 * @returns 0 on success, negative error code on failure
865 */
866 CEPH_RADOS_API int rados_pool_get_base_tier(rados_t cluster, int64_t pool,
867 int64_t* base_tier);
868
869 /**
870 * Delete a pool and all data inside it
871 *
872 * The pool is removed from the cluster immediately,
873 * but the actual data is deleted in the background.
874 *
875 * @param cluster the cluster the pool is in
876 * @param pool_name which pool to delete
877 * @returns 0 on success, negative error code on failure
878 */
879 CEPH_RADOS_API int rados_pool_delete(rados_t cluster, const char *pool_name);
880
881 /**
882 * Attempt to change an io context's associated auid "owner"
883 *
884 * Requires that you have write permission on both the current and new
885 * auid.
886 *
887 * @param io reference to the pool to change.
888 * @param auid the auid you wish the io to have.
889 * @returns 0 on success, negative error code on failure
890 */
891 CEPH_RADOS_API int rados_ioctx_pool_set_auid(rados_ioctx_t io, uint64_t auid);
892
893 /**
894 * Get the auid of a pool
895 *
896 * @param io pool to query
897 * @param auid where to store the auid
898 * @returns 0 on success, negative error code on failure
899 */
900 CEPH_RADOS_API int rados_ioctx_pool_get_auid(rados_ioctx_t io, uint64_t *auid);
901
902 /* deprecated, use rados_ioctx_pool_requires_alignment2 instead */
903 CEPH_RADOS_API int rados_ioctx_pool_requires_alignment(rados_ioctx_t io)
904 __attribute__((deprecated));
905
906 /**
907 * Test whether the specified pool requires alignment or not.
908 *
909 * @param io pool to query
910 * @param requires 1 if alignment is supported, 0 if not.
911 * @returns 0 on success, negative error code on failure
912 */
913 CEPH_RADOS_API int rados_ioctx_pool_requires_alignment2(rados_ioctx_t io,
914 int *requires);
915
916 /* deprecated, use rados_ioctx_pool_required_alignment2 instead */
917 CEPH_RADOS_API uint64_t rados_ioctx_pool_required_alignment(rados_ioctx_t io)
918 __attribute__((deprecated));
919
920 /**
921 * Get the alignment flavor of a pool
922 *
923 * @param io pool to query
924 * @param alignment where to store the alignment flavor
925 * @returns 0 on success, negative error code on failure
926 */
927 CEPH_RADOS_API int rados_ioctx_pool_required_alignment2(rados_ioctx_t io,
928 uint64_t *alignment);
929
930 /**
931 * Get the pool id of the io context
932 *
933 * @param io the io context to query
934 * @returns the id of the pool the io context uses
935 */
936 CEPH_RADOS_API int64_t rados_ioctx_get_id(rados_ioctx_t io);
937
938 /**
939 * Get the pool name of the io context
940 *
941 * @param io the io context to query
942 * @param buf pointer to buffer where name will be stored
943 * @param maxlen size of buffer where name will be stored
944 * @returns length of string stored, or -ERANGE if buffer too small
945 */
946 CEPH_RADOS_API int rados_ioctx_get_pool_name(rados_ioctx_t io, char *buf,
947 unsigned maxlen);
948
949 /** @} pools */
950
951 /**
952 * @name Object Locators
953 *
954 * @{
955 */
956
957 /**
958 * Set the key for mapping objects to pgs within an io context.
959 *
960 * The key is used instead of the object name to determine which
961 * placement groups an object is put in. This affects all subsequent
962 * operations of the io context - until a different locator key is
963 * set, all objects in this io context will be placed in the same pg.
964 *
965 * @param io the io context to change
966 * @param key the key to use as the object locator, or NULL to discard
967 * any previously set key
968 */
969 CEPH_RADOS_API void rados_ioctx_locator_set_key(rados_ioctx_t io,
970 const char *key);
971
972 /**
973 * Set the namespace for objects within an io context
974 *
975 * The namespace specification further refines a pool into different
976 * domains. The mapping of objects to pgs is also based on this
977 * value.
978 *
979 * @param io the io context to change
980 * @param nspace the name to use as the namespace, or NULL to use the
981 * default namespace
982 */
983 CEPH_RADOS_API void rados_ioctx_set_namespace(rados_ioctx_t io,
984 const char *nspace);
985 /** @} obj_loc */
986
987 /**
988 * @name Listing Objects
989 * @{
990 */
991 /**
992 * Start listing objects in a pool
993 *
994 * @param io the pool to list from
995 * @param ctx the handle to store list context in
996 * @returns 0 on success, negative error code on failure
997 */
998 CEPH_RADOS_API int rados_nobjects_list_open(rados_ioctx_t io,
999 rados_list_ctx_t *ctx);
1000
1001 /**
1002 * Return hash position of iterator, rounded to the current PG
1003 *
1004 * @param ctx iterator marking where you are in the listing
1005 * @returns current hash position, rounded to the current pg
1006 */
1007 CEPH_RADOS_API uint32_t rados_nobjects_list_get_pg_hash_position(rados_list_ctx_t ctx);
1008
1009 /**
1010 * Reposition object iterator to a different hash position
1011 *
1012 * @param ctx iterator marking where you are in the listing
1013 * @param pos hash position to move to
1014 * @returns actual (rounded) position we moved to
1015 */
1016 CEPH_RADOS_API uint32_t rados_nobjects_list_seek(rados_list_ctx_t ctx,
1017 uint32_t pos);
1018
1019 /**
1020 * Reposition object iterator to a different position
1021 *
1022 * @param ctx iterator marking where you are in the listing
1023 * @param cursor position to move to
1024 * @returns rounded position we moved to
1025 */
1026 CEPH_RADOS_API uint32_t rados_nobjects_list_seek_cursor(rados_list_ctx_t ctx,
1027 rados_object_list_cursor cursor);
1028
1029 /**
1030 * Get a cursor for the current position of the object iterator
1031 *
1032 * The returned handle must be released with rados_object_list_cursor_free().
1033 *
1034 * @param ctx iterator marking where you are in the listing
1035 * @param cursor where to store cursor
1036 * @returns 0 on success, negative error code on failure
1037 */
1038 CEPH_RADOS_API int rados_nobjects_list_get_cursor(rados_list_ctx_t ctx,
1039 rados_object_list_cursor *cursor);
1040
1041 /**
1042 * Get the next object name and locator in the pool
1043 *
1044 * *entry and *key are valid until next call to rados_nobjects_list_*
1045 *
1046 * @param ctx iterator marking where you are in the listing
1047 * @param entry where to store the name of the entry
1048 * @param key where to store the object locator (set to NULL to ignore)
1049 * @param nspace where to store the object namespace (set to NULL to ignore)
1050 * @returns 0 on success, negative error code on failure
1051 * @returns -ENOENT when there are no more objects to list
1052 */
1053 CEPH_RADOS_API int rados_nobjects_list_next(rados_list_ctx_t ctx,
1054 const char **entry,
1055 const char **key,
1056 const char **nspace);
1057
1058 /**
1059 * Close the object listing handle.
1060 *
1061 * This should be called when the handle is no longer needed.
1062 * The handle should not be used after it has been closed.
1063 *
1064 * @param ctx the handle to close
1065 */
1066 CEPH_RADOS_API void rados_nobjects_list_close(rados_list_ctx_t ctx);
1067
1068 /**
1069 * Get cursor handle pointing to the *beginning* of a pool.
1070 *
1071 * This is an opaque handle pointing to the start of a pool. It must
1072 * be released with rados_object_list_cursor_free().
1073 *
1074 * @param io ioctx for the pool
1075 * @returns handle for the pool, NULL on error (pool does not exist)
1076 */
1077 CEPH_RADOS_API rados_object_list_cursor rados_object_list_begin(
1078 rados_ioctx_t io);
1079
1080 /**
1081 * Get cursor handle pointing to the *end* of a pool.
1082 *
1083 * This is an opaque handle pointing to the end of a pool. It must
1084 * be released with rados_object_list_cursor_free().
1085 *
1086 * @param io ioctx for the pool
1087 * @returns handle for the pool, NULL on error (pool does not exist)
1088 */
1089 CEPH_RADOS_API rados_object_list_cursor rados_object_list_end(rados_ioctx_t io);
1090
1091 /**
1092 * Check if a cursor has reached the end of a pool
1093 *
1094 * @param io ioctx
1095 * @param cur cursor
1096 * @returns 1 if the cursor has reached the end of the pool, 0 otherwise
1097 */
1098 CEPH_RADOS_API int rados_object_list_is_end(rados_ioctx_t io,
1099 rados_object_list_cursor cur);
1100
1101 /**
1102 * Release a cursor
1103 *
1104 * Release a cursor. The handle may not be used after this point.
1105 *
1106 * @param io ioctx
1107 * @param cur cursor
1108 */
1109 CEPH_RADOS_API void rados_object_list_cursor_free(rados_ioctx_t io,
1110 rados_object_list_cursor cur);
1111
1112 /**
1113 * Compare two cursor positions
1114 *
1115 * Compare two cursors, and indicate whether the first cursor precedes,
1116 * matches, or follows the second.
1117 *
1118 * @param io ioctx
1119 * @param lhs first cursor
1120 * @param rhs second cursor
1121 * @returns -1, 0, or 1 for lhs < rhs, lhs == rhs, or lhs > rhs
1122 */
1123 CEPH_RADOS_API int rados_object_list_cursor_cmp(rados_ioctx_t io,
1124 rados_object_list_cursor lhs, rados_object_list_cursor rhs);
1125
1126 /**
1127 * @return the number of items set in the result array
1128 */
1129 CEPH_RADOS_API int rados_object_list(rados_ioctx_t io,
1130 const rados_object_list_cursor start,
1131 const rados_object_list_cursor finish,
1132 const size_t result_size,
1133 const char *filter_buf,
1134 const size_t filter_buf_len,
1135 rados_object_list_item *results,
1136 rados_object_list_cursor *next);
1137
1138 CEPH_RADOS_API void rados_object_list_free(
1139 const size_t result_size,
1140 rados_object_list_item *results);
1141
1142 /**
1143 * Obtain cursors delineating a subset of a range. Use this
1144 * when you want to split up the work of iterating over the
1145 * global namespace. Expected use case is when you are iterating
1146 * in parallel, with `m` workers, and each worker taking an id `n`.
1147 *
1148 * @param start start of the range to be sliced up (inclusive)
1149 * @param finish end of the range to be sliced up (exclusive)
1150 * @param m how many chunks to divide start-finish into
1151 * @param n which of the m chunks you would like to get cursors for
1152 * @param split_start cursor populated with start of the subrange (inclusive)
1153 * @param split_finish cursor populated with end of the subrange (exclusive)
1154 */
1155 CEPH_RADOS_API void rados_object_list_slice(rados_ioctx_t io,
1156 const rados_object_list_cursor start,
1157 const rados_object_list_cursor finish,
1158 const size_t n,
1159 const size_t m,
1160 rados_object_list_cursor *split_start,
1161 rados_object_list_cursor *split_finish);
1162
1163
1164 /** @} Listing Objects */
1165
1166 /**
1167 * @name Snapshots
1168 *
1169 * RADOS snapshots are based upon sequence numbers that form a
1170 * snapshot context. They are pool-specific. The snapshot context
1171 * consists of the current snapshot sequence number for a pool, and an
1172 * array of sequence numbers at which snapshots were taken, in
1173 * descending order. Whenever a snapshot is created or deleted, the
1174 * snapshot sequence number for the pool is increased. To add a new
1175 * snapshot, the new snapshot sequence number must be increased and
1176 * added to the snapshot context.
1177 *
1178 * There are two ways to manage these snapshot contexts:
1179 * -# within the RADOS cluster
1180 * These are called pool snapshots, and store the snapshot context
1181 * in the OSDMap. These represent a snapshot of all the objects in
1182 * a pool.
1183 * -# within the RADOS clients
1184 * These are called self-managed snapshots, and push the
1185 * responsibility for keeping track of the snapshot context to the
1186 * clients. For every write, the client must send the snapshot
1187 * context. In librados, this is accomplished with
1188 * rados_selfmanaged_snap_set_write_ctx(). These are more
1189 * difficult to manage, but are restricted to specific objects
1190 * instead of applying to an entire pool.
1191 *
1192 * @{
1193 */
1194
1195 /**
1196 * Create a pool-wide snapshot
1197 *
1198 * @param io the pool to snapshot
1199 * @param snapname the name of the snapshot
1200 * @returns 0 on success, negative error code on failure
1201 */
1202 CEPH_RADOS_API int rados_ioctx_snap_create(rados_ioctx_t io,
1203 const char *snapname);
1204
1205 /**
1206 * Delete a pool snapshot
1207 *
1208 * @param io the pool to delete the snapshot from
1209 * @param snapname which snapshot to delete
1210 * @returns 0 on success, negative error code on failure
1211 */
1212 CEPH_RADOS_API int rados_ioctx_snap_remove(rados_ioctx_t io,
1213 const char *snapname);
1214
1215 /**
1216 * Rollback an object to a pool snapshot
1217 *
1218 * The contents of the object will be the same as
1219 * when the snapshot was taken.
1220 *
1221 * @param io the pool in which the object is stored
1222 * @param oid the name of the object to rollback
1223 * @param snapname which snapshot to rollback to
1224 * @returns 0 on success, negative error code on failure
1225 */
1226 CEPH_RADOS_API int rados_ioctx_snap_rollback(rados_ioctx_t io, const char *oid,
1227 const char *snapname);
1228
1229 /**
1230 * @warning Deprecated: Use rados_ioctx_snap_rollback() instead
1231 */
1232 CEPH_RADOS_API int rados_rollback(rados_ioctx_t io, const char *oid,
1233 const char *snapname)
1234 __attribute__((deprecated));
1235
1236 /**
1237 * Set the snapshot from which reads are performed.
1238 *
1239 * Subsequent reads will return data as it was at the time of that
1240 * snapshot.
1241 *
1242 * @param io the io context to change
1243 * @param snap the id of the snapshot to set, or LIBRADOS_SNAP_HEAD for no
1244 * snapshot (i.e. normal operation)
1245 */
1246 CEPH_RADOS_API void rados_ioctx_snap_set_read(rados_ioctx_t io,
1247 rados_snap_t snap);
1248
1249 /**
1250 * Allocate an ID for a self-managed snapshot
1251 *
1252 * Get a unique ID to put in the snapshot context to create a
1253 * snapshot. A clone of an object is not created until a write with
1254 * the new snapshot context is completed.
1255 *
1256 * @param io the pool in which the snapshot will exist
1257 * @param snapid where to store the newly allocated snapshot ID
1258 * @returns 0 on success, negative error code on failure
1259 */
1260 CEPH_RADOS_API int rados_ioctx_selfmanaged_snap_create(rados_ioctx_t io,
1261 rados_snap_t *snapid);
1262 CEPH_RADOS_API void
1263 rados_aio_ioctx_selfmanaged_snap_create(rados_ioctx_t io,
1264 rados_snap_t *snapid,
1265 rados_completion_t completion);
1266
1267 /**
1268 * Remove a self-managed snapshot
1269 *
1270 * This increases the snapshot sequence number, which will cause
1271 * snapshots to be removed lazily.
1272 *
1273 * @param io the pool in which the snapshot exists
1274 * @param snapid the ID of the snapshot to remove
1275 * @returns 0 on success, negative error code on failure
1276 */
1277 CEPH_RADOS_API int rados_ioctx_selfmanaged_snap_remove(rados_ioctx_t io,
1278 rados_snap_t snapid);
1279 CEPH_RADOS_API void
1280 rados_aio_ioctx_selfmanaged_snap_remove(rados_ioctx_t io,
1281 rados_snap_t snapid,
1282 rados_completion_t completion);
1283
1284 /**
1285 * Rollback an object to a self-managed snapshot
1286 *
1287 * The contents of the object will be the same as
1288 * when the snapshot was taken.
1289 *
1290 * @param io the pool in which the object is stored
1291 * @param oid the name of the object to rollback
1292 * @param snapid which snapshot to rollback to
1293 * @returns 0 on success, negative error code on failure
1294 */
1295 CEPH_RADOS_API int rados_ioctx_selfmanaged_snap_rollback(rados_ioctx_t io,
1296 const char *oid,
1297 rados_snap_t snapid);
1298
1299 /**
1300 * Set the snapshot context for use when writing to objects
1301 *
1302 * This is stored in the io context, and applies to all future writes.
1303 *
1304 * @param io the io context to change
1305 * @param seq the newest snapshot sequence number for the pool
1306 * @param snaps array of snapshots sorted by descending id
1307 * @param num_snaps how many snapshots are in the snaps array
1308 * @returns 0 on success, negative error code on failure
1309 * @returns -EINVAL if snaps are not in descending order
1310 */
1311 CEPH_RADOS_API int rados_ioctx_selfmanaged_snap_set_write_ctx(rados_ioctx_t io,
1312 rados_snap_t seq,
1313 rados_snap_t *snaps,
1314 int num_snaps);
1315
1316 /**
1317 * List all the ids of pool snapshots
1318 *
1319 * If the output array does not have enough space to fit all the
1320 * snapshots, -ERANGE is returned and the caller should retry with a
1321 * larger array.
1322 *
1323 * @param io the pool to read from
1324 * @param snaps where to store the results
1325 * @param maxlen the number of rados_snap_t that fit in the snaps array
1326 * @returns number of snapshots on success, negative error code on failure
1327 * @returns -ERANGE is returned if the snaps array is too short
1328 */
1329 CEPH_RADOS_API int rados_ioctx_snap_list(rados_ioctx_t io, rados_snap_t *snaps,
1330 int maxlen);
1331
1332 /**
1333 * Get the id of a pool snapshot
1334 *
1335 * @param io the pool to read from
1336 * @param name the snapshot to find
1337 * @param id where to store the result
1338 * @returns 0 on success, negative error code on failure
1339 */
1340 CEPH_RADOS_API int rados_ioctx_snap_lookup(rados_ioctx_t io, const char *name,
1341 rados_snap_t *id);
1342
1343 /**
1344 * Get the name of a pool snapshot
1345 *
1346 * @param io the pool to read from
1347 * @param id the snapshot to find
1348 * @param name where to store the result
1349 * @param maxlen the size of the name array
1350 * @returns 0 on success, negative error code on failure
1351 * @returns -ERANGE if the name array is too small
1352 */
1353 CEPH_RADOS_API int rados_ioctx_snap_get_name(rados_ioctx_t io, rados_snap_t id,
1354 char *name, int maxlen);
1355
1356 /**
1357 * Find when a pool snapshot occurred
1358 *
1359 * @param io the pool the snapshot was taken in
1360 * @param id the snapshot to lookup
1361 * @param t where to store the result
1362 * @returns 0 on success, negative error code on failure
1363 */
1364 CEPH_RADOS_API int rados_ioctx_snap_get_stamp(rados_ioctx_t io, rados_snap_t id,
1365 time_t *t);
1366
1367 /** @} Snapshots */
1368
1369 /**
1370 * @name Synchronous I/O
1371 * Writes are replicated to a number of OSDs based on the
1372 * configuration of the pool they are in. These write functions block
1373 * until data is in memory on all replicas of the object they're
1374 * writing to - they are equivalent to doing the corresponding
1375 * asynchronous write, and then calling
1376 * rados_aio_wait_for_complete(). For greater data safety, use the
1377 * asynchronous functions and rados_aio_wait_for_safe().
1378 *
1379 * @{
1380 */
1381
1382 /**
1383 * Return the version of the last object read or written to.
1384 *
1385 * This exposes the internal version number of the last object read or
1386 * written via this io context
1387 *
1388 * @param io the io context to check
1389 * @returns last read or written object version
1390 */
1391 CEPH_RADOS_API uint64_t rados_get_last_version(rados_ioctx_t io);
1392
1393 /**
1394 * Write *len* bytes from *buf* into the *oid* object, starting at
1395 * offset *off*. The value of *len* must be <= UINT_MAX/2.
1396 *
1397 * @note This will never return a positive value not equal to len.
1398 * @param io the io context in which the write will occur
1399 * @param oid name of the object
1400 * @param buf data to write
1401 * @param len length of the data, in bytes
1402 * @param off byte offset in the object to begin writing at
1403 * @returns 0 on success, negative error code on failure
1404 */
1405 CEPH_RADOS_API int rados_write(rados_ioctx_t io, const char *oid,
1406 const char *buf, size_t len, uint64_t off);
1407
1408 /**
1409 * Write *len* bytes from *buf* into the *oid* object. The value of
1410 * *len* must be <= UINT_MAX/2.
1411 *
1412 * The object is filled with the provided data. If the object exists,
1413 * it is atomically truncated and then written.
1414 *
1415 * @param io the io context in which the write will occur
1416 * @param oid name of the object
1417 * @param buf data to write
1418 * @param len length of the data, in bytes
1419 * @returns 0 on success, negative error code on failure
1420 */
1421 CEPH_RADOS_API int rados_write_full(rados_ioctx_t io, const char *oid,
1422 const char *buf, size_t len);
1423
1424 /**
1425 * Write the same *data_len* bytes from *buf* multiple times into the
1426 * *oid* object. *write_len* bytes are written in total, which must be
1427 * a multiple of *data_len*. The value of *write_len* and *data_len*
1428 * must be <= UINT_MAX/2.
1429 *
1430 * @param io the io context in which the write will occur
1431 * @param oid name of the object
1432 * @param buf data to write
1433 * @param data_len length of the data, in bytes
1434 * @param write_len the total number of bytes to write
1435 * @param off byte offset in the object to begin writing at
1436 * @returns 0 on success, negative error code on failure
1437 */
1438 CEPH_RADOS_API int rados_writesame(rados_ioctx_t io, const char *oid,
1439 const char *buf, size_t data_len,
1440 size_t write_len, uint64_t off);
1441
1442 /**
1443 * Append *len* bytes from *buf* into the *oid* object. The value of
1444 * *len* must be <= UINT_MAX/2.
1445 *
1446 * @param io the context to operate in
1447 * @param oid the name of the object
1448 * @param buf the data to append
1449 * @param len length of buf (in bytes)
1450 * @returns 0 on success, negative error code on failure
1451 */
1452 CEPH_RADOS_API int rados_append(rados_ioctx_t io, const char *oid,
1453 const char *buf, size_t len);
1454
1455 /**
1456 * Read data from an object
1457 *
1458 * The io context determines the snapshot to read from, if any was set
1459 * by rados_ioctx_snap_set_read().
1460 *
1461 * @param io the context in which to perform the read
1462 * @param oid the name of the object to read from
1463 * @param buf where to store the results
1464 * @param len the number of bytes to read
1465 * @param off the offset to start reading from in the object
1466 * @returns number of bytes read on success, negative error code on
1467 * failure
1468 */
1469 CEPH_RADOS_API int rados_read(rados_ioctx_t io, const char *oid, char *buf,
1470 size_t len, uint64_t off);
1471
1472 /**
1473 * Compute checksum from object data
1474 *
1475 * The io context determines the snapshot to checksum, if any was set
1476 * by rados_ioctx_snap_set_read(). The length of the init_value and
1477 * resulting checksum are dependent upon the checksum type:
1478 *
1479 * XXHASH64: le64
1480 * XXHASH32: le32
1481 * CRC32C: le32
1482 *
1483 * The checksum result is encoded in the following manner:
1484 *
1485 * le32 num_checksum_chunks
1486 * {
1487 * leXX checksum for chunk (where XX = appropriate size for the checksum type)
1488 * } * num_checksum_chunks
1489 *
1490 * @param io the context in which to perform the checksum
1491 * @param oid the name of the object to checksum
1492 * @param type the checksum algorithm to utilize
1493 * @param init_value the init value for the algorithm
1494 * @param init_value_len the length of the init value
1495 * @param len the number of bytes to checksum
1496 * @param off the offset to start checksumming in the object
1497 * @param chunk_size optional length-aligned chunk size for checksums
1498 * @param pchecksum where to store the checksum result
1499 * @param checksum_len the number of bytes available for the result
1500 * @return negative error code on failure
1501 */
1502 CEPH_RADOS_API int rados_checksum(rados_ioctx_t io, const char *oid,
1503 rados_checksum_type_t type,
1504 const char *init_value, size_t init_value_len,
1505 size_t len, uint64_t off, size_t chunk_size,
1506 char *pchecksum, size_t checksum_len);
1507
1508 /**
1509 * Delete an object
1510 *
1511 * @note This does not delete any snapshots of the object.
1512 *
1513 * @param io the pool to delete the object from
1514 * @param oid the name of the object to delete
1515 * @returns 0 on success, negative error code on failure
1516 */
1517 CEPH_RADOS_API int rados_remove(rados_ioctx_t io, const char *oid);
1518
1519 /**
1520 * Resize an object
1521 *
1522 * If this enlarges the object, the new area is logically filled with
1523 * zeroes. If this shrinks the object, the excess data is removed.
1524 *
1525 * @param io the context in which to truncate
1526 * @param oid the name of the object
1527 * @param size the new size of the object in bytes
1528 * @returns 0 on success, negative error code on failure
1529 */
1530 CEPH_RADOS_API int rados_trunc(rados_ioctx_t io, const char *oid,
1531 uint64_t size);
1532
1533 /**
1534 * Compare an on-disk object range with a buffer
1535 *
1536 * @param io the context in which to perform the comparison
1537 * @param o name of the object
1538 * @param cmp_buf buffer containing bytes to be compared with object contents
1539 * @param cmp_len length to compare and size of *cmp_buf* in bytes
1540 * @param off object byte offset at which to start the comparison
1541 * @returns 0 on success, negative error code on failure,
1542 * (-MAX_ERRNO - mismatch_off) on mismatch
1543 */
1544 CEPH_RADOS_API int rados_cmpext(rados_ioctx_t io, const char *o,
1545 const char *cmp_buf, size_t cmp_len,
1546 uint64_t off);
1547
1548 /**
1549 * @name Xattrs
1550 * Extended attributes are stored as extended attributes on the files
1551 * representing an object on the OSDs. Thus, they have the same
1552 * limitations as the underlying filesystem. On ext4, this means that
1553 * the total data stored in xattrs cannot exceed 4KB.
1554 *
1555 * @{
1556 */
1557
1558 /**
1559 * Get the value of an extended attribute on an object.
1560 *
1561 * @param io the context in which the attribute is read
1562 * @param o name of the object
1563 * @param name which extended attribute to read
1564 * @param buf where to store the result
1565 * @param len size of buf in bytes
1566 * @returns length of xattr value on success, negative error code on failure
1567 */
1568 CEPH_RADOS_API int rados_getxattr(rados_ioctx_t io, const char *o,
1569 const char *name, char *buf, size_t len);
1570
1571 /**
1572 * Set an extended attribute on an object.
1573 *
1574 * @param io the context in which xattr is set
1575 * @param o name of the object
1576 * @param name which extended attribute to set
1577 * @param buf what to store in the xattr
1578 * @param len the number of bytes in buf
1579 * @returns 0 on success, negative error code on failure
1580 */
1581 CEPH_RADOS_API int rados_setxattr(rados_ioctx_t io, const char *o,
1582 const char *name, const char *buf,
1583 size_t len);
1584
1585 /**
1586 * Delete an extended attribute from an object.
1587 *
1588 * @param io the context in which to delete the xattr
1589 * @param o the name of the object
1590 * @param name which xattr to delete
1591 * @returns 0 on success, negative error code on failure
1592 */
1593 CEPH_RADOS_API int rados_rmxattr(rados_ioctx_t io, const char *o,
1594 const char *name);
1595
1596 /**
1597 * Start iterating over xattrs on an object.
1598 *
1599 * @post iter is a valid iterator
1600 *
1601 * @param io the context in which to list xattrs
1602 * @param oid name of the object
1603 * @param iter where to store the iterator
1604 * @returns 0 on success, negative error code on failure
1605 */
1606 CEPH_RADOS_API int rados_getxattrs(rados_ioctx_t io, const char *oid,
1607 rados_xattrs_iter_t *iter);
1608
1609 /**
1610 * Get the next xattr on the object
1611 *
1612 * @pre iter is a valid iterator
1613 *
1614 * @post name is the NULL-terminated name of the next xattr, and val
1615 * contains the value of the xattr, which is of length len. If the end
1616 * of the list has been reached, name and val are NULL, and len is 0.
1617 *
1618 * @param iter iterator to advance
1619 * @param name where to store the name of the next xattr
1620 * @param val where to store the value of the next xattr
1621 * @param len where to store the number of bytes in val
1622 * @returns 0 on success, negative error code on failure
1623 */
1624 CEPH_RADOS_API int rados_getxattrs_next(rados_xattrs_iter_t iter,
1625 const char **name, const char **val,
1626 size_t *len);
1627
1628 /**
1629 * Close the xattr iterator.
1630 *
1631 * iter should not be used after this is called.
1632 *
1633 * @param iter the iterator to close
1634 */
1635 CEPH_RADOS_API void rados_getxattrs_end(rados_xattrs_iter_t iter);
1636
1637 /** @} Xattrs */
1638
1639 /**
1640 * Get the next omap key/value pair on the object
1641 *
1642 * @pre iter is a valid iterator
1643 *
1644 * @post key and val are the next key/value pair. key is
1645 * null-terminated, and val has length len. If the end of the list has
1646 * been reached, key and val are NULL, and len is 0. key and val will
1647 * not be accessible after rados_omap_get_end() is called on iter, so
1648 * if they are needed after that they should be copied.
1649 *
1650 * @param iter iterator to advance
1651 * @param key where to store the key of the next omap entry
1652 * @param val where to store the value of the next omap entry
1653 * @param len where to store the number of bytes in val
1654 * @returns 0 on success, negative error code on failure
1655 */
1656 CEPH_RADOS_API int rados_omap_get_next(rados_omap_iter_t iter,
1657 char **key,
1658 char **val,
1659 size_t *len);
1660
1661 /**
1662 * Close the omap iterator.
1663 *
1664 * iter should not be used after this is called.
1665 *
1666 * @param iter the iterator to close
1667 */
1668 CEPH_RADOS_API void rados_omap_get_end(rados_omap_iter_t iter);
1669
1670 /**
1671 * Get object stats (size/mtime)
1672 *
1673 * TODO: when are these set, and by whom? can they be out of date?
1674 *
1675 * @param io ioctx
1676 * @param o object name
1677 * @param psize where to store object size
1678 * @param pmtime where to store modification time
1679 * @returns 0 on success, negative error code on failure
1680 */
1681 CEPH_RADOS_API int rados_stat(rados_ioctx_t io, const char *o, uint64_t *psize,
1682 time_t *pmtime);
1683
1684 /**
1685 * Update tmap (trivial map)
1686 *
1687 * Do compound update to a tmap object, inserting or deleting some
1688 * number of records. cmdbuf is a series of operation byte
1689 * codes, followed by command payload. Each command is a single-byte
1690 * command code, whose value is one of CEPH_OSD_TMAP_*.
1691 *
1692 * - update tmap 'header'
1693 * - 1 byte = CEPH_OSD_TMAP_HDR
1694 * - 4 bytes = data length (little endian)
1695 * - N bytes = data
1696 *
1697 * - insert/update one key/value pair
1698 * - 1 byte = CEPH_OSD_TMAP_SET
1699 * - 4 bytes = key name length (little endian)
1700 * - N bytes = key name
1701 * - 4 bytes = data length (little endian)
1702 * - M bytes = data
1703 *
1704 * - insert one key/value pair; return -EEXIST if it already exists.
1705 * - 1 byte = CEPH_OSD_TMAP_CREATE
1706 * - 4 bytes = key name length (little endian)
1707 * - N bytes = key name
1708 * - 4 bytes = data length (little endian)
1709 * - M bytes = data
1710 *
1711 * - remove one key/value pair
1712 * - 1 byte = CEPH_OSD_TMAP_RM
1713 * - 4 bytes = key name length (little endian)
1714 * - N bytes = key name
1715 *
1716 * Restrictions:
1717 * - The HDR update must precede any key/value updates.
1718 * - All key/value updates must be in lexicographically sorted order
1719 * in cmdbuf.
1720 * - You can read/write to a tmap object via the regular APIs, but
1721 * you should be careful not to corrupt it. Also be aware that the
1722 * object format may change without notice.
1723 *
1724 * @param io ioctx
1725 * @param o object name
1726 * @param cmdbuf command buffer
1727 * @param cmdbuflen command buffer length in bytes
1728 * @returns 0 on success, negative error code on failure
1729 */
1730 CEPH_RADOS_API int rados_tmap_update(rados_ioctx_t io, const char *o,
1731 const char *cmdbuf, size_t cmdbuflen);
1732
1733 /**
1734 * Store complete tmap (trivial map) object
1735 *
1736 * Put a full tmap object into the store, replacing what was there.
1737 *
1738 * The format of buf is:
1739 * - 4 bytes - length of header (little endian)
1740 * - N bytes - header data
1741 * - 4 bytes - number of keys (little endian)
1742 *
1743 * and for each key,
1744 * - 4 bytes - key name length (little endian)
1745 * - N bytes - key name
1746 * - 4 bytes - value length (little endian)
1747 * - M bytes - value data
1748 *
1749 * @param io ioctx
1750 * @param o object name
1751 * @param buf buffer
1752 * @param buflen buffer length in bytes
1753 * @returns 0 on success, negative error code on failure
1754 */
1755 CEPH_RADOS_API int rados_tmap_put(rados_ioctx_t io, const char *o,
1756 const char *buf, size_t buflen);
1757
1758 /**
1759 * Fetch complete tmap (trivial map) object
1760 *
1761 * Read a full tmap object. See rados_tmap_put() for the format the
1762 * data is returned in.
1763 *
1764 * @param io ioctx
1765 * @param o object name
1766 * @param buf buffer
1767 * @param buflen buffer length in bytes
1768 * @returns 0 on success, negative error code on failure
1769 * @returns -ERANGE if buf isn't big enough
1770 */
1771 CEPH_RADOS_API int rados_tmap_get(rados_ioctx_t io, const char *o, char *buf,
1772 size_t buflen);
1773
1774 /**
1775 * Execute an OSD class method on an object
1776 *
1777 * The OSD has a plugin mechanism for performing complicated
1778 * operations on an object atomically. These plugins are called
1779 * classes. This function allows librados users to call the custom
1780 * methods. The input and output formats are defined by the class.
1781 * Classes in ceph.git can be found in src/cls subdirectories
1782 *
1783 * @param io the context in which to call the method
1784 * @param oid the object to call the method on
1785 * @param cls the name of the class
1786 * @param method the name of the method
1787 * @param in_buf where to find input
1788 * @param in_len length of in_buf in bytes
1789 * @param buf where to store output
1790 * @param out_len length of buf in bytes
1791 * @returns the length of the output, or
1792 * -ERANGE if buf does not have enough space to store it (For methods that return data). For
1793 * methods that don't return data, the return value is
1794 * method-specific.
1795 */
1796 CEPH_RADOS_API int rados_exec(rados_ioctx_t io, const char *oid,
1797 const char *cls, const char *method,
1798 const char *in_buf, size_t in_len, char *buf,
1799 size_t out_len);
1800
1801
1802 /** @} Synchronous I/O */
1803
1804 /**
1805 * @name Asynchronous I/O
1806 * Read and write to objects without blocking.
1807 *
1808 * @{
1809 */
1810
1811 /**
1812 * @typedef rados_callback_t
1813 * Callbacks for asynchronous operations take two parameters:
1814 * - cb the completion that has finished
1815 * - arg application defined data made available to the callback function
1816 */
1817 typedef void (*rados_callback_t)(rados_completion_t cb, void *arg);
1818
1819 /**
1820 * Constructs a completion to use with asynchronous operations
1821 *
1822 * The complete and safe callbacks correspond to operations being
1823 * acked and committed, respectively. The callbacks are called in
1824 * order of receipt, so the safe callback may be triggered before the
1825 * complete callback, and vice versa. This is affected by journalling
1826 * on the OSDs.
1827 *
1828 * TODO: more complete documentation of this elsewhere (in the RADOS docs?)
1829 *
1830 * @note Read operations only get a complete callback.
1831 * @note BUG: this should check for ENOMEM instead of throwing an exception
1832 *
1833 * @param cb_arg application-defined data passed to the callback functions
1834 * @param cb_complete the function to be called when the operation is
1835 * in memory on all replicas
1836 * @param cb_safe the function to be called when the operation is on
1837 * stable storage on all replicas
1838 * @param pc where to store the completion
1839 * @returns 0
1840 */
1841 CEPH_RADOS_API int rados_aio_create_completion(void *cb_arg,
1842 rados_callback_t cb_complete,
1843 rados_callback_t cb_safe,
1844 rados_completion_t *pc);
1845
1846 /**
1847 * Block until an operation completes
1848 *
1849 * This means it is in memory on all replicas.
1850 *
1851 * @note BUG: this should be void
1852 *
1853 * @param c operation to wait for
1854 * @returns 0
1855 */
1856 CEPH_RADOS_API int rados_aio_wait_for_complete(rados_completion_t c);
1857
1858 /**
1859 * Block until an operation is safe
1860 *
1861 * This means it is on stable storage on all replicas.
1862 *
1863 * @note BUG: this should be void
1864 *
1865 * @param c operation to wait for
1866 * @returns 0
1867 */
1868 CEPH_RADOS_API int rados_aio_wait_for_safe(rados_completion_t c);
1869
1870 /**
1871 * Has an asynchronous operation completed?
1872 *
1873 * @warning This does not imply that the complete callback has
1874 * finished
1875 *
1876 * @param c async operation to inspect
1877 * @returns whether c is complete
1878 */
1879 CEPH_RADOS_API int rados_aio_is_complete(rados_completion_t c);
1880
1881 /**
1882 * Is an asynchronous operation safe?
1883 *
1884 * @warning This does not imply that the safe callback has
1885 * finished
1886 *
1887 * @param c async operation to inspect
1888 * @returns whether c is safe
1889 */
1890 CEPH_RADOS_API int rados_aio_is_safe(rados_completion_t c);
1891
1892 /**
1893 * Block until an operation completes and callback completes
1894 *
1895 * This means it is in memory on all replicas and can be read.
1896 *
1897 * @note BUG: this should be void
1898 *
1899 * @param c operation to wait for
1900 * @returns 0
1901 */
1902 CEPH_RADOS_API int rados_aio_wait_for_complete_and_cb(rados_completion_t c);
1903
1904 /**
1905 * Block until an operation is safe and callback has completed
1906 *
1907 * This means it is on stable storage on all replicas.
1908 *
1909 * @note BUG: this should be void
1910 *
1911 * @param c operation to wait for
1912 * @returns 0
1913 */
1914 CEPH_RADOS_API int rados_aio_wait_for_safe_and_cb(rados_completion_t c);
1915
1916 /**
1917 * Has an asynchronous operation completed and its callback finished?
1918 *
1919 * @param c async operation to inspect
1920 * @returns whether c is complete and its callback has completed
1921 */
1922 CEPH_RADOS_API int rados_aio_is_complete_and_cb(rados_completion_t c);
1923
1924 /**
1925 * Is an asynchronous operation safe and has its callback completed?
1926 *
1927 * @param c async operation to inspect
1928 * @returns whether c is safe and its callback has completed
1929 */
1930 CEPH_RADOS_API int rados_aio_is_safe_and_cb(rados_completion_t c);
1931
1932 /**
1933 * Get the return value of an asynchronous operation
1934 *
1935 * The return value is set when the operation is complete or safe,
1936 * whichever comes first.
1937 *
1938 * @pre The operation is safe or complete
1939 *
1940 * @note BUG: complete callback may never be called when the safe
1941 * message is received before the complete message
1942 *
1943 * @param c async operation to inspect
1944 * @returns return value of the operation
1945 */
1946 CEPH_RADOS_API int rados_aio_get_return_value(rados_completion_t c);
1947
1948 /**
1949 * Get the internal object version of the target of an asynchronous operation
1950 *
1951 * The return value is set when the operation is complete or safe,
1952 * whichever comes first.
1953 *
1954 * @pre The operation is safe or complete
1955 *
1956 * @note BUG: complete callback may never be called when the safe
1957 * message is received before the complete message
1958 *
1959 * @param c async operation to inspect
1960 * @returns version number of the asynchronous operation's target
1961 */
1962 CEPH_RADOS_API uint64_t rados_aio_get_version(rados_completion_t c);
1963
1964 /**
1965 * Release a completion
1966 *
1967 * Call this when you no longer need the completion. It may not be
1968 * freed immediately if the operation is not acked and committed.
1969 *
1970 * @param c completion to release
1971 */
1972 CEPH_RADOS_API void rados_aio_release(rados_completion_t c);
1973
1974 /**
1975 * Write data to an object asynchronously
1976 *
1977 * Queues the write and returns. The return value of the completion
1978 * will be 0 on success, negative error code on failure.
1979 *
1980 * @param io the context in which the write will occur
1981 * @param oid name of the object
1982 * @param completion what to do when the write is safe and complete
1983 * @param buf data to write
1984 * @param len length of the data, in bytes
1985 * @param off byte offset in the object to begin writing at
1986 * @returns 0 on success, -EROFS if the io context specifies a snap_seq
1987 * other than LIBRADOS_SNAP_HEAD
1988 */
1989 CEPH_RADOS_API int rados_aio_write(rados_ioctx_t io, const char *oid,
1990 rados_completion_t completion,
1991 const char *buf, size_t len, uint64_t off);
1992
1993 /**
1994 * Asynchronously append data to an object
1995 *
1996 * Queues the append and returns.
1997 *
1998 * The return value of the completion will be 0 on success, negative
1999 * error code on failure.
2000 *
2001 * @param io the context to operate in
2002 * @param oid the name of the object
2003 * @param completion what to do when the append is safe and complete
2004 * @param buf the data to append
2005 * @param len length of buf (in bytes)
2006 * @returns 0 on success, -EROFS if the io context specifies a snap_seq
2007 * other than LIBRADOS_SNAP_HEAD
2008 */
2009 CEPH_RADOS_API int rados_aio_append(rados_ioctx_t io, const char *oid,
2010 rados_completion_t completion,
2011 const char *buf, size_t len);
2012
2013 /**
2014 * Asynchronously write an entire object
2015 *
2016 * The object is filled with the provided data. If the object exists,
2017 * it is atomically truncated and then written.
2018 * Queues the write_full and returns.
2019 *
2020 * The return value of the completion will be 0 on success, negative
2021 * error code on failure.
2022 *
2023 * @param io the io context in which the write will occur
2024 * @param oid name of the object
2025 * @param completion what to do when the write_full is safe and complete
2026 * @param buf data to write
2027 * @param len length of the data, in bytes
2028 * @returns 0 on success, -EROFS if the io context specifies a snap_seq
2029 * other than LIBRADOS_SNAP_HEAD
2030 */
2031 CEPH_RADOS_API int rados_aio_write_full(rados_ioctx_t io, const char *oid,
2032 rados_completion_t completion,
2033 const char *buf, size_t len);
2034
2035 /**
2036 * Asynchronously write the same buffer multiple times
2037 *
2038 * Queues the writesame and returns.
2039 *
2040 * The return value of the completion will be 0 on success, negative
2041 * error code on failure.
2042 *
2043 * @param io the io context in which the write will occur
2044 * @param oid name of the object
2045 * @param completion what to do when the writesame is safe and complete
2046 * @param buf data to write
2047 * @param data_len length of the data, in bytes
2048 * @param write_len the total number of bytes to write
2049 * @param off byte offset in the object to begin writing at
2050 * @returns 0 on success, -EROFS if the io context specifies a snap_seq
2051 * other than LIBRADOS_SNAP_HEAD
2052 */
2053 CEPH_RADOS_API int rados_aio_writesame(rados_ioctx_t io, const char *oid,
2054 rados_completion_t completion,
2055 const char *buf, size_t data_len,
2056 size_t write_len, uint64_t off);
2057
2058 /**
2059 * Asynchronously remove an object
2060 *
2061 * Queues the remove and returns.
2062 *
2063 * The return value of the completion will be 0 on success, negative
2064 * error code on failure.
2065 *
2066 * @param io the context to operate in
2067 * @param oid the name of the object
2068 * @param completion what to do when the remove is safe and complete
2069 * @returns 0 on success, -EROFS if the io context specifies a snap_seq
2070 * other than LIBRADOS_SNAP_HEAD
2071 */
2072 CEPH_RADOS_API int rados_aio_remove(rados_ioctx_t io, const char *oid,
2073 rados_completion_t completion);
2074
2075 /**
2076 * Asynchronously read data from an object
2077 *
2078 * The io context determines the snapshot to read from, if any was set
2079 * by rados_ioctx_snap_set_read().
2080 *
2081 * The return value of the completion will be number of bytes read on
2082 * success, negative error code on failure.
2083 *
2084 * @note only the 'complete' callback of the completion will be called.
2085 *
2086 * @param io the context in which to perform the read
2087 * @param oid the name of the object to read from
2088 * @param completion what to do when the read is complete
2089 * @param buf where to store the results
2090 * @param len the number of bytes to read
2091 * @param off the offset to start reading from in the object
2092 * @returns 0 on success, negative error code on failure
2093 */
2094 CEPH_RADOS_API int rados_aio_read(rados_ioctx_t io, const char *oid,
2095 rados_completion_t completion,
2096 char *buf, size_t len, uint64_t off);
2097
2098 /**
2099 * Block until all pending writes in an io context are safe
2100 *
2101 * This is not equivalent to calling rados_aio_wait_for_safe() on all
2102 * write completions, since this waits for the associated callbacks to
2103 * complete as well.
2104 *
2105 * @note BUG: always returns 0, should be void or accept a timeout
2106 *
2107 * @param io the context to flush
2108 * @returns 0 on success, negative error code on failure
2109 */
2110 CEPH_RADOS_API int rados_aio_flush(rados_ioctx_t io);
2111
2112
2113 /**
2114 * Schedule a callback for when all currently pending
2115 * aio writes are safe. This is a non-blocking version of
2116 * rados_aio_flush().
2117 *
2118 * @param io the context to flush
2119 * @param completion what to do when the writes are safe
2120 * @returns 0 on success, negative error code on failure
2121 */
2122 CEPH_RADOS_API int rados_aio_flush_async(rados_ioctx_t io,
2123 rados_completion_t completion);
2124
2125
2126 /**
2127 * Asynchronously get object stats (size/mtime)
2128 *
2129 * @param io ioctx
2130 * @param o object name
 * @param completion what to do when the stat is complete
2131 * @param psize where to store object size
2132 * @param pmtime where to store modification time
2133 * @returns 0 on success, negative error code on failure
2134 */
2135 CEPH_RADOS_API int rados_aio_stat(rados_ioctx_t io, const char *o,
2136 rados_completion_t completion,
2137 uint64_t *psize, time_t *pmtime);
2138
2139 /**
2140 * Asynchronously compare an on-disk object range with a buffer
2141 *
2142 * @param io the context in which to perform the comparison
2143 * @param o the name of the object to compare with
2144 * @param completion what to do when the comparison is complete
2145 * @param cmp_buf buffer containing bytes to be compared with object contents
2146 * @param cmp_len length to compare and size of cmp_buf in bytes
2147 * @param off object byte offset at which to start the comparison
2148 * @returns 0 on success, negative error code on failure,
2149 * (-MAX_ERRNO - mismatch_off) on mismatch
2150 */
2151 CEPH_RADOS_API int rados_aio_cmpext(rados_ioctx_t io, const char *o,
2152 rados_completion_t completion,
2153 const char *cmp_buf,
2154 size_t cmp_len,
2155 uint64_t off);
2156
2157 /**
2158 * Cancel async operation
2159 *
2160 * @param io ioctx
2161 * @param completion completion handle
2162 * @returns 0 on success, negative error code on failure
2163 */
2164 CEPH_RADOS_API int rados_aio_cancel(rados_ioctx_t io,
2165 rados_completion_t completion);
2166
2167 /**
2168 * Asynchronously execute an OSD class method on an object
2169 *
2170 * The OSD has a plugin mechanism for performing complicated
2171 * operations on an object atomically. These plugins are called
2172 * classes. This function allows librados users to call the custom
2173 * methods. The input and output formats are defined by the class.
2174 * Classes in ceph.git can be found in src/cls subdirectories.
2175 *
2176 * @param io the context in which to call the method
2177 * @param o the object to call the method on
 * @param completion what to do when the exec completes
2178 * @param cls the name of the class
2179 * @param method the name of the method
2180 * @param in_buf where to find input
2181 * @param in_len length of in_buf in bytes
2182 * @param buf where to store output
2183 * @param out_len length of buf in bytes
2184 * @returns 0 on success, negative error code on failure
2185 */
2186 CEPH_RADOS_API int rados_aio_exec(rados_ioctx_t io, const char *o,
2187 rados_completion_t completion,
2188 const char *cls, const char *method,
2189 const char *in_buf, size_t in_len,
2190 char *buf, size_t out_len);
2191
2192 /** @} Asynchronous I/O */
2193
2194 /**
2195 * @name Asynchronous Xattrs
2196 * Extended attributes are stored as extended attributes on the files
2197 * representing an object on the OSDs. Thus, they have the same
2198 * limitations as the underlying filesystem. On ext4, this means that
2199 * the total data stored in xattrs cannot exceed 4KB.
2200 *
2201 * @{
2202 */
2203
2204 /**
2205 * Asynchronously get the value of an extended attribute on an object.
2206 *
2207 * @param io the context in which the attribute is read
2208 * @param o name of the object
2209 * @param completion what to do when the getxattr completes
2210 * @param name which extended attribute to read
2211 * @param buf where to store the result
2212 * @param len size of buf in bytes
2213 * @returns length of xattr value on success, negative error code on failure
2214 */
2215 CEPH_RADOS_API int rados_aio_getxattr(rados_ioctx_t io, const char *o,
2216 rados_completion_t completion,
2217 const char *name, char *buf, size_t len);
2218
2219 /**
2220 * Asynchronously set an extended attribute on an object.
2221 *
2222 * @param io the context in which xattr is set
2223 * @param o name of the object
2224 * @param completion what to do when the setxattr completes
2225 * @param name which extended attribute to set
2226 * @param buf what to store in the xattr
2227 * @param len the number of bytes in buf
2228 * @returns 0 on success, negative error code on failure
2229 */
2230 CEPH_RADOS_API int rados_aio_setxattr(rados_ioctx_t io, const char *o,
2231 rados_completion_t completion,
2232 const char *name, const char *buf,
2233 size_t len);
2234
2235 /**
2236 * Asynchronously delete an extended attribute from an object.
2237 *
2238 * @param io the context in which to delete the xattr
2239 * @param o the name of the object
2240 * @param completion what to do when the rmxattr completes
2241 * @param name which xattr to delete
2242 * @returns 0 on success, negative error code on failure
2243 */
2244 CEPH_RADOS_API int rados_aio_rmxattr(rados_ioctx_t io, const char *o,
2245 rados_completion_t completion,
2246 const char *name);
2247
2248 /**
2249 * Asynchronously start iterating over xattrs on an object.
2250 *
2251 * @post iter is a valid iterator
2252 *
2253 * @param io the context in which to list xattrs
2254 * @param oid name of the object
 * @param completion what to do when the getxattrs completes
2255 * @param iter where to store the iterator
2256 * @returns 0 on success, negative error code on failure
2257 */
2258 CEPH_RADOS_API int rados_aio_getxattrs(rados_ioctx_t io, const char *oid,
2259 rados_completion_t completion,
2260 rados_xattrs_iter_t *iter);
2261
2262 /** @} Asynchronous Xattrs */
2263
2264 /**
2265 * @name Watch/Notify
2266 *
2267 * Watch/notify is a protocol to help communicate among clients. It
2268 * can be used to synchronize client state. All that's needed is a
2269 * well-known object name (for example, rbd uses the header object of
2270 * an image).
2271 *
2272 * Watchers register an interest in an object, and receive all
2273 * notifies on that object. A notify attempts to communicate with all
2274 * clients watching an object, and blocks on the notifier until each
2275 * client responds or a timeout is reached.
2276 *
2277 * See rados_watch() and rados_notify() for more details.
2278 *
2279 * @{
2280 */
2281
2282 /**
2283 * @typedef rados_watchcb_t
2284 *
2285 * Callback activated when a notify is received on a watched
2286 * object.
2287 *
2288 * @param opcode undefined
2289 * @param ver version of the watched object
2290 * @param arg application-specific data
2291 *
2292 * @note BUG: opcode is an internal detail that shouldn't be exposed
2293 * @note BUG: ver is unused
2294 */
2295 typedef void (*rados_watchcb_t)(uint8_t opcode, uint64_t ver, void *arg);
2296
2297 /**
2298 * @typedef rados_watchcb2_t
2299 *
2300 * Callback activated when a notify is received on a watched
2301 * object.
2302 *
2303 * @param arg opaque user-defined value provided to rados_watch2()
2304 * @param notify_id an id for this notify event
2305 * @param handle the watcher handle we are notifying
2306 * @param notifier_id the unique client id for the notifier
2307 * @param data payload from the notifier
2308 * @param data_len length of payload buffer
2309 */
2310 typedef void (*rados_watchcb2_t)(void *arg,
2311 uint64_t notify_id,
2312 uint64_t handle,
2313 uint64_t notifier_id,
2314 void *data,
2315 size_t data_len);
2316
2317 /**
2318 * @typedef rados_watcherrcb_t
2319 *
2320 * Callback activated when we encounter an error with the watch session.
2321 * This can happen when the location of the objects moves within the
2322 * cluster and we fail to register our watch with the new object location,
2323 * or when our connection with the object OSD is otherwise interrupted and
2324 * we may have missed notify events.
2325 *
2326 * @param pre opaque user-defined value provided to rados_watch2()
 * @param cookie id of the affected watch (presumably the cookie stored by
 * rados_watch2() -- TODO confirm)
2327 * @param err error code
2328 */
2329 typedef void (*rados_watcherrcb_t)(void *pre, uint64_t cookie, int err);
2330
2331 /**
2332 * Register an interest in an object
2333 *
2334 * A watch operation registers the client as being interested in
2335 * notifications on an object. OSDs keep track of watches on
2336 * persistent storage, so they are preserved across cluster changes by
2337 * the normal recovery process. If the client loses its connection to
2338 * the primary OSD for a watched object, the watch will be removed
2339 * after 30 seconds. Watches are automatically reestablished when a new
2340 * connection is made, or a placement group switches OSDs.
2341 *
2342 * @note BUG: librados should provide a way for watchers to notice connection resets
2343 * @note BUG: the ver parameter does not work, and -ERANGE will never be returned
2344 * (See URL tracker.ceph.com/issues/2592)
2345 *
2346 * @param io the pool the object is in
2347 * @param o the object to watch
2348 * @param ver expected version of the object
2349 * @param cookie where to store the internal id assigned to this watch
2350 * @param watchcb what to do when a notify is received on this object
2351 * @param arg application defined data to pass when watchcb is called
2352 * @returns 0 on success, negative error code on failure
2353 * @returns -ERANGE if the version of the object is greater than ver
2354 */
2355 CEPH_RADOS_API int rados_watch(rados_ioctx_t io, const char *o, uint64_t ver,
2356 uint64_t *cookie,
2357 rados_watchcb_t watchcb, void *arg)
2358 __attribute__((deprecated));
2359
2360
2361 /**
2362 * Register an interest in an object
2363 *
2364 * A watch operation registers the client as being interested in
2365 * notifications on an object. OSDs keep track of watches on
2366 * persistent storage, so they are preserved across cluster changes by
2367 * the normal recovery process. If the client loses its connection to the
2368 * primary OSD for a watched object, the watch will be removed after
2369 * a timeout configured with osd_client_watch_timeout.
2370 * Watches are automatically reestablished when a new
2371 * connection is made, or a placement group switches OSDs.
2372 *
2373 * @param io the pool the object is in
2374 * @param o the object to watch
2375 * @param cookie where to store the internal id assigned to this watch
2376 * @param watchcb what to do when a notify is received on this object
2377 * @param watcherrcb what to do when the watch session encounters an error
2378 * @param arg opaque value to pass to the callback
2379 * @returns 0 on success, negative error code on failure
2380 */
2381 CEPH_RADOS_API int rados_watch2(rados_ioctx_t io, const char *o, uint64_t *cookie,
2382 rados_watchcb2_t watchcb,
2383 rados_watcherrcb_t watcherrcb,
2384 void *arg);
2385
2386 /**
2387 * Register an interest in an object
2388 *
2389 * A watch operation registers the client as being interested in
2390 * notifications on an object. OSDs keep track of watches on
2391 * persistent storage, so they are preserved across cluster changes by
2392 * the normal recovery process. Watches are automatically reestablished when a new
2393 * connection is made, or a placement group switches OSDs.
2394 *
2395 * @param io the pool the object is in
2396 * @param o the object to watch
2397 * @param cookie where to store the internal id assigned to this watch
2398 * @param watchcb what to do when a notify is received on this object
2399 * @param watcherrcb what to do when the watch session encounters an error
2400 * @param timeout how many seconds the watch is kept after the connection is lost
2401 * @param arg opaque value to pass to the callback
2402 * @returns 0 on success, negative error code on failure
2403 */
2404 CEPH_RADOS_API int rados_watch3(rados_ioctx_t io, const char *o, uint64_t *cookie,
2405 rados_watchcb2_t watchcb,
2406 rados_watcherrcb_t watcherrcb,
2407 uint32_t timeout,
2408 void *arg);
2409
2410 /**
2411 * Asynchronously register an interest in an object
2412 *
2413 * A watch operation registers the client as being interested in
2414 * notifications on an object. OSDs keep track of watches on
2415 * persistent storage, so they are preserved across cluster changes by
2416 * the normal recovery process. If the client loses its connection to
2417 * the primary OSD for a watched object, the watch will be removed
2418 * after 30 seconds. Watches are automatically reestablished when a new
2419 * connection is made, or a placement group switches OSDs.
2420 *
2421 * @param io the pool the object is in
2422 * @param o the object to watch
2423 * @param completion what to do when operation has been attempted
2424 * @param handle where to store the internal id assigned to this watch
2425 * @param watchcb what to do when a notify is received on this object
2426 * @param watcherrcb what to do when the watch session encounters an error
2427 * @param arg opaque value to pass to the callback
2428 * @returns 0 on success, negative error code on failure
2429 */
2430 CEPH_RADOS_API int rados_aio_watch(rados_ioctx_t io, const char *o,
2431 rados_completion_t completion, uint64_t *handle,
2432 rados_watchcb2_t watchcb,
2433 rados_watcherrcb_t watcherrcb,
2434 void *arg);
2435
2436 /**
2437 * Asynchronously register an interest in an object
2438 *
2439 * A watch operation registers the client as being interested in
2440 * notifications on an object. OSDs keep track of watches on
2441 * persistent storage, so they are preserved across cluster changes by
2442 * the normal recovery process. If the client loses its connection to
2443 * the primary OSD for a watched object, the watch will be removed
2444 * after the number of seconds configured in the timeout parameter.
2445 * Watches are automatically reestablished when a new
2446 * connection is made, or a placement group switches OSDs.
2447 *
2448 * @param io the pool the object is in
2449 * @param o the object to watch
2450 * @param completion what to do when operation has been attempted
2451 * @param handle where to store the internal id assigned to this watch
2452 * @param watchcb what to do when a notify is received on this object
2453 * @param watcherrcb what to do when the watch session encounters an error
2454 * @param timeout how many seconds the watch is kept after the connection is lost
2455 * @param arg opaque value to pass to the callback
2456 * @returns 0 on success, negative error code on failure
2457 */
2458 CEPH_RADOS_API int rados_aio_watch2(rados_ioctx_t io, const char *o,
2459 rados_completion_t completion, uint64_t *handle,
2460 rados_watchcb2_t watchcb,
2461 rados_watcherrcb_t watcherrcb,
2462 uint32_t timeout,
2463 void *arg);
2464
2465 /**
2466 * Check on the status of a watch
2467 *
2468 * Return the number of milliseconds since the watch was last confirmed.
2469 * Or, if there has been an error, return that.
2470 *
2471 * If there is an error, the watch is no longer valid, and should be
2472 * destroyed with rados_unwatch2(). If the user is still interested
2473 * in the object, a new watch should be created with rados_watch2().
2474 *
2475 * @param io the pool the object is in
2476 * @param cookie the watch handle
2477 * @returns ms since last confirmed on success, negative error code on failure
2478 */
2479 CEPH_RADOS_API int rados_watch_check(rados_ioctx_t io, uint64_t cookie);
2480
2481 /**
2482 * Unregister an interest in an object
2483 *
2484 * Once this completes, no more notifies will be sent to us for this
2485 * watch. This should be called to clean up unneeded watchers.
2486 *
2487 * @param io the pool the object is in
2488 * @param o the name of the watched object (ignored)
2489 * @param cookie which watch to unregister
2490 * @returns 0 on success, negative error code on failure
2491 */
2492 CEPH_RADOS_API int rados_unwatch(rados_ioctx_t io, const char *o, uint64_t cookie)
2493 __attribute__((deprecated));
2494
2495 /**
2496 * Unregister an interest in an object
2497 *
2498 * Once this completes, no more notifies will be sent to us for this
2499 * watch. This should be called to clean up unneeded watchers.
2500 *
2501 * @param io the pool the object is in
2502 * @param cookie which watch to unregister
2503 * @returns 0 on success, negative error code on failure
2504 */
2505 CEPH_RADOS_API int rados_unwatch2(rados_ioctx_t io, uint64_t cookie);
2506
2507 /**
2508 * Asynchronously unregister an interest in an object
2509 *
2510 * Once this completes, no more notifies will be sent to us for this
2511 * watch. This should be called to clean up unneeded watchers.
2512 *
2513 * @param io the pool the object is in
2514 * @param cookie which watch to unregister
2515 * @param completion what to do when operation has been attempted
2516 * @returns 0 on success, negative error code on failure
2517 */
2518 CEPH_RADOS_API int rados_aio_unwatch(rados_ioctx_t io, uint64_t cookie,
2519 rados_completion_t completion);
2520
2521 /**
2522 * Synchronously notify watchers of an object
2523 *
2524 * This blocks until all watchers of the object have received and
2525 * reacted to the notify, or a timeout is reached.
2526 *
2527 * @note BUG: the timeout is not changeable via the C API
2528 * @note BUG: the bufferlist is inaccessible in a rados_watchcb_t
2529 *
2530 * @param io the pool the object is in
2531 * @param o the name of the object
2532 * @param ver obsolete - just pass zero
2533 * @param buf data to send to watchers
2534 * @param buf_len length of buf in bytes
2535 * @returns 0 on success, negative error code on failure
2536 */
2537 CEPH_RADOS_API int rados_notify(rados_ioctx_t io, const char *o, uint64_t ver,
2538 const char *buf, int buf_len)
2539 __attribute__((deprecated));
2540
2541 /**
2542 * Synchronously notify watchers of an object
2543 *
2544 * This blocks until all watchers of the object have received and
2545 * reacted to the notify, or a timeout is reached.
2546 *
2547 * The reply buffer is optional. If specified, the client will get
2548 * back an encoded buffer that includes the ids of the clients that
2549 * acknowledged the notify as well as their notify ack payloads (if
2550 * any). Clients that timed out are not included. Even clients that
2551 * do not include a notify ack payload are included in the list but
2552 * have a 0-length payload associated with them. The format:
2553 *
2554 * le32 num_acks
2555 * {
2556 * le64 gid global id for the client (for client.1234 that's 1234)
2557 * le64 cookie cookie for the client
2558 * le32 buflen length of reply message buffer
2559 * u8 * buflen payload
2560 * } * num_acks
2561 * le32 num_timeouts
2562 * {
2563 * le64 gid global id for the client
2564 * le64 cookie cookie for the client
2565 * } * num_timeouts
2566 *
2567 * Note: There may be multiple instances of the same gid if there are
2568 * multiple watchers registered via the same client.
2569 *
2570 * Note: The buffer must be released with rados_buffer_free() when the
2571 * user is done with it.
2572 *
2573 * Note: Since the result buffer includes clients that time out, it
2574 * will be set even when rados_notify2() returns an error code (like
2575 * -ETIMEDOUT).
2576 *
2577 * @param io the pool the object is in
2578 * @param o the name of the object
2579 * @param completion what to do when operation has been attempted (rados_aio_notify() only)
2580 * @param buf data to send to watchers
2581 * @param buf_len length of buf in bytes
2582 * @param timeout_ms notify timeout (in ms)
2583 * @param reply_buffer pointer to reply buffer pointer (free with rados_buffer_free)
2584 * @param reply_buffer_len pointer to size of reply buffer
2585 * @returns 0 on success, negative error code on failure
2586 */
2587 CEPH_RADOS_API int rados_notify2(rados_ioctx_t io, const char *o,
2588 const char *buf, int buf_len,
2589 uint64_t timeout_ms,
2590 char **reply_buffer, size_t *reply_buffer_len);
2591 CEPH_RADOS_API int rados_aio_notify(rados_ioctx_t io, const char *o,
2592 rados_completion_t completion,
2593 const char *buf, int buf_len,
2594 uint64_t timeout_ms, char **reply_buffer,
2595 size_t *reply_buffer_len);
2596
2597 /**
2598 * Acknowledge receipt of a notify
2599 *
2600 * @param io the pool the object is in
2601 * @param o the name of the object
2602 * @param notify_id the notify_id we got on the watchcb2_t callback
2603 * @param cookie the watcher handle
2604 * @param buf payload to return to notifier (optional)
2605 * @param buf_len payload length
2606 * @returns 0 on success
2607 */
2608 CEPH_RADOS_API int rados_notify_ack(rados_ioctx_t io, const char *o,
2609 uint64_t notify_id, uint64_t cookie,
2610 const char *buf, int buf_len);
2611
2612 /**
2613 * Flush watch/notify callbacks
2614 *
2615 * This call will block until all pending watch/notify callbacks have
2616 * been executed and the queue is empty. It should usually be called
2617 * after shutting down any watches before shutting down the ioctx or
2618 * librados to ensure that any callbacks do not misuse the ioctx (for
2619 * example by calling rados_notify_ack after the ioctx has been
2620 * destroyed).
2621 *
2622 * @param cluster the cluster handle
2623 */
2624 CEPH_RADOS_API int rados_watch_flush(rados_t cluster);
2625 /**
2626 * Asynchronously flush watch/notify callbacks
2627 *
2628 * This call is non-blocking; the completion will be called
2629 * once all pending watch/notify callbacks have been executed and
2630 * the queue is empty. It should usually be called after shutting
2631 * down any watches before shutting down the ioctx or
2632 * librados to ensure that any callbacks do not misuse the ioctx (for
2633 * example by calling rados_notify_ack after the ioctx has been
2634 * destroyed).
2635 *
2636 * @param cluster the cluster handle
2637 * @param completion what to do when operation has been attempted
2638 */
2639 CEPH_RADOS_API int rados_aio_watch_flush(rados_t cluster, rados_completion_t completion);
2640
2641 /** @} Watch/Notify */
2642
2643 /**
2644 * Pin an object in the cache tier
2645 *
2646 * When an object is pinned in the cache tier, it stays in the cache
2647 * tier, and won't be flushed out.
2648 *
2649 * @param io the pool the object is in
2650 * @param o the object id
2651 * @returns 0 on success, negative error code on failure
2652 */
2653 CEPH_RADOS_API int rados_cache_pin(rados_ioctx_t io, const char *o);
2654
2655 /**
2656 * Unpin an object in the cache tier
2657 *
2658 * After an object is unpinned in the cache tier, it can be flushed out
2659 *
2660 * @param io the pool the object is in
2661 * @param o the object id
2662 * @returns 0 on success, negative error code on failure
2663 */
2664 CEPH_RADOS_API int rados_cache_unpin(rados_ioctx_t io, const char *o);
2665
2666 /**
2667 * @name Hints
2668 *
2669 * @{
2670 */
2671
2672 /**
2673 * Set allocation hint for an object
2674 *
2675 * This is an advisory operation, it will always succeed (as if it was
2676 * submitted with a LIBRADOS_OP_FLAG_FAILOK flag set) and is not
2677 * guaranteed to do anything on the backend.
2678 *
2679 * @param io the pool the object is in
2680 * @param o the name of the object
2681 * @param expected_object_size expected size of the object, in bytes
2682 * @param expected_write_size expected size of writes to the object, in bytes
2683 * @returns 0 on success, negative error code on failure
2684 */
2685 CEPH_RADOS_API int rados_set_alloc_hint(rados_ioctx_t io, const char *o,
2686 uint64_t expected_object_size,
2687 uint64_t expected_write_size);
2688
2689 /**
2690 * Set allocation hint for an object
2691 *
2692 * This is an advisory operation, it will always succeed (as if it was
2693 * submitted with a LIBRADOS_OP_FLAG_FAILOK flag set) and is not
2694 * guaranteed to do anything on the backend.
2695 *
2696 * @param io the pool the object is in
2697 * @param o the name of the object
2698 * @param expected_object_size expected size of the object, in bytes
2699 * @param expected_write_size expected size of writes to the object, in bytes
2700 * @param flags hints about future IO patterns
2701 * @returns 0 on success, negative error code on failure
2702 */
2703 CEPH_RADOS_API int rados_set_alloc_hint2(rados_ioctx_t io, const char *o,
2704 uint64_t expected_object_size,
2705 uint64_t expected_write_size,
2706 uint32_t flags);
2707
2708 /** @} Hints */
2709
2710 /**
2711 * @name Object Operations
2712 *
2713 * A single rados operation can do multiple operations on one object
2714 * atomically. The whole operation will succeed or fail, and no partial
2715 * results will be visible.
2716 *
2717 * Operations may be either reads, which can return data, or writes,
2718 * which cannot. The effects of writes are applied and visible all at
2719 * once, so an operation that sets an xattr and then checks its value
2720 * will not see the updated value.
2721 *
2722 * @{
2723 */
2724
2725 /**
2726 * Create a new rados_write_op_t write operation. This will store all actions
2727 * to be performed atomically. You must call rados_release_write_op when you are
2728 * finished with it.
2729 *
2730 * @returns non-NULL on success, NULL on memory allocation error.
2731 */
2732 CEPH_RADOS_API rados_write_op_t rados_create_write_op(void);
2733
2734 /**
2735 * Free a rados_write_op_t, must be called when you're done with it.
2736 * @param write_op operation to deallocate, created with rados_create_write_op
2737 */
2738 CEPH_RADOS_API void rados_release_write_op(rados_write_op_t write_op);
2739
2740 /**
2741 * Set flags for the last operation added to this write_op.
2742 * At least one op must have been added to the write_op.
2743 * @param flags see librados.h constants beginning with LIBRADOS_OP_FLAG
2744 */
2745 CEPH_RADOS_API void rados_write_op_set_flags(rados_write_op_t write_op,
2746 int flags);
2747
2748 /**
2749 * Ensure that the object exists before writing
2750 * @param write_op operation to add this action to
2751 */
2752 CEPH_RADOS_API void rados_write_op_assert_exists(rados_write_op_t write_op);
2753
2754 /**
2755 * Ensure that the object exists and that its internal version
2756 * number is equal to "ver" before writing. "ver" should be a
2757 * version number previously obtained with rados_get_last_version().
2758 * - If the object's version is greater than the asserted version
2759 * then rados_write_op_operate will return -ERANGE instead of
2760 * executing the op.
2761 * - If the object's version is less than the asserted version
2762 * then rados_write_op_operate will return -EOVERFLOW instead
2763 * of executing the op.
2764 * @param write_op operation to add this action to
2765 * @param ver object version number
2766 */
2767 CEPH_RADOS_API void rados_write_op_assert_version(rados_write_op_t write_op, uint64_t ver);
2768
2769 /**
2770 * Ensure that given object range (extent) satisfies comparison.
2771 *
2772 * @param write_op operation to add this action to
2773 * @param cmp_buf buffer containing bytes to be compared with object contents
2774 * @param cmp_len length to compare and size of @cmp_buf in bytes
2775 * @param off object byte offset at which to start the comparison
2776 * @param prval returned result of comparison, 0 on success, negative error code
2777 * on failure, (-MAX_ERRNO - mismatch_off) on mismatch
2778 */
2779 CEPH_RADOS_API void rados_write_op_cmpext(rados_write_op_t write_op,
2780 const char *cmp_buf,
2781 size_t cmp_len,
2782 uint64_t off,
2783 int *prval);
2784
2785 /**
2786 * Ensure that given xattr satisfies comparison.
2787 * If the comparison is not satisfied, the return code of the
2788 * operation will be -ECANCELED
2789 * @param write_op operation to add this action to
2790 * @param name name of the xattr to look up
2791 * @param comparison_operator currently undocumented, look for
2792 * LIBRADOS_CMPXATTR_OP_EQ in librados.h
2793 * @param value buffer to compare actual xattr value to
2794 * @param value_len length of buffer to compare actual xattr value to
2795 */
2796 CEPH_RADOS_API void rados_write_op_cmpxattr(rados_write_op_t write_op,
2797 const char *name,
2798 uint8_t comparison_operator,
2799 const char *value,
2800 size_t value_len);
2801
2802 /**
2803 * Ensure that an omap value satisfies a comparison,
2804 * with the supplied value on the right hand side (i.e.
2805 * for OP_LT, the comparison is actual_value < value).
2806 *
2807 * @param write_op operation to add this action to
2808 * @param key which omap value to compare
2809 * @param comparison_operator one of LIBRADOS_CMPXATTR_OP_EQ,
2810 LIBRADOS_CMPXATTR_OP_LT, or LIBRADOS_CMPXATTR_OP_GT
2811 * @param val value to compare with
2812 * @param val_len length of value in bytes
2813 * @param prval where to store the return value from this action
2814 */
2815 CEPH_RADOS_API void rados_write_op_omap_cmp(rados_write_op_t write_op,
2816 const char *key,
2817 uint8_t comparison_operator,
2818 const char *val,
2819 size_t val_len,
2820 int *prval);
2821
2822 /**
2823 * Set an xattr
2824 * @param write_op operation to add this action to
2825 * @param name name of the xattr
2826 * @param value buffer to set xattr to
2827 * @param value_len length of buffer to set xattr to
2828 */
2829 CEPH_RADOS_API void rados_write_op_setxattr(rados_write_op_t write_op,
2830 const char *name,
2831 const char *value,
2832 size_t value_len);
2833
2834 /**
2835 * Remove an xattr
2836 * @param write_op operation to add this action to
2837 * @param name name of the xattr to remove
2838 */
2839 CEPH_RADOS_API void rados_write_op_rmxattr(rados_write_op_t write_op,
2840 const char *name);
2841
2842 /**
2843 * Create the object
2844 * @param write_op operation to add this action to
2845 * @param exclusive set to either LIBRADOS_CREATE_EXCLUSIVE (the create
2846 * will error if the object already exists) or
2847 * LIBRADOS_CREATE_IDEMPOTENT
2848 * @param category category string (DEPRECATED, HAS NO EFFECT)
2849 */
2850 CEPH_RADOS_API void rados_write_op_create(rados_write_op_t write_op,
2851 int exclusive,
2852 const char* category);
2853
2854 /**
2855 * Write to offset
2856 * @param write_op operation to add this action to
2857 * @param buffer bytes to write
2858 * @param len length of buffer
2859 * @param offset offset to write to
2860 */
2861 CEPH_RADOS_API void rados_write_op_write(rados_write_op_t write_op,
2862 const char *buffer,
2863 size_t len,
2864 uint64_t offset);
2865
2866 /**
2867 * Write whole object, atomically replacing it.
2868 * @param write_op operation to add this action to
2869 * @param buffer bytes to write
2870 * @param len length of buffer
2871 */
2872 CEPH_RADOS_API void rados_write_op_write_full(rados_write_op_t write_op,
2873 const char *buffer,
2874 size_t len);
2875
2876 /**
2877 * Write the same buffer multiple times
2878 * @param write_op operation to add this action to
2879 * @param buffer bytes to write
2880 * @param data_len length of buffer
2881 * @param write_len total number of bytes to write, as a multiple of @data_len
2882 * @param offset offset to write to
2883 */
2884 CEPH_RADOS_API void rados_write_op_writesame(rados_write_op_t write_op,
2885 const char *buffer,
2886 size_t data_len,
2887 size_t write_len,
2888 uint64_t offset);
2889
2890 /**
2891 * Append to end of object.
2892 * @param write_op operation to add this action to
2893 * @param buffer bytes to write
2894 * @param len length of buffer
2895 */
2896 CEPH_RADOS_API void rados_write_op_append(rados_write_op_t write_op,
2897 const char *buffer,
2898 size_t len);
2899 /**
2900 * Remove object
2901 * @param write_op operation to add this action to
2902 */
2903 CEPH_RADOS_API void rados_write_op_remove(rados_write_op_t write_op);
2904
2905 /**
2906 * Truncate an object
2907 * @param write_op operation to add this action to
2908 * @param offset Offset to truncate to
2909 */
2910 CEPH_RADOS_API void rados_write_op_truncate(rados_write_op_t write_op,
2911 uint64_t offset);
2912
2913 /**
2914 * Zero part of an object
2915 * @param write_op operation to add this action to
2916 * @param offset Offset to zero
2917 * @param len length to zero
2918 */
2919 CEPH_RADOS_API void rados_write_op_zero(rados_write_op_t write_op,
2920 uint64_t offset,
2921 uint64_t len);
2922
2923 /**
2924 * Execute an OSD class method on an object
2925 * See rados_exec() for general description.
2926 *
2927 * @param write_op operation to add this action to
2928 * @param cls the name of the class
2929 * @param method the name of the method
2930 * @param in_buf where to find input
2931 * @param in_len length of in_buf in bytes
2932 * @param prval where to store the return value from the method
2933 */
2934 CEPH_RADOS_API void rados_write_op_exec(rados_write_op_t write_op,
2935 const char *cls,
2936 const char *method,
2937 const char *in_buf,
2938 size_t in_len,
2939 int *prval);
2940
2941 /**
2942 * Set key/value pairs on an object
2943 *
2944 * @param write_op operation to add this action to
2945 * @param keys array of null-terminated char arrays representing keys to set
2946 * @param vals array of pointers to values to set
2947 * @param lens array of lengths corresponding to each value
2948 * @param num number of key/value pairs to set
2949 */
2950 CEPH_RADOS_API void rados_write_op_omap_set(rados_write_op_t write_op,
2951 char const* const* keys,
2952 char const* const* vals,
2953 const size_t *lens,
2954 size_t num);
2955
2956 /**
2957 * Remove key/value pairs from an object
2958 *
2959 * @param write_op operation to add this action to
2960 * @param keys array of null-terminated char arrays representing keys to remove
2961 * @param keys_len number of key/value pairs to remove
2962 */
2963 CEPH_RADOS_API void rados_write_op_omap_rm_keys(rados_write_op_t write_op,
2964 char const* const* keys,
2965 size_t keys_len);
2966
2967 /**
2968 * Remove all key/value pairs from an object
2969 *
2970 * @param write_op operation to add this action to
2971 */
2972 CEPH_RADOS_API void rados_write_op_omap_clear(rados_write_op_t write_op);
2973
2974 /**
2975 * Set allocation hint for an object
2976 *
2977 * @param write_op operation to add this action to
2978 * @param expected_object_size expected size of the object, in bytes
2979 * @param expected_write_size expected size of writes to the object, in bytes
2980 */
2981 CEPH_RADOS_API void rados_write_op_set_alloc_hint(rados_write_op_t write_op,
2982 uint64_t expected_object_size,
2983 uint64_t expected_write_size);
2984
2985 /**
2986 * Set allocation hint for an object
2987 *
2988 * @param write_op operation to add this action to
2989 * @param expected_object_size expected size of the object, in bytes
2990 * @param expected_write_size expected size of writes to the object, in bytes
2991 * @param flags hints about future IO patterns
2992 */
2993 CEPH_RADOS_API void rados_write_op_set_alloc_hint2(rados_write_op_t write_op,
2994 uint64_t expected_object_size,
2995 uint64_t expected_write_size,
2996 uint32_t flags);
2997
2998 /**
2999 * Perform a write operation synchronously
3000 * @param write_op operation to perform
3001 * @param io the ioctx that the object is in
3002 * @param oid the object id
3003 * @param mtime the time to set the mtime to, NULL for the current time
3004 * @param flags flags to apply to the entire operation (LIBRADOS_OPERATION_*)
3005 */
3006 CEPH_RADOS_API int rados_write_op_operate(rados_write_op_t write_op,
3007 rados_ioctx_t io,
3008 const char *oid,
3009 time_t *mtime,
3010 int flags);
3011 /**
3012 * Perform a write operation synchronously (mtime given as a struct timespec)
3013 * @param write_op operation to perform
3014 * @param io the ioctx that the object is in
3015 * @param oid the object id
3016 * @param mtime the time to set the mtime to (struct timespec), NULL for the current time
3017 * @param flags flags to apply to the entire operation (LIBRADOS_OPERATION_*)
3018 */
3019
3020 CEPH_RADOS_API int rados_write_op_operate2(rados_write_op_t write_op,
3021 rados_ioctx_t io,
3022 const char *oid,
3023 struct timespec *mtime,
3024 int flags);
3025
3026 /**
3027 * Perform a write operation asynchronously
3028 * @param write_op operation to perform
3029 * @param io the ioctx that the object is in
3030 * @param completion what to do when operation has been attempted
3031 * @param oid the object id
3032 * @param mtime the time to set the mtime to, NULL for the current time
3033 * @param flags flags to apply to the entire operation (LIBRADOS_OPERATION_*)
3034 */
3035 CEPH_RADOS_API int rados_aio_write_op_operate(rados_write_op_t write_op,
3036 rados_ioctx_t io,
3037 rados_completion_t completion,
3038 const char *oid,
3039 time_t *mtime,
3040 int flags);
3041
3042 /**
3043 * Create a new rados_read_op_t read operation. This will store all
3044 * actions to be performed atomically. You must call
3045 * rados_release_read_op when you are finished with it (after it
3046 * completes, or you decide not to send it in the first place).
3047 *
3048 * @returns non-NULL on success, NULL on memory allocation error.
3049 */
3050 CEPH_RADOS_API rados_read_op_t rados_create_read_op(void);
3051
3052 /**
3053 * Free a rados_read_op_t, must be called when you're done with it.
3054 * @param read_op operation to deallocate, created with rados_create_read_op
3055 */
3056 CEPH_RADOS_API void rados_release_read_op(rados_read_op_t read_op);
3057
3058 /**
3059 * Set flags for the last operation added to this read_op.
3060 * At least one op must have been added to the read_op.
3061 * @param flags see librados.h constants beginning with LIBRADOS_OP_FLAG
3062 */
3063 CEPH_RADOS_API void rados_read_op_set_flags(rados_read_op_t read_op, int flags);
3064
3065 /**
3066 * Ensure that the object exists before reading
3067 * @param read_op operation to add this action to
3068 */
3069 CEPH_RADOS_API void rados_read_op_assert_exists(rados_read_op_t read_op);
3070
3071 /**
3072 * Ensure that the object exists and that its internal version
3073 * number is equal to "ver" before reading. "ver" should be a
3074 * version number previously obtained with rados_get_last_version().
3075 * - If the object's version is greater than the asserted version
3076 * then rados_read_op_operate will return -ERANGE instead of
3077 * executing the op.
3078 * - If the object's version is less than the asserted version
3079 * then rados_read_op_operate will return -EOVERFLOW instead
3080 * of executing the op.
3081 * @param read_op operation to add this action to
3082 * @param ver object version number
3083 */
3084 CEPH_RADOS_API void rados_read_op_assert_version(rados_read_op_t read_op, uint64_t ver);
3085
3086 /**
3087 * Ensure that given object range (extent) satisfies comparison.
3088 *
3089 * @param read_op operation to add this action to
3090 * @param cmp_buf buffer containing bytes to be compared with object contents
3091 * @param cmp_len length to compare and size of @cmp_buf in bytes
3092 * @param off object byte offset at which to start the comparison
3093 * @param prval returned result of comparison, 0 on success, negative error code
3094 * on failure, (-MAX_ERRNO - mismatch_off) on mismatch
3095 */
3096 CEPH_RADOS_API void rados_read_op_cmpext(rados_read_op_t read_op,
3097 const char *cmp_buf,
3098 size_t cmp_len,
3099 uint64_t off,
3100 int *prval);
3101
3102 /**
3103 * Ensure that an xattr satisfies a comparison.
3104 * If the comparison is not satisfied, the return code of the
3105 * operation will be -ECANCELED
3106 * @param read_op operation to add this action to
3107 * @param name name of the xattr to look up
3108 * @param comparison_operator currently undocumented, look for
3109 * LIBRADOS_CMPXATTR_OP_EQ in librados.h
3110 * @param value buffer to compare actual xattr value to
3111 * @param value_len length of buffer to compare actual xattr value to
3112 */
3113 CEPH_RADOS_API void rados_read_op_cmpxattr(rados_read_op_t read_op,
3114 const char *name,
3115 uint8_t comparison_operator,
3116 const char *value,
3117 size_t value_len);
3118
3119 /**
3120 * Start iterating over xattrs on an object.
3121 *
3122 * @param read_op operation to add this action to
3123 * @param iter where to store the iterator
3124 * @param prval where to store the return value of this action
3125 */
3126 CEPH_RADOS_API void rados_read_op_getxattrs(rados_read_op_t read_op,
3127 rados_xattrs_iter_t *iter,
3128 int *prval);
3129
3130 /**
3131 * Ensure that an omap value satisfies a comparison,
3132 * with the supplied value on the right hand side (i.e.
3133 * for OP_LT, the comparison is actual_value < value).
3134 *
3135 * @param read_op operation to add this action to
3136 * @param key which omap value to compare
3137 * @param comparison_operator one of LIBRADOS_CMPXATTR_OP_EQ,
3138 LIBRADOS_CMPXATTR_OP_LT, or LIBRADOS_CMPXATTR_OP_GT
3139 * @param val value to compare with
3140 * @param val_len length of value in bytes
3141 * @param prval where to store the return value from this action
3142 */
3143 CEPH_RADOS_API void rados_read_op_omap_cmp(rados_read_op_t read_op,
3144 const char *key,
3145 uint8_t comparison_operator,
3146 const char *val,
3147 size_t val_len,
3148 int *prval);
3149
3150 /**
3151 * Get object size and mtime
3152 * @param read_op operation to add this action to
3153 * @param psize where to store object size
3154 * @param pmtime where to store modification time
3155 * @param prval where to store the return value of this action
3156 */
3157 CEPH_RADOS_API void rados_read_op_stat(rados_read_op_t read_op,
3158 uint64_t *psize,
3159 time_t *pmtime,
3160 int *prval);
3161
3162 /**
3163 * Read bytes from offset into buffer.
3164 *
3165 * bytes_read will be filled with the number of bytes read if successful.
3166 * A short read can only occur if the read reaches the end of the
3167 * object.
3168 *
3169 * @param read_op operation to add this action to
3170 * @param offset offset to read from
3171 * @param len length of buffer
3172 * @param buffer where to put the data
3173 * @param bytes_read where to store the number of bytes read by this action
3174 * @param prval where to store the return value of this action
3175 */
3176 CEPH_RADOS_API void rados_read_op_read(rados_read_op_t read_op,
3177 uint64_t offset,
3178 size_t len,
3179 char *buffer,
3180 size_t *bytes_read,
3181 int *prval);
3182
3183 /**
3184 * Compute checksum from object data
3185 *
3186 * @param read_op operation to add this action to
3187 * @param type the checksum algorithm to utilize
3188 * @param init_value the init value for the algorithm
3189 * @param init_value_len the length of the init value
3190 * @param offset the offset to start checksumming in the object
3191 * @param len the number of bytes to checksum
3192 * @param chunk_size optional length-aligned chunk size for checksums
3193 * @param pchecksum where to store the checksum result for this action
3194 * @param checksum_len the number of bytes available for the result
3195 * (size of the pchecksum buffer)
3196 * @param prval where to store the return value for this action
3197 */
3198 CEPH_RADOS_API void rados_read_op_checksum(rados_read_op_t read_op,
3199 rados_checksum_type_t type,
3200 const char *init_value,
3201 size_t init_value_len,
3202 uint64_t offset, size_t len,
3203 size_t chunk_size, char *pchecksum,
3204 size_t checksum_len, int *prval);
3205
3206 /**
3207 * Execute an OSD class method on an object
3208 * See rados_exec() for general description.
3209 *
3210 * The output buffer is allocated on the heap; the caller is
3211 * expected to release that memory with rados_buffer_free(). The
3212 * buffer and length pointers can all be NULL, in which case they are
3213 * not filled in.
3214 *
3215 * @param read_op operation to add this action to
3216 * @param cls the name of the class
3217 * @param method the name of the method
3218 * @param in_buf where to find input
3219 * @param in_len length of in_buf in bytes
3220 * @param out_buf where to put librados-allocated output buffer
3221 * @param out_len length of out_buf in bytes
3222 * @param prval where to store the return value from the method
3223 */
3224 CEPH_RADOS_API void rados_read_op_exec(rados_read_op_t read_op,
3225 const char *cls,
3226 const char *method,
3227 const char *in_buf,
3228 size_t in_len,
3229 char **out_buf,
3230 size_t *out_len,
3231 int *prval);
3232
3233 /**
3234 * Execute an OSD class method on an object
3235 * See rados_exec() for general description.
3236 *
3237 * If the output buffer is too small, prval will
3238 * be set to -ERANGE and used_len will be 0.
3239 *
3240 * @param read_op operation to add this action to
3241 * @param cls the name of the class
3242 * @param method the name of the method
3243 * @param in_buf where to find input
3244 * @param in_len length of in_buf in bytes
3245 * @param out_buf user-provided buffer to read into
3246 * @param out_len length of out_buf in bytes
3247 * @param used_len where to store the number of bytes read into out_buf
3248 * @param prval where to store the return value from the method
3249 */
3250 CEPH_RADOS_API void rados_read_op_exec_user_buf(rados_read_op_t read_op,
3251 const char *cls,
3252 const char *method,
3253 const char *in_buf,
3254 size_t in_len,
3255 char *out_buf,
3256 size_t out_len,
3257 size_t *used_len,
3258 int *prval);
3259
3260 /**
3261 * Start iterating over key/value pairs on an object.
3262 *
3263 * They will be returned sorted by key.
3264 *
3265 * @param read_op operation to add this action to
3266 * @param start_after list keys starting after start_after
3267 * @param filter_prefix list only keys beginning with filter_prefix
3268 * @param max_return list no more than max_return key/value pairs
3269 * @param iter where to store the iterator
3270 * @param prval where to store the return value from this action
3271 */
3272 CEPH_RADOS_API void rados_read_op_omap_get_vals(rados_read_op_t read_op,
3273 const char *start_after,
3274 const char *filter_prefix,
3275 uint64_t max_return,
3276 rados_omap_iter_t *iter,
3277 int *prval)
3278 __attribute__((deprecated)); /* use v2 below */
3279
3280 /**
3281 * Start iterating over key/value pairs on an object.
3282 *
3283 * They will be returned sorted by key.
3284 *
3285 * @param read_op operation to add this action to
3286 * @param start_after list keys starting after start_after
3287 * @param filter_prefix list only keys beginning with filter_prefix
3288 * @param max_return list no more than max_return key/value pairs
3289 * @param iter where to store the iterator
3290 * @param pmore flag indicating whether there are more keys to fetch
3291 * @param prval where to store the return value from this action
3292 */
3293 CEPH_RADOS_API void rados_read_op_omap_get_vals2(rados_read_op_t read_op,
3294 const char *start_after,
3295 const char *filter_prefix,
3296 uint64_t max_return,
3297 rados_omap_iter_t *iter,
3298 unsigned char *pmore,
3299 int *prval);
3300
3301 /**
3302 * Start iterating over keys on an object.
3303 *
3304 * They will be returned sorted by key, and the iterator
3305 * will fill in NULL for all values if specified.
3306 *
3307 * @param read_op operation to add this action to
3308 * @param start_after list keys starting after start_after
3309 * @param max_return list no more than max_return keys
3310 * @param iter where to store the iterator
3311 * @param prval where to store the return value from this action
3312 */
3313 CEPH_RADOS_API void rados_read_op_omap_get_keys(rados_read_op_t read_op,
3314 const char *start_after,
3315 uint64_t max_return,
3316 rados_omap_iter_t *iter,
3317 int *prval)
3318 __attribute__((deprecated)); /* use v2 below */
3319
3320 /**
3321 * Start iterating over keys on an object.
3322 *
3323 * They will be returned sorted by key, and the iterator
3324 * will fill in NULL for all values if specified.
3325 *
3326 * @param read_op operation to add this action to
3327 * @param start_after list keys starting after start_after
3328 * @param max_return list no more than max_return keys
3329 * @param iter where to store the iterator
3330 * @param pmore flag indicating whether there are more keys to fetch
3331 * @param prval where to store the return value from this action
3332 */
3333 CEPH_RADOS_API void rados_read_op_omap_get_keys2(rados_read_op_t read_op,
3334 const char *start_after,
3335 uint64_t max_return,
3336 rados_omap_iter_t *iter,
3337 unsigned char *pmore,
3338 int *prval);
3339
3340 /**
3341 * Start iterating over specific key/value pairs
3342 *
3343 * They will be returned sorted by key.
3344 *
3345 * @param read_op operation to add this action to
3346 * @param keys array of pointers to null-terminated keys to get
3347 * @param keys_len the number of strings in keys
3348 * @param iter where to store the iterator
3349 * @param prval where to store the return value from this action
3350 */
3351 CEPH_RADOS_API void rados_read_op_omap_get_vals_by_keys(rados_read_op_t read_op,
3352 char const* const* keys,
3353 size_t keys_len,
3354 rados_omap_iter_t *iter,
3355 int *prval);
3356
3357 /**
3358 * Perform a read operation synchronously
3359 * @param read_op operation to perform
3360 * @param io the ioctx that the object is in
3361 * @param oid the object id
3362 * @param flags flags to apply to the entire operation (LIBRADOS_OPERATION_*)
3363 */
3364 CEPH_RADOS_API int rados_read_op_operate(rados_read_op_t read_op,
3365 rados_ioctx_t io,
3366 const char *oid,
3367 int flags);
3368
3369 /**
3370 * Perform a read operation asynchronously
3371 * @param read_op operation to perform
3372 * @param io the ioctx that the object is in
3373 * @param completion what to do when operation has been attempted
3374 * @param oid the object id
3375 * @param flags flags to apply to the entire operation (LIBRADOS_OPERATION_*)
3376 */
3377 CEPH_RADOS_API int rados_aio_read_op_operate(rados_read_op_t read_op,
3378 rados_ioctx_t io,
3379 rados_completion_t completion,
3380 const char *oid,
3381 int flags);
3382
3383 /** @} Object Operations */
3384
3385 /**
3386 * Take an exclusive lock on an object.
3387 *
3388 * @param io the context to operate in
3389 * @param oid the name of the object
3390 * @param name the name of the lock
3391 * @param cookie user-defined identifier for this instance of the lock
3392 * @param desc user-defined lock description
3393 * @param duration the duration of the lock. Set to NULL for infinite duration.
3394 * @param flags lock flags
3395 * @returns 0 on success, negative error code on failure
3396 * @returns -EBUSY if the lock is already held by another (client, cookie) pair
3397 * @returns -EEXIST if the lock is already held by the same (client, cookie) pair
3398 */
3399 CEPH_RADOS_API int rados_lock_exclusive(rados_ioctx_t io, const char * oid,
3400 const char * name, const char * cookie,
3401 const char * desc,
3402 struct timeval * duration,
3403 uint8_t flags);
3404
3405 /**
3406 * Take a shared lock on an object.
3407 *
3408 * @param io the context to operate in
3409 * @param o the name of the object
3410 * @param name the name of the lock
3411 * @param cookie user-defined identifier for this instance of the lock
3412 * @param tag The tag of the lock
3413 * @param desc user-defined lock description
3414 * @param duration the duration of the lock. Set to NULL for infinite duration.
3415 * @param flags lock flags
3416 * @returns 0 on success, negative error code on failure
3417 * @returns -EBUSY if the lock is already held by another (client, cookie) pair
3418 * @returns -EEXIST if the lock is already held by the same (client, cookie) pair
3419 */
3420 CEPH_RADOS_API int rados_lock_shared(rados_ioctx_t io, const char * o,
3421 const char * name, const char * cookie,
3422 const char * tag, const char * desc,
3423 struct timeval * duration, uint8_t flags);
3424
3425 /**
3426 * Release a shared or exclusive lock on an object.
3427 *
3428 * @param io the context to operate in
3429 * @param o the name of the object
3430 * @param name the name of the lock
3431 * @param cookie user-defined identifier for the instance of the lock
3432 * @returns 0 on success, negative error code on failure
3433 * @returns -ENOENT if the lock is not held by the specified (client, cookie) pair
3434 */
3435 CEPH_RADOS_API int rados_unlock(rados_ioctx_t io, const char *o,
3436 const char *name, const char *cookie);
3437
3438 /**
3439 * Asynchronously release a shared or exclusive lock on an object.
3440 *
3441 * @param io the context to operate in
3442 * @param o the name of the object
3443 * @param name the name of the lock
3444 * @param cookie user-defined identifier for the instance of the lock
3445 * @param completion what to do when operation has been attempted
3446 * @returns 0 on success, negative error code on failure
3447 */
3448 CEPH_RADOS_API int rados_aio_unlock(rados_ioctx_t io, const char *o,
3449 const char *name, const char *cookie,
3450 rados_completion_t completion);
3451
3452 /**
3453 * List clients that have locked the named object lock and information about
3454 * the lock.
3455 *
3456 * The number of bytes required in each buffer is put in the
3457 * corresponding size out parameter. If any of the provided buffers
3458 * are too short, -ERANGE is returned after these sizes are filled in.
3459 *
3460 * @param io the context to operate in
3461 * @param o the name of the object
3462 * @param name the name of the lock
3463 * @param exclusive where to store whether the lock is exclusive (1) or shared (0)
3464 * @param tag where to store the tag associated with the object lock
3465 * @param tag_len number of bytes in the tag buffer; set to the number of bytes required
3466 * @param clients buffer in which locker clients are stored, separated by '\0'
3467 * @param clients_len number of bytes in the clients buffer; set to the number of bytes required
3468 * @param cookies buffer in which locker cookies are stored, separated by '\0'
3469 * @param cookies_len number of bytes in the cookies buffer; set to the number of bytes required
3470 * @param addrs buffer in which locker addresses are stored, separated by '\0'
3471 * @param addrs_len number of bytes in the addrs buffer; set to the number of bytes required
3472 * @returns number of lockers on success, negative error code on failure
3473 * @returns -ERANGE if any of the buffers are too short
3474 */
3475 CEPH_RADOS_API ssize_t rados_list_lockers(rados_ioctx_t io, const char *o,
3476 const char *name, int *exclusive,
3477 char *tag, size_t *tag_len,
3478 char *clients, size_t *clients_len,
3479 char *cookies, size_t *cookies_len,
3480 char *addrs, size_t *addrs_len);
3481
3482 /**
3483 * Releases a shared or exclusive lock on an object, which was taken by the
3484 * specified client.
3485 *
 * rados_list_lockers() reports the clients and cookies that currently hold
 * a lock.
 * NOTE(review): presumably the client strings it returns are in the format
 * expected by the client parameter here -- confirm.
 *
3486 * @param io the context to operate in
3487 * @param o the name of the object
3488 * @param name the name of the lock
3489 * @param client the client currently holding the lock
3490 * @param cookie user-defined identifier for the instance of the lock
3491 * @returns 0 on success, negative error code on failure
3492 * @returns -ENOENT if the lock is not held by the specified (client, cookie) pair
3493 * @returns -EINVAL if the client cannot be parsed
3494 */
3495 CEPH_RADOS_API int rados_break_lock(rados_ioctx_t io, const char *o,
3496 const char *name, const char *client,
3497 const char *cookie);
3498
3499 /**
3500 * Blacklists the specified client from the OSDs.
3501 *
 * NOTE(review): client_address is declared as a non-const char *, unlike
 * the string parameters of the neighbouring calls; whether the callee
 * modifies the string is not visible from this header -- confirm before
 * passing a string literal or shared buffer.
 *
3502 * @param cluster cluster handle
3503 * @param client_address client address
3504 * @param expire_seconds number of seconds to blacklist (0 for default)
3505 * @returns 0 on success, negative error code on failure
3506 */
3507 CEPH_RADOS_API int rados_blacklist_add(rados_t cluster,
3508 char *client_address,
3509 uint32_t expire_seconds);
3510
3511 /**
3512 * Enable an application on a pool
3513 *
3514 * @param io pool ioctx
3515 * @param app_name application name
3516 * @param force 0 if only a single application is allowed per pool
3517 * @returns 0 on success, negative error code on failure
3518 */
3519 CEPH_RADOS_API int rados_application_enable(rados_ioctx_t io,
3520 const char *app_name, int force);
3521
3522 /**
3523 * List all enabled applications
3524 *
3525 * If the provided buffer is too short, the required length is filled in and
3526 * -ERANGE is returned. Otherwise, the buffer is filled with the application
3527 * names, with a '\0' after each.
3528 *
3529 * @param io pool ioctx
3531 * @param values buffer in which to store application names
3532 * @param values_len number of bytes in values buffer
3533 * @returns 0 on success, negative error code on failure
3534 * @returns -ERANGE if the buffer is too short
3535 */
3536 CEPH_RADOS_API int rados_application_list(rados_ioctx_t io, char *values,
3537 size_t *values_len);
3538
3539 /**
3540 * Get application metadata value from pool
3541 *
3542 * @param io pool ioctx
3543 * @param app_name application name
3544 * @param key metadata key
3545 * @param value result buffer
3546 * @param value_len maximum length of value (passed by pointer;
 * NOTE(review): presumably updated on return -- confirm)
3547 * @returns 0 on success, negative error code on failure
3548 */
3549 CEPH_RADOS_API int rados_application_metadata_get(rados_ioctx_t io,
3550 const char *app_name,
3551 const char *key, char *value,
3552 size_t *value_len);
3553
3554 /**
3555 * Set application metadata on a pool
3556 *
3557 * @param io pool ioctx
3558 * @param app_name application name
3559 * @param key metadata key
3560 * @param value metadata value
3561 * @returns 0 on success, negative error code on failure
3562 */
3563 CEPH_RADOS_API int rados_application_metadata_set(rados_ioctx_t io,
3564 const char *app_name,
3565 const char *key,
3566 const char *value);
3567
3568 /**
3569 * Remove application metadata from a pool
3570 *
3571 * @param io pool ioctx
3572 * @param app_name application name
3573 * @param key metadata key
3574 * @returns 0 on success, negative error code on failure
3575 */
3576 CEPH_RADOS_API int rados_application_metadata_remove(rados_ioctx_t io,
3577 const char *app_name,
3578 const char *key);
3579
3580 /**
3581 * List all metadata key/value pairs associated with an application.
3582 *
3583 * This iterates over all metadata, key_len and vals_len are filled in
3584 * with the number of bytes put into the keys and values buffers.
3585 *
3586 * If the provided buffers are too short, the required lengths are filled
3587 * in and -ERANGE is returned. Otherwise, the buffers are filled with
3588 * the keys and values of the metadata, with a '\0' after each.
3589 *
3590 * @param io pool ioctx
3591 * @param app_name application name
3592 * @param keys buffer in which to store key names
3593 * @param key_len number of bytes in keys buffer
3594 * @param values buffer in which to store values
3595 * @param vals_len number of bytes in values buffer
3596 * @returns 0 on success, negative error code on failure
3597 * @returns -ERANGE if either buffer is too short
3598 */
3599 CEPH_RADOS_API int rados_application_metadata_list(rados_ioctx_t io,
3600 const char *app_name,
3601 char *keys, size_t *key_len,
3602 char *values,
3603 size_t *vals_len);
3604
3605 /**
3606 * @name Mon/OSD/PG Commands
3607 *
3608 * These interfaces send commands relating to the monitor, manager, OSD, or PGs.
3609 *
3610 * @{
3611 */
3612
3613 /**
3614 * Send monitor command.
3615 *
3616 * @note Takes command string in carefully-formatted JSON; must match
3617 * defined commands, types, etc.
3618 *
3619 * The result buffers are allocated on the heap; the caller is
3620 * expected to release that memory with rados_buffer_free(). The
3621 * buffer and length pointers can all be NULL, in which case they are
3622 * not filled in.
3623 *
3624 * @param cluster cluster handle
3625 * @param cmd an array of char *'s representing the command
3626 * @param cmdlen count of valid entries in cmd
3627 * @param inbuf any bulk input data (crush map, etc.)
 * @param inbuflen input buffer length
3628 * @param outbuf double pointer to output buffer
3629 * @param outbuflen pointer to output buffer length
3630 * @param outs double pointer to status string
3631 * @param outslen pointer to status string length
3632 * @returns 0 on success, negative error code on failure
3633 */
3634 CEPH_RADOS_API int rados_mon_command(rados_t cluster, const char **cmd,
3635 size_t cmdlen, const char *inbuf,
3636 size_t inbuflen, char **outbuf,
3637 size_t *outbuflen, char **outs,
3638 size_t *outslen);
3639
3640 /**
3641 * Send ceph-mgr command.
3642 *
3643 * @note Takes command string in carefully-formatted JSON; must match
3644 * defined commands, types, etc.
3645 *
3646 * The result buffers are allocated on the heap; the caller is
3647 * expected to release that memory with rados_buffer_free(). The
3648 * buffer and length pointers can all be NULL, in which case they are
3649 * not filled in.
3650 *
3651 * @param cluster cluster handle
3652 * @param cmd an array of char *'s representing the command
3653 * @param cmdlen count of valid entries in cmd
3654 * @param inbuf any bulk input data (crush map, etc.)
 * @param inbuflen input buffer length
3655 * @param outbuf double pointer to output buffer
3656 * @param outbuflen pointer to output buffer length
3657 * @param outs double pointer to status string
3658 * @param outslen pointer to status string length
3659 * @returns 0 on success, negative error code on failure
3660 */
3661 CEPH_RADOS_API int rados_mgr_command(rados_t cluster, const char **cmd,
3662 size_t cmdlen, const char *inbuf,
3663 size_t inbuflen, char **outbuf,
3664 size_t *outbuflen, char **outs,
3665 size_t *outslen);
3666
3667 /**
3668 * Send monitor command to a specific monitor.
3669 *
3670 * @note Takes command string in carefully-formatted JSON; must match
3671 * defined commands, types, etc.
3672 *
3673 * The result buffers are allocated on the heap; the caller is
3674 * expected to release that memory with rados_buffer_free(). The
3675 * buffer and length pointers can all be NULL, in which case they are
3676 * not filled in.
3677 *
3678 * @param cluster cluster handle
3679 * @param name target monitor's name
3680 * @param cmd an array of char *'s representing the command
3681 * @param cmdlen count of valid entries in cmd
3682 * @param inbuf any bulk input data (crush map, etc.)
 * @param inbuflen input buffer length
3683 * @param outbuf double pointer to output buffer
3684 * @param outbuflen pointer to output buffer length
3685 * @param outs double pointer to status string
3686 * @param outslen pointer to status string length
3687 * @returns 0 on success, negative error code on failure
3688 */
3689 CEPH_RADOS_API int rados_mon_command_target(rados_t cluster, const char *name,
3690 const char **cmd, size_t cmdlen,
3691 const char *inbuf, size_t inbuflen,
3692 char **outbuf, size_t *outbuflen,
3693 char **outs, size_t *outslen);
3694
3695 /**
3696 * Free a rados-allocated buffer.
3697 *
3698 * Release memory allocated by librados calls like rados_mon_command(),
 * rados_mgr_command() and rados_mon_command_target(), whose result
 * buffers (outbuf, outs) are documented as heap-allocated and owned by
 * the caller.
3699 *
3700 * @param buf buffer pointer
3701 */
3702 CEPH_RADOS_API void rados_buffer_free(char *buf);
3703
/**
 * Send a command to a specific OSD.
 *
 * NOTE(review): the parameter list mirrors rados_mon_command_target(),
 * with the target given as an OSD id instead of a monitor name. The JSON
 * command format, the rados_buffer_free() ownership of the result buffers
 * and the return convention are presumably the same -- confirm against
 * the implementation.
 *
 * @param cluster cluster handle
 * @param osdid id of the target OSD
 * @param cmd an array of char *'s representing the command
 * @param cmdlen count of valid entries in cmd
 * @param inbuf any bulk input data
 * @param inbuflen input buffer length
 * @param outbuf double pointer to output buffer
 * @param outbuflen pointer to output buffer length
 * @param outs double pointer to status string
 * @param outslen pointer to status string length
 */
3704 CEPH_RADOS_API int rados_osd_command(rados_t cluster, int osdid,
3705 const char **cmd, size_t cmdlen,
3706 const char *inbuf, size_t inbuflen,
3707 char **outbuf, size_t *outbuflen,
3708 char **outs, size_t *outslen);
3709
/**
 * Send a command relating to a specific placement group (PG).
 *
 * NOTE(review): the parameter list mirrors rados_mon_command_target(),
 * with the target given as a PG string instead of a monitor name. The
 * expected format of pgstr, the rados_buffer_free() ownership of the
 * result buffers and the return convention are not stated in this header
 * -- confirm against the implementation.
 *
 * @param cluster cluster handle
 * @param pgstr string identifying the target PG
 * @param cmd an array of char *'s representing the command
 * @param cmdlen count of valid entries in cmd
 * @param inbuf any bulk input data
 * @param inbuflen input buffer length
 * @param outbuf double pointer to output buffer
 * @param outbuflen pointer to output buffer length
 * @param outs double pointer to status string
 * @param outslen pointer to status string length
 */
3710 CEPH_RADOS_API int rados_pg_command(rados_t cluster, const char *pgstr,
3711 const char **cmd, size_t cmdlen,
3712 const char *inbuf, size_t inbuflen,
3713 char **outbuf, size_t *outbuflen,
3714 char **outs, size_t *outslen);
3715
/*
 * NOTE(review): redundant redeclaration. rados_mgr_command() is already
 * declared, with the same parameter list and full documentation, earlier
 * in this command group. The duplicate is harmless but could be dropped.
 */
3716 CEPH_RADOS_API int rados_mgr_command(rados_t cluster,
3717 const char **cmd, size_t cmdlen,
3718 const char *inbuf, size_t inbuflen,
3719 char **outbuf, size_t *outbuflen,
3720 char **outs, size_t *outslen);
3721
3722 /*
3723 * This is not a doxygen comment leadin, because doxygen breaks on
3724 * a typedef with function params and returns, and I can't figure out
3725 * how to fix it.
3726 *
3727 * Monitor cluster log
3728 *
3729 * Monitor events logged to the cluster log. The callback gets each
3730 * log entry both as a single formatted line and with each field in a
3731 * separate arg.
3732 *
3733 * Calling with a cb argument of NULL will deregister any previously
3734 * registered callback.
3735 *
 * The parameters and return value listed here are those of
 * rados_monitor_log(), which takes a callback of this type; the
 * callback itself returns void.
 *
3736 * @param cluster cluster handle
3737 * @param level minimum log level (debug, info, warn|warning, err|error)
3738 * @param cb callback to run for each log message. It MUST NOT block
3739 * nor call back into librados.
3740 * @param arg void argument to pass to cb
3741 *
3742 * @returns 0 on success, negative code on error
3743 */
3744 typedef void (*rados_log_callback_t)(void *arg,
3745 const char *line,
3746 const char *who,
3747 uint64_t sec, uint64_t nsec,
3748 uint64_t seq, const char *level,
3749 const char *msg);
3750
3751 /*
3752 * This is not a doxygen comment leadin, because doxygen breaks on
3753 * a typedef with function params and returns, and I can't figure out
3754 * how to fix it.
3755 *
3756 * Monitor cluster log
3757 *
3758 * Monitor events logged to the cluster log. The callback gets each
3759 * log entry both as a single formatted line and with each field in a
3760 * separate arg. Compared with rados_log_callback_t, this callback
 * additionally receives the channel and name arguments.
3761 *
3762 * Calling with a cb argument of NULL will deregister any previously
3763 * registered callback.
3764 *
 * The parameters and return value listed here are those of
 * rados_monitor_log2(), which takes a callback of this type; the
 * callback itself returns void.
 *
3765 * @param cluster cluster handle
3766 * @param level minimum log level (debug, info, warn|warning, err|error)
3767 * @param cb callback to run for each log message. It MUST NOT block
3768 * nor call back into librados.
3769 * @param arg void argument to pass to cb
3770 *
3771 * @returns 0 on success, negative code on error
3772 */
3773 typedef void (*rados_log_callback2_t)(void *arg,
3774 const char *line,
3775 const char *channel,
3776 const char *who,
3777 const char *name,
3778 uint64_t sec, uint64_t nsec,
3779 uint64_t seq, const char *level,
3780 const char *msg);
3781
/*
 * Register a cluster log callback, or deregister the previous one when cb
 * is NULL. The parameters and return value are described in the comments
 * on rados_log_callback_t and rados_log_callback2_t; the two functions
 * differ only in the callback type they accept.
 */
3782 CEPH_RADOS_API int rados_monitor_log(rados_t cluster, const char *level,
3783 rados_log_callback_t cb, void *arg);
3784 CEPH_RADOS_API int rados_monitor_log2(rados_t cluster, const char *level,
3785 rados_log_callback2_t cb, void *arg);
3786
3787
3788 /**
3789 * Register daemon instance for a service
3790 *
3791 * Register us as a daemon providing a particular service. We identify
3792 * the service (e.g., 'rgw') and our instance name (e.g., 'rgw.$hostname').
3793 * The metadata is a map of keys and values with arbitrary static metadata
3794 * for this instance. The encoding is a series of NUL-terminated strings,
3795 * alternating key names and values, terminating with an empty key name.
3796 * For example, "foo\0bar\0this\0that\0\0" is the dict {foo=bar,this=that}.
3797 *
3798 * For the lifetime of the librados instance, regular beacons will be sent
3799 * to the cluster to maintain our registration in the service map.
3800 *
3801 * @param cluster handle
3802 * @param service service name
3803 * @param daemon daemon instance name
3804 * @param metadata_dict static daemon metadata dict
 *
 * NOTE(review): the meaning of the int return value is not documented
 * here; presumably 0 on success and a negative error code on failure,
 * as for the other calls in this header -- confirm.
3805 */
3806 CEPH_RADOS_API int rados_service_register(
3807 rados_t cluster,
3808 const char *service,
3809 const char *daemon,
3810 const char *metadata_dict);
3811
3812 /**
3813 * Update daemon status
3814 *
3815 * Update our mutable status information in the service map.
3816 *
3817 * The status dict is encoded the same way the daemon metadata is encoded
3818 * for rados_service_register. For example, "foo\0bar\0this\0that\0\0" is
3819 * {foo=bar,this=that}.
3820 *
3821 * @param cluster rados cluster handle
3822 * @param status_dict status dict
 *
 * NOTE(review): the meaning of the int return value is not documented
 * here; presumably 0 on success and a negative error code on failure,
 * as for the other calls in this header -- confirm.
3823 */
3824 CEPH_RADOS_API int rados_service_update_status(
3825 rados_t cluster,
3826 const char *status_dict);
3827
3828 /** @} Mon/OSD/PG commands */
3829
3830 /*
3831 * These methods are no longer supported and return -ENOTSUP where possible.
 *
 * Each declaration carries __attribute__((deprecated)), so compilers that
 * honor the attribute warn at every remaining call site.
 * NOTE(review): "where possible" presumably excludes the functions that
 * return void or uint32_t, which cannot report a negative error code --
 * confirm against the implementation.
3832 */
3833 CEPH_RADOS_API int rados_objects_list_open(
3834 rados_ioctx_t io,
3835 rados_list_ctx_t *ctx) __attribute__((deprecated));
3836 CEPH_RADOS_API uint32_t rados_objects_list_get_pg_hash_position(
3837 rados_list_ctx_t ctx) __attribute__((deprecated));
3838 CEPH_RADOS_API uint32_t rados_objects_list_seek(
3839 rados_list_ctx_t ctx,
3840 uint32_t pos) __attribute__((deprecated));
3841 CEPH_RADOS_API int rados_objects_list_next(
3842 rados_list_ctx_t ctx,
3843 const char **entry,
3844 const char **key) __attribute__((deprecated));
3845 CEPH_RADOS_API void rados_objects_list_close(
3846 rados_list_ctx_t ctx) __attribute__((deprecated));
3847
3848
3849 #ifdef __cplusplus
3850 } /* extern "C" */
3851 #endif
3852
3853 #endif /* CEPH_LIBRADOS_H */