]> git.proxmox.com Git - ceph.git/blob - ceph/src/include/rados/librados.h
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / include / rados / librados.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2012 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15 #ifndef CEPH_LIBRADOS_H
16 #define CEPH_LIBRADOS_H
17
18 #ifdef __cplusplus
19 extern "C" {
20 #endif
21
22 #include <netinet/in.h>
23 #if defined(__linux__)
24 #include <linux/types.h>
25 #elif defined(__FreeBSD__)
26 #include <sys/types.h>
27 #endif
28 #include <unistd.h>
29 #include <string.h>
30 #include "rados_types.h"
31
32 #include <sys/time.h>
33
34 #ifndef CEPH_OSD_TMAP_SET
35 /* These are also defined in rados.h and objclass.h. Keep them in sync! */
36 #define CEPH_OSD_TMAP_HDR 'h'
37 #define CEPH_OSD_TMAP_SET 's'
38 #define CEPH_OSD_TMAP_CREATE 'c'
39 #define CEPH_OSD_TMAP_RM 'r'
40 #endif
41
42 #define LIBRADOS_VER_MAJOR 0
43 #define LIBRADOS_VER_MINOR 69
44 #define LIBRADOS_VER_EXTRA 1
45
/*
 * Pack a (major, minor, extra) version triple into a single integer:
 * major is shifted left by 16 bits, minor by 8 bits, and extra is added
 * unshifted.  Every argument and the whole expansion are parenthesized so
 * that expression arguments (e.g. `a | b` or `c ? x : y`) are evaluated
 * as a unit instead of being re-associated by operator precedence.
 */
#define LIBRADOS_VERSION(maj, min, extra) \
  (((maj) << 16) + ((min) << 8) + (extra))
47
48 #define LIBRADOS_VERSION_CODE LIBRADOS_VERSION(LIBRADOS_VER_MAJOR, LIBRADOS_VER_MINOR, LIBRADOS_VER_EXTRA)
49
50 #define LIBRADOS_SUPPORTS_WATCH 1
51
52 /* RADOS lock flags
53 * They are also defined in cls_lock_types.h. Keep them in sync!
54 */
55 #define LIBRADOS_LOCK_FLAG_RENEW 0x1
56
57 /*
58 * Constants for rados_write_op_create().
59 */
60 #define LIBRADOS_CREATE_EXCLUSIVE 1
61 #define LIBRADOS_CREATE_IDEMPOTENT 0
62
63 /*
64 * Flags that can be set on a per-op basis via
65 * rados_read_op_set_flags() and rados_write_op_set_flags().
66 */
67 enum {
68 // fail a create operation if the object already exists
69 LIBRADOS_OP_FLAG_EXCL = 0x1,
70 // allow the transaction to succeed even if the flagged op fails
71 LIBRADOS_OP_FLAG_FAILOK = 0x2,
72 // indicate that the read/write op is random
73 LIBRADOS_OP_FLAG_FADVISE_RANDOM = 0x4,
74 // indicate that the read/write op is sequential
75 LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL = 0x8,
76 // indicate read/write data will be accessed in the near future (by someone)
77 LIBRADOS_OP_FLAG_FADVISE_WILLNEED = 0x10,
78 // indicate read/write data will not be accessed in the near future (by anyone)
79 LIBRADOS_OP_FLAG_FADVISE_DONTNEED = 0x20,
80 // indicate read/write data will not be accessed again (by *this* client)
81 LIBRADOS_OP_FLAG_FADVISE_NOCACHE = 0x40,
82 };
83
84 #if __GNUC__ >= 4
85 #define CEPH_RADOS_API __attribute__ ((visibility ("default")))
86 #else
87 #define CEPH_RADOS_API
88 #endif
89
90 /**
91 * @name xattr comparison operations
92 * Operators for comparing xattrs on objects, and aborting the
93 * rados_read_op or rados_write_op transaction if the comparison
94 * fails.
95 *
96 * @{
97 */
98 enum {
99 LIBRADOS_CMPXATTR_OP_EQ = 1,
100 LIBRADOS_CMPXATTR_OP_NE = 2,
101 LIBRADOS_CMPXATTR_OP_GT = 3,
102 LIBRADOS_CMPXATTR_OP_GTE = 4,
103 LIBRADOS_CMPXATTR_OP_LT = 5,
104 LIBRADOS_CMPXATTR_OP_LTE = 6
105 };
106 /** @} */
107
108 /**
109 * @name Operation Flags
110 * Flags for rados_read_op_operate(), rados_write_op_operate(),
111 * rados_aio_read_op_operate(), and rados_aio_write_op_operate().
112 * See librados.hpp for details.
113 * @{
114 */
115 enum {
116 LIBRADOS_OPERATION_NOFLAG = 0,
117 LIBRADOS_OPERATION_BALANCE_READS = 1,
118 LIBRADOS_OPERATION_LOCALIZE_READS = 2,
119 LIBRADOS_OPERATION_ORDER_READS_WRITES = 4,
120 LIBRADOS_OPERATION_IGNORE_CACHE = 8,
121 LIBRADOS_OPERATION_SKIPRWLOCKS = 16,
122 LIBRADOS_OPERATION_IGNORE_OVERLAY = 32,
123 /* send requests to cluster despite the cluster or pool being marked
124 full; ops will either succeed (e.g., delete) or return EDQUOT or
125 ENOSPC. */
126 LIBRADOS_OPERATION_FULL_TRY = 64,
127 /*
128 * Mainly for delete op
129 */
130 LIBRADOS_OPERATION_FULL_FORCE = 128,
131 };
132 /** @} */
133
134 /**
135 * @name Alloc hint flags
136 * Flags for rados_write_op_alloc_hint2() and rados_set_alloc_hint2()
137 * indicating future IO patterns.
138 * @{
139 */
140 enum {
141 LIBRADOS_ALLOC_HINT_FLAG_SEQUENTIAL_WRITE = 1,
142 LIBRADOS_ALLOC_HINT_FLAG_RANDOM_WRITE = 2,
143 LIBRADOS_ALLOC_HINT_FLAG_SEQUENTIAL_READ = 4,
144 LIBRADOS_ALLOC_HINT_FLAG_RANDOM_READ = 8,
145 LIBRADOS_ALLOC_HINT_FLAG_APPEND_ONLY = 16,
146 LIBRADOS_ALLOC_HINT_FLAG_IMMUTABLE = 32,
147 LIBRADOS_ALLOC_HINT_FLAG_SHORTLIVED = 64,
148 LIBRADOS_ALLOC_HINT_FLAG_LONGLIVED = 128,
149 LIBRADOS_ALLOC_HINT_FLAG_COMPRESSIBLE = 256,
150 LIBRADOS_ALLOC_HINT_FLAG_INCOMPRESSIBLE = 512,
151 };
152 /** @} */
153
154 typedef enum {
155 LIBRADOS_CHECKSUM_TYPE_XXHASH32 = 0,
156 LIBRADOS_CHECKSUM_TYPE_XXHASH64 = 1,
157 LIBRADOS_CHECKSUM_TYPE_CRC32C = 2
158 } rados_checksum_type_t;
159
160 /*
161 * snap id constants
162 */
163 #define LIBRADOS_SNAP_HEAD ((uint64_t)(-2))
164 #define LIBRADOS_SNAP_DIR ((uint64_t)(-1))
165
166 /**
167 * @typedef rados_t
168 *
169 * A handle for interacting with a RADOS cluster. It encapsulates all
170 * RADOS client configuration, including username, key for
171 * authentication, logging, and debugging. Talking to different clusters
172 * -- or to the same cluster with different users -- requires
173 * different cluster handles.
174 */
175 #ifndef VOIDPTR_RADOS_T
176 #define VOIDPTR_RADOS_T
177 typedef void *rados_t;
178 #endif //VOIDPTR_RADOS_T
179
180 /**
181 * @typedef rados_config_t
182 *
183 * A handle for the ceph configuration context for the rados_t cluster
184 * instance. This can be used to share configuration context/state
185 * (e.g., logging configuration) between librados instances.
186 *
187 * @warning The config context does not have independent reference
188 * counting. As such, a rados_config_t handle retrieved from a given
189 * rados_t is only valid as long as that rados_t.
190 */
191 typedef void *rados_config_t;
192
193 /**
194 * @typedef rados_ioctx_t
195 *
196 * An io context encapsulates a few settings for all I/O operations
197 * done on it:
198 * - pool - set when the io context is created (see rados_ioctx_create())
199 * - snapshot context for writes (see
200 * rados_ioctx_selfmanaged_snap_set_write_ctx())
201 * - snapshot id to read from (see rados_ioctx_snap_set_read())
202 * - object locator for all single-object operations (see
203 * rados_ioctx_locator_set_key())
204 * - namespace for all single-object operations (see
205 * rados_ioctx_set_namespace()). Set to LIBRADOS_ALL_NSPACES
206 * before rados_nobjects_list_open() will list all objects in all
207 * namespaces.
208 *
209 * @warning Changing any of these settings is not thread-safe -
210 * librados users must synchronize any of these changes on their own,
211 * or use separate io contexts for each thread
212 */
213 typedef void *rados_ioctx_t;
214
215 /**
216 * @typedef rados_list_ctx_t
217 *
218 * An iterator for listing the objects in a pool.
219 * Used with rados_nobjects_list_open(),
220 * rados_nobjects_list_next(), and
221 * rados_nobjects_list_close().
222 */
223 typedef void *rados_list_ctx_t;
224
225 /**
226 * @typedef rados_object_list_cursor
227 *
228 * The cursor used with rados_enumerate_objects
229 * and accompanying methods.
230 */
231 typedef void * rados_object_list_cursor;
232
233 typedef struct rados_object_list_item {
234 size_t oid_length;
235 char *oid;
236
237 size_t nspace_length;
238 char *nspace;
239
240 size_t locator_length;
241 char *locator;
242 } rados_object_list_item;
243
244 /**
245 * @typedef rados_snap_t
246 * The id of a snapshot.
247 */
248 typedef uint64_t rados_snap_t;
249
250 /**
251 * @typedef rados_xattrs_iter_t
252 * An iterator for listing extended attributes on an object.
253 * Used with rados_getxattrs(), rados_getxattrs_next(), and
254 * rados_getxattrs_end().
255 */
256 typedef void *rados_xattrs_iter_t;
257
258 /**
259 * @typedef rados_omap_iter_t
260 * An iterator for listing omap key/value pairs on an object.
261 * Used with rados_read_op_omap_get_keys(), rados_read_op_omap_get_vals(),
262 * rados_read_op_omap_get_vals_by_keys(), rados_omap_get_next(), and
263 * rados_omap_get_end().
264 */
265 typedef void *rados_omap_iter_t;
266
267 /**
268 * @struct rados_pool_stat_t
269 * Usage information for a pool.
270 */
271 struct rados_pool_stat_t {
272 /// space used in bytes
273 uint64_t num_bytes;
274 /// space used in KB
275 uint64_t num_kb;
276 /// number of objects in the pool
277 uint64_t num_objects;
278 /// number of clones of objects
279 uint64_t num_object_clones;
280 /// num_objects * num_replicas
281 uint64_t num_object_copies;
282 uint64_t num_objects_missing_on_primary; ///< NOTE(review): undocumented; by its name, objects missing on the primary -- confirm
283 /// number of objects found on no OSDs
284 uint64_t num_objects_unfound;
285 /// number of objects replicated fewer times than they should be
286 /// (but found on at least one OSD)
287 uint64_t num_objects_degraded;
288 uint64_t num_rd; ///< NOTE(review): undocumented; presumably the number of read operations -- confirm
289 uint64_t num_rd_kb; ///< NOTE(review): undocumented; presumably the amount read, in KB -- confirm
290 uint64_t num_wr; ///< NOTE(review): undocumented; presumably the number of write operations -- confirm
291 uint64_t num_wr_kb; ///< NOTE(review): undocumented; presumably the amount written, in KB -- confirm
292 };
293
294 /**
295 * @struct rados_cluster_stat_t
296 * Cluster-wide usage information
297 */
298 struct rados_cluster_stat_t {
299 uint64_t kb, kb_used, kb_avail; ///< total space, space used, and space available (unit presumably KB, going by the field names -- confirm)
300 uint64_t num_objects; ///< number of objects
301 };
302
303 /**
304 * @typedef rados_write_op_t
305 *
306 * An object write operation stores a number of operations which can be
307 * executed atomically. For usage, see:
308 * - Creation and deletion: rados_create_write_op() rados_release_write_op()
309 * - Extended attribute manipulation: rados_write_op_cmpxattr(),
310 * rados_write_op_setxattr(),
311 * rados_write_op_rmxattr()
312 * - Object map key/value pairs: rados_write_op_omap_set(),
313 * rados_write_op_omap_rm_keys(), rados_write_op_omap_clear(),
314 * rados_write_op_omap_cmp()
315 * - Object properties: rados_write_op_assert_exists(),
316 * rados_write_op_assert_version()
317 * - Creating objects: rados_write_op_create()
318 * - IO on objects: rados_write_op_append(), rados_write_op_write(),
319 * rados_write_op_write_full(), rados_write_op_writesame(), rados_write_op_remove(),
320 * rados_write_op_truncate(), rados_write_op_zero(), rados_write_op_cmpext()
321 * - Hints: rados_write_op_set_alloc_hint()
322 * - Performing the operation: rados_write_op_operate(), rados_aio_write_op_operate()
323 */
324 typedef void *rados_write_op_t;
325
326 /**
327 * @typedef rados_read_op_t
328 *
329 * An object read operation stores a number of operations which can be
330 * executed atomically. For usage, see:
331 * - Creation and deletion: rados_create_read_op() rados_release_read_op()
332 * - Extended attribute manipulation: rados_read_op_cmpxattr(),
333 * rados_read_op_getxattr(), rados_read_op_getxattrs()
334 * - Object map key/value pairs: rados_read_op_omap_get_vals(),
335 * rados_read_op_omap_get_keys(), rados_read_op_omap_get_vals_by_keys(),
336 * rados_read_op_omap_cmp()
337 * - Object properties: rados_read_op_stat(), rados_read_op_assert_exists(),
338 * rados_read_op_assert_version()
339 * - IO on objects: rados_read_op_read(), rados_read_op_checksum(),
340 * rados_read_op_cmpext()
341 * - Custom operations: rados_read_op_exec(), rados_read_op_exec_user_buf()
342 * - Request properties: rados_read_op_set_flags()
343 * - Performing the operation: rados_read_op_operate(),
344 * rados_aio_read_op_operate()
345 */
346 typedef void *rados_read_op_t;
347
348 /**
349 * @typedef rados_completion_t
350 * Represents the state of an asynchronous operation - it contains the
351 * return value once the operation completes, and can be used to block
352 * until the operation is complete or safe.
353 */
354 typedef void *rados_completion_t;
355
356 /**
357 * @struct blkin_trace_info
358 * blkin trace information for Zipkin tracing
359 */
360 struct blkin_trace_info;
361
362 /**
363 * Get the version of librados.
364 *
365 * The version number is major.minor.extra. Note that this is
366 * unrelated to the Ceph version number.
367 *
368 * TODO: define version semantics, i.e.:
369 * - incrementing major is for backwards-incompatible changes
370 * - incrementing minor is for backwards-compatible changes
371 * - incrementing extra is for bug fixes
372 *
373 * @param major where to store the major version number
374 * @param minor where to store the minor version number
375 * @param extra where to store the extra version number
376 */
377 CEPH_RADOS_API void rados_version(int *major, int *minor, int *extra);
378
379 /**
380 * @name Setup and Teardown
381 * These are the first and last functions that should be called
382 * when using librados.
383 *
384 * @{
385 */
386
387 /**
388 * Create a handle for communicating with a RADOS cluster.
389 *
390 * Ceph environment variables are read when this is called, so if
391 * $CEPH_ARGS specifies everything you need to connect, no further
392 * configuration is necessary.
393 *
394 * @param cluster where to store the handle
395 * @param id the user to connect as (i.e. admin, not client.admin)
396 * @returns 0 on success, negative error code on failure
397 */
398 CEPH_RADOS_API int rados_create(rados_t *cluster, const char * const id);
399
400 /**
401 * Extended version of rados_create.
402 *
403 * Like rados_create, but
404 * 1) don't assume 'client\.'+id; allow full specification of name
405 * 2) allow specification of cluster name
406 * 3) flags for future expansion
407 */
408 CEPH_RADOS_API int rados_create2(rados_t *pcluster,
409 const char *const clustername,
410 const char * const name, uint64_t flags);
411
412 /**
413 * Initialize a cluster handle from an existing configuration.
414 *
415 * Share configuration state with another rados_t instance.
416 *
417 * @param cluster where to store the handle
418 * @param cct the existing configuration to use
419 * @returns 0 on success, negative error code on failure
420 */
421 CEPH_RADOS_API int rados_create_with_context(rados_t *cluster,
422 rados_config_t cct);
423
424 /**
425 * Ping the monitor with ID mon_id, storing the resulting reply in
426 * outstr (if specified) and its size in outstrlen.
427 *
428 * The result buffer is allocated on the heap; the caller is
429 * expected to release that memory with rados_buffer_free(). The
430 * buffer and length pointers can be NULL, in which case they are
431 * not filled in.
432 *
433 * @param cluster cluster handle
434 * @param[in] mon_id ID of the monitor to ping
435 * @param[out] outstr double pointer with the resulting reply
436 * @param[out] outstrlen pointer with the size of the reply in outstr
437 */
438 CEPH_RADOS_API int rados_ping_monitor(rados_t cluster, const char *mon_id,
439 char **outstr, size_t *outstrlen);
440
441 /**
442 * Connect to the cluster.
443 *
444 * @note BUG: Before calling this, calling a function that communicates with the
445 * cluster will crash.
446 *
447 * @pre The cluster handle is configured with at least a monitor
448 * address. If cephx is enabled, a client name and secret must also be
449 * set.
450 *
451 * @post If this succeeds, any function in librados may be used
452 *
453 * @param cluster The cluster to connect to.
454 * @returns 0 on success, negative error code on failure
455 */
456 CEPH_RADOS_API int rados_connect(rados_t cluster);
457
458 /**
459 * Disconnects from the cluster.
460 *
461 * For clean up, this is only necessary after rados_connect() has
462 * succeeded.
463 *
464 * @warning This does not guarantee any asynchronous writes have
465 * completed. To do that, you must call rados_aio_flush() on all open
466 * io contexts.
467 *
468 * @warning We implicitly call rados_watch_flush() on shutdown. If
469 * there are watches being used, this should be done explicitly before
470 * destroying the relevant IoCtx. We do it here as a safety measure.
471 *
472 * @post the cluster handle cannot be used again
473 *
474 * @param cluster the cluster to shutdown
475 */
476 CEPH_RADOS_API void rados_shutdown(rados_t cluster);
477
478 /** @} init */
479
480 /**
481 * @name Configuration
482 * These functions read and update Ceph configuration for a cluster
483 * handle. Any configuration changes must be done before connecting to
484 * the cluster.
485 *
486 * Options that librados users might want to set include:
487 * - mon_host
488 * - auth_supported
489 * - key, keyfile, or keyring when using cephx
490 * - log_file, log_to_stderr, err_to_stderr, and log_to_syslog
491 * - debug_rados, debug_objecter, debug_monc, debug_auth, or debug_ms
492 *
493 * All possible options can be found in src/common/config_opts.h in ceph.git
494 *
495 * @{
496 */
497
498 /**
499 * Configure the cluster handle using a Ceph config file
500 *
501 * If path is NULL, the default locations are searched, and the first
502 * found is used. The locations are:
503 * - $CEPH_CONF (environment variable)
504 * - /etc/ceph/ceph.conf
505 * - ~/.ceph/config
506 * - ceph.conf (in the current working directory)
507 *
508 * @pre rados_connect() has not been called on the cluster handle
509 *
510 * @param cluster cluster handle to configure
511 * @param path path to a Ceph configuration file
512 * @returns 0 on success, negative error code on failure
513 */
514 CEPH_RADOS_API int rados_conf_read_file(rados_t cluster, const char *path);
515
516 /**
517 * Configure the cluster handle with command line arguments
518 *
519 * argv can contain any common Ceph command line option, including any
520 * configuration parameter prefixed by '--' and replacing spaces with
521 * dashes or underscores. For example, the following options are equivalent:
522 * - --mon-host 10.0.0.1:6789
523 * - --mon_host 10.0.0.1:6789
524 * - -m 10.0.0.1:6789
525 *
526 * @pre rados_connect() has not been called on the cluster handle
527 *
528 * @param cluster cluster handle to configure
529 * @param argc number of arguments in argv
530 * @param argv arguments to parse
531 * @returns 0 on success, negative error code on failure
532 */
533 CEPH_RADOS_API int rados_conf_parse_argv(rados_t cluster, int argc,
534 const char **argv);
535
536
537 /**
538 * Configure the cluster handle with command line arguments, returning
539 * any remainders. Same as rados_conf_parse_argv(), except for an extra
540 * remargv argument to hold the returned unrecognized arguments.
541 *
542 * @pre rados_connect() has not been called on the cluster handle
543 *
544 * @param cluster cluster handle to configure
545 * @param argc number of arguments in argv
546 * @param argv arguments to parse
547 * @param remargv char* array for returned unrecognized arguments
548 * @returns 0 on success, negative error code on failure
549 */
550 CEPH_RADOS_API int rados_conf_parse_argv_remainder(rados_t cluster, int argc,
551 const char **argv,
552 const char **remargv);
553 /**
554 * Configure the cluster handle based on an environment variable
555 *
556 * The contents of the environment variable are parsed as if they were
557 * Ceph command line options. If var is NULL, the CEPH_ARGS
558 * environment variable is used.
559 *
560 * @pre rados_connect() has not been called on the cluster handle
561 *
562 * @note BUG: this is not threadsafe - it uses a static buffer
563 *
564 * @param cluster cluster handle to configure
565 * @param var name of the environment variable to read
566 * @returns 0 on success, negative error code on failure
567 */
568 CEPH_RADOS_API int rados_conf_parse_env(rados_t cluster, const char *var);
569
570 /**
571 * Set a configuration option
572 *
573 * @pre rados_connect() has not been called on the cluster handle
574 *
575 * @param cluster cluster handle to configure
576 * @param option option to set
577 * @param value value of the option
578 * @returns 0 on success, negative error code on failure
579 * @returns -ENOENT when the option is not a Ceph configuration option
580 */
581 CEPH_RADOS_API int rados_conf_set(rados_t cluster, const char *option,
582 const char *value);
583
584 /**
585 * Get the value of a configuration option
586 *
587 * @param cluster configuration to read
588 * @param option which option to read
589 * @param buf where to write the configuration value
590 * @param len the size of buf in bytes
591 * @returns 0 on success, negative error code on failure
592 * @returns -ENAMETOOLONG if the buffer is too short to contain the
593 * requested value
594 */
595 CEPH_RADOS_API int rados_conf_get(rados_t cluster, const char *option,
596 char *buf, size_t len);
597
598 /** @} config */
599
600 /**
601 * Read usage info about the cluster
602 *
603 * This tells you total space, space used, space available, and number
604 * of objects. These are not updated immediately when data is written,
605 * they are eventually consistent.
606 *
607 * @param cluster cluster to query
608 * @param result where to store the results
609 * @returns 0 on success, negative error code on failure
610 */
611 CEPH_RADOS_API int rados_cluster_stat(rados_t cluster,
612 struct rados_cluster_stat_t *result);
613
614 /**
615 * Get the fsid of the cluster as a hexadecimal string.
616 *
617 * The fsid is a unique id of an entire Ceph cluster.
618 *
619 * @param cluster where to get the fsid
620 * @param buf where to write the fsid
621 * @param len the size of buf in bytes (should be 37)
622 * @returns 0 on success, negative error code on failure
623 * @returns -ERANGE if the buffer is too short to contain the
624 * fsid
625 */
626 CEPH_RADOS_API int rados_cluster_fsid(rados_t cluster, char *buf, size_t len);
627
628 /**
629 * Get/wait for the most recent osdmap
630 *
631 * @param cluster the cluster handle
632 * @returns 0 on success, negative error code on failure
633 */
634 CEPH_RADOS_API int rados_wait_for_latest_osdmap(rados_t cluster);
635
636 /**
637 * @name Pools
638 *
639 * RADOS pools are separate namespaces for objects. Pools may have
640 * different crush rules associated with them, so they could have
641 * differing replication levels or placement strategies. RADOS
642 * permissions are also tied to pools - users can have different read,
643 * write, and execute permissions on a per-pool basis.
644 *
645 * @{
646 */
647
648 /**
649 * List pools
650 *
651 * Gets a list of pool names as NULL-terminated strings. The pool
652 * names will be placed in the supplied buffer one after another.
653 * After the last pool name, there will be two 0 bytes in a row.
654 *
655 * If len is too short to fit all the pool name entries we need, we will fill
656 * as much as we can.
657 *
658 * Buf may be null to determine the buffer size needed to list all pools.
659 *
660 * @param cluster cluster handle
661 * @param buf output buffer
662 * @param len output buffer length
663 * @returns length of the buffer we would need to list all pools
664 */
665 CEPH_RADOS_API int rados_pool_list(rados_t cluster, char *buf, size_t len);
666
667 /**
668 * List inconsistent placement groups of the given pool
669 *
670 * Gets a list of inconsistent placement groups as NULL-terminated strings.
671 * The placement group names will be placed in the supplied buffer one after
672 * another. After the last name, there will be two 0 bytes in a row.
673 *
674 * If len is too short to fit all the placement group entries we need, we will
675 * fill as much as we can.
676 *
677 * @param cluster cluster handle
678 * @param pool pool ID
679 * @param buf output buffer
680 * @param len output buffer length
681 * @returns length of the buffer we would need to list all inconsistent placement groups
682 */
683 CEPH_RADOS_API int rados_inconsistent_pg_list(rados_t cluster, int64_t pool,
684 char *buf, size_t len);
685
686 /**
687 * Get a configuration handle for a rados cluster handle
688 *
689 * This handle is valid only as long as the cluster handle is valid.
690 *
691 * @param cluster cluster handle
692 * @returns config handle for this cluster
693 */
694 CEPH_RADOS_API rados_config_t rados_cct(rados_t cluster);
695
696 /**
697 * Get a global id for current instance
698 *
699 * This id is a unique representation of current connection to the cluster
700 *
701 * @param cluster cluster handle
702 * @returns instance global id
703 */
704 CEPH_RADOS_API uint64_t rados_get_instance_id(rados_t cluster);
705
706 /**
707 * Create an io context
708 *
709 * The io context allows you to perform operations within a particular
710 * pool. For more details see rados_ioctx_t.
711 *
712 * @param cluster which cluster the pool is in
713 * @param pool_name name of the pool
714 * @param ioctx where to store the io context
715 * @returns 0 on success, negative error code on failure
716 */
717 CEPH_RADOS_API int rados_ioctx_create(rados_t cluster, const char *pool_name,
718 rados_ioctx_t *ioctx);
719 CEPH_RADOS_API int rados_ioctx_create2(rados_t cluster, int64_t pool_id,
720 rados_ioctx_t *ioctx);
721
722 /**
723 * The opposite of rados_ioctx_create
724 *
725 * This just tells librados that you no longer need to use the io context.
726 * It may not be freed immediately if there are pending asynchronous
727 * requests on it, but you should not use an io context again after
728 * calling this function on it.
729 *
730 * @warning This does not guarantee any asynchronous
731 * writes have completed. You must call rados_aio_flush()
732 * on the io context before destroying it to do that.
733 *
734 * @warning If this ioctx is used by rados_watch, the caller needs to
735 * be sure that all registered watches are disconnected via
736 * rados_unwatch() and that rados_watch_flush() is called. This
737 * ensures that a racing watch callback does not make use of a
738 * destroyed ioctx.
739 *
740 * @param io the io context to dispose of
741 */
742 CEPH_RADOS_API void rados_ioctx_destroy(rados_ioctx_t io);
743
744 /**
745 * Get the configuration handle for a pool handle
746 *
747 * @param io pool handle
748 * @returns rados_config_t for this cluster
749 */
750 CEPH_RADOS_API rados_config_t rados_ioctx_cct(rados_ioctx_t io);
751
752 /**
753 * Get the cluster handle used by this rados_ioctx_t
754 * Note that this is a weak reference, and should not
755 * be destroyed via rados_shutdown().
756 *
757 * @param io the io context
758 * @returns the cluster handle for this io context
759 */
760 CEPH_RADOS_API rados_t rados_ioctx_get_cluster(rados_ioctx_t io);
761
762 /**
763 * Get pool usage statistics
764 *
765 * Fills in a rados_pool_stat_t after querying the cluster.
766 *
767 * @param io determines which pool to query
768 * @param stats where to store the results
769 * @returns 0 on success, negative error code on failure
770 */
771 CEPH_RADOS_API int rados_ioctx_pool_stat(rados_ioctx_t io,
772 struct rados_pool_stat_t *stats);
773
774 /**
775 * Get the id of a pool
776 *
777 * @param cluster which cluster the pool is in
778 * @param pool_name which pool to look up
779 * @returns id of the pool
780 * @returns -ENOENT if the pool is not found
781 */
782 CEPH_RADOS_API int64_t rados_pool_lookup(rados_t cluster,
783 const char *pool_name);
784
785 /**
786 * Get the name of a pool
787 *
788 * @param cluster which cluster the pool is in
789 * @param id the id of the pool
790 * @param buf where to store the pool name
791 * @param maxlen size of buffer where name will be stored
792 * @returns length of string stored, or -ERANGE if buffer too small
793 */
794 CEPH_RADOS_API int rados_pool_reverse_lookup(rados_t cluster, int64_t id,
795 char *buf, size_t maxlen);
796
797 /**
798 * Create a pool with default settings
799 *
800 * The default owner is the admin user (auid 0).
801 * The default crush rule is rule 0.
802 *
803 * @param cluster the cluster in which the pool will be created
804 * @param pool_name the name of the new pool
805 * @returns 0 on success, negative error code on failure
806 */
807 CEPH_RADOS_API int rados_pool_create(rados_t cluster, const char *pool_name);
808
809 /**
810 * Create a pool owned by a specific auid
811 *
812 * The auid is the authenticated user id to give ownership of the pool.
813 * TODO: document auid and the rest of the auth system
814 *
815 * @param cluster the cluster in which the pool will be created
816 * @param pool_name the name of the new pool
817 * @param auid the id of the owner of the new pool
818 * @returns 0 on success, negative error code on failure
819 */
820 CEPH_RADOS_API int rados_pool_create_with_auid(rados_t cluster,
821 const char *pool_name,
822 uint64_t auid);
823
824 /**
825 * Create a pool with a specific CRUSH rule
826 *
827 * @param cluster the cluster in which the pool will be created
828 * @param pool_name the name of the new pool
829 * @param crush_rule_num which rule to use for placement in the new pool
830 * @returns 0 on success, negative error code on failure
831 */
832 CEPH_RADOS_API int rados_pool_create_with_crush_rule(rados_t cluster,
833 const char *pool_name,
834 uint8_t crush_rule_num);
835
836 /**
837 * Create a pool with a specific CRUSH rule and auid
838 *
839 * This is a combination of rados_pool_create_with_crush_rule() and
840 * rados_pool_create_with_auid().
841 *
842 * @param cluster the cluster in which the pool will be created
843 * @param pool_name the name of the new pool
844 * @param crush_rule_num which rule to use for placement in the new pool
845 * @param auid the id of the owner of the new pool
846 * @returns 0 on success, negative error code on failure
847 */
848 CEPH_RADOS_API int rados_pool_create_with_all(rados_t cluster,
849 const char *pool_name,
850 uint64_t auid,
851 uint8_t crush_rule_num);
852
853 /**
854 * Returns the pool that is the base tier for this pool.
855 *
856 * The return value is the ID of the pool that should be used to read from/write to.
857 * If tiering is not set up for the pool, returns \c pool.
858 *
859 * @param cluster the cluster the pool is in
860 * @param pool ID of the pool to query
861 * @param[out] base_tier base tier, or \c pool if tiering is not configured
862 * @returns 0 on success, negative error code on failure
863 */
864 CEPH_RADOS_API int rados_pool_get_base_tier(rados_t cluster, int64_t pool,
865 int64_t* base_tier);
866
867 /**
868 * Delete a pool and all data inside it
869 *
870 * The pool is removed from the cluster immediately,
871 * but the actual data is deleted in the background.
872 *
873 * @param cluster the cluster the pool is in
874 * @param pool_name which pool to delete
875 * @returns 0 on success, negative error code on failure
876 */
877 CEPH_RADOS_API int rados_pool_delete(rados_t cluster, const char *pool_name);
878
879 /**
880 * Attempt to change an io context's associated auid "owner"
881 *
882 * Requires that you have write permission on both the current and new
883 * auid.
884 *
885 * @param io reference to the pool to change.
886 * @param auid the auid you wish the io to have.
887 * @returns 0 on success, negative error code on failure
888 */
889 CEPH_RADOS_API int rados_ioctx_pool_set_auid(rados_ioctx_t io, uint64_t auid);
890
891 /**
892 * Get the auid of a pool
893 *
894 * @param io pool to query
895 * @param auid where to store the auid
896 * @returns 0 on success, negative error code on failure
897 */
898 CEPH_RADOS_API int rados_ioctx_pool_get_auid(rados_ioctx_t io, uint64_t *auid);
899
900 /* deprecated, use rados_ioctx_pool_requires_alignment2 instead */
901 CEPH_RADOS_API int rados_ioctx_pool_requires_alignment(rados_ioctx_t io)
902 __attribute__((deprecated));
903
904 /**
905 * Test whether the specified pool requires alignment or not.
906 *
907 * @param io pool to query
908 * @param requires 1 if alignment is supported, 0 if not.
909 * @returns 0 on success, negative error code on failure
910 */
911 CEPH_RADOS_API int rados_ioctx_pool_requires_alignment2(rados_ioctx_t io,
912 int *requires);
913
914 /* deprecated, use rados_ioctx_pool_required_alignment2 instead */
915 CEPH_RADOS_API uint64_t rados_ioctx_pool_required_alignment(rados_ioctx_t io)
916 __attribute__((deprecated));
917
918 /**
919 * Get the alignment flavor of a pool
920 *
921 * @param io pool to query
922 * @param alignment where to store the alignment flavor
923 * @returns 0 on success, negative error code on failure
924 */
925 CEPH_RADOS_API int rados_ioctx_pool_required_alignment2(rados_ioctx_t io,
926 uint64_t *alignment);
927
928 /**
929 * Get the pool id of the io context
930 *
931 * @param io the io context to query
932 * @returns the id of the pool the io context uses
933 */
934 CEPH_RADOS_API int64_t rados_ioctx_get_id(rados_ioctx_t io);
935
936 /**
937 * Get the pool name of the io context
938 *
939 * @param io the io context to query
940 * @param buf pointer to buffer where name will be stored
941 * @param maxlen size of buffer where name will be stored
942 * @returns length of string stored, or -ERANGE if buffer too small
943 */
944 CEPH_RADOS_API int rados_ioctx_get_pool_name(rados_ioctx_t io, char *buf,
945 unsigned maxlen);
946
947 /** @} pools */
948
949 /**
950 * @name Object Locators
951 *
952 * @{
953 */
954
955 /**
956 * Set the key for mapping objects to pgs within an io context.
957 *
958 * The key is used instead of the object name to determine which
959 * placement groups an object is put in. This affects all subsequent
960 * operations of the io context - until a different locator key is
961 * set, all objects in this io context will be placed in the same pg.
962 *
963 * @param io the io context to change
964 * @param key the key to use as the object locator, or NULL to discard
965 * any previously set key
966 */
967 CEPH_RADOS_API void rados_ioctx_locator_set_key(rados_ioctx_t io,
968 const char *key);
969
970 /**
971 * Set the namespace for objects within an io context
972 *
973 * The namespace specification further refines a pool into different
974 * domains. The mapping of objects to pgs is also based on this
975 * value.
976 *
977 * @param io the io context to change
978 * @param nspace the name to use as the namespace, or NULL to use the
979 * default namespace
980 */
981 CEPH_RADOS_API void rados_ioctx_set_namespace(rados_ioctx_t io,
982 const char *nspace);
983 /** @} obj_loc */
984
985 /**
986 * @name Listing Objects
987 * @{
988 */
989 /**
990 * Start listing objects in a pool
991 *
992 * @param io the pool to list from
993 * @param ctx the handle to store list context in
994 * @returns 0 on success, negative error code on failure
995 */
996 CEPH_RADOS_API int rados_nobjects_list_open(rados_ioctx_t io,
997 rados_list_ctx_t *ctx);
998
999 /**
1000 * Return hash position of iterator, rounded to the current PG
1001 *
1002 * @param ctx iterator marking where you are in the listing
1003 * @returns current hash position, rounded to the current pg
1004 */
1005 CEPH_RADOS_API uint32_t rados_nobjects_list_get_pg_hash_position(rados_list_ctx_t ctx);
1006
1007 /**
1008 * Reposition object iterator to a different hash position
1009 *
1010 * @param ctx iterator marking where you are in the listing
1011 * @param pos hash position to move to
1012 * @returns actual (rounded) position we moved to
1013 */
1014 CEPH_RADOS_API uint32_t rados_nobjects_list_seek(rados_list_ctx_t ctx,
1015 uint32_t pos);
1016
1017 /**
1018 * Reposition object iterator to a different position
1019 *
1020 * @param ctx iterator marking where you are in the listing
1021 * @param cursor position to move to
1022 * @returns rounded position we moved to
1023 */
1024 CEPH_RADOS_API uint32_t rados_nobjects_list_seek_cursor(rados_list_ctx_t ctx,
1025 rados_object_list_cursor cursor);
1026
1027 /**
1028 * Get a cursor marking the current position of the object iterator
1029 *
1030 * The returned handle must be released with rados_object_list_cursor_free().
1031 *
1032 * @param ctx iterator marking where you are in the listing
1033 * @param cursor where to store cursor
1034 * @returns 0 on success, negative error code on failure
1035 */
1036 CEPH_RADOS_API int rados_nobjects_list_get_cursor(rados_list_ctx_t ctx,
1037 rados_object_list_cursor *cursor);
1038
1039 /**
1040 * Get the next object name and locator in the pool
1041 *
1042 * *entry and *key are valid until next call to rados_nobjects_list_*
1043 *
1044 * @param ctx iterator marking where you are in the listing
1045 * @param entry where to store the name of the entry
1046 * @param key where to store the object locator (set to NULL to ignore)
1047 * @param nspace where to store the object namespace (set to NULL to ignore)
1048 * @returns 0 on success, negative error code on failure
1049 * @returns -ENOENT when there are no more objects to list
1050 */
1051 CEPH_RADOS_API int rados_nobjects_list_next(rados_list_ctx_t ctx,
1052 const char **entry,
1053 const char **key,
1054 const char **nspace);
1055
1056 /**
1057 * Close the object listing handle.
1058 *
1059 * This should be called when the handle is no longer needed.
1060 * The handle should not be used after it has been closed.
1061 *
1062 * @param ctx the handle to close
1063 */
1064 CEPH_RADOS_API void rados_nobjects_list_close(rados_list_ctx_t ctx);
1065
1066 /**
1067 * Get cursor handle pointing to the *beginning* of a pool.
1068 *
1069 * This is an opaque handle pointing to the start of a pool. It must
1070 * be released with rados_object_list_cursor_free().
1071 *
1072 * @param io ioctx for the pool
1073 * @returns handle for the pool, NULL on error (pool does not exist)
1074 */
1075 CEPH_RADOS_API rados_object_list_cursor rados_object_list_begin(
1076 rados_ioctx_t io);
1077
1078 /**
1079 * Get cursor handle pointing to the *end* of a pool.
1080 *
1081 * This is an opaque handle pointing to the end of a pool. It must
1082 * be released with rados_object_list_cursor_free().
1083 *
1084 * @param io ioctx for the pool
1085 * @returns handle for the pool, NULL on error (pool does not exist)
1086 */
1087 CEPH_RADOS_API rados_object_list_cursor rados_object_list_end(rados_ioctx_t io);
1088
1089 /**
1090 * Check if a cursor has reached the end of a pool
1091 *
1092 * @param io ioctx
1093 * @param cur cursor
1094 * @returns 1 if the cursor has reached the end of the pool, 0 otherwise
1095 */
1096 CEPH_RADOS_API int rados_object_list_is_end(rados_ioctx_t io,
1097 rados_object_list_cursor cur);
1098
1099 /**
1100 * Release a cursor
1101 *
1102 * Release a cursor. The handle may not be used after this point.
1103 *
1104 * @param io ioctx
1105 * @param cur cursor
1106 */
1107 CEPH_RADOS_API void rados_object_list_cursor_free(rados_ioctx_t io,
1108 rados_object_list_cursor cur);
1109
1110 /**
1111 * Compare two cursor positions
1112 *
1113 * Compare two cursors, and indicate whether the first cursor precedes,
1114 * matches, or follows the second.
1115 *
1116 * @param io ioctx
1117 * @param lhs first cursor
1118 * @param rhs second cursor
1119 * @returns -1, 0, or 1 for lhs < rhs, lhs == rhs, or lhs > rhs
1120 */
1121 CEPH_RADOS_API int rados_object_list_cursor_cmp(rados_ioctx_t io,
1122 rados_object_list_cursor lhs, rados_object_list_cursor rhs);
1123
1124 /**
1125 * @return the number of items set in the result array
1126 */
1127 CEPH_RADOS_API int rados_object_list(rados_ioctx_t io,
1128 const rados_object_list_cursor start,
1129 const rados_object_list_cursor finish,
1130 const size_t result_size,
1131 const char *filter_buf,
1132 const size_t filter_buf_len,
1133 rados_object_list_item *results,
1134 rados_object_list_cursor *next);
1135
1136 CEPH_RADOS_API void rados_object_list_free(
1137 const size_t result_size,
1138 rados_object_list_item *results);
1139
1140 /**
1141 * Obtain cursors delineating a subset of a range. Use this
1142 * when you want to split up the work of iterating over the
1143 * global namespace. Expected use case is when you are iterating
1144 * in parallel, with `m` workers, and each worker taking an id `n`.
1145 *
1146 * @param start start of the range to be sliced up (inclusive)
1147 * @param finish end of the range to be sliced up (exclusive)
1148 * @param m how many chunks to divide start-finish into
1149 * @param n which of the m chunks you would like to get cursors for
1150 * @param split_start cursor populated with start of the subrange (inclusive)
1151 * @param split_finish cursor populated with end of the subrange (exclusive)
1152 */
1153 CEPH_RADOS_API void rados_object_list_slice(rados_ioctx_t io,
1154 const rados_object_list_cursor start,
1155 const rados_object_list_cursor finish,
1156 const size_t n,
1157 const size_t m,
1158 rados_object_list_cursor *split_start,
1159 rados_object_list_cursor *split_finish);
1160
1161
1162 /** @} Listing Objects */
1163
1164 /**
1165 * @name Snapshots
1166 *
1167 * RADOS snapshots are based upon sequence numbers that form a
1168 * snapshot context. They are pool-specific. The snapshot context
1169 * consists of the current snapshot sequence number for a pool, and an
1170 * array of sequence numbers at which snapshots were taken, in
1171 * descending order. Whenever a snapshot is created or deleted, the
1172 * snapshot sequence number for the pool is increased. To add a new
1173 * snapshot, the new snapshot sequence number must be increased and
1174 * added to the snapshot context.
1175 *
1176 * There are two ways to manage these snapshot contexts:
1177 * -# within the RADOS cluster
1178 * These are called pool snapshots, and store the snapshot context
1179 * in the OSDMap. These represent a snapshot of all the objects in
1180 * a pool.
1181 * -# within the RADOS clients
1182 * These are called self-managed snapshots, and push the
1183 * responsibility for keeping track of the snapshot context to the
1184 * clients. For every write, the client must send the snapshot
1185 * context. In librados, this is accomplished with
1186 * rados_selfmanaged_snap_set_write_ctx(). These are more
1187 * difficult to manage, but are restricted to specific objects
1188 * instead of applying to an entire pool.
1189 *
1190 * @{
1191 */
1192
1193 /**
1194 * Create a pool-wide snapshot
1195 *
1196 * @param io the pool to snapshot
1197 * @param snapname the name of the snapshot
1198 * @returns 0 on success, negative error code on failure
1199 */
1200 CEPH_RADOS_API int rados_ioctx_snap_create(rados_ioctx_t io,
1201 const char *snapname);
1202
1203 /**
1204 * Delete a pool snapshot
1205 *
1206 * @param io the pool to delete the snapshot from
1207 * @param snapname which snapshot to delete
1208 * @returns 0 on success, negative error code on failure
1209 */
1210 CEPH_RADOS_API int rados_ioctx_snap_remove(rados_ioctx_t io,
1211 const char *snapname);
1212
1213 /**
1214 * Rollback an object to a pool snapshot
1215 *
1216 * The contents of the object will be the same as
1217 * when the snapshot was taken.
1218 *
1219 * @param io the pool in which the object is stored
1220 * @param oid the name of the object to rollback
1221 * @param snapname which snapshot to rollback to
1222 * @returns 0 on success, negative error code on failure
1223 */
1224 CEPH_RADOS_API int rados_ioctx_snap_rollback(rados_ioctx_t io, const char *oid,
1225 const char *snapname);
1226
1227 /**
1228 * @warning Deprecated: Use rados_ioctx_snap_rollback() instead
1229 */
1230 CEPH_RADOS_API int rados_rollback(rados_ioctx_t io, const char *oid,
1231 const char *snapname)
1232 __attribute__((deprecated));
1233
1234 /**
1235 * Set the snapshot from which reads are performed.
1236 *
1237 * Subsequent reads will return data as it was at the time of that
1238 * snapshot.
1239 *
1240 * @param io the io context to change
1241 * @param snap the id of the snapshot to set, or LIBRADOS_SNAP_HEAD for no
1242 * snapshot (i.e. normal operation)
1243 */
1244 CEPH_RADOS_API void rados_ioctx_snap_set_read(rados_ioctx_t io,
1245 rados_snap_t snap);
1246
1247 /**
1248 * Allocate an ID for a self-managed snapshot
1249 *
1250 * Get a unique ID to put in the snapshot context to create a
1251 * snapshot. A clone of an object is not created until a write with
1252 * the new snapshot context is completed.
1253 *
1254 * @param io the pool in which the snapshot will exist
1255 * @param snapid where to store the newly allocated snapshot ID
1256 * @returns 0 on success, negative error code on failure
1257 */
1258 CEPH_RADOS_API int rados_ioctx_selfmanaged_snap_create(rados_ioctx_t io,
1259 rados_snap_t *snapid);
1260 CEPH_RADOS_API void
1261 rados_aio_ioctx_selfmanaged_snap_create(rados_ioctx_t io,
1262 rados_snap_t *snapid,
1263 rados_completion_t completion);
1264
1265 /**
1266 * Remove a self-managed snapshot
1267 *
1268 * This increases the snapshot sequence number, which will cause
1269 * snapshots to be removed lazily.
1270 *
1271 * @param io the pool in which the snapshot exists
1272 * @param snapid the ID of the self-managed snapshot to remove
1273 * @returns 0 on success, negative error code on failure
1274 */
1275 CEPH_RADOS_API int rados_ioctx_selfmanaged_snap_remove(rados_ioctx_t io,
1276 rados_snap_t snapid);
1277 CEPH_RADOS_API void
1278 rados_aio_ioctx_selfmanaged_snap_remove(rados_ioctx_t io,
1279 rados_snap_t snapid,
1280 rados_completion_t completion);
1281
1282 /**
1283 * Rollback an object to a self-managed snapshot
1284 *
1285 * The contents of the object will be the same as
1286 * when the snapshot was taken.
1287 *
1288 * @param io the pool in which the object is stored
1289 * @param oid the name of the object to rollback
1290 * @param snapid which snapshot to rollback to
1291 * @returns 0 on success, negative error code on failure
1292 */
1293 CEPH_RADOS_API int rados_ioctx_selfmanaged_snap_rollback(rados_ioctx_t io,
1294 const char *oid,
1295 rados_snap_t snapid);
1296
1297 /**
1298 * Set the snapshot context for use when writing to objects
1299 *
1300 * This is stored in the io context, and applies to all future writes.
1301 *
1302 * @param io the io context to change
1303 * @param seq the newest snapshot sequence number for the pool
1304 * @param snaps array of snapshots sorted by descending id
1305 * @param num_snaps how many snapshots are in the snaps array
1306 * @returns 0 on success, negative error code on failure
1307 * @returns -EINVAL if snaps are not in descending order
1308 */
1309 CEPH_RADOS_API int rados_ioctx_selfmanaged_snap_set_write_ctx(rados_ioctx_t io,
1310 rados_snap_t seq,
1311 rados_snap_t *snaps,
1312 int num_snaps);
1313
1314 /**
1315 * List all the ids of pool snapshots
1316 *
1317 * If the output array does not have enough space to fit all the
1318 * snapshots, -ERANGE is returned and the caller should retry with a
1319 * larger array.
1320 *
1321 * @param io the pool to read from
1322 * @param snaps where to store the results
1323 * @param maxlen the number of rados_snap_t that fit in the snaps array
1324 * @returns number of snapshots on success, negative error code on failure
1325 * @returns -ERANGE if the snaps array is too short
1326 */
1327 CEPH_RADOS_API int rados_ioctx_snap_list(rados_ioctx_t io, rados_snap_t *snaps,
1328 int maxlen);
1329
1330 /**
1331 * Get the id of a pool snapshot
1332 *
1333 * @param io the pool to read from
1334 * @param name the snapshot to find
1335 * @param id where to store the result
1336 * @returns 0 on success, negative error code on failure
1337 */
1338 CEPH_RADOS_API int rados_ioctx_snap_lookup(rados_ioctx_t io, const char *name,
1339 rados_snap_t *id);
1340
1341 /**
1342 * Get the name of a pool snapshot
1343 *
1344 * @param io the pool to read from
1345 * @param id the snapshot to find
1346 * @param name where to store the result
1347 * @param maxlen the size of the name array
1348 * @returns 0 on success, negative error code on failure
1349 * @returns -ERANGE if the name array is too small
1350 */
1351 CEPH_RADOS_API int rados_ioctx_snap_get_name(rados_ioctx_t io, rados_snap_t id,
1352 char *name, int maxlen);
1353
1354 /**
1355 * Find when a pool snapshot occurred
1356 *
1357 * @param io the pool the snapshot was taken in
1358 * @param id the snapshot to lookup
1359 * @param t where to store the result
1360 * @returns 0 on success, negative error code on failure
1361 */
1362 CEPH_RADOS_API int rados_ioctx_snap_get_stamp(rados_ioctx_t io, rados_snap_t id,
1363 time_t *t);
1364
1365 /** @} Snapshots */
1366
1367 /**
1368 * @name Synchronous I/O
1369 * Writes are replicated to a number of OSDs based on the
1370 * configuration of the pool they are in. These write functions block
1371 * until data is in memory on all replicas of the object they're
1372 * writing to - they are equivalent to doing the corresponding
1373 * asynchronous write, and then calling
1374 * rados_aio_wait_for_complete(). For greater data safety, use the
1375 * asynchronous functions and rados_aio_wait_for_safe().
1376 *
1377 * @{
1378 */
1379
1380 /**
1381 * Return the version of the last object read or written to.
1382 *
1383 * This exposes the internal version number of the last object read or
1384 * written via this io context
1385 *
1386 * @param io the io context to check
1387 * @returns last read or written object version
1388 */
1389 CEPH_RADOS_API uint64_t rados_get_last_version(rados_ioctx_t io);
1390
1391 /**
1392 * Write *len* bytes from *buf* into the *oid* object, starting at
1393 * offset *off*. The value of *len* must be <= UINT_MAX/2.
1394 *
1395 * @note This will never return a positive value not equal to len.
1396 * @param io the io context in which the write will occur
1397 * @param oid name of the object
1398 * @param buf data to write
1399 * @param len length of the data, in bytes
1400 * @param off byte offset in the object to begin writing at
1401 * @returns 0 on success, negative error code on failure
1402 */
1403 CEPH_RADOS_API int rados_write(rados_ioctx_t io, const char *oid,
1404 const char *buf, size_t len, uint64_t off);
1405
1406 /**
1407 * Write *len* bytes from *buf* into the *oid* object. The value of
1408 * *len* must be <= UINT_MAX/2.
1409 *
1410 * The object is filled with the provided data. If the object exists,
1411 * it is atomically truncated and then written.
1412 *
1413 * @param io the io context in which the write will occur
1414 * @param oid name of the object
1415 * @param buf data to write
1416 * @param len length of the data, in bytes
1417 * @returns 0 on success, negative error code on failure
1418 */
1419 CEPH_RADOS_API int rados_write_full(rados_ioctx_t io, const char *oid,
1420 const char *buf, size_t len);
1421
1422 /**
1423 * Write the same *data_len* bytes from *buf* multiple times into the
1424 * *oid* object. *write_len* bytes are written in total, which must be
1425 * a multiple of *data_len*. The value of *write_len* and *data_len*
1426 * must be <= UINT_MAX/2.
1427 *
1428 * @param io the io context in which the write will occur
1429 * @param oid name of the object
1430 * @param buf data to write
1431 * @param data_len length of the data, in bytes
1432 * @param write_len the total number of bytes to write
1433 * @param off byte offset in the object to begin writing at
1434 * @returns 0 on success, negative error code on failure
1435 */
1436 CEPH_RADOS_API int rados_writesame(rados_ioctx_t io, const char *oid,
1437 const char *buf, size_t data_len,
1438 size_t write_len, uint64_t off);
1439
1440 /**
1441 * Append *len* bytes from *buf* into the *oid* object. The value of
1442 * *len* must be <= UINT_MAX/2.
1443 *
1444 * @param io the context to operate in
1445 * @param oid the name of the object
1446 * @param buf the data to append
1447 * @param len length of buf (in bytes)
1448 * @returns 0 on success, negative error code on failure
1449 */
1450 CEPH_RADOS_API int rados_append(rados_ioctx_t io, const char *oid,
1451 const char *buf, size_t len);
1452
1453 /**
1454 * Read data from an object
1455 *
1456 * The io context determines the snapshot to read from, if any was set
1457 * by rados_ioctx_snap_set_read().
1458 *
1459 * @param io the context in which to perform the read
1460 * @param oid the name of the object to read from
1461 * @param buf where to store the results
1462 * @param len the number of bytes to read
1463 * @param off the offset to start reading from in the object
1464 * @returns number of bytes read on success, negative error code on
1465 * failure
1466 */
1467 CEPH_RADOS_API int rados_read(rados_ioctx_t io, const char *oid, char *buf,
1468 size_t len, uint64_t off);
1469
1470 /**
1471 * Compute checksum from object data
1472 *
1473 * The io context determines the snapshot to checksum, if any was set
1474 * by rados_ioctx_snap_set_read(). The length of the init_value and
1475 * resulting checksum are dependent upon the checksum type:
1476 *
1477 * XXHASH64: le64
1478 * XXHASH32: le32
1479 * CRC32C: le32
1480 *
1481 * The checksum result is encoded in the following manner:
1482 *
1483 * le32 num_checksum_chunks
1484 * {
1485 * leXX checksum for chunk (where XX = appropriate size for the checksum type)
1486 * } * num_checksum_chunks
1487 *
1488 * @param io the context in which to perform the checksum
1489 * @param oid the name of the object to checksum
1490 * @param type the checksum algorithm to utilize
1491 * @param init_value the init value for the algorithm
1492 * @param init_value_len the length of the init value
1493 * @param len the number of bytes to checksum
1494 * @param off the offset to start checksumming in the object
1495 * @param chunk_size optional length-aligned chunk size for checksums
1496 * @param pchecksum where to store the checksum result
1497 * @param checksum_len the number of bytes available for the result
1498 * @return negative error code on failure
1499 */
1500 CEPH_RADOS_API int rados_checksum(rados_ioctx_t io, const char *oid,
1501 rados_checksum_type_t type,
1502 const char *init_value, size_t init_value_len,
1503 size_t len, uint64_t off, size_t chunk_size,
1504 char *pchecksum, size_t checksum_len);
1505
1506 /**
1507 * Delete an object
1508 *
1509 * @note This does not delete any snapshots of the object.
1510 *
1511 * @param io the pool to delete the object from
1512 * @param oid the name of the object to delete
1513 * @returns 0 on success, negative error code on failure
1514 */
1515 CEPH_RADOS_API int rados_remove(rados_ioctx_t io, const char *oid);
1516
1517 /**
1518 * Resize an object
1519 *
1520 * If this enlarges the object, the new area is logically filled with
1521 * zeroes. If this shrinks the object, the excess data is removed.
1522 *
1523 * @param io the context in which to truncate
1524 * @param oid the name of the object
1525 * @param size the new size of the object in bytes
1526 * @returns 0 on success, negative error code on failure
1527 */
1528 CEPH_RADOS_API int rados_trunc(rados_ioctx_t io, const char *oid,
1529 uint64_t size);
1530
1531 /**
1532 * Compare an on-disk object range with a buffer
1533 *
1534 * @param io the context in which to perform the comparison
1535 * @param o name of the object
1536 * @param cmp_buf buffer containing bytes to be compared with object contents
1537 * @param cmp_len length to compare and size of *cmp_buf* in bytes
1538 * @param off object byte offset at which to start the comparison
1539 * @returns 0 on success, negative error code on failure,
1540 * (-MAX_ERRNO - mismatch_off) on mismatch
1541 */
1542 CEPH_RADOS_API int rados_cmpext(rados_ioctx_t io, const char *o,
1543 const char *cmp_buf, size_t cmp_len,
1544 uint64_t off);
1545
1546 /**
1547 * @name Xattrs
1548 * Extended attributes are stored as extended attributes on the files
1549 * representing an object on the OSDs. Thus, they have the same
1550 * limitations as the underlying filesystem. On ext4, this means that
1551 * the total data stored in xattrs cannot exceed 4KB.
1552 *
1553 * @{
1554 */
1555
1556 /**
1557 * Get the value of an extended attribute on an object.
1558 *
1559 * @param io the context in which the attribute is read
1560 * @param o name of the object
1561 * @param name which extended attribute to read
1562 * @param buf where to store the result
1563 * @param len size of buf in bytes
1564 * @returns length of xattr value on success, negative error code on failure
1565 */
1566 CEPH_RADOS_API int rados_getxattr(rados_ioctx_t io, const char *o,
1567 const char *name, char *buf, size_t len);
1568
1569 /**
1570 * Set an extended attribute on an object.
1571 *
1572 * @param io the context in which xattr is set
1573 * @param o name of the object
1574 * @param name which extended attribute to set
1575 * @param buf what to store in the xattr
1576 * @param len the number of bytes in buf
1577 * @returns 0 on success, negative error code on failure
1578 */
1579 CEPH_RADOS_API int rados_setxattr(rados_ioctx_t io, const char *o,
1580 const char *name, const char *buf,
1581 size_t len);
1582
1583 /**
1584 * Delete an extended attribute from an object.
1585 *
1586 * @param io the context in which to delete the xattr
1587 * @param o the name of the object
1588 * @param name which xattr to delete
1589 * @returns 0 on success, negative error code on failure
1590 */
1591 CEPH_RADOS_API int rados_rmxattr(rados_ioctx_t io, const char *o,
1592 const char *name);
1593
1594 /**
1595 * Start iterating over xattrs on an object.
1596 *
1597 * @post iter is a valid iterator
1598 *
1599 * @param io the context in which to list xattrs
1600 * @param oid name of the object
1601 * @param iter where to store the iterator
1602 * @returns 0 on success, negative error code on failure
1603 */
1604 CEPH_RADOS_API int rados_getxattrs(rados_ioctx_t io, const char *oid,
1605 rados_xattrs_iter_t *iter);
1606
1607 /**
1608 * Get the next xattr on the object
1609 *
1610 * @pre iter is a valid iterator
1611 *
1612 * @post name is the NULL-terminated name of the next xattr, and val
1613 * contains the value of the xattr, which is of length len. If the end
1614 * of the list has been reached, name and val are NULL, and len is 0.
1615 *
1616 * @param iter iterator to advance
1617 * @param name where to store the name of the next xattr
1618 * @param val where to store the value of the next xattr
1619 * @param len where to store the number of bytes in val
1620 * @returns 0 on success, negative error code on failure
1621 */
1622 CEPH_RADOS_API int rados_getxattrs_next(rados_xattrs_iter_t iter,
1623 const char **name, const char **val,
1624 size_t *len);
1625
1626 /**
1627 * Close the xattr iterator.
1628 *
1629 * iter should not be used after this is called.
1630 *
1631 * @param iter the iterator to close
1632 */
1633 CEPH_RADOS_API void rados_getxattrs_end(rados_xattrs_iter_t iter);
1634
1635 /** @} Xattrs */
1636
1637 /**
1638 * Get the next omap key/value pair on the object
1639 *
1640 * @pre iter is a valid iterator
1641 *
1642 * @post key and val are the next key/value pair. key is
1643 * null-terminated, and val has length len. If the end of the list has
1644 * been reached, key and val are NULL, and len is 0. key and val will
1645 * not be accessible after rados_omap_get_end() is called on iter, so
1646 * if they are needed after that they should be copied.
1647 *
1648 * @param iter iterator to advance
1649 * @param key where to store the key of the next omap entry
1650 * @param val where to store the value of the next omap entry
1651 * @param len where to store the number of bytes in val
1652 * @returns 0 on success, negative error code on failure
1653 */
1654 CEPH_RADOS_API int rados_omap_get_next(rados_omap_iter_t iter,
1655 char **key,
1656 char **val,
1657 size_t *len);
1658
1659 /**
1660 * Close the omap iterator.
1661 *
1662 * iter should not be used after this is called.
1663 *
1664 * @param iter the iterator to close
1665 */
1666 CEPH_RADOS_API void rados_omap_get_end(rados_omap_iter_t iter);
1667
1668 /**
1669 * Get object stats (size/mtime)
1670 *
1671 * TODO: when are these set, and by whom? can they be out of date?
1672 *
1673 * @param io ioctx
1674 * @param o object name
1675 * @param psize where to store object size
1676 * @param pmtime where to store modification time
1677 * @returns 0 on success, negative error code on failure
1678 */
1679 CEPH_RADOS_API int rados_stat(rados_ioctx_t io, const char *o, uint64_t *psize,
1680 time_t *pmtime);
1681
1682 /**
1683 * Update tmap (trivial map)
1684 *
1685 * Do compound update to a tmap object, inserting or deleting some
1686 * number of records. cmdbuf is a series of operation byte
1687 * codes, followed by command payload. Each command is a single-byte
1688 * command code, whose value is one of CEPH_OSD_TMAP_*.
1689 *
1690 * - update tmap 'header'
1691 * - 1 byte = CEPH_OSD_TMAP_HDR
1692 * - 4 bytes = data length (little endian)
1693 * - N bytes = data
1694 *
1695 * - insert/update one key/value pair
1696 * - 1 byte = CEPH_OSD_TMAP_SET
1697 * - 4 bytes = key name length (little endian)
1698 * - N bytes = key name
1699 * - 4 bytes = data length (little endian)
1700 * - M bytes = data
1701 *
1702 * - insert one key/value pair; return -EEXIST if it already exists.
1703 * - 1 byte = CEPH_OSD_TMAP_CREATE
1704 * - 4 bytes = key name length (little endian)
1705 * - N bytes = key name
1706 * - 4 bytes = data length (little endian)
1707 * - M bytes = data
1708 *
1709 * - remove one key/value pair
1710 * - 1 byte = CEPH_OSD_TMAP_RM
1711 * - 4 bytes = key name length (little endian)
1712 * - N bytes = key name
1713 *
1714 * Restrictions:
1715 * - The HDR update must precede any key/value updates.
1716 * - All key/value updates must be in lexicographically sorted order
1717 * in cmdbuf.
1718 * - You can read/write to a tmap object via the regular APIs, but
1719 * you should be careful not to corrupt it. Also be aware that the
1720 * object format may change without notice.
1721 *
1722 * @param io ioctx
1723 * @param o object name
1724 * @param cmdbuf command buffer
1725 * @param cmdbuflen command buffer length in bytes
1726 * @returns 0 on success, negative error code on failure
1727 */
1728 CEPH_RADOS_API int rados_tmap_update(rados_ioctx_t io, const char *o,
1729 const char *cmdbuf, size_t cmdbuflen);
1730
1731 /**
1732 * Store complete tmap (trivial map) object
1733 *
1734 * Put a full tmap object into the store, replacing what was there.
1735 *
1736 * The format of buf is:
1737 * - 4 bytes - length of header (little endian)
1738 * - N bytes - header data
1739 * - 4 bytes - number of keys (little endian)
1740 *
1741 * and for each key,
1742 * - 4 bytes - key name length (little endian)
1743 * - N bytes - key name
1744 * - 4 bytes - value length (little endian)
1745 * - M bytes - value data
1746 *
1747 * @param io ioctx
1748 * @param o object name
1749 * @param buf buffer
1750 * @param buflen buffer length in bytes
1751 * @returns 0 on success, negative error code on failure
1752 */
1753 CEPH_RADOS_API int rados_tmap_put(rados_ioctx_t io, const char *o,
1754 const char *buf, size_t buflen);
1755
1756 /**
1757 * Fetch complete tmap (trivial map) object
1758 *
1759 * Read a full tmap object. See rados_tmap_put() for the format the
1760 * data is returned in.
1761 *
1762 * @param io ioctx
1763 * @param o object name
1764 * @param buf buffer
1765 * @param buflen buffer length in bytes
1766 * @returns 0 on success, negative error code on failure
1767 * @returns -ERANGE if buf isn't big enough
1768 */
1769 CEPH_RADOS_API int rados_tmap_get(rados_ioctx_t io, const char *o, char *buf,
1770 size_t buflen);
1771
1772 /**
1773 * Execute an OSD class method on an object
1774 *
1775 * The OSD has a plugin mechanism for performing complicated
1776 * operations on an object atomically. These plugins are called
1777 * classes. This function allows librados users to call the custom
1778 * methods. The input and output formats are defined by the class.
1779 * Classes in ceph.git can be found in src/cls subdirectories
1780 *
1781 * @param io the context in which to call the method
1782 * @param oid the object to call the method on
1783 * @param cls the name of the class
1784 * @param method the name of the method
1785 * @param in_buf where to find input
1786 * @param in_len length of in_buf in bytes
1787 * @param buf where to store output
1788 * @param out_len length of buf in bytes
1789 * @returns the length of the output, or
1790 * -ERANGE if buf does not have enough space to store it (for methods that return data). For
1791 * methods that don't return data, the return value is
1792 * method-specific.
1793 */
1794 CEPH_RADOS_API int rados_exec(rados_ioctx_t io, const char *oid,
1795 const char *cls, const char *method,
1796 const char *in_buf, size_t in_len, char *buf,
1797 size_t out_len);
1798
1799
1800 /** @} Synchronous I/O */
1801
1802 /**
1803 * @name Asynchronous I/O
1804 * Read and write to objects without blocking.
1805 *
1806 * @{
1807 */
1808
1809 /**
1810 * @typedef rados_callback_t
1811 * Callbacks for asynchronous operations take two parameters:
1812 * - cb the completion that has finished
1813 * - arg application defined data made available to the callback function
1814 */
1815 typedef void (*rados_callback_t)(rados_completion_t cb, void *arg);
1816
1817 /**
1818 * Constructs a completion to use with asynchronous operations
1819 *
1820 * The complete and safe callbacks correspond to operations being
1821 * acked and committed, respectively. The callbacks are called in
1822 * order of receipt, so the safe callback may be triggered before the
1823 * complete callback, and vice versa. This is affected by journalling
1824 * on the OSDs.
1825 *
1826 * TODO: more complete documentation of this elsewhere (in the RADOS docs?)
1827 *
1828 * @note Read operations only get a complete callback.
1829 * @note BUG: this should check for ENOMEM instead of throwing an exception
1830 *
1831 * @param cb_arg application-defined data passed to the callback functions
1832 * @param cb_complete the function to be called when the operation is
1833 * in memory on all replicas
1834 * @param cb_safe the function to be called when the operation is on
1835 * stable storage on all replicas
1836 * @param pc where to store the completion
1837 * @returns 0
1838 */
1839 CEPH_RADOS_API int rados_aio_create_completion(void *cb_arg,
1840 rados_callback_t cb_complete,
1841 rados_callback_t cb_safe,
1842 rados_completion_t *pc);
1843
1844 /**
1845 * Block until an operation completes
1846 *
1847 * This means it is in memory on all replicas.
1848 *
1849 * @note BUG: this should be void
1850 *
1851 * @param c operation to wait for
1852 * @returns 0
1853 */
1854 CEPH_RADOS_API int rados_aio_wait_for_complete(rados_completion_t c);
1855
1856 /**
1857 * Block until an operation is safe
1858 *
1859 * This means it is on stable storage on all replicas.
1860 *
1861 * @note BUG: this should be void
1862 *
1863 * @param c operation to wait for
1864 * @returns 0
1865 */
1866 CEPH_RADOS_API int rados_aio_wait_for_safe(rados_completion_t c);
1867
1868 /**
1869 * Has an asynchronous operation completed?
1870 *
1871 * @warning This does not imply that the complete callback has
1872 * finished
1873 *
1874 * @param c async operation to inspect
1875 * @returns whether c is complete
1876 */
1877 CEPH_RADOS_API int rados_aio_is_complete(rados_completion_t c);
1878
1879 /**
1880 * Is an asynchronous operation safe?
1881 *
1882 * @warning This does not imply that the safe callback has
1883 * finished
1884 *
1885 * @param c async operation to inspect
1886 * @returns whether c is safe
1887 */
1888 CEPH_RADOS_API int rados_aio_is_safe(rados_completion_t c);
1889
1890 /**
1891 * Block until an operation completes and callback completes
1892 *
1893 * This means it is in memory on all replicas and can be read.
1894 *
1895 * @note BUG: this should be void
1896 *
1897 * @param c operation to wait for
1898 * @returns 0
1899 */
1900 CEPH_RADOS_API int rados_aio_wait_for_complete_and_cb(rados_completion_t c);
1901
1902 /**
1903 * Block until an operation is safe and callback has completed
1904 *
1905 * This means it is on stable storage on all replicas.
1906 *
1907 * @note BUG: this should be void
1908 *
1909 * @param c operation to wait for
1910 * @returns 0
1911 */
1912 CEPH_RADOS_API int rados_aio_wait_for_safe_and_cb(rados_completion_t c);
1913
1914 /**
1915 * Has an asynchronous operation and callback completed
1916 *
1917 * @param c async operation to inspect
1918 * @returns whether c is complete
1919 */
1920 CEPH_RADOS_API int rados_aio_is_complete_and_cb(rados_completion_t c);
1921
1922 /**
1923 * Is an asynchronous operation safe and has the callback completed
1924 *
1925 * @param c async operation to inspect
1926 * @returns whether c is safe
1927 */
1928 CEPH_RADOS_API int rados_aio_is_safe_and_cb(rados_completion_t c);
1929
1930 /**
1931 * Get the return value of an asynchronous operation
1932 *
1933 * The return value is set when the operation is complete or safe,
1934 * whichever comes first.
1935 *
1936 * @pre The operation is safe or complete
1937 *
1938 * @note BUG: complete callback may never be called when the safe
1939 * message is received before the complete message
1940 *
1941 * @param c async operation to inspect
1942 * @returns return value of the operation
1943 */
1944 CEPH_RADOS_API int rados_aio_get_return_value(rados_completion_t c);
1945
1946 /**
1947 * Get the internal object version of the target of an asynchronous operation
1948 *
1949 * The return value is set when the operation is complete or safe,
1950 * whichever comes first.
1951 *
1952 * @pre The operation is safe or complete
1953 *
1954 * @note BUG: complete callback may never be called when the safe
1955 * message is received before the complete message
1956 *
1957 * @param c async operation to inspect
1958 * @returns version number of the asynchronous operation's target
1959 */
1960 CEPH_RADOS_API uint64_t rados_aio_get_version(rados_completion_t c);
1961
1962 /**
1963 * Release a completion
1964 *
1965 * Call this when you no longer need the completion. It may not be
1966 * freed immediately if the operation is not acked and committed.
1967 *
1968 * @param c completion to release
1969 */
1970 CEPH_RADOS_API void rados_aio_release(rados_completion_t c);
1971
1972 /**
1973 * Write data to an object asynchronously
1974 *
1975 * Queues the write and returns. The return value of the completion
1976 * will be 0 on success, negative error code on failure.
1977 *
1978 * @param io the context in which the write will occur
1979 * @param oid name of the object
1980 * @param completion what to do when the write is safe and complete
1981 * @param buf data to write
1982 * @param len length of the data, in bytes
1983 * @param off byte offset in the object to begin writing at
1984 * @returns 0 on success, -EROFS if the io context specifies a snap_seq
1985 * other than LIBRADOS_SNAP_HEAD
1986 */
1987 CEPH_RADOS_API int rados_aio_write(rados_ioctx_t io, const char *oid,
1988 rados_completion_t completion,
1989 const char *buf, size_t len, uint64_t off);
1990
1991 /**
1992 * Asynchronously append data to an object
1993 *
1994 * Queues the append and returns.
1995 *
1996 * The return value of the completion will be 0 on success, negative
1997 * error code on failure.
1998 *
1999 * @param io the context to operate in
2000 * @param oid the name of the object
2001 * @param completion what to do when the append is safe and complete
2002 * @param buf the data to append
2003 * @param len length of buf (in bytes)
2004 * @returns 0 on success, -EROFS if the io context specifies a snap_seq
2005 * other than LIBRADOS_SNAP_HEAD
2006 */
2007 CEPH_RADOS_API int rados_aio_append(rados_ioctx_t io, const char *oid,
2008 rados_completion_t completion,
2009 const char *buf, size_t len);
2010
2011 /**
2012 * Asynchronously write an entire object
2013 *
2014 * The object is filled with the provided data. If the object exists,
2015 * it is atomically truncated and then written.
2016 * Queues the write_full and returns.
2017 *
2018 * The return value of the completion will be 0 on success, negative
2019 * error code on failure.
2020 *
2021 * @param io the io context in which the write will occur
2022 * @param oid name of the object
2023 * @param completion what to do when the write_full is safe and complete
2024 * @param buf data to write
2025 * @param len length of the data, in bytes
2026 * @returns 0 on success, -EROFS if the io context specifies a snap_seq
2027 * other than LIBRADOS_SNAP_HEAD
2028 */
2029 CEPH_RADOS_API int rados_aio_write_full(rados_ioctx_t io, const char *oid,
2030 rados_completion_t completion,
2031 const char *buf, size_t len);
2032
2033 /**
2034 * Asynchronously write the same buffer multiple times
2035 *
2036 * Queues the writesame and returns.
2037 *
2038 * The return value of the completion will be 0 on success, negative
2039 * error code on failure.
2040 *
2041 * @param io the io context in which the write will occur
2042 * @param oid name of the object
2043 * @param completion what to do when the writesame is safe and complete
2044 * @param buf data to write
2045 * @param data_len length of the data, in bytes
2046 * @param write_len the total number of bytes to write
2047 * @param off byte offset in the object to begin writing at
2048 * @returns 0 on success, -EROFS if the io context specifies a snap_seq
2049 * other than LIBRADOS_SNAP_HEAD
2050 */
2051 CEPH_RADOS_API int rados_aio_writesame(rados_ioctx_t io, const char *oid,
2052 rados_completion_t completion,
2053 const char *buf, size_t data_len,
2054 size_t write_len, uint64_t off);
2055
2056 /**
2057 * Asynchronously remove an object
2058 *
2059 * Queues the remove and returns.
2060 *
2061 * The return value of the completion will be 0 on success, negative
2062 * error code on failure.
2063 *
2064 * @param io the context to operate in
2065 * @param oid the name of the object
2066 * @param completion what to do when the remove is safe and complete
2067 * @returns 0 on success, -EROFS if the io context specifies a snap_seq
2068 * other than LIBRADOS_SNAP_HEAD
2069 */
2070 CEPH_RADOS_API int rados_aio_remove(rados_ioctx_t io, const char *oid,
2071 rados_completion_t completion);
2072
2073 /**
2074 * Asynchronously read data from an object
2075 *
2076 * The io context determines the snapshot to read from, if any was set
2077 * by rados_ioctx_snap_set_read().
2078 *
2079 * The return value of the completion will be number of bytes read on
2080 * success, negative error code on failure.
2081 *
2082 * @note only the 'complete' callback of the completion will be called.
2083 *
2084 * @param io the context in which to perform the read
2085 * @param oid the name of the object to read from
2086 * @param completion what to do when the read is complete
2087 * @param buf where to store the results
2088 * @param len the number of bytes to read
2089 * @param off the offset to start reading from in the object
2090 * @returns 0 on success, negative error code on failure
2091 */
2092 CEPH_RADOS_API int rados_aio_read(rados_ioctx_t io, const char *oid,
2093 rados_completion_t completion,
2094 char *buf, size_t len, uint64_t off);
2095
2096 /**
2097 * Block until all pending writes in an io context are safe
2098 *
2099 * This is not equivalent to calling rados_aio_wait_for_safe() on all
2100 * write completions, since this waits for the associated callbacks to
2101 * complete as well.
2102 *
2103 * @note BUG: always returns 0, should be void or accept a timeout
2104 *
2105 * @param io the context to flush
2106 * @returns 0 on success, negative error code on failure
2107 */
2108 CEPH_RADOS_API int rados_aio_flush(rados_ioctx_t io);
2109
2110
2111 /**
2112 * Schedule a callback for when all currently pending
2113 * aio writes are safe. This is a non-blocking version of
2114 * rados_aio_flush().
2115 *
2116 * @param io the context to flush
2117 * @param completion what to do when the writes are safe
2118 * @returns 0 on success, negative error code on failure
2119 */
2120 CEPH_RADOS_API int rados_aio_flush_async(rados_ioctx_t io,
2121 rados_completion_t completion);
2122
2123
2124 /**
2125 * Asynchronously get object stats (size/mtime)
2126 *
2127 * @param io ioctx
2128 * @param o object name
2129 * @param psize where to store object size
2130 * @param pmtime where to store modification time
2131 * @returns 0 on success, negative error code on failure
2132 */
2133 CEPH_RADOS_API int rados_aio_stat(rados_ioctx_t io, const char *o,
2134 rados_completion_t completion,
2135 uint64_t *psize, time_t *pmtime);
2136
2137 /**
2138 * Asynchronously compare an on-disk object range with a buffer
2139 *
2140 * @param io the context in which to perform the comparison
2141 * @param o the name of the object to compare with
2142 * @param completion what to do when the comparison is complete
2143 * @param cmp_buf buffer containing bytes to be compared with object contents
2144 * @param cmp_len length to compare and size of cmp_buf in bytes
2145 * @param off object byte offset at which to start the comparison
2146 * @returns 0 on success, negative error code on failure,
2147 * (-MAX_ERRNO - mismatch_off) on mismatch
2148 */
2149 CEPH_RADOS_API int rados_aio_cmpext(rados_ioctx_t io, const char *o,
2150 rados_completion_t completion,
2151 const char *cmp_buf,
2152 size_t cmp_len,
2153 uint64_t off);
2154
2155 /**
2156 * Cancel async operation
2157 *
2158 * @param io ioctx
2159 * @param completion completion handle
2160 * @returns 0 on success, negative error code on failure
2161 */
2162 CEPH_RADOS_API int rados_aio_cancel(rados_ioctx_t io,
2163 rados_completion_t completion);
2164
2165 /**
2166 * Asynchronously execute an OSD class method on an object
2167 *
2168 * The OSD has a plugin mechanism for performing complicated
2169 * operations on an object atomically. These plugins are called
2170 * classes. This function allows librados users to call the custom
2171 * methods. The input and output formats are defined by the class.
2172 * Classes in ceph.git can be found in src/cls subdirectories.
2173 *
2174 * @param io the context in which to call the method
2175 * @param o the object to call the method on
2176 * @param cls the name of the class
2177 * @param method the name of the method
2178 * @param in_buf where to find input
2179 * @param in_len length of in_buf in bytes
2180 * @param buf where to store output
2181 * @param out_len length of buf in bytes
2182 * @returns 0 on success, negative error code on failure
2183 */
2184 CEPH_RADOS_API int rados_aio_exec(rados_ioctx_t io, const char *o,
2185 rados_completion_t completion,
2186 const char *cls, const char *method,
2187 const char *in_buf, size_t in_len,
2188 char *buf, size_t out_len);
2189
2190 /** @} Asynchronous I/O */
2191
2192 /**
2193 * @name Asynchronous Xattrs
2194 * Extended attributes are stored as extended attributes on the files
2195 * representing an object on the OSDs. Thus, they have the same
2196 * limitations as the underlying filesystem. On ext4, this means that
2197 * the total data stored in xattrs cannot exceed 4KB.
2198 *
2199 * @{
2200 */
2201
2202 /**
2203 * Asynchronously get the value of an extended attribute on an object.
2204 *
2205 * @param io the context in which the attribute is read
2206 * @param o name of the object
2207 * @param completion what to do when the getxattr completes
2208 * @param name which extended attribute to read
2209 * @param buf where to store the result
2210 * @param len size of buf in bytes
2211 * @returns length of xattr value on success, negative error code on failure
2212 */
2213 CEPH_RADOS_API int rados_aio_getxattr(rados_ioctx_t io, const char *o,
2214 rados_completion_t completion,
2215 const char *name, char *buf, size_t len);
2216
2217 /**
2218 * Asynchronously set an extended attribute on an object.
2219 *
2220 * @param io the context in which xattr is set
2221 * @param o name of the object
2222 * @param completion what to do when the setxattr completes
2223 * @param name which extended attribute to set
2224 * @param buf what to store in the xattr
2225 * @param len the number of bytes in buf
2226 * @returns 0 on success, negative error code on failure
2227 */
2228 CEPH_RADOS_API int rados_aio_setxattr(rados_ioctx_t io, const char *o,
2229 rados_completion_t completion,
2230 const char *name, const char *buf,
2231 size_t len);
2232
2233 /**
2234 * Asynchronously delete an extended attribute from an object.
2235 *
2236 * @param io the context in which to delete the xattr
2237 * @param o the name of the object
2238 * @param completion what to do when the rmxattr completes
2239 * @param name which xattr to delete
2240 * @returns 0 on success, negative error code on failure
2241 */
2242 CEPH_RADOS_API int rados_aio_rmxattr(rados_ioctx_t io, const char *o,
2243 rados_completion_t completion,
2244 const char *name);
2245
2246 /**
2247 * Asynchronously start iterating over xattrs on an object.
2248 *
2249 * @post iter is a valid iterator
2250 *
2251 * @param io the context in which to list xattrs
2252 * @param oid name of the object
2253 * @param iter where to store the iterator
2254 * @returns 0 on success, negative error code on failure
2255 */
2256 CEPH_RADOS_API int rados_aio_getxattrs(rados_ioctx_t io, const char *oid,
2257 rados_completion_t completion,
2258 rados_xattrs_iter_t *iter);
2259
2260 /** @} Asynchronous Xattrs */
2261
2262 /**
2263 * @name Watch/Notify
2264 *
2265 * Watch/notify is a protocol to help communicate among clients. It
2266 * can be used to synchronize client state. All that's needed is a
2267 * well-known object name (for example, rbd uses the header object of
2268 * an image).
2269 *
2270 * Watchers register an interest in an object, and receive all
2271 * notifies on that object. A notify attempts to communicate with all
2272 * clients watching an object, and blocks on the notifier until each
2273 * client responds or a timeout is reached.
2274 *
2275 * See rados_watch() and rados_notify() for more details.
2276 *
2277 * @{
2278 */
2279
2280 /**
2281 * @typedef rados_watchcb_t
2282 *
2283 * Callback activated when a notify is received on a watched
2284 * object.
2285 *
2286 * @param opcode undefined
2287 * @param ver version of the watched object
2288 * @param arg application-specific data
2289 *
2290 * @note BUG: opcode is an internal detail that shouldn't be exposed
2291 * @note BUG: ver is unused
2292 */
2293 typedef void (*rados_watchcb_t)(uint8_t opcode, uint64_t ver, void *arg);
2294
2295 /**
2296 * @typedef rados_watchcb2_t
2297 *
2298 * Callback activated when a notify is received on a watched
2299 * object.
2300 *
2301 * @param arg opaque user-defined value provided to rados_watch2()
2302 * @param notify_id an id for this notify event
2303 * @param handle the watcher handle we are notifying
2304 * @param notifier_id the unique client id for the notifier
2305 * @param data payload from the notifier
2306 * @param data_len length of payload buffer
2307 */
2308 typedef void (*rados_watchcb2_t)(void *arg,
2309 uint64_t notify_id,
2310 uint64_t handle,
2311 uint64_t notifier_id,
2312 void *data,
2313 size_t data_len);
2314
2315 /**
2316 * @typedef rados_watcherrcb_t
2317 *
2318 * Callback activated when we encounter an error with the watch session.
2319 * This can happen when the location of the objects moves within the
2320 * cluster and we fail to register our watch with the new object location,
2321 * or when our connection with the object OSD is otherwise interrupted and
2322 * we may have missed notify events.
2323 *
2324 * @param pre opaque user-defined value provided to rados_watch2()
2325 * @param err error code
2326 */
2327 typedef void (*rados_watcherrcb_t)(void *pre, uint64_t cookie, int err);
2328
2329 /**
2330 * Register an interest in an object
2331 *
2332 * A watch operation registers the client as being interested in
2333 * notifications on an object. OSDs keep track of watches on
2334 * persistent storage, so they are preserved across cluster changes by
2335 * the normal recovery process. If the client loses its connection to
2336 * the primary OSD for a watched object, the watch will be removed
2337 * after 30 seconds. Watches are automatically reestablished when a new
2338 * connection is made, or a placement group switches OSDs.
2339 *
2340 * @note BUG: librados should provide a way for watchers to notice connection resets
2341 * @note BUG: the ver parameter does not work, and -ERANGE will never be returned
2342 * (See URL tracker.ceph.com/issues/2592)
2343 *
2344 * @param io the pool the object is in
2345 * @param o the object to watch
2346 * @param ver expected version of the object
2347 * @param cookie where to store the internal id assigned to this watch
2348 * @param watchcb what to do when a notify is received on this object
2349 * @param arg application defined data to pass when watchcb is called
2350 * @returns 0 on success, negative error code on failure
2351 * @returns -ERANGE if the version of the object is greater than ver
2352 */
2353 CEPH_RADOS_API int rados_watch(rados_ioctx_t io, const char *o, uint64_t ver,
2354 uint64_t *cookie,
2355 rados_watchcb_t watchcb, void *arg)
2356 __attribute__((deprecated));
2357
2358
2359 /**
2360 * Register an interest in an object
2361 *
2362 * A watch operation registers the client as being interested in
2363 * notifications on an object. OSDs keep track of watches on
2364 * persistent storage, so they are preserved across cluster changes by
2365 * the normal recovery process. If the client loses its connection to the
2366 * primary OSD for a watched object, the watch will be removed after
2367 * a timeout configured with osd_client_watch_timeout.
2368 * Watches are automatically reestablished when a new
2369 * connection is made, or a placement group switches OSDs.
2370 *
2371 * @param io the pool the object is in
2372 * @param o the object to watch
2373 * @param cookie where to store the internal id assigned to this watch
2374 * @param watchcb what to do when a notify is received on this object
2375 * @param watcherrcb what to do when the watch session encounters an error
2376 * @param arg opaque value to pass to the callback
2377 * @returns 0 on success, negative error code on failure
2378 */
2379 CEPH_RADOS_API int rados_watch2(rados_ioctx_t io, const char *o, uint64_t *cookie,
2380 rados_watchcb2_t watchcb,
2381 rados_watcherrcb_t watcherrcb,
2382 void *arg);
2383
2384 /**
2385 * Register an interest in an object
2386 *
2387 * A watch operation registers the client as being interested in
2388 * notifications on an object. OSDs keep track of watches on
2389 * persistent storage, so they are preserved across cluster changes by
2390 * the normal recovery process. Watches are automatically reestablished when a new
2391 * connection is made, or a placement group switches OSDs.
2392 *
2393 * @param io the pool the object is in
2394 * @param o the object to watch
2395 * @param cookie where to store the internal id assigned to this watch
2396 * @param watchcb what to do when a notify is received on this object
2397 * @param watcherrcb what to do when the watch session encounters an error
2398 * @param timeout how many seconds the watch is kept after the connection is lost
2399 * @param arg opaque value to pass to the callback
2400 * @returns 0 on success, negative error code on failure
2401 */
2402 CEPH_RADOS_API int rados_watch3(rados_ioctx_t io, const char *o, uint64_t *cookie,
2403 rados_watchcb2_t watchcb,
2404 rados_watcherrcb_t watcherrcb,
2405 uint32_t timeout,
2406 void *arg);
2407
2408 /**
2409 * Asynchronously register an interest in an object
2410 *
2411 * A watch operation registers the client as being interested in
2412 * notifications on an object. OSDs keep track of watches on
2413 * persistent storage, so they are preserved across cluster changes by
2414 * the normal recovery process. If the client loses its connection to
2415 * the primary OSD for a watched object, the watch will be removed
2416 * after 30 seconds. Watches are automatically reestablished when a new
2417 * connection is made, or a placement group switches OSDs.
2418 *
2419 * @param io the pool the object is in
2420 * @param o the object to watch
2421 * @param completion what to do when operation has been attempted
2422 * @param handle where to store the internal id assigned to this watch
2423 * @param watchcb what to do when a notify is received on this object
2424 * @param watcherrcb what to do when the watch session encounters an error
2425 * @param arg opaque value to pass to the callback
2426 * @returns 0 on success, negative error code on failure
2427 */
2428 CEPH_RADOS_API int rados_aio_watch(rados_ioctx_t io, const char *o,
2429 rados_completion_t completion, uint64_t *handle,
2430 rados_watchcb2_t watchcb,
2431 rados_watcherrcb_t watcherrcb,
2432 void *arg);
2433
2434 /**
2435 * Asynchronously register an interest in an object
2436 *
2437 * A watch operation registers the client as being interested in
2438 * notifications on an object. OSDs keep track of watches on
2439 * persistent storage, so they are preserved across cluster changes by
2440 * the normal recovery process. If the client loses its connection to
2441 * the primary OSD for a watched object, the watch will be removed
2442 * after the number of seconds configured in the timeout parameter.
2443 * Watches are automatically reestablished when a new
2444 * connection is made, or a placement group switches OSDs.
2445 *
2446 * @param io the pool the object is in
2447 * @param o the object to watch
2448 * @param completion what to do when operation has been attempted
2449 * @param handle where to store the internal id assigned to this watch
2450 * @param watchcb what to do when a notify is received on this object
2451 * @param watcherrcb what to do when the watch session encounters an error
2452 * @param timeout how many seconds the watch is kept after the connection is lost
2453 * @param arg opaque value to pass to the callback
2454 * @returns 0 on success, negative error code on failure
2455 */
2456 CEPH_RADOS_API int rados_aio_watch2(rados_ioctx_t io, const char *o,
2457 rados_completion_t completion, uint64_t *handle,
2458 rados_watchcb2_t watchcb,
2459 rados_watcherrcb_t watcherrcb,
2460 uint32_t timeout,
2461 void *arg);
2462
2463 /**
2464 * Check on the status of a watch
2465 *
2466 * Return the number of milliseconds since the watch was last confirmed.
2467 * Or, if there has been an error, return that.
2468 *
2469 * If there is an error, the watch is no longer valid, and should be
2470 * destroyed with rados_unwatch2(). If the user is still interested
2471 * in the object, a new watch should be created with rados_watch2().
2472 *
2473 * @param io the pool the object is in
2474 * @param cookie the watch handle
2475 * @returns ms since last confirmed on success, negative error code on failure
2476 */
2477 CEPH_RADOS_API int rados_watch_check(rados_ioctx_t io, uint64_t cookie);
2478
2479 /**
2480 * Unregister an interest in an object
2481 *
2482 * Once this completes, no more notifies will be sent to us for this
2483 * watch. This should be called to clean up unneeded watchers.
2484 *
2485 * @param io the pool the object is in
2486 * @param o the name of the watched object (ignored)
2487 * @param cookie which watch to unregister
2488 * @returns 0 on success, negative error code on failure
2489 */
2490 CEPH_RADOS_API int rados_unwatch(rados_ioctx_t io, const char *o, uint64_t cookie)
2491 __attribute__((deprecated));
2492
2493 /**
2494 * Unregister an interest in an object
2495 *
2496 * Once this completes, no more notifies will be sent to us for this
2497 * watch. This should be called to clean up unneeded watchers.
2498 *
2499 * @param io the pool the object is in
2500 * @param cookie which watch to unregister
2501 * @returns 0 on success, negative error code on failure
2502 */
2503 CEPH_RADOS_API int rados_unwatch2(rados_ioctx_t io, uint64_t cookie);
2504
2505 /**
2506 * Asynchronously unregister an interest in an object
2507 *
2508 * Once this completes, no more notifies will be sent to us for this
2509 * watch. This should be called to clean up unneeded watchers.
2510 *
2511 * @param io the pool the object is in
2512 * @param completion what to do when operation has been attempted
2513 * @param cookie which watch to unregister
2514 * @returns 0 on success, negative error code on failure
2515 */
2516 CEPH_RADOS_API int rados_aio_unwatch(rados_ioctx_t io, uint64_t cookie,
2517 rados_completion_t completion);
2518
2519 /**
2520 * Synchronously notify watchers of an object
2521 *
2522 * This blocks until all watchers of the object have received and
2523 * reacted to the notify, or a timeout is reached.
2524 *
2525 * @note BUG: the timeout is not changeable via the C API
2526 * @note BUG: the bufferlist is inaccessible in a rados_watchcb_t
2527 *
2528 * @param io the pool the object is in
2529 * @param o the name of the object
2530 * @param ver obsolete - just pass zero
2531 * @param buf data to send to watchers
2532 * @param buf_len length of buf in bytes
2533 * @returns 0 on success, negative error code on failure
2534 */
2535 CEPH_RADOS_API int rados_notify(rados_ioctx_t io, const char *o, uint64_t ver,
2536 const char *buf, int buf_len)
2537 __attribute__((deprecated));
2538
2539 /**
2540 * Synchronously notify watchers of an object
2541 *
2542 * This blocks until all watchers of the object have received and
2543 * reacted to the notify, or a timeout is reached.
2544 *
2545 * The reply buffer is optional. If specified, the client will get
2546 * back an encoded buffer that includes the ids of the clients that
2547 * acknowledged the notify as well as their notify ack payloads (if
2548 * any). Clients that timed out are not included. Even clients that
2549 * do not include a notify ack payload are included in the list but
2550 * have a 0-length payload associated with them. The format:
2551 *
2552 * le32 num_acks
2553 * {
2554 * le64 gid global id for the client (for client.1234 that's 1234)
2555 * le64 cookie cookie for the client
2556 * le32 buflen length of reply message buffer
2557 * u8 * buflen payload
2558 * } * num_acks
2559 * le32 num_timeouts
2560 * {
2561 * le64 gid global id for the client
2562 * le64 cookie cookie for the client
2563 * } * num_timeouts
2564 *
2565 * Note: There may be multiple instances of the same gid if there are
2566 * multiple watchers registered via the same client.
2567 *
2568 * Note: The buffer must be released with rados_buffer_free() when the
2569 * user is done with it.
2570 *
2571 * Note: Since the result buffer includes clients that time out, it
2572 * will be set even when rados_notify() returns an error code (like
2573 * -ETIMEDOUT).
2574 *
2575 * @param io the pool the object is in
2576 * @param completion what to do when operation has been attempted (rados_aio_notify() only)
2577 * @param o the name of the object
2578 * @param buf data to send to watchers
2579 * @param buf_len length of buf in bytes
2580 * @param timeout_ms notify timeout (in ms)
2581 * @param reply_buffer pointer to reply buffer pointer (free with rados_buffer_free)
2582 * @param reply_buffer_len pointer to size of reply buffer
2583 * @returns 0 on success, negative error code on failure
2584 */
2585 CEPH_RADOS_API int rados_notify2(rados_ioctx_t io, const char *o,
2586 const char *buf, int buf_len,
2587 uint64_t timeout_ms,
2588 char **reply_buffer, size_t *reply_buffer_len);
2589 CEPH_RADOS_API int rados_aio_notify(rados_ioctx_t io, const char *o,
2590 rados_completion_t completion,
2591 const char *buf, int buf_len,
2592 uint64_t timeout_ms, char **reply_buffer,
2593 size_t *reply_buffer_len);
2594
2595 /**
2596 * Acknowledge receipt of a notify
2597 *
2598 * @param io the pool the object is in
2599 * @param o the name of the object
2600 * @param notify_id the notify_id we got on the watchcb2_t callback
2601 * @param cookie the watcher handle
2602 * @param buf payload to return to notifier (optional)
2603 * @param buf_len payload length
2604 * @returns 0 on success
2605 */
2606 CEPH_RADOS_API int rados_notify_ack(rados_ioctx_t io, const char *o,
2607 uint64_t notify_id, uint64_t cookie,
2608 const char *buf, int buf_len);
2609
2610 /**
2611 * Flush watch/notify callbacks
2612 *
2613 * This call will block until all pending watch/notify callbacks have
2614 * been executed and the queue is empty. It should usually be called
2615 * after shutting down any watches before shutting down the ioctx or
2616 * librados to ensure that any callbacks do not misuse the ioctx (for
2617 * example by calling rados_notify_ack after the ioctx has been
2618 * destroyed).
2619 *
2620 * @param cluster the cluster handle
2621 */
2622 CEPH_RADOS_API int rados_watch_flush(rados_t cluster);
2623 /**
2624 * Flush watch/notify callbacks asynchronously
2625 *
2626 * This call does not block; the completion will be called once
2627 * all pending watch/notify callbacks have been executed and
2628 * the queue is empty. It should usually be called after shutting
2629 * down any watches before shutting down the ioctx or
2630 * librados to ensure that any callbacks do not misuse the ioctx (for
2631 * example by calling rados_notify_ack after the ioctx has been
2632 * destroyed).
2633 *
2634 * @param cluster the cluster handle
2635 * @param completion what to do when operation has been attempted
2636 */
2637 CEPH_RADOS_API int rados_aio_watch_flush(rados_t cluster, rados_completion_t completion);
2638
2639 /** @} Watch/Notify */
2640
2641 /**
2642 * Pin an object in the cache tier
2643 *
2644 * When an object is pinned in the cache tier, it stays in the cache
2645 * tier, and won't be flushed out.
2646 *
2647 * @param io the pool the object is in
2648 * @param o the object id
2649 * @returns 0 on success, negative error code on failure
2650 */
2651 CEPH_RADOS_API int rados_cache_pin(rados_ioctx_t io, const char *o);
2652
2653 /**
2654 * Unpin an object in the cache tier
2655 *
2656 * After an object is unpinned in the cache tier, it can be flushed out
2657 *
2658 * @param io the pool the object is in
2659 * @param o the object id
2660 * @returns 0 on success, negative error code on failure
2661 */
2662 CEPH_RADOS_API int rados_cache_unpin(rados_ioctx_t io, const char *o);
2663
2664 /**
2665 * @name Hints
2666 *
2667 * @{
2668 */
2669
2670 /**
2671 * Set allocation hint for an object
2672 *
2673 * This is an advisory operation, it will always succeed (as if it was
2674 * submitted with a LIBRADOS_OP_FLAG_FAILOK flag set) and is not
2675 * guaranteed to do anything on the backend.
2676 *
2677 * @param io the pool the object is in
2678 * @param o the name of the object
2679 * @param expected_object_size expected size of the object, in bytes
2680 * @param expected_write_size expected size of writes to the object, in bytes
2681 * @returns 0 on success, negative error code on failure
2682 */
2683 CEPH_RADOS_API int rados_set_alloc_hint(rados_ioctx_t io, const char *o,
2684 uint64_t expected_object_size,
2685 uint64_t expected_write_size);
2686
2687 /**
2688 * Set allocation hint for an object
2689 *
2690 * This is an advisory operation, it will always succeed (as if it was
2691 * submitted with a LIBRADOS_OP_FLAG_FAILOK flag set) and is not
2692 * guaranteed to do anything on the backend.
2693 *
2694 * @param io the pool the object is in
2695 * @param o the name of the object
2696 * @param expected_object_size expected size of the object, in bytes
2697 * @param expected_write_size expected size of writes to the object, in bytes
2698 * @param flags hints about future IO patterns
2699 * @returns 0 on success, negative error code on failure
2700 */
2701 CEPH_RADOS_API int rados_set_alloc_hint2(rados_ioctx_t io, const char *o,
2702 uint64_t expected_object_size,
2703 uint64_t expected_write_size,
2704 uint32_t flags);
2705
2706 /** @} Hints */
2707
2708 /**
2709 * @name Object Operations
2710 *
2711 * A single rados operation can do multiple operations on one object
2712 * atomically. The whole operation will succeed or fail, and no partial
2713 * results will be visible.
2714 *
2715 * Operations may be either reads, which can return data, or writes,
2716 * which cannot. The effects of writes are applied and visible all at
2717 * once, so an operation that sets an xattr and then checks its value
2718 * will not see the updated value.
2719 *
2720 * @{
2721 */
2722
2723 /**
2724 * Create a new rados_write_op_t write operation. This will store all actions
2725 * to be performed atomically. You must call rados_release_write_op when you are
2726 * finished with it.
2727 *
2728 * @returns non-NULL on success, NULL on memory allocation error.
2729 */
2730 CEPH_RADOS_API rados_write_op_t rados_create_write_op(void);
2731
2732 /**
2733 * Free a rados_write_op_t, must be called when you're done with it.
2734 * @param write_op operation to deallocate, created with rados_create_write_op
2735 */
2736 CEPH_RADOS_API void rados_release_write_op(rados_write_op_t write_op);
2737
2738 /**
2739 * Set flags for the last operation added to this write_op (at least one op must have been added).
2740 * @param write_op operation whose last-added action receives the flags
2741 * @param flags see librados.h constants beginning with LIBRADOS_OP_FLAG
2742 */
2743 CEPH_RADOS_API void rados_write_op_set_flags(rados_write_op_t write_op,
2744 int flags);
2745
2746 /**
2747 * Ensure that the object exists before writing
2748 * @param write_op operation to add this action to
2749 */
2750 CEPH_RADOS_API void rados_write_op_assert_exists(rados_write_op_t write_op);
2751
2752 /**
2753 * Ensure that the object exists and that its internal version
2754 * number is equal to "ver" before writing. "ver" should be a
2755 * version number previously obtained with rados_get_last_version().
2756 * - If the object's version is greater than the asserted version
2757 * then rados_write_op_operate will return -ERANGE instead of
2758 * executing the op.
2759 * - If the object's version is less than the asserted version
2760 * then rados_write_op_operate will return -EOVERFLOW instead
2761 * of executing the op.
2762 * @param write_op operation to add this action to
2763 * @param ver object version number
2764 */
2765 CEPH_RADOS_API void rados_write_op_assert_version(rados_write_op_t write_op, uint64_t ver);
2766
2767 /**
2768 * Ensure that the given object range (extent) satisfies comparison.
2769 *
2770 * @param write_op operation to add this action to
2771 * @param cmp_buf buffer containing bytes to be compared with object contents
2772 * @param cmp_len length to compare and size of cmp_buf in bytes
2773 * @param off object byte offset at which to start the comparison
2774 * @param prval returned result of comparison, 0 on success, negative error code
2775 * on failure, (-MAX_ERRNO - mismatch_off) on mismatch
2776 */
2777 CEPH_RADOS_API void rados_write_op_cmpext(rados_write_op_t write_op,
2778 const char *cmp_buf,
2779 size_t cmp_len,
2780 uint64_t off,
2781 int *prval);
2782
2783 /**
2784 * Ensure that given xattr satisfies comparison.
2785 * If the comparison is not satisfied, the return code of the
2786 * operation will be -ECANCELED
2787 * @param write_op operation to add this action to
2788 * @param name name of the xattr to look up
2789 * @param comparison_operator currently undocumented, look for
2790 * LIBRADOS_CMPXATTR_OP_EQ in librados.h
2791 * @param value buffer to compare actual xattr value to
2792 * @param value_len length of buffer to compare actual xattr value to
2793 */
2794 CEPH_RADOS_API void rados_write_op_cmpxattr(rados_write_op_t write_op,
2795 const char *name,
2796 uint8_t comparison_operator,
2797 const char *value,
2798 size_t value_len);
2799
2800 /**
2801 * Ensure that an omap value satisfies a comparison,
2802 * with the supplied value on the right hand side (i.e.
2803 * for OP_LT, the comparison is actual_value < value).
2804 *
2805 * @param write_op operation to add this action to
2806 * @param key which omap value to compare
2807 * @param comparison_operator one of LIBRADOS_CMPXATTR_OP_EQ,
2808 * LIBRADOS_CMPXATTR_OP_LT, or LIBRADOS_CMPXATTR_OP_GT
2809 * @param val value to compare with
2810 * @param val_len length of value in bytes
2811 * @param prval where to store the return value from this action
2812 */
2813 CEPH_RADOS_API void rados_write_op_omap_cmp(rados_write_op_t write_op,
2814 const char *key,
2815 uint8_t comparison_operator,
2816 const char *val,
2817 size_t val_len,
2818 int *prval);
2819
2820 /**
2821 * Set an xattr
2822 * @param write_op operation to add this action to
2823 * @param name name of the xattr
2824 * @param value buffer to set xattr to
2825 * @param value_len length of buffer to set xattr to
2826 */
2827 CEPH_RADOS_API void rados_write_op_setxattr(rados_write_op_t write_op,
2828 const char *name,
2829 const char *value,
2830 size_t value_len);
2831
2832 /**
2833 * Remove an xattr
2834 * @param write_op operation to add this action to
2835 * @param name name of the xattr to remove
2836 */
2837 CEPH_RADOS_API void rados_write_op_rmxattr(rados_write_op_t write_op,
2838 const char *name);
2839
2840 /**
2841 * Create the object
2842 * @param write_op operation to add this action to
2843 * @param exclusive set to either LIBRADOS_CREATE_EXCLUSIVE or
2844 * LIBRADOS_CREATE_IDEMPOTENT; an exclusive create
2845 * will error if the object already exists.
2846 * @param category category string (DEPRECATED, HAS NO EFFECT)
2847 */
2848 CEPH_RADOS_API void rados_write_op_create(rados_write_op_t write_op,
2849 int exclusive,
2850 const char* category);
2851
2852 /**
2853 * Write to offset
2854 * @param write_op operation to add this action to
2855 * @param buffer bytes to write
2856 * @param len length of buffer
2857 * @param offset offset to write to
2858 */
2859 CEPH_RADOS_API void rados_write_op_write(rados_write_op_t write_op,
2860 const char *buffer,
2861 size_t len,
2862 uint64_t offset);
2863
2864 /**
2865 * Write whole object, atomically replacing it.
2866 * @param write_op operation to add this action to
2867 * @param buffer bytes to write
2868 * @param len length of buffer
2869 */
2870 CEPH_RADOS_API void rados_write_op_write_full(rados_write_op_t write_op,
2871 const char *buffer,
2872 size_t len);
2873
2874 /**
2875 * Write the same buffer multiple times
2876 * @param write_op operation to add this action to
2877 * @param buffer bytes to write
2878 * @param data_len length of buffer
2879 * @param write_len total number of bytes to write, as a multiple of data_len
2880 * @param offset offset to write to
2881 */
2882 CEPH_RADOS_API void rados_write_op_writesame(rados_write_op_t write_op,
2883 const char *buffer,
2884 size_t data_len,
2885 size_t write_len,
2886 uint64_t offset);
2887
2888 /**
2889 * Append to end of object.
2890 * @param write_op operation to add this action to
2891 * @param buffer bytes to write
2892 * @param len length of buffer
2893 */
2894 CEPH_RADOS_API void rados_write_op_append(rados_write_op_t write_op,
2895 const char *buffer,
2896 size_t len);
2897 /**
2898 * Remove object
2899 * @param write_op operation to add this action to
2900 */
2901 CEPH_RADOS_API void rados_write_op_remove(rados_write_op_t write_op);
2902
2903 /**
2904 * Truncate an object
2905 * @param write_op operation to add this action to
2906 * @param offset Offset to truncate to
2907 */
2908 CEPH_RADOS_API void rados_write_op_truncate(rados_write_op_t write_op,
2909 uint64_t offset);
2910
2911 /**
2912 * Zero part of an object
2913 * @param write_op operation to add this action to
2914 * @param offset Offset to zero
2915 * @param len length to zero
2916 */
2917 CEPH_RADOS_API void rados_write_op_zero(rados_write_op_t write_op,
2918 uint64_t offset,
2919 uint64_t len);
2920
2921 /**
2922 * Execute an OSD class method on an object
2923 * See rados_exec() for general description.
2924 *
2925 * @param write_op operation to add this action to
2926 * @param cls the name of the class
2927 * @param method the name of the method
2928 * @param in_buf where to find input
2929 * @param in_len length of in_buf in bytes
2930 * @param prval where to store the return value from the method
2931 */
2932 CEPH_RADOS_API void rados_write_op_exec(rados_write_op_t write_op,
2933 const char *cls,
2934 const char *method,
2935 const char *in_buf,
2936 size_t in_len,
2937 int *prval);
2938
2939 /**
2940 * Set key/value pairs on an object
2941 *
2942 * @param write_op operation to add this action to
2943 * @param keys array of null-terminated char arrays representing keys to set
2944 * @param vals array of pointers to values to set
2945 * @param lens array of lengths corresponding to each value
2946 * @param num number of key/value pairs to set
2947 */
2948 CEPH_RADOS_API void rados_write_op_omap_set(rados_write_op_t write_op,
2949 char const* const* keys,
2950 char const* const* vals,
2951 const size_t *lens,
2952 size_t num);
2953
2954 /**
2955 * Remove key/value pairs from an object
2956 *
2957 * @param write_op operation to add this action to
2958 * @param keys array of null-terminated char arrays representing keys to remove
2959 * @param keys_len number of key/value pairs to remove
2960 */
2961 CEPH_RADOS_API void rados_write_op_omap_rm_keys(rados_write_op_t write_op,
2962 char const* const* keys,
2963 size_t keys_len);
2964
2965 /**
2966 * Remove all key/value pairs from an object
2967 *
2968 * @param write_op operation to add this action to
2969 */
2970 CEPH_RADOS_API void rados_write_op_omap_clear(rados_write_op_t write_op);
2971
2972 /**
2973 * Set allocation hint for an object
2974 *
2975 * @param write_op operation to add this action to
2976 * @param expected_object_size expected size of the object, in bytes
2977 * @param expected_write_size expected size of writes to the object, in bytes
2978 */
2979 CEPH_RADOS_API void rados_write_op_set_alloc_hint(rados_write_op_t write_op,
2980 uint64_t expected_object_size,
2981 uint64_t expected_write_size);
2982
2983 /**
2984 * Set allocation hint for an object
2985 *
2986 * @param write_op operation to add this action to
2987 * @param expected_object_size expected size of the object, in bytes
2988 * @param expected_write_size expected size of writes to the object, in bytes
2989 * @param flags hints about future IO patterns
2990 */
2991 CEPH_RADOS_API void rados_write_op_set_alloc_hint2(rados_write_op_t write_op,
2992 uint64_t expected_object_size,
2993 uint64_t expected_write_size,
2994 uint32_t flags);
2995
2996 /**
2997 * Perform a write operation synchronously
2998 * @param write_op operation to perform
2999 * @param io the ioctx that the object is in
3000 * @param oid the object id
3001 * @param mtime the time to set the mtime to, NULL for the current time
3002 * @param flags flags to apply to the entire operation (LIBRADOS_OPERATION_*)
3003 */
3004 CEPH_RADOS_API int rados_write_op_operate(rados_write_op_t write_op,
3005 rados_ioctx_t io,
3006 const char *oid,
3007 time_t *mtime,
3008 int flags);
3009 /**
3010 * Perform a write operation synchronously, with mtime given as a struct timespec
3011 * @param write_op operation to perform
3012 * @param io the ioctx that the object is in
3013 * @param oid the object id
3014 * @param mtime the time to set the mtime to, NULL for the current time
3015 * @param flags flags to apply to the entire operation (LIBRADOS_OPERATION_*)
3016 */
3017
3018 CEPH_RADOS_API int rados_write_op_operate2(rados_write_op_t write_op,
3019 rados_ioctx_t io,
3020 const char *oid,
3021 struct timespec *mtime,
3022 int flags);
3023
3024 /**
3025 * Perform a write operation asynchronously
3026 * @param write_op operation to perform
3027 * @param io the ioctx that the object is in
3028 * @param completion what to do when operation has been attempted
3029 * @param oid the object id
3030 * @param mtime the time to set the mtime to, NULL for the current time
3031 * @param flags flags to apply to the entire operation (LIBRADOS_OPERATION_*)
3032 */
3033 CEPH_RADOS_API int rados_aio_write_op_operate(rados_write_op_t write_op,
3034 rados_ioctx_t io,
3035 rados_completion_t completion,
3036 const char *oid,
3037 time_t *mtime,
3038 int flags);
3039
3040 /**
3041 * Create a new rados_read_op_t read operation. This will store all
3042 * actions to be performed atomically. You must call
3043 * rados_release_read_op when you are finished with it (after it
3044 * completes, or you decide not to send it in the first place).
3045 *
3046 * @returns non-NULL on success, NULL on memory allocation error.
3047 */
3048 CEPH_RADOS_API rados_read_op_t rados_create_read_op(void);
3049
3050 /**
3051 * Free a rados_read_op_t, must be called when you're done with it.
3052 * @param read_op operation to deallocate, created with rados_create_read_op
3053 */
3054 CEPH_RADOS_API void rados_release_read_op(rados_read_op_t read_op);
3055
3056 /**
3057 * Set flags for the last operation added to this read_op (at least one op must have been added).
3058 * @param read_op operation whose last-added action receives the flags
3059 * @param flags see librados.h constants beginning with LIBRADOS_OP_FLAG
3060 */
3061 CEPH_RADOS_API void rados_read_op_set_flags(rados_read_op_t read_op, int flags);
3062
3063 /**
3064 * Ensure that the object exists before reading
3065 * @param read_op operation to add this action to
3066 */
3067 CEPH_RADOS_API void rados_read_op_assert_exists(rados_read_op_t read_op);
3068
3069 /**
3070 * Ensure that the object exists and that its internal version
3071 * number is equal to "ver" before reading. "ver" should be a
3072 * version number previously obtained with rados_get_last_version().
3073 * - If the object's version is greater than the asserted version
3074 * then rados_read_op_operate will return -ERANGE instead of
3075 * executing the op.
3076 * - If the object's version is less than the asserted version
3077 * then rados_read_op_operate will return -EOVERFLOW instead
3078 * of executing the op.
3079 * @param read_op operation to add this action to
3080 * @param ver object version number
3081 */
3082 CEPH_RADOS_API void rados_read_op_assert_version(rados_read_op_t read_op, uint64_t ver);
3083
3084 /**
3085 * Ensure that the given object range (extent) satisfies comparison.
3086 *
3087 * @param read_op operation to add this action to
3088 * @param cmp_buf buffer containing bytes to be compared with object contents
3089 * @param cmp_len length to compare and size of cmp_buf in bytes
3090 * @param off object byte offset at which to start the comparison
3091 * @param prval returned result of comparison, 0 on success, negative error code
3092 * on failure, (-MAX_ERRNO - mismatch_off) on mismatch
3093 */
3094 CEPH_RADOS_API void rados_read_op_cmpext(rados_read_op_t read_op,
3095 const char *cmp_buf,
3096 size_t cmp_len,
3097 uint64_t off,
3098 int *prval);
3099
3100 /**
3101 * Ensure that an xattr satisfies a comparison.
3102 * If the comparison is not satisfied, the return code of the
3103 * operation will be -ECANCELED
3104 * @param read_op operation to add this action to
3105 * @param name name of the xattr to look up
3106 * @param comparison_operator currently undocumented, look for
3107 * LIBRADOS_CMPXATTR_OP_EQ in librados.h
3108 * @param value buffer to compare actual xattr value to
3109 * @param value_len length of buffer to compare actual xattr value to
3110 */
3111 CEPH_RADOS_API void rados_read_op_cmpxattr(rados_read_op_t read_op,
3112 const char *name,
3113 uint8_t comparison_operator,
3114 const char *value,
3115 size_t value_len);
3116
3117 /**
3118 * Start iterating over xattrs on an object.
3119 *
3120 * @param read_op operation to add this action to
3121 * @param iter where to store the iterator
3122 * @param prval where to store the return value of this action
3123 */
3124 CEPH_RADOS_API void rados_read_op_getxattrs(rados_read_op_t read_op,
3125 rados_xattrs_iter_t *iter,
3126 int *prval);
3127
3128 /**
3129 * Ensure that an omap value satisfies a comparison,
3130 * with the supplied value on the right hand side (i.e.
3131 * for OP_LT, the comparison is actual_value < value).
3132 *
3133 * @param read_op operation to add this action to
3134 * @param key which omap value to compare
3135 * @param comparison_operator one of LIBRADOS_CMPXATTR_OP_EQ,
3136 * LIBRADOS_CMPXATTR_OP_LT, or LIBRADOS_CMPXATTR_OP_GT
3137 * @param val value to compare with
3138 * @param val_len length of value in bytes
3139 * @param prval where to store the return value from this action
3140 */
3141 CEPH_RADOS_API void rados_read_op_omap_cmp(rados_read_op_t read_op,
3142 const char *key,
3143 uint8_t comparison_operator,
3144 const char *val,
3145 size_t val_len,
3146 int *prval);
3147
3148 /**
3149 * Get object size and mtime
3150 * @param read_op operation to add this action to
3151 * @param psize where to store object size
3152 * @param pmtime where to store modification time
3153 * @param prval where to store the return value of this action
3154 */
3155 CEPH_RADOS_API void rados_read_op_stat(rados_read_op_t read_op,
3156 uint64_t *psize,
3157 time_t *pmtime,
3158 int *prval);
3159
3160 /**
3161 * Read bytes from offset into buffer.
3162 *
3163 * bytes_read will be filled with the number of bytes read if successful.
3164 * A short read can only occur if the read reaches the end of the
3165 * object.
3166 *
3167 * @param read_op operation to add this action to
3168 * @param offset offset to read from
3169 * @param len length of buffer
3170 * @param buffer where to put the data
3171 * @param bytes_read where to store the number of bytes read by this action
3172 * @param prval where to store the return value of this action
3173 */
3174 CEPH_RADOS_API void rados_read_op_read(rados_read_op_t read_op,
3175 uint64_t offset,
3176 size_t len,
3177 char *buffer,
3178 size_t *bytes_read,
3179 int *prval);
3180
3181 /**
3182 * Compute checksum from object data
3183 *
3184 * @param read_op operation to add this action to
3185 * @param type the checksum algorithm to utilize
3186 * @param init_value the init value for the algorithm
3187 * @param init_value_len the length of the init value
3188 * @param offset the offset to start checksumming in the object
3189 * @param len the number of bytes to checksum
3190 * @param chunk_size optional length-aligned chunk size for checksums
3191 * @param pchecksum where to store the checksum result for this action
3192 * @param checksum_len the number of bytes available for the result
3193 * (i.e. the space available at pchecksum)
3194 * @param prval where to store the return value for this action
3195 */
3196 CEPH_RADOS_API void rados_read_op_checksum(rados_read_op_t read_op,
3197 rados_checksum_type_t type,
3198 const char *init_value,
3199 size_t init_value_len,
3200 uint64_t offset, size_t len,
3201 size_t chunk_size, char *pchecksum,
3202 size_t checksum_len, int *prval);
3203
3204 /**
3205 * Execute an OSD class method on an object
3206 * See rados_exec() for general description.
3207 *
3208 * The output buffer is allocated on the heap; the caller is
3209 * expected to release that memory with rados_buffer_free(). The
3210 * buffer and length pointers can all be NULL, in which case they are
3211 * not filled in.
3212 *
3213 * @param read_op operation to add this action to
3214 * @param cls the name of the class
3215 * @param method the name of the method
3216 * @param in_buf where to find input
3217 * @param in_len length of in_buf in bytes
3218 * @param out_buf where to put librados-allocated output buffer
3219 * @param out_len length of out_buf in bytes
3220 * @param prval where to store the return value from the method
3221 */
3222 CEPH_RADOS_API void rados_read_op_exec(rados_read_op_t read_op,
3223 const char *cls,
3224 const char *method,
3225 const char *in_buf,
3226 size_t in_len,
3227 char **out_buf,
3228 size_t *out_len,
3229 int *prval);
3230
3231 /**
3232 * Execute an OSD class method on an object
3233 * See rados_exec() for general description.
3234 *
3235 * If the output buffer is too small, prval will
3236 * be set to -ERANGE and used_len will be 0.
3237 *
3238 * @param read_op operation to add this action to
3239 * @param cls the name of the class
3240 * @param method the name of the method
3241 * @param in_buf where to find input
3242 * @param in_len length of in_buf in bytes
3243 * @param out_buf user-provided buffer to read into
3244 * @param out_len length of out_buf in bytes
3245 * @param used_len where to store the number of bytes read into out_buf
3246 * @param prval where to store the return value from the method
3247 */
3248 CEPH_RADOS_API void rados_read_op_exec_user_buf(rados_read_op_t read_op,
3249 const char *cls,
3250 const char *method,
3251 const char *in_buf,
3252 size_t in_len,
3253 char *out_buf,
3254 size_t out_len,
3255 size_t *used_len,
3256 int *prval);
3257
3258 /**
3259 * Start iterating over key/value pairs on an object.
3260 *
3261 * They will be returned sorted by key.
3262 *
3263 * @param read_op operation to add this action to
3264 * @param start_after list keys starting after start_after
3265 * @param filter_prefix list only keys beginning with filter_prefix
3266 * @param max_return list no more than max_return key/value pairs
3267 * @param iter where to store the iterator
3268 * @param prval where to store the return value from this action
3269 */
3270 CEPH_RADOS_API void rados_read_op_omap_get_vals(rados_read_op_t read_op,
3271 const char *start_after,
3272 const char *filter_prefix,
3273 uint64_t max_return,
3274 rados_omap_iter_t *iter,
3275 int *prval)
3276 __attribute__((deprecated)); /* use v2 below */
3277
3278 /**
3279 * Start iterating over key/value pairs on an object.
3280 *
3281 * They will be returned sorted by key.
3282 *
3283 * @param read_op operation to add this action to
3284 * @param start_after list keys starting after start_after
3285 * @param filter_prefix list only keys beginning with filter_prefix
3286 * @param max_return list no more than max_return key/value pairs
3287 * @param iter where to store the iterator
3288 * @param pmore flag indicating whether there are more keys to fetch
3289 * @param prval where to store the return value from this action
3290 */
3291 CEPH_RADOS_API void rados_read_op_omap_get_vals2(rados_read_op_t read_op,
3292 const char *start_after,
3293 const char *filter_prefix,
3294 uint64_t max_return,
3295 rados_omap_iter_t *iter,
3296 unsigned char *pmore,
3297 int *prval);
3298
3299 /**
3300 * Start iterating over keys on an object.
3301 *
3302 * They will be returned sorted by key, and the iterator
3303 * will fill in NULL for all values if specified.
3304 *
3305 * @param read_op operation to add this action to
3306 * @param start_after list keys starting after start_after
3307 * @param max_return list no more than max_return keys
3308 * @param iter where to store the iterator
3309 * @param prval where to store the return value from this action
3310 */
3311 CEPH_RADOS_API void rados_read_op_omap_get_keys(rados_read_op_t read_op,
3312 const char *start_after,
3313 uint64_t max_return,
3314 rados_omap_iter_t *iter,
3315 int *prval)
3316 __attribute__((deprecated)); /* use v2 below */
3317
3318 /**
3319 * Start iterating over keys on an object.
3320 *
3321 * They will be returned sorted by key, and the iterator
3322 * will fill in NULL for all values if specified.
3323 *
3324 * @param read_op operation to add this action to
3325 * @param start_after list keys starting after start_after
3326 * @param max_return list no more than max_return keys
3327 * @param iter where to store the iterator
3328 * @param pmore flag indicating whether there are more keys to fetch
3329 * @param prval where to store the return value from this action
3330 */
3331 CEPH_RADOS_API void rados_read_op_omap_get_keys2(rados_read_op_t read_op,
3332 const char *start_after,
3333 uint64_t max_return,
3334 rados_omap_iter_t *iter,
3335 unsigned char *pmore,
3336 int *prval);
3337
3338 /**
3339 * Start iterating over specific key/value pairs
3340 *
3341 * They will be returned sorted by key.
3342 *
3343 * @param read_op operation to add this action to
3344 * @param keys array of pointers to null-terminated keys to get
3345 * @param keys_len the number of strings in keys
3346 * @param iter where to store the iterator
3347 * @param prval where to store the return value from this action
3348 */
3349 CEPH_RADOS_API void rados_read_op_omap_get_vals_by_keys(rados_read_op_t read_op,
3350 char const* const* keys,
3351 size_t keys_len,
3352 rados_omap_iter_t *iter,
3353 int *prval);
3354
3355 /**
3356 * Perform a read operation synchronously
3357 * @param read_op operation to perform
3358 * @param io the ioctx that the object is in
3359 * @param oid the object id
3360 * @param flags flags to apply to the entire operation (LIBRADOS_OPERATION_*)
3361 */
3362 CEPH_RADOS_API int rados_read_op_operate(rados_read_op_t read_op,
3363 rados_ioctx_t io,
3364 const char *oid,
3365 int flags);
3366
3367 /**
3368 * Perform a read operation asynchronously
3369 * @param read_op operation to perform
3370 * @param io the ioctx that the object is in
3371 * @param completion what to do when operation has been attempted
3372 * @param oid the object id
3373 * @param flags flags to apply to the entire operation (LIBRADOS_OPERATION_*)
3374 */
3375 CEPH_RADOS_API int rados_aio_read_op_operate(rados_read_op_t read_op,
3376 rados_ioctx_t io,
3377 rados_completion_t completion,
3378 const char *oid,
3379 int flags);
3380
3381 /** @} Object Operations */
3382
3383 /**
3384 * Take an exclusive lock on an object.
3385 *
3386 * @param io the context to operate in
3387 * @param oid the name of the object
3388 * @param name the name of the lock
3389 * @param cookie user-defined identifier for this instance of the lock
3390 * @param desc user-defined lock description
3391 * @param duration the duration of the lock. Set to NULL for infinite duration.
3392 * @param flags lock flags
3393 * @returns 0 on success, negative error code on failure
3394 * @returns -EBUSY if the lock is already held by another (client, cookie) pair
3395 * @returns -EEXIST if the lock is already held by the same (client, cookie) pair
3396 */
3397 CEPH_RADOS_API int rados_lock_exclusive(rados_ioctx_t io, const char * oid,
3398 const char * name, const char * cookie,
3399 const char * desc,
3400 struct timeval * duration,
3401 uint8_t flags);
3402
3403 /**
3404 * Take a shared lock on an object.
3405 *
3406 * @param io the context to operate in
3407 * @param o the name of the object
3408 * @param name the name of the lock
3409 * @param cookie user-defined identifier for this instance of the lock
3410 * @param tag The tag of the lock
3411 * @param desc user-defined lock description
3412 * @param duration the duration of the lock. Set to NULL for infinite duration.
3413 * @param flags lock flags
3414 * @returns 0 on success, negative error code on failure
3415 * @returns -EBUSY if the lock is already held by another (client, cookie) pair
3416 * @returns -EEXIST if the lock is already held by the same (client, cookie) pair
3417 */
3418 CEPH_RADOS_API int rados_lock_shared(rados_ioctx_t io, const char * o,
3419 const char * name, const char * cookie,
3420 const char * tag, const char * desc,
3421 struct timeval * duration, uint8_t flags);
3422
3423 /**
3424 * Release a shared or exclusive lock on an object.
3425 *
3426 * @param io the context to operate in
3427 * @param o the name of the object
3428 * @param name the name of the lock
3429 * @param cookie user-defined identifier for the instance of the lock
3430 * @returns 0 on success, negative error code on failure
3431 * @returns -ENOENT if the lock is not held by the specified (client, cookie) pair
3432 */
3433 CEPH_RADOS_API int rados_unlock(rados_ioctx_t io, const char *o,
3434 const char *name, const char *cookie);
3435
3436 /**
3437 * Asynchronously release a shared or exclusive lock on an object.
3438 *
3439 * @param io the context to operate in
3440 * @param o the name of the object
3441 * @param name the name of the lock
3442 * @param cookie user-defined identifier for the instance of the lock
3443 * @param completion what to do when operation has been attempted
3444 * @returns 0 on success, negative error code on failure
3445 */
3446 CEPH_RADOS_API int rados_aio_unlock(rados_ioctx_t io, const char *o,
3447 const char *name, const char *cookie,
3448 rados_completion_t completion);
3449
3450 /**
3451 * List clients that have locked the named object lock and information about
3452 * the lock.
3453 *
3454 * The number of bytes required in each buffer is put in the
3455 * corresponding size out parameter. If any of the provided buffers
3456 * are too short, -ERANGE is returned after these sizes are filled in.
3457 *
3458 * @param io the context to operate in
3459 * @param o the name of the object
3460 * @param name the name of the lock
3461 * @param exclusive where to store whether the lock is exclusive (1) or shared (0)
3462 * @param tag where to store the tag associated with the object lock
3463 * @param tag_len number of bytes in tag buffer
3464 * @param clients buffer in which locker clients are stored, separated by '\0'
3465 * @param clients_len number of bytes in the clients buffer
3466 * @param cookies buffer in which locker cookies are stored, separated by '\0'
3467 * @param cookies_len number of bytes in the cookies buffer
3468 * @param addrs buffer in which locker addresses are stored, separated by '\0'
3469 * @param addrs_len number of bytes in the addrs buffer
3470 * @returns number of lockers on success, negative error code on failure
3471 * @returns -ERANGE if any of the buffers are too short
3472 */
3473 CEPH_RADOS_API ssize_t rados_list_lockers(rados_ioctx_t io, const char *o,
3474 const char *name, int *exclusive,
3475 char *tag, size_t *tag_len,
3476 char *clients, size_t *clients_len,
3477 char *cookies, size_t *cookies_len,
3478 char *addrs, size_t *addrs_len);
3479
3480 /**
3481 * Release a shared or exclusive lock on an object that was taken by the
3482 * specified client.
3483 *
3484 * @param io the context to operate in
3485 * @param o the name of the object
3486 * @param name the name of the lock
3487 * @param client the client currently holding the lock
3488 * @param cookie user-defined identifier for the instance of the lock
3489 * @returns 0 on success, negative error code on failure
3490 * @returns -ENOENT if the lock is not held by the specified (client, cookie) pair
3491 * @returns -EINVAL if the client cannot be parsed
3492 */
3493 CEPH_RADOS_API int rados_break_lock(rados_ioctx_t io, const char *o,
3494 const char *name, const char *client,
3495 const char *cookie);
3496
3497 /**
3498 * Blacklist the specified client from the OSDs.
3499 *
3500 * @param cluster cluster handle
3501 * @param client_address client address (non-const here; TODO confirm it is not modified)
3502 * @param expire_seconds number of seconds to blacklist (0 for default)
3503 * @returns 0 on success, negative error code on failure
3504 */
3505 CEPH_RADOS_API int rados_blacklist_add(rados_t cluster,
3506 char *client_address,
3507 uint32_t expire_seconds);
3508
3509 /**
3510 * @name Mon/OSD/PG Commands
3511 *
3512 * These interfaces send commands relating to the monitor, OSD, or PGs.
3513 *
3514 * @{
3515 */
3516
3517 /**
3518 * Send monitor command.
3519 *
3520 * @note Takes command string in carefully-formatted JSON; must match
3521 * defined commands, types, etc.
3522 *
3523 * The result buffers are allocated on the heap; the caller is expected
3524 * to release that memory with rados_buffer_free(). The buffer and length
3525 * pointers can all be NULL, in which case they are not filled in.
3526 *
3527 * @param cluster cluster handle
3528 * @param cmd an array of char *'s representing the command
3529 * @param cmdlen count of valid entries in cmd
3530 * @param inbuf any bulk input data (crush map, etc.)
3531 * @param inbuflen length of the data in inbuf
3532 * @param outbuf double pointer to output buffer
3533 * @param outbuflen pointer to output buffer length
3534 * @param outs double pointer to status string
3535 * @param outslen pointer to status string length
3536 * @returns 0 on success, negative error code on failure
3537 */
3538 CEPH_RADOS_API int rados_mon_command(rados_t cluster, const char **cmd,
3539 size_t cmdlen, const char *inbuf,
3540 size_t inbuflen, char **outbuf,
3541 size_t *outbuflen, char **outs,
3542 size_t *outslen);
3543
3544 /**
3545 * Send ceph-mgr command.
3546 *
3547 * @note Takes command string in carefully-formatted JSON; must match
3548 * defined commands, types, etc.
3549 *
3550 * The result buffers are allocated on the heap; the caller is expected
3551 * to release that memory with rados_buffer_free(). The buffer and length
3552 * pointers can all be NULL, in which case they are not filled in.
3553 *
3554 * @param cluster cluster handle
3555 * @param cmd an array of char *'s representing the command
3556 * @param cmdlen count of valid entries in cmd
3557 * @param inbuf any bulk input data (crush map, etc.)
3558 * @param inbuflen length of the data in inbuf
3559 * @param outbuf double pointer to output buffer
3560 * @param outbuflen pointer to output buffer length
3561 * @param outs double pointer to status string
3562 * @param outslen pointer to status string length
3563 * @returns 0 on success, negative error code on failure
3564 */
3565 CEPH_RADOS_API int rados_mgr_command(rados_t cluster, const char **cmd,
3566 size_t cmdlen, const char *inbuf,
3567 size_t inbuflen, char **outbuf,
3568 size_t *outbuflen, char **outs,
3569 size_t *outslen);
3570
3571 /**
3572 * Send monitor command to a specific monitor.
3573 *
3574 * @note Takes command string in carefully-formatted JSON; must match
3575 * defined commands, types, etc.
3576 *
3577 * The result buffers are allocated on the heap; the caller is expected
3578 * to release that memory with rados_buffer_free(). The buffer and length
3579 * pointers can all be NULL, in which case they are not filled in.
3580 *
3581 * @param cluster cluster handle
3582 * @param name target monitor's name
3583 * @param cmd an array of char *'s representing the command
3584 * @param cmdlen count of valid entries in cmd
3585 * @param inbuf any bulk input data (crush map, etc.)
3586 * @param inbuflen length of the data in inbuf
3587 * @param outbuf double pointer to output buffer
3588 * @param outbuflen pointer to output buffer length
3589 * @param outs double pointer to status string
3590 * @param outslen pointer to status string length
3591 * @returns 0 on success, negative error code on failure
3592 */
3593 CEPH_RADOS_API int rados_mon_command_target(rados_t cluster, const char *name,
3594 const char **cmd, size_t cmdlen,
3595 const char *inbuf, size_t inbuflen,
3596 char **outbuf, size_t *outbuflen,
3597 char **outs, size_t *outslen);
3598
3599 /**
3600 * Free a rados-allocated buffer.
3601 *
3602 * Release memory allocated by librados calls like rados_mon_command().
3603 *
3604 * @param buf buffer pointer previously handed back by such a call
3605 */
3606 CEPH_RADOS_API void rados_buffer_free(char *buf);
3607 /* Send a command to the OSD given by osdid. NOTE(review): the other parameters appear to mirror rados_mon_command() -- confirm. */
3608 CEPH_RADOS_API int rados_osd_command(rados_t cluster, int osdid,
3609 const char **cmd, size_t cmdlen,
3610 const char *inbuf, size_t inbuflen,
3611 char **outbuf, size_t *outbuflen,
3612 char **outs, size_t *outslen);
3613 /* Send a command to the PG named by pgstr. NOTE(review): the other parameters appear to mirror rados_mon_command() -- confirm. */
3614 CEPH_RADOS_API int rados_pg_command(rados_t cluster, const char *pgstr,
3615 const char **cmd, size_t cmdlen,
3616 const char *inbuf, size_t inbuflen,
3617 char **outbuf, size_t *outbuflen,
3618 char **outs, size_t *outslen);
3619
3620 /*
3621 * Note: rados_mgr_command() is declared and documented earlier in this
3622 * header, alongside rados_mon_command(); it is intentionally not repeated
3623 * here, since a second identical prototype is a redundant declaration.
3624 */
3625
3626 /*
3627 * This is not a doxygen comment lead-in, because doxygen breaks on
3628 * a typedef with function params and returns, and I can't figure out
3629 * how to fix it.
3630 *
3631 * Monitor cluster log
3632 *
3633 * Monitor events logged to the cluster log. The callback gets each
3634 * log entry both as a single formatted line and with each field in a
3635 * separate arg.
3636 *
3637 * Calling with a cb argument of NULL will deregister any previously
3638 * registered callback.
3639 *
3640 * @param cluster cluster handle
3641 * @param level minimum log level (debug, info, warn|warning, err|error)
3642 * @param cb callback to run for each log message. It MUST NOT block
3643 * nor call back into librados.
3644 * @param arg void argument to pass to cb
3645 *
3646 * @returns 0 on success, negative code on error
3647 */
3648 typedef void (*rados_log_callback_t)(void *arg,
3649 const char *line,
3650 const char *who,
3651 uint64_t sec, uint64_t nsec,
3652 uint64_t seq, const char *level,
3653 const char *msg);
3654
3655 CEPH_RADOS_API int rados_monitor_log(rados_t cluster, const char *level,
3656 rados_log_callback_t cb, void *arg);
3657
3658 /** @} Mon/OSD/PG commands */
3659
3660 /*
3661 * Deprecated object-listing methods: they are no longer supported and return -ENOTSUP where possible.
3662 */
3663 CEPH_RADOS_API int rados_objects_list_open(
3664 rados_ioctx_t io,
3665 rados_list_ctx_t *ctx) __attribute__((deprecated));
3666 CEPH_RADOS_API uint32_t rados_objects_list_get_pg_hash_position(
3667 rados_list_ctx_t ctx) __attribute__((deprecated));
3668 CEPH_RADOS_API uint32_t rados_objects_list_seek(
3669 rados_list_ctx_t ctx,
3670 uint32_t pos) __attribute__((deprecated));
3671 CEPH_RADOS_API int rados_objects_list_next(
3672 rados_list_ctx_t ctx,
3673 const char **entry,
3674 const char **key) __attribute__((deprecated));
3675 CEPH_RADOS_API void rados_objects_list_close(
3676 rados_list_ctx_t ctx) __attribute__((deprecated));
3677
3678
3679 #ifdef __cplusplus
3680 }
3681 #endif
3682
3683 #endif