]> git.proxmox.com Git - ceph.git/blob - ceph/src/include/rados/librados.h
8fa4b69f5709b468f3cdc112ae2042bbf06df1ba
[ceph.git] / ceph / src / include / rados / librados.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2012 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15 #ifndef CEPH_LIBRADOS_H
16 #define CEPH_LIBRADOS_H
17
18 #ifdef __cplusplus
19 extern "C" {
20 #endif
21
22 #include <netinet/in.h>
23 #if defined(__linux__)
24 #include <linux/types.h>
25 #elif defined(__FreeBSD__)
26 #include <sys/types.h>
27 #endif
28 #include <unistd.h>
29 #include <string.h>
30 #include "rados_types.h"
31
32 #include <sys/time.h>
33
34 #ifndef CEPH_OSD_TMAP_SET
35 /* These are also defined in rados.h and objclass.h. Keep them in sync! */
36 #define CEPH_OSD_TMAP_HDR 'h'
37 #define CEPH_OSD_TMAP_SET 's'
38 #define CEPH_OSD_TMAP_CREATE 'c'
39 #define CEPH_OSD_TMAP_RM 'r'
40 #endif
41
42 #define LIBRADOS_VER_MAJOR 0
43 #define LIBRADOS_VER_MINOR 69
44 #define LIBRADOS_VER_EXTRA 1
45
/*
 * Pack a (major, minor, extra) version triple into a single integer:
 * major is shifted left by 16 bits, minor by 8 bits, and extra is added
 * unshifted.  Each argument is parenthesized so that expression arguments
 * (e.g. `a | b`) are evaluated as a unit before being shifted or added.
 */
#define LIBRADOS_VERSION(maj, min, extra) (((maj) << 16) + ((min) << 8) + (extra))
47
48 #define LIBRADOS_VERSION_CODE LIBRADOS_VERSION(LIBRADOS_VER_MAJOR, LIBRADOS_VER_MINOR, LIBRADOS_VER_EXTRA)
49
50 #define LIBRADOS_SUPPORTS_WATCH 1
51
52 /* RADOS lock flags
53 * They are also defined in cls_lock_types.h. Keep them in sync!
54 */
55 #define LIBRADOS_LOCK_FLAG_RENEW 0x1
56
57 /*
58 * Constants for rados_write_op_create().
59 */
60 #define LIBRADOS_CREATE_EXCLUSIVE 1
61 #define LIBRADOS_CREATE_IDEMPOTENT 0
62
63 /*
64 * Flags that can be set on a per-op basis via
65 * rados_read_op_set_flags() and rados_write_op_set_flags().
66 */
67 enum {
68 // fail a create operation if the object already exists
69 LIBRADOS_OP_FLAG_EXCL = 0x1,
70 // allow the transaction to succeed even if the flagged op fails
71 LIBRADOS_OP_FLAG_FAILOK = 0x2,
72 // indicate read/write op random
73 LIBRADOS_OP_FLAG_FADVISE_RANDOM = 0x4,
74 // indicate read/write op sequential
75 LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL = 0x8,
76 // indicate read/write data will be accessed in the near future (by someone)
77 LIBRADOS_OP_FLAG_FADVISE_WILLNEED = 0x10,
78 // indicate read/write data will not be accessed in the near future (by anyone)
79 LIBRADOS_OP_FLAG_FADVISE_DONTNEED = 0x20,
80 // indicate read/write data will not be accessed again (by *this* client)
81 LIBRADOS_OP_FLAG_FADVISE_NOCACHE = 0x40,
82 };
83
84 #if __GNUC__ >= 4
85 #define CEPH_RADOS_API __attribute__ ((visibility ("default")))
86 #else
87 #define CEPH_RADOS_API
88 #endif
89
90 /**
91 * @name xattr comparison operations
92 * Operators for comparing xattrs on objects, and aborting the
93 * rados_read_op or rados_write_op transaction if the comparison
94 * fails.
95 *
96 * @{
97 */
98 enum {
99 LIBRADOS_CMPXATTR_OP_EQ = 1,
100 LIBRADOS_CMPXATTR_OP_NE = 2,
101 LIBRADOS_CMPXATTR_OP_GT = 3,
102 LIBRADOS_CMPXATTR_OP_GTE = 4,
103 LIBRADOS_CMPXATTR_OP_LT = 5,
104 LIBRADOS_CMPXATTR_OP_LTE = 6
105 };
106 /** @} */
107
108 /**
109 * @name Operation Flags
110 * Flags for rados_read_op_operate(), rados_write_op_operate(),
111 * rados_aio_read_op_operate(), and rados_aio_write_op_operate().
112 * See librados.hpp for details.
113 * @{
114 */
115 enum {
116 LIBRADOS_OPERATION_NOFLAG = 0,
117 LIBRADOS_OPERATION_BALANCE_READS = 1,
118 LIBRADOS_OPERATION_LOCALIZE_READS = 2,
119 LIBRADOS_OPERATION_ORDER_READS_WRITES = 4,
120 LIBRADOS_OPERATION_IGNORE_CACHE = 8,
121 LIBRADOS_OPERATION_SKIPRWLOCKS = 16,
122 LIBRADOS_OPERATION_IGNORE_OVERLAY = 32,
123 /* send requests to cluster despite the cluster or pool being marked
124 full; ops will either succeed (e.g., delete) or return EDQUOT or
125 ENOSPC. */
126 LIBRADOS_OPERATION_FULL_TRY = 64,
127 /*
128 * Mainly for delete op
129 */
130 LIBRADOS_OPERATION_FULL_FORCE = 128,
131 LIBRADOS_OPERATION_IGNORE_REDIRECT = 256,
132 };
133 /** @} */
134
135 /**
136 * @name Alloc hint flags
137 * Flags for rados_write_op_alloc_hint2() and rados_set_alloc_hint2()
138 * indicating future IO patterns.
139 * @{
140 */
141 enum {
142 LIBRADOS_ALLOC_HINT_FLAG_SEQUENTIAL_WRITE = 1,
143 LIBRADOS_ALLOC_HINT_FLAG_RANDOM_WRITE = 2,
144 LIBRADOS_ALLOC_HINT_FLAG_SEQUENTIAL_READ = 4,
145 LIBRADOS_ALLOC_HINT_FLAG_RANDOM_READ = 8,
146 LIBRADOS_ALLOC_HINT_FLAG_APPEND_ONLY = 16,
147 LIBRADOS_ALLOC_HINT_FLAG_IMMUTABLE = 32,
148 LIBRADOS_ALLOC_HINT_FLAG_SHORTLIVED = 64,
149 LIBRADOS_ALLOC_HINT_FLAG_LONGLIVED = 128,
150 LIBRADOS_ALLOC_HINT_FLAG_COMPRESSIBLE = 256,
151 LIBRADOS_ALLOC_HINT_FLAG_INCOMPRESSIBLE = 512,
152 };
153 /** @} */
154
155 typedef enum {
156 LIBRADOS_CHECKSUM_TYPE_XXHASH32 = 0,
157 LIBRADOS_CHECKSUM_TYPE_XXHASH64 = 1,
158 LIBRADOS_CHECKSUM_TYPE_CRC32C = 2
159 } rados_checksum_type_t;
160
161 /*
162 * snap id constants
163 */
164 #define LIBRADOS_SNAP_HEAD ((uint64_t)(-2))
165 #define LIBRADOS_SNAP_DIR ((uint64_t)(-1))
166
167 /**
168 * @typedef rados_t
169 *
170 * A handle for interacting with a RADOS cluster. It encapsulates all
171 * RADOS client configuration, including username, key for
172 * authentication, logging, and debugging. Talking to different clusters
173 * -- or to the same cluster with different users -- requires
174 * different cluster handles.
175 */
176 #ifndef VOIDPTR_RADOS_T
177 #define VOIDPTR_RADOS_T
178 typedef void *rados_t;
179 #endif //VOIDPTR_RADOS_T
180
181 /**
182 * @typedef rados_config_t
183 *
184 * A handle for the ceph configuration context for the rados_t cluster
185 * instance. This can be used to share configuration context/state
186 * (e.g., logging configuration) between librados instances.
187 *
188 * @warning The config context does not have independent reference
189 * counting. As such, a rados_config_t handle retrieved from a given
190 * rados_t is only valid as long as that rados_t.
191 */
192 typedef void *rados_config_t;
193
194 /**
195 * @typedef rados_ioctx_t
196 *
197 * An io context encapsulates a few settings for all I/O operations
198 * done on it:
199 * - pool - set when the io context is created (see rados_ioctx_create())
200 * - snapshot context for writes (see
201 * rados_ioctx_selfmanaged_snap_set_write_ctx())
202 * - snapshot id to read from (see rados_ioctx_snap_set_read())
203 * - object locator for all single-object operations (see
204 * rados_ioctx_locator_set_key())
205 * - namespace for all single-object operations (see
206 * rados_ioctx_set_namespace()). Set to LIBRADOS_ALL_NSPACES
207 * before rados_nobjects_list_open() will list all objects in all
208 * namespaces.
209 *
210 * @warning Changing any of these settings is not thread-safe -
211 * librados users must synchronize any of these changes on their own,
212 * or use separate io contexts for each thread
213 */
214 typedef void *rados_ioctx_t;
215
216 /**
217 * @typedef rados_list_ctx_t
218 *
219 * An iterator for listing the objects in a pool.
220 * Used with rados_nobjects_list_open(),
221 * rados_nobjects_list_next(), and
222 * rados_nobjects_list_close().
223 */
224 typedef void *rados_list_ctx_t;
225
226 /**
227 * @typedef rados_object_list_cursor
228 *
229 * The cursor used with rados_enumerate_objects
230 * and accompanying methods.
231 */
232 typedef void * rados_object_list_cursor;
233
234 typedef struct rados_object_list_item {
235 size_t oid_length;
236 char *oid;
237
238 size_t nspace_length;
239 char *nspace;
240
241 size_t locator_length;
242 char *locator;
243 } rados_object_list_item;
244
245 /**
246 * @typedef rados_snap_t
247 * The id of a snapshot.
248 */
249 typedef uint64_t rados_snap_t;
250
251 /**
252 * @typedef rados_xattrs_iter_t
253 * An iterator for listing extended attributes on an object.
254 * Used with rados_getxattrs(), rados_getxattrs_next(), and
255 * rados_getxattrs_end().
256 */
257 typedef void *rados_xattrs_iter_t;
258
259 /**
260 * @typedef rados_omap_iter_t
261 * An iterator for listing omap key/value pairs on an object.
262 * Used with rados_read_op_omap_get_keys(), rados_read_op_omap_get_vals(),
263 * rados_read_op_omap_get_vals_by_keys(), rados_omap_get_next(), and
264 * rados_omap_get_end().
265 */
266 typedef void *rados_omap_iter_t;
267
268 /**
269 * @struct rados_pool_stat_t
270 * Usage information for a pool.
271 */
272 struct rados_pool_stat_t {
273 /// space used in bytes
274 uint64_t num_bytes;
275 /// space used in KB
276 uint64_t num_kb;
277 /// number of objects in the pool
278 uint64_t num_objects;
279 /// number of clones of objects
280 uint64_t num_object_clones;
281 /// num_objects * num_replicas
282 uint64_t num_object_copies;
283 uint64_t num_objects_missing_on_primary; ///< NOTE(review): presumably objects missing on their primary OSD -- confirm
284 /// number of objects found on no OSDs
285 uint64_t num_objects_unfound;
286 /// number of objects replicated fewer times than they should be
287 /// (but found on at least one OSD)
288 uint64_t num_objects_degraded;
289 uint64_t num_rd; ///< NOTE(review): presumably the number of read operations -- confirm
290 uint64_t num_rd_kb; ///< NOTE(review): presumably the amount of data read, in KB -- confirm
291 uint64_t num_wr; ///< NOTE(review): presumably the number of write operations -- confirm
292 uint64_t num_wr_kb; ///< NOTE(review): presumably the amount of data written, in KB -- confirm
293 };
294
295 /**
296 * @struct rados_cluster_stat_t
297 * Cluster-wide usage information
298 */
299 struct rados_cluster_stat_t {
300 uint64_t kb, kb_used, kb_avail; ///< total, used and available space (presumably in KB, going by the field names -- confirm)
301 uint64_t num_objects; ///< number of objects
302 };
303
304 /**
305 * @typedef rados_write_op_t
306 *
307 * An object write operation stores a number of operations which can be
308 * executed atomically. For usage, see:
309 * - Creation and deletion: rados_create_write_op() rados_release_write_op()
310 * - Extended attribute manipulation: rados_write_op_cmpxattr(),
311 * rados_write_op_setxattr(),
312 * rados_write_op_rmxattr()
313 * - Object map key/value pairs: rados_write_op_omap_set(),
314 * rados_write_op_omap_rm_keys(), rados_write_op_omap_clear(),
315 * rados_write_op_omap_cmp()
316 * - Object properties: rados_write_op_assert_exists(),
317 * rados_write_op_assert_version()
318 * - Creating objects: rados_write_op_create()
319 * - IO on objects: rados_write_op_append(), rados_write_op_write(),
320 * rados_write_op_write_full(), rados_write_op_writesame(), rados_write_op_remove(),
321 * rados_write_op_truncate(), rados_write_op_zero(), rados_write_op_cmpext()
322 * - Hints: rados_write_op_set_alloc_hint()
323 * - Performing the operation: rados_write_op_operate(), rados_aio_write_op_operate()
324 */
325 typedef void *rados_write_op_t;
326
327 /**
328 * @typedef rados_read_op_t
329 *
330 * An object read operation stores a number of operations which can be
331 * executed atomically. For usage, see:
332 * - Creation and deletion: rados_create_read_op() rados_release_read_op()
333 * - Extended attribute manipulation: rados_read_op_cmpxattr(),
334 * rados_read_op_getxattr(), rados_read_op_getxattrs()
335 * - Object map key/value pairs: rados_read_op_omap_get_vals(),
336 * rados_read_op_omap_get_keys(), rados_read_op_omap_get_vals_by_keys(),
337 * rados_read_op_omap_cmp()
338 * - Object properties: rados_read_op_stat(), rados_read_op_assert_exists(),
339 * rados_read_op_assert_version()
340 * - IO on objects: rados_read_op_read(), rados_read_op_checksum(),
341 * rados_read_op_cmpext()
342 * - Custom operations: rados_read_op_exec(), rados_read_op_exec_user_buf()
343 * - Request properties: rados_read_op_set_flags()
344 * - Performing the operation: rados_read_op_operate(),
345 * rados_aio_read_op_operate()
346 */
347 typedef void *rados_read_op_t;
348
349 /**
350 * @typedef rados_completion_t
351 * Represents the state of an asynchronous operation - it contains the
352 * return value once the operation completes, and can be used to block
353 * until the operation is complete or safe.
354 */
355 typedef void *rados_completion_t;
356
357 /**
358 * @struct blkin_trace_info
359 * blkin trace information for Zipkin tracing
360 */
361 struct blkin_trace_info;
362
363 /**
364 * Get the version of librados.
365 *
366 * The version number is major.minor.extra. Note that this is
367 * unrelated to the Ceph version number.
368 *
369 * TODO: define version semantics, i.e.:
370 * - incrementing major is for backwards-incompatible changes
371 * - incrementing minor is for backwards-compatible changes
372 * - incrementing extra is for bug fixes
373 *
374 * @param major where to store the major version number
375 * @param minor where to store the minor version number
376 * @param extra where to store the extra version number
377 */
378 CEPH_RADOS_API void rados_version(int *major, int *minor, int *extra);
379
380 /**
381 * @name Setup and Teardown
382 * These are the first and last functions that should be called
383 * when using librados.
384 *
385 * @{
386 */
387
388 /**
389 * Create a handle for communicating with a RADOS cluster.
390 *
391 * Ceph environment variables are read when this is called, so if
392 * $CEPH_ARGS specifies everything you need to connect, no further
393 * configuration is necessary.
394 *
395 * @param cluster where to store the handle
396 * @param id the user to connect as (i.e. admin, not client.admin)
397 * @returns 0 on success, negative error code on failure
398 */
399 CEPH_RADOS_API int rados_create(rados_t *cluster, const char * const id);
400
401 /**
402 * Extended version of rados_create.
403 *
404 * Like rados_create, but
405 * 1) don't assume 'client\.'+id; allow full specification of name
406 * 2) allow specification of cluster name
407 * 3) flags for future expansion
408 */
409 CEPH_RADOS_API int rados_create2(rados_t *pcluster,
410 const char *const clustername,
411 const char * const name, uint64_t flags);
412
413 /**
414 * Initialize a cluster handle from an existing configuration.
415 *
416 * Share configuration state with another rados_t instance.
417 *
418 * @param cluster where to store the handle
419 * @param cct the existing configuration to use
420 * @returns 0 on success, negative error code on failure
421 */
422 CEPH_RADOS_API int rados_create_with_context(rados_t *cluster,
423 rados_config_t cct);
424
425 /**
426 * Ping the monitor with ID mon_id, storing the resulting reply in
427 * outstr (if specified) and the reply's length in outstrlen.
428 *
429 * The result buffer is allocated on the heap; the caller is
430 * expected to release that memory with rados_buffer_free(). The
431 * buffer and length pointers can be NULL, in which case they are
432 * not filled in.
433 *
434 * @param cluster cluster handle
435 * @param[in] mon_id ID of the monitor to ping
436 * @param[out] outstr double pointer with the resulting reply
437 * @param[out] outstrlen pointer with the size of the reply in outstr
438 */
439 CEPH_RADOS_API int rados_ping_monitor(rados_t cluster, const char *mon_id,
440 char **outstr, size_t *outstrlen);
441
442 /**
443 * Connect to the cluster.
444 *
445 * @note BUG: Before calling this, calling a function that communicates with the
446 * cluster will crash.
447 *
448 * @pre The cluster handle is configured with at least a monitor
449 * address. If cephx is enabled, a client name and secret must also be
450 * set.
451 *
452 * @post If this succeeds, any function in librados may be used
453 *
454 * @param cluster The cluster to connect to.
455 * @returns 0 on success, negative error code on failure
456 */
457 CEPH_RADOS_API int rados_connect(rados_t cluster);
458
459 /**
460 * Disconnects from the cluster.
461 *
462 * For clean up, this is only necessary after rados_connect() has
463 * succeeded.
464 *
465 * @warning This does not guarantee any asynchronous writes have
466 * completed. To do that, you must call rados_aio_flush() on all open
467 * io contexts.
468 *
469 * @warning We implicitly call rados_watch_flush() on shutdown. If
470 * there are watches being used, this should be done explicitly before
471 * destroying the relevant IoCtx. We do it here as a safety measure.
472 *
473 * @post the cluster handle cannot be used again
474 *
475 * @param cluster the cluster to shutdown
476 */
477 CEPH_RADOS_API void rados_shutdown(rados_t cluster);
478
479 /** @} init */
480
481 /**
482 * @name Configuration
483 * These functions read and update Ceph configuration for a cluster
484 * handle. Any configuration changes must be done before connecting to
485 * the cluster.
486 *
487 * Options that librados users might want to set include:
488 * - mon_host
489 * - auth_supported
490 * - key, keyfile, or keyring when using cephx
491 * - log_file, log_to_stderr, err_to_stderr, and log_to_syslog
492 * - debug_rados, debug_objecter, debug_monc, debug_auth, or debug_ms
493 *
494 * All possible options can be found in src/common/config_opts.h in ceph.git
495 *
496 * @{
497 */
498
499 /**
500 * Configure the cluster handle using a Ceph config file
501 *
502 * If path is NULL, the default locations are searched, and the first
503 * found is used. The locations are:
504 * - $CEPH_CONF (environment variable)
505 * - /etc/ceph/ceph.conf
506 * - ~/.ceph/config
507 * - ceph.conf (in the current working directory)
508 *
509 * @pre rados_connect() has not been called on the cluster handle
510 *
511 * @param cluster cluster handle to configure
512 * @param path path to a Ceph configuration file
513 * @returns 0 on success, negative error code on failure
514 */
515 CEPH_RADOS_API int rados_conf_read_file(rados_t cluster, const char *path);
516
517 /**
518 * Configure the cluster handle with command line arguments
519 *
520 * argv can contain any common Ceph command line option, including any
521 * configuration parameter prefixed by '--' and replacing spaces with
522 * dashes or underscores. For example, the following options are equivalent:
523 * - --mon-host 10.0.0.1:6789
524 * - --mon_host 10.0.0.1:6789
525 * - -m 10.0.0.1:6789
526 *
527 * @pre rados_connect() has not been called on the cluster handle
528 *
529 * @param cluster cluster handle to configure
530 * @param argc number of arguments in argv
531 * @param argv arguments to parse
532 * @returns 0 on success, negative error code on failure
533 */
534 CEPH_RADOS_API int rados_conf_parse_argv(rados_t cluster, int argc,
535 const char **argv);
536
537
538 /**
539 * Configure the cluster handle with command line arguments, returning
540 * any remainders. Same as rados_conf_parse_argv(), except for the extra
541 * remargv argument, which holds the returned unrecognized arguments.
542 *
543 * @pre rados_connect() has not been called on the cluster handle
544 *
545 * @param cluster cluster handle to configure
546 * @param argc number of arguments in argv
547 * @param argv arguments to parse
548 * @param remargv char* array for returned unrecognized arguments
549 * @returns 0 on success, negative error code on failure
550 */
551 CEPH_RADOS_API int rados_conf_parse_argv_remainder(rados_t cluster, int argc,
552 const char **argv,
553 const char **remargv);
554 /**
555 * Configure the cluster handle based on an environment variable
556 *
557 * The contents of the environment variable are parsed as if they were
558 * Ceph command line options. If var is NULL, the CEPH_ARGS
559 * environment variable is used.
560 *
561 * @pre rados_connect() has not been called on the cluster handle
562 *
563 * @note BUG: this is not threadsafe - it uses a static buffer
564 *
565 * @param cluster cluster handle to configure
566 * @param var name of the environment variable to read
567 * @returns 0 on success, negative error code on failure
568 */
569 CEPH_RADOS_API int rados_conf_parse_env(rados_t cluster, const char *var);
570
571 /**
572 * Set a configuration option
573 *
574 * @pre rados_connect() has not been called on the cluster handle
575 *
576 * @param cluster cluster handle to configure
577 * @param option option to set
578 * @param value value of the option
579 * @returns 0 on success, negative error code on failure
580 * @returns -ENOENT when the option is not a Ceph configuration option
581 */
582 CEPH_RADOS_API int rados_conf_set(rados_t cluster, const char *option,
583 const char *value);
584
585 /**
586 * Get the value of a configuration option
587 *
588 * @param cluster configuration to read
589 * @param option which option to read
590 * @param buf where to write the configuration value
591 * @param len the size of buf in bytes
592 * @returns 0 on success, negative error code on failure
593 * @returns -ENAMETOOLONG if the buffer is too short to contain the
594 * requested value
595 */
596 CEPH_RADOS_API int rados_conf_get(rados_t cluster, const char *option,
597 char *buf, size_t len);
598
599 /** @} config */
600
601 /**
602 * Read usage info about the cluster
603 *
604 * This tells you total space, space used, space available, and number
605 * of objects. These are not updated immediately when data is written,
606 * they are eventually consistent.
607 *
608 * @param cluster cluster to query
609 * @param result where to store the results
610 * @returns 0 on success, negative error code on failure
611 */
612 CEPH_RADOS_API int rados_cluster_stat(rados_t cluster,
613 struct rados_cluster_stat_t *result);
614
615 /**
616 * Get the fsid of the cluster as a hexadecimal string.
617 *
618 * The fsid is a unique id of an entire Ceph cluster.
619 *
620 * @param cluster where to get the fsid
621 * @param buf where to write the fsid
622 * @param len the size of buf in bytes (should be 37)
623 * @returns 0 on success, negative error code on failure
624 * @returns -ERANGE if the buffer is too short to contain the
625 * fsid
626 */
627 CEPH_RADOS_API int rados_cluster_fsid(rados_t cluster, char *buf, size_t len);
628
629 /**
630 * Get/wait for the most recent osdmap
631 *
632 * @param cluster cluster handle
633 * @returns 0 on success, negative error code on failure
634 */
635 CEPH_RADOS_API int rados_wait_for_latest_osdmap(rados_t cluster);
636
637 /**
638 * @name Pools
639 *
640 * RADOS pools are separate namespaces for objects. Pools may have
641 * different crush rules associated with them, so they could have
642 * differing replication levels or placement strategies. RADOS
643 * permissions are also tied to pools - users can have different read,
644 * write, and execute permissions on a per-pool basis.
645 *
646 * @{
647 */
648
649 /**
650 * List pools
651 *
652 * Gets a list of pool names as NULL-terminated strings. The pool
653 * names will be placed in the supplied buffer one after another.
654 * After the last pool name, there will be two 0 bytes in a row.
655 *
656 * If len is too short to fit all the pool name entries we need, we will fill
657 * as much as we can.
658 *
659 * Buf may be null to determine the buffer size needed to list all pools.
660 *
661 * @param cluster cluster handle
662 * @param buf output buffer
663 * @param len output buffer length
664 * @returns length of the buffer we would need to list all pools
665 */
666 CEPH_RADOS_API int rados_pool_list(rados_t cluster, char *buf, size_t len);
667
668 /**
669 * List inconsistent placement groups of the given pool
670 *
671 * Gets a list of inconsistent placement groups as NULL-terminated strings.
672 * The placement group names will be placed in the supplied buffer one after
673 * another. After the last name, there will be two 0 bytes in a row.
674 *
675 * If len is too short to fit all the placement group entries we need, we will
676 * fill as much as we can.
677 *
678 * @param cluster cluster handle
679 * @param pool pool ID
680 * @param buf output buffer
681 * @param len output buffer length
682 * @returns length of the buffer we would need to list all inconsistent placement groups
683 */
684 CEPH_RADOS_API int rados_inconsistent_pg_list(rados_t cluster, int64_t pool,
685 char *buf, size_t len);
686
687 /**
688 * Get a configuration handle for a rados cluster handle
689 *
690 * This handle is valid only as long as the cluster handle is valid.
691 *
692 * @param cluster cluster handle
693 * @returns config handle for this cluster
694 */
695 CEPH_RADOS_API rados_config_t rados_cct(rados_t cluster);
696
697 /**
698 * Get a global id for current instance
699 *
700 * This id is a unique representation of current connection to the cluster
701 *
702 * @param cluster cluster handle
703 * @returns instance global id
704 */
705 CEPH_RADOS_API uint64_t rados_get_instance_id(rados_t cluster);
706
707 /**
708 * Create an io context
709 *
710 * The io context allows you to perform operations within a particular
711 * pool. For more details see rados_ioctx_t.
712 *
713 * @param cluster which cluster the pool is in
714 * @param pool_name name of the pool
715 * @param ioctx where to store the io context
716 * @returns 0 on success, negative error code on failure
717 */
718 CEPH_RADOS_API int rados_ioctx_create(rados_t cluster, const char *pool_name,
719 rados_ioctx_t *ioctx);
720 CEPH_RADOS_API int rados_ioctx_create2(rados_t cluster, int64_t pool_id,
721 rados_ioctx_t *ioctx);
722
723 /**
724 * The opposite of rados_ioctx_create
725 *
726 * This just tells librados that you no longer need to use the io context.
727 * It may not be freed immediately if there are pending asynchronous
728 * requests on it, but you should not use an io context again after
729 * calling this function on it.
730 *
731 * @warning This does not guarantee any asynchronous
732 * writes have completed. You must call rados_aio_flush()
733 * on the io context before destroying it to do that.
734 *
735 * @warning If this ioctx is used by rados_watch, the caller needs to
736 * be sure that all registered watches are disconnected via
737 * rados_unwatch() and that rados_watch_flush() is called. This
738 * ensures that a racing watch callback does not make use of a
739 * destroyed ioctx.
740 *
741 * @param io the io context to dispose of
742 */
743 CEPH_RADOS_API void rados_ioctx_destroy(rados_ioctx_t io);
744
745 /**
746 * Get configuration handle for a pool handle
747 *
748 * @param io pool handle
749 * @returns rados_config_t for this cluster
750 */
751 CEPH_RADOS_API rados_config_t rados_ioctx_cct(rados_ioctx_t io);
752
753 /**
754 * Get the cluster handle used by this rados_ioctx_t
755 * Note that this is a weak reference, and should not
756 * be destroyed via rados_shutdown().
757 *
758 * @param io the io context
759 * @returns the cluster handle for this io context
760 */
761 CEPH_RADOS_API rados_t rados_ioctx_get_cluster(rados_ioctx_t io);
762
763 /**
764 * Get pool usage statistics
765 *
766 * Fills in a rados_pool_stat_t after querying the cluster.
767 *
768 * @param io determines which pool to query
769 * @param stats where to store the results
770 * @returns 0 on success, negative error code on failure
771 */
772 CEPH_RADOS_API int rados_ioctx_pool_stat(rados_ioctx_t io,
773 struct rados_pool_stat_t *stats);
774
775 /**
776 * Get the id of a pool
777 *
778 * @param cluster which cluster the pool is in
779 * @param pool_name which pool to look up
780 * @returns id of the pool
781 * @returns -ENOENT if the pool is not found
782 */
783 CEPH_RADOS_API int64_t rados_pool_lookup(rados_t cluster,
784 const char *pool_name);
785
786 /**
787 * Get the name of a pool
788 *
789 * @param cluster which cluster the pool is in
790 * @param id the id of the pool
791 * @param buf where to store the pool name
792 * @param maxlen size of buffer where name will be stored
793 * @returns length of string stored, or -ERANGE if buffer too small
794 */
795 CEPH_RADOS_API int rados_pool_reverse_lookup(rados_t cluster, int64_t id,
796 char *buf, size_t maxlen);
797
798 /**
799 * Create a pool with default settings
800 *
801 * The default owner is the admin user (auid 0).
802 * The default crush rule is rule 0.
803 *
804 * @param cluster the cluster in which the pool will be created
805 * @param pool_name the name of the new pool
806 * @returns 0 on success, negative error code on failure
807 */
808 CEPH_RADOS_API int rados_pool_create(rados_t cluster, const char *pool_name);
809
810 /**
811 * Create a pool owned by a specific auid
812 *
813 * The auid is the authenticated user id to give ownership of the pool.
814 * TODO: document auid and the rest of the auth system
815 *
816 * @param cluster the cluster in which the pool will be created
817 * @param pool_name the name of the new pool
818 * @param auid the id of the owner of the new pool
819 * @returns 0 on success, negative error code on failure
820 */
821 CEPH_RADOS_API int rados_pool_create_with_auid(rados_t cluster,
822 const char *pool_name,
823 uint64_t auid);
824
825 /**
826 * Create a pool with a specific CRUSH rule
827 *
828 * @param cluster the cluster in which the pool will be created
829 * @param pool_name the name of the new pool
830 * @param crush_rule_num which rule to use for placement in the new pool
831 * @returns 0 on success, negative error code on failure
832 */
833 CEPH_RADOS_API int rados_pool_create_with_crush_rule(rados_t cluster,
834 const char *pool_name,
835 uint8_t crush_rule_num);
836
837 /**
838 * Create a pool with a specific CRUSH rule and auid
839 *
840 * This is a combination of rados_pool_create_with_crush_rule() and
841 * rados_pool_create_with_auid().
842 *
843 * @param cluster the cluster in which the pool will be created
844 * @param pool_name the name of the new pool
845 * @param crush_rule_num which rule to use for placement in the new pool
846 * @param auid the id of the owner of the new pool
847 * @returns 0 on success, negative error code on failure
848 */
849 CEPH_RADOS_API int rados_pool_create_with_all(rados_t cluster,
850 const char *pool_name,
851 uint64_t auid,
852 uint8_t crush_rule_num);
853
854 /**
855 * Returns the pool that is the base tier for this pool.
856 *
857 * The return value is the ID of the pool that should be used to read from/write to.
858 * If tiering is not set up for the pool, returns \c pool.
859 *
860 * @param cluster the cluster the pool is in
861 * @param pool ID of the pool to query
862 * @param[out] base_tier base tier, or \c pool if tiering is not configured
863 * @returns 0 on success, negative error code on failure
864 */
865 CEPH_RADOS_API int rados_pool_get_base_tier(rados_t cluster, int64_t pool,
866 int64_t* base_tier);
867
868 /**
869 * Delete a pool and all data inside it
870 *
871 * The pool is removed from the cluster immediately,
872 * but the actual data is deleted in the background.
873 *
874 * @param cluster the cluster the pool is in
875 * @param pool_name which pool to delete
876 * @returns 0 on success, negative error code on failure
877 */
878 CEPH_RADOS_API int rados_pool_delete(rados_t cluster, const char *pool_name);
879
880 /**
881 * Attempt to change an io context's associated auid "owner"
882 *
883 * Requires that you have write permission on both the current and new
884 * auid.
885 *
886 * @param io reference to the pool to change.
887 * @param auid the auid you wish the io to have.
888 * @returns 0 on success, negative error code on failure
889 */
890 CEPH_RADOS_API int rados_ioctx_pool_set_auid(rados_ioctx_t io, uint64_t auid);
891
892 /**
893 * Get the auid of a pool
894 *
895 * @param io pool to query
896 * @param auid where to store the auid
897 * @returns 0 on success, negative error code on failure
898 */
899 CEPH_RADOS_API int rados_ioctx_pool_get_auid(rados_ioctx_t io, uint64_t *auid);
900
901 /* deprecated, use rados_ioctx_pool_requires_alignment2 instead */
902 CEPH_RADOS_API int rados_ioctx_pool_requires_alignment(rados_ioctx_t io)
903 __attribute__((deprecated));
904
905 /**
906 * Test whether the specified pool requires alignment or not.
907 *
908 * @param io pool to query
909 * @param[out] requires set to 1 if the pool requires alignment, 0 if not.
910 * @returns 0 on success, negative error code on failure
911 */
912 CEPH_RADOS_API int rados_ioctx_pool_requires_alignment2(rados_ioctx_t io,
913 int *requires);
914
915 /* deprecated, use rados_ioctx_pool_required_alignment2 instead */
916 CEPH_RADOS_API uint64_t rados_ioctx_pool_required_alignment(rados_ioctx_t io)
917 __attribute__((deprecated));
918
919 /**
920 * Get the alignment flavor of a pool
921 *
922 * @param io pool to query
923 * @param alignment where to store the alignment flavor
924 * @returns 0 on success, negative error code on failure
925 */
926 CEPH_RADOS_API int rados_ioctx_pool_required_alignment2(rados_ioctx_t io,
927 uint64_t *alignment);
928
929 /**
930 * Get the pool id of the io context
931 *
932 * @param io the io context to query
933 * @returns the id of the pool the io context uses
934 */
935 CEPH_RADOS_API int64_t rados_ioctx_get_id(rados_ioctx_t io);
936
937 /**
938 * Get the pool name of the io context
939 *
940 * @param io the io context to query
941 * @param buf pointer to buffer where name will be stored
942 * @param maxlen size of buffer where name will be stored
943 * @returns length of string stored, or -ERANGE if buffer too small
944 */
945 CEPH_RADOS_API int rados_ioctx_get_pool_name(rados_ioctx_t io, char *buf,
946 unsigned maxlen);
947
948 /** @} pools */
949
950 /**
951 * @name Object Locators
952 *
953 * @{
954 */
955
956 /**
957 * Set the key for mapping objects to pgs within an io context.
958 *
959 * The key is used instead of the object name to determine which
960 * placement groups an object is put in. This affects all subsequent
961 * operations of the io context - until a different locator key is
962 * set, all objects in this io context will be placed in the same pg.
963 *
964 * @param io the io context to change
965 * @param key the key to use as the object locator, or NULL to discard
966 * any previously set key
967 */
968 CEPH_RADOS_API void rados_ioctx_locator_set_key(rados_ioctx_t io,
969 const char *key);
970
971 /**
972 * Set the namespace for objects within an io context
973 *
974 * The namespace specification further refines a pool into different
975 * domains. The mapping of objects to pgs is also based on this
976 * value.
977 *
978 * @param io the io context to change
979 * @param nspace the name to use as the namespace, or NULL to use the
980 * default namespace
981 */
982 CEPH_RADOS_API void rados_ioctx_set_namespace(rados_ioctx_t io,
983 const char *nspace);
984 /** @} obj_loc */
985
986 /**
987 * @name Listing Objects
988 * @{
989 */
990 /**
991 * Start listing objects in a pool
992 *
993 * @param io the pool to list from
994 * @param ctx the handle to store list context in
995 * @returns 0 on success, negative error code on failure
996 */
997 CEPH_RADOS_API int rados_nobjects_list_open(rados_ioctx_t io,
998 rados_list_ctx_t *ctx);
999
1000 /**
1001 * Return hash position of iterator, rounded to the current PG
1002 *
1003 * @param ctx iterator marking where you are in the listing
1004 * @returns current hash position, rounded to the current pg
1005 */
1006 CEPH_RADOS_API uint32_t rados_nobjects_list_get_pg_hash_position(rados_list_ctx_t ctx);
1007
1008 /**
1009 * Reposition object iterator to a different hash position
1010 *
1011 * @param ctx iterator marking where you are in the listing
1012 * @param pos hash position to move to
1013 * @returns actual (rounded) position we moved to
1014 */
1015 CEPH_RADOS_API uint32_t rados_nobjects_list_seek(rados_list_ctx_t ctx,
1016 uint32_t pos);
1017
1018 /**
1019 * Reposition object iterator to a different position
1020 *
1021 * @param ctx iterator marking where you are in the listing
1022 * @param cursor position to move to
1023 * @returns rounded position we moved to
1024 */
1025 CEPH_RADOS_API uint32_t rados_nobjects_list_seek_cursor(rados_list_ctx_t ctx,
1026 rados_object_list_cursor cursor);
1027
1028 /**
1029 * Get a cursor handle for the object iterator's current position
1030 *
1031 * The returned handle must be released with rados_object_list_cursor_free().
1032 *
1033 * @param ctx iterator marking where you are in the listing
1034 * @param cursor where to store the cursor handle
1035 * @returns 0 on success, negative error code on failure
1036 */
1037 CEPH_RADOS_API int rados_nobjects_list_get_cursor(rados_list_ctx_t ctx,
1038 rados_object_list_cursor *cursor);
1039
1040 /**
1041 * Get the next object name and locator in the pool
1042 *
1043 * *entry and *key are valid until next call to rados_nobjects_list_*
1044 *
1045 * @param ctx iterator marking where you are in the listing
1046 * @param entry where to store the name of the entry
1047 * @param key where to store the object locator (set to NULL to ignore)
1048 * @param nspace where to store the object namespace (set to NULL to ignore)
1049 * @returns 0 on success, negative error code on failure
1050 * @returns -ENOENT when there are no more objects to list
1051 */
1052 CEPH_RADOS_API int rados_nobjects_list_next(rados_list_ctx_t ctx,
1053 const char **entry,
1054 const char **key,
1055 const char **nspace);
1056
1057 /**
1058 * Close the object listing handle.
1059 *
1060 * This should be called when the handle is no longer needed.
1061 * The handle should not be used after it has been closed.
1062 *
1063 * @param ctx the handle to close
1064 */
1065 CEPH_RADOS_API void rados_nobjects_list_close(rados_list_ctx_t ctx);
1066
1067 /**
1068 * Get cursor handle pointing to the *beginning* of a pool.
1069 *
1070 * This is an opaque handle pointing to the start of a pool. It must
1071 * be released with rados_object_list_cursor_free().
1072 *
1073 * @param io ioctx for the pool
1074 * @returns handle for the pool, NULL on error (pool does not exist)
1075 */
1076 CEPH_RADOS_API rados_object_list_cursor rados_object_list_begin(
1077 rados_ioctx_t io);
1078
1079 /**
1080 * Get cursor handle pointing to the *end* of a pool.
1081 *
1082 * This is an opaque handle pointing to the end of a pool. It must
1083 * be released with rados_object_list_cursor_free().
1084 *
1085 * @param io ioctx for the pool
1086 * @returns handle for the pool, NULL on error (pool does not exist)
1087 */
1088 CEPH_RADOS_API rados_object_list_cursor rados_object_list_end(rados_ioctx_t io);
1089
1090 /**
1091 * Check if a cursor has reached the end of a pool
1092 *
1093 * @param io ioctx
1094 * @param cur cursor
1095 * @returns 1 if the cursor has reached the end of the pool, 0 otherwise
1096 */
1097 CEPH_RADOS_API int rados_object_list_is_end(rados_ioctx_t io,
1098 rados_object_list_cursor cur);
1099
1100 /**
1101 * Release a cursor
1102 *
1103 * Release a cursor. The handle may not be used after this point.
1104 *
1105 * @param io ioctx
1106 * @param cur cursor
1107 */
1108 CEPH_RADOS_API void rados_object_list_cursor_free(rados_ioctx_t io,
1109 rados_object_list_cursor cur);
1110
1111 /**
1112 * Compare two cursor positions
1113 *
1114 * Compare two cursors, and indicate whether the first cursor precedes,
1115 * matches, or follows the second.
1116 *
1117 * @param io ioctx
1118 * @param lhs first cursor
1119 * @param rhs second cursor
1120 * @returns -1, 0, or 1 for lhs < rhs, lhs == rhs, or lhs > rhs
1121 */
1122 CEPH_RADOS_API int rados_object_list_cursor_cmp(rados_ioctx_t io,
1123 rados_object_list_cursor lhs, rados_object_list_cursor rhs);
1124
1125 /**
1126 * @return the number of items set in the result array
1127 */
1128 CEPH_RADOS_API int rados_object_list(rados_ioctx_t io,
1129 const rados_object_list_cursor start,
1130 const rados_object_list_cursor finish,
1131 const size_t result_size,
1132 const char *filter_buf,
1133 const size_t filter_buf_len,
1134 rados_object_list_item *results,
1135 rados_object_list_cursor *next);
1136
1137 CEPH_RADOS_API void rados_object_list_free(
1138 const size_t result_size,
1139 rados_object_list_item *results);
1140
1141 /**
1142 * Obtain cursors delineating a subset of a range. Use this when you want
1143 * to split up the work of iterating over the global namespace, e.g. when
1144 * iterating in parallel with `m` workers, each worker taking an id `n`.
1145 *
1146 * @param io ioctx
1147 * @param start start of the range to be sliced up (inclusive)
1148 * @param finish end of the range to be sliced up (exclusive)
1149 * @param n which of the m chunks you would like to get cursors for
1150 * @param m how many chunks to divide start-finish into
1151 * @param split_start cursor populated with start of the subrange (inclusive)
1152 * @param split_finish cursor populated with end of the subrange (exclusive)
1153 */
1154 CEPH_RADOS_API void rados_object_list_slice(rados_ioctx_t io,
1155 const rados_object_list_cursor start,
1156 const rados_object_list_cursor finish,
1157 const size_t n,
1158 const size_t m,
1159 rados_object_list_cursor *split_start,
1160 rados_object_list_cursor *split_finish);
1161
1162
1163 /** @} Listing Objects */
1164
1165 /**
1166 * @name Snapshots
1167 *
1168 * RADOS snapshots are based upon sequence numbers that form a
1169 * snapshot context. They are pool-specific. The snapshot context
1170 * consists of the current snapshot sequence number for a pool, and an
1171 * array of sequence numbers at which snapshots were taken, in
1172 * descending order. Whenever a snapshot is created or deleted, the
1173 * snapshot sequence number for the pool is increased. To add a new
1174 * snapshot, the new snapshot sequence number must be increased and
1175 * added to the snapshot context.
1176 *
1177 * There are two ways to manage these snapshot contexts:
1178 * -# within the RADOS cluster
1179 * These are called pool snapshots, and store the snapshot context
1180 * in the OSDMap. These represent a snapshot of all the objects in
1181 * a pool.
1182 * -# within the RADOS clients
1183 * These are called self-managed snapshots, and push the
1184 * responsibility for keeping track of the snapshot context to the
1185 * clients. For every write, the client must send the snapshot
1186 * context. In librados, this is accomplished with
1187 * rados_selfmanaged_snap_set_write_ctx(). These are more
1188 * difficult to manage, but are restricted to specific objects
1189 * instead of applying to an entire pool.
1190 *
1191 * @{
1192 */
1193
1194 /**
1195 * Create a pool-wide snapshot
1196 *
1197 * @param io the pool to snapshot
1198 * @param snapname the name of the snapshot
1199 * @returns 0 on success, negative error code on failure
1200 */
1201 CEPH_RADOS_API int rados_ioctx_snap_create(rados_ioctx_t io,
1202 const char *snapname);
1203
1204 /**
1205 * Delete a pool snapshot
1206 *
1207 * @param io the pool to delete the snapshot from
1208 * @param snapname which snapshot to delete
1209 * @returns 0 on success, negative error code on failure
1210 */
1211 CEPH_RADOS_API int rados_ioctx_snap_remove(rados_ioctx_t io,
1212 const char *snapname);
1213
1214 /**
1215 * Rollback an object to a pool snapshot
1216 *
1217 * The contents of the object will be the same as
1218 * when the snapshot was taken.
1219 *
1220 * @param io the pool in which the object is stored
1221 * @param oid the name of the object to rollback
1222 * @param snapname which snapshot to rollback to
1223 * @returns 0 on success, negative error code on failure
1224 */
1225 CEPH_RADOS_API int rados_ioctx_snap_rollback(rados_ioctx_t io, const char *oid,
1226 const char *snapname);
1227
1228 /**
1229 * @warning Deprecated: Use rados_ioctx_snap_rollback() instead
1230 */
1231 CEPH_RADOS_API int rados_rollback(rados_ioctx_t io, const char *oid,
1232 const char *snapname)
1233 __attribute__((deprecated));
1234
1235 /**
1236 * Set the snapshot from which reads are performed.
1237 *
1238 * Subsequent reads will return data as it was at the time of that
1239 * snapshot.
1240 *
1241 * @param io the io context to change
1242 * @param snap the id of the snapshot to set, or LIBRADOS_SNAP_HEAD for no
1243 * snapshot (i.e. normal operation)
1244 */
1245 CEPH_RADOS_API void rados_ioctx_snap_set_read(rados_ioctx_t io,
1246 rados_snap_t snap);
1247
1248 /**
1249 * Allocate an ID for a self-managed snapshot
1250 *
1251 * Get a unique ID to put in the snapshot context to create a
1252 * snapshot. A clone of an object is not created until a write with
1253 * the new snapshot context is completed.
1254 *
1255 * @param io the pool in which the snapshot will exist
1256 * @param snapid where to store the newly allocated snapshot ID
1257 * @returns 0 on success, negative error code on failure
1258 */
1259 CEPH_RADOS_API int rados_ioctx_selfmanaged_snap_create(rados_ioctx_t io,
1260 rados_snap_t *snapid);
1261 CEPH_RADOS_API void
1262 rados_aio_ioctx_selfmanaged_snap_create(rados_ioctx_t io,
1263 rados_snap_t *snapid,
1264 rados_completion_t completion);
1265
1266 /**
1267 * Remove a self-managed snapshot
1268 *
1269 * This increases the snapshot sequence number, which will cause
1270 * snapshots to be removed lazily.
1271 *
1272 * @param io the pool the snapshot belongs to
1273 * @param snapid the ID of the self-managed snapshot to remove
1274 * @returns 0 on success, negative error code on failure
1275 */
1276 CEPH_RADOS_API int rados_ioctx_selfmanaged_snap_remove(rados_ioctx_t io,
1277 rados_snap_t snapid);
1278 CEPH_RADOS_API void
1279 rados_aio_ioctx_selfmanaged_snap_remove(rados_ioctx_t io,
1280 rados_snap_t snapid,
1281 rados_completion_t completion);
1282
1283 /**
1284 * Rollback an object to a self-managed snapshot
1285 *
1286 * The contents of the object will be the same as
1287 * when the snapshot was taken.
1288 *
1289 * @param io the pool in which the object is stored
1290 * @param oid the name of the object to rollback
1291 * @param snapid which snapshot to rollback to
1292 * @returns 0 on success, negative error code on failure
1293 */
1294 CEPH_RADOS_API int rados_ioctx_selfmanaged_snap_rollback(rados_ioctx_t io,
1295 const char *oid,
1296 rados_snap_t snapid);
1297
1298 /**
1299 * Set the snapshot context for use when writing to objects
1300 *
1301 * This is stored in the io context, and applies to all future writes.
1302 *
1303 * @param io the io context to change
1304 * @param seq the newest snapshot sequence number for the pool
1305 * @param snaps array of snapshot ids, sorted in descending order
1306 * @param num_snaps how many snapshots are in the snaps array
1307 * @returns 0 on success, negative error code on failure
1308 * @returns -EINVAL if snaps are not in descending order
1309 */
1310 CEPH_RADOS_API int rados_ioctx_selfmanaged_snap_set_write_ctx(rados_ioctx_t io,
1311 rados_snap_t seq,
1312 rados_snap_t *snaps,
1313 int num_snaps);
1314
1315 /**
1316 * List all the ids of pool snapshots
1317 *
1318 * If the output array does not have enough space to fit all the
1319 * snapshots, -ERANGE is returned and the caller should retry with a
1320 * larger array.
1321 *
1322 * @param io the pool to read from
1323 * @param snaps where to store the results
1324 * @param maxlen the number of rados_snap_t that fit in the snaps array
1325 * @returns number of snapshots on success, negative error code on failure
1326 * @returns -ERANGE is returned if the snaps array is too short
1327 */
1328 CEPH_RADOS_API int rados_ioctx_snap_list(rados_ioctx_t io, rados_snap_t *snaps,
1329 int maxlen);
1330
1331 /**
1332 * Get the id of a pool snapshot
1333 *
1334 * @param io the pool to read from
1335 * @param name the snapshot to find
1336 * @param id where to store the result
1337 * @returns 0 on success, negative error code on failure
1338 */
1339 CEPH_RADOS_API int rados_ioctx_snap_lookup(rados_ioctx_t io, const char *name,
1340 rados_snap_t *id);
1341
1342 /**
1343 * Get the name of a pool snapshot
1344 *
1345 * @param io the pool to read from
1346 * @param id the snapshot to find
1347 * @param name where to store the result
1348 * @param maxlen the size of the name array
1349 * @returns 0 on success, negative error code on failure
1350 * @returns -ERANGE if the name array is too small
1351 */
1352 CEPH_RADOS_API int rados_ioctx_snap_get_name(rados_ioctx_t io, rados_snap_t id,
1353 char *name, int maxlen);
1354
1355 /**
1356 * Find when a pool snapshot occurred
1357 *
1358 * @param io the pool the snapshot was taken in
1359 * @param id the snapshot to lookup
1360 * @param t where to store the result
1361 * @returns 0 on success, negative error code on failure
1362 */
1363 CEPH_RADOS_API int rados_ioctx_snap_get_stamp(rados_ioctx_t io, rados_snap_t id,
1364 time_t *t);
1365
1366 /** @} Snapshots */
1367
1368 /**
1369 * @name Synchronous I/O
1370 * Writes are replicated to a number of OSDs based on the
1371 * configuration of the pool they are in. These write functions block
1372 * until data is in memory on all replicas of the object they're
1373 * writing to - they are equivalent to doing the corresponding
1374 * asynchronous write, and then calling
1375 * rados_aio_wait_for_complete(). For greater data safety, use the
1376 * asynchronous functions and rados_aio_wait_for_safe().
1377 *
1378 * @{
1379 */
1380
1381 /**
1382 * Return the version of the last object read or written to.
1383 *
1384 * This exposes the internal version number of the last object read or
1385 * written via this io context
1386 *
1387 * @param io the io context to check
1388 * @returns last read or written object version
1389 */
1390 CEPH_RADOS_API uint64_t rados_get_last_version(rados_ioctx_t io);
1391
1392 /**
1393 * Write *len* bytes from *buf* into the *oid* object, starting at
1394 * offset *off*. The value of *len* must be <= UINT_MAX/2.
1395 *
1396 * @note This will never return a positive value not equal to len.
1397 * @param io the io context in which the write will occur
1398 * @param oid name of the object
1399 * @param buf data to write
1400 * @param len length of the data, in bytes
1401 * @param off byte offset in the object to begin writing at
1402 * @returns 0 on success, negative error code on failure
1403 */
1404 CEPH_RADOS_API int rados_write(rados_ioctx_t io, const char *oid,
1405 const char *buf, size_t len, uint64_t off);
1406
1407 /**
1408 * Write *len* bytes from *buf* into the *oid* object. The value of
1409 * *len* must be <= UINT_MAX/2.
1410 *
1411 * The object is filled with the provided data. If the object exists,
1412 * it is atomically truncated and then written.
1413 *
1414 * @param io the io context in which the write will occur
1415 * @param oid name of the object
1416 * @param buf data to write
1417 * @param len length of the data, in bytes
1418 * @returns 0 on success, negative error code on failure
1419 */
1420 CEPH_RADOS_API int rados_write_full(rados_ioctx_t io, const char *oid,
1421 const char *buf, size_t len);
1422
1423 /**
1424 * Write the same *data_len* bytes from *buf* multiple times into the
1425 * *oid* object. *write_len* bytes are written in total, which must be
1426 * a multiple of *data_len*. The value of *write_len* and *data_len*
1427 * must be <= UINT_MAX/2.
1428 *
1429 * @param io the io context in which the write will occur
1430 * @param oid name of the object
1431 * @param buf data to write
1432 * @param data_len length of the data, in bytes
1433 * @param write_len the total number of bytes to write
1434 * @param off byte offset in the object to begin writing at
1435 * @returns 0 on success, negative error code on failure
1436 */
1437 CEPH_RADOS_API int rados_writesame(rados_ioctx_t io, const char *oid,
1438 const char *buf, size_t data_len,
1439 size_t write_len, uint64_t off);
1440
1441 /**
1442 * Append *len* bytes from *buf* into the *oid* object. The value of
1443 * *len* must be <= UINT_MAX/2.
1444 *
1445 * @param io the context to operate in
1446 * @param oid the name of the object
1447 * @param buf the data to append
1448 * @param len length of buf (in bytes)
1449 * @returns 0 on success, negative error code on failure
1450 */
1451 CEPH_RADOS_API int rados_append(rados_ioctx_t io, const char *oid,
1452 const char *buf, size_t len);
1453
1454 /**
1455 * Read data from an object
1456 *
1457 * The io context determines the snapshot to read from, if any was set
1458 * by rados_ioctx_snap_set_read().
1459 *
1460 * @param io the context in which to perform the read
1461 * @param oid the name of the object to read from
1462 * @param buf where to store the results
1463 * @param len the number of bytes to read
1464 * @param off the offset to start reading from in the object
1465 * @returns number of bytes read on success, negative error code on
1466 * failure
1467 */
1468 CEPH_RADOS_API int rados_read(rados_ioctx_t io, const char *oid, char *buf,
1469 size_t len, uint64_t off);
1470
1471 /**
1472 * Compute checksum from object data
1473 *
1474 * The io context determines the snapshot to checksum, if any was set
1475 * by rados_ioctx_snap_set_read(). The length of the init_value and
1476 * resulting checksum are dependent upon the checksum type:
1477 *
1478 * XXHASH64: le64
1479 * XXHASH32: le32
1480 * CRC32C: le32
1481 *
1482 * The checksum result is encoded the following manner:
1483 *
1484 * le32 num_checksum_chunks
1485 * {
1486 * leXX checksum for chunk (where XX = appropriate size for the checksum type)
1487 * } * num_checksum_chunks
1488 *
1489 * @param io the context in which to perform the checksum
1490 * @param oid the name of the object to checksum
1491 * @param type the checksum algorithm to utilize
1492 * @param init_value the init value for the algorithm
1493 * @param init_value_len the length of the init value
1494 * @param len the number of bytes to checksum
1495 * @param off the offset to start checksuming in the object
1496 * @param chunk_size optional length-aligned chunk size for checksums
1497 * @param pchecksum where to store the checksum result
1498 * @param checksum_len the number of bytes available for the result
1499 * @return negative error code on failure
1500 */
1501 CEPH_RADOS_API int rados_checksum(rados_ioctx_t io, const char *oid,
1502 rados_checksum_type_t type,
1503 const char *init_value, size_t init_value_len,
1504 size_t len, uint64_t off, size_t chunk_size,
1505 char *pchecksum, size_t checksum_len);
1506
1507 /**
1508 * Delete an object
1509 *
1510 * @note This does not delete any snapshots of the object.
1511 *
1512 * @param io the pool to delete the object from
1513 * @param oid the name of the object to delete
1514 * @returns 0 on success, negative error code on failure
1515 */
1516 CEPH_RADOS_API int rados_remove(rados_ioctx_t io, const char *oid);
1517
1518 /**
1519 * Resize an object
1520 *
1521 * If this enlarges the object, the new area is logically filled with
1522 * zeroes. If this shrinks the object, the excess data is removed.
1523 *
1524 * @param io the context in which to truncate
1525 * @param oid the name of the object
1526 * @param size the new size of the object in bytes
1527 * @returns 0 on success, negative error code on failure
1528 */
1529 CEPH_RADOS_API int rados_trunc(rados_ioctx_t io, const char *oid,
1530 uint64_t size);
1531
1532 /**
1533 * Compare an on-disk object range with a buffer
1534 *
1535 * @param io the context in which to perform the comparison
1536 * @param o name of the object
1537 * @param cmp_buf buffer containing bytes to be compared with object contents
1538 * @param cmp_len length to compare and size of cmp_buf in bytes
1539 * @param off object byte offset at which to start the comparison
1540 * @returns 0 on success, negative error code on failure,
1541 * (-MAX_ERRNO - mismatch_off) on mismatch
1542 */
1543 CEPH_RADOS_API int rados_cmpext(rados_ioctx_t io, const char *o,
1544 const char *cmp_buf, size_t cmp_len,
1545 uint64_t off);
1546
1547 /**
1548 * @name Xattrs
1549 * Extended attributes are stored as extended attributes on the files
1550 * representing an object on the OSDs. Thus, they have the same
1551 * limitations as the underlying filesystem. On ext4, this means that
1552 * the total data stored in xattrs cannot exceed 4KB.
1553 *
1554 * @{
1555 */
1556
1557 /**
1558 * Get the value of an extended attribute on an object.
1559 *
1560 * @param io the context in which the attribute is read
1561 * @param o name of the object
1562 * @param name which extended attribute to read
1563 * @param buf where to store the result
1564 * @param len size of buf in bytes
1565 * @returns length of xattr value on success, negative error code on failure
1566 */
1567 CEPH_RADOS_API int rados_getxattr(rados_ioctx_t io, const char *o,
1568 const char *name, char *buf, size_t len);
1569
1570 /**
1571 * Set an extended attribute on an object.
1572 *
1573 * @param io the context in which xattr is set
1574 * @param o name of the object
1575 * @param name which extended attribute to set
1576 * @param buf what to store in the xattr
1577 * @param len the number of bytes in buf
1578 * @returns 0 on success, negative error code on failure
1579 */
1580 CEPH_RADOS_API int rados_setxattr(rados_ioctx_t io, const char *o,
1581 const char *name, const char *buf,
1582 size_t len);
1583
1584 /**
1585 * Delete an extended attribute from an object.
1586 *
1587 * @param io the context in which to delete the xattr
1588 * @param o the name of the object
1589 * @param name which xattr to delete
1590 * @returns 0 on success, negative error code on failure
1591 */
1592 CEPH_RADOS_API int rados_rmxattr(rados_ioctx_t io, const char *o,
1593 const char *name);
1594
1595 /**
1596 * Start iterating over xattrs on an object.
1597 *
1598 * @post iter is a valid iterator
1599 *
1600 * @param io the context in which to list xattrs
1601 * @param oid name of the object
1602 * @param iter where to store the iterator
1603 * @returns 0 on success, negative error code on failure
1604 */
1605 CEPH_RADOS_API int rados_getxattrs(rados_ioctx_t io, const char *oid,
1606 rados_xattrs_iter_t *iter);
1607
1608 /**
1609 * Get the next xattr on the object
1610 *
1611 * @pre iter is a valid iterator
1612 *
1613 * @post name is the NULL-terminated name of the next xattr, and val
1614 * contains the value of the xattr, which is of length len. If the end
1615 * of the list has been reached, name and val are NULL, and len is 0.
1616 *
1617 * @param iter iterator to advance
1618 * @param name where to store the name of the next xattr
1619 * @param val where to store the value of the next xattr
1620 * @param len where to store the number of bytes in val
1621 * @returns 0 on success, negative error code on failure
1622 */
1623 CEPH_RADOS_API int rados_getxattrs_next(rados_xattrs_iter_t iter,
1624 const char **name, const char **val,
1625 size_t *len);
1626
1627 /**
1628 * Close the xattr iterator.
1629 *
1630 * iter should not be used after this is called.
1631 *
1632 * @param iter the iterator to close
1633 */
1634 CEPH_RADOS_API void rados_getxattrs_end(rados_xattrs_iter_t iter);
1635
1636 /** @} Xattrs */
1637
1638 /**
1639 * Get the next omap key/value pair on the object
1640 *
1641 * @pre iter is a valid iterator
1642 *
1643 * @post key and val are the next key/value pair. key is
1644 * null-terminated, and val has length len. If the end of the list has
1645 * been reached, key and val are NULL, and len is 0. key and val will
1646 * not be accessible after rados_omap_get_end() is called on iter, so
1647 * if they are needed after that they should be copied.
1648 *
1649 * @param iter iterator to advance
1650 * @param key where to store the key of the next omap entry
1651 * @param val where to store the value of the next omap entry
1652 * @param len where to store the number of bytes in val
1653 * @returns 0 on success, negative error code on failure
1654 */
1655 CEPH_RADOS_API int rados_omap_get_next(rados_omap_iter_t iter,
1656 char **key,
1657 char **val,
1658 size_t *len);
1659
1660 /**
1661 * Close the omap iterator.
1662 *
1663 * iter should not be used after this is called.
1664 *
1665 * @param iter the iterator to close
1666 */
1667 CEPH_RADOS_API void rados_omap_get_end(rados_omap_iter_t iter);
1668
1669 /**
1670 * Get object stats (size/mtime)
1671 *
1672 * TODO: when are these set, and by whom? can they be out of date?
1673 *
1674 * @param io ioctx
1675 * @param o object name
1676 * @param psize where to store object size
1677 * @param pmtime where to store modification time
1678 * @returns 0 on success, negative error code on failure
1679 */
1680 CEPH_RADOS_API int rados_stat(rados_ioctx_t io, const char *o, uint64_t *psize,
1681 time_t *pmtime);
1682
1683 /**
1684 * Update tmap (trivial map)
1685 *
1686 * Do compound update to a tmap object, inserting or deleting some
1687 * number of records. cmdbuf is a series of operation byte
1688 * codes, followed by command payload. Each command is a single-byte
1689 * command code, whose value is one of CEPH_OSD_TMAP_*.
1690 *
1691 * - update tmap 'header'
1692 * - 1 byte = CEPH_OSD_TMAP_HDR
1693 * - 4 bytes = data length (little endian)
1694 * - N bytes = data
1695 *
1696 * - insert/update one key/value pair
1697 * - 1 byte = CEPH_OSD_TMAP_SET
1698 * - 4 bytes = key name length (little endian)
1699 * - N bytes = key name
1700 * - 4 bytes = data length (little endian)
1701 * - M bytes = data
1702 *
1703 * - insert one key/value pair; return -EEXIST if it already exists.
1704 * - 1 byte = CEPH_OSD_TMAP_CREATE
1705 * - 4 bytes = key name length (little endian)
1706 * - N bytes = key name
1707 * - 4 bytes = data length (little endian)
1708 * - M bytes = data
1709 *
1710 * - remove one key/value pair
1711 * - 1 byte = CEPH_OSD_TMAP_RM
1712 * - 4 bytes = key name length (little endian)
1713 * - N bytes = key name
1714 *
1715 * Restrictions:
1716 * - The HDR update must precede any key/value updates.
1717 * - All key/value updates must be in lexicographically sorted order
1718 * in cmdbuf.
1719 * - You can read/write to a tmap object via the regular APIs, but
1720 * you should be careful not to corrupt it. Also be aware that the
1721 * object format may change without notice.
1722 *
1723 * @param io ioctx
1724 * @param o object name
1725 * @param cmdbuf command buffer
1726 * @param cmdbuflen command buffer length in bytes
1727 * @returns 0 on success, negative error code on failure
1728 */
1729 CEPH_RADOS_API int rados_tmap_update(rados_ioctx_t io, const char *o,
1730 const char *cmdbuf, size_t cmdbuflen);
1731
1732 /**
1733 * Store complete tmap (trivial map) object
1734 *
1735 * Put a full tmap object into the store, replacing what was there.
1736 *
1737 * The format of buf is:
1738 * - 4 bytes - length of header (little endian)
1739 * - N bytes - header data
1740 * - 4 bytes - number of keys (little endian)
1741 *
1742 * and for each key,
1743 * - 4 bytes - key name length (little endian)
1744 * - N bytes - key name
1745 * - 4 bytes - value length (little endian)
1746 * - M bytes - value data
1747 *
1748 * @param io ioctx
1749 * @param o object name
1750 * @param buf buffer
1751 * @param buflen buffer length in bytes
1752 * @returns 0 on success, negative error code on failure
1753 */
1754 CEPH_RADOS_API int rados_tmap_put(rados_ioctx_t io, const char *o,
1755 const char *buf, size_t buflen);
1756
1757 /**
1758 * Fetch complete tmap (trivial map) object
1759 *
1760 * Read a full tmap object. See rados_tmap_put() for the format the
1761 * data is returned in.
1762 *
1763 * @param io ioctx
1764 * @param o object name
1765 * @param buf buffer
1766 * @param buflen buffer length in bytes
1767 * @returns 0 on success, negative error code on failure
1768 * @returns -ERANGE if buf isn't big enough
1769 */
1770 CEPH_RADOS_API int rados_tmap_get(rados_ioctx_t io, const char *o, char *buf,
1771 size_t buflen);
1772
1773 /**
1774 * Execute an OSD class method on an object
1775 *
1776 * The OSD has a plugin mechanism for performing complicated
1777 * operations on an object atomically. These plugins are called
1778 * classes. This function allows librados users to call the custom
1779 * methods. The input and output formats are defined by the class.
1780 * Classes in ceph.git can be found in src/cls subdirectories.
1781 *
1782 * @param io the context in which to call the method
1783 * @param oid the object to call the method on
1784 * @param cls the name of the class
1785 * @param method the name of the method
1786 * @param in_buf where to find input
1787 * @param in_len length of in_buf in bytes
1788 * @param buf where to store output
1789 * @param out_len length of buf in bytes
1790 * @returns the length of the output, or -ERANGE if buf does not
1791 * have enough space to store it (for methods that return data).
1792 * For methods that don't return data, the return value is
1793 * method-specific.
1794 */
1795 CEPH_RADOS_API int rados_exec(rados_ioctx_t io, const char *oid,
1796 const char *cls, const char *method,
1797 const char *in_buf, size_t in_len, char *buf,
1798 size_t out_len);
1799
1800
1801 /** @} Synchronous I/O */
1802
1803 /**
1804 * @name Asynchronous I/O
1805 * Read and write to objects without blocking.
1806 *
1807 * @{
1808 */
1809
1810 /**
1811 * @typedef rados_callback_t
1812 * Callbacks for asynchronous operations take two parameters:
1813 * - cb the completion that has finished
1814 * - arg application defined data made available to the callback function
1815 */
1816 typedef void (*rados_callback_t)(rados_completion_t cb, void *arg);
1817
1818 /**
1819 * Constructs a completion to use with asynchronous operations
1820 *
1821 * The complete and safe callbacks correspond to operations being
1822 * acked and committed, respectively. The callbacks are called in
1823 * order of receipt, so the safe callback may be triggered before the
1824 * complete callback, and vice versa. This is affected by journalling
1825 * on the OSDs.
1826 *
1827 * TODO: more complete documentation of this elsewhere (in the RADOS docs?)
1828 *
1829 * @note Read operations only get a complete callback.
1830 * @note BUG: this should check for ENOMEM instead of throwing an exception
1831 *
1832 * @param cb_arg application-defined data passed to the callback functions
1833 * @param cb_complete the function to be called when the operation is
1834 * in memory on all replicas
1835 * @param cb_safe the function to be called when the operation is on
1836 * stable storage on all replicas
1837 * @param pc where to store the completion
1838 * @returns 0
1839 */
1840 CEPH_RADOS_API int rados_aio_create_completion(void *cb_arg,
1841 rados_callback_t cb_complete,
1842 rados_callback_t cb_safe,
1843 rados_completion_t *pc);
1844
1845 /**
1846 * Block until an operation completes
1847 *
1848 * This means it is in memory on all replicas.
1849 *
1850 * @note BUG: this should be void
1851 *
1852 * @param c operation to wait for
1853 * @returns 0
1854 */
1855 CEPH_RADOS_API int rados_aio_wait_for_complete(rados_completion_t c);
1856
1857 /**
1858 * Block until an operation is safe
1859 *
1860 * This means it is on stable storage on all replicas.
1861 *
1862 * @note BUG: this should be void
1863 *
1864 * @param c operation to wait for
1865 * @returns 0
1866 */
1867 CEPH_RADOS_API int rados_aio_wait_for_safe(rados_completion_t c);
1868
1869 /**
1870 * Has an asynchronous operation completed?
1871 *
1872 * @warning This does not imply that the complete callback has
1873 * finished
1874 *
1875 * @param c async operation to inspect
1876 * @returns whether c is complete
1877 */
1878 CEPH_RADOS_API int rados_aio_is_complete(rados_completion_t c);
1879
1880 /**
1881 * Is an asynchronous operation safe?
1882 *
1883 * @warning This does not imply that the safe callback has
1884 * finished
1885 *
1886 * @param c async operation to inspect
1887 * @returns whether c is safe
1888 */
1889 CEPH_RADOS_API int rados_aio_is_safe(rados_completion_t c);
1890
1891 /**
1892 * Block until an operation completes and callback completes
1893 *
1894 * This means it is in memory on all replicas and can be read.
1895 *
1896 * @note BUG: this should be void
1897 *
1898 * @param c operation to wait for
1899 * @returns 0
1900 */
1901 CEPH_RADOS_API int rados_aio_wait_for_complete_and_cb(rados_completion_t c);
1902
1903 /**
1904 * Block until an operation is safe and callback has completed
1905 *
1906 * This means it is on stable storage on all replicas.
1907 *
1908 * @note BUG: this should be void
1909 *
1910 * @param c operation to wait for
1911 * @returns 0
1912 */
1913 CEPH_RADOS_API int rados_aio_wait_for_safe_and_cb(rados_completion_t c);
1914
1915 /**
1916 * Has an asynchronous operation and callback completed
1917 *
1918 * @param c async operation to inspect
1919 * @returns whether c is complete
1920 */
1921 CEPH_RADOS_API int rados_aio_is_complete_and_cb(rados_completion_t c);
1922
1923 /**
1924 * Is an asynchronous operation safe and has the callback completed
1925 *
1926 * @param c async operation to inspect
1927 * @returns whether c is safe
1928 */
1929 CEPH_RADOS_API int rados_aio_is_safe_and_cb(rados_completion_t c);
1930
1931 /**
1932 * Get the return value of an asynchronous operation
1933 *
1934 * The return value is set when the operation is complete or safe,
1935 * whichever comes first.
1936 *
1937 * @pre The operation is safe or complete
1938 *
1939 * @note BUG: complete callback may never be called when the safe
1940 * message is received before the complete message
1941 *
1942 * @param c async operation to inspect
1943 * @returns return value of the operation
1944 */
1945 CEPH_RADOS_API int rados_aio_get_return_value(rados_completion_t c);
1946
1947 /**
1948 * Get the internal object version of the target of an asynchronous operation
1949 *
1950 * The return value is set when the operation is complete or safe,
1951 * whichever comes first.
1952 *
1953 * @pre The operation is safe or complete
1954 *
1955 * @note BUG: complete callback may never be called when the safe
1956 * message is received before the complete message
1957 *
1958 * @param c async operation to inspect
1959 * @returns version number of the asynchronous operation's target
1960 */
1961 CEPH_RADOS_API uint64_t rados_aio_get_version(rados_completion_t c);
1962
1963 /**
1964 * Release a completion
1965 *
1966 * Call this when you no longer need the completion. It may not be
1967 * freed immediately if the operation is not acked and committed.
1968 *
1969 * @param c completion to release
1970 */
1971 CEPH_RADOS_API void rados_aio_release(rados_completion_t c);
1972
1973 /**
1974 * Write data to an object asynchronously
1975 *
1976 * Queues the write and returns. The return value of the completion
1977 * will be 0 on success, negative error code on failure.
1978 *
1979 * @param io the context in which the write will occur
1980 * @param oid name of the object
1981 * @param completion what to do when the write is safe and complete
1982 * @param buf data to write
1983 * @param len length of the data, in bytes
1984 * @param off byte offset in the object to begin writing at
1985 * @returns 0 on success, -EROFS if the io context specifies a snap_seq
1986 * other than LIBRADOS_SNAP_HEAD
1987 */
1988 CEPH_RADOS_API int rados_aio_write(rados_ioctx_t io, const char *oid,
1989 rados_completion_t completion,
1990 const char *buf, size_t len, uint64_t off);
1991
1992 /**
1993 * Asynchronously append data to an object
1994 *
1995 * Queues the append and returns.
1996 *
1997 * The return value of the completion will be 0 on success, negative
1998 * error code on failure.
1999 *
2000 * @param io the context to operate in
2001 * @param oid the name of the object
2002 * @param completion what to do when the append is safe and complete
2003 * @param buf the data to append
2004 * @param len length of buf (in bytes)
2005 * @returns 0 on success, -EROFS if the io context specifies a snap_seq
2006 * other than LIBRADOS_SNAP_HEAD
2007 */
2008 CEPH_RADOS_API int rados_aio_append(rados_ioctx_t io, const char *oid,
2009 rados_completion_t completion,
2010 const char *buf, size_t len);
2011
2012 /**
2013 * Asynchronously write an entire object
2014 *
2015 * The object is filled with the provided data. If the object exists,
2016 * it is atomically truncated and then written.
2017 * Queues the write_full and returns.
2018 *
2019 * The return value of the completion will be 0 on success, negative
2020 * error code on failure.
2021 *
2022 * @param io the io context in which the write will occur
2023 * @param oid name of the object
2024 * @param completion what to do when the write_full is safe and complete
2025 * @param buf data to write
2026 * @param len length of the data, in bytes
2027 * @returns 0 on success, -EROFS if the io context specifies a snap_seq
2028 * other than LIBRADOS_SNAP_HEAD
2029 */
2030 CEPH_RADOS_API int rados_aio_write_full(rados_ioctx_t io, const char *oid,
2031 rados_completion_t completion,
2032 const char *buf, size_t len);
2033
2034 /**
2035 * Asynchronously write the same buffer multiple times
2036 *
2037 * Queues the writesame and returns.
2038 *
2039 * The return value of the completion will be 0 on success, negative
2040 * error code on failure.
2041 *
2042 * @param io the io context in which the write will occur
2043 * @param oid name of the object
2044 * @param completion what to do when the writesame is safe and complete
2045 * @param buf data to write
2046 * @param data_len length of the data, in bytes
2047 * @param write_len the total number of bytes to write
2048 * @param off byte offset in the object to begin writing at
2049 * @returns 0 on success, -EROFS if the io context specifies a snap_seq
2050 * other than LIBRADOS_SNAP_HEAD
2051 */
2052 CEPH_RADOS_API int rados_aio_writesame(rados_ioctx_t io, const char *oid,
2053 rados_completion_t completion,
2054 const char *buf, size_t data_len,
2055 size_t write_len, uint64_t off);
2056
2057 /**
2058 * Asynchronously remove an object
2059 *
2060 * Queues the remove and returns.
2061 *
2062 * The return value of the completion will be 0 on success, negative
2063 * error code on failure.
2064 *
2065 * @param io the context to operate in
2066 * @param oid the name of the object
2067 * @param completion what to do when the remove is safe and complete
2068 * @returns 0 on success, -EROFS if the io context specifies a snap_seq
2069 * other than LIBRADOS_SNAP_HEAD
2070 */
2071 CEPH_RADOS_API int rados_aio_remove(rados_ioctx_t io, const char *oid,
2072 rados_completion_t completion);
2073
2074 /**
2075 * Asynchronously read data from an object
2076 *
2077 * The io context determines the snapshot to read from, if any was set
2078 * by rados_ioctx_snap_set_read().
2079 *
2080 * The return value of the completion will be number of bytes read on
2081 * success, negative error code on failure.
2082 *
2083 * @note only the 'complete' callback of the completion will be called.
2084 *
2085 * @param io the context in which to perform the read
2086 * @param oid the name of the object to read from
2087 * @param completion what to do when the read is complete
2088 * @param buf where to store the results
2089 * @param len the number of bytes to read
2090 * @param off the offset to start reading from in the object
2091 * @returns 0 on success, negative error code on failure
2092 */
2093 CEPH_RADOS_API int rados_aio_read(rados_ioctx_t io, const char *oid,
2094 rados_completion_t completion,
2095 char *buf, size_t len, uint64_t off);
2096
2097 /**
2098 * Block until all pending writes in an io context are safe
2099 *
2100 * This is not equivalent to calling rados_aio_wait_for_safe() on all
2101 * write completions, since this waits for the associated callbacks to
2102 * complete as well.
2103 *
2104 * @note BUG: always returns 0, should be void or accept a timeout
2105 *
2106 * @param io the context to flush
2107 * @returns 0 on success, negative error code on failure
2108 */
2109 CEPH_RADOS_API int rados_aio_flush(rados_ioctx_t io);
2110
2111
2112 /**
2113 * Schedule a callback for when all currently pending
2114 * aio writes are safe. This is a non-blocking version of
2115 * rados_aio_flush().
2116 *
2117 * @param io the context to flush
2118 * @param completion what to do when the writes are safe
2119 * @returns 0 on success, negative error code on failure
2120 */
2121 CEPH_RADOS_API int rados_aio_flush_async(rados_ioctx_t io,
2122 rados_completion_t completion);
2123
2124
2125 /**
2126 * Asynchronously get object stats (size/mtime)
2127 *
2128 * @param io ioctx
2129 * @param o object name
     * @param completion what to do when the stat is complete
2130 * @param psize where to store object size
2131 * @param pmtime where to store modification time
2132 * @returns 0 on success, negative error code on failure
2133 */
2134 CEPH_RADOS_API int rados_aio_stat(rados_ioctx_t io, const char *o,
2135 rados_completion_t completion,
2136 uint64_t *psize, time_t *pmtime);
2137
2138 /**
2139 * Asynchronously compare an on-disk object range with a buffer
2140 *
2141 * @param io the context in which to perform the comparison
2142 * @param o the name of the object to compare with
2143 * @param completion what to do when the comparison is complete
2144 * @param cmp_buf buffer containing bytes to be compared with object contents
2145 * @param cmp_len length to compare and size of cmp_buf in bytes
2146 * @param off object byte offset at which to start the comparison
2147 * @returns 0 on success, negative error code on failure,
2148 * (-MAX_ERRNO - mismatch_off) on mismatch
2149 */
2150 CEPH_RADOS_API int rados_aio_cmpext(rados_ioctx_t io, const char *o,
2151 rados_completion_t completion,
2152 const char *cmp_buf,
2153 size_t cmp_len,
2154 uint64_t off);
2155
2156 /**
2157 * Cancel async operation
2158 *
2159 * @param io ioctx
2160 * @param completion completion handle
2161 * @returns 0 on success, negative error code on failure
2162 */
2163 CEPH_RADOS_API int rados_aio_cancel(rados_ioctx_t io,
2164 rados_completion_t completion);
2165
2166 /**
2167 * Asynchronously execute an OSD class method on an object
2168 *
2169 * The OSD has a plugin mechanism for performing complicated
2170 * operations on an object atomically. These plugins are called
2171 * classes. This function allows librados users to call the custom
2172 * methods. The input and output formats are defined by the class.
2173 * Classes in ceph.git can be found in src/cls subdirectories
2174 *
2175 * @param io the context in which to call the method
2176 * @param o the object to call the method on
     * @param completion what to do when the exec completes
2177 * @param cls the name of the class
2178 * @param method the name of the method
2179 * @param in_buf where to find input
2180 * @param in_len length of in_buf in bytes
2181 * @param buf where to store output
2182 * @param out_len length of buf in bytes
2183 * @returns 0 on success, negative error code on failure
2184 */
2185 CEPH_RADOS_API int rados_aio_exec(rados_ioctx_t io, const char *o,
2186 rados_completion_t completion,
2187 const char *cls, const char *method,
2188 const char *in_buf, size_t in_len,
2189 char *buf, size_t out_len);
2190
2191 /** @} Asynchronous I/O */
2192
2193 /**
2194 * @name Asynchronous Xattrs
2195 * Extended attributes are stored as extended attributes on the files
2196 * representing an object on the OSDs. Thus, they have the same
2197 * limitations as the underlying filesystem. On ext4, this means that
2198 * the total data stored in xattrs cannot exceed 4KB.
2199 *
2200 * @{
2201 */
2202
2203 /**
2204 * Asynchronously get the value of an extended attribute on an object.
2205 *
2206 * @param io the context in which the attribute is read
2207 * @param o name of the object
2208 * @param completion what to do when the getxattr completes
2209 * @param name which extended attribute to read
2210 * @param buf where to store the result
2211 * @param len size of buf in bytes
2212 * @returns length of xattr value on success, negative error code on failure
2213 */
2214 CEPH_RADOS_API int rados_aio_getxattr(rados_ioctx_t io, const char *o,
2215 rados_completion_t completion,
2216 const char *name, char *buf, size_t len);
2217
2218 /**
2219 * Asynchronously set an extended attribute on an object.
2220 *
2221 * @param io the context in which xattr is set
2222 * @param o name of the object
2223 * @param completion what to do when the setxattr completes
2224 * @param name which extended attribute to set
2225 * @param buf what to store in the xattr
2226 * @param len the number of bytes in buf
2227 * @returns 0 on success, negative error code on failure
2228 */
2229 CEPH_RADOS_API int rados_aio_setxattr(rados_ioctx_t io, const char *o,
2230 rados_completion_t completion,
2231 const char *name, const char *buf,
2232 size_t len);
2233
2234 /**
2235 * Asynchronously delete an extended attribute from an object.
2236 *
2237 * @param io the context in which to delete the xattr
2238 * @param o the name of the object
2239 * @param completion what to do when the rmxattr completes
2240 * @param name which xattr to delete
2241 * @returns 0 on success, negative error code on failure
2242 */
2243 CEPH_RADOS_API int rados_aio_rmxattr(rados_ioctx_t io, const char *o,
2244 rados_completion_t completion,
2245 const char *name);
2246
2247 /**
2248 * Asynchronously start iterating over xattrs on an object.
2249 *
2250 * @post iter is a valid iterator
2251 *
2252 * @param io the context in which to list xattrs
2253 * @param oid name of the object
     * @param completion what to do when the getxattrs completes
2254 * @param iter where to store the iterator
2255 * @returns 0 on success, negative error code on failure
2256 */
2257 CEPH_RADOS_API int rados_aio_getxattrs(rados_ioctx_t io, const char *oid,
2258 rados_completion_t completion,
2259 rados_xattrs_iter_t *iter);
2260
2261 /** @} Asynchronous Xattrs */
2262
2263 /**
2264 * @name Watch/Notify
2265 *
2266 * Watch/notify is a protocol to help communicate among clients. It
2267 * can be used to synchronize client state. All that's needed is a
2268 * well-known object name (for example, rbd uses the header object of
2269 * an image).
2270 *
2271 * Watchers register an interest in an object, and receive all
2272 * notifies on that object. A notify attempts to communicate with all
2273 * clients watching an object, and blocks on the notifier until each
2274 * client responds or a timeout is reached.
2275 *
2276 * See rados_watch() and rados_notify() for more details.
2277 *
2278 * @{
2279 */
2280
2281 /**
2282 * @typedef rados_watchcb_t
2283 *
2284 * Callback activated when a notify is received on a watched
2285 * object.
2286 *
2287 * @param opcode undefined
2288 * @param ver version of the watched object
2289 * @param arg application-specific data
2290 *
2291 * @note BUG: opcode is an internal detail that shouldn't be exposed
2292 * @note BUG: ver is unused
2293 */
2294 typedef void (*rados_watchcb_t)(uint8_t opcode, uint64_t ver, void *arg);
2295
2296 /**
2297 * @typedef rados_watchcb2_t
2298 *
2299 * Callback activated when a notify is received on a watched
2300 * object.
2301 *
2302 * @param arg opaque user-defined value provided to rados_watch2()
2303 * @param notify_id an id for this notify event
2304 * @param handle the watcher handle we are notifying
2305 * @param notifier_id the unique client id for the notifier
2306 * @param data payload from the notifier
2307 * @param data_len length of payload buffer
2308 */
2309 typedef void (*rados_watchcb2_t)(void *arg,
2310 uint64_t notify_id,
2311 uint64_t handle,
2312 uint64_t notifier_id,
2313 void *data,
2314 size_t data_len);
2315
2316 /**
2317 * @typedef rados_watcherrcb_t
2318 *
2319 * Callback activated when we encounter an error with the watch session.
2320 * This can happen when the location of the objects moves within the
2321 * cluster and we fail to register our watch with the new object location,
2322 * or when our connection with the object OSD is otherwise interrupted and
2323 * we may have missed notify events.
2324 *
2325 * @param pre opaque user-defined value provided to rados_watch2()
     * @param cookie presumably the internal id of the affected watch, as
     *        stored by rados_watch2() (TODO confirm against the implementation)
2326 * @param err error code
2327 */
2328 typedef void (*rados_watcherrcb_t)(void *pre, uint64_t cookie, int err);
2329
2330 /**
2331 * Register an interest in an object
2332 *
2333 * A watch operation registers the client as being interested in
2334 * notifications on an object. OSDs keep track of watches on
2335 * persistent storage, so they are preserved across cluster changes by
2336 * the normal recovery process. If the client loses its connection to
2337 * the primary OSD for a watched object, the watch will be removed
2338 * after 30 seconds. Watches are automatically reestablished when a new
2339 * connection is made, or a placement group switches OSDs.
2340 *
2341 * @note BUG: librados should provide a way for watchers to notice connection resets
2342 * @note BUG: the ver parameter does not work, and -ERANGE will never be returned
2343 * (See URL tracker.ceph.com/issues/2592)
2344 *
2345 * @param io the pool the object is in
2346 * @param o the object to watch
2347 * @param ver expected version of the object
2348 * @param cookie where to store the internal id assigned to this watch
2349 * @param watchcb what to do when a notify is received on this object
2350 * @param arg application defined data to pass when watchcb is called
2351 * @returns 0 on success, negative error code on failure
2352 * @returns -ERANGE if the version of the object is greater than ver
2353 */
2354 CEPH_RADOS_API int rados_watch(rados_ioctx_t io, const char *o, uint64_t ver,
2355 uint64_t *cookie,
2356 rados_watchcb_t watchcb, void *arg)
2357 __attribute__((deprecated));
2358
2359
2360 /**
2361 * Register an interest in an object
2362 *
2363 * A watch operation registers the client as being interested in
2364 * notifications on an object. OSDs keep track of watches on
2365 * persistent storage, so they are preserved across cluster changes by
2366 * the normal recovery process. If the client loses its connection to the
2367 * primary OSD for a watched object, the watch will be removed after
2368 * a timeout configured with osd_client_watch_timeout.
2369 * Watches are automatically reestablished when a new
2370 * connection is made, or a placement group switches OSDs.
2371 *
2372 * @param io the pool the object is in
2373 * @param o the object to watch
2374 * @param cookie where to store the internal id assigned to this watch
2375 * @param watchcb what to do when a notify is received on this object
2376 * @param watcherrcb what to do when the watch session encounters an error
2377 * @param arg opaque value to pass to the callback
2378 * @returns 0 on success, negative error code on failure
2379 */
2380 CEPH_RADOS_API int rados_watch2(rados_ioctx_t io, const char *o, uint64_t *cookie,
2381 rados_watchcb2_t watchcb,
2382 rados_watcherrcb_t watcherrcb,
2383 void *arg);
2384
2385 /**
2386 * Register an interest in an object
2387 *
2388 * A watch operation registers the client as being interested in
2389 * notifications on an object. OSDs keep track of watches on
2390 * persistent storage, so they are preserved across cluster changes by
2391 * the normal recovery process. Watches are automatically reestablished when a new
2392 * connection is made, or a placement group switches OSDs.
2393 *
2394 * @param io the pool the object is in
2395 * @param o the object to watch
2396 * @param cookie where to store the internal id assigned to this watch
2397 * @param watchcb what to do when a notify is received on this object
2398 * @param watcherrcb what to do when the watch session encounters an error
2399 * @param timeout how many seconds the watch is kept after the client disconnects
2400 * @param arg opaque value to pass to the callback
2401 * @returns 0 on success, negative error code on failure
2402 */
2403 CEPH_RADOS_API int rados_watch3(rados_ioctx_t io, const char *o, uint64_t *cookie,
2404 rados_watchcb2_t watchcb,
2405 rados_watcherrcb_t watcherrcb,
2406 uint32_t timeout,
2407 void *arg);
2408
2409 /**
2410 * Asynchronously register an interest in an object
2411 *
2412 * A watch operation registers the client as being interested in
2413 * notifications on an object. OSDs keep track of watches on
2414 * persistent storage, so they are preserved across cluster changes by
2415 * the normal recovery process. If the client loses its connection to
2416 * the primary OSD for a watched object, the watch will be removed
2417 * after 30 seconds. Watches are automatically reestablished when a new
2418 * connection is made, or a placement group switches OSDs.
2419 *
2420 * @param io the pool the object is in
2421 * @param o the object to watch
2422 * @param completion what to do when operation has been attempted
2423 * @param handle where to store the internal id assigned to this watch
2424 * @param watchcb what to do when a notify is received on this object
2425 * @param watcherrcb what to do when the watch session encounters an error
2426 * @param arg opaque value to pass to the callback
2427 * @returns 0 on success, negative error code on failure
2428 */
2429 CEPH_RADOS_API int rados_aio_watch(rados_ioctx_t io, const char *o,
2430 rados_completion_t completion, uint64_t *handle,
2431 rados_watchcb2_t watchcb,
2432 rados_watcherrcb_t watcherrcb,
2433 void *arg);
2434
2435 /**
2436 * Asynchronously register an interest in an object
2437 *
2438 * A watch operation registers the client as being interested in
2439 * notifications on an object. OSDs keep track of watches on
2440 * persistent storage, so they are preserved across cluster changes by
2441 * the normal recovery process. If the client loses its connection to
2442 * the primary OSD for a watched object, the watch will be removed
2443 * after the number of seconds configured in the timeout parameter.
2444 * Watches are automatically reestablished when a new
2445 * connection is made, or a placement group switches OSDs.
2446 *
2447 * @param io the pool the object is in
2448 * @param o the object to watch
2449 * @param completion what to do when operation has been attempted
2450 * @param handle where to store the internal id assigned to this watch
2451 * @param watchcb what to do when a notify is received on this object
2452 * @param watcherrcb what to do when the watch session encounters an error
2453 * @param timeout how many seconds the watch is kept after the client disconnects
2454 * @param arg opaque value to pass to the callback
2455 * @returns 0 on success, negative error code on failure
2456 */
2457 CEPH_RADOS_API int rados_aio_watch2(rados_ioctx_t io, const char *o,
2458 rados_completion_t completion, uint64_t *handle,
2459 rados_watchcb2_t watchcb,
2460 rados_watcherrcb_t watcherrcb,
2461 uint32_t timeout,
2462 void *arg);
2463
2464 /**
2465 * Check on the status of a watch
2466 *
2467 * Return the number of milliseconds since the watch was last confirmed.
2468 * Or, if there has been an error, return that.
2469 *
2470 * If there is an error, the watch is no longer valid, and should be
2471 * destroyed with rados_unwatch2(). If the user is still interested
2472 * in the object, a new watch should be created with rados_watch2().
2473 *
2474 * @param io the pool the object is in
2475 * @param cookie the watch handle
2476 * @returns ms since last confirmed on success, negative error code on failure
2477 */
2478 CEPH_RADOS_API int rados_watch_check(rados_ioctx_t io, uint64_t cookie);
2479
2480 /**
2481 * Unregister an interest in an object
2482 *
2483 * Once this completes, no more notifies will be sent to us for this
2484 * watch. This should be called to clean up unneeded watchers.
2485 *
2486 * @param io the pool the object is in
2487 * @param o the name of the watched object (ignored)
2488 * @param cookie which watch to unregister
2489 * @returns 0 on success, negative error code on failure
2490 */
2491 CEPH_RADOS_API int rados_unwatch(rados_ioctx_t io, const char *o, uint64_t cookie)
2492 __attribute__((deprecated));
2493
2494 /**
2495 * Unregister an interest in an object
2496 *
2497 * Once this completes, no more notifies will be sent to us for this
2498 * watch. This should be called to clean up unneeded watchers.
2499 *
2500 * @param io the pool the object is in
2501 * @param cookie which watch to unregister
2502 * @returns 0 on success, negative error code on failure
2503 */
2504 CEPH_RADOS_API int rados_unwatch2(rados_ioctx_t io, uint64_t cookie);
2505
2506 /**
2507 * Asynchronously unregister an interest in an object
2508 *
2509 * Once this completes, no more notifies will be sent to us for this
2510 * watch. This should be called to clean up unneeded watchers.
2511 *
2512 * @param io the pool the object is in
2513 * @param completion what to do when operation has been attempted
2514 * @param cookie which watch to unregister
2515 * @returns 0 on success, negative error code on failure
2516 */
2517 CEPH_RADOS_API int rados_aio_unwatch(rados_ioctx_t io, uint64_t cookie,
2518 rados_completion_t completion);
2519
2520 /**
2521 * Synchronously notify watchers of an object
2522 *
2523 * This blocks until all watchers of the object have received and
2524 * reacted to the notify, or a timeout is reached.
2525 *
2526 * @note BUG: the timeout is not changeable via the C API
2527 * @note BUG: the bufferlist is inaccessible in a rados_watchcb_t
2528 *
2529 * @param io the pool the object is in
2530 * @param o the name of the object
2531 * @param ver obsolete - just pass zero
2532 * @param buf data to send to watchers
2533 * @param buf_len length of buf in bytes
2534 * @returns 0 on success, negative error code on failure
2535 */
2536 CEPH_RADOS_API int rados_notify(rados_ioctx_t io, const char *o, uint64_t ver,
2537 const char *buf, int buf_len)
2538 __attribute__((deprecated));
2539
2540 /**
2541 * Synchronously notify watchers of an object
2542 *
2543 * This blocks until all watchers of the object have received and
2544 * reacted to the notify, or a timeout is reached.
2545 *
2546 * The reply buffer is optional. If specified, the client will get
2547 * back an encoded buffer that includes the ids of the clients that
2548 * acknowledged the notify as well as their notify ack payloads (if
2549 * any). Clients that timed out are not included. Even clients that
2550 * do not include a notify ack payload are included in the list but
2551 * have a 0-length payload associated with them. The format:
2552 *
2553 * le32 num_acks
2554 * {
2555 * le64 gid global id for the client (for client.1234 that's 1234)
2556 * le64 cookie cookie for the client
2557 * le32 buflen length of reply message buffer
2558 * u8 * buflen payload
2559 * } * num_acks
2560 * le32 num_timeouts
2561 * {
2562 * le64 gid global id for the client
2563 * le64 cookie cookie for the client
2564 * } * num_timeouts
2565 *
2566 * Note: There may be multiple instances of the same gid if there are
2567 * multiple watchers registered via the same client.
2568 *
2569 * Note: The buffer must be released with rados_buffer_free() when the
2570 * user is done with it.
2571 *
2572 * Note: Since the result buffer includes clients that time out, it
2573 * will be set even when rados_notify2() returns an error code (like
2574 * -ETIMEDOUT).
2575 *
2576 * @param io the pool the object is in
2577 * @param completion what to do when operation has been attempted (rados_aio_notify() only)
2578 * @param o the name of the object
2579 * @param buf data to send to watchers
2580 * @param buf_len length of buf in bytes
2581 * @param timeout_ms notify timeout (in ms)
2582 * @param reply_buffer pointer to reply buffer pointer (free with rados_buffer_free)
2583 * @param reply_buffer_len pointer to size of reply buffer
2584 * @returns 0 on success, negative error code on failure
2585 */
2586 CEPH_RADOS_API int rados_notify2(rados_ioctx_t io, const char *o,
2587 const char *buf, int buf_len,
2588 uint64_t timeout_ms,
2589 char **reply_buffer, size_t *reply_buffer_len);
     /**
      * Variant of rados_notify2() that additionally takes a completion.
      * The remaining parameters match rados_notify2(); see the
      * description of rados_notify2() for their meaning and for the
      * reply buffer format.
      */
2590 CEPH_RADOS_API int rados_aio_notify(rados_ioctx_t io, const char *o,
2591 rados_completion_t completion,
2592 const char *buf, int buf_len,
2593 uint64_t timeout_ms, char **reply_buffer,
2594 size_t *reply_buffer_len);
2595
2596 /**
2597 * Acknowledge receipt of a notify
2598 *
2599 * @param io the pool the object is in
2600 * @param o the name of the object
2601 * @param notify_id the notify_id we got on the watchcb2_t callback
2602 * @param cookie the watcher handle
2603 * @param buf payload to return to notifier (optional)
2604 * @param buf_len payload length
2605 * @returns 0 on success
2606 */
2607 CEPH_RADOS_API int rados_notify_ack(rados_ioctx_t io, const char *o,
2608 uint64_t notify_id, uint64_t cookie,
2609 const char *buf, int buf_len);
2610
2611 /**
2612 * Flush watch/notify callbacks
2613 *
2614 * This call will block until all pending watch/notify callbacks have
2615 * been executed and the queue is empty. It should usually be called
2616 * after shutting down any watches before shutting down the ioctx or
2617 * librados to ensure that any callbacks do not misuse the ioctx (for
2618 * example by calling rados_notify_ack after the ioctx has been
2619 * destroyed).
2620 *
2621 * @param cluster the cluster handle
2622 */
2623 CEPH_RADOS_API int rados_watch_flush(rados_t cluster);
2624 /**
2625 * Flush watch/notify callbacks
2626 *
2627 * This call is non-blocking; the completion will be called once
2628 * all pending watch/notify callbacks have been executed and
2629 * the queue is empty. It should usually be called after shutting
2630 * down any watches before shutting down the ioctx or
2631 * librados to ensure that any callbacks do not misuse the ioctx (for
2632 * example by calling rados_notify_ack after the ioctx has been
2633 * destroyed).
2634 *
2635 * @param cluster the cluster handle
2636 * @param completion what to do when operation has been attempted
2637 */
2638 CEPH_RADOS_API int rados_aio_watch_flush(rados_t cluster, rados_completion_t completion);
2639
2640 /** @} Watch/Notify */
2641
2642 /**
2643 * Pin an object in the cache tier
2644 *
2645 * When an object is pinned in the cache tier, it stays in the cache
2646 * tier, and won't be flushed out.
2647 *
2648 * @param io the pool the object is in
2649 * @param o the object id
2650 * @returns 0 on success, negative error code on failure
2651 */
2652 CEPH_RADOS_API int rados_cache_pin(rados_ioctx_t io, const char *o);
2653
2654 /**
2655 * Unpin an object in the cache tier
2656 *
2657 * After an object is unpinned in the cache tier, it can be flushed out
2658 *
2659 * @param io the pool the object is in
2660 * @param o the object id
2661 * @returns 0 on success, negative error code on failure
2662 */
2663 CEPH_RADOS_API int rados_cache_unpin(rados_ioctx_t io, const char *o);
2664
2665 /**
2666 * @name Hints
2667 *
2668 * @{
2669 */
2670
2671 /**
2672 * Set allocation hint for an object
2673 *
2674 * This is an advisory operation, it will always succeed (as if it was
2675 * submitted with a LIBRADOS_OP_FLAG_FAILOK flag set) and is not
2676 * guaranteed to do anything on the backend.
2677 *
2678 * @param io the pool the object is in
2679 * @param o the name of the object
2680 * @param expected_object_size expected size of the object, in bytes
2681 * @param expected_write_size expected size of writes to the object, in bytes
2682 * @returns 0 on success, negative error code on failure
2683 */
2684 CEPH_RADOS_API int rados_set_alloc_hint(rados_ioctx_t io, const char *o,
2685 uint64_t expected_object_size,
2686 uint64_t expected_write_size);
2687
2688 /**
2689 * Set allocation hint for an object
2690 *
2691 * This is an advisory operation, it will always succeed (as if it was
2692 * submitted with a LIBRADOS_OP_FLAG_FAILOK flag set) and is not
2693 * guaranteed to do anything on the backend.
2694 *
2695 * @param io the pool the object is in
2696 * @param o the name of the object
2697 * @param expected_object_size expected size of the object, in bytes
2698 * @param expected_write_size expected size of writes to the object, in bytes
2699 * @param flags hints about future IO patterns
2700 * @returns 0 on success, negative error code on failure
2701 */
2702 CEPH_RADOS_API int rados_set_alloc_hint2(rados_ioctx_t io, const char *o,
2703 uint64_t expected_object_size,
2704 uint64_t expected_write_size,
2705 uint32_t flags);
2706
2707 /** @} Hints */
2708
2709 /**
2710 * @name Object Operations
2711 *
2712 * A single rados operation can do multiple operations on one object
2713 * atomically. The whole operation will succeed or fail, and no partial
2714 * results will be visible.
2715 *
2716 * Operations may be either reads, which can return data, or writes,
2717 * which cannot. The effects of writes are applied and visible all at
2718 * once, so an operation that sets an xattr and then checks its value
2719 * will not see the updated value.
2720 *
2721 * @{
2722 */
2723
2724 /**
2725 * Create a new rados_write_op_t write operation. This will store all actions
2726 * to be performed atomically. You must call rados_release_write_op when you are
2727 * finished with it.
2728 *
2729 * @returns non-NULL on success, NULL on memory allocation error.
2730 */
2731 CEPH_RADOS_API rados_write_op_t rados_create_write_op(void);
2732
2733 /**
2734 * Free a rados_write_op_t, must be called when you're done with it.
2735 * @param write_op operation to deallocate, created with rados_create_write_op
2736 */
2737 CEPH_RADOS_API void rados_release_write_op(rados_write_op_t write_op);
2738
2739 /**
2740 * Set flags for the last operation added to this write_op.
2741 * At least one op must have been added to the write_op.
2742 * @param flags see librados.h constants beginning with LIBRADOS_OP_FLAG
2743 */
2744 CEPH_RADOS_API void rados_write_op_set_flags(rados_write_op_t write_op,
2745 int flags);
2746
2747 /**
2748 * Ensure that the object exists before writing
2749 * @param write_op operation to add this action to
2750 */
2751 CEPH_RADOS_API void rados_write_op_assert_exists(rados_write_op_t write_op);
2752
2753 /**
2754 * Ensure that the object exists and that its internal version
2755 * number is equal to "ver" before writing. "ver" should be a
2756 * version number previously obtained with rados_get_last_version().
2757 * - If the object's version is greater than the asserted version
2758 * then rados_write_op_operate will return -ERANGE instead of
2759 * executing the op.
2760 * - If the object's version is less than the asserted version
2761 * then rados_write_op_operate will return -EOVERFLOW instead
2762 * of executing the op.
2763 * @param write_op operation to add this action to
2764 * @param ver object version number
2765 */
2766 CEPH_RADOS_API void rados_write_op_assert_version(rados_write_op_t write_op, uint64_t ver);
2767
2768 /**
2769 * Ensure that given object range (extent) satisfies comparison.
2770 *
2771 * @param write_op operation to add this action to
2772 * @param cmp_buf buffer containing bytes to be compared with object contents
2773 * @param cmp_len length to compare and size of @p cmp_buf in bytes
2774 * @param off object byte offset at which to start the comparison
2775 * @param prval returned result of comparison, 0 on success, negative error code
2776 * on failure, (-MAX_ERRNO - mismatch_off) on mismatch
2777 */
2778 CEPH_RADOS_API void rados_write_op_cmpext(rados_write_op_t write_op,
2779 const char *cmp_buf,
2780 size_t cmp_len,
2781 uint64_t off,
2782 int *prval);
2783
2784 /**
2785 * Ensure that given xattr satisfies comparison.
2786 * If the comparison is not satisfied, the return code of the
2787 * operation will be -ECANCELED
2788 * @param write_op operation to add this action to
2789 * @param name name of the xattr to look up
2790 * @param comparison_operator currently undocumented, look for
2791 * LIBRADOS_CMPXATTR_OP_EQ in librados.h
2792 * @param value buffer to compare actual xattr value to
2793 * @param value_len length of buffer to compare actual xattr value to
2794 */
2795 CEPH_RADOS_API void rados_write_op_cmpxattr(rados_write_op_t write_op,
2796 const char *name,
2797 uint8_t comparison_operator,
2798 const char *value,
2799 size_t value_len);
2800
2801 /**
2802 * Ensure that an omap value satisfies a comparison,
2803 * with the supplied value on the right hand side (i.e.
2804 * for OP_LT, the comparison is actual_value < value).
2805 *
2806 * @param write_op operation to add this action to
2807 * @param key which omap value to compare
2808 * @param comparison_operator one of LIBRADOS_CMPXATTR_OP_EQ,
2809 LIBRADOS_CMPXATTR_OP_LT, or LIBRADOS_CMPXATTR_OP_GT
2810 * @param val value to compare with
2811 * @param val_len length of value in bytes
2812 * @param prval where to store the return value from this action
2813 */
2814 CEPH_RADOS_API void rados_write_op_omap_cmp(rados_write_op_t write_op,
2815 const char *key,
2816 uint8_t comparison_operator,
2817 const char *val,
2818 size_t val_len,
2819 int *prval);
2820
2821 /**
2822 * Set an xattr
2823 * @param write_op operation to add this action to
2824 * @param name name of the xattr
2825 * @param value buffer to set xattr to
2826 * @param value_len length of buffer to set xattr to
2827 */
2828 CEPH_RADOS_API void rados_write_op_setxattr(rados_write_op_t write_op,
2829 const char *name,
2830 const char *value,
2831 size_t value_len);
2832
2833 /**
2834 * Remove an xattr
2835 * @param write_op operation to add this action to
2836 * @param name name of the xattr to remove
2837 */
2838 CEPH_RADOS_API void rados_write_op_rmxattr(rados_write_op_t write_op,
2839 const char *name);
2840
2841 /**
2842 * Create the object
2843 * @param write_op operation to add this action to
2844 * @param exclusive set to either LIBRADOS_CREATE_EXCLUSIVE (the create
2845 * will error if the object already exists) or
2846 * LIBRADOS_CREATE_IDEMPOTENT
2847 * @param category category string (DEPRECATED, HAS NO EFFECT)
2848 */
2849 CEPH_RADOS_API void rados_write_op_create(rados_write_op_t write_op,
2850 int exclusive,
2851 const char* category);
2852
2853 /**
2854 * Write to offset
2855 * @param write_op operation to add this action to
2856 * @param buffer bytes to write
2857 * @param len length of buffer
2858 * @param offset offset to write to
2859 */
2860 CEPH_RADOS_API void rados_write_op_write(rados_write_op_t write_op,
2861 const char *buffer,
2862 size_t len,
2863 uint64_t offset);
2864
2865 /**
2866 * Write whole object, atomically replacing it.
2867 * @param write_op operation to add this action to
2868 * @param buffer bytes to write
2869 * @param len length of buffer
2870 */
2871 CEPH_RADOS_API void rados_write_op_write_full(rados_write_op_t write_op,
2872 const char *buffer,
2873 size_t len);
2874
2875 /**
2876 * Write the same buffer multiple times
2877 * @param write_op operation to add this action to
2878 * @param buffer bytes to write
2879 * @param data_len length of buffer
2880 * @param write_len total number of bytes to write, as a multiple of @p data_len
2881 * @param offset offset to write to
2882 */
2883 CEPH_RADOS_API void rados_write_op_writesame(rados_write_op_t write_op,
2884 const char *buffer,
2885 size_t data_len,
2886 size_t write_len,
2887 uint64_t offset);
2888
2889 /**
2890 * Append to end of object.
2891 * @param write_op operation to add this action to
2892 * @param buffer bytes to write
2893 * @param len length of buffer
2894 */
2895 CEPH_RADOS_API void rados_write_op_append(rados_write_op_t write_op,
2896 const char *buffer,
2897 size_t len);
2898 /**
2899 * Remove object
2900 * @param write_op operation to add this action to
2901 */
2902 CEPH_RADOS_API void rados_write_op_remove(rados_write_op_t write_op);
2903
2904 /**
2905 * Truncate an object
2906 * @param write_op operation to add this action to
2907 * @param offset Offset to truncate to
2908 */
2909 CEPH_RADOS_API void rados_write_op_truncate(rados_write_op_t write_op,
2910 uint64_t offset);
2911
2912 /**
2913 * Zero part of an object
2914 * @param write_op operation to add this action to
2915 * @param offset Offset to zero
2916 * @param len length to zero
2917 */
2918 CEPH_RADOS_API void rados_write_op_zero(rados_write_op_t write_op,
2919 uint64_t offset,
2920 uint64_t len);
2921
2922 /**
2923 * Execute an OSD class method on an object
2924 * See rados_exec() for general description.
2925 *
2926 * @param write_op operation to add this action to
2927 * @param cls the name of the class
2928 * @param method the name of the method
2929 * @param in_buf where to find input
2930 * @param in_len length of in_buf in bytes
2931 * @param prval where to store the return value from the method
2932 */
2933 CEPH_RADOS_API void rados_write_op_exec(rados_write_op_t write_op,
2934 const char *cls,
2935 const char *method,
2936 const char *in_buf,
2937 size_t in_len,
2938 int *prval);
2939
2940 /**
2941 * Set key/value pairs on an object
2942 *
2943 * @param write_op operation to add this action to
2944 * @param keys array of null-terminated char arrays representing keys to set
2945 * @param vals array of pointers to values to set
2946 * @param lens array of lengths corresponding to each value
2947 * @param num number of key/value pairs to set
2948 */
2949 CEPH_RADOS_API void rados_write_op_omap_set(rados_write_op_t write_op,
2950 char const* const* keys,
2951 char const* const* vals,
2952 const size_t *lens,
2953 size_t num);
2954
2955 /**
2956 * Remove key/value pairs from an object
2957 *
2958 * @param write_op operation to add this action to
2959 * @param keys array of null-terminated char arrays representing keys to remove
2960 * @param keys_len number of key/value pairs to remove
2961 */
2962 CEPH_RADOS_API void rados_write_op_omap_rm_keys(rados_write_op_t write_op,
2963 char const* const* keys,
2964 size_t keys_len);
2965
2966 /**
2967 * Remove all key/value pairs from an object
2968 *
2969 * @param write_op operation to add this action to
2970 */
2971 CEPH_RADOS_API void rados_write_op_omap_clear(rados_write_op_t write_op);
2972
2973 /**
2974 * Set allocation hint for an object
2975 *
2976 * @param write_op operation to add this action to
2977 * @param expected_object_size expected size of the object, in bytes
2978 * @param expected_write_size expected size of writes to the object, in bytes
2979 */
2980 CEPH_RADOS_API void rados_write_op_set_alloc_hint(rados_write_op_t write_op,
2981 uint64_t expected_object_size,
2982 uint64_t expected_write_size);
2983
2984 /**
2985 * Set allocation hint for an object
2986 *
2987 * @param write_op operation to add this action to
2988 * @param expected_object_size expected size of the object, in bytes
2989 * @param expected_write_size expected size of writes to the object, in bytes
2990 * @param flags hints about future IO patterns
2991 */
2992 CEPH_RADOS_API void rados_write_op_set_alloc_hint2(rados_write_op_t write_op,
2993 uint64_t expected_object_size,
2994 uint64_t expected_write_size,
2995 uint32_t flags);
2996
2997 /**
2998 * Perform a write operation synchronously
2999 * @param write_op operation to perform
3000 * @param io the ioctx that the object is in
3001 * @param oid the object id
3002 * @param mtime the time to set the mtime to, NULL for the current time
3003 * @param flags flags to apply to the entire operation (LIBRADOS_OPERATION_*)
3004 */
3005 CEPH_RADOS_API int rados_write_op_operate(rados_write_op_t write_op,
3006 rados_ioctx_t io,
3007 const char *oid,
3008 time_t *mtime,
3009 int flags);
3010 /**
3011 * Perform a write operation synchronously
3012 * @param write_op operation to perform
3013 * @param io the ioctx that the object is in
3014 * @param oid the object id
3015 * @param mtime the time to set the mtime to (as a struct timespec), NULL for the current time
3016 * @param flags flags to apply to the entire operation (LIBRADOS_OPERATION_*)
3017 */
3018
3019 CEPH_RADOS_API int rados_write_op_operate2(rados_write_op_t write_op,
3020 rados_ioctx_t io,
3021 const char *oid,
3022 struct timespec *mtime,
3023 int flags);
3024
3025 /**
3026 * Perform a write operation asynchronously
3027 * @param write_op operation to perform
3028 * @param io the ioctx that the object is in
3029 * @param completion what to do when operation has been attempted
3030 * @param oid the object id
3031 * @param mtime the time to set the mtime to, NULL for the current time
3032 * @param flags flags to apply to the entire operation (LIBRADOS_OPERATION_*)
3033 */
3034 CEPH_RADOS_API int rados_aio_write_op_operate(rados_write_op_t write_op,
3035 rados_ioctx_t io,
3036 rados_completion_t completion,
3037 const char *oid,
3038 time_t *mtime,
3039 int flags);
3040
3041 /**
3042 * Create a new rados_read_op_t read operation. This will store all
3043 * actions to be performed atomically. You must call
3044 * rados_release_read_op when you are finished with it (after it
3045 * completes, or you decide not to send it in the first place).
3046 *
3047 * @returns non-NULL on success, NULL on memory allocation error.
3048 */
3049 CEPH_RADOS_API rados_read_op_t rados_create_read_op(void);
3050
3051 /**
3052 * Free a rados_read_op_t, must be called when you're done with it.
3053 * @param read_op operation to deallocate, created with rados_create_read_op
3054 */
3055 CEPH_RADOS_API void rados_release_read_op(rados_read_op_t read_op);
3056
3057 /**
3058 * Set flags for the last operation added to this read_op.
3059 * At least one op must have been added to the read_op.
3060 * @param flags see librados.h constants beginning with LIBRADOS_OP_FLAG
3061 */
3062 CEPH_RADOS_API void rados_read_op_set_flags(rados_read_op_t read_op, int flags);
3063
3064 /**
3065 * Ensure that the object exists before reading
3066 * @param read_op operation to add this action to
3067 */
3068 CEPH_RADOS_API void rados_read_op_assert_exists(rados_read_op_t read_op);
3069
3070 /**
3071 * Ensure that the object exists and that its internal version
3072 * number is equal to "ver" before reading. "ver" should be a
3073 * version number previously obtained with rados_get_last_version().
3074 * - If the object's version is greater than the asserted version
3075 * then rados_read_op_operate will return -ERANGE instead of
3076 * executing the op.
3077 * - If the object's version is less than the asserted version
3078 * then rados_read_op_operate will return -EOVERFLOW instead
3079 * of executing the op.
3080 * @param read_op operation to add this action to
3081 * @param ver object version number
3082 */
3083 CEPH_RADOS_API void rados_read_op_assert_version(rados_read_op_t read_op, uint64_t ver);
3084
3085 /**
3086 * Ensure that given object range (extent) satisfies comparison.
3087 *
3088 * @param read_op operation to add this action to
3089 * @param cmp_buf buffer containing bytes to be compared with object contents
3090 * @param cmp_len length to compare and size of @p cmp_buf in bytes
3091 * @param off object byte offset at which to start the comparison
3092 * @param prval returned result of comparison, 0 on success, negative error code
3093 * on failure, (-MAX_ERRNO - mismatch_off) on mismatch
3094 */
3095 CEPH_RADOS_API void rados_read_op_cmpext(rados_read_op_t read_op,
3096 const char *cmp_buf,
3097 size_t cmp_len,
3098 uint64_t off,
3099 int *prval);
3100
3101 /**
3102 * Ensure that an xattr satisfies a comparison.
3103 * If the comparison is not satisfied, the return code of the
3104 * operation will be -ECANCELED
3105 * @param read_op operation to add this action to
3106 * @param name name of the xattr to look up
3107 * @param comparison_operator currently undocumented, look for
3108 * LIBRADOS_CMPXATTR_OP_EQ in librados.h
3109 * @param value buffer to compare actual xattr value to
3110 * @param value_len length of buffer to compare actual xattr value to
3111 */
3112 CEPH_RADOS_API void rados_read_op_cmpxattr(rados_read_op_t read_op,
3113 const char *name,
3114 uint8_t comparison_operator,
3115 const char *value,
3116 size_t value_len);
3117
3118 /**
3119 * Start iterating over xattrs on an object.
3120 *
3121 * @param read_op operation to add this action to
3122 * @param iter where to store the iterator
3123 * @param prval where to store the return value of this action
3124 */
3125 CEPH_RADOS_API void rados_read_op_getxattrs(rados_read_op_t read_op,
3126 rados_xattrs_iter_t *iter,
3127 int *prval);
3128
3129 /**
3130 * Ensure that an omap value satisfies a comparison,
3131 * with the supplied value on the right hand side (i.e.
3132 * for OP_LT, the comparison is actual_value < value).
3133 *
3134 * @param read_op operation to add this action to
3135 * @param key which omap value to compare
3136 * @param comparison_operator one of LIBRADOS_CMPXATTR_OP_EQ,
3137 LIBRADOS_CMPXATTR_OP_LT, or LIBRADOS_CMPXATTR_OP_GT
3138 * @param val value to compare with
3139 * @param val_len length of value in bytes
3140 * @param prval where to store the return value from this action
3141 */
3142 CEPH_RADOS_API void rados_read_op_omap_cmp(rados_read_op_t read_op,
3143 const char *key,
3144 uint8_t comparison_operator,
3145 const char *val,
3146 size_t val_len,
3147 int *prval);
3148
3149 /**
3150 * Get object size and mtime
3151 * @param read_op operation to add this action to
3152 * @param psize where to store object size
3153 * @param pmtime where to store modification time
3154 * @param prval where to store the return value of this action
3155 */
3156 CEPH_RADOS_API void rados_read_op_stat(rados_read_op_t read_op,
3157 uint64_t *psize,
3158 time_t *pmtime,
3159 int *prval);
3160
3161 /**
3162 * Read bytes from offset into buffer.
3163 *
3164 * bytes_read will be filled with the number of bytes read if successful.
3165 * A short read can only occur if the read reaches the end of the
3166 * object.
3167 *
3168 * @param read_op operation to add this action to
3169 * @param offset offset to read from
3170 * @param len length of buffer
3171 * @param buffer where to put the data
3172 * @param bytes_read where to store the number of bytes read by this action
3173 * @param prval where to store the return value of this action
3174 */
3175 CEPH_RADOS_API void rados_read_op_read(rados_read_op_t read_op,
3176 uint64_t offset,
3177 size_t len,
3178 char *buffer,
3179 size_t *bytes_read,
3180 int *prval);
3181
3182 /**
3183 * Compute checksum from object data
3184 *
3185 * @param read_op operation to add this action to
3186 * @param type the checksum algorithm to utilize
3187 * @param init_value the init value for the algorithm
3188 * @param init_value_len the length of the init value
3189 * @param offset the offset to start checksumming in the object
3190 * @param len the number of bytes to checksum
3191 * @param chunk_size optional length-aligned chunk size for checksums
3192 * @param pchecksum where to store the checksum result for this action
3193 * @param checksum_len the number of bytes available for the result
3194 * (size of the pchecksum buffer)
3195 * @param prval where to store the return value for this action
3196 */
3197 CEPH_RADOS_API void rados_read_op_checksum(rados_read_op_t read_op,
3198 rados_checksum_type_t type,
3199 const char *init_value,
3200 size_t init_value_len,
3201 uint64_t offset, size_t len,
3202 size_t chunk_size, char *pchecksum,
3203 size_t checksum_len, int *prval);
3204
3205 /**
3206 * Execute an OSD class method on an object
3207 * See rados_exec() for general description.
3208 *
3209 * The output buffer is allocated on the heap; the caller is
3210 * expected to release that memory with rados_buffer_free(). The
3211 * buffer and length pointers can all be NULL, in which case they are
3212 * not filled in.
3213 *
3214 * @param read_op operation to add this action to
3215 * @param cls the name of the class
3216 * @param method the name of the method
3217 * @param in_buf where to find input
3218 * @param in_len length of in_buf in bytes
3219 * @param out_buf where to put librados-allocated output buffer
3220 * @param out_len length of out_buf in bytes
3221 * @param prval where to store the return value from the method
3222 */
3223 CEPH_RADOS_API void rados_read_op_exec(rados_read_op_t read_op,
3224 const char *cls,
3225 const char *method,
3226 const char *in_buf,
3227 size_t in_len,
3228 char **out_buf,
3229 size_t *out_len,
3230 int *prval);
3231
3232 /**
3233 * Execute an OSD class method on an object
3234 * See rados_exec() for general description.
3235 *
3236 * If the output buffer is too small, prval will
3237 * be set to -ERANGE and used_len will be 0.
3238 *
3239 * @param read_op operation to add this action to
3240 * @param cls the name of the class
3241 * @param method the name of the method
3242 * @param in_buf where to find input
3243 * @param in_len length of in_buf in bytes
3244 * @param out_buf user-provided buffer to read into
3245 * @param out_len length of out_buf in bytes
3246 * @param used_len where to store the number of bytes read into out_buf
3247 * @param prval where to store the return value from the method
3248 */
3249 CEPH_RADOS_API void rados_read_op_exec_user_buf(rados_read_op_t read_op,
3250 const char *cls,
3251 const char *method,
3252 const char *in_buf,
3253 size_t in_len,
3254 char *out_buf,
3255 size_t out_len,
3256 size_t *used_len,
3257 int *prval);
3258
3259 /**
3260 * Start iterating over key/value pairs on an object.
3261 *
3262 * They will be returned sorted by key.
3263 *
3264 * @param read_op operation to add this action to
3265 * @param start_after list keys starting after start_after
3266 * @param filter_prefix list only keys beginning with filter_prefix
3267 * @param max_return list no more than max_return key/value pairs
3268 * @param iter where to store the iterator
3269 * @param prval where to store the return value from this action
3270 */
3271 CEPH_RADOS_API void rados_read_op_omap_get_vals(rados_read_op_t read_op,
3272 const char *start_after,
3273 const char *filter_prefix,
3274 uint64_t max_return,
3275 rados_omap_iter_t *iter,
3276 int *prval)
3277 __attribute__((deprecated)); /* use v2 below */
3278
3279 /**
3280 * Start iterating over key/value pairs on an object.
3281 *
3282 * They will be returned sorted by key.
3283 *
3284 * @param read_op operation to add this action to
3285 * @param start_after list keys starting after start_after
3286 * @param filter_prefix list only keys beginning with filter_prefix
3287 * @param max_return list no more than max_return key/value pairs
3288 * @param iter where to store the iterator
3289 * @param pmore flag indicating whether there are more keys to fetch
3290 * @param prval where to store the return value from this action
3291 */
3292 CEPH_RADOS_API void rados_read_op_omap_get_vals2(rados_read_op_t read_op,
3293 const char *start_after,
3294 const char *filter_prefix,
3295 uint64_t max_return,
3296 rados_omap_iter_t *iter,
3297 unsigned char *pmore,
3298 int *prval);
3299
3300 /**
3301 * Start iterating over keys on an object.
3302 *
3303 * They will be returned sorted by key, and the iterator
3304 * will fill in NULL for all values if specified.
3305 *
3306 * @param read_op operation to add this action to
3307 * @param start_after list keys starting after start_after
3308 * @param max_return list no more than max_return keys
3309 * @param iter where to store the iterator
3310 * @param prval where to store the return value from this action
3311 */
3312 CEPH_RADOS_API void rados_read_op_omap_get_keys(rados_read_op_t read_op,
3313 const char *start_after,
3314 uint64_t max_return,
3315 rados_omap_iter_t *iter,
3316 int *prval)
3317 __attribute__((deprecated)); /* use v2 below */
3318
3319 /**
3320 * Start iterating over keys on an object.
3321 *
3322 * They will be returned sorted by key, and the iterator
3323 * will fill in NULL for all values if specified.
3324 *
3325 * @param read_op operation to add this action to
3326 * @param start_after list keys starting after start_after
3327 * @param max_return list no more than max_return keys
3328 * @param iter where to store the iterator
3329 * @param pmore flag indicating whether there are more keys to fetch
3330 * @param prval where to store the return value from this action
3331 */
3332 CEPH_RADOS_API void rados_read_op_omap_get_keys2(rados_read_op_t read_op,
3333 const char *start_after,
3334 uint64_t max_return,
3335 rados_omap_iter_t *iter,
3336 unsigned char *pmore,
3337 int *prval);
3338
3339 /**
3340 * Start iterating over specific key/value pairs
3341 *
3342 * They will be returned sorted by key.
3343 *
3344 * @param read_op operation to add this action to
3345 * @param keys array of pointers to null-terminated keys to get
3346 * @param keys_len the number of strings in keys
3347 * @param iter where to store the iterator
3348 * @param prval where to store the return value from this action
3349 */
3350 CEPH_RADOS_API void rados_read_op_omap_get_vals_by_keys(rados_read_op_t read_op,
3351 char const* const* keys,
3352 size_t keys_len,
3353 rados_omap_iter_t *iter,
3354 int *prval);
3355
3356 /**
3357 * Perform a read operation synchronously
3358 * @param read_op operation to perform
3359 * @param io the ioctx that the object is in
3360 * @param oid the object id
3361 * @param flags flags to apply to the entire operation (LIBRADOS_OPERATION_*)
3362 */
3363 CEPH_RADOS_API int rados_read_op_operate(rados_read_op_t read_op,
3364 rados_ioctx_t io,
3365 const char *oid,
3366 int flags);
3367
3368 /**
3369 * Perform a read operation asynchronously
3370 * @param read_op operation to perform
3371 * @param io the ioctx that the object is in
3372 * @param completion what to do when operation has been attempted
3373 * @param oid the object id
3374 * @param flags flags to apply to the entire operation (LIBRADOS_OPERATION_*)
3375 */
3376 CEPH_RADOS_API int rados_aio_read_op_operate(rados_read_op_t read_op,
3377 rados_ioctx_t io,
3378 rados_completion_t completion,
3379 const char *oid,
3380 int flags);
3381
3382 /** @} Object Operations */
3383
3384 /**
3385 * Take an exclusive lock on an object.
3386 *
3387 * @param io the context to operate in
3388 * @param oid the name of the object
3389 * @param name the name of the lock
3390 * @param cookie user-defined identifier for this instance of the lock
3391 * @param desc user-defined lock description
3392 * @param duration the duration of the lock. Set to NULL for infinite duration.
3393 * @param flags lock flags
3394 * @returns 0 on success, negative error code on failure
3395 * @returns -EBUSY if the lock is already held by another (client, cookie) pair
3396 * @returns -EEXIST if the lock is already held by the same (client, cookie) pair
3397 */
3398 CEPH_RADOS_API int rados_lock_exclusive(rados_ioctx_t io, const char * oid,
3399 const char * name, const char * cookie,
3400 const char * desc,
3401 struct timeval * duration,
3402 uint8_t flags);
3403
3404 /**
3405 * Take a shared lock on an object.
3406 *
3407 * @param io the context to operate in
3408 * @param o the name of the object
3409 * @param name the name of the lock
3410 * @param cookie user-defined identifier for this instance of the lock
3411 * @param tag The tag of the lock
3412 * @param desc user-defined lock description
3413 * @param duration the duration of the lock. Set to NULL for infinite duration.
3414 * @param flags lock flags
3415 * @returns 0 on success, negative error code on failure
3416 * @returns -EBUSY if the lock is already held by another (client, cookie) pair
3417 * @returns -EEXIST if the lock is already held by the same (client, cookie) pair
3418 */
3419 CEPH_RADOS_API int rados_lock_shared(rados_ioctx_t io, const char * o,
3420 const char * name, const char * cookie,
3421 const char * tag, const char * desc,
3422 struct timeval * duration, uint8_t flags);
3423
3424 /**
3425 * Release a shared or exclusive lock on an object.
3426 *
3427 * @param io the context to operate in
3428 * @param o the name of the object
3429 * @param name the name of the lock
3430 * @param cookie user-defined identifier for the instance of the lock
3431 * @returns 0 on success, negative error code on failure
3432 * @returns -ENOENT if the lock is not held by the specified (client, cookie) pair
3433 */
3434 CEPH_RADOS_API int rados_unlock(rados_ioctx_t io, const char *o,
3435 const char *name, const char *cookie);
3436
3437 /**
3438 * Asynchronously release a shared or exclusive lock on an object.
3439 *
3440 * @param io the context to operate in
3441 * @param o the name of the object
3442 * @param name the name of the lock
3443 * @param cookie user-defined identifier for the instance of the lock
3444 * @param completion what to do when operation has been attempted
3445 * @returns 0 on success, negative error code on failure
3446 */
3447 CEPH_RADOS_API int rados_aio_unlock(rados_ioctx_t io, const char *o,
3448 const char *name, const char *cookie,
3449 rados_completion_t completion);
3450
3451 /**
3452 * List clients that have locked the named object lock and information about
3453 * the lock.
3454 *
3455 * The number of bytes required in each buffer is put in the
3456 * corresponding size out parameter. If any of the provided buffers
3457 * are too short, -ERANGE is returned after these sizes are filled in.
3458 *
3459 * @param io the context to operate in
3460 * @param o the name of the object
3461 * @param name the name of the lock
3462 * @param exclusive where to store whether the lock is exclusive (1) or shared (0)
3463 * @param tag where to store the tag associated with the object lock
3464 * @param tag_len number of bytes in tag buffer
3465 * @param clients buffer in which locker clients are stored, separated by '\0'
3466 * @param clients_len number of bytes in the clients buffer
3467 * @param cookies buffer in which locker cookies are stored, separated by '\0'
3468 * @param cookies_len number of bytes in the cookies buffer
3469 * @param addrs buffer in which locker addresses are stored, separated by '\0'
3470 * @param addrs_len number of bytes in the addrs buffer
3471 * @returns number of lockers on success, negative error code on failure
3472 * @returns -ERANGE if any of the buffers are too short
3473 */
3474 CEPH_RADOS_API ssize_t rados_list_lockers(rados_ioctx_t io, const char *o,
3475 const char *name, int *exclusive,
3476 char *tag, size_t *tag_len,
3477 char *clients, size_t *clients_len,
3478 char *cookies, size_t *cookies_len,
3479 char *addrs, size_t *addrs_len);
3480
3481 /**
3482 * Release a shared or exclusive lock on an object that is held by the
3483 * specified client.
3484 *
3485 * @param io the context to operate in
3486 * @param o the name of the object
3487 * @param name the name of the lock
3488 * @param client the client currently holding the lock
3489 * @param cookie user-defined identifier for the instance of the lock
3490 * @returns 0 on success, negative error code on failure
3491 * @returns -ENOENT if the lock is not held by the specified (client, cookie) pair
3492 * @returns -EINVAL if the client cannot be parsed
3493 */
3494 CEPH_RADOS_API int rados_break_lock(rados_ioctx_t io, const char *o,
3495 const char *name, const char *client,
3496 const char *cookie);
3497
3498 /**
3499 * Blacklists the specified client from the OSDs
3500 *
 * NOTE(review): client_address is declared as a non-const char *; whether
 * the implementation modifies the string is not visible from this header.
 *
3501 * @param cluster cluster handle
3502 * @param client_address client address
3503 * @param expire_seconds number of seconds to blacklist (0 for default)
3504 * @returns 0 on success, negative error code on failure
3505 */
3506 CEPH_RADOS_API int rados_blacklist_add(rados_t cluster,
3507 char *client_address,
3508 uint32_t expire_seconds);
3509
3510 /**
3511 * @name Mon/OSD/PG Commands
3512 *
3513 * These interfaces send commands to the monitors, ceph-mgr, OSDs, or PGs.
3514 *
3515 * @{
3516 */
3517
3518 /**
3519 * Send monitor command.
3520 *
3521 * @note Takes command string in carefully-formatted JSON; must match
3522 * defined commands, types, etc.
3523 *
3524 * The result buffers are allocated on the heap; the caller is
3525 * expected to release that memory with rados_buffer_free(). The
3526 * buffer and length pointers can all be NULL, in which case they are
3527 * not filled in.
3528 *
3529 * @param cluster cluster handle
3530 * @param cmd an array of char *'s representing the command
3531 * @param cmdlen count of valid entries in cmd
3532 * @param inbuf any bulk input data (crush map, etc.)
 * @param inbuflen input buffer length
3533 * @param outbuf double pointer to output buffer
3534 * @param outbuflen pointer to output buffer length
3535 * @param outs double pointer to status string
3536 * @param outslen pointer to status string length
3537 * @returns 0 on success, negative error code on failure
3538 */
3539 CEPH_RADOS_API int rados_mon_command(rados_t cluster, const char **cmd,
3540 size_t cmdlen, const char *inbuf,
3541 size_t inbuflen, char **outbuf,
3542 size_t *outbuflen, char **outs,
3543 size_t *outslen);
3544
3545 /**
3546 * Send ceph-mgr command.
3547 *
3548 * @note Takes command string in carefully-formatted JSON; must match
3549 * defined commands, types, etc.
3550 *
3551 * The result buffers are allocated on the heap; the caller is
3552 * expected to release that memory with rados_buffer_free(). The
3553 * buffer and length pointers can all be NULL, in which case they are
3554 * not filled in.
3555 *
3556 * @param cluster cluster handle
3557 * @param cmd an array of char *'s representing the command
3558 * @param cmdlen count of valid entries in cmd
3559 * @param inbuf any bulk input data (crush map, etc.)
 * @param inbuflen input buffer length
3560 * @param outbuf double pointer to output buffer
3561 * @param outbuflen pointer to output buffer length
3562 * @param outs double pointer to status string
3563 * @param outslen pointer to status string length
3564 * @returns 0 on success, negative error code on failure
3565 */
3566 CEPH_RADOS_API int rados_mgr_command(rados_t cluster, const char **cmd,
3567 size_t cmdlen, const char *inbuf,
3568 size_t inbuflen, char **outbuf,
3569 size_t *outbuflen, char **outs,
3570 size_t *outslen);
3571
3572 /**
3573 * Send monitor command to a specific monitor.
3574 *
3575 * @note Takes command string in carefully-formatted JSON; must match
3576 * defined commands, types, etc.
3577 *
3578 * The result buffers are allocated on the heap; the caller is
3579 * expected to release that memory with rados_buffer_free(). The
3580 * buffer and length pointers can all be NULL, in which case they are
3581 * not filled in.
3582 *
3583 * @param cluster cluster handle
3584 * @param name target monitor's name
3585 * @param cmd an array of char *'s representing the command
3586 * @param cmdlen count of valid entries in cmd
3587 * @param inbuf any bulk input data (crush map, etc.)
 * @param inbuflen input buffer length
3588 * @param outbuf double pointer to output buffer
3589 * @param outbuflen pointer to output buffer length
3590 * @param outs double pointer to status string
3591 * @param outslen pointer to status string length
3592 * @returns 0 on success, negative error code on failure
3593 */
3594 CEPH_RADOS_API int rados_mon_command_target(rados_t cluster, const char *name,
3595 const char **cmd, size_t cmdlen,
3596 const char *inbuf, size_t inbuflen,
3597 char **outbuf, size_t *outbuflen,
3598 char **outs, size_t *outslen);
3599
3600 /**
3601 * Free a rados-allocated buffer.
3602 *
3603 * Release memory allocated by librados calls like rados_mon_command(),
 * i.e. the heap-allocated result buffers those calls hand back to the caller.
3604 *
3605 * @param buf buffer pointer
3606 */
3607 CEPH_RADOS_API void rados_buffer_free(char *buf);
3608
/**
 * Send a command to an OSD.
 *
 * NOTE(review): this declaration carried no documentation. The parameter
 * descriptions below are presumed to mirror rados_mon_command(), whose
 * signature differs only in the osdid argument; confirm against the
 * implementation.
 *
 * @param cluster cluster handle
 * @param osdid presumably the id of the OSD the command is sent to
 * @param cmd an array of char *'s representing the command
 * @param cmdlen count of valid entries in cmd
 * @param inbuf any bulk input data
 * @param inbuflen input buffer length
 * @param outbuf double pointer to output buffer
 * @param outbuflen pointer to output buffer length
 * @param outs double pointer to status string
 * @param outslen pointer to status string length
 * @returns presumably 0 on success, negative error code on failure
 */
3609 CEPH_RADOS_API int rados_osd_command(rados_t cluster, int osdid,
3610 const char **cmd, size_t cmdlen,
3611 const char *inbuf, size_t inbuflen,
3612 char **outbuf, size_t *outbuflen,
3613 char **outs, size_t *outslen);
3614
/**
 * Send a command relating to a PG.
 *
 * NOTE(review): this declaration carried no documentation. The parameter
 * descriptions below are presumed to mirror rados_mon_command(), whose
 * signature differs only in the pgstr argument; confirm against the
 * implementation.
 *
 * @param cluster cluster handle
 * @param pgstr presumably the target PG, identified in string form
 * @param cmd an array of char *'s representing the command
 * @param cmdlen count of valid entries in cmd
 * @param inbuf any bulk input data
 * @param inbuflen input buffer length
 * @param outbuf double pointer to output buffer
 * @param outbuflen pointer to output buffer length
 * @param outs double pointer to status string
 * @param outslen pointer to status string length
 * @returns presumably 0 on success, negative error code on failure
 */
3615 CEPH_RADOS_API int rados_pg_command(rados_t cluster, const char *pgstr,
3616 const char **cmd, size_t cmdlen,
3617 const char *inbuf, size_t inbuflen,
3618 char **outbuf, size_t *outbuflen,
3619 char **outs, size_t *outslen);
3620
/*
 * NOTE(review): rados_mgr_command() is already declared, with documentation,
 * earlier in this section. This second declaration has the same signature
 * and appears to be redundant.
 */
3621 CEPH_RADOS_API int rados_mgr_command(rados_t cluster,
3622 const char **cmd, size_t cmdlen,
3623 const char *inbuf, size_t inbuflen,
3624 char **outbuf, size_t *outbuflen,
3625 char **outs, size_t *outslen);
3626
3627 /*
3628 * This is not a doxygen comment leadin, because doxygen breaks on
3629 * a typedef with function params and returns, and I can't figure out
3630 * how to fix it.
3631 *
3632 * Monitor cluster log
3633 *
3634 * Monitor events logged to the cluster log. The callback gets each
3635 * log entry both as a single formatted line and with each field in a
3636 * separate arg.
3637 *
3638 * Calling with a cb argument of NULL will deregister any previously
3639 * registered callback.
3640 *
 * The @param and @returns entries below describe rados_monitor_log(),
 * which takes a callback of this type; they are not the parameters of
 * the callback itself.
 *
3641 * @param cluster cluster handle
3642 * @param level minimum log level (debug, info, warn|warning, err|error)
3643 * @param cb callback to run for each log message. It MUST NOT block
3644 * nor call back into librados.
3645 * @param arg void argument to pass to cb
3646 *
3647 * @returns 0 on success, negative code on error
3648 */
3649 typedef void (*rados_log_callback_t)(void *arg,
3650 const char *line,
3651 const char *who,
3652 uint64_t sec, uint64_t nsec,
3653 uint64_t seq, const char *level,
3654 const char *msg);
3655
3656 /*
3657 * This is not a doxygen comment leadin, because doxygen breaks on
3658 * a typedef with function params and returns, and I can't figure out
3659 * how to fix it.
3660 *
3661 * Monitor cluster log
3662 *
3663 * Monitor events logged to the cluster log. The callback gets each
3664 * log entry both as a single formatted line and with each field in a
3665 * separate arg.
3666 *
 * Compared with rados_log_callback_t, this callback type takes an
 * additional name argument after who.
 *
3667 * Calling with a cb argument of NULL will deregister any previously
3668 * registered callback.
3669 *
 * The @param and @returns entries below describe rados_monitor_log2(),
 * which takes a callback of this type; they are not the parameters of
 * the callback itself.
 *
3670 * @param cluster cluster handle
3671 * @param level minimum log level (debug, info, warn|warning, err|error)
3672 * @param cb callback to run for each log message. It MUST NOT block
3673 * nor call back into librados.
3674 * @param arg void argument to pass to cb
3675 *
3676 * @returns 0 on success, negative code on error
3677 */
3678 typedef void (*rados_log_callback2_t)(void *arg,
3679 const char *line,
3680 const char *who,
3681 const char *name,
3682 uint64_t sec, uint64_t nsec,
3683 uint64_t seq, const char *level,
3684 const char *msg);
3685
/*
 * Monitor the cluster log with a rados_log_callback_t callback. The
 * cluster, level, cb and arg parameters and the return value are described
 * in the comment above the rados_log_callback_t typedef.
 */
3686 CEPH_RADOS_API int rados_monitor_log(rados_t cluster, const char *level,
3687 rados_log_callback_t cb, void *arg);
/*
 * Monitor the cluster log with a rados_log_callback2_t callback. The
 * cluster, level, cb and arg parameters and the return value are described
 * in the comment above the rados_log_callback2_t typedef.
 */
3688 CEPH_RADOS_API int rados_monitor_log2(rados_t cluster, const char *level,
3689 rados_log_callback2_t cb, void *arg);
3690
3691 /** @} Mon/OSD/PG commands */
3692
3693 /*
3694 * These methods are no longer supported and return -ENOTSUP where possible.
 *
 * Every declaration in this group carries __attribute__((deprecated)).
 * Only rados_objects_list_open() and rados_objects_list_next() return a
 * signed int; rados_objects_list_get_pg_hash_position() and
 * rados_objects_list_seek() return an unsigned uint32_t, and
 * rados_objects_list_close() returns void and so cannot report an error.
3695 */
3696 CEPH_RADOS_API int rados_objects_list_open(
3697 rados_ioctx_t io,
3698 rados_list_ctx_t *ctx) __attribute__((deprecated));
3699 CEPH_RADOS_API uint32_t rados_objects_list_get_pg_hash_position(
3700 rados_list_ctx_t ctx) __attribute__((deprecated));
3701 CEPH_RADOS_API uint32_t rados_objects_list_seek(
3702 rados_list_ctx_t ctx,
3703 uint32_t pos) __attribute__((deprecated));
3704 CEPH_RADOS_API int rados_objects_list_next(
3705 rados_list_ctx_t ctx,
3706 const char **entry,
3707 const char **key) __attribute__((deprecated));
3708 CEPH_RADOS_API void rados_objects_list_close(
3709 rados_list_ctx_t ctx) __attribute__((deprecated));
3710
3711
3712 #ifdef __cplusplus
3713 }
3714 #endif
3715
3716 #endif