]> git.proxmox.com Git - mirror_ovs.git/blob - ovsdb/raft.h
ovsdb_monitor: Fix style of prototypes.
[mirror_ovs.git] / ovsdb / raft.h
1 /*
2 * Copyright (c) 2017, 2018 Nicira, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef RAFT_H
18 #define RAFT_H 1
19
20 #include <stddef.h>
21
22 /* Implementation of the Raft consensus algorithm.
23 *
24 *
25 * References
26 * ==========
27 *
28 * Based on Diego Ongaro's Ph.D. thesis, "Consensus: Bridging Theory and
29 * Practice", available at https://ramcloud.stanford.edu/~ongaro/thesis.pdf.
30 * References to sections, pages, and figures are from this thesis. Quotations
31 * in comments also come from this work, in accordance with its license notice,
32 * reproduced below:
33 *
34 * Copyright 2014 by Diego Andres Ongaro. All Rights Reserved.
35 *
36 * This work is licensed under a Creative Commons Attribution-3.0 United
37 * States License. http://creativecommons.org/licenses/by/3.0/us/
38 *
39 *
40 * Concepts
41 * ========
42 *
43 * Raft allows a cluster of servers to maintain a distributed log. At any
44 * given time, at most one of N servers is a leader. The leader can propose
45 * appending a new entry to the log. If ratified by more than N/2 servers
46 * (including the leader), the new entry becomes permanently part of the log.
47 *
48 * This implementation gives each cluster a name, which is the same as the
49 * database schema's name, and a UUID, called the cluster ID. Each server has
50 * its own UUID, called the server ID, and a network address (e.g. an IP
51 * address and a port).
52 *
53 *
54 * Thread-safety
55 * =============
56 *
57 * The Raft code is not thread-safe. Even if separate threads access different
58 * Raft objects, the implementation can still make unsynchronized cross-thread
59 * accesses (from unixctl handlers).
60 */
61
62 #include <stdbool.h>
63 #include <stdint.h>
64 #include "compiler.h"
65 #include "uuid.h"
66
67 struct json;
68 struct ovsdb_log;
69 struct raft;
70 struct sset;
71
72 #define RAFT_MAGIC "CLUSTER"
73
74 /* Setting up a new cluster or adding a new server to a cluster.
75 *
76 * These functions just write an on-disk file. They do not do any network
77 * activity, which means that the actual work of setting up or joining the
78 * cluster happens later after raft_open(). */
79 struct ovsdb_error *raft_create_cluster(const char *file_name,
80 const char *name,
81 const char *local_address,
82 const struct json *snapshot)
83 OVS_WARN_UNUSED_RESULT;
84 struct ovsdb_error *raft_join_cluster(const char *file_name, const char *name,
85 const char *local_address,
86 const struct sset *remote_addrs,
87 const struct uuid *cid)
88 OVS_WARN_UNUSED_RESULT;
89
90 /* Reading metadata from a server log. */
91 struct raft_metadata {
/* NOTE(review): 'name' and 'local' are presumably owned by this struct and
 * released by raft_metadata_destroy() -- confirm against the implementation. */
92 struct uuid sid; /* Server ID: this server's own UUID. */
93 struct uuid cid; /* Cluster ID. All-zeros if not yet known. */
94 char *name; /* Schema name, which is also the cluster's name. */
95 char *local; /* Local address (e.g. an IP address and a port). */
96 };
97 struct ovsdb_error *raft_read_metadata(struct ovsdb_log *,
98 struct raft_metadata *)
99 OVS_WARN_UNUSED_RESULT;
100 void raft_metadata_destroy(struct raft_metadata *);
101
102 /* Starting up or shutting down a server within a cluster. */
103 struct ovsdb_error *raft_open(struct ovsdb_log *, struct raft **)
104 OVS_WARN_UNUSED_RESULT;
105 void raft_close(struct raft *);
106
107 void raft_run(struct raft *);
108 void raft_wait(struct raft *);
109
110 /* Information. */
111 const char *raft_get_name(const struct raft *);
112 const struct uuid *raft_get_cid(const struct raft *);
113 const struct uuid *raft_get_sid(const struct raft *);
114 bool raft_is_connected(const struct raft *);
115 bool raft_is_leader(const struct raft *);
116
117 /* Joining a cluster. */
118 bool raft_is_joining(const struct raft *);
119
120 /* Leaving a cluster. */
121 void raft_leave(struct raft *);
122 bool raft_is_leaving(const struct raft *);
123 bool raft_left(const struct raft *);
124
125 /* Failure. */
126 bool raft_failed(const struct raft *);
127
128 /* Reading snapshots and log entries. */
129 const struct json *raft_next_entry(struct raft *, struct uuid *eid,
130 bool *is_snapshot);
131 bool raft_has_next_entry(const struct raft *);
132
133 uint64_t raft_get_applied_index(const struct raft *);
134 uint64_t raft_get_commit_index(const struct raft *);
135
136 /* Writing log entries (executing commands). */
137 enum raft_command_status {
/* Status of a command started with raft_command_execute(), as reported by
 * raft_command_get_status(). */
138 /* In progress, please wait. */
139 RAFT_CMD_INCOMPLETE,
140
141 /* Success. */
142 RAFT_CMD_SUCCESS, /* Committed. */
143
144 /* Failure.
145 *
146 * A failure status does not always mean that the operation actually
147 * failed. In corner cases, it means that the log entry was committed but
148 * the message reporting success was not successfully received. Thus, this
149 * Raft implementation implements "at-least-once" rather than
150 * "exactly-once" semantics. */
151 RAFT_CMD_NOT_LEADER, /* Failed because we are not the leader. */
152 RAFT_CMD_BAD_PREREQ, /* Failed because prerequisite check failed. */
153 RAFT_CMD_LOST_LEADERSHIP, /* Leadership lost after command initiation. */
154 RAFT_CMD_SHUTDOWN, /* Raft server is joining, has left, or shut down. */
155 RAFT_CMD_IO_ERROR, /* I/O error. */
156 RAFT_CMD_TIMEOUT, /* Request to remote leader timed out. */
157 };
158 const char *raft_command_status_to_string(enum raft_command_status);
159 bool raft_command_status_from_string(const char *, enum raft_command_status *);
160
161 struct raft_command *raft_command_execute(struct raft *,
162 const struct json *data,
163 const struct uuid *prereq,
164 struct uuid *result)
165 OVS_WARN_UNUSED_RESULT;
166 enum raft_command_status raft_command_get_status(const struct raft_command *);
167 uint64_t raft_command_get_commit_index(const struct raft_command *);
168 void raft_command_unref(struct raft_command *);
169 void raft_command_wait(const struct raft_command *);
170
171 /* Replacing the local log by a snapshot. */
172 bool raft_grew_lots(const struct raft *);
173 uint64_t raft_get_log_length(const struct raft *);
174 bool raft_may_snapshot(const struct raft *);
175 struct ovsdb_error *raft_store_snapshot(struct raft *,
176 const struct json *new_snapshot)
177 OVS_WARN_UNUSED_RESULT;
178
179 /* Cluster management. */
180 void raft_take_leadership(struct raft *);
181 void raft_transfer_leadership(struct raft *, const char *reason);
182
183 #endif /* ovsdb/raft.h */