]>
git.proxmox.com Git - mirror_ovs.git/blob - ovsdb/raft.h
2 * Copyright (c) 2017, 2018 Nicira, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
22 /* Implementation of the Raft consensus algorithm.
28 * Based on Diego Ongaro's Ph.D. thesis, "Consensus: Bridging Theory and
29 * Practice", available at https://ramcloud.stanford.edu/~ongaro/thesis.pdf.
30 * References to sections, pages, and figures are from this thesis. Quotations
31 * in comments also come from this work, in accordance with its license notice,
34 * Copyright 2014 by Diego Andres Ongaro. All Rights Reserved.
36 * This work is licensed under a Creative Commons Attribution-3.0 United
37 * States License. http://creativecommons.org/licenses/by/3.0/us/
43 * Raft allows a cluster of servers to maintain a distributed log. At any
44 * given time, at most one of N servers is a leader. The leader can propose
45 * appending a new entry to the log. If ratified by more than N/2 servers
46 * (including the leader), the new entry becomes permanently part of the log.
48 * This implementation gives each cluster a name, which is the same as the
49 * database schema's name, and a UUID, called the cluster ID. Each server has
50 * its own UUID, called the server ID, and a network address (e.g. an IP
51 * address and a port).
57 * The Raft code is not thread-safe. Even if separate threads access different
58 * Raft objects, the implementation can still make unsynchronized cross-thread
59 * accesses (from unixctl handlers).
/* Magic string "CLUSTER".
 *
 * NOTE(review): presumably the identifier that marks an on-disk log as
 * belonging to a clustered (Raft) database -- confirm against the code that
 * reads and writes the log header. */
#define RAFT_MAGIC "CLUSTER"
74 /* Setting up a new cluster or adding a new server to a cluster.
76 * These functions just write an on-disk file. They do not do any network
77 * activity, which means that the actual work of setting up or joining the
78 * cluster happens later after raft_open(). */
/* Writes the on-disk file 'file_name' for a new cluster.  This only writes
 * the file and does no network activity; the actual work of setting up the
 * cluster happens later, after raft_open().
 *
 * NOTE(review): this text is an extract of a rendered web page: every source
 * line carries a stray line number, and the numbering here jumps from 79 to
 * 81, so one parameter line appears to be missing from this declaration.
 * Confirm the full parameter list against the upstream header before relying
 * on this prototype. */
79 struct ovsdb_error
*raft_create_cluster(const char *file_name
,
81 const char *local_address
,
82 const struct json
*snapshot
)
83 OVS_WARN_UNUSED_RESULT
;
84 struct ovsdb_error
*raft_join_cluster(const char *file_name
, const char *name
,
85 const char *local_address
,
86 const struct sset
*remote_addrs
,
87 const struct uuid
*cid
)
88 OVS_WARN_UNUSED_RESULT
;
90 /* Reading metadata from a server log. */
91 struct raft_metadata
{
92 struct uuid sid
; /* Server ID. */
93 struct uuid cid
; /* Cluster ID. All-zeros if not yet known. */
94 char *name
; /* Schema name. */
95 char *local
; /* Local address. */
97 struct ovsdb_error
*raft_read_metadata(struct ovsdb_log
*,
98 struct raft_metadata
*)
99 OVS_WARN_UNUSED_RESULT
;
100 void raft_metadata_destroy(struct raft_metadata
*);
102 /* Starting up or shutting down a server within a cluster. */
103 struct ovsdb_error
*raft_open(struct ovsdb_log
*, struct raft
**)
104 OVS_WARN_UNUSED_RESULT
;
105 void raft_close(struct raft
*);
107 void raft_run(struct raft
*);
108 void raft_wait(struct raft
*);
/* Read-only queries about a server.
 *
 * NOTE(review): judging by the names, the first three return the cluster
 * name, the cluster ID, and this server's ID, and the last two report
 * whether the server is currently connected to the cluster and whether it is
 * the leader.  The returned pointers presumably remain owned by the 'struct
 * raft' -- confirm. */
const char *raft_get_name(const struct raft *);
const struct uuid *raft_get_cid(const struct raft *);
const struct uuid *raft_get_sid(const struct raft *);
bool raft_is_connected(const struct raft *);
bool raft_is_leader(const struct raft *);
/* Joining a cluster. */

/* NOTE(review): presumably returns true while the server is still in the
 * process of joining its cluster -- confirm. */
bool raft_is_joining(const struct raft *);
/* Leaving a cluster. */

/* NOTE(review): judging by the names, raft_leave() starts the process of
 * leaving the cluster, raft_is_leaving() reports that the process is under
 * way, and raft_left() reports that it has completed -- confirm. */
void raft_leave(struct raft *);
bool raft_is_leaving(const struct raft *);
bool raft_left(const struct raft *);
/* NOTE(review): presumably returns true once the server has hit a failure it
 * cannot recover from -- confirm what counts as failure here. */
bool raft_failed(const struct raft *);
128 /* Reading snapshots and log entries. */
/* Returns a JSON value for the next entry to read and takes a 'struct uuid
 * *eid' argument.
 *
 * NOTE(review): this declaration is truncated in this extract -- it ends on a
 * comma and the embedded numbering skips line 130, so at least one trailing
 * parameter and the closing parenthesis are missing.  'eid' is presumably an
 * output for the entry's ID.  Confirm against the upstream header. */
129 const struct json
*raft_next_entry(struct raft
*, struct uuid
*eid
,
/* NOTE(review): presumably returns true if raft_next_entry() has something
 * to return -- confirm. */
bool raft_has_next_entry(const struct raft *);

/* NOTE(review): judging by the names, these return the log index of the last
 * entry applied and of the last entry committed, respectively -- confirm the
 * exact meaning of each index. */
uint64_t raft_get_applied_index(const struct raft *);
uint64_t raft_get_commit_index(const struct raft *);
136 /* Writing log entries (executing commands). */
/* Status of a command (a log entry being written).
 *
 * NOTE(review): this definition is incomplete in this extract.  The embedded
 * numbering skips lines 139-141 (the enumerator that the "In progress"
 * comment describes is not visible), lines 143-145 (the opening of the
 * comment about failure statuses), and line 157 (presumably the closing
 * brace).  Restore the missing lines from the upstream header rather than
 * guessing at them. */
137 enum raft_command_status
{
138 /* In progress, please wait. */
142 RAFT_CMD_SUCCESS
, /* Committed. */
146 * A failure status does not always mean that the operation actually
147 * failed. In corner cases, it means that the log entry was committed but
148 * the message reporting success was not successfully received. Thus, this
149 * Raft implementation implements "at-least-once" rather than
150 * "exactly-once" semantics. */
151 RAFT_CMD_NOT_LEADER
, /* Failed because we are not the leader. */
152 RAFT_CMD_BAD_PREREQ
, /* Failed because prerequisite check failed. */
153 RAFT_CMD_LOST_LEADERSHIP
, /* Leadership lost after command initiation. */
154 RAFT_CMD_SHUTDOWN
, /* Raft server joining or left or shut down. */
155 RAFT_CMD_IO_ERROR
, /* I/O error. */
156 RAFT_CMD_TIMEOUT
, /* Request to remote leader timed out. */
/* Conversions between 'enum raft_command_status' values and strings.
 *
 * NOTE(review): raft_command_status_from_string() presumably returns true
 * and stores the parsed value through its second argument when the string
 * names a status, and returns false otherwise -- confirm. */
const char *raft_command_status_to_string(enum raft_command_status);
bool raft_command_status_from_string(const char *,
                                     enum raft_command_status *);
/* Starts executing a command (writing a log entry) with payload 'data' and
 * prerequisite 'prereq', returning a 'struct raft_command' that the caller
 * must not ignore (OVS_WARN_UNUSED_RESULT).
 *
 * NOTE(review): this declaration is truncated in this extract -- the embedded
 * numbering skips line 164, so at least one trailing parameter and the
 * closing parenthesis are missing.  'prereq' is presumably the ID of the
 * entry that must be current for the command to be accepted (compare
 * RAFT_CMD_BAD_PREREQ).  Confirm against the upstream header. */
161 struct raft_command
*raft_command_execute(struct raft
*,
162 const struct json
*data
,
163 const struct uuid
*prereq
,
165 OVS_WARN_UNUSED_RESULT
;
/* Operations on a 'struct raft_command'.
 *
 * raft_command_get_status() returns the command's current status as an
 * 'enum raft_command_status'.
 *
 * NOTE(review): judging by the names, raft_command_get_commit_index()
 * returns the log index at which the command was committed,
 * raft_command_unref() drops a reference (freeing the command with the last
 * one), and raft_command_wait() arranges for the poll loop to wake up when
 * the command's status changes -- confirm each. */
enum raft_command_status raft_command_get_status(const struct raft_command *);
uint64_t raft_command_get_commit_index(const struct raft_command *);
void raft_command_unref(struct raft_command *);
void raft_command_wait(const struct raft_command *);
171 /* Replacing the local log by a snapshot. */
172 bool raft_grew_lots(const struct raft
*);
173 uint64_t raft_get_log_length(const struct raft
*);
174 bool raft_may_snapshot(const struct raft
*);
175 struct ovsdb_error
*raft_store_snapshot(struct raft
*,
176 const struct json
*new_snapshot
)
177 OVS_WARN_UNUSED_RESULT
;
/* Cluster management. */

/* NOTE(review): judging by the names, raft_take_leadership() makes this
 * server try to become leader and raft_transfer_leadership() makes it hand
 * leadership to another server, with 'reason' presumably used only for
 * logging -- confirm. */
void raft_take_leadership(struct raft *);
void raft_transfer_leadership(struct raft *, const char *reason);

/* NOTE(review): presumably returns the ID of the most recent log entry
 * ("eid" = entry ID) -- confirm, including who owns the returned pointer. */
const struct uuid *raft_current_eid(const struct raft *);
184 #endif /* ovsdb/raft.h */