]> git.proxmox.com Git - mirror_ovs.git/blame - ovsdb/raft.h
Eliminate "whitelist" and "blacklist" terms.
[mirror_ovs.git] / ovsdb / raft.h
CommitLineData
1b1d2e6d
BP
1/*
2 * Copyright (c) 2017, 2018 Nicira, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef RAFT_H
18#define RAFT_H 1
19
20#include <stddef.h>
21
22/* Implementation of the Raft consensus algorithm.
23 *
24 *
25 * References
26 * ==========
27 *
28 * Based on Diego Ongaro's Ph.D. thesis, "Consensus: Bridging Theory and
29 * Practice", available at https://ramcloud.stanford.edu/~ongaro/thesis.pdf.
30 * References to sections, pages, and figures are from this thesis. Quotations
31 * in comments also come from this work, in accordance with its license notice,
32 * reproduced below:
33 *
34 * Copyright 2014 by Diego Andres Ongaro. All Rights Reserved.
35 *
36 * This work is licensed under a Creative Commons Attribution-3.0 United
37 * States License. http://creativecommons.org/licenses/by/3.0/us/
38 *
39 *
40 * Concepts
41 * ========
42 *
43 * Raft allows a cluster of servers to maintain a distributed log. At any
44 * given time, at most one of N servers is a leader. The leader can propose
45 * appending a new entry to the log. If ratified by more than N/2 servers
46 * (including the leader), the new entry becomes permanently part of the log.
47 *
48 * This implementation gives each cluster a name, which is the same as the
49 * database schema's name, and a UUID, called the cluster ID. Each server has
50 * its own UUID, called the server ID, and a network address (e.g. an IP
51 * address and a port).
52 *
53 *
54 * Thread-safety
55 * =============
56 *
57 * The Raft code is not thread-safe. Even if separate threads access different
58 * Raft objects, the implementation can still make unsynchronized cross-thread
59 * accesses (from unixctl handlers).
60 */
61
62#include <stdbool.h>
63#include <stdint.h>
64#include "compiler.h"
65#include "uuid.h"
66
67struct json;
68struct ovsdb_log;
69struct raft;
3423cd97 70struct simap;
1b1d2e6d
BP
71struct sset;
72
73#define RAFT_MAGIC "CLUSTER"
74
75/* Setting up a new cluster or adding a new server to a cluster.
76 *
77 * These functions just write an on-disk file. They do not do any network
78 * activity, which means that the actual work of setting up or joining the
79 * cluster happens later after raft_open(). */
80struct ovsdb_error *raft_create_cluster(const char *file_name,
81 const char *name,
82 const char *local_address,
83 const struct json *snapshot)
84 OVS_WARN_UNUSED_RESULT;
85struct ovsdb_error *raft_join_cluster(const char *file_name, const char *name,
86 const char *local_address,
87 const struct sset *remote_addrs,
88 const struct uuid *cid)
89 OVS_WARN_UNUSED_RESULT;
90
91/* Reading metadata from a server log. */
/* Metadata describing one server's log, as filled in by raft_read_metadata().
 *
 * NOTE(review): 'name' and 'local' are presumably heap-allocated strings that
 * raft_metadata_destroy() releases -- confirm against the implementation. */
92struct raft_metadata {
93 struct uuid sid; /* Server ID. */
94 struct uuid cid; /* Cluster ID. All-zeros if not yet known. */
95 char *name; /* Schema name. */
96 char *local; /* Local address. */
97};
98struct ovsdb_error *raft_read_metadata(struct ovsdb_log *,
99 struct raft_metadata *)
100 OVS_WARN_UNUSED_RESULT;
101void raft_metadata_destroy(struct raft_metadata *);
102
103/* Starting up or shutting down a server within a cluster. */
104struct ovsdb_error *raft_open(struct ovsdb_log *, struct raft **)
105 OVS_WARN_UNUSED_RESULT;
106void raft_close(struct raft *);
107
108void raft_run(struct raft *);
109void raft_wait(struct raft *);
110
111/* Information. */
112const char *raft_get_name(const struct raft *);
113const struct uuid *raft_get_cid(const struct raft *);
114const struct uuid *raft_get_sid(const struct raft *);
115bool raft_is_connected(const struct raft *);
116bool raft_is_leader(const struct raft *);
3423cd97 117void raft_get_memory_usage(const struct raft *, struct simap *usage);
1b1d2e6d
BP
118
119/* Joining a cluster. */
120bool raft_is_joining(const struct raft *);
121
122/* Leaving a cluster. */
123void raft_leave(struct raft *);
124bool raft_is_leaving(const struct raft *);
125bool raft_left(const struct raft *);
126
127/* Failure. */
128bool raft_failed(const struct raft *);
129
130/* Reading snapshots and log entries. */
131const struct json *raft_next_entry(struct raft *, struct uuid *eid,
132 bool *is_snapshot);
133bool raft_has_next_entry(const struct raft *);
134
135uint64_t raft_get_applied_index(const struct raft *);
136uint64_t raft_get_commit_index(const struct raft *);
137
138/* Writing log entries (executing commands). */
/* Status of a 'struct raft_command', as reported by
 * raft_command_get_status().  Values can be converted to and from strings
 * with raft_command_status_to_string() and
 * raft_command_status_from_string(). */
139enum raft_command_status {
140 /* In progress, please wait. */
141 RAFT_CMD_INCOMPLETE,
142
143 /* Success. */
144 RAFT_CMD_SUCCESS, /* Committed. */
145
146 /* Failure.
147 *
148 * A failure status does not always mean that the operation actually
149 * failed. In corner cases, it means that the log entry was committed but
150 * the message reporting success was not successfully received. Thus, this
151 * Raft implementation implements "at-least-once" rather than
152 * "exactly-once" semantics. */
153 RAFT_CMD_NOT_LEADER, /* Failed because we are not the leader. */
154 RAFT_CMD_BAD_PREREQ, /* Failed because prerequisite check failed. */
155 RAFT_CMD_LOST_LEADERSHIP, /* Leadership lost after command initiation. */
156 RAFT_CMD_SHUTDOWN, /* Raft server is joining, has left, or is shut down. */
157 RAFT_CMD_IO_ERROR, /* I/O error. */
158 RAFT_CMD_TIMEOUT, /* Request to remote leader timed out. */
159};
160const char *raft_command_status_to_string(enum raft_command_status);
161bool raft_command_status_from_string(const char *, enum raft_command_status *);
162
163struct raft_command *raft_command_execute(struct raft *,
164 const struct json *data,
165 const struct uuid *prereq,
166 struct uuid *result)
167 OVS_WARN_UNUSED_RESULT;
168enum raft_command_status raft_command_get_status(const struct raft_command *);
169uint64_t raft_command_get_commit_index(const struct raft_command *);
170void raft_command_unref(struct raft_command *);
171void raft_command_wait(const struct raft_command *);
172
173/* Replacing the local log by a snapshot. */
174bool raft_grew_lots(const struct raft *);
175uint64_t raft_get_log_length(const struct raft *);
176bool raft_may_snapshot(const struct raft *);
177struct ovsdb_error *raft_store_snapshot(struct raft *,
178 const struct json *new_snapshot)
179 OVS_WARN_UNUSED_RESULT;
180
181/* Cluster management. */
182void raft_take_leadership(struct raft *);
183void raft_transfer_leadership(struct raft *, const char *reason);
184
2cd62f75 185const struct uuid *raft_current_eid(const struct raft *);
1b1d2e6d 186#endif /* lib/raft.h */