]> git.proxmox.com Git - mirror_ovs.git/blame - ovsdb/raft.h
trigger: Free leaked ovsdb_schema
[mirror_ovs.git] / ovsdb / raft.h
CommitLineData
1b1d2e6d
BP
1/*
2 * Copyright (c) 2017, 2018 Nicira, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef RAFT_H
18#define RAFT_H 1
19
20#include <stddef.h>
21
22/* Implementation of the Raft consensus algorithm.
23 *
24 *
25 * References
26 * ==========
27 *
28 * Based on Diego Ongaro's Ph.D. thesis, "Consensus: Bridging Theory and
29 * Practice", available at https://ramcloud.stanford.edu/~ongaro/thesis.pdf.
30 * References to sections, pages, and figures are from this thesis. Quotations
31 * in comments also come from this work, in accordance with its license notice,
32 * reproduced below:
33 *
34 * Copyright 2014 by Diego Andres Ongaro. All Rights Reserved.
35 *
36 * This work is licensed under a Creative Commons Attribution-3.0 United
37 * States License. http://creativecommons.org/licenses/by/3.0/us/
38 *
39 *
40 * Concepts
41 * ========
42 *
43 * Raft allows a cluster of servers to maintain a distributed log. At any
44 * given time, at most one of N servers is a leader. The leader can propose
45 * appending a new entry to the log. If ratified by more than N/2 servers
46 * (including the leader), the new entry becomes permanently part of the log.
47 *
48 * This implementation gives each cluster a name, which is the same as the
49 * database schema's name, and a UUID, called the cluster ID. Each server has
50 * its own UUID, called the server ID, and a network address (e.g. an IP
51 * address and a port).
52 *
53 *
54 * Thread-safety
55 * =============
56 *
57 * The Raft code is not thread-safe. Even if separate threads access different
58 * Raft objects, the implementation can still make unsynchronized cross-thread
59 * accesses (from unixctl handlers).
60 */
61
62#include <stdbool.h>
63#include <stdint.h>
64#include "compiler.h"
65#include "uuid.h"
66
67struct json;
68struct ovsdb_log;
69struct raft;
70struct sset;
71
72#define RAFT_MAGIC "CLUSTER"
73
74/* Setting up a new cluster or adding a new server to a cluster.
75 *
76 * These functions just write an on-disk file. They do not do any network
77 * activity, which means that the actual work of setting up or joining the
78 * cluster happens later after raft_open(). */
79struct ovsdb_error *raft_create_cluster(const char *file_name,
80 const char *name,
81 const char *local_address,
82 const struct json *snapshot)
83 OVS_WARN_UNUSED_RESULT;
84struct ovsdb_error *raft_join_cluster(const char *file_name, const char *name,
85 const char *local_address,
86 const struct sset *remote_addrs,
87 const struct uuid *cid)
88 OVS_WARN_UNUSED_RESULT;
89
90/* Reading metadata from a server log. */
/* Identifying metadata for one server's log: the server's and the cluster's
 * UUIDs, the schema name, and the server's local address.
 *
 * raft_read_metadata() takes a pointer to one of these together with an
 * ovsdb_log, and raft_metadata_destroy() takes a pointer to the same type.
 *
 * NOTE(review): 'name' and 'local' are presumably heap-allocated strings
 * owned by this structure and released by raft_metadata_destroy() -- confirm
 * against the implementation in raft.c. */
91struct raft_metadata {
92 struct uuid sid; /* Server ID. */
93 struct uuid cid; /* Cluster ID. All-zeros if not yet known. */
94 char *name; /* Schema name. */
95 char *local; /* Local address. */
96};
97struct ovsdb_error *raft_read_metadata(struct ovsdb_log *,
98 struct raft_metadata *)
99 OVS_WARN_UNUSED_RESULT;
100void raft_metadata_destroy(struct raft_metadata *);
101
102/* Starting up or shutting down a server within a cluster. */
103struct ovsdb_error *raft_open(struct ovsdb_log *, struct raft **)
104 OVS_WARN_UNUSED_RESULT;
105void raft_close(struct raft *);
106
107void raft_run(struct raft *);
108void raft_wait(struct raft *);
109
110/* Information. */
111const char *raft_get_name(const struct raft *);
112const struct uuid *raft_get_cid(const struct raft *);
113const struct uuid *raft_get_sid(const struct raft *);
114bool raft_is_connected(const struct raft *);
115bool raft_is_leader(const struct raft *);
116
117/* Joining a cluster. */
118bool raft_is_joining(const struct raft *);
119
120/* Leaving a cluster. */
121void raft_leave(struct raft *);
122bool raft_is_leaving(const struct raft *);
123bool raft_left(const struct raft *);
124
125/* Failure. */
126bool raft_failed(const struct raft *);
127
128/* Reading snapshots and log entries. */
129const struct json *raft_next_entry(struct raft *, struct uuid *eid,
130 bool *is_snapshot);
131bool raft_has_next_entry(const struct raft *);
132
133uint64_t raft_get_applied_index(const struct raft *);
134uint64_t raft_get_commit_index(const struct raft *);
135
136/* Writing log entries (executing commands). */
/* Status of a Raft command, as returned by raft_command_get_status() for a
 * 'struct raft_command' (the type that raft_command_execute() returns).
 *
 * RAFT_CMD_INCOMPLETE means the command is still in progress,
 * RAFT_CMD_SUCCESS means it was committed, and every enumerator after that
 * names a reason for failure.  RAFT_CMD_INCOMPLETE is the first enumerator
 * and therefore has the value 0.
 *
 * raft_command_status_to_string() and raft_command_status_from_string() are
 * declared alongside this enum for converting to and from strings. */
137enum raft_command_status {
138 /* In progress, please wait. */
139 RAFT_CMD_INCOMPLETE,
140
141 /* Success. */
142 RAFT_CMD_SUCCESS, /* Committed. */
143
144 /* Failure.
145 *
146 * A failure status does not always mean that the operation actually
147 * failed. In corner cases, it means that the log entry was committed but
148 * the message reporting success was not successfully received. Thus, this
149 * Raft implementation implements "at-least-once" rather than
150 * "exactly-once" semantics. */
151 RAFT_CMD_NOT_LEADER, /* Failed because we are not the leader. */
152 RAFT_CMD_BAD_PREREQ, /* Failed because prerequisite check failed. */
153 RAFT_CMD_LOST_LEADERSHIP, /* Leadership lost after command initiation. */
154 RAFT_CMD_SHUTDOWN, /* Raft server is joining, has left, or has shut down. */
155 RAFT_CMD_IO_ERROR, /* I/O error. */
156 RAFT_CMD_TIMEOUT, /* Request to remote leader timed out. */
157};
158const char *raft_command_status_to_string(enum raft_command_status);
159bool raft_command_status_from_string(const char *, enum raft_command_status *);
160
161struct raft_command *raft_command_execute(struct raft *,
162 const struct json *data,
163 const struct uuid *prereq,
164 struct uuid *result)
165 OVS_WARN_UNUSED_RESULT;
166enum raft_command_status raft_command_get_status(const struct raft_command *);
167uint64_t raft_command_get_commit_index(const struct raft_command *);
168void raft_command_unref(struct raft_command *);
169void raft_command_wait(const struct raft_command *);
170
171/* Replacing the local log by a snapshot. */
172bool raft_grew_lots(const struct raft *);
173uint64_t raft_get_log_length(const struct raft *);
174bool raft_may_snapshot(const struct raft *);
175struct ovsdb_error *raft_store_snapshot(struct raft *,
176 const struct json *new_snapshot)
177 OVS_WARN_UNUSED_RESULT;
178
179/* Cluster management. */
180void raft_take_leadership(struct raft *);
181void raft_transfer_leadership(struct raft *, const char *reason);
182
2cd62f75 183const struct uuid *raft_current_eid(const struct raft *);
1b1d2e6d 184#endif /* ovsdb/raft.h */