]> git.proxmox.com Git - ceph.git/blob - ceph/src/mon/ElectionLogic.cc
import 15.2.0 Octopus source
[ceph.git] / ceph / src / mon / ElectionLogic.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15 #include "ElectionLogic.h"
16
17 #include "include/ceph_assert.h"
18 #include "common/dout.h"
19
20 #define dout_subsys ceph_subsys_mon
21 #undef dout_prefix
22 #define dout_prefix _prefix(_dout, epoch, elector)
23 static ostream& _prefix(std::ostream *_dout, epoch_t epoch, ElectionOwner* elector) {
24 return *_dout << "paxos." << elector->get_my_rank()
25 << ").electionLogic(" << epoch << ") ";
26 }
27 void ElectionLogic::init()
28 {
29 epoch = elector->read_persisted_epoch();
30 if (!epoch) {
31 ldout(cct, 1) << "init, first boot, initializing epoch at 1 " << dendl;
32 epoch = 1;
33 } else if (epoch % 2) {
34 ldout(cct, 1) << "init, last seen epoch " << epoch
35 << ", mid-election, bumping" << dendl;
36 ++epoch;
37 elector->persist_epoch(epoch);
38 } else {
39 ldout(cct, 1) << "init, last seen epoch " << epoch << dendl;
40 }
41 }
42
43 void ElectionLogic::bump_epoch(epoch_t e)
44 {
45 ldout(cct, 10) << __func__ << epoch << " to " << e << dendl;
46 ceph_assert(epoch <= e);
47 epoch = e;
48 elector->persist_epoch(epoch);
49 // clear up some state
50 electing_me = false;
51 acked_me.clear();
52 elector->notify_bump_epoch();
53 }
54
55 void ElectionLogic::declare_standalone_victory()
56 {
57 assert(elector->paxos_size() == 1 && elector->get_my_rank() == 0);
58 init();
59 bump_epoch(epoch+1);
60 }
61
62 void ElectionLogic::start()
63 {
64 if (!participating) {
65 ldout(cct, 0) << "not starting new election -- not participating" << dendl;
66 return;
67 }
68 ldout(cct, 5) << "start -- can i be leader?" << dendl;
69
70 acked_me.clear();
71 init();
72
73 // start by trying to elect me
74 if (epoch % 2 == 0) {
75 bump_epoch(epoch+1); // odd == election cycle
76 } else {
77 elector->validate_store();
78 }
79 electing_me = true;
80 acked_me.insert(elector->get_my_rank());
81 leader_acked = -1;
82
83 elector->propose_to_peers(epoch);
84 elector->_start();
85 }
86
87 void ElectionLogic::defer(int who)
88 {
89 ldout(cct, 5) << "defer to " << who << dendl;
90
91 if (electing_me) {
92 // drop out
93 acked_me.clear();
94 electing_me = false;
95 }
96
97 // ack them
98 leader_acked = who;
99 elector->_defer_to(who);
100 }
101
102 void ElectionLogic::end_election_period()
103 {
104 ldout(cct, 5) << "election period ended" << dendl;
105
106 // did i win?
107 if (electing_me &&
108 acked_me.size() > (elector->paxos_size() / 2)) {
109 // i win
110 declare_victory();
111 } else {
112 // whoever i deferred to didn't declare victory quickly enough.
113 if (elector->ever_participated())
114 start();
115 else
116 elector->reset_election();
117 }
118 }
119
120
121 void ElectionLogic::declare_victory()
122 {
123 leader_acked = -1;
124 electing_me = false;
125
126 set<int> new_quorum;
127 new_quorum.swap(acked_me);
128
129 ceph_assert(epoch % 2 == 1); // election
130 bump_epoch(epoch+1); // is over!
131
132 elector->message_victory(new_quorum);
133 }
134
135 void ElectionLogic::receive_propose(int from, epoch_t mepoch)
136 {
137 if (mepoch > epoch) {
138 bump_epoch(mepoch);
139 } else if (mepoch < epoch) {
140 // got an "old" propose,
141 if (epoch % 2 == 0 && // in a non-election cycle
142 !elector->is_current_member(from)) { // from someone outside the quorum
143 // a mon just started up, call a new election so they can rejoin!
144 ldout(cct, 5) << " got propose from old epoch, "
145 << from << " must have just started" << dendl;
146 // we may be active; make sure we reset things in the monitor appropriately.
147 elector->trigger_new_election();
148 } else {
149 ldout(cct, 5) << " ignoring old propose" << dendl;
150 }
151 return;
152 }
153
154 if (elector->get_my_rank() < from) {
155 // i would win over them.
156 if (leader_acked >= 0) { // we already acked someone
157 ceph_assert(leader_acked < from); // and they still win, of course
158 ldout(cct, 5) << "no, we already acked " << leader_acked << dendl;
159 } else {
160 // wait, i should win!
161 if (!electing_me) {
162 elector->trigger_new_election();
163 }
164 }
165 } else {
166 // they would win over me
167 if (leader_acked < 0 || // haven't acked anyone yet, or
168 leader_acked > from || // they would win over who you did ack, or
169 leader_acked == from) { // this is the guy we're already deferring to
170 defer(from);
171 } else {
172 // ignore them!
173 ldout(cct, 5) << "no, we already acked " << leader_acked << dendl;
174 }
175 }
176 }
177
178 void ElectionLogic::receive_ack(int from, epoch_t from_epoch)
179 {
180 ceph_assert(from_epoch % 2 == 1); // sender in an election epoch
181 if (from_epoch > epoch) {
182 ldout(cct, 5) << "woah, that's a newer epoch, i must have rebooted. bumping and re-starting!" << dendl;
183 bump_epoch(from_epoch);
184 start();
185 return;
186 }
187 // is that _everyone_?
188 if (electing_me) {
189 acked_me.insert(from);
190 if (acked_me.size() == elector->paxos_size()) {
191 // if yes, shortcut to election finish
192 declare_victory();
193 }
194 } else {
195 // ignore, i'm deferring already.
196 ceph_assert(leader_acked >= 0);
197 }
198 }
199
200 bool ElectionLogic::receive_victory_claim(int from, epoch_t from_epoch)
201 {
202 ceph_assert(from < elector->get_my_rank());
203 ceph_assert(from_epoch % 2 == 0);
204
205 leader_acked = -1;
206
207 // i should have seen this election if i'm getting the victory.
208 if (from_epoch != epoch + 1) {
209 ldout(cct, 5) << "woah, that's a funny epoch, i must have rebooted. bumping and re-starting!" << dendl;
210 bump_epoch(from_epoch);
211 start();
212 return false;
213 }
214
215 bump_epoch(from_epoch);
216
217 // they win
218 return true;
219 }