]>
Commit | Line | Data |
---|---|---|
f25a336a DM |
1 | package PVE::HA::Server; |
2 | ||
3 | use strict; | |
4 | use warnings; | |
5 | ||
6 | use PVE::SafeSyslog; | |
7 | use PVE::Tools; | |
8 | ||
9 | use PVE::HA::Manager; | |
10 | ||
11 | # Server can have several states: | |
12 | # | |
13 | # wait_for_quorum: cluster is not quorate, waiting | |
14 | # recover: fixme? | |
15 | # master: | |
16 | # slave: | |
17 | # lost_quorum: | |
18 | # error: | |
19 | # halt: | |
20 | ||
# Lookup table of every status name accepted by set_local_status().
# Each known state maps to a true value.
my $valid_states = {
    map { ($_ => 1) } qw(
        wait_for_quorum
        recover
        master
        slave
        lost_quorum
        error
        halt
    )
};
30 | ||
# Constructor.
#
# Parameters:
#   $this  - class name or an existing instance (its class is reused)
#   $haenv - environment object; must provide read_local_status() here
#
# The stored local status is read from the environment, defaulting to
# 'wait_for_quorum' when nothing true is returned. A stored 'master'
# status is turned into 'recover' (can happen after a crash?); every
# other status is reset to 'wait_for_quorum'.
#
# Returns the new blessed object.
sub new {
    my ($this, $haenv) = @_;

    my $class = ref($this) || $this;

    my $self = bless {
        haenv   => $haenv,
        manager => undef,
    }, $class;

    $self->{status} = $haenv->read_local_status() || 'wait_for_quorum';

    # can happen after crash?
    my $start_state = $self->{status} eq 'master' ? 'recover' : 'wait_for_quorum';
    $self->set_local_status($start_state);

    return $self;
}
51 | ||
# Accessor: returns the status string currently stored in the object.
sub get_local_status {
    my $self = shift;

    return $self->{status};
}
57 | ||
# Switch the server to $new_status.
#
# Dies when $new_status is not a key of $valid_states. Does nothing when
# the status is unchanged. Otherwise the change is logged, written through
# the environment (write_local_status) and stored in the object.
#
# Entering 'master' creates a fresh PVE::HA::Manager; entering any other
# state cleans up and drops an existing manager.
sub set_local_status {
    my ($self, $new_status) = @_;

    die "invalid state '$new_status'"
        unless $valid_states->{$new_status};

    my $status = $self->{status};
    return if $status eq $new_status;

    my $haenv = $self->{haenv};

    $haenv->log('info', "manager status change $status => $new_status");

    $haenv->write_local_status($new_status);
    $self->{status} = $new_status;

    if ($new_status eq 'master') {
        $self->{manager} = PVE::HA::Manager->new($haenv);
    } elsif ($self->{manager}) {
        # fixme: what should we do here?
        $self->{manager}->cleanup();
        $self->{manager} = undef;
    }
}
88 | ||
# Try to obtain the HA manager lock from the environment.
#
# Makes up to six attempts, asking the environment to sleep for one
# second between consecutive attempts (no sleep after the last one).
#
# Returns 1 as soon as an attempt succeeds, 0 when all attempts failed.
sub get_manager_lock {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    my $max_attempts = 6;

    for my $attempt (1 .. $max_attempts) {
        return 1 if $haenv->get_ha_manager_lock();
        $haenv->sleep(1) if $attempt < $max_attempts;
    }

    return 0;
}
103 | ||
# Run one step of the server state machine.
#
# Calls loop_start_hook() on the environment, dispatches on the current
# local status, then calls loop_end_hook():
#
#   recover         - sleep 5 seconds, then switch to 'wait_for_quorum'
#   wait_for_quorum - sleep 5 seconds; when quorate, become 'master' if the
#                     manager lock can be obtained, otherwise 'slave'
#   master          - run the manager once; on failure switch to 'error',
#                     otherwise wait out the cycle and re-check the lock
#   slave           - sleep 5 seconds; while quorate try to become 'master',
#                     otherwise fall back to 'wait_for_quorum'
#   error, lost_quorum, halt, unknown status - die
#
# Returns 1 when the iteration completed. loop_end_hook() is not reached
# when the sub dies.
sub do_one_iteration {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    my $status = $self->get_local_status();

    $haenv->loop_start_hook();

    if ($status eq 'recover') {

        $haenv->log('info', "waiting for 5 seconds");

        $haenv->sleep(5);

        $self->set_local_status('wait_for_quorum');

    } elsif ($status eq 'wait_for_quorum') {

        $haenv->sleep(5);

        if ($haenv->quorate()) {
            if ($self->get_manager_lock()) {
                $self->set_local_status('master');
            } else {
                $self->set_local_status('slave');
            }
        }

    } elsif ($status eq 'master') {

        my $manager = $self->{manager};

        die "no manager" if !defined($manager);

        my $start_time = $haenv->get_time();

        my $max_time = 10;

        # do work (max_time seconds)
        eval {
            # fixme: set alert timer
            $manager->manage();
        };
        if (my $err = $@) {

            # fixme: cleanup?
            $haenv->log('err', "got unexpected error - $err");
            $self->set_local_status('error');

            # Deliberately skip the lock check below: a lost lock would
            # replace 'error' with 'slave'/'wait_for_quorum' and the
            # failure would never be acted upon by the next iteration.

        } else {
            $haenv->sleep_until($start_time + $max_time);

            if (!$self->get_manager_lock()) {
                if ($haenv->quorate()) {
                    $self->set_local_status('slave');
                } else {
                    $self->set_local_status('wait_for_quorum');
                    # set_local_status('lost_quorum');
                }
            }
        }

    } elsif ($status eq 'slave') {

        $haenv->sleep(5);

        if ($haenv->quorate()) {
            if ($self->get_manager_lock()) {
                $self->set_local_status('master');
            }
        } else {
            $self->set_local_status('wait_for_quorum');
        }

    } elsif ($status eq 'error') {
        die "stopping due to errors\n";
    } elsif ($status eq 'lost_quorum') {
        die "lost_quorum\n";
    } elsif ($status eq 'halt') {
        die "halt\n";
    } else {
        die "got unexpected status '$status'\n";
    }

    $haenv->loop_end_hook();

    return 1;
}
192 | ||
193 | 1; |