]> git.proxmox.com Git - pve-ha-manager.git/blame - PVE/HA/Server.pm
split out server code
[pve-ha-manager.git] / PVE / HA / Server.pm
CommitLineData
f25a336a
DM
1package PVE::HA::Server;
2
3use strict;
4use warnings;
5
6use PVE::SafeSyslog;
7use PVE::Tools;
8
9use PVE::HA::Manager;
10
# Local status values a server can have:
#
# wait_for_quorum: cluster is not quorate, waiting
# recover:         chosen by new() when the stored status was 'master'
#                  (can happen after a crash?); do_one_iteration() waits
#                  and then moves on to wait_for_quorum
# master:          a PVE::HA::Manager instance exists and is run on every
#                  iteration
# slave:           cluster is quorate, but the manager lock was not obtained
# lost_quorum:     do_one_iteration() dies in this state
# error:           set after the manager raised an unexpected error;
#                  do_one_iteration() dies in this state
# halt:            do_one_iteration() dies in this state

# Lookup table used by set_local_status() to reject unknown state names.
my $valid_states = {
    map { $_ => 1 } qw(
        wait_for_quorum
        recover
        master
        slave
        lost_quorum
        error
        halt
    )
};
30
# Constructor.
#
# Parameters:
#   $this  - class name, or an existing object whose class is reused
#   $haenv - HA environment object; must provide read_local_status() and
#            whatever set_local_status() calls on it
#
# Returns the new server object. The initial state is derived from the
# status stored in the environment: a stored 'master' becomes 'recover'
# (can happen after a crash?), anything else becomes 'wait_for_quorum'.
sub new {
    my ($this, $haenv) = @_;

    my $class = ref($this) || $this;

    my $self = bless { haenv => $haenv, manager => undef }, $class;

    # Fall back to 'wait_for_quorum' when no status was stored.
    my $stored_status = $haenv->read_local_status();
    $self->{status} = $stored_status || 'wait_for_quorum';

    my $initial_status =
        $self->{status} eq 'master' ? 'recover' : 'wait_for_quorum';
    $self->set_local_status($initial_status);

    return $self;
}
51
# Accessor: returns the current local status string of this server.
sub get_local_status {
    my $self = shift;

    return $self->{status};
}
57
# Switch the server to a new local status.
#
# Parameters:
#   $new_status - one of the keys of $valid_states
#
# Dies if $new_status is not a valid state. Does nothing when the status
# is unchanged. Otherwise the change is logged, written through the HA
# environment and stored on the object. Entering 'master' creates a new
# PVE::HA::Manager; entering any other state cleans up and drops an
# existing manager.
sub set_local_status {
    my ($self, $new_status) = @_;

    if (!$valid_states->{$new_status}) {
        die "invalid state '$new_status'";
    }

    my $old_status = $self->{status};

    return if $old_status eq $new_status;

    my $haenv = $self->{haenv};

    $haenv->log('info', "manager status change $old_status => $new_status");

    $haenv->write_local_status($new_status);

    $self->{status} = $new_status;

    if ($new_status eq 'master') {
        $self->{manager} = PVE::HA::Manager->new($haenv);
    } elsif ($self->{manager}) {
        # fixme: what should we do here?
        $self->{manager}->cleanup();
        $self->{manager} = undef;
    }
}
88
# Try to obtain the HA manager lock through the HA environment.
#
# The lock is requested up to six times, sleeping one second (via the
# environment) between two attempts.
#
# Returns 1 as soon as an attempt succeeds, 0 if all attempts failed.
sub get_manager_lock {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    my $max_retries = 5;

    for my $attempt (0 .. $max_retries) {
        # Pause before every retry, but not before the first attempt.
        $haenv->sleep(1) if $attempt > 0;

        return 1 if $haenv->get_ha_manager_lock();
    }

    return 0;
}
103
# Run one step of the server state machine.
#
# Behaviour depends on the current local status:
#
#   recover         - log, sleep 5 seconds, switch to 'wait_for_quorum'
#   wait_for_quorum - sleep 5 seconds; if the cluster is quorate, become
#                     'master' when the manager lock can be obtained,
#                     'slave' otherwise
#   master          - run the manager once; on an unexpected error log it
#                     and switch to 'error'; otherwise wait until 10
#                     seconds after the start of the iteration and then
#                     re-check the manager lock, stepping down to 'slave'
#                     (quorate) or 'wait_for_quorum' (not quorate) when
#                     the lock was lost
#   slave           - sleep 5 seconds; try to become 'master' while
#                     quorate, fall back to 'wait_for_quorum' otherwise
#   error, lost_quorum, halt, anything else - die
#
# The environment's loop_start_hook() is called first. loop_end_hook() is
# called only when the iteration did not die.
#
# Returns 1.
sub do_one_iteration {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    my $status = $self->get_local_status();

    $haenv->loop_start_hook();

    if ($status eq 'recover') {

        $haenv->log('info', "waiting for 5 seconds");

        $haenv->sleep(5);

        $self->set_local_status('wait_for_quorum');

    } elsif ($status eq 'wait_for_quorum') {

        $haenv->sleep(5);

        if ($haenv->quorate()) {
            if ($self->get_manager_lock()) {
                $self->set_local_status('master');
            } else {
                $self->set_local_status('slave');
            }
        }

    } elsif ($status eq 'master') {

        my $manager = $self->{manager};

        die "no manager" if !defined($manager);

        my $start_time = $haenv->get_time();

        my $max_time = 10;

        # do work (max_time seconds)
        eval {
            # fixme: set alert timer
            $manager->manage();
        };
        if (my $err = $@) {

            # fixme: cleanup?
            $haenv->log('err', "got unexpected error - $err");
            $self->set_local_status('error');

            # Deliberately no lock check here: it could replace 'error'
            # with 'slave' or 'wait_for_quorum' and thereby hide the
            # failure from the next iteration.

        } else {
            $haenv->sleep_until($start_time + $max_time);

            if (!$self->get_manager_lock()) {
                if ($haenv->quorate()) {
                    $self->set_local_status('slave');
                } else {
                    $self->set_local_status('wait_for_quorum');
                    # set_local_status('lost_quorum');
                }
            }
        }

    } elsif ($status eq 'slave') {

        $haenv->sleep(5);

        if ($haenv->quorate()) {
            if ($self->get_manager_lock()) {
                $self->set_local_status('master');
            }
        } else {
            $self->set_local_status('wait_for_quorum');
        }

    } elsif ($status eq 'error') {
        die "stopping due to errors\n";
    } elsif ($status eq 'lost_quorum') {
        die "lost_quorum\n";
    } elsif ($status eq 'halt') {
        die "halt\n";
    } else {
        die "got unexpected status '$status'\n";
    }

    $haenv->loop_end_hook();

    return 1;
}
192
1931;