3 # Cluster Resource Manager
14 # Server can have several states:
17 wait_for_quorum
=> "cluster is not quorate, waiting",
18 master
=> "quorate, and we got the ha_manager lock",
19 lost_manager_lock
=> "we lost the ha_manager lock (watchgog active)",
20 slave
=> "quorate, but we do not own the ha_manager lock",
24 my ($this, $haenv) = @_;
26 my $class = ref($this) || $this;
31 status
=> { state => 'startup' },
34 $self->set_local_status({ state => 'wait_for_quorum' });
39 sub shutdown_request
{
42 syslog
('info' , "server received shutdown request")
43 if !$self->{shutdown_request
};
45 $self->{shutdown_request
} = 1;
48 sub get_local_status
{
51 return $self->{status
};
54 sub set_local_status
{
55 my ($self, $new) = @_;
57 die "invalid state '$new->{state}'" if !$valid_states->{$new->{state}};
59 my $haenv = $self->{haenv
};
61 my $old = $self->{status
};
63 # important: only update if the state really changed
64 return if $old->{state} eq $new->{state};
66 $haenv->log('info', "status change $old->{state} => $new->{state}");
68 $new->{state_change_time
} = $haenv->get_time();
70 $self->{status
} = $new;
72 # fixme: do not use extra class
73 if ($new->{state} eq 'master') {
75 $self->{manager
} = PVE
::HA
::Manager-
>new($haenv);
77 if ($self->{manager
}) {
78 # fixme: what should we do here?
79 $self->{manager
}->cleanup();
80 $self->{manager
} = undef;
85 sub get_protected_ha_manager_lock
{
88 my $haenv = $self->{haenv
};
91 my $starttime = $haenv->get_time();
95 if ($haenv->get_ha_manager_lock()) {
96 if ($self->{ha_manager_wd
}) {
97 $haenv->watchdog_update($self->{ha_manager_wd
});
99 my $wfh = $haenv->watchdog_open();
100 $self->{ha_manager_wd
} = $wfh;
105 last if ++$count > 5; # try max 5 time
107 my $delay = $haenv->get_time() - $starttime;
108 last if $delay > 5; # for max 5 seconds
116 sub check_pending_fencing
{
117 my ($manager_status, $node) = @_;
119 my $ss = $manager_status->{service_status
};
121 return 1 if PVE
::HA
::Tools
::count_fenced_services
($ss, $node);
126 sub do_one_iteration
{
129 my $haenv = $self->{haenv
};
131 my $status = $self->get_local_status();
132 my $state = $status->{state};
134 my $manager_status = $haenv->read_manager_status();
135 my $pending_fencing = check_pending_fencing
($manager_status, $haenv->nodename());
137 # do state changes first
139 if ($state eq 'wait_for_quorum') {
141 if (!$pending_fencing && $haenv->quorate()) {
142 if ($self->get_protected_ha_manager_lock()) {
143 $self->set_local_status({ state => 'master' });
145 $self->set_local_status({ state => 'slave' });
149 } elsif ($state eq 'slave') {
151 if (!$pending_fencing && $haenv->quorate()) {
152 if ($self->get_protected_ha_manager_lock()) {
153 $self->set_local_status({ state => 'master' });
156 $self->set_local_status({ state => 'wait_for_quorum' });
159 } elsif ($state eq 'lost_manager_lock') {
161 if (!$pending_fencing && $haenv->quorate()) {
162 if ($self->get_protected_ha_manager_lock()) {
163 $self->set_local_status({ state => 'master' });
167 } elsif ($state eq 'master') {
169 if (!$self->get_protected_ha_manager_lock()) {
170 $self->set_local_status({ state => 'lost_manager_lock'});
174 $status = $self->get_local_status();
175 $state = $status->{state};
179 if ($state eq 'wait_for_quorum') {
181 return 0 if $self->{shutdown_request
};
185 } elsif ($state eq 'master') {
187 my $manager = $self->{manager
};
189 die "no manager" if !defined($manager);
191 my $startime = $haenv->get_time();
197 # do work (max_time seconds)
199 # fixme: set alert timer
201 if ($self->{shutdown_request
}) {
203 if ($self->{ha_manager_wd
}) {
204 $haenv->watchdog_close($self->{ha_manager_wd
});
205 delete $self->{ha_manager_wd
};
215 $haenv->log('err', "got unexpected error - $err");
218 return 0 if $shutdown;
220 $haenv->sleep_until($startime + $max_time);
222 } elsif ($state eq 'lost_manager_lock') {
224 if ($self->{ha_manager_wd
}) {
225 $haenv->watchdog_close($self->{ha_manager_wd
});
226 delete $self->{ha_manager_wd
};
229 return 0 if $self->{shutdown_request
};
231 $self->set_local_status({ state => 'wait_for_quorum' });
233 } elsif ($state eq 'slave') {
235 return 0 if $self->{shutdown_request
};
237 # wait until we get master
241 die "got unexpected status '$state'\n";