]> git.proxmox.com Git - pve-ha-manager.git/blame - PVE/HA/LRM.pm
improve documentation
[pve-ha-manager.git] / PVE / HA / LRM.pm
CommitLineData
5f095798
DM
1package PVE::HA::LRM;
2
3# Local Resource Manager
4
5use strict;
6use warnings;
7
8use PVE::SafeSyslog;
9use PVE::Tools;
10use PVE::HA::Tools;
11
12# Server can have several states:
13
# Valid LRM states, each mapped to a short human readable description.
# set_local_status() refuses every state name that is not a key of this
# hash. Note that the initial 'startup' state assigned in new() is
# deliberately not listed, so it can never be entered again.
my $valid_states = {
    wait_for_agent_lock => "waiting for agent lock",
    active => "got agent_lock",
    lost_agent_lock => "lost agent_lock",
};
19
# Constructor.
#
# $this  - class name, or an existing object whose class gets reused
# $haenv - HA environment object, stored unchanged under the 'haenv' key
#
# The object is created in the internal 'startup' state and is moved to
# 'wait_for_agent_lock' via set_local_status() before it is returned.
sub new {
    my ($this, $haenv) = @_;

    # allow both Class->new(...) and $obj->new(...)
    my $class = ref($this) || $this;

    my %fields = (
        haenv => $haenv,
        status => { state => 'startup' },
    );

    my $self = bless \%fields, $class;

    $self->set_local_status({ state => 'wait_for_agent_lock' });

    return $self;
}
34
# Flag this LRM for shutdown. The flag is only recorded here; it is
# evaluated by do_one_iteration().
sub shutdown_request {
    my $self = shift;

    $self->{shutdown_request} = 1;
}
40
# Return the current status hash reference (the stored reference itself,
# not a copy).
sub get_local_status {
    my $self = shift;

    my $status = $self->{status};

    return $status;
}
46
# Switch to a new local status.
#
# $new - hash reference with at least a 'state' key; the state must be
#        one of the keys of $valid_states, otherwise this dies.
#
# Nothing happens when the state is the same as the current one. On a
# real change the transition is logged, 'state_change_time' is set on
# $new from the environment clock, and $new replaces the stored status.
sub set_local_status {
    my ($self, $new) = @_;

    my $requested = $new->{state};
    die "invalid state '$new->{state}'" if !$valid_states->{$requested};

    my $old = $self->{status};

    # important: only update if the state really changed, so that
    # state_change_time keeps pointing at the actual transition
    if ($old->{state} eq $requested) {
        return;
    }

    my $haenv = $self->{haenv};

    $haenv->log('info', "status change $old->{state} => $new->{state}");

    $new->{state_change_time} = $haenv->get_time();

    $self->{status} = $new;
}
65
# Try to get the HA agent lock and, on success, make sure it is guarded
# by a watchdog.
#
# On success the already open watchdog (stored in $self->{ha_agent_wd})
# is updated, or a new one is opened and stored there if none exists
# yet. Returns 1 in that case.
#
# On failure the attempt is repeated after sleeping one second, until
# either the failure counter exceeds 5 (i.e. after the sixth failed
# attempt) or more than 5 seconds have passed since the call started.
# Returns 0 then; the watchdog is not touched.
sub get_protected_ha_agent_lock {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    my $began = $haenv->get_time();
    my $failed = 0;

    while (1) {

        if ($haenv->get_ha_agent_lock()) {
            if (my $wd = $self->{ha_agent_wd}) {
                $haenv->watchdog_update($wd);
            } else {
                $self->{ha_agent_wd} = $haenv->watchdog_open();
            }
            return 1;
        }

        # bound the number of retries ...
        $failed++;
        last if $failed > 5;

        # ... as well as the total time spent retrying
        my $elapsed = $haenv->get_time() - $began;
        last if $elapsed > 5;

        $haenv->sleep(1);
    }

    return 0;
}
96
# Run one iteration of the LRM main loop.
#
# Phase 1 performs state transitions based on quorum and on whether the
# protected HA agent lock can be obtained. Phase 2 does the work for the
# resulting state (currently mostly sleeping and shutdown handling).
#
# Returns 1 if the caller should continue with another iteration, 0 if
# a requested shutdown has been carried out. Dies on an unknown state.
sub do_one_iteration {
    my ($self) = @_;

    my $haenv = $self->{haenv};

    my $state = $self->get_local_status()->{state};

    # time stamp taken before the transitions; not used further down yet
    my $ctime = $haenv->get_time();

    # close and forget the watchdog, if we hold one
    my $close_watchdog = sub {
        return if !$self->{ha_agent_wd};
        $haenv->watchdog_close($self->{ha_agent_wd});
        delete $self->{ha_agent_wd};
    };

    # phase 1: do state changes first

    if ($state eq 'active') {

        $self->set_local_status({ state => 'lost_agent_lock'})
            if !$self->get_protected_ha_agent_lock();

    } elsif ($state eq 'wait_for_agent_lock') {

        my $service_count = 1; # todo: correctly compute

        if ($service_count && $haenv->quorate() && $self->get_protected_ha_agent_lock()) {
            $self->set_local_status({ state => 'active' });
        }

    } elsif ($state eq 'lost_agent_lock') {

        if ($haenv->quorate() && $self->get_protected_ha_agent_lock()) {
            $self->set_local_status({ state => 'active' });
        }
    }

    # the state may have changed above, so read it again
    $state = $self->get_local_status()->{state};

    # phase 2: do work

    if ($state eq 'wait_for_agent_lock') {

        return 0 if $self->{shutdown_request};

        $haenv->sleep(5);

        return 1;
    }

    if ($state eq 'active') {

        my $began = $haenv->get_time();

        my $max_time = 10;

        my $shutdown = 0;

        # do work (max_time seconds); errors are logged, not propagated
        eval {
            # fixme: set alert timer

            if ($self->{shutdown_request}) {

                # fixme: request service stop or relocate ?

                my $service_count = 0; # fixme

                if ($service_count == 0) {
                    # nothing left to protect, so release the watchdog
                    $close_watchdog->();
                    $shutdown = 1;
                }
            }
        };
        if (my $err = $@) {
            $haenv->log('err', "got unexpected error - $err");
        }

        return 0 if $shutdown;

        # pad the iteration to max_time seconds
        $haenv->sleep_until($began + $max_time);

        return 1;
    }

    if ($state eq 'lost_agent_lock') {

        # Note: the watchdog is active and will trigger soon!
        # So we hope to get the lock back soon!

        return 1 if !$self->{shutdown_request};

        my $running_services = 0; # fixme: correctly compute

        if ($running_services > 0) {
            $haenv->log('err', "get shutdown request in state 'lost_agent_lock' - " .
                        "killing running services");

            # fixme: kill all services as fast as possible
        }

        # now all services are stopped, so we can close the watchdog
        $close_watchdog->();

        return 0;
    }

    die "got unexpected status '$state'\n";
}
217
2181;