package PVE::HA::Sim::Env;

use strict;
use warnings;
use POSIX qw(strftime EINTR);
use JSON;
use IO::File;
use Fcntl qw(:DEFAULT :flock);

use PVE::HA::Tools;
use PVE::HA::Env;
use PVE::HA::Resources;
use PVE::HA::Sim::Resources::VirtVM;
use PVE::HA::Sim::Resources::VirtCT;
use PVE::HA::Sim::Resources::VirtFail;

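# The Virt* classes are simulated resource plugins: VirtVM and VirtCT stand in
# for the real qemu/lxc resource types, while VirtFail models services that
# fail in controlled ways for the regression tests.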
PVE::HA::Sim::Resources::VirtVM->register();
PVE::HA::Sim::Resources::VirtCT->register();
PVE::HA::Sim::Resources::VirtFail->register();

PVE::HA::Resources->init();

sub new {
    my ($this, $nodename, $hardware, $log_id) = @_;

    die "missing nodename" if !$nodename;
    die "missing log_id" if !$log_id;

    my $class = ref($this) || $this;

    my $self = bless {}, $class;

    $self->{statusdir} = $hardware->statusdir();
    $self->{nodename} = $nodename;

    $self->{hardware} = $hardware;
    $self->{lock_timeout} = 120;

    $self->{log_id} = $log_id;

    return $self;
}
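
# Hypothetical usage sketch ($hardware stands for an already constructed
# simulation hardware instance; node name and log ID are illustrative only):
#
#   my $env = PVE::HA::Sim::Env->new('node1', $hardware, 'crm');
#   $env->log('info', 'hello from ' . $env->nodename());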

sub nodename {
    my ($self) = @_;

    return $self->{nodename};
}

sub hardware {
    my ($self) = @_;

    return $self->{hardware};
}

my $assert_cfs_can_rw = sub {
    my ($self, $emsg) = @_;

    $emsg //= 'cfs connection refused - not mounted?';

    die "$emsg\n"
        if !$self->{hardware}->get_cfs_state($self->{nodename}, 'rw');
};

# simulate cluster-wide locks as entries in a shared JSON file; a lock records
# its owner node and acquisition time, and expires after $self->{lock_timeout}
# seconds, after which another node may take it over
sub sim_get_lock {
    my ($self, $lock_name, $unlock) = @_;

    return 0 if !$self->quorate();

    my $filename = "$self->{statusdir}/cluster_locks";

    my $code = sub {

        my $data = PVE::HA::Tools::read_json_from_file($filename, {});

        my $res;

        my $nodename = $self->nodename();
        my $ctime = $self->get_time();

        if ($unlock) {

            if (my $d = $data->{$lock_name}) {
                my $tdiff = $ctime - $d->{time};

                if ($tdiff > $self->{lock_timeout}) {
                    # lock expired anyway, nothing left to release
                    $res = 1;
                } elsif (($tdiff <= $self->{lock_timeout}) && ($d->{node} eq $nodename)) {
                    # we still hold a valid lock - release it
                    delete $data->{$lock_name};
                    $res = 1;
                } else {
                    # someone else holds a valid lock - refuse
                    $res = 0;
                }
            }

        } else {

            if (my $d = $data->{$lock_name}) {

                my $tdiff = $ctime - $d->{time};

                if ($tdiff <= $self->{lock_timeout}) {
                    if ($d->{node} eq $nodename) {
                        # we already hold the lock - renew its timestamp
                        $d->{time} = $ctime;
                        $res = 1;
                    } else {
                        # valid lock held by another node
                        $res = 0;
                    }
                } else {
                    # lock expired - take it over
                    $self->log('info', "got lock '$lock_name'");
                    $d->{node} = $nodename;
                    $d->{time} = $ctime;
                    $res = 1;
                }

            } else {
                # no lock entry yet - acquire it
                $data->{$lock_name} = {
                    time => $ctime,
                    node => $nodename,
                };
                $self->log('info', "got lock '$lock_name'");
                $res = 1;
            }
        }

        PVE::HA::Tools::write_json_to_file($filename, $data);

        return $res;
    };

    return $self->{hardware}->global_lock($code);
}
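
# Acquire/renew/release cycle, using the manager lock name from below:
#
#   $env->sim_get_lock('ha_manager_lock');      # acquire, or renew if we hold it
#   # ... do work, re-acquiring within the 120s timeout to keep ownership ...
#   $env->sim_get_lock('ha_manager_lock', 1);   # release again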

sub read_manager_status {
    my ($self) = @_;

    $assert_cfs_can_rw->($self);

    my $filename = "$self->{statusdir}/manager_status";

    return PVE::HA::Tools::read_json_from_file($filename, {});
}

sub write_manager_status {
    my ($self, $status_obj) = @_;

    $assert_cfs_can_rw->($self);

    my $filename = "$self->{statusdir}/manager_status";

    PVE::HA::Tools::write_json_to_file($filename, $status_obj);
}

sub read_lrm_status {
    my ($self, $node) = @_;

    $node = $self->{nodename} if !defined($node);

    $assert_cfs_can_rw->($self);

    return $self->{hardware}->read_lrm_status($node);
}

sub write_lrm_status {
    my ($self, $status_obj) = @_;

    my $node = $self->{nodename};

    $assert_cfs_can_rw->($self);

    return $self->{hardware}->write_lrm_status($node, $status_obj);
}

sub is_node_shutdown {
    my ($self) = @_;

    my $node = $self->{nodename};
    my $cstatus = $self->{hardware}->read_hardware_status_nolock();

    die "undefined node status for node '$node'" if !defined($cstatus->{$node});

    my ($shutdown, $reboot) = (0, 0);

    if (my $target = $cstatus->{$node}->{shutdown}) {
        if ($target eq 'shutdown') {
            $shutdown = 1;
        } elsif ($target eq 'reboot') {
            $shutdown = 1;
            $reboot = 1;
        } else {
            die "unknown shutdown target '$target'";
        }
    }

    return ($shutdown, $reboot);
}
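
# Return value examples, derived directly from the branches above:
#
#   my ($shutdown, $reboot) = $env->is_node_shutdown();
#   # no pending request -> (0, 0)
#   # target 'shutdown'  -> (1, 0)
#   # target 'reboot'    -> (1, 1)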

sub read_service_config {
    my ($self) = @_;

    $assert_cfs_can_rw->($self);

    return $self->{hardware}->read_service_config();
}

sub update_service_config {
    my ($self, $sid, $param) = @_;

    return $self->{hardware}->update_service_config($sid, $param);
}

sub parse_sid {
    my ($self, $sid) = @_;

    die "unable to parse service id '$sid'\n"
        if !($sid =~ m/^(\S+):(\S+)$/);

    my $name = $2;
    my $type = $1;

    return wantarray ? ($sid, $type, $name) : $sid;
}
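
# For example, a service ID like 'vm:100' splits into its type and name:
#
#   my ($sid, $type, $name) = $env->parse_sid('vm:100');
#   # $sid = 'vm:100', $type = 'vm', $name = '100'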

sub read_fence_config {
    my ($self) = @_;

    $assert_cfs_can_rw->($self);

    return $self->{hardware}->read_fence_config();
}

# the test/sim framework uses hardware-based fencing if
# fence devices are configured
sub fencing_mode {
    my ($self) = @_;

    my $cfg = $self->read_fence_config();

    return (defined($cfg) && keys %{$cfg}) ? 'hardware' : 'watchdog';
}
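
# i.e. an empty or missing fence config falls back to watchdog fencing:
#
#   my $mode = $env->fencing_mode();   # 'hardware' or 'watchdog'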

sub exec_fence_agent {
    my ($self, $agent, $node, @param) = @_;

    return $self->{hardware}->exec_fence_agent($agent, $node, @param);
}

sub read_group_config {
    my ($self) = @_;

    $assert_cfs_can_rw->($self);

    return $self->{hardware}->read_group_config();
}

# this is normally only allowed by the master to recover a _fenced_ service
sub steal_service {
    my ($self, $sid, $current_node, $new_node) = @_;

    $assert_cfs_can_rw->($self);

    return $self->{hardware}->change_service_location($sid, $current_node, $new_node);
}

sub queue_crm_commands {
    my ($self, $cmd) = @_;

    $assert_cfs_can_rw->($self);

    return $self->{hardware}->queue_crm_commands($cmd);
}

sub read_crm_commands {
    my ($self) = @_;

    $assert_cfs_can_rw->($self);

    return $self->{hardware}->read_crm_commands();
}

sub log {
    my ($self, $level, $msg) = @_;

    chomp $msg;

    my $time = $self->get_time();

    # pass $msg as a printf argument instead of interpolating it into the
    # format string, so literal '%' characters in messages are printed safely
    printf("%-5s %5d %12s: %s\n", $level, $time, "$self->{nodename}/$self->{log_id}", $msg);
}

sub sendmail {
    my ($self, $subject, $text) = @_;

    # only log subject, do not spam the logs
    $self->log('email', $subject);
}

sub get_time {
    my ($self) = @_;

    die "implement in subclass";
}

sub sleep {
    my ($self, $delay) = @_;

    die "implement in subclass";
}

sub sleep_until {
    my ($self, $end_time) = @_;

    die "implement in subclass";
}

sub get_ha_manager_lock {
    my ($self) = @_;

    return $self->sim_get_lock('ha_manager_lock');
}

# release the cluster-wide manager lock.
# once released, another CRM may step up and get the lock, so this should only
# be called when shutting down/deactivating the current master
sub release_ha_manager_lock {
    my ($self) = @_;

    return $self->sim_get_lock('ha_manager_lock', 1);
}

sub get_ha_agent_lock_name {
    my ($self, $node) = @_;

    $node = $self->nodename() if !$node;

    return "ha_agent_${node}_lock";
}
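
# e.g. the agent lock for a node named 'node1' is 'ha_agent_node1_lock'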

sub get_ha_agent_lock {
    my ($self, $node) = @_;

    my $lck = $self->get_ha_agent_lock_name($node);
    return $self->sim_get_lock($lck);
}


# release the respective node agent lock.
# this should only get called if the node's LRM gracefully shuts down with
# all services already cleanly stopped!
sub release_ha_agent_lock {
    my ($self) = @_;

    my $node = $self->nodename();

    my $lock = $self->get_ha_agent_lock_name($node);
    return $self->sim_get_lock($lock, 1);
}

# return true when cluster is quorate
sub quorate {
    my ($self) = @_;

    my ($node_info, $quorate) = $self->{hardware}->get_node_info();
    my $node = $self->nodename();
    return 0 if !$node_info->{$node}->{online};
    return $quorate;
}
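
# note that a node which is itself offline is never considered quorate here,
# even if the remaining cluster still has quorum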

sub get_node_info {
    my ($self) = @_;

    return $self->{hardware}->get_node_info();
}

sub loop_start_hook {
    my ($self) = @_;

    # do nothing, override in subclass
}

sub loop_end_hook {
    my ($self) = @_;

    # do nothing, override in subclass
}


sub cluster_state_update {
    my ($self) = @_;

    return $self->{hardware}->get_cfs_state($self->{nodename}, 'update');
}

sub watchdog_open {
    my ($self) = @_;

    my $node = $self->nodename();

    return $self->{hardware}->watchdog_open($node);
}

sub watchdog_update {
    my ($self, $wfh) = @_;

    return $self->{hardware}->watchdog_update($wfh);
}

sub watchdog_close {
    my ($self, $wfh) = @_;

    return $self->{hardware}->watchdog_close($wfh);
}

sub after_fork {
    my ($self) = @_;

    # nothing to clean up in the simulation environment
}


sub get_max_workers {
    my ($self) = @_;

    return 4;
}

# return cluster-wide enforced HA settings
sub get_datacenter_settings {
    my ($self) = @_;

    my $datacenterconfig = $self->{hardware}->read_datacenter_conf();

    return $datacenterconfig->{ha};
}
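
# the returned hash mirrors the 'ha' section of the datacenter config; the key
# below is shown as an illustration of the format, not an exhaustive list:
#
#   { shutdown_policy => 'migrate' }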

sub get_static_node_stats {
    my ($self) = @_;

    return $self->{hardware}->get_static_node_stats();
}

1;