]>
Commit | Line | Data |
---|---|---|
8b3f9144 DM |
1 | package PVE::HA::Sim::Hardware; |
2 | ||
3 | # Simulate Hardware resources | |
4 | ||
5 | # power supply for nodes: on/off | |
6 | # network connection to nodes: on/off | |
7 | # watchdog devices for nodes | |
0cfd8f5b DM |
8 | |
9 | use strict; | |
10 | use warnings; | |
11 | use POSIX qw(strftime EINTR); | |
12 | use Data::Dumper; | |
13 | use JSON; | |
14 | use IO::File; | |
15 | use Fcntl qw(:DEFAULT :flock); | |
787b66eb DM |
16 | use File::Copy; |
17 | use File::Path qw(make_path remove_tree); | |
0cfd8f5b | 18 | |
0bba8f60 | 19 | my $watchdog_timeout = 180; |
0bba8f60 | 20 | |
0cfd8f5b | 21 | |
787b66eb DM |
22 | # Status directory layout |
23 | # | |
24 | # configuration | |
25 | # | |
3c36cbca DM |
26 | # $testdir/cmdlist Command list for simulation |
27 | # $testdir/hardware_status Hardware description (number of nodes, ...) | |
28 | # $testdir/manager_status CRM status (start with {}) | |
17654a06 | 29 | # $testdir/service_config Service configuration |
3c36cbca | 30 | |
9329c1e2 DM |
31 | # |
32 | # runtime status for simulation system | |
33 | # | |
34 | # $testdir/status/cluster_locks Cluster locks | |
35 | # $testdir/status/hardware_status Hardware status (power/network on/off) | |
36 | # $testdir/status/watchdog_status Watchdog status | |
787b66eb DM |
37 | # |
38 | # runtime status | |
9329c1e2 | 39 | # |
3c36cbca | 40 | # $testdir/status/local_status_<node> local CRM Daemon status |
17654a06 DM |
41 | # $testdir/status/manager_status CRM status |
42 | # $testdir/status/service_config Service configuration | |
787b66eb | 43 | |
# Read the 'hardware_status' file below $self->{statusdir} and return the
# decoded JSON data. As the name says, no lock is taken here - callers that
# need a consistent view have to hold the lock themselves.
sub read_hardware_status_nolock {
    my $self = shift;

    my $status_file = "$self->{statusdir}/hardware_status";
    my $json_text = PVE::Tools::file_get_contents($status_file);

    return decode_json($json_text);
}
54 | ||
# Encode $cstatus as JSON and store it in the 'hardware_status' file below
# $self->{statusdir}. No lock is taken here - that is up to the caller.
sub write_hardware_status_nolock {
    my $self = shift;
    my ($cstatus) = @_;

    my $status_file = "$self->{statusdir}/hardware_status";
    my $json_text = encode_json($cstatus);

    PVE::Tools::file_set_contents($status_file, $json_text);
}
62 | ||
# Constructor.
#
# $testdir is the directory containing the initial configuration files.
# A fresh "$testdir/status" directory is created (any previous one is
# removed first) and the initial configuration is copied into it:
#
#   manager_status   - optional
#   service_config   - optional
#   hardware_status  - required, dies with "Copy failed" if it can't be copied
#
# Afterwards the copied hardware status is read, and an (empty) entry is
# registered in $self->{nodes} for each node found there.
#
# Dies if $testdir is missing, if the status directory can't be created, or
# if the hardware status can't be copied.
sub new {
    my ($this, $testdir) = @_;

    die "missing testdir" if !$testdir;

    my $class = ref($this) || $this;

    my $self = bless {}, $class;

    my $statusdir = $self->{statusdir} = "$testdir/status";

    # always start with a clean runtime status directory
    remove_tree($statusdir);
    if (!mkdir($statusdir)) {
        # Report the real cause right away instead of failing later with a
        # misleading copy error. An already existing directory is tolerated,
        # as it was before.
        my $err = $!;
        die "unable to create directory '$statusdir' - $err\n" if ! -d $statusdir;
    }

    # copy initial configuration
    copy("$testdir/manager_status", "$statusdir/manager_status"); # optional
    copy("$testdir/service_config", "$statusdir/service_config"); # optional

    copy("$testdir/hardware_status", "$statusdir/hardware_status") ||
        die "Copy failed: $!\n";

    my $cstatus = $self->read_hardware_status_nolock();

    foreach my $node (sort keys %$cstatus) {
        $self->{nodes}->{$node} = {};
    }

    return $self;
}
93 | ||
# Return the current time. This base class has no notion of time and
# always dies - the method has to be provided by a subclass.
sub get_time {
    my $self = shift;

    die "implement in subclass";
}
99 | ||
# Print a log line to the currently selected output handle.
#
# $level - log level, printed left aligned
# $msg   - message text, a trailing newline is removed
# $id    - optional ID of the log source, defaults to 'hardware'
#
# The line also contains the time as returned by $self->get_time().
sub log {
    my ($self, $level, $msg, $id) = @_;

    chomp $msg;

    my $time = $self->get_time();

    $id = 'hardware' if !$id;

    # Pass the message as argument instead of interpolating it into the
    # format string - else any '%' inside $msg is interpreted as a format
    # directive and garbles the output.
    printf("%-5s %5d %12s: %s\n", $level, $time, $id, $msg);
}
111 | ||
# Return the path of the runtime status directory. Callers may pass a node
# name as second argument, but it is ignored here - all nodes share the
# same directory.
sub statusdir {
    my $self = shift;

    return $self->{statusdir};
}
117 | ||
# Run $code->(@param) while holding an exclusive flock on the file
# "$self->{statusdir}/hardware.lck".
#
# Returns the (scalar) result of $code. Errors raised by $code are
# re-thrown after the lock got released. Dies if the lock file can't be
# opened or the lock can't be acquired.
sub global_lock {
    my ($self, $code, @param) = @_;

    my $lockfile = "$self->{statusdir}/hardware.lck";

    # pass the mode separately, so that the file name is never parsed for
    # mode characters
    my $fh = IO::File->new($lockfile, '>>') ||
        die "unable to open '$lockfile'\n";

    my $success;
    for (;;) {
        $success = flock($fh, LOCK_EX);
        # only retry if we got interrupted by a signal
        last if $success || ($! != EINTR);
    }

    if (!$success) {
        my $lock_err = $!; # save, close() may overwrite $!
        close($fh);
        die "can't aquire lock '$lockfile' - $lock_err\n";
    }

    my $res;

    eval { $res = $code->(@param) };
    my $err = $@;

    # closing the file handle releases the lock
    close($fh);

    die $err if $err;

    return $res;
}
147 | ||
8b3f9144 DM |
# Compute per node online state and cluster quorum from the hardware
# status $cstatus (hash: node name => { power => ..., network => ... }).
#
# A node counts as online if both power and network are 'on'. The cluster
# is quorate if more than half of all nodes are online. Without quorum
# all nodes are reported as offline.
#
# Returns the list ($node_info, $quorate), with $node_info being a hash
# reference of the form { $node => { online => 0|1 } }.
my $compute_node_info = sub {
    my ($self, $cstatus) = @_;

    my @node_list = keys %$cstatus;

    my %info;
    my $online_total = 0;

    for my $name (@node_list) {
        my $hw = $cstatus->{$name};

        my $is_online = 0;
        $is_online = 1 if $hw->{power} eq 'on' && $hw->{network} eq 'on';

        $info{$name}->{online} = $is_online;
        $online_total += $is_online;
    }

    my $quorate = 0;
    $quorate = 1 if $online_total > int(scalar(@node_list)/2);

    if (!$quorate) {
        # no quorum - treat every node as offline
        $info{$_}->{online} = 0 for @node_list;
    }

    return (\%info, $quorate);
};
177 | ||
# Read the hardware status while holding the global lock and derive the
# node/quorum information from it.
#
# Returns the list ($node_info, $quorate) as computed by
# $compute_node_info.
sub get_node_info {
    my $self = shift;

    my ($info, $has_quorum);

    $self->global_lock(sub {
        my $hw_status = $self->read_hardware_status_nolock();
        ($info, $has_quorum) = $compute_node_info->($self, $hw_status);
    });

    return ($info, $has_quorum);
}
192 | ||
193 | # simulate hardware commands | |
0cfd8f5b DM |
194 | # power <node> <on|off> |
195 | # network <node> <on|off> | |
196 | ||
# Execute a simulated hardware command.
#
# Called with the command string and a log ID as arguments. This base
# class always dies - the method has to be provided by a subclass.
sub sim_hardware_cmd {
    my $self = shift;
    my ($cmdstr, $logid) = @_;

    die "implement in subclass";
}
202 | ||
# Entry point to run the simulation. This base class always dies - the
# method has to be provided by a subclass.
sub run {
    my $self = shift;

    die "implement in subclass";
}
9329c1e2 DM |
208 | |
# Helper to modify the watchdog status file while holding the global lock.
#
# Loads "$self->{statusdir}/watchdog_status" (or starts with an empty hash
# if that file does not exist), then calls $code->($wdstatus). $code has
# to return the list ($wdstatus, $res) - the returned status is written
# back to the file, and $res is returned to the caller.
my $modify_watchog = sub {
    my ($self, $code) = @_;

    my $status_file = "$self->{statusdir}/watchdog_status";

    return $self->global_lock(sub {
        my $wdstatus = {};

        if (-f $status_file) {
            my $json_text = PVE::Tools::file_get_contents($status_file);
            $wdstatus = decode_json($json_text);
        }

        my $res;
        ($wdstatus, $res) = $code->($wdstatus);

        PVE::Tools::file_set_contents($status_file, encode_json($wdstatus));

        return $res;
    });
};
234 | ||
# Check all watchdogs belonging to $node.
#
# A watchdog is expired if its last update is more than $watchdog_timeout
# (measured with $self->get_time()) ago. Expired watchdogs are removed
# from the watchdog status.
#
# Returns 1 if no watchdog of the node is expired, else 0.
sub watchdog_check {
    my ($self, $node) = @_;

    return $modify_watchog->($self, sub {
        my ($wdstatus) = @_;

        my $alive = 1;

        for my $handle (keys %$wdstatus) {
            my $entry = $wdstatus->{$handle};
            next unless $entry->{node} eq $node;

            my $now = $self->get_time();
            my $elapsed = $now - $entry->{update_time};
            next unless $elapsed > $watchdog_timeout;

            # expired
            $alive = 0;
            delete $wdstatus->{$handle};
        }

        return ($wdstatus, $alive);
    });
}
261 | ||
# counter used to generate unique watchdog handles inside this process
my $wdcounter = 0;

# Open a new watchdog for $node.
#
# Registers a new entry in the watchdog status, using the current time
# ($self->get_time()) as last update time.
#
# Returns the watchdog handle, a string of the form
# "WD:<node>:<pid>:<counter>".
sub watchdog_open {
    my ($self, $node) = @_;

    return $modify_watchog->($self, sub {
        my ($wdstatus) = @_;

        $wdcounter += 1;

        my $handle = "WD:$node:$$:$wdcounter";

        die "internal error" if defined($wdstatus->{$handle});

        my $entry = {
            node => $node,
            update_time => $self->get_time(),
        };
        $wdstatus->{$handle} = $entry;

        return ($wdstatus, $handle);
    });
}
286 | ||
# Close the watchdog with handle $wfh, i.e. remove it from the watchdog
# status.
#
# Dies if there is no such handle, or if the watchdog is already expired
# (last update more than $watchdog_timeout ago) - an expired watchdog is
# not removed.
sub watchdog_close {
    my ($self, $wfh) = @_;

    return $modify_watchog->($self, sub {
        my ($wdstatus) = @_;

        my $entry = $wdstatus->{$wfh};
        die "no such watchdog handle '$wfh'\n" unless defined($entry);

        my $elapsed = $self->get_time() - $entry->{update_time};
        if ($elapsed > $watchdog_timeout) {
            die "watchdog expired";
        }

        delete $wdstatus->{$wfh};

        return ($wdstatus);
    });
}
306 | ||
# Update (trigger) the watchdog with handle $wfh, i.e. set its last update
# time to the current time ($self->get_time()).
#
# Dies if there is no such handle, or if the watchdog is already expired
# (last update more than $watchdog_timeout ago).
sub watchdog_update {
    my ($self, $wfh) = @_;

    return $modify_watchog->($self, sub {
        my ($wdstatus) = @_;

        my $entry = $wdstatus->{$wfh};
        die "no such watchdog handle '$wfh'\n" unless defined($entry);

        my $now = $self->get_time();
        my $elapsed = $now - $entry->{update_time};
        if ($elapsed > $watchdog_timeout) {
            die "watchdog expired";
        }

        $entry->{update_time} = $now;

        return ($wdstatus);
    });
}
329 | ||
0cfd8f5b DM |
330 | |
331 | ||
332 | 1; |