]> git.proxmox.com Git - mirror_corosync-qdevice.git/blame - qdevices/qdevice-heuristics.c
qdevice: Use EXIT_SUCCESS and EXIT_FAILURE codes
[mirror_corosync-qdevice.git] / qdevices / qdevice-heuristics.c
CommitLineData
/*
 * Copyright (c) 2015-2020 Red Hat, Inc.
 *
 * All rights reserved.
 *
 * Author: Jan Friesse (jfriesse@redhat.com)
 *
 * This software licensed under BSD license, the text of which follows:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * - Redistributions of source code must retain the above copyright notice,
 *   this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright notice,
 *   this list of conditions and the following disclaimer in the documentation
 *   and/or other materials provided with the distribution.
 * - Neither the name of the Red Hat, Inc. nor the names of its
 *   contributors may be used to endorse or promote products derived from this
 *   software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */
34
35#include <sys/types.h>
36#include <sys/wait.h>
37
38#include <err.h>
39#include <poll.h>
40#include <stdlib.h>
41#include <string.h>
42#include <unistd.h>
43
313d42d1 44#include "log.h"
9a1955a7
JF
45#include "qdevice-heuristics.h"
46#include "qdevice-heuristics-cmd.h"
47#include "qdevice-heuristics-worker.h"
48#include "qdevice-heuristics-io.h"
49#include "qdevice-votequorum.h"
50#include "utils.h"
51
/*
 * Capacity of the pollfd array used by
 * qdevice_heuristics_wait_for_initial_exec_result(). That function registers
 * at most four descriptors (log pipe, command receive pipe, votequorum fd and,
 * when there is pending output, the command send pipe) and asserts that the
 * number of used slots stays below this value before adding each one.
 */
#define QDEVICE_HEURISTICS_WAIT_FOR_INITIAL_EXEC_RESULT_MAX_PFDS	5
53
54void
55qdevice_heuristics_init(struct qdevice_heuristics_instance *instance,
56 struct qdevice_advanced_settings *advanced_settings)
57{
58 int pipe_cmd_in[2], pipe_cmd_out[2], pipe_log_out[2];
59 pid_t pid;
60
61 if (pipe(pipe_cmd_in) != 0) {
406b689d 62 err(EXIT_FAILURE, "Can't create command input pipe");
9a1955a7
JF
63 }
64
65 if (pipe(pipe_cmd_out) != 0) {
406b689d 66 err(EXIT_FAILURE, "Can't create command output pipe");
9a1955a7
JF
67 }
68
69 if (pipe(pipe_log_out) != 0) {
406b689d 70 err(EXIT_FAILURE, "Can't create logging output pipe");
9a1955a7
JF
71 }
72
73 pid = fork();
74 if (pid == -1) {
406b689d 75 err(EXIT_FAILURE, "Can't create child process");
9a1955a7
JF
76 } else if (pid == 0) {
77 /*
78 * Child
79 */
80 (void)setsid();
81 if (dup2(pipe_cmd_in[0], 0) == -1) {
406b689d 82 err(EXIT_FAILURE, "Can't dup2 command input pipe");
9a1955a7
JF
83 }
84 close(pipe_cmd_in[1]);
85 close(pipe_cmd_in[0]);
86 if (utils_fd_set_non_blocking(0) == -1) {
406b689d 87 err(EXIT_FAILURE, "Can't set non blocking flag on command input pipe");
9a1955a7
JF
88 }
89
90 if (dup2(pipe_cmd_out[1], 1) == -1) {
406b689d 91 err(EXIT_FAILURE, "Can't dup2 command output pipe");
9a1955a7
JF
92 }
93 close(pipe_cmd_out[0]);
94 close(pipe_cmd_out[1]);
95
96 if (dup2(pipe_log_out[1], 2) == -1) {
406b689d 97 err(EXIT_FAILURE, "Can't dup2 logging output pipe");
9a1955a7
JF
98 }
99 close(pipe_log_out[0]);
100 close(pipe_log_out[1]);
101
102 qdevice_heuristics_worker_start(advanced_settings->heuristics_ipc_max_send_receive_size,
103 advanced_settings->heuristics_use_execvp, advanced_settings->heuristics_max_processes,
104 advanced_settings->heuristics_kill_list_interval);
105
106 qdevice_advanced_settings_destroy(advanced_settings);
107
406b689d 108 exit(EXIT_SUCCESS);
9a1955a7
JF
109 } else {
110 close(pipe_cmd_in[0]);
111 close(pipe_cmd_out[1]);
112 close(pipe_log_out[1]);
113
114 qdevice_heuristics_instance_init(instance);
115
116 instance->pipe_cmd_send = pipe_cmd_in[1];
117 if (utils_fd_set_non_blocking(instance->pipe_cmd_send) == -1) {
406b689d 118 err(EXIT_FAILURE, "Can't set non blocking flag on command input pipe");
9a1955a7
JF
119 }
120 instance->pipe_cmd_recv = pipe_cmd_out[0];
121 if (utils_fd_set_non_blocking(instance->pipe_cmd_recv) == -1) {
406b689d 122 err(EXIT_FAILURE, "Can't set non blocking flag on command output pipe");
9a1955a7
JF
123 }
124 instance->pipe_log_recv = pipe_log_out[0];
125 if (utils_fd_set_non_blocking(instance->pipe_cmd_recv) == -1) {
406b689d 126 err(EXIT_FAILURE, "Can't set non blocking flag on logging output pipe");
9a1955a7
JF
127 }
128 instance->worker_pid = pid;
129
130 send_buffer_list_init(&instance->cmd_out_buffer_list,
131 advanced_settings->heuristics_ipc_max_send_buffers,
132 advanced_settings->heuristics_ipc_max_send_receive_size);
133 dynar_init(&instance->log_in_buffer,
134 advanced_settings->heuristics_ipc_max_send_receive_size);
135 dynar_init(&instance->cmd_in_buffer,
136 advanced_settings->heuristics_ipc_max_send_receive_size);
137 }
138}
139
140void
9bbf2f24 141qdevice_heuristics_destroy(struct qdevice_heuristics_instance *instance, int wait_for_worker_exit)
9a1955a7
JF
142{
143 int status;
144
145 /*
9bbf2f24
JF
146 * Close pipe_cmd_send. Heuristics worker catch the close of the fd and exits
147 * properly.
9a1955a7
JF
148 */
149 close(instance->pipe_cmd_send);
150
9bbf2f24
JF
151 /*
152 * When daemonization is used, heuristics worker is not a child of the corosync-qdevice
153 * process any longer so it's not possible to wait for its exit.
154 */
155 if (wait_for_worker_exit) {
c8d19612 156 log(LOG_DEBUG, "Waiting for heuristics worker to finish");
9bbf2f24 157 if (waitpid(instance->worker_pid, &status, 0) == -1) {
c8d19612 158 log_err(LOG_ERR, "Heuristics worker waitpid failed");
9bbf2f24
JF
159 } else {
160 /*
161 * Log what left in worker log buffer. Errors can be ignored
162 */
163 (void)qdevice_heuristics_log_read_from_pipe(instance);
164 }
9a1955a7
JF
165 }
166
167 close(instance->pipe_cmd_recv);
168 close(instance->pipe_log_recv);
169
170 dynar_destroy(&instance->log_in_buffer);
171 dynar_destroy(&instance->cmd_in_buffer);
172 send_buffer_list_free(&instance->cmd_out_buffer_list);
173
174 qdevice_heuristics_instance_destroy(instance);
175}
176
177int
178qdevice_heuristics_exec(struct qdevice_heuristics_instance *instance, int sync_in_progress)
179{
180 uint32_t timeout;
181
182 instance->expected_reply_seq_number++;
183 instance->waiting_for_result = 1;
184
185 if (sync_in_progress) {
186 timeout = instance->sync_timeout;
187 } else {
188 timeout = instance->timeout;
189 }
190
191 return (qdevice_heuristics_cmd_write_exec(instance, timeout,
192 instance->expected_reply_seq_number));
193}
194
195int
196qdevice_heuristics_waiting_for_result(const struct qdevice_heuristics_instance *instance)
197{
198
199 return (instance->waiting_for_result);
200}
201
202int
203qdevice_heuristics_change_exec_list(struct qdevice_heuristics_instance *instance,
204 const struct qdevice_heuristics_exec_list *new_exec_list, int sync_in_progress)
205{
206
207 if (qdevice_heuristics_cmd_write_exec_list(instance, new_exec_list) != 0) {
208 return (-1);
209 }
210
211 qdevice_heuristics_exec_list_free(&instance->exec_list);
212
213 if (new_exec_list != NULL) {
214 if (qdevice_heuristics_exec_list_clone(&instance->exec_list, new_exec_list) != 0) {
c8d19612 215 log(LOG_ERR, "Can't clone exec list");
9a1955a7
JF
216
217 return (-1);
218 }
219 }
220
221 if (qdevice_heuristics_waiting_for_result(instance)) {
222 if (qdevice_heuristics_exec(instance, sync_in_progress) != 0) {
c8d19612 223 log(LOG_ERR, "Can't execute heuristics");
9a1955a7
JF
224
225 return (-1);
226 }
227 }
228
229 return (0);
230}
231
232
233int
234qdevice_heuristics_wait_for_initial_exec_result(struct qdevice_heuristics_instance *instance)
235{
236 struct pollfd pfds[QDEVICE_HEURISTICS_WAIT_FOR_INITIAL_EXEC_RESULT_MAX_PFDS];
237 int no_pfds;
238 int poll_res;
239 int timeout;
240 int i;
241 int case_processed;
242 int res;
243
244 while (!instance->qdevice_instance_ptr->vq_node_list_initial_heuristics_finished) {
245 no_pfds = 0;
246
247 assert(no_pfds < QDEVICE_HEURISTICS_WAIT_FOR_INITIAL_EXEC_RESULT_MAX_PFDS);
248 pfds[no_pfds].fd = instance->pipe_log_recv;
249 pfds[no_pfds].events = POLLIN;
250 pfds[no_pfds].revents = 0;
251 no_pfds++;
252
253 assert(no_pfds < QDEVICE_HEURISTICS_WAIT_FOR_INITIAL_EXEC_RESULT_MAX_PFDS);
254 pfds[no_pfds].fd = instance->pipe_cmd_recv;
255 pfds[no_pfds].events = POLLIN;
256 pfds[no_pfds].revents = 0;
257 no_pfds++;
258
259 assert(no_pfds < QDEVICE_HEURISTICS_WAIT_FOR_INITIAL_EXEC_RESULT_MAX_PFDS);
260 pfds[no_pfds].fd = instance->qdevice_instance_ptr->votequorum_poll_fd;
261 pfds[no_pfds].events = POLLIN;
262 pfds[no_pfds].revents = 0;
263 no_pfds++;
264
265 if (!send_buffer_list_empty(&instance->cmd_out_buffer_list)) {
266 assert(no_pfds < QDEVICE_HEURISTICS_WAIT_FOR_INITIAL_EXEC_RESULT_MAX_PFDS);
267 pfds[no_pfds].fd = instance->pipe_cmd_send;
268 pfds[no_pfds].events = POLLOUT;
269 pfds[no_pfds].revents = 0;
270 no_pfds++;
271 }
272
273 /*
274 * We know this is never larger than QDEVICE_DEFAULT_HEURISTICS_MAX_TIMEOUT * 2
275 */
276 timeout = (int)instance->sync_timeout * 2;
277
278 poll_res = poll(pfds, no_pfds, timeout);
279 if (poll_res > 0) {
280 for (i = 0; i < no_pfds; i++) {
281 if (pfds[i].revents & POLLIN) {
282 case_processed = 0;
283 switch (i) {
284 case 0:
285 case_processed = 1;
286
287 res = qdevice_heuristics_log_read_from_pipe(instance);
288 if (res == -1) {
289 return (-1);
290 }
291 break;
292 case 1:
293 case_processed = 1;
294 res = qdevice_heuristics_cmd_read_from_pipe(instance);
295 if (res == -1) {
296 return (-1);
297 }
298 break;
299 case 2:
300 case_processed = 1;
301 res = qdevice_votequorum_dispatch(instance->qdevice_instance_ptr);
302 if (res == -1) {
303 return (-1);
304 }
305 case 3:
306 /*
307 * Read on heuristics cmd send fs shouldn't happen
308 */
309 break;
310 }
311
312 if (!case_processed) {
c8d19612 313 log(LOG_CRIT, "Unhandled read on poll descriptor %u", i);
406b689d 314 exit(EXIT_FAILURE);
9a1955a7
JF
315 }
316 }
317
318 if (pfds[i].revents & POLLOUT) {
319 case_processed = 0;
320 switch (i) {
321 case 0:
322 case 1:
323 case 2:
324 /*
325 * Write on heuristics log, cmd recv or vq shouldn't happen
326 */
327 break;
328 case 3:
329 case_processed = 1;
330 res = qdevice_heuristics_cmd_write(instance);
331 if (res == -1) {
332 return (-1);
333 }
334 break;
335 }
336
337 if (!case_processed) {
c8d19612 338 log(LOG_CRIT, "Unhandled write on poll descriptor %u", i);
406b689d 339 exit(EXIT_FAILURE);
9a1955a7
JF
340 }
341 }
342
343 if ((pfds[i].revents & (POLLERR|POLLHUP|POLLNVAL)) &&
344 !(pfds[i].revents & (POLLIN|POLLOUT))) {
345 switch (i) {
346 case 0:
347 case 1:
348 case 3:
349 /*
350 * Closed pipe doesn't mean return of POLLIN. To display
351 * better log message, we call read log as if POLLIN would
352 * be set.
353 */
354 res = qdevice_heuristics_log_read_from_pipe(instance);
355 if (res == -1) {
356 return (-1);
357 }
358
c8d19612
JF
359 log(LOG_ERR, "POLLERR (%u) on heuristics pipe. Exiting",
360 pfds[i].revents);
9a1955a7
JF
361 return (-1);
362 break;
363 case 2:
c8d19612
JF
364 log(LOG_ERR, "POLLERR (%u) on corosync socket. Exiting",
365 pfds[i].revents);
9a1955a7
JF
366 return (-1);
367 break;
368 }
369 }
370 }
371 } else if (poll_res == 0) {
c8d19612 372 log(LOG_ERR, "Timeout waiting for initial heuristics exec result");
9a1955a7
JF
373 return (-1);
374 } else {
c8d19612 375 log_err(LOG_ERR, "Initial heuristics exec result poll failed");
9a1955a7
JF
376 return (-1);
377 }
378 }
379
380 return (0);
381}