]> git.proxmox.com Git - mirror_corosync-qdevice.git/blame - qdevices/qdevice-heuristics-worker.c
init: Fix init scripts to work with containers
[mirror_corosync-qdevice.git] / qdevices / qdevice-heuristics-worker.c
CommitLineData
9a1955a7
JF
1/*
2 * Copyright (c) 2015-2017 Red Hat, Inc.
3 *
4 * All rights reserved.
5 *
6 * Author: Jan Friesse (jfriesse@redhat.com)
7 *
8 * This software licensed under BSD license, the text of which follows:
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions are met:
12 *
13 * - Redistributions of source code must retain the above copyright notice,
14 * this list of conditions and the following disclaimer.
15 * - Redistributions in binary form must reproduce the above copyright notice,
16 * this list of conditions and the following disclaimer in the documentation
17 * and/or other materials provided with the distribution.
18 * - Neither the name of the Red Hat, Inc. nor the names of its
19 * contributors may be used to endorse or promote products derived from this
20 * software without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
32 * THE POSSIBILITY OF SUCH DAMAGE.
33 */
34
35#include <limits.h>
36#include <errno.h>
37#include <poll.h>
38#include <signal.h>
39#include <stdlib.h>
40#include <string.h>
41#include <unistd.h>
42
43#include "dynar-str.h"
44#include "qdevice-config.h"
45#include "qdevice-heuristics-io.h"
46#include "qdevice-heuristics-worker.h"
47#include "qdevice-heuristics-worker-instance.h"
48#include "qdevice-heuristics-worker-log.h"
49#include "qdevice-heuristics-worker-cmd.h"
50
51/*
52 * Declarations
53 */
/* Timer callback which processes the kill list of the worker instance. */
static int
qdevice_heuristics_worker_kill_list_timer_callback(void *data1, void *data2);

/* Notify callback registered with the main process list. */
static void
qdevice_heuristics_worker_process_list_notify(enum process_list_notify_reason reason,
    const struct process_list_entry *entry, void *user_data);

/* Installs the signal dispositions used by the worker. */
static void
qdevice_heuristics_worker_signal_handlers_register(void);
62
63
64/*
65 * Definitions
66 */
67static void
68qdevice_heuristics_worker_process_list_notify(enum process_list_notify_reason reason,
69 const struct process_list_entry *entry, void *user_data)
70{
71 struct qdevice_heuristics_worker_instance *instance;
72
73 instance = (struct qdevice_heuristics_worker_instance *)user_data;
74
75 switch (reason) {
76 case PROCESS_LIST_NOTIFY_REASON_EXECUTED:
77 qdevice_heuristics_worker_log_printf(instance, LOG_DEBUG,
78 "process %s executed", entry->name);
79 break;
80 case PROCESS_LIST_NOTIFY_REASON_FINISHED:
81 if (!WIFEXITED(entry->exit_status) || WEXITSTATUS(entry->exit_status) != 0) {
82 if (WIFEXITED(entry->exit_status)) {
83 qdevice_heuristics_worker_log_printf(instance, LOG_WARNING,
84 "process %s finished with status %d", entry->name,
85 WEXITSTATUS(entry->exit_status));
86 } else if (WIFSIGNALED(entry->exit_status)) {
87 qdevice_heuristics_worker_log_printf(instance, LOG_WARNING,
88 "process %s killed by signal %d", entry->name,
89 WTERMSIG(entry->exit_status));
90 } else {
91 qdevice_heuristics_worker_log_printf(instance, LOG_WARNING,
92 "process %s finished with non zero status", entry->name);
93 }
94 } else {
95 qdevice_heuristics_worker_log_printf(instance, LOG_DEBUG,
fdbbe724 96 "process %s successfully finished", entry->name);
9a1955a7
JF
97 }
98 break;
99 }
100}
101
/*
 * Set signal dispositions for the worker: SIGCHLD gets the default action,
 * SIGPIPE and SIGINT are ignored. Every disposition is installed with an
 * empty signal mask and the SA_RESTART flag. Results of sigaction() are not
 * checked.
 */
static void
qdevice_heuristics_worker_signal_handlers_register(void)
{
	const struct {
		int signo;
		void (*disposition)(int);
	} setup[] = {
		{ SIGCHLD, SIG_DFL },
		{ SIGPIPE, SIG_IGN },
		{ SIGINT, SIG_IGN },
	};
	struct sigaction sa;
	size_t idx;

	for (idx = 0; idx < sizeof(setup) / sizeof(setup[0]); idx++) {
		sa.sa_handler = setup[idx].disposition;
		sigemptyset(&sa.sa_mask);
		sa.sa_flags = SA_RESTART;

		sigaction(setup[idx].signo, &sa, NULL);
	}
}
125
126static int
127qdevice_heuristics_worker_kill_list_timer_callback(void *data1, void *data2)
128{
129 struct qdevice_heuristics_worker_instance *instance;
130 size_t kill_list_size;
131
132 instance = (struct qdevice_heuristics_worker_instance *)data1;
133
134 if (process_list_process_kill_list(&instance->main_process_list) != 0) {
135 qdevice_heuristics_worker_log_printf(instance, LOG_CRIT,
136 "qdevice_heuristics_worker_kill_list_timer_callback: process kill list failed. "
137 "Shutting down worker");
138
139 instance->schedule_exit = 1;
140 return (0);
141 }
142
143 kill_list_size = process_list_get_kill_list_items(&instance->main_process_list);
144
145 if (kill_list_size > 0) {
146 qdevice_heuristics_worker_log_printf(instance, LOG_DEBUG,
147 "Still waiting for %zu processes exit", kill_list_size);
148 }
149
150 /*
151 * Schedule this timer again
152 */
153 return (-1);
154}
155
156int
157qdevice_heuristics_worker_exec_timeout_timer_callback(void *data1, void *data2)
158{
159 struct qdevice_heuristics_worker_instance *instance;
160
161 instance = (struct qdevice_heuristics_worker_instance *)data1;
162
163 qdevice_heuristics_worker_log_printf(instance, LOG_WARNING,
164 "Not all heuristics execs finished on time");
165
166 process_list_move_active_entries_to_kill_list(&instance->main_process_list);
167
168 instance->exec_timeout_timer = NULL;
169
170 if (qdevice_heuristics_worker_cmd_write_exec_result(instance, instance->last_exec_seq_number,
171 QDEVICE_HEURISTICS_EXEC_RESULT_FAIL) != 0) {
172 instance->schedule_exit = 1;
173
174 return (0);
175 }
176
177 return (0);
178}
179
180static int
181qdevice_heuristics_worker_poll(struct qdevice_heuristics_worker_instance *instance)
182{
183 int poll_res;
184 struct pollfd poll_input_fd;
185 uint32_t timeout;
186 int plist_summary;
187
188 /*
189 * Poll command input
190 */
191 poll_input_fd.fd = QDEVICE_HEURISTICS_WORKER_CMD_IN_FD;
192 poll_input_fd.events = POLLIN;
193 poll_input_fd.revents = 0;
194
195 timeout = timer_list_time_to_expire_ms(&instance->main_timer_list);
196 if (timeout > QDEVICE_MIN_HEURISTICS_TIMEOUT) {
197 timeout = QDEVICE_MIN_HEURISTICS_TIMEOUT;
198 }
199
200 if ((poll_res = poll(&poll_input_fd, 1, timeout)) >= 0) {
201 if (poll_input_fd.revents & POLLIN) {
202 /*
203 * POLLIN
204 */
205 if (qdevice_heuristics_worker_cmd_read_from_pipe(instance) != 0) {
206 return (-1);
207 }
208 }
209
210 if (poll_input_fd.revents & POLLOUT) {
211 /*
212 * Pollout shouldn't happen (critical error)
213 */
214 qdevice_heuristics_worker_log_printf(instance, LOG_CRIT,
215 "qdevice_heuristics_worker_poll: POLLOUT set. Shutting down worker");
216
217 return (-1);
218 }
219
220 if (poll_input_fd.revents & (POLLERR|POLLHUP|POLLNVAL) &&
221 !(poll_input_fd.revents & (POLLIN|POLLOUT))) {
222 /*
223 * Qdevice closed pipe
224 */
225
226 return (-1);
227 }
228 }
229
230 if (process_list_waitpid(&instance->main_process_list) != 0) {
231 qdevice_heuristics_worker_log_printf(instance, LOG_CRIT,
232 "qdevice_heuristics_worker_poll: Waitpid failed. Shutting down worker");
233
234 return (-1);
235 }
236
237 if (instance->exec_timeout_timer != NULL) {
238 plist_summary = process_list_get_summary_result_short(&instance->main_process_list);
239
240 switch (plist_summary) {
241 case -1:
242 /*
243 * Processes not finished -> continue
244 */
245 break;
246 case 0:
247 /*
fdbbe724 248 * All processes finished successfully
9a1955a7
JF
249 */
250 if (qdevice_heuristics_worker_cmd_write_exec_result(instance,
251 instance->last_exec_seq_number, QDEVICE_HEURISTICS_EXEC_RESULT_PASS) != 0) {
252 return (-1);
253 }
254
255 process_list_move_active_entries_to_kill_list(&instance->main_process_list);
256
257 timer_list_delete(&instance->main_timer_list, instance->exec_timeout_timer);
258 instance->exec_timeout_timer = NULL;
259
260 break;
261 case 1:
262 /*
263 * Some processes failed
264 */
265 if (qdevice_heuristics_worker_cmd_write_exec_result(instance,
266 instance->last_exec_seq_number, QDEVICE_HEURISTICS_EXEC_RESULT_FAIL) != 0) {
267 return (-1);
268 }
269
270 process_list_move_active_entries_to_kill_list(&instance->main_process_list);
271
272 timer_list_delete(&instance->main_timer_list, instance->exec_timeout_timer);
273 instance->exec_timeout_timer = NULL;
274 break;
275 default:
276 qdevice_heuristics_worker_log_printf(instance, LOG_CRIT,
277 "qdevice_heuristics_worker_poll: Unhandled "
278 "process_list_get_summary_result. Shutting down worker");
279
280 return (-1);
281 break;
282 }
283 }
284
285 timer_list_expire(&instance->main_timer_list);
286
287 if (instance->schedule_exit) {
288 return (-1);
289 }
290
291 return (0);
292}
293
294void
295qdevice_heuristics_worker_start(size_t ipc_max_send_receive_size, int use_execvp,
296 size_t max_processes, uint32_t kill_list_interval)
297{
298 struct qdevice_heuristics_worker_instance instance;
299
300 memset(&instance, 0, sizeof(instance));
301
302 instance.schedule_exit = 0;
303
304 dynar_init(&instance.cmd_in_buffer, ipc_max_send_receive_size);
305 dynar_init(&instance.cmd_out_buffer, ipc_max_send_receive_size);
306 dynar_init(&instance.log_out_buffer, ipc_max_send_receive_size);
307
308 process_list_init(&instance.main_process_list, max_processes, use_execvp,
309 qdevice_heuristics_worker_process_list_notify, (void *)&instance);
310
311 timer_list_init(&instance.main_timer_list);
312 instance.kill_list_timer = timer_list_add(&instance.main_timer_list,
313 kill_list_interval, qdevice_heuristics_worker_kill_list_timer_callback,
314 (void *)&instance, NULL);
315
316 if (instance.kill_list_timer == NULL) {
317 qdevice_heuristics_worker_log_printf(&instance, LOG_CRIT,
318 "Can't create kill list timer");
319 return ;
320 }
321
322 instance.exec_timeout_timer = NULL;
323
324 qdevice_heuristics_exec_list_init(&instance.exec_list);
325
326 qdevice_heuristics_worker_signal_handlers_register();
327
328 qdevice_heuristics_worker_log_printf(&instance, LOG_DEBUG, "Heuristic worker initialized");
329
330 while (qdevice_heuristics_worker_poll(&instance) == 0) {
331 }
332
333 qdevice_heuristics_worker_log_printf(&instance, LOG_DEBUG, "Heuristic worker shutdown "
334 "requested");
335
336 qdevice_heuristics_exec_list_free(&instance.exec_list);
337
338 timer_list_free(&instance.main_timer_list);
339
340 qdevice_heuristics_worker_log_printf(&instance, LOG_DEBUG,
341 "Waiting for all processes to exit");
342
343 if (process_list_killall(&instance.main_process_list, kill_list_interval) != 0) {
344 qdevice_heuristics_worker_log_printf(&instance, LOG_WARNING,
345 "Not all process exited");
346 }
347
348 process_list_free(&instance.main_process_list);
349
350 dynar_destroy(&instance.cmd_in_buffer);
351 dynar_destroy(&instance.cmd_out_buffer);
352 dynar_destroy(&instance.log_out_buffer);
353}