2 * Copyright (c) 2009-2010 Red Hat, Inc.
6 * Author: Jan Friesse (jfriesse@redhat.com)
8 * This software licensed under BSD license, the text of which follows:
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions are met:
13 * - Redistributions of source code must retain the above copyright notice,
14 * this list of conditions and the following disclaimer.
15 * - Redistributions in binary form must reproduce the above copyright notice,
16 * this list of conditions and the following disclaimer in the documentation
17 * and/or other materials provided with the distribution.
18 * - Neither the name of the Red Hat, Inc. nor the names of its
19 * contributors may be used to endorse or promote products derived from this
20 * software without specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
32 * THE POSSIBILITY OF SUCH DAMAGE.
46 #include <sys/types.h>
47 #include <sys/socket.h>
50 #include <corosync/corotypes.h>
51 #include <corosync/coroipc_types.h>
52 #include <corosync/coroipcc.h>
53 #include <corosync/corodefs.h>
54 #include <corosync/confdb.h>
55 #include <corosync/hdb.h>
56 #include <corosync/quorum.h>
58 #include <corosync/sam.h>
66 #define SAM_CONFDB_S_FAILED "failed"
67 #define SAM_CONFDB_S_REGISTERED "stopped"
68 #define SAM_CONFDB_S_STARTED "running"
69 #define SAM_CONFDB_S_Q_WAIT "waiting for quorum"
71 #define SAM_RP_MASK_Q(pol) (pol & (~SAM_RECOVERY_POLICY_QUORUM))
72 #define SAM_RP_MASK_C(pol) (pol & (~SAM_RECOVERY_POLICY_CONFDB))
73 #define SAM_RP_MASK(pol) (pol & (~(SAM_RECOVERY_POLICY_QUORUM | SAM_RECOVERY_POLICY_CONFDB)))
75 enum sam_internal_status_t
{
76 SAM_INTERNAL_STATUS_NOT_INITIALIZED
= 0,
77 SAM_INTERNAL_STATUS_INITIALIZED
,
78 SAM_INTERNAL_STATUS_REGISTERED
,
79 SAM_INTERNAL_STATUS_STARTED
,
80 SAM_INTERNAL_STATUS_FINALIZED
87 SAM_COMMAND_DATA_STORE
,
88 SAM_COMMAND_WARN_SIGNAL_SET
,
89 SAM_COMMAND_MARK_FAILED
,
97 enum sam_parent_action_t
{
98 SAM_PARENT_ACTION_ERROR
,
99 SAM_PARENT_ACTION_RECOVERY
,
100 SAM_PARENT_ACTION_QUIT
,
101 SAM_PARENT_ACTION_CONTINUE
104 enum sam_confdb_key_t
{
105 SAM_CONFDB_KEY_RECOVERY
,
106 SAM_CONFDB_KEY_HC_PERIOD
,
107 SAM_CONFDB_KEY_LAST_HC
,
108 SAM_CONFDB_KEY_STATE
,
113 sam_recovery_policy_t recovery_policy
;
114 enum sam_internal_status_t internal_status
;
115 unsigned int instance_id
;
122 sam_hc_callback_t hc_callback
;
124 int cb_rpipe_fd
, cb_wpipe_fd
;
128 size_t user_data_size
;
129 size_t user_data_allocated
;
131 pthread_mutex_t lock
;
133 quorum_handle_t quorum_handle
;
137 confdb_handle_t confdb_handle
;
138 hdb_handle_t confdb_pid_handle
;
141 extern const char *__progname
;
143 static cs_error_t
sam_confdb_update_key (enum sam_confdb_key_t key
, const char *value
)
147 uint64_t hc_period
, last_hc
;
148 const char *ssvalue
[] = { [SAM_RECOVERY_POLICY_QUIT
] = "quit", [SAM_RECOVERY_POLICY_RESTART
] = "restart" };
151 case SAM_CONFDB_KEY_RECOVERY
:
152 svalue
= ssvalue
[SAM_RP_MASK (sam_internal_data
.recovery_policy
)];
154 if ((err
= confdb_key_create_typed (sam_internal_data
.confdb_handle
, sam_internal_data
.confdb_pid_handle
,
155 "recovery", svalue
, strlen ((const char *)svalue
), CONFDB_VALUETYPE_STRING
)) != CS_OK
) {
159 case SAM_CONFDB_KEY_HC_PERIOD
:
160 hc_period
= sam_internal_data
.time_interval
;
162 if ((err
= confdb_key_create_typed (sam_internal_data
.confdb_handle
, sam_internal_data
.confdb_pid_handle
,
163 "poll_period", &hc_period
, sizeof (hc_period
), CONFDB_VALUETYPE_UINT64
)) != CS_OK
) {
167 case SAM_CONFDB_KEY_LAST_HC
:
168 last_hc
= cs_timestamp_get();
170 if ((err
= confdb_key_create_typed (sam_internal_data
.confdb_handle
, sam_internal_data
.confdb_pid_handle
,
171 "last_updated", &last_hc
, sizeof (last_hc
), CONFDB_VALUETYPE_UINT64
)) != CS_OK
) {
175 case SAM_CONFDB_KEY_STATE
:
177 if ((err
= confdb_key_create_typed (sam_internal_data
.confdb_handle
, sam_internal_data
.confdb_pid_handle
,
178 "state", svalue
, strlen ((const char *)svalue
), CONFDB_VALUETYPE_STRING
)) != CS_OK
) {
190 static cs_error_t
sam_confdb_destroy_pid_obj (void)
192 return (confdb_object_destroy (sam_internal_data
.confdb_handle
, sam_internal_data
.confdb_pid_handle
));
195 static cs_error_t
sam_confdb_register (void)
197 const char *obj_name
;
199 confdb_handle_t confdb_handle
;
200 hdb_handle_t resource_handle
, process_handle
, pid_handle
, obj_handle
;
201 hdb_handle_t
*res_handle
;
202 char tmp_obj
[PATH_MAX
];
205 if ((err
= confdb_initialize (&confdb_handle
, NULL
)) != CS_OK
) {
209 for (i
= 0; i
< 3; i
++) {
212 obj_name
= "resources";
213 obj_handle
= OBJECT_PARENT_HANDLE
;
214 res_handle
= &resource_handle
;
217 obj_name
= "process";
218 obj_handle
= resource_handle
;
219 res_handle
= &process_handle
;
222 if (snprintf (tmp_obj
, sizeof (tmp_obj
), "%s:%d", __progname
, getpid ()) >= sizeof (tmp_obj
)) {
223 snprintf (tmp_obj
, sizeof (tmp_obj
), "%d", getpid ());
227 obj_handle
= process_handle
;
228 res_handle
= &pid_handle
;
232 if ((err
= confdb_object_find_start (confdb_handle
, obj_handle
)) != CS_OK
) {
236 if ((err
= confdb_object_find (confdb_handle
, obj_handle
, obj_name
, strlen (obj_name
),
237 res_handle
)) != CS_OK
) {
238 if (err
== CONFDB_ERR_ACCESS
) {
240 * Try to create object
242 if ((err
= confdb_object_create (confdb_handle
, obj_handle
, obj_name
,
243 strlen (obj_name
), res_handle
)) != CS_OK
) {
250 if ((err
= confdb_object_find_destroy (confdb_handle
, obj_handle
)) != CS_OK
) {
256 sam_internal_data
.confdb_pid_handle
= pid_handle
;
257 sam_internal_data
.confdb_handle
= confdb_handle
;
259 if ((err
= sam_confdb_update_key (SAM_CONFDB_KEY_RECOVERY
, NULL
)) != CS_OK
) {
260 goto destroy_finalize_error
;
263 if ((err
= sam_confdb_update_key (SAM_CONFDB_KEY_HC_PERIOD
, NULL
)) != CS_OK
) {
264 goto destroy_finalize_error
;
269 destroy_finalize_error
:
270 sam_confdb_destroy_pid_obj ();
272 confdb_finalize (confdb_handle
);
276 static void quorum_notification_fn (
277 quorum_handle_t handle
,
280 uint32_t view_list_entries
,
283 sam_internal_data
.quorate
= quorate
;
286 cs_error_t
sam_initialize (
288 sam_recovery_policy_t recovery_policy
)
290 quorum_callbacks_t quorum_callbacks
;
293 if (sam_internal_data
.internal_status
!= SAM_INTERNAL_STATUS_NOT_INITIALIZED
) {
294 return (CS_ERR_BAD_HANDLE
);
297 if (SAM_RP_MASK (recovery_policy
) != SAM_RECOVERY_POLICY_QUIT
&&
298 SAM_RP_MASK (recovery_policy
) != SAM_RECOVERY_POLICY_RESTART
) {
299 return (CS_ERR_INVALID_PARAM
);
302 if (recovery_policy
& SAM_RECOVERY_POLICY_QUORUM
) {
306 quorum_callbacks
.quorum_notify_fn
= quorum_notification_fn
;
307 if ((err
= quorum_initialize (&sam_internal_data
.quorum_handle
, &quorum_callbacks
)) != CS_OK
) {
311 if ((err
= quorum_trackstart (sam_internal_data
.quorum_handle
, CS_TRACK_CHANGES
)) != CS_OK
) {
312 goto exit_error_quorum
;
315 if ((err
= quorum_fd_get (sam_internal_data
.quorum_handle
, &sam_internal_data
.quorum_fd
)) != CS_OK
) {
316 goto exit_error_quorum
;
320 * Dispatch initial quorate state
322 if ((err
= quorum_dispatch (sam_internal_data
.quorum_handle
, CS_DISPATCH_ONE
)) != CS_OK
) {
323 goto exit_error_quorum
;
326 sam_internal_data
.recovery_policy
= recovery_policy
;
328 sam_internal_data
.time_interval
= time_interval
;
330 sam_internal_data
.internal_status
= SAM_INTERNAL_STATUS_INITIALIZED
;
332 sam_internal_data
.warn_signal
= SIGTERM
;
334 sam_internal_data
.am_i_child
= 0;
336 sam_internal_data
.user_data
= NULL
;
337 sam_internal_data
.user_data_size
= 0;
338 sam_internal_data
.user_data_allocated
= 0;
340 pthread_mutex_init (&sam_internal_data
.lock
, NULL
);
345 quorum_finalize (sam_internal_data
.quorum_handle
);
351 * Wrapper on top of write(2) function. It handles EAGAIN and EINTR states and sends whole buffer if possible.
353 static size_t sam_safe_write (
359 ssize_t tmp_bytes_write
;
364 tmp_bytes_write
= write (d
, (const char *)buf
+ bytes_write
,
365 (nbyte
- bytes_write
> SSIZE_MAX
) ? SSIZE_MAX
: nbyte
- bytes_write
);
367 if (tmp_bytes_write
== -1) {
368 if (!(errno
== EAGAIN
|| errno
== EINTR
))
371 bytes_write
+= tmp_bytes_write
;
373 } while (bytes_write
!= nbyte
);
375 return (bytes_write
);
379 * Wrapper on top of read(2) function. It handles EAGAIN and EINTR states and reads whole buffer if possible.
381 static size_t sam_safe_read (
387 ssize_t tmp_bytes_read
;
392 tmp_bytes_read
= read (d
, (char *)buf
+ bytes_read
,
393 (nbyte
- bytes_read
> SSIZE_MAX
) ? SSIZE_MAX
: nbyte
- bytes_read
);
395 if (tmp_bytes_read
== -1) {
396 if (!(errno
== EAGAIN
|| errno
== EINTR
))
399 bytes_read
+= tmp_bytes_read
;
402 } while (bytes_read
!= nbyte
&& tmp_bytes_read
!= 0);
407 static cs_error_t
sam_read_reply (
413 if (sam_safe_read (sam_internal_data
.child_fd_in
, &reply
, sizeof (reply
)) != sizeof (reply
)) {
414 return (CS_ERR_LIBRARY
);
418 case SAM_REPLY_ERROR
:
420 * Read error and return that
422 if (sam_safe_read (sam_internal_data
.child_fd_in
, &err
, sizeof (err
)) != sizeof (err
)) {
423 return (CS_ERR_LIBRARY
);
434 return (CS_ERR_LIBRARY
);
441 cs_error_t
sam_data_getsize (size_t *size
)
444 return (CS_ERR_INVALID_PARAM
);
447 if (sam_internal_data
.internal_status
!= SAM_INTERNAL_STATUS_INITIALIZED
&&
448 sam_internal_data
.internal_status
!= SAM_INTERNAL_STATUS_REGISTERED
&&
449 sam_internal_data
.internal_status
!= SAM_INTERNAL_STATUS_STARTED
) {
451 return (CS_ERR_BAD_HANDLE
);
454 pthread_mutex_lock (&sam_internal_data
.lock
);
456 *size
= sam_internal_data
.user_data_size
;
458 pthread_mutex_unlock (&sam_internal_data
.lock
);
463 cs_error_t
sam_data_restore (
472 return (CS_ERR_INVALID_PARAM
);
475 if (sam_internal_data
.internal_status
!= SAM_INTERNAL_STATUS_INITIALIZED
&&
476 sam_internal_data
.internal_status
!= SAM_INTERNAL_STATUS_REGISTERED
&&
477 sam_internal_data
.internal_status
!= SAM_INTERNAL_STATUS_STARTED
) {
479 return (CS_ERR_BAD_HANDLE
);
482 pthread_mutex_lock (&sam_internal_data
.lock
);
484 if (sam_internal_data
.user_data_size
== 0) {
490 if (size
< sam_internal_data
.user_data_size
) {
491 err
= CS_ERR_INVALID_PARAM
;
496 memcpy (data
, sam_internal_data
.user_data
, sam_internal_data
.user_data_size
);
498 pthread_mutex_unlock (&sam_internal_data
.lock
);
503 pthread_mutex_unlock (&sam_internal_data
.lock
);
508 cs_error_t
sam_data_store (
516 if (sam_internal_data
.internal_status
!= SAM_INTERNAL_STATUS_INITIALIZED
&&
517 sam_internal_data
.internal_status
!= SAM_INTERNAL_STATUS_REGISTERED
&&
518 sam_internal_data
.internal_status
!= SAM_INTERNAL_STATUS_STARTED
) {
520 return (CS_ERR_BAD_HANDLE
);
528 pthread_mutex_lock (&sam_internal_data
.lock
);
530 if (sam_internal_data
.am_i_child
) {
532 * We are child so we must send data to parent
534 command
= SAM_COMMAND_DATA_STORE
;
535 if (sam_safe_write (sam_internal_data
.child_fd_out
, &command
, sizeof (command
)) != sizeof (command
)) {
536 err
= CS_ERR_LIBRARY
;
541 if (sam_safe_write (sam_internal_data
.child_fd_out
, &size
, sizeof (size
)) != sizeof (size
)) {
542 err
= CS_ERR_LIBRARY
;
547 if (data
!= NULL
&& sam_safe_write (sam_internal_data
.child_fd_out
, data
, size
) != size
) {
548 err
= CS_ERR_LIBRARY
;
556 if ((err
= sam_read_reply (sam_internal_data
.child_fd_in
)) != CS_OK
) {
562 * We are parent or we received OK reply from parent -> do required action
565 free (sam_internal_data
.user_data
);
566 sam_internal_data
.user_data
= NULL
;
567 sam_internal_data
.user_data_allocated
= 0;
568 sam_internal_data
.user_data_size
= 0;
570 if (sam_internal_data
.user_data_allocated
< size
) {
571 if ((new_data
= realloc (sam_internal_data
.user_data
, size
)) == NULL
) {
572 err
= CS_ERR_NO_MEMORY
;
577 sam_internal_data
.user_data_allocated
= size
;
579 new_data
= sam_internal_data
.user_data
;
581 sam_internal_data
.user_data
= new_data
;
582 sam_internal_data
.user_data_size
= size
;
584 memcpy (sam_internal_data
.user_data
, data
, size
);
587 pthread_mutex_unlock (&sam_internal_data
.lock
);
592 pthread_mutex_unlock (&sam_internal_data
.lock
);
597 cs_error_t
sam_start (void)
601 sam_recovery_policy_t recpol
;
603 if (sam_internal_data
.internal_status
!= SAM_INTERNAL_STATUS_REGISTERED
) {
604 return (CS_ERR_BAD_HANDLE
);
607 recpol
= sam_internal_data
.recovery_policy
;
609 if (recpol
& SAM_RECOVERY_POLICY_QUORUM
|| recpol
& SAM_RECOVERY_POLICY_CONFDB
) {
610 pthread_mutex_lock (&sam_internal_data
.lock
);
613 command
= SAM_COMMAND_START
;
615 if (sam_safe_write (sam_internal_data
.child_fd_out
, &command
, sizeof (command
)) != sizeof (command
)) {
616 if (recpol
& SAM_RECOVERY_POLICY_QUORUM
|| recpol
& SAM_RECOVERY_POLICY_CONFDB
) {
617 pthread_mutex_unlock (&sam_internal_data
.lock
);
620 return (CS_ERR_LIBRARY
);
623 if (recpol
& SAM_RECOVERY_POLICY_QUORUM
|| recpol
& SAM_RECOVERY_POLICY_CONFDB
) {
625 * Wait for parent reply
627 if ((err
= sam_read_reply (sam_internal_data
.child_fd_in
)) != CS_OK
) {
628 pthread_mutex_unlock (&sam_internal_data
.lock
);
633 pthread_mutex_unlock (&sam_internal_data
.lock
);
636 if (sam_internal_data
.hc_callback
)
637 if (sam_safe_write (sam_internal_data
.cb_wpipe_fd
, &command
, sizeof (command
)) != sizeof (command
))
638 return (CS_ERR_LIBRARY
);
640 sam_internal_data
.internal_status
= SAM_INTERNAL_STATUS_STARTED
;
645 cs_error_t
sam_stop (void)
650 if (sam_internal_data
.internal_status
!= SAM_INTERNAL_STATUS_STARTED
) {
651 return (CS_ERR_BAD_HANDLE
);
654 command
= SAM_COMMAND_STOP
;
656 if (sam_internal_data
.recovery_policy
& SAM_RECOVERY_POLICY_CONFDB
) {
657 pthread_mutex_lock (&sam_internal_data
.lock
);
660 if (sam_safe_write (sam_internal_data
.child_fd_out
, &command
, sizeof (command
)) != sizeof (command
)) {
661 if (sam_internal_data
.recovery_policy
& SAM_RECOVERY_POLICY_CONFDB
) {
662 pthread_mutex_unlock (&sam_internal_data
.lock
);
665 return (CS_ERR_LIBRARY
);
668 if (sam_internal_data
.recovery_policy
& SAM_RECOVERY_POLICY_CONFDB
) {
670 * Wait for parent reply
672 if ((err
= sam_read_reply (sam_internal_data
.child_fd_in
)) != CS_OK
) {
673 pthread_mutex_unlock (&sam_internal_data
.lock
);
678 pthread_mutex_unlock (&sam_internal_data
.lock
);
681 if (sam_internal_data
.hc_callback
)
682 if (sam_safe_write (sam_internal_data
.cb_wpipe_fd
, &command
, sizeof (command
)) != sizeof (command
))
683 return (CS_ERR_LIBRARY
);
685 sam_internal_data
.internal_status
= SAM_INTERNAL_STATUS_REGISTERED
;
690 cs_error_t
sam_hc_send (void)
694 if (sam_internal_data
.internal_status
!= SAM_INTERNAL_STATUS_STARTED
) {
695 return (CS_ERR_BAD_HANDLE
);
698 command
= SAM_COMMAND_HB
;
700 if (sam_safe_write (sam_internal_data
.child_fd_out
, &command
, sizeof (command
)) != sizeof (command
))
701 return (CS_ERR_LIBRARY
);
706 cs_error_t
sam_finalize (void)
710 if (sam_internal_data
.internal_status
!= SAM_INTERNAL_STATUS_INITIALIZED
&&
711 sam_internal_data
.internal_status
!= SAM_INTERNAL_STATUS_REGISTERED
&&
712 sam_internal_data
.internal_status
!= SAM_INTERNAL_STATUS_STARTED
) {
713 return (CS_ERR_BAD_HANDLE
);
716 if (sam_internal_data
.internal_status
== SAM_INTERNAL_STATUS_STARTED
) {
722 sam_internal_data
.internal_status
= SAM_INTERNAL_STATUS_FINALIZED
;
724 free (sam_internal_data
.user_data
);
730 cs_error_t
sam_mark_failed (void)
734 if (sam_internal_data
.internal_status
!= SAM_INTERNAL_STATUS_STARTED
&&
735 sam_internal_data
.internal_status
!= SAM_INTERNAL_STATUS_REGISTERED
) {
736 return (CS_ERR_BAD_HANDLE
);
739 if (!(sam_internal_data
.recovery_policy
& SAM_RECOVERY_POLICY_CONFDB
)) {
740 return (CS_ERR_INVALID_PARAM
);
743 command
= SAM_COMMAND_MARK_FAILED
;
745 if (sam_safe_write (sam_internal_data
.child_fd_out
, &command
, sizeof (command
)) != sizeof (command
))
746 return (CS_ERR_LIBRARY
);
751 cs_error_t
sam_warn_signal_set (int warn_signal
)
756 if (sam_internal_data
.internal_status
!= SAM_INTERNAL_STATUS_INITIALIZED
&&
757 sam_internal_data
.internal_status
!= SAM_INTERNAL_STATUS_REGISTERED
&&
758 sam_internal_data
.internal_status
!= SAM_INTERNAL_STATUS_STARTED
) {
759 return (CS_ERR_BAD_HANDLE
);
762 pthread_mutex_lock (&sam_internal_data
.lock
);
764 if (sam_internal_data
.am_i_child
) {
766 * We are child so we must send data to parent
768 command
= SAM_COMMAND_WARN_SIGNAL_SET
;
769 if (sam_safe_write (sam_internal_data
.child_fd_out
, &command
, sizeof (command
)) != sizeof (command
)) {
770 err
= CS_ERR_LIBRARY
;
775 if (sam_safe_write (sam_internal_data
.child_fd_out
, &warn_signal
, sizeof (warn_signal
)) !=
776 sizeof (warn_signal
)) {
777 err
= CS_ERR_LIBRARY
;
785 if ((err
= sam_read_reply (sam_internal_data
.child_fd_in
)) != CS_OK
) {
791 * We are parent or we received OK reply from parent -> do required action
793 sam_internal_data
.warn_signal
= warn_signal
;
795 pthread_mutex_unlock (&sam_internal_data
.lock
);
800 pthread_mutex_unlock (&sam_internal_data
.lock
);
805 static cs_error_t
sam_parent_reply_send (
813 reply
= SAM_REPLY_OK
;
815 if (sam_safe_write (parent_fd_out
, &reply
, sizeof (reply
)) != sizeof (reply
)) {
816 err
= CS_ERR_LIBRARY
;
824 reply
= SAM_REPLY_ERROR
;
825 if (sam_safe_write (parent_fd_out
, &reply
, sizeof (reply
)) != sizeof (reply
)) {
826 return (CS_ERR_LIBRARY
);
828 if (sam_safe_write (parent_fd_out
, &err
, sizeof (err
)) != sizeof (err
)) {
829 return (CS_ERR_LIBRARY
);
836 static cs_error_t
sam_parent_warn_signal_set (
845 if (sam_safe_read (parent_fd_in
, &warn_signal
, sizeof (warn_signal
)) != sizeof (warn_signal
)) {
846 err
= CS_ERR_LIBRARY
;
850 err
= sam_warn_signal_set (warn_signal
);
856 return (sam_parent_reply_send (CS_OK
, parent_fd_in
, parent_fd_out
));
859 return (sam_parent_reply_send (err
, parent_fd_in
, parent_fd_out
));
862 static cs_error_t
sam_parent_wait_for_quorum (
867 struct pollfd pfds
[2];
870 if (sam_internal_data
.recovery_policy
& SAM_RECOVERY_POLICY_CONFDB
) {
871 if ((err
= sam_confdb_update_key (SAM_CONFDB_KEY_STATE
, SAM_CONFDB_S_Q_WAIT
)) != CS_OK
) {
877 * Update current quorum
879 if ((err
= quorum_dispatch (sam_internal_data
.quorum_handle
, CS_DISPATCH_ALL
)) != CS_OK
) {
886 while (!sam_internal_data
.quorate
) {
887 pfds
[0].fd
= parent_fd_in
;
891 pfds
[1].fd
= sam_internal_data
.quorum_fd
;
892 pfds
[1].events
= POLLIN
;
895 poll_err
= poll (pfds
, 2, -1);
897 if (poll_err
== -1) {
900 * If it is EINTR, continue, otherwise QUIT
902 if (errno
!= EINTR
) {
903 err
= CS_ERR_LIBRARY
;
908 if (pfds
[0].revents
!= 0) {
909 if (pfds
[0].revents
== POLLERR
|| pfds
[0].revents
== POLLHUP
||pfds
[0].revents
== POLLNVAL
) {
917 if (pfds
[1].revents
!= 0) {
918 if ((err
= quorum_dispatch (sam_internal_data
.quorum_handle
, CS_DISPATCH_ONE
)) != CS_OK
) {
924 if (sam_internal_data
.recovery_policy
& SAM_RECOVERY_POLICY_CONFDB
) {
925 if ((err
= sam_confdb_update_key (SAM_CONFDB_KEY_STATE
, SAM_CONFDB_S_STARTED
)) != CS_OK
) {
930 return (sam_parent_reply_send (CS_OK
, parent_fd_in
, parent_fd_out
));
933 if (sam_internal_data
.recovery_policy
& SAM_RECOVERY_POLICY_CONFDB
) {
934 sam_confdb_update_key (SAM_CONFDB_KEY_STATE
, SAM_CONFDB_S_REGISTERED
);
937 return (sam_parent_reply_send (err
, parent_fd_in
, parent_fd_out
));
940 static cs_error_t
sam_parent_confdb_state_set (
949 state_s
= SAM_CONFDB_S_STARTED
;
951 state_s
= SAM_CONFDB_S_REGISTERED
;
954 if ((err
= sam_confdb_update_key (SAM_CONFDB_KEY_STATE
, state_s
)) != CS_OK
) {
958 return (sam_parent_reply_send (CS_OK
, parent_fd_in
, parent_fd_out
));
961 return (sam_parent_reply_send (err
, parent_fd_in
, parent_fd_out
));
964 static cs_error_t
sam_parent_kill_child (
971 if (!sam_internal_data
.term_send
) {
973 * We didn't send warn_signal yet.
975 kill (child_pid
, sam_internal_data
.warn_signal
);
977 sam_internal_data
.term_send
= 1;
980 * We sent child warning. Now, we will not be so nice
982 kill (child_pid
, SIGKILL
);
983 *action
= SAM_PARENT_ACTION_RECOVERY
;
989 static cs_error_t
sam_parent_mark_child_failed (
993 sam_recovery_policy_t recpol
;
995 recpol
= sam_internal_data
.recovery_policy
;
997 sam_internal_data
.term_send
= 1;
998 sam_internal_data
.recovery_policy
= SAM_RECOVERY_POLICY_QUIT
|
999 (SAM_RP_MASK_C (recpol
) ? SAM_RECOVERY_POLICY_CONFDB
: 0) |
1000 (SAM_RP_MASK_Q (recpol
) ? SAM_RECOVERY_POLICY_QUORUM
: 0);
1002 return (sam_parent_kill_child (action
, child_pid
));
1005 static cs_error_t
sam_parent_data_store (
1016 if (sam_safe_read (parent_fd_in
, &size
, sizeof (size
)) != sizeof (size
)) {
1017 err
= CS_ERR_LIBRARY
;
1022 user_data
= malloc (size
);
1023 if (user_data
== NULL
) {
1024 err
= CS_ERR_NO_MEMORY
;
1028 if (sam_safe_read (parent_fd_in
, user_data
, size
) != size
) {
1029 err
= CS_ERR_LIBRARY
;
1030 goto free_error_reply
;
1034 err
= sam_data_store (user_data
, size
);
1036 goto free_error_reply
;
1041 return (sam_parent_reply_send (CS_OK
, parent_fd_in
, parent_fd_out
));
1046 return (sam_parent_reply_send (err
, parent_fd_in
, parent_fd_out
));
1049 static enum sam_parent_action_t
sam_parent_handler (
1060 struct pollfd pfds
[2];
1063 sam_recovery_policy_t recpol
;
1067 action
= SAM_PARENT_ACTION_CONTINUE
;
1068 recpol
= sam_internal_data
.recovery_policy
;
1070 while (action
== SAM_PARENT_ACTION_CONTINUE
) {
1071 pfds
[0].fd
= parent_fd_in
;
1072 pfds
[0].events
= POLLIN
;
1073 pfds
[0].revents
= 0;
1076 if (status
== 1 && sam_internal_data
.time_interval
!= 0) {
1077 time_interval
= sam_internal_data
.time_interval
;
1082 if (recpol
& SAM_RECOVERY_POLICY_QUORUM
) {
1083 pfds
[nfds
].fd
= sam_internal_data
.quorum_fd
;
1084 pfds
[nfds
].events
= POLLIN
;
1085 pfds
[nfds
].revents
= 0;
1089 poll_error
= poll (pfds
, nfds
, time_interval
);
1091 if (poll_error
== -1) {
1094 * If it is EINTR, continue, otherwise QUIT
1096 if (errno
!= EINTR
) {
1097 action
= SAM_PARENT_ACTION_ERROR
;
1101 if (poll_error
== 0) {
1103 * Time limit expires
1106 action
= SAM_PARENT_ACTION_QUIT
;
1108 sam_parent_kill_child (&action
, child_pid
);
1112 if (poll_error
> 0) {
1113 if (pfds
[0].revents
!= 0) {
1115 * We have EOF or command in pipe
1117 bytes_read
= sam_safe_read (parent_fd_in
, &command
, 1);
1119 if (bytes_read
== 0) {
1121 * Handle EOF -> Take recovery action or quit if sam_start wasn't called
1124 action
= SAM_PARENT_ACTION_QUIT
;
1126 action
= SAM_PARENT_ACTION_RECOVERY
;
1131 if (bytes_read
== -1) {
1132 action
= SAM_PARENT_ACTION_ERROR
;
1136 if (recpol
& SAM_RECOVERY_POLICY_CONFDB
) {
1137 sam_confdb_update_key (SAM_CONFDB_KEY_LAST_HC
, NULL
);
1141 * We have read command
1144 case SAM_COMMAND_START
:
1149 if (recpol
& SAM_RECOVERY_POLICY_QUORUM
) {
1150 if (sam_parent_wait_for_quorum (parent_fd_in
,
1151 parent_fd_out
) != CS_OK
) {
1156 if (recpol
& SAM_RECOVERY_POLICY_CONFDB
) {
1157 if (sam_parent_confdb_state_set (parent_fd_in
,
1158 parent_fd_out
, 1) != CS_OK
) {
1166 case SAM_COMMAND_STOP
:
1171 if (recpol
& SAM_RECOVERY_POLICY_CONFDB
) {
1172 if (sam_parent_confdb_state_set (parent_fd_in
,
1173 parent_fd_out
, 0) != CS_OK
) {
1181 case SAM_COMMAND_DATA_STORE
:
1182 sam_parent_data_store (parent_fd_in
, parent_fd_out
);
1184 case SAM_COMMAND_WARN_SIGNAL_SET
:
1185 sam_parent_warn_signal_set (parent_fd_in
, parent_fd_out
);
1187 case SAM_COMMAND_MARK_FAILED
:
1189 sam_parent_mark_child_failed (&action
, child_pid
);
1192 } /* if (pfds[0].revents != 0) */
1194 if ((sam_internal_data
.recovery_policy
& SAM_RECOVERY_POLICY_QUORUM
) &&
1195 pfds
[1].revents
!= 0) {
1197 * Handle quorum change
1199 err
= quorum_dispatch (sam_internal_data
.quorum_handle
, CS_DISPATCH_ALL
);
1202 (!sam_internal_data
.quorate
|| (err
!= CS_ERR_TRY_AGAIN
&& err
!= CS_OK
))) {
1203 sam_parent_kill_child (&action
, child_pid
);
1206 } /* select_error > 0 */
1207 } /* action == SAM_PARENT_ACTION_CONTINUE */
1213 cs_error_t
sam_register (
1214 unsigned int *instance_id
)
1219 int pipe_fd_out
[2], pipe_fd_in
[2];
1220 enum sam_parent_action_t action
, old_action
;
1222 sam_recovery_policy_t recpol
;
1224 if (sam_internal_data
.internal_status
!= SAM_INTERNAL_STATUS_INITIALIZED
) {
1225 return (CS_ERR_BAD_HANDLE
);
1228 recpol
= sam_internal_data
.recovery_policy
;
1230 if (recpol
& SAM_RECOVERY_POLICY_CONFDB
) {
1234 if ((error
= sam_confdb_register ()) != CS_OK
) {
1242 if ((pipe_error
= pipe (pipe_fd_out
)) != 0) {
1243 error
= CS_ERR_LIBRARY
;
1247 if ((pipe_error
= pipe (pipe_fd_in
)) != 0) {
1248 close (pipe_fd_out
[0]);
1249 close (pipe_fd_out
[1]);
1251 error
= CS_ERR_LIBRARY
;
1255 if (recpol
& SAM_RECOVERY_POLICY_CONFDB
) {
1256 if ((error
= sam_confdb_update_key (SAM_CONFDB_KEY_STATE
, SAM_CONFDB_S_REGISTERED
)) != CS_OK
) {
1261 sam_internal_data
.instance_id
++;
1263 sam_internal_data
.term_send
= 0;
1271 sam_internal_data
.instance_id
--;
1273 error
= CS_ERR_LIBRARY
;
1281 close (pipe_fd_out
[0]);
1282 close (pipe_fd_in
[1]);
1284 sam_internal_data
.child_fd_out
= pipe_fd_out
[1];
1285 sam_internal_data
.child_fd_in
= pipe_fd_in
[0];
1288 *instance_id
= sam_internal_data
.instance_id
;
1290 sam_internal_data
.am_i_child
= 1;
1291 sam_internal_data
.internal_status
= SAM_INTERNAL_STATUS_REGISTERED
;
1293 pthread_mutex_init (&sam_internal_data
.lock
, NULL
);
1300 close (pipe_fd_out
[1]);
1301 close (pipe_fd_in
[0]);
1303 action
= sam_parent_handler (pipe_fd_out
[0], pipe_fd_in
[1], pid
);
1305 close (pipe_fd_out
[0]);
1306 close (pipe_fd_in
[1]);
1308 if (action
== SAM_PARENT_ACTION_ERROR
) {
1309 error
= CS_ERR_LIBRARY
;
1314 * We really don't like zombies
1316 while (waitpid (pid
, &child_status
, 0) == -1 && errno
== EINTR
)
1319 old_action
= action
;
1321 if (action
== SAM_PARENT_ACTION_RECOVERY
) {
1322 if (SAM_RP_MASK (sam_internal_data
.recovery_policy
) == SAM_RECOVERY_POLICY_QUIT
)
1323 action
= SAM_PARENT_ACTION_QUIT
;
1327 if (action
== SAM_PARENT_ACTION_QUIT
) {
1328 if (recpol
& SAM_RECOVERY_POLICY_QUORUM
) {
1329 quorum_finalize (sam_internal_data
.quorum_handle
);
1332 if (recpol
& SAM_RECOVERY_POLICY_CONFDB
) {
1333 if (old_action
== SAM_PARENT_ACTION_RECOVERY
) {
1337 sam_confdb_update_key (SAM_CONFDB_KEY_STATE
, SAM_CONFDB_S_FAILED
);
1339 sam_confdb_destroy_pid_obj ();
1343 exit (WEXITSTATUS (child_status
));
1354 static void *hc_callback_thread (void *unused_param
)
1358 ssize_t bytes_readed
;
1360 int time_interval
, tmp_time_interval
;
1367 time_interval
= sam_internal_data
.time_interval
>> 2;
1370 pfds
.fd
= sam_internal_data
.cb_rpipe_fd
;
1371 pfds
.events
= POLLIN
;
1375 tmp_time_interval
= time_interval
;
1377 tmp_time_interval
= -1;
1380 poll_error
= poll (&pfds
, 1, tmp_time_interval
);
1382 if (poll_error
== 0) {
1383 if (sam_hc_send () == CS_OK
) {
1388 if (sam_internal_data
.hc_callback () != 0) {
1396 if (poll_error
> 0) {
1397 bytes_readed
= sam_safe_read (sam_internal_data
.cb_rpipe_fd
, &command
, 1);
1399 if (bytes_readed
> 0) {
1400 if (status
== 0 && command
== SAM_COMMAND_START
)
1403 if (status
== 1 && command
== SAM_COMMAND_STOP
)
1411 * This makes compiler happy, it's same as return (NULL);
1413 return (unused_param
);
1416 cs_error_t
sam_hc_callback_register (sam_hc_callback_t cb
)
1418 cs_error_t error
= CS_OK
;
1419 pthread_attr_t thread_attr
;
1423 if (sam_internal_data
.internal_status
!= SAM_INTERNAL_STATUS_REGISTERED
) {
1424 return (CS_ERR_BAD_HANDLE
);
1427 if (sam_internal_data
.time_interval
== 0) {
1428 return (CS_ERR_INVALID_PARAM
);
1431 if (sam_internal_data
.cb_registered
) {
1432 sam_internal_data
.hc_callback
= cb
;
1438 * We know, this is first registration
1442 return (CS_ERR_INVALID_PARAM
);
1445 pipe_error
= pipe (pipe_fd
);
1447 if (pipe_error
!= 0) {
1449 * Pipe creation error
1451 error
= CS_ERR_LIBRARY
;
1455 sam_internal_data
.cb_rpipe_fd
= pipe_fd
[0];
1456 sam_internal_data
.cb_wpipe_fd
= pipe_fd
[1];
1459 * Create thread attributes
1461 error
= pthread_attr_init (&thread_attr
);
1463 error
= CS_ERR_LIBRARY
;
1464 goto error_close_fd_exit
;
1468 pthread_attr_setdetachstate (&thread_attr
, PTHREAD_CREATE_DETACHED
);
1469 pthread_attr_setstacksize (&thread_attr
, 32768);
1474 error
= pthread_create (&sam_internal_data
.cb_thread
, &thread_attr
, hc_callback_thread
, NULL
);
1477 error
= CS_ERR_LIBRARY
;
1478 goto error_attr_destroy_exit
;
1484 pthread_attr_destroy(&thread_attr
);
1486 sam_internal_data
.cb_registered
= 1;
1487 sam_internal_data
.hc_callback
= cb
;
1491 error_attr_destroy_exit
:
1492 pthread_attr_destroy(&thread_attr
);
1493 error_close_fd_exit
:
1494 sam_internal_data
.cb_rpipe_fd
= sam_internal_data
.cb_wpipe_fd
= 0;