]> git.proxmox.com Git - pve-cluster.git/blobdiff - data/PVE/IPCC.xs
add pmxcfs restart detection heuristic for IPCC
[pve-cluster.git] / data / PVE / IPCC.xs
index 9d2c6d95547cbe0e532819bacb16537324e616ff..decc60b721bb9ba8c92fadccf2140294872a78a1 100644 (file)
@@ -14,6 +14,8 @@
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <sys/uio.h>
+#include <unistd.h>
+#include <time.h>
 #include <errno.h>
 
 #ifndef SCM_RIGHTS
 #include <sys/syslog.h>
 #include <qb/qbdefs.h>
 #include <qb/qbutil.h>
+#include <qb/qblog.h>
 #include <qb/qbipcc.h>
 
+#define RESTART_FLAG_FILE "/run/pve-cluster/cfs-restart-flag"
+#define RESTART_GRACE_PERIOD 5
+
 #define PCS_SOCKET_NAME "pve2"
 
 #define PCS_SERVICE1 1
@@ -36,16 +42,52 @@ static pid_t conn_pid;
 
 static char ipcbuffer[MAX_MSG_SIZE];
 
-static void libqb_log_writer(const char *file_name,
-                            int32_t file_line,
-                            int32_t severity, const char *msg)
-{
-       if (severity == LOG_DEBUG)
-               return;
+static qb_ipcc_connection_t *init_connection() {
+
+       static qb_ipcc_connection_t *connection = NULL;
+       struct timespec retry_timeout, now;
+       int cfs_restart_flag_fd = -1;
+
+       // check if pmxcfs is currently restarting
+       if ((cfs_restart_flag_fd = open(RESTART_FLAG_FILE, 0)) > 0) {
+               clock_gettime(CLOCK_MONOTONIC, &retry_timeout);
+               retry_timeout.tv_sec += RESTART_GRACE_PERIOD;
+       }
+
+       qb_log_init("IPCC.xs", LOG_USER, LOG_EMERG);
+       qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, QB_TRUE);
+
+retry_connection:
+       connection = qb_ipcc_connect(PCS_SOCKET_NAME, MAX_MSG_SIZE);
+
+       if (!connection) {
+               if (cfs_restart_flag_fd >= 0) {
+                       // cfs restarting and hopefully back soon, poll
+                       clock_gettime(CLOCK_MONOTONIC, &now);
+
+                       if (now.tv_sec < retry_timeout.tv_sec ||
+                          (now.tv_sec == retry_timeout.tv_sec &&
+                           now.tv_nsec < retry_timeout.tv_nsec)) {
 
-       warn("libqb: %s:%d [%d] %s\n", file_name, file_line, severity, msg);
+                               usleep(100 * 1000);
+                               goto retry_connection;
+
+                       } else {
+                               // timeout: cleanup flag file if still the same
+                               struct stat s;
+                               fstat(cfs_restart_flag_fd, &s);
+                               if (s.st_nlink > 0)
+                                       unlink(RESTART_FLAG_FILE);
+                       }
+               }
+       }
+
+       if (cfs_restart_flag_fd >= 0) close(cfs_restart_flag_fd);
+
+       return connection;
 }
 
+
 MODULE = PVE::IPCC             PACKAGE = PVE::IPCC             
 
 SV *
@@ -55,6 +97,7 @@ SV * data;
 PROTOTYPE: $;$
 CODE:
 {
+       uint8_t retried_cache_connection = 0;
        pid_t cpid = getpid();
 
        /* Each process needs its own ipcc connection,
@@ -65,8 +108,8 @@ CODE:
        }
 
        if (conn == NULL) {
-               qb_util_set_log_function(libqb_log_writer);
-               conn = qb_ipcc_connect(PCS_SOCKET_NAME, MAX_MSG_SIZE);
+recache_connection:
+               conn = init_connection();
 
                if (!conn)
                        XSRETURN_UNDEF;
@@ -97,6 +140,11 @@ CODE:
        if (res < 0) {
                qb_ipcc_disconnect(conn);
                conn = NULL;
+               // requests during cfs restart and the first thereafter will fail, retry
+               if (!retried_cache_connection) {
+                       retried_cache_connection = 1;
+                       goto recache_connection;
+               }
                errno = -res;
                XSRETURN_UNDEF;
        }
@@ -110,10 +158,10 @@ CODE:
                errno = -res_header->error;
                XSRETURN_UNDEF;
        } else {
+               errno = 0;
                if (dsize > 0) {
                        RETVAL = newSVpv(ipcbuffer + sizeof(struct qb_ipc_response_header), dsize);
                } else {
-                       errno = 0;
                        XSRETURN_UNDEF;
                }
        }