]> git.proxmox.com Git - pve-cluster.git/blame - data/PVE/IPCC.xs
add pmxcfs restart detection heuristic for IPCC
[pve-cluster.git] / data / PVE / IPCC.xs
CommitLineData
fe000966
DM
1#ifdef HAVE_CONFIG_H
2#include <config.h>
3#endif /* HAVE_CONFIG_H */
4
5#include "EXTERN.h"
6#include "perl.h"
7#include "XSUB.h"
8
9#include "ppport.h"
10
24a3d153
DM
11/* sendfd: BSD style file descriptor passing over unix domain sockets
12 * Richard Stevens: Unix Network Programming, Prentice Hall, 1990;
13 */
14#include <sys/types.h>
15#include <sys/socket.h>
16#include <sys/uio.h>
3209f24c
TL
17#include <unistd.h>
18#include <time.h>
24a3d153
DM
19#include <errno.h>
20
21#ifndef SCM_RIGHTS
22#error "SCM_RIGHTS undefined"
23#endif
24
db9751d8 25/* interface to pmxcfs (libqb) */
fe000966
DM
26#include <sys/syslog.h>
27#include <qb/qbdefs.h>
28#include <qb/qbutil.h>
89fde9ac 29#include <qb/qblog.h>
fe000966
DM
30#include <qb/qbipcc.h>
31
3209f24c
TL
32#define RESTART_FLAG_FILE "/run/pve-cluster/cfs-restart-flag"
33#define RESTART_GRACE_PERIOD 5
34
fe000966
DM
35#define PCS_SOCKET_NAME "pve2"
36
37#define PCS_SERVICE1 1
38#define MAX_MSG_SIZE (8192*128)
39
40static qb_ipcc_connection_t *conn;
49929b8a 41static pid_t conn_pid;
429f04f3 42
fe000966
DM
43static char ipcbuffer[MAX_MSG_SIZE];
44
3209f24c
TL
45static qb_ipcc_connection_t *init_connection() {
46
47 static qb_ipcc_connection_t *connection = NULL;
48 struct timespec retry_timeout, now;
49 int cfs_restart_flag_fd = -1;
50
51 // check if pmxcfs is currently restarting
52 if ((cfs_restart_flag_fd = open(RESTART_FLAG_FILE, 0)) > 0) {
53 clock_gettime(CLOCK_MONOTONIC, &retry_timeout);
54 retry_timeout.tv_sec += RESTART_GRACE_PERIOD;
55 }
56
57 qb_log_init("IPCC.xs", LOG_USER, LOG_EMERG);
58 qb_log_ctl(QB_LOG_SYSLOG, QB_LOG_CONF_ENABLED, QB_TRUE);
59
60retry_connection:
61 connection = qb_ipcc_connect(PCS_SOCKET_NAME, MAX_MSG_SIZE);
62
63 if (!connection) {
64 if (cfs_restart_flag_fd >= 0) {
65 // cfs restarting and hopefully back soon, poll
66 clock_gettime(CLOCK_MONOTONIC, &now);
67
68 if (now.tv_sec < retry_timeout.tv_sec ||
69 (now.tv_sec == retry_timeout.tv_sec &&
70 now.tv_nsec < retry_timeout.tv_nsec)) {
71
72 usleep(100 * 1000);
73 goto retry_connection;
74
75 } else {
76 // timeout: cleanup flag file if still the same
77 struct stat s;
78 fstat(cfs_restart_flag_fd, &s);
79 if (s.st_nlink > 0)
80 unlink(RESTART_FLAG_FILE);
81 }
82 }
83 }
84
85 if (cfs_restart_flag_fd >= 0) close(cfs_restart_flag_fd);
86
87 return connection;
88}
89
90
fe000966
DM
91MODULE = PVE::IPCC PACKAGE = PVE::IPCC
92
93SV *
94ipcc_send_rec(msgid, data=NULL)
95I32 msgid;
96SV * data;
97PROTOTYPE: $;$
98CODE:
99{
3209f24c 100 uint8_t retried_cache_connection = 0;
429f04f3
DM
101 pid_t cpid = getpid();
102
103 /* Each process needs its own ipcc connection,
104 * else the shared memory buffer gets corrupted.
105 */
106 if (conn && conn_pid != cpid) {
107 conn = NULL;
108 }
109
fe000966 110 if (conn == NULL) {
3209f24c
TL
111recache_connection:
112 conn = init_connection();
fe000966
DM
113
114 if (!conn)
115 XSRETURN_UNDEF;
429f04f3
DM
116
117 conn_pid = cpid;
fe000966
DM
118 }
119
120 size_t len = 0;
121 char *dataptr = NULL;
122 if (data && SvPOK(data))
123 dataptr = SvPV(data, len);
124
125 int iov_len = 2;
126 struct iovec iov[iov_len];
127
128 struct qb_ipc_request_header req_header;
129
130 req_header.id = msgid;
131 req_header.size = sizeof(req_header) + len;
132
133 iov[0].iov_base = (char *)&req_header;
134 iov[0].iov_len = sizeof(req_header);
135 iov[1].iov_base = dataptr;
136 iov[1].iov_len = len;
137
138 int32_t ms_timeout = -1; // fixme:
139 int res = qb_ipcc_sendv_recv(conn, iov, iov_len, ipcbuffer, sizeof(ipcbuffer), ms_timeout);
140 if (res < 0) {
141 qb_ipcc_disconnect(conn);
142 conn = NULL;
3209f24c
TL
143 // requests during cfs restart and the first thereafter will fail, retry
144 if (!retried_cache_connection) {
145 retried_cache_connection = 1;
146 goto recache_connection;
147 }
fe000966
DM
148 errno = -res;
149 XSRETURN_UNDEF;
150 }
151
152 struct qb_ipc_response_header *res_header;
153
154 res_header = (struct qb_ipc_response_header *)ipcbuffer;
155 int dsize = res_header->size - sizeof(struct qb_ipc_response_header);
156
157 if (res_header->error < 0) {
158 errno = -res_header->error;
159 XSRETURN_UNDEF;
160 } else {
904e6ee9 161 errno = 0;
fe000966
DM
162 if (dsize > 0) {
163 RETVAL = newSVpv(ipcbuffer + sizeof(struct qb_ipc_response_header), dsize);
164 } else {
fe000966
DM
165 XSRETURN_UNDEF;
166 }
167 }
168}
169OUTPUT: RETVAL
170
24a3d153
DM
171# helper to pass SCM ACCESS RIGHTS
172
173int
174sendfd(sock_fd, send_me_fd, data=NULL)
175int sock_fd
176int send_me_fd
177SV * data;
178CODE:
179{
180 int ret = 0;
181 struct iovec iov[1];
182 struct msghdr msg;
183 memset(&msg, 0, sizeof(msg));
184
185 size_t len = 0;
186 char *dataptr = NULL;
187 if (data && SvPOK(data))
188 dataptr = SvPV(data, len);
189
190 iov[0].iov_base = dataptr;
191 iov[0].iov_len = len;
192 msg.msg_iov = iov;
193 msg.msg_iovlen = 1;
194 msg.msg_name = 0;
195 msg.msg_namelen = 0;
196
197 char control[CMSG_SPACE(sizeof(int))];
198 memset(control, 0, sizeof(control));
199
200 msg.msg_control = control;
201 msg.msg_controllen = sizeof(control);
202 msg.msg_flags = 0;
203
204 struct cmsghdr* h = CMSG_FIRSTHDR(&msg);
205 h->cmsg_len = CMSG_LEN(sizeof(int));
206 h->cmsg_level= SOL_SOCKET;
207 h->cmsg_type = SCM_RIGHTS;
208 *((int*)CMSG_DATA(h)) = send_me_fd;
209
db9751d8 210 int repeat;
24a3d153 211 do {
db9751d8 212 repeat = 0;
24a3d153 213 ret = sendmsg(sock_fd, &msg, 0);
db9751d8
DM
214 if (ret < 0) {
215 if (errno == EINTR) {
216 repeat = 1;
217 } else if (errno == EAGAIN || errno == EWOULDBLOCK) {
218 repeat = 1;
219 usleep(1000);
220 }
221 }
222 } while (repeat);
223
24a3d153
DM
224 RETVAL = ret;
225}
226OUTPUT: RETVAL