]>
Commit | Line | Data |
---|---|---|
718e3744 | 1 | /* Kernel communication using netlink interface. |
2 | * Copyright (C) 1999 Kunihiro Ishiguro | |
3 | * | |
4 | * This file is part of GNU Zebra. | |
5 | * | |
6 | * GNU Zebra is free software; you can redistribute it and/or modify it | |
7 | * under the terms of the GNU General Public License as published by the | |
8 | * Free Software Foundation; either version 2, or (at your option) any | |
9 | * later version. | |
10 | * | |
11 | * GNU Zebra is distributed in the hope that it will be useful, but | |
12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | * General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * along with GNU Zebra; see the file COPYING. If not, write to the Free | |
18 | * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA | |
19 | * 02111-1307, USA. | |
20 | */ | |
1fdc9eae | 21 | |
22 | #include <zebra.h> | |
23 | ||
24 | #include "linklist.h" | |
25 | #include "if.h" | |
26 | #include "log.h" | |
27 | #include "prefix.h" | |
28 | #include "connected.h" | |
29 | #include "table.h" | |
30 | #include "memory.h" | |
31 | #include "zebra_memory.h" | |
32 | #include "rib.h" | |
33 | #include "thread.h" | |
34 | #include "privs.h" | |
35 | #include "nexthop.h" | |
36 | #include "vrf.h" | |
37 | #include "mpls.h" | |
38 | ||
39 | #include "zebra/zserv.h" | |
40 | #include "zebra/zebra_ns.h" | |
41 | #include "zebra/zebra_vrf.h" | |
42 | #include "zebra/debug.h" | |
43 | #include "zebra/kernel_netlink.h" | |
44 | #include "zebra/rt_netlink.h" | |
45 | #include "zebra/if_netlink.h" | |
46 | ||
/* Provide SO_RCVBUFFORCE when the system headers do not define it. */
#ifndef SO_RCVBUFFORCE
#define SO_RCVBUFFORCE  (33)
#endif

/* Hack for GNU libc version 2. */
#ifndef MSG_TRUNC
#define MSG_TRUNC      0x20
#endif /* MSG_TRUNC */

/* Pointer to the first aligned byte after netlink message `nmsg', i.e. the
 * place where the next attribute would be appended. */
#ifndef NLMSG_TAIL
#define NLMSG_TAIL(nmsg) \
  ((struct rtattr *) (((u_char *) (nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len)))
#endif

/* Pointer to the first aligned byte after attribute `rta', i.e. the place
 * where the next nested sub-attribute would be appended. */
#ifndef RTA_TAIL
#define RTA_TAIL(rta) \
  ((struct rtattr *) (((u_char *) (rta)) + RTA_ALIGN((rta)->rta_len)))
#endif

/* Provide RTNL_FAMILY_IP6MR when the system headers do not define it. */
#ifndef RTNL_FAMILY_IP6MR
#define RTNL_FAMILY_IP6MR 129
#endif

/* Provide RTPROT_MROUTED when the system headers do not define it. */
#ifndef RTPROT_MROUTED
#define RTPROT_MROUTED 17
#endif
73 | ||
/* Printable names of the netlink message types; searched by
 * nl_msg_type_to_str().  The table ends with a {0, NULL} entry. */
static const struct message nlmsg_str[] = {
  {RTM_NEWROUTE, "RTM_NEWROUTE"},
  {RTM_DELROUTE, "RTM_DELROUTE"},
  {RTM_GETROUTE, "RTM_GETROUTE"},
  {RTM_NEWLINK,  "RTM_NEWLINK"},
  {RTM_DELLINK,  "RTM_DELLINK"},
  {RTM_GETLINK,  "RTM_GETLINK"},
  {RTM_NEWADDR,  "RTM_NEWADDR"},
  {RTM_DELADDR,  "RTM_DELADDR"},
  {RTM_GETADDR,  "RTM_GETADDR"},
  {RTM_NEWNEIGH, "RTM_NEWNEIGH"},
  {RTM_DELNEIGH, "RTM_DELNEIGH"},
  {RTM_GETNEIGH, "RTM_GETNEIGH"},
  {0, NULL}
};
89 | ||
/* Printable names of route origin protocols (RTPROT_*); searched by
 * nl_rtproto_to_str().  The BIRD entry is only present when the system
 * headers define RTPROT_BIRD.  The table ends with a {0, NULL} entry. */
static const struct message rtproto_str[] = {
  {RTPROT_REDIRECT, "redirect"},
  {RTPROT_KERNEL,   "kernel"},
  {RTPROT_BOOT,     "boot"},
  {RTPROT_STATIC,   "static"},
  {RTPROT_GATED,    "GateD"},
  {RTPROT_RA,       "router advertisement"},
  {RTPROT_MRT,      "MRT"},
  {RTPROT_ZEBRA,    "Zebra"},
#ifdef RTPROT_BIRD
  {RTPROT_BIRD,     "BIRD"},
#endif /* RTPROT_BIRD */
  {RTPROT_MROUTED,  "mroute"},
  {0, NULL}
};
105 | ||
b339bde7 DS |
/* Printable names of address families; searched by nl_family_to_str().
 * The table ends with a {0, NULL} entry. */
static const struct message family_str[] = {
  {AF_INET,           "ipv4"},
  {AF_INET6,          "ipv6"},
  {AF_BRIDGE,         "bridge"},
  {RTNL_FAMILY_IPMR,  "ipv4MR"},
  {RTNL_FAMILY_IP6MR, "ipv6MR"},
  {0,                 NULL},
};
114 | ||
/* Printable names of route types (RTN_*); searched by nl_rttype_to_str().
 * Only unicast and multicast are listed.  The table ends with a {0, NULL}
 * entry. */
static const struct message rttype_str[] = {
  {RTN_UNICAST,   "unicast"},
  {RTN_MULTICAST, "multicast"},
  {0,             NULL},
};
120 | ||
/* Defined elsewhere. */
extern struct thread_master *master;
/* Requested netlink receive buffer size; kernel_init() only resizes the
 * listen socket buffer when this is non-zero. */
extern u_int32_t nl_rcvbufsize;

/* Privilege state used to raise/lower privileges around socket calls. */
extern struct zebra_privs_t zserv_privs;
125 | ||
30359046 | 126 | int |
1fdc9eae | 127 | netlink_talk_filter (struct sockaddr_nl *snl, struct nlmsghdr *h, |
936ebf0a | 128 | ns_id_t ns_id, int startup) |
1fdc9eae | 129 | { |
130 | zlog_warn ("netlink_talk: ignoring message type 0x%04x NS %u", h->nlmsg_type, | |
131 | ns_id); | |
132 | return 0; | |
133 | } | |
134 | ||
135 | static int | |
136 | netlink_recvbuf (struct nlsock *nl, uint32_t newsize) | |
137 | { | |
138 | u_int32_t oldsize; | |
139 | socklen_t newlen = sizeof(newsize); | |
140 | socklen_t oldlen = sizeof(oldsize); | |
141 | int ret; | |
142 | ||
143 | ret = getsockopt(nl->sock, SOL_SOCKET, SO_RCVBUF, &oldsize, &oldlen); | |
144 | if (ret < 0) | |
145 | { | |
146 | zlog (NULL, LOG_ERR, "Can't get %s receive buffer size: %s", nl->name, | |
147 | safe_strerror (errno)); | |
148 | return -1; | |
149 | } | |
150 | ||
151 | /* Try force option (linux >= 2.6.14) and fall back to normal set */ | |
152 | if ( zserv_privs.change (ZPRIVS_RAISE) ) | |
153 | zlog_err ("routing_socket: Can't raise privileges"); | |
154 | ret = setsockopt(nl->sock, SOL_SOCKET, SO_RCVBUFFORCE, &nl_rcvbufsize, | |
155 | sizeof(nl_rcvbufsize)); | |
156 | if ( zserv_privs.change (ZPRIVS_LOWER) ) | |
157 | zlog_err ("routing_socket: Can't lower privileges"); | |
158 | if (ret < 0) | |
159 | ret = setsockopt(nl->sock, SOL_SOCKET, SO_RCVBUF, &nl_rcvbufsize, | |
160 | sizeof(nl_rcvbufsize)); | |
161 | if (ret < 0) | |
162 | { | |
163 | zlog (NULL, LOG_ERR, "Can't set %s receive buffer size: %s", nl->name, | |
164 | safe_strerror (errno)); | |
165 | return -1; | |
166 | } | |
167 | ||
168 | ret = getsockopt(nl->sock, SOL_SOCKET, SO_RCVBUF, &newsize, &newlen); | |
169 | if (ret < 0) | |
170 | { | |
171 | zlog (NULL, LOG_ERR, "Can't get %s receive buffer size: %s", nl->name, | |
172 | safe_strerror (errno)); | |
173 | return -1; | |
174 | } | |
175 | ||
176 | zlog (NULL, LOG_INFO, | |
177 | "Setting netlink socket receive buffer size: %u -> %u", | |
178 | oldsize, newsize); | |
179 | return 0; | |
180 | } | |
181 | ||
182 | /* Make socket for Linux netlink interface. */ | |
183 | static int | |
184 | netlink_socket (struct nlsock *nl, unsigned long groups, ns_id_t ns_id) | |
185 | { | |
186 | int ret; | |
187 | struct sockaddr_nl snl; | |
188 | int sock; | |
189 | int namelen; | |
190 | int save_errno; | |
191 | ||
192 | if (zserv_privs.change (ZPRIVS_RAISE)) | |
193 | { | |
194 | zlog (NULL, LOG_ERR, "Can't raise privileges"); | |
195 | return -1; | |
196 | } | |
197 | ||
198 | sock = socket (AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); | |
199 | if (sock < 0) | |
200 | { | |
201 | zlog (NULL, LOG_ERR, "Can't open %s socket: %s", nl->name, | |
202 | safe_strerror (errno)); | |
203 | return -1; | |
204 | } | |
205 | ||
206 | memset (&snl, 0, sizeof snl); | |
207 | snl.nl_family = AF_NETLINK; | |
208 | snl.nl_groups = groups; | |
209 | ||
210 | /* Bind the socket to the netlink structure for anything. */ | |
211 | ret = bind (sock, (struct sockaddr *) &snl, sizeof snl); | |
212 | save_errno = errno; | |
213 | if (zserv_privs.change (ZPRIVS_LOWER)) | |
214 | zlog (NULL, LOG_ERR, "Can't lower privileges"); | |
215 | ||
216 | if (ret < 0) | |
217 | { | |
218 | zlog (NULL, LOG_ERR, "Can't bind %s socket to group 0x%x: %s", | |
219 | nl->name, snl.nl_groups, safe_strerror (save_errno)); | |
220 | close (sock); | |
221 | return -1; | |
222 | } | |
223 | ||
224 | /* multiple netlink sockets will have different nl_pid */ | |
225 | namelen = sizeof snl; | |
226 | ret = getsockname (sock, (struct sockaddr *) &snl, (socklen_t *) &namelen); | |
227 | if (ret < 0 || namelen != sizeof snl) | |
228 | { | |
229 | zlog (NULL, LOG_ERR, "Can't get %s socket name: %s", nl->name, | |
230 | safe_strerror (errno)); | |
231 | close (sock); | |
232 | return -1; | |
233 | } | |
234 | ||
235 | nl->snl = snl; | |
236 | nl->sock = sock; | |
237 | return ret; | |
238 | } | |
239 | ||
240 | static int | |
241 | netlink_information_fetch (struct sockaddr_nl *snl, struct nlmsghdr *h, | |
936ebf0a | 242 | ns_id_t ns_id, int startup) |
1fdc9eae | 243 | { |
244 | /* JF: Ignore messages that aren't from the kernel */ | |
245 | if ( snl->nl_pid != 0 ) | |
246 | { | |
247 | zlog ( NULL, LOG_ERR, "Ignoring message from pid %u", snl->nl_pid ); | |
248 | return 0; | |
249 | } | |
250 | ||
251 | switch (h->nlmsg_type) | |
252 | { | |
253 | case RTM_NEWROUTE: | |
936ebf0a | 254 | return netlink_route_change (snl, h, ns_id, startup); |
1fdc9eae | 255 | break; |
256 | case RTM_DELROUTE: | |
936ebf0a | 257 | return netlink_route_change (snl, h, ns_id, startup); |
1fdc9eae | 258 | break; |
259 | case RTM_NEWLINK: | |
936ebf0a | 260 | return netlink_link_change (snl, h, ns_id, startup); |
1fdc9eae | 261 | break; |
262 | case RTM_DELLINK: | |
936ebf0a | 263 | return netlink_link_change (snl, h, ns_id, startup); |
1fdc9eae | 264 | break; |
265 | case RTM_NEWADDR: | |
936ebf0a | 266 | return netlink_interface_addr (snl, h, ns_id, startup); |
1fdc9eae | 267 | break; |
268 | case RTM_DELADDR: | |
936ebf0a | 269 | return netlink_interface_addr (snl, h, ns_id, startup); |
1fdc9eae | 270 | break; |
271 | default: | |
272 | zlog_warn ("Unknown netlink nlmsg_type %d vrf %u\n", h->nlmsg_type, | |
273 | ns_id); | |
274 | break; | |
275 | } | |
276 | return 0; | |
277 | } | |
278 | ||
279 | static int | |
280 | kernel_read (struct thread *thread) | |
281 | { | |
282 | struct zebra_ns *zns = (struct zebra_ns *)THREAD_ARG (thread); | |
936ebf0a | 283 | netlink_parse_info (netlink_information_fetch, &zns->netlink, zns, 5, 0); |
1fdc9eae | 284 | zns->t_netlink = thread_add_read (zebrad.master, kernel_read, zns, |
285 | zns->netlink.sock); | |
286 | ||
287 | return 0; | |
288 | } | |
289 | ||
/* Filter out messages from self that occur on listener socket,
 * caused by our actions on the command socket
 *
 * sock -> socket the filter program is attached to
 * pid  -> nlmsg_pid value to match; RTM_NEWROUTE/RTM_DELROUTE messages
 *         carrying it are dropped, everything else is kept
 *
 * Jump targets in the per-instruction comments are absolute instruction
 * numbers; the BPF_JUMP arguments are offsets relative to the next
 * instruction.
 *
 * NOTE(review): the constants go through htons()/htonl() - presumably
 * because the absolute loads read the header fields as network byte order
 * while netlink headers are host byte order; confirm before changing.
 *
 * A failure to attach the filter is only logged as a warning.
 */
static void netlink_install_filter (int sock, __u32 pid)
{
  struct sock_filter filter[] = {
    /* 0: ldh [4]             load nlmsg_type */
    BPF_STMT(BPF_LD|BPF_ABS|BPF_H, offsetof(struct nlmsghdr, nlmsg_type)),
    /* 1: jeq 0x18 jt 3 jf 2  RTM_NEWROUTE? else try RTM_DELROUTE */
    BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, htons(RTM_NEWROUTE), 1, 0),
    /* 2: jeq 0x19 jt 3 jf 6  RTM_DELROUTE? else keep */
    BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, htons(RTM_DELROUTE), 0, 3),
    /* 3: ldw [12]            load nlmsg_pid */
    BPF_STMT(BPF_LD|BPF_ABS|BPF_W, offsetof(struct nlmsghdr, nlmsg_pid)),
    /* 4: jeq XX  jt 5 jf 6   from `pid'? then skip, else keep */
    BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, htonl(pid), 0, 1),
    /* 5: ret 0    (skip)     */
    BPF_STMT(BPF_RET|BPF_K, 0),
    /* 6: ret 0xffff (keep)   */
    BPF_STMT(BPF_RET|BPF_K, 0xffff),
  };

  struct sock_fprog prog = {
    .len = array_size(filter),
    .filter = filter,
  };

  if (setsockopt(sock, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)) < 0)
    zlog_warn ("Can't install socket filter: %s\n", safe_strerror(errno));
}
320 | ||
/*
 * netlink_parse_rtattr
 *
 * Index a run of route attributes by type.
 *
 * tb  -> table receiving, at index rta_type, a pointer to the attribute of
 *        that type; entries whose type is not seen are left untouched
 * max -> highest attribute type stored; larger types are skipped
 * rta -> first attribute of the run
 * len -> number of bytes occupied by the run
 *
 * When a type occurs more than once the last occurrence wins.
 */
void
netlink_parse_rtattr (struct rtattr **tb, int max, struct rtattr *rta,
                      int len)
{
  for (; RTA_OK (rta, len); rta = RTA_NEXT (rta, len))
    {
      if (rta->rta_type > max)
        continue;

      tb[rta->rta_type] = rta;
    }
}
332 | ||
/*
 * addattr_l
 *
 * Append one attribute to the end of a netlink message.
 *
 * n      -> message being built; nlmsg_len grows by the aligned attribute
 *           size on success and is left unchanged on failure
 * maxlen -> total number of bytes available for the message
 * type   -> attribute type stored in rta_type
 * data   -> attribute payload; may be NULL only when alen is 0
 * alen   -> payload length in bytes
 *
 * Returns 0 on success, -1 when the attribute does not fit into maxlen or
 * its length cannot be represented in the 16-bit rta_len field.
 */
int
addattr_l (struct nlmsghdr *n, unsigned int maxlen, int type,
           void *data, unsigned int alen)
{
  unsigned int len;
  struct rtattr *rta;

  /* rta_len is only 16 bits wide; refuse payloads it cannot describe
   * instead of writing a truncated length. */
  if (alen > UINT16_MAX - RTA_LENGTH (0))
    return -1;

  len = RTA_LENGTH (alen);

  if (NLMSG_ALIGN (n->nlmsg_len) + RTA_ALIGN (len) > maxlen)
    return -1;

  rta = (struct rtattr *) (((char *) n) + NLMSG_ALIGN (n->nlmsg_len));
  rta->rta_type = type;
  rta->rta_len = len;

  if (data)
    memcpy (RTA_DATA (rta), data, alen);
  else
    assert (alen == 0);

  n->nlmsg_len = NLMSG_ALIGN (n->nlmsg_len) + RTA_ALIGN (len);

  return 0;
}
358 | ||
/*
 * rta_addattr_l
 *
 * Append one sub-attribute to the end of an existing attribute.
 *
 * rta    -> enclosing attribute; rta_len grows by the aligned size of the
 *           sub-attribute on success and is left unchanged on failure
 * maxlen -> total number of bytes available for the enclosing attribute
 * type   -> sub-attribute type stored in rta_type
 * data   -> sub-attribute payload; may be NULL only when alen is 0
 * alen   -> payload length in bytes
 *
 * Returns 0 on success, -1 when the sub-attribute does not fit into maxlen
 * or a resulting length cannot be represented in the 16-bit rta_len field.
 */
int
rta_addattr_l (struct rtattr *rta, unsigned int maxlen, int type,
               void *data, unsigned int alen)
{
  unsigned int len;
  unsigned int newlen;
  struct rtattr *subrta;

  /* rta_len is only 16 bits wide; refuse payloads it cannot describe. */
  if (alen > UINT16_MAX - RTA_LENGTH (0))
    return -1;

  len = RTA_LENGTH (alen);
  newlen = RTA_ALIGN (rta->rta_len) + RTA_ALIGN (len);

  if (newlen > maxlen || newlen > UINT16_MAX)
    return -1;

  subrta = (struct rtattr *) (((char *) rta) + RTA_ALIGN (rta->rta_len));
  subrta->rta_type = type;
  subrta->rta_len = len;

  if (data)
    memcpy (RTA_DATA (subrta), data, alen);
  else
    assert (alen == 0);

  /* Attribute lengths are aligned with RTA_ALIGN, not NLMSG_ALIGN. */
  rta->rta_len = newlen;

  return 0;
}
384 | ||
/*
 * addattr32
 *
 * Append a 4-byte attribute holding `data' to netlink message `n' by
 * handing it to addattr_l().
 *
 * Returns whatever addattr_l() returns.
 */
int
addattr32 (struct nlmsghdr *n, unsigned int maxlen, int type, int data)
{
  void *payload = &data;

  return addattr_l (n, maxlen, type, payload, sizeof (u_int32_t));
}
390 | ||
/*
 * addattr_nest
 *
 * Open a nested attribute of the given type at the end of message `n'.
 * An attribute with no payload is appended through addattr_l(); the result
 * of that call is not checked.
 *
 * Returns the position at which the nested attribute was placed, to be
 * handed to addattr_nest_end() once the nested content has been added.
 */
struct rtattr *
addattr_nest (struct nlmsghdr *n, int maxlen, int type)
{
  struct rtattr *start;

  /* Remember the tail before addattr_l() moves it. */
  start = NLMSG_TAIL (n);
  addattr_l (n, maxlen, type, NULL, 0);

  return start;
}
399 | ||
400 | int | |
401 | addattr_nest_end(struct nlmsghdr *n, struct rtattr *nest) | |
402 | { | |
403 | nest->rta_len = (u_char *)NLMSG_TAIL(n) - (u_char *)nest; | |
404 | return n->nlmsg_len; | |
405 | } | |
406 | ||
/*
 * rta_nest
 *
 * Open a nested sub-attribute of the given type at the end of attribute
 * `rta'.  A sub-attribute with no payload is appended through
 * rta_addattr_l(); the result of that call is not checked.
 *
 * Returns the position at which the nested sub-attribute was placed, to be
 * handed to rta_nest_end() once the nested content has been added.
 */
struct rtattr *
rta_nest (struct rtattr *rta, int maxlen, int type)
{
  struct rtattr *start;

  /* Remember the tail before rta_addattr_l() moves it. */
  start = RTA_TAIL (rta);
  rta_addattr_l (rta, maxlen, type, NULL, 0);

  return start;
}
415 | ||
416 | int | |
417 | rta_nest_end(struct rtattr *rta, struct rtattr *nest) | |
418 | { | |
419 | nest->rta_len = (u_char *)RTA_TAIL(rta) - (u_char *)nest; | |
420 | return rta->rta_len; | |
421 | } | |
422 | ||
423 | const char * | |
424 | nl_msg_type_to_str (uint16_t msg_type) | |
425 | { | |
426 | return lookup (nlmsg_str, msg_type); | |
427 | } | |
428 | ||
429 | const char * | |
430 | nl_rtproto_to_str (u_char rtproto) | |
431 | { | |
432 | return lookup (rtproto_str, rtproto); | |
433 | } | |
b339bde7 DS |
434 | |
435 | const char * | |
436 | nl_family_to_str (u_char family) | |
437 | { | |
438 | return lookup (family_str, family); | |
439 | } | |
440 | ||
441 | const char * | |
442 | nl_rttype_to_str (u_char rttype) | |
443 | { | |
444 | return lookup (rttype_str, rttype); | |
445 | } | |
446 | ||
936ebf0a DS |
447 | /* |
448 | * netlink_parse_info | |
449 | * | |
450 | * Receive message from netlink interface and pass those information | |
451 | * to the given function. | |
452 | * | |
453 | * filter -> Function to call to read the results | |
454 | * nl -> netlink socket information | |
455 | * zns -> The zebra namespace data | |
456 | * count -> How many we should read in, 0 means as much as possible | |
457 | * startup -> Are we reading in under startup conditions? passed to | |
458 | * the filter. | |
459 | */ | |
1fdc9eae | 460 | int |
461 | netlink_parse_info (int (*filter) (struct sockaddr_nl *, struct nlmsghdr *, | |
936ebf0a DS |
462 | ns_id_t, int), |
463 | struct nlsock *nl, struct zebra_ns *zns, int count, int startup) | |
1fdc9eae | 464 | { |
465 | int status; | |
466 | int ret = 0; | |
467 | int error; | |
468 | int read_in = 0; | |
469 | ||
470 | while (1) | |
471 | { | |
472 | char buf[NL_PKT_BUF_SIZE]; | |
473 | struct iovec iov = { | |
474 | .iov_base = buf, | |
475 | .iov_len = sizeof buf | |
476 | }; | |
477 | struct sockaddr_nl snl; | |
478 | struct msghdr msg = { | |
479 | .msg_name = (void *) &snl, | |
480 | .msg_namelen = sizeof snl, | |
481 | .msg_iov = &iov, | |
482 | .msg_iovlen = 1 | |
483 | }; | |
484 | struct nlmsghdr *h; | |
485 | ||
486 | if (count && read_in >= count) | |
487 | return 0; | |
488 | ||
489 | status = recvmsg (nl->sock, &msg, 0); | |
490 | if (status < 0) | |
491 | { | |
492 | if (errno == EINTR) | |
493 | continue; | |
494 | if (errno == EWOULDBLOCK || errno == EAGAIN) | |
495 | break; | |
496 | zlog (NULL, LOG_ERR, "%s recvmsg overrun: %s", | |
497 | nl->name, safe_strerror(errno)); | |
498 | /* | |
499 | * In this case we are screwed. | |
500 | * There is no good way to | |
501 | * recover zebra at this point. | |
502 | */ | |
503 | exit (-1); | |
504 | continue; | |
505 | } | |
506 | ||
507 | if (status == 0) | |
508 | { | |
509 | zlog (NULL, LOG_ERR, "%s EOF", nl->name); | |
510 | return -1; | |
511 | } | |
512 | ||
513 | if (msg.msg_namelen != sizeof snl) | |
514 | { | |
515 | zlog (NULL, LOG_ERR, "%s sender address length error: length %d", | |
516 | nl->name, msg.msg_namelen); | |
517 | return -1; | |
518 | } | |
519 | ||
520 | if (IS_ZEBRA_DEBUG_KERNEL_MSGDUMP_RECV) | |
521 | { | |
522 | zlog_debug("%s: << netlink message dump [recv]", __func__); | |
523 | zlog_hexdump(&msg, sizeof(msg)); | |
524 | } | |
525 | ||
526 | read_in++; | |
527 | for (h = (struct nlmsghdr *) buf; NLMSG_OK (h, (unsigned int) status); | |
528 | h = NLMSG_NEXT (h, status)) | |
529 | { | |
530 | /* Finish of reading. */ | |
531 | if (h->nlmsg_type == NLMSG_DONE) | |
532 | return ret; | |
533 | ||
534 | /* Error handling. */ | |
535 | if (h->nlmsg_type == NLMSG_ERROR) | |
536 | { | |
537 | struct nlmsgerr *err = (struct nlmsgerr *) NLMSG_DATA (h); | |
538 | int errnum = err->error; | |
539 | int msg_type = err->msg.nlmsg_type; | |
540 | ||
541 | /* If the error field is zero, then this is an ACK */ | |
542 | if (err->error == 0) | |
543 | { | |
544 | if (IS_ZEBRA_DEBUG_KERNEL) | |
545 | { | |
546 | zlog_debug ("%s: %s ACK: type=%s(%u), seq=%u, pid=%u", | |
547 | __FUNCTION__, nl->name, | |
548 | nl_msg_type_to_str (err->msg.nlmsg_type), | |
549 | err->msg.nlmsg_type, err->msg.nlmsg_seq, | |
550 | err->msg.nlmsg_pid); | |
551 | } | |
552 | ||
553 | /* return if not a multipart message, otherwise continue */ | |
554 | if (!(h->nlmsg_flags & NLM_F_MULTI)) | |
555 | return 0; | |
556 | continue; | |
557 | } | |
558 | ||
559 | if (h->nlmsg_len < NLMSG_LENGTH (sizeof (struct nlmsgerr))) | |
560 | { | |
561 | zlog (NULL, LOG_ERR, "%s error: message truncated", | |
562 | nl->name); | |
563 | return -1; | |
564 | } | |
565 | ||
566 | /* Deal with errors that occur because of races in link handling */ | |
567 | if (nl == &zns->netlink_cmd | |
568 | && ((msg_type == RTM_DELROUTE && | |
569 | (-errnum == ENODEV || -errnum == ESRCH)) | |
882261e1 DS |
570 | || (msg_type == RTM_NEWROUTE && |
571 | (-errnum == ENETDOWN || -errnum == EEXIST)))) | |
1fdc9eae | 572 | { |
573 | if (IS_ZEBRA_DEBUG_KERNEL) | |
574 | zlog_debug ("%s: error: %s type=%s(%u), seq=%u, pid=%u", | |
575 | nl->name, safe_strerror (-errnum), | |
576 | nl_msg_type_to_str (msg_type), | |
577 | msg_type, err->msg.nlmsg_seq, err->msg.nlmsg_pid); | |
578 | return 0; | |
579 | } | |
580 | ||
581 | /* We see RTM_DELNEIGH when shutting down an interface with an IPv4 | |
582 | * link-local. The kernel should have already deleted the neighbor | |
583 | * so do not log these as an error. | |
584 | */ | |
585 | if (msg_type == RTM_DELNEIGH || | |
586 | (nl == &zns->netlink_cmd && msg_type == RTM_NEWROUTE && | |
587 | (-errnum == ESRCH || -errnum == ENETUNREACH))) | |
588 | { | |
589 | /* This is known to happen in some situations, don't log | |
590 | * as error. | |
591 | */ | |
592 | if (IS_ZEBRA_DEBUG_KERNEL) | |
593 | zlog_debug ("%s error: %s, type=%s(%u), seq=%u, pid=%u", | |
594 | nl->name, safe_strerror (-errnum), | |
595 | nl_msg_type_to_str (msg_type), | |
596 | msg_type, err->msg.nlmsg_seq, err->msg.nlmsg_pid); | |
597 | } | |
598 | else | |
599 | zlog_err ("%s error: %s, type=%s(%u), seq=%u, pid=%u", | |
600 | nl->name, safe_strerror (-errnum), | |
601 | nl_msg_type_to_str (msg_type), | |
602 | msg_type, err->msg.nlmsg_seq, err->msg.nlmsg_pid); | |
603 | ||
604 | return -1; | |
605 | } | |
606 | ||
607 | /* OK we got netlink message. */ | |
608 | if (IS_ZEBRA_DEBUG_KERNEL) | |
609 | zlog_debug ("netlink_parse_info: %s type %s(%u), len=%d, seq=%u, pid=%u", | |
610 | nl->name, | |
611 | nl_msg_type_to_str (h->nlmsg_type), h->nlmsg_type, | |
612 | h->nlmsg_len, h->nlmsg_seq, h->nlmsg_pid); | |
613 | ||
614 | /* skip unsolicited messages originating from command socket | |
615 | * linux sets the originators port-id for {NEW|DEL}ADDR messages, | |
616 | * so this has to be checked here. */ | |
617 | if (nl != &zns->netlink_cmd | |
618 | && h->nlmsg_pid == zns->netlink_cmd.snl.nl_pid | |
619 | && (h->nlmsg_type != RTM_NEWADDR && h->nlmsg_type != RTM_DELADDR)) | |
620 | { | |
621 | if (IS_ZEBRA_DEBUG_KERNEL) | |
622 | zlog_debug ("netlink_parse_info: %s packet comes from %s", | |
623 | zns->netlink_cmd.name, nl->name); | |
624 | continue; | |
625 | } | |
626 | ||
936ebf0a | 627 | error = (*filter) (&snl, h, zns->ns_id, startup); |
1fdc9eae | 628 | if (error < 0) |
629 | { | |
630 | zlog (NULL, LOG_ERR, "%s filter function error", nl->name); | |
631 | ret = error; | |
632 | } | |
633 | } | |
634 | ||
635 | /* After error care. */ | |
636 | if (msg.msg_flags & MSG_TRUNC) | |
637 | { | |
638 | zlog (NULL, LOG_ERR, "%s error: message truncated", nl->name); | |
639 | continue; | |
640 | } | |
641 | if (status) | |
642 | { | |
643 | zlog (NULL, LOG_ERR, "%s error: data remnant size %d", nl->name, | |
644 | status); | |
645 | return -1; | |
646 | } | |
647 | } | |
648 | return ret; | |
649 | } | |
650 | ||
936ebf0a DS |
651 | /* |
652 | * netlink_talk | |
653 | * | |
654 | * sendmsg() to netlink socket then recvmsg(). | |
655 | * Calls netlink_parse_info to parse returned data | |
656 | * | |
657 | * filter -> The filter to read final results from kernel | |
658 | * nlmsghdr -> The data to send to the kernel | |
659 | * nl -> The netlink socket information | |
660 | * zns -> The zebra namespace information | |
661 | * startup -> Are we reading in under startup conditions | |
662 | * This is passed through eventually to filter. | |
663 | */ | |
1fdc9eae | 664 | int |
30359046 | 665 | netlink_talk (int (*filter) (struct sockaddr_nl *, struct nlmsghdr *, |
936ebf0a DS |
666 | ns_id_t, int startup), |
667 | struct nlmsghdr *n, struct nlsock *nl, struct zebra_ns *zns, int startup) | |
1fdc9eae | 668 | { |
669 | int status; | |
670 | struct sockaddr_nl snl; | |
671 | struct iovec iov = { | |
672 | .iov_base = (void *) n, | |
673 | .iov_len = n->nlmsg_len | |
674 | }; | |
675 | struct msghdr msg = { | |
676 | .msg_name = (void *) &snl, | |
677 | .msg_namelen = sizeof snl, | |
678 | .msg_iov = &iov, | |
679 | .msg_iovlen = 1, | |
680 | }; | |
681 | int save_errno; | |
682 | ||
683 | memset (&snl, 0, sizeof snl); | |
684 | snl.nl_family = AF_NETLINK; | |
685 | ||
686 | n->nlmsg_seq = ++nl->seq; | |
687 | ||
688 | /* Request an acknowledgement by setting NLM_F_ACK */ | |
689 | n->nlmsg_flags |= NLM_F_ACK; | |
690 | ||
691 | if (IS_ZEBRA_DEBUG_KERNEL) | |
692 | zlog_debug ("netlink_talk: %s type %s(%u), len=%d seq=%u flags 0x%x", | |
693 | nl->name, | |
694 | nl_msg_type_to_str (n->nlmsg_type), n->nlmsg_type, | |
695 | n->nlmsg_len, n->nlmsg_seq, n->nlmsg_flags); | |
696 | ||
697 | /* Send message to netlink interface. */ | |
698 | if (zserv_privs.change (ZPRIVS_RAISE)) | |
699 | zlog (NULL, LOG_ERR, "Can't raise privileges"); | |
700 | status = sendmsg (nl->sock, &msg, 0); | |
701 | save_errno = errno; | |
702 | if (zserv_privs.change (ZPRIVS_LOWER)) | |
703 | zlog (NULL, LOG_ERR, "Can't lower privileges"); | |
704 | ||
705 | if (IS_ZEBRA_DEBUG_KERNEL_MSGDUMP_SEND) | |
706 | { | |
707 | zlog_debug("%s: >> netlink message dump [sent]", __func__); | |
708 | zlog_hexdump(&msg, sizeof(msg)); | |
709 | } | |
710 | ||
711 | if (status < 0) | |
712 | { | |
713 | zlog (NULL, LOG_ERR, "netlink_talk sendmsg() error: %s", | |
714 | safe_strerror (save_errno)); | |
715 | return -1; | |
716 | } | |
717 | ||
718 | ||
719 | /* | |
720 | * Get reply from netlink socket. | |
721 | * The reply should either be an acknowlegement or an error. | |
722 | */ | |
936ebf0a | 723 | return netlink_parse_info (filter, nl, zns, 0, startup); |
1fdc9eae | 724 | } |
725 | ||
726 | /* Get type specified information from netlink. */ | |
727 | int | |
728 | netlink_request (int family, int type, struct nlsock *nl) | |
729 | { | |
730 | int ret; | |
731 | struct sockaddr_nl snl; | |
732 | int save_errno; | |
733 | ||
734 | struct | |
735 | { | |
736 | struct nlmsghdr nlh; | |
737 | struct rtgenmsg g; | |
738 | } req; | |
739 | ||
740 | /* Check netlink socket. */ | |
741 | if (nl->sock < 0) | |
742 | { | |
743 | zlog (NULL, LOG_ERR, "%s socket isn't active.", nl->name); | |
744 | return -1; | |
745 | } | |
746 | ||
747 | memset (&snl, 0, sizeof snl); | |
748 | snl.nl_family = AF_NETLINK; | |
749 | ||
750 | memset (&req, 0, sizeof req); | |
751 | req.nlh.nlmsg_len = sizeof req; | |
752 | req.nlh.nlmsg_type = type; | |
753 | req.nlh.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST; | |
754 | req.nlh.nlmsg_pid = nl->snl.nl_pid; | |
755 | req.nlh.nlmsg_seq = ++nl->seq; | |
756 | req.g.rtgen_family = family; | |
757 | ||
758 | /* linux appears to check capabilities on every message | |
759 | * have to raise caps for every message sent | |
760 | */ | |
761 | if (zserv_privs.change (ZPRIVS_RAISE)) | |
762 | { | |
763 | zlog (NULL, LOG_ERR, "Can't raise privileges"); | |
764 | return -1; | |
765 | } | |
766 | ||
767 | ret = sendto (nl->sock, (void *) &req, sizeof req, 0, | |
768 | (struct sockaddr *) &snl, sizeof snl); | |
769 | save_errno = errno; | |
770 | ||
771 | if (zserv_privs.change (ZPRIVS_LOWER)) | |
772 | zlog (NULL, LOG_ERR, "Can't lower privileges"); | |
773 | ||
774 | if (ret < 0) | |
775 | { | |
776 | zlog (NULL, LOG_ERR, "%s sendto failed: %s", nl->name, | |
777 | safe_strerror (save_errno)); | |
778 | return -1; | |
779 | } | |
780 | ||
781 | return 0; | |
782 | } | |
783 | ||
784 | /* Exported interface function. This function simply calls | |
785 | netlink_socket (). */ | |
786 | void | |
787 | kernel_init (struct zebra_ns *zns) | |
788 | { | |
789 | unsigned long groups; | |
790 | ||
f30c50b9 RW |
791 | /* Initialize netlink sockets */ |
792 | groups = RTMGRP_LINK | RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_IFADDR | | |
03549ced DS |
793 | RTMGRP_IPV6_ROUTE | RTMGRP_IPV6_IFADDR | |
794 | RTMGRP_IPV4_MROUTE; | |
f30c50b9 RW |
795 | |
796 | snprintf (zns->netlink.name, sizeof (zns->netlink.name), | |
797 | "netlink-listen (NS %u)", zns->ns_id); | |
798 | zns->netlink.sock = -1; | |
1fdc9eae | 799 | netlink_socket (&zns->netlink, groups, zns->ns_id); |
f30c50b9 RW |
800 | |
801 | snprintf (zns->netlink_cmd.name, sizeof (zns->netlink_cmd.name), | |
802 | "netlink-cmd (NS %u)", zns->ns_id); | |
803 | zns->netlink_cmd.sock = -1; | |
1fdc9eae | 804 | netlink_socket (&zns->netlink_cmd, 0, zns->ns_id); |
805 | ||
806 | /* Register kernel socket. */ | |
807 | if (zns->netlink.sock > 0) | |
808 | { | |
809 | /* Only want non-blocking on the netlink event socket */ | |
810 | if (fcntl (zns->netlink.sock, F_SETFL, O_NONBLOCK) < 0) | |
811 | zlog_err ("Can't set %s socket flags: %s", zns->netlink.name, | |
812 | safe_strerror (errno)); | |
813 | ||
814 | /* Set receive buffer size if it's set from command line */ | |
815 | if (nl_rcvbufsize) | |
816 | netlink_recvbuf (&zns->netlink, nl_rcvbufsize); | |
817 | ||
818 | netlink_install_filter (zns->netlink.sock, zns->netlink_cmd.snl.nl_pid); | |
819 | zns->t_netlink = thread_add_read (zebrad.master, kernel_read, zns, | |
820 | zns->netlink.sock); | |
821 | } | |
822 | } | |
823 | ||
824 | void | |
825 | kernel_terminate (struct zebra_ns *zns) | |
826 | { | |
827 | THREAD_READ_OFF (zns->t_netlink); | |
828 | ||
829 | if (zns->netlink.sock >= 0) | |
830 | { | |
831 | close (zns->netlink.sock); | |
832 | zns->netlink.sock = -1; | |
833 | } | |
834 | ||
835 | if (zns->netlink_cmd.sock >= 0) | |
836 | { | |
837 | close (zns->netlink_cmd.sock); | |
838 | zns->netlink_cmd.sock = -1; | |
839 | } | |
840 | } |