/* Copyright (C) 2017 Cavium, Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2 of the GNU General Public License
 * as published by the Free Software Foundation.
 */
8 #include <linux/netlink.h>
9 #include <linux/rtnetlink.h>
16 #include <sys/socket.h>
19 #include <arpa/inet.h>
24 #include <sys/ioctl.h>
25 #include <sys/syscall.h>
27 #include "bpf/libbpf.h"
28 #include <sys/resource.h>
/* Netlink socket descriptors shared with the signal handlers, and the
 * XDP attach flags (adjusted by the command-line options in main()).
 */
int sock;
int sock_arp;
int flags = XDP_FLAGS_UPDATE_IF_NOEXIST;

/* Interfaces the program is attached to and, in the same order, the id
 * of the program attached on each one. Both arrays hold total_ifindex
 * entries.
 */
static int total_ifindex;
static int *ifindex_list;
static __u32 *prog_id_list;

/* Descriptors of the BPF maps looked up by name in main(). */
static int lpm_map_fd;
static int rxcnt_map_fd;
static int arp_table_map_fd;
static int exact_match_map_fd;
static int tx_port_map_fd;

/* Needed early because read_route() re-reads the table after a delete. */
static int get_route_table(int rtm_family);
43 static void int_exit(int sig
)
48 for (i
= 0; i
< total_ifindex
; i
++) {
49 if (bpf_get_link_xdp_id(ifindex_list
[i
], &prog_id
, flags
)) {
50 printf("bpf_get_link_xdp_id on iface %d failed\n",
54 if (prog_id_list
[i
] == prog_id
)
55 bpf_set_link_xdp_fd(ifindex_list
[i
], -1, flags
);
57 printf("couldn't find a prog id on iface %d\n",
60 printf("program on iface %d changed, not removing\n",
67 static void close_and_exit(int sig
)
75 /* Get the mac address of the interface given interface name */
76 static __be64
getmac(char *iface
)
82 fd
= socket(AF_INET
, SOCK_DGRAM
, 0);
83 ifr
.ifr_addr
.sa_family
= AF_INET
;
84 strncpy(ifr
.ifr_name
, iface
, IFNAMSIZ
- 1);
85 if (ioctl(fd
, SIOCGIFHWADDR
, &ifr
) < 0) {
86 printf("ioctl failed leaving....\n");
89 for (i
= 0; i
< 6 ; i
++)
90 *((__u8
*)&mac
+ i
) = (__u8
)ifr
.ifr_hwaddr
.sa_data
[i
];
95 static int recv_msg(struct sockaddr_nl sock_addr
, int sock
)
103 len
= recv(sock
, buf_ptr
, sizeof(buf
) - nll
, 0);
107 nh
= (struct nlmsghdr
*)buf_ptr
;
109 if (nh
->nlmsg_type
== NLMSG_DONE
)
113 if ((sock_addr
.nl_groups
& RTMGRP_NEIGH
) == RTMGRP_NEIGH
)
116 if ((sock_addr
.nl_groups
& RTMGRP_IPV4_ROUTE
) == RTMGRP_IPV4_ROUTE
)
122 /* Function to parse the route entry returned by netlink
123 * Updates the route entry related map entries
125 static void read_route(struct nlmsghdr
*nh
, int nll
)
127 char dsts
[24], gws
[24], ifs
[16], dsts_len
[24], metrics
[24];
128 struct bpf_lpm_trie_key
*prefix_key
;
129 struct rtattr
*rt_attr
;
130 struct rtmsg
*rt_msg
;
135 int dst_len
, iface
, metric
;
146 struct arp_table arp
;
151 if (nh
->nlmsg_type
== RTM_DELROUTE
)
152 printf("DELETING Route entry\n");
153 else if (nh
->nlmsg_type
== RTM_GETROUTE
)
154 printf("READING Route entry\n");
155 else if (nh
->nlmsg_type
== RTM_NEWROUTE
)
156 printf("NEW Route entry\n");
158 printf("%d\n", nh
->nlmsg_type
);
160 memset(&route
, 0, sizeof(route
));
161 printf("Destination\t\tGateway\t\tGenmask\t\tMetric\t\tIface\n");
162 for (; NLMSG_OK(nh
, nll
); nh
= NLMSG_NEXT(nh
, nll
)) {
163 rt_msg
= (struct rtmsg
*)NLMSG_DATA(nh
);
164 rtm_family
= rt_msg
->rtm_family
;
165 if (rtm_family
== AF_INET
)
166 if (rt_msg
->rtm_table
!= RT_TABLE_MAIN
)
168 rt_attr
= (struct rtattr
*)RTM_RTA(rt_msg
);
169 rtl
= RTM_PAYLOAD(nh
);
171 for (; RTA_OK(rt_attr
, rtl
); rt_attr
= RTA_NEXT(rt_attr
, rtl
)) {
172 switch (rt_attr
->rta_type
) {
175 (*((__be32
*)RTA_DATA(rt_attr
))));
179 *((__be32
*)RTA_DATA(rt_attr
)));
183 *((int *)RTA_DATA(rt_attr
)));
186 sprintf(metrics
, "%u",
187 *((int *)RTA_DATA(rt_attr
)));
192 sprintf(dsts_len
, "%d", rt_msg
->rtm_dst_len
);
193 route
.dst
= atoi(dsts
);
194 route
.dst_len
= atoi(dsts_len
);
195 route
.gw
= atoi(gws
);
196 route
.iface
= atoi(ifs
);
197 route
.metric
= atoi(metrics
);
198 route
.iface_name
= alloca(sizeof(char *) * IFNAMSIZ
);
199 route
.iface_name
= if_indextoname(route
.iface
, route
.iface_name
);
200 route
.mac
= getmac(route
.iface_name
);
203 assert(bpf_map_update_elem(tx_port_map_fd
,
204 &route
.iface
, &route
.iface
, 0) == 0);
205 if (rtm_family
== AF_INET
) {
214 prefix_key
= alloca(sizeof(*prefix_key
) + 3);
215 prefix_value
= alloca(sizeof(*prefix_value
));
217 prefix_key
->prefixlen
= 32;
218 prefix_key
->prefixlen
= route
.dst_len
;
219 direct_entry
.mac
= route
.mac
& 0xffffffffffff;
220 direct_entry
.ifindex
= route
.iface
;
221 direct_entry
.arp
.mac
= 0;
222 direct_entry
.arp
.dst
= 0;
223 if (route
.dst_len
== 32) {
224 if (nh
->nlmsg_type
== RTM_DELROUTE
) {
225 assert(bpf_map_delete_elem(exact_match_map_fd
,
228 if (bpf_map_lookup_elem(arp_table_map_fd
,
230 &direct_entry
.arp
.mac
) == 0)
231 direct_entry
.arp
.dst
= route
.dst
;
232 assert(bpf_map_update_elem(exact_match_map_fd
,
234 &direct_entry
, 0) == 0);
237 for (i
= 0; i
< 4; i
++)
238 prefix_key
->data
[i
] = (route
.dst
>> i
* 8) & 0xff;
240 printf("%3d.%d.%d.%d\t\t%3x\t\t%d\t\t%d\t\t%s\n",
241 (int)prefix_key
->data
[0],
242 (int)prefix_key
->data
[1],
243 (int)prefix_key
->data
[2],
244 (int)prefix_key
->data
[3],
245 route
.gw
, route
.dst_len
,
248 if (bpf_map_lookup_elem(lpm_map_fd
, prefix_key
,
250 for (i
= 0; i
< 4; i
++)
251 prefix_value
->prefix
[i
] = prefix_key
->data
[i
];
252 prefix_value
->value
= route
.mac
& 0xffffffffffff;
253 prefix_value
->ifindex
= route
.iface
;
254 prefix_value
->gw
= route
.gw
;
255 prefix_value
->metric
= route
.metric
;
257 assert(bpf_map_update_elem(lpm_map_fd
,
262 if (nh
->nlmsg_type
== RTM_DELROUTE
) {
263 printf("deleting entry\n");
264 printf("prefix key=%d.%d.%d.%d/%d",
269 prefix_key
->prefixlen
);
270 assert(bpf_map_delete_elem(lpm_map_fd
,
273 /* Rereading the route table to check if
274 * there is an entry with the same
275 * prefix but a different metric as the
278 get_route_table(AF_INET
);
279 } else if (prefix_key
->data
[0] ==
280 prefix_value
->prefix
[0] &&
281 prefix_key
->data
[1] ==
282 prefix_value
->prefix
[1] &&
283 prefix_key
->data
[2] ==
284 prefix_value
->prefix
[2] &&
285 prefix_key
->data
[3] ==
286 prefix_value
->prefix
[3] &&
287 route
.metric
>= prefix_value
->metric
) {
290 for (i
= 0; i
< 4; i
++)
291 prefix_value
->prefix
[i
] =
293 prefix_value
->value
=
294 route
.mac
& 0xffffffffffff;
295 prefix_value
->ifindex
= route
.iface
;
296 prefix_value
->gw
= route
.gw
;
297 prefix_value
->metric
= route
.metric
;
298 assert(bpf_map_update_elem(lpm_map_fd
,
305 memset(&route
, 0, sizeof(route
));
306 memset(dsts
, 0, sizeof(dsts
));
307 memset(dsts_len
, 0, sizeof(dsts_len
));
308 memset(gws
, 0, sizeof(gws
));
309 memset(ifs
, 0, sizeof(ifs
));
310 memset(&route
, 0, sizeof(route
));
314 /* Function to read the existing route table when the process is launched*/
315 static int get_route_table(int rtm_family
)
317 struct sockaddr_nl sa
;
331 sock
= socket(AF_NETLINK
, SOCK_RAW
, NETLINK_ROUTE
);
333 printf("open netlink socket: %s\n", strerror(errno
));
336 memset(&sa
, 0, sizeof(sa
));
337 sa
.nl_family
= AF_NETLINK
;
338 if (bind(sock
, (struct sockaddr
*)&sa
, sizeof(sa
)) < 0) {
339 printf("bind to netlink: %s\n", strerror(errno
));
343 memset(&req
, 0, sizeof(req
));
344 req
.nl
.nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
345 req
.nl
.nlmsg_flags
= NLM_F_REQUEST
| NLM_F_DUMP
;
346 req
.nl
.nlmsg_type
= RTM_GETROUTE
;
348 req
.rt
.rtm_family
= rtm_family
;
349 req
.rt
.rtm_table
= RT_TABLE_MAIN
;
350 req
.nl
.nlmsg_pid
= 0;
351 req
.nl
.nlmsg_seq
= ++seq
;
352 memset(&msg
, 0, sizeof(msg
));
353 iov
.iov_base
= (void *)&req
.nl
;
354 iov
.iov_len
= req
.nl
.nlmsg_len
;
357 ret
= sendmsg(sock
, &msg
, 0);
359 printf("send to netlink: %s\n", strerror(errno
));
363 memset(buf
, 0, sizeof(buf
));
364 nll
= recv_msg(sa
, sock
);
366 printf("recv from netlink: %s\n", strerror(nll
));
370 nh
= (struct nlmsghdr
*)buf
;
377 /* Function to parse the arp entry returned by netlink
378 * Updates the arp entry related map entries
380 static void read_arp(struct nlmsghdr
*nh
, int nll
)
382 struct rtattr
*rt_attr
;
383 char dsts
[24], mac
[24];
384 struct ndmsg
*rt_msg
;
392 struct arp_table arp
;
397 if (nh
->nlmsg_type
== RTM_GETNEIGH
)
398 printf("READING arp entry\n");
399 printf("Address\tHwAddress\n");
400 for (; NLMSG_OK(nh
, nll
); nh
= NLMSG_NEXT(nh
, nll
)) {
401 rt_msg
= (struct ndmsg
*)NLMSG_DATA(nh
);
402 rt_attr
= (struct rtattr
*)RTM_RTA(rt_msg
);
403 ndm_family
= rt_msg
->ndm_family
;
404 rtl
= RTM_PAYLOAD(nh
);
405 for (; RTA_OK(rt_attr
, rtl
); rt_attr
= RTA_NEXT(rt_attr
, rtl
)) {
406 switch (rt_attr
->rta_type
) {
409 *((__be32
*)RTA_DATA(rt_attr
)));
413 *((__be64
*)RTA_DATA(rt_attr
)));
419 arp_entry
.dst
= atoi(dsts
);
420 arp_entry
.mac
= atol(mac
);
421 printf("%x\t\t%llx\n", arp_entry
.dst
, arp_entry
.mac
);
422 if (ndm_family
== AF_INET
) {
423 if (bpf_map_lookup_elem(exact_match_map_fd
,
425 &direct_entry
) == 0) {
426 if (nh
->nlmsg_type
== RTM_DELNEIGH
) {
427 direct_entry
.arp
.dst
= 0;
428 direct_entry
.arp
.mac
= 0;
429 } else if (nh
->nlmsg_type
== RTM_NEWNEIGH
) {
430 direct_entry
.arp
.dst
= arp_entry
.dst
;
431 direct_entry
.arp
.mac
= arp_entry
.mac
;
433 assert(bpf_map_update_elem(exact_match_map_fd
,
437 memset(&direct_entry
, 0, sizeof(direct_entry
));
439 if (nh
->nlmsg_type
== RTM_DELNEIGH
) {
440 assert(bpf_map_delete_elem(arp_table_map_fd
,
441 &arp_entry
.dst
) == 0);
442 } else if (nh
->nlmsg_type
== RTM_NEWNEIGH
) {
443 assert(bpf_map_update_elem(arp_table_map_fd
,
449 memset(&arp_entry
, 0, sizeof(arp_entry
));
450 memset(dsts
, 0, sizeof(dsts
));
454 /* Function to read the existing arp table when the process is launched*/
455 static int get_arp_table(int rtm_family
)
457 struct sockaddr_nl sa
;
470 sock
= socket(AF_NETLINK
, SOCK_RAW
, NETLINK_ROUTE
);
472 printf("open netlink socket: %s\n", strerror(errno
));
475 memset(&sa
, 0, sizeof(sa
));
476 sa
.nl_family
= AF_NETLINK
;
477 if (bind(sock
, (struct sockaddr
*)&sa
, sizeof(sa
)) < 0) {
478 printf("bind to netlink: %s\n", strerror(errno
));
482 memset(&req
, 0, sizeof(req
));
483 req
.nl
.nlmsg_len
= NLMSG_LENGTH(sizeof(struct rtmsg
));
484 req
.nl
.nlmsg_flags
= NLM_F_REQUEST
| NLM_F_DUMP
;
485 req
.nl
.nlmsg_type
= RTM_GETNEIGH
;
486 req
.rt
.ndm_state
= NUD_REACHABLE
;
487 req
.rt
.ndm_family
= rtm_family
;
488 req
.nl
.nlmsg_pid
= 0;
489 req
.nl
.nlmsg_seq
= ++seq
;
490 memset(&msg
, 0, sizeof(msg
));
491 iov
.iov_base
= (void *)&req
.nl
;
492 iov
.iov_len
= req
.nl
.nlmsg_len
;
495 ret
= sendmsg(sock
, &msg
, 0);
497 printf("send to netlink: %s\n", strerror(errno
));
501 memset(buf
, 0, sizeof(buf
));
502 nll
= recv_msg(sa
, sock
);
504 printf("recv from netlink: %s\n", strerror(nll
));
508 nh
= (struct nlmsghdr
*)buf
;
515 /* Function to keep track and update changes in route and arp table
516 * Give regular statistics of packets forwarded
518 static int monitor_route(void)
520 unsigned int nr_cpus
= bpf_num_possible_cpus();
521 const unsigned int nr_keys
= 256;
522 struct pollfd fds_route
, fds_arp
;
523 __u64 prev
[nr_keys
][nr_cpus
];
524 struct sockaddr_nl la
, lr
;
525 __u64 values
[nr_cpus
];
532 sock
= socket(AF_NETLINK
, SOCK_RAW
, NETLINK_ROUTE
);
534 printf("open netlink socket: %s\n", strerror(errno
));
538 fcntl(sock
, F_SETFL
, O_NONBLOCK
);
539 memset(&lr
, 0, sizeof(lr
));
540 lr
.nl_family
= AF_NETLINK
;
541 lr
.nl_groups
= RTMGRP_IPV6_ROUTE
| RTMGRP_IPV4_ROUTE
| RTMGRP_NOTIFY
;
542 if (bind(sock
, (struct sockaddr
*)&lr
, sizeof(lr
)) < 0) {
543 printf("bind to netlink: %s\n", strerror(errno
));
548 fds_route
.events
= POLL_IN
;
550 sock_arp
= socket(AF_NETLINK
, SOCK_RAW
, NETLINK_ROUTE
);
552 printf("open netlink socket: %s\n", strerror(errno
));
556 fcntl(sock_arp
, F_SETFL
, O_NONBLOCK
);
557 memset(&la
, 0, sizeof(la
));
558 la
.nl_family
= AF_NETLINK
;
559 la
.nl_groups
= RTMGRP_NEIGH
| RTMGRP_NOTIFY
;
560 if (bind(sock_arp
, (struct sockaddr
*)&la
, sizeof(la
)) < 0) {
561 printf("bind to netlink: %s\n", strerror(errno
));
565 fds_arp
.fd
= sock_arp
;
566 fds_arp
.events
= POLL_IN
;
568 memset(prev
, 0, sizeof(prev
));
570 signal(SIGINT
, close_and_exit
);
571 signal(SIGTERM
, close_and_exit
);
574 for (key
= 0; key
< nr_keys
; key
++) {
577 assert(bpf_map_lookup_elem(rxcnt_map_fd
,
579 for (i
= 0; i
< nr_cpus
; i
++)
580 sum
+= (values
[i
] - prev
[key
][i
]);
582 printf("proto %u: %10llu pkt/s\n",
583 key
, sum
/ interval
);
584 memcpy(prev
[key
], values
, sizeof(values
));
587 memset(buf
, 0, sizeof(buf
));
588 if (poll(&fds_route
, 1, 3) == POLL_IN
) {
589 nll
= recv_msg(lr
, sock
);
591 printf("recv from netlink: %s\n", strerror(nll
));
596 nh
= (struct nlmsghdr
*)buf
;
597 printf("Routing table updated.\n");
600 memset(buf
, 0, sizeof(buf
));
601 if (poll(&fds_arp
, 1, 3) == POLL_IN
) {
602 nll
= recv_msg(la
, sock_arp
);
604 printf("recv from netlink: %s\n", strerror(nll
));
609 nh
= (struct nlmsghdr
*)buf
;
/* Print the command-line synopsis for @prog to stderr. */
static void usage(const char *prog)
{
	static const char help[] =
		"%s: %s [OPTS] interface name list\n\n"
		"OPTS:\n"
		" -S use skb-mode\n"
		" -F force loading prog\n";

	fprintf(stderr, help, __func__, prog);
}
629 int main(int ac
, char **argv
)
631 struct rlimit r
= {RLIM_INFINITY
, RLIM_INFINITY
};
632 struct bpf_prog_load_attr prog_load_attr
= {
633 .prog_type
= BPF_PROG_TYPE_XDP
,
635 struct bpf_prog_info info
= {};
636 __u32 info_len
= sizeof(info
);
637 const char *optstr
= "SF";
638 struct bpf_object
*obj
;
644 snprintf(filename
, sizeof(filename
), "%s_kern.o", argv
[0]);
645 prog_load_attr
.file
= filename
;
647 total_ifindex
= ac
- 1;
648 ifname_list
= (argv
+ 1);
650 while ((opt
= getopt(ac
, argv
, optstr
)) != -1) {
653 flags
|= XDP_FLAGS_SKB_MODE
;
658 flags
&= ~XDP_FLAGS_UPDATE_IF_NOEXIST
;
663 usage(basename(argv
[0]));
669 usage(basename(argv
[0]));
673 if (setrlimit(RLIMIT_MEMLOCK
, &r
)) {
674 perror("setrlimit(RLIMIT_MEMLOCK)");
678 if (bpf_prog_load_xattr(&prog_load_attr
, &obj
, &prog_fd
))
681 printf("\n**************loading bpf file*********************\n\n\n");
683 printf("bpf_prog_load_xattr: %s\n", strerror(errno
));
687 lpm_map_fd
= bpf_object__find_map_fd_by_name(obj
, "lpm_map");
688 rxcnt_map_fd
= bpf_object__find_map_fd_by_name(obj
, "rxcnt");
689 arp_table_map_fd
= bpf_object__find_map_fd_by_name(obj
, "arp_table");
690 exact_match_map_fd
= bpf_object__find_map_fd_by_name(obj
,
692 tx_port_map_fd
= bpf_object__find_map_fd_by_name(obj
, "tx_port");
693 if (lpm_map_fd
< 0 || rxcnt_map_fd
< 0 || arp_table_map_fd
< 0 ||
694 exact_match_map_fd
< 0 || tx_port_map_fd
< 0) {
695 printf("bpf_object__find_map_fd_by_name failed\n");
699 ifindex_list
= (int *)calloc(total_ifindex
, sizeof(int *));
700 for (i
= 0; i
< total_ifindex
; i
++) {
701 ifindex_list
[i
] = if_nametoindex(ifname_list
[i
]);
702 if (!ifindex_list
[i
]) {
703 printf("Couldn't translate interface name: %s",
708 prog_id_list
= (__u32
*)calloc(total_ifindex
, sizeof(__u32
*));
709 for (i
= 0; i
< total_ifindex
; i
++) {
710 if (bpf_set_link_xdp_fd(ifindex_list
[i
], prog_fd
, flags
) < 0) {
711 printf("link set xdp fd failed\n");
712 int recovery_index
= i
;
714 for (i
= 0; i
< recovery_index
; i
++)
715 bpf_set_link_xdp_fd(ifindex_list
[i
], -1, flags
);
719 err
= bpf_obj_get_info_by_fd(prog_fd
, &info
, &info_len
);
721 printf("can't get prog info - %s\n", strerror(errno
));
724 prog_id_list
[i
] = info
.id
;
725 memset(&info
, 0, sizeof(info
));
726 printf("Attached to %d\n", ifindex_list
[i
]);
728 signal(SIGINT
, int_exit
);
729 signal(SIGTERM
, int_exit
);
731 printf("*******************ROUTE TABLE*************************\n\n\n");
732 get_route_table(AF_INET
);
733 printf("*******************ARP TABLE***************************\n\n\n");
734 get_arp_table(AF_INET
);
735 if (monitor_route() < 0) {
736 printf("Error in receiving route update");