2 * Copyright (c) 2011, 2012, 2013 Nicira, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 #include "route-table.h"
21 #include <arpa/inet.h>
22 #include <sys/socket.h>
23 #include <linux/rtnetlink.h>
29 #include "netlink-notifier.h"
30 #include "netlink-socket.h"
32 #include "rtnetlink-link.h"
/* Defines the vlog module "route_table" used by the VLOG_* calls in this
 * file. */
VLOG_DEFINE_THIS_MODULE(route_table);
/* NOTE(review): these three members look like the body of
 * 'struct route_data' (the type of the 'rd' members declared further down);
 * the enclosing 'struct ... {' and closing lines are not present here --
 * confirm.
 *
 * Entries of this type are hashed with hash_bytes() and compared with
 * memcmp() over the whole object elsewhere in this file. */
38 /* Copied from struct rtmsg. */
/* Used as a prefix length in bits: route_node_lookup_by_ip() derives its
 * match mask from (32 - rtm_dst_len). */
39 unsigned char rtm_dst_len
;
41 /* Extracted from Netlink attributes. */
42 uint32_t rta_dst
; /* Destination in host byte order. 0 if missing. */
43 int rta_oif
; /* Output interface index. */
46 /* A digested version of a route message sent down by the kernel to indicate
47 * that a route has changed. */
/* Filled in by route_table_parse() and consumed by route_table_handle_msg(),
 * which only acts on messages with 'relevant' set and 'nlmsg_type' equal to
 * RTM_NEWROUTE. */
48 struct route_table_msg
{
/* route_table_parse() sets 'relevant' to true and then clears it for
 * RT_SCOPE_NOWHERE routes and for route types other than RTN_UNICAST and
 * RTN_LOCAL. */
49 bool relevant
; /* Should this message be processed? */
50 int nlmsg_type
; /* e.g. RTM_NEWROUTE, RTM_DELROUTE. */
51 struct route_data rd
; /* Data parsed from this message. */
/* NOTE(review): these look like the members of 'struct route_node', the
 * element type stored in 'route_map'; the enclosing struct lines are not
 * present here -- confirm. */
55 struct hmap_node node
; /* Node in route_map. */
/* 'rd' is both the hash input (hash_route_data()) and the equality key
 * (memcmp() in route_node_lookup()). */
56 struct route_data rd
; /* Data associated with this node. */
/* NOTE(review): these look like the members of 'struct name_node', the
 * element type stored in 'name_map'; the enclosing struct lines are not
 * present here -- confirm. */
60 struct hmap_node node
; /* Node in name_map. */
/* Entries are inserted and looked up using hash_int(ifi_index, 0). */
61 uint32_t ifi_index
; /* Kernel interface index. */
63 char ifname
[IFNAMSIZ
]; /* Interface name. */
66 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(5, 20);
68 static unsigned int register_count
= 0;
69 static struct nln
*nln
= NULL
;
70 static struct route_table_msg rtmsg
;
71 static struct nln_notifier
*route_notifier
= NULL
;
72 static struct nln_notifier
*name_notifier
= NULL
;
74 static bool route_table_valid
= false;
75 static bool name_table_valid
= false;
76 static struct hmap route_map
;
77 static struct hmap name_map
;
/* Route table helpers. */
static int route_table_reset(void);
static void route_table_handle_msg(const struct route_table_msg *change);
static bool route_table_parse(struct ofpbuf *buf,
                              struct route_table_msg *change);
static void route_table_change(const struct route_table_msg *change,
                               void *aux);
static struct route_node *route_node_lookup(const struct route_data *rd);
static struct route_node *route_node_lookup_by_ip(uint32_t ip);
static void route_map_clear(void);
static uint32_t hash_route_data(const struct route_data *rd);

/* Interface name table helpers. */
static void name_table_init(void);
static void name_table_uninit(void);
static int name_table_reset(void);
static void name_table_change(const struct rtnetlink_link_change *change,
                              void *aux);
static void name_map_clear(void);
static struct name_node *name_node_lookup(int ifi_index);
95 /* Populates 'name' with the name of the interface traffic destined for 'ip'
96 * is likely to egress out of (see route_table_get_ifindex).
98 * Returns true if successful, otherwise false. */
/* 'ip' is passed unchanged to route_table_get_ifindex(), which takes it as
 * an ovs_be32 and converts it with ntohl(). */
100 route_table_get_name(ovs_be32 ip
, char name
[IFNAMSIZ
])
/* name_table_valid is cleared by name_table_change() and set again by
 * name_table_reset(). */
104 if (!name_table_valid
) {
/* Two-step resolution: 'ip' to an interface index, then that index to a
 * name_node in name_map. */
108 if (route_table_get_ifindex(ip
, &ifindex
)) {
109 struct name_node
*nn
;
111 nn
= name_node_lookup(ifindex
);
/* IFNAMSIZ bounds the copy; it is also the declared size of both 'name' and
 * name_node's 'ifname'. */
113 ovs_strlcpy(name
, nn
->ifname
, IFNAMSIZ
);
121 /* Populates 'ifindex' with the interface index traffic destined for 'ip' is
122 * likely to egress. There is no hard guarantee that traffic destined for 'ip'
123 * will egress out the specified interface. 'ifindex' may refer to an
124 * interface which is not physical (such as a bridge port).
126 * Returns true if successful, otherwise false. */
128 route_table_get_ifindex(ovs_be32 ip_
, int *ifindex
)
130 struct route_node
*rn
;
/* Cached routes keep their destination in host byte order, so 'ip_' is
 * converted once up front. */
131 uint32_t ip
= ntohl(ip_
);
/* route_table_valid is cleared by route_table_change() and
 * name_table_change(), and set by route_table_reset(). */
135 if (!route_table_valid
) {
/* Prefix lookup by destination address. */
139 rn
= route_node_lookup_by_ip(ip
);
142 *ifindex
= rn
->rd
.rta_oif
;
146 /* Choose a default route. */
/* A default route is an entry whose destination and prefix length are both
 * zero; route_map is scanned linearly for one. */
147 HMAP_FOR_EACH(rn
, node
, &route_map
) {
148 if (rn
->rd
.rta_dst
== 0 && rn
->rd
.rtm_dst_len
== 0) {
149 *ifindex
= rn
->rd
.rta_oif
;
157 /* Users of the route_table module should register themselves with this
158 * function before making any other route_table function calls. */
160 route_table_register(void)
/* The set-up below runs only while register_count is zero (presumably the
 * first registration -- the counter update itself is not shown here). */
162 if (!register_count
) {
164 ovs_assert(!route_notifier
);
/* Subscribes to the RTNLGRP_IPV4_ROUTE multicast group on NETLINK_ROUTE,
 * with route_table_parse() as the parse function and the file-scope 'rtmsg'
 * as the final nln_create() argument. */
166 nln
= nln_create(NETLINK_ROUTE
, RTNLGRP_IPV4_ROUTE
,
167 (nln_parse_func
*) route_table_parse
, &rtmsg
);
/* route_table_change() is installed as the notification callback; it clears
 * route_table_valid. */
170 nln_notifier_create(nln
, (nln_notify_func
*) route_table_change
,
173 hmap_init(&route_map
);
181 /* Users of the route_table module should unregister themselves with this
182 * function when they will no longer be making any more route_table function
 * calls. */
/* Counterpart of route_table_register(): once register_count is zero, the
 * route notifier is destroyed and 'route_map' is released. */
185 route_table_unregister(void)
189 if (!register_count
) {
190 nln_notifier_destroy(route_notifier
);
/* Reset to NULL so that the ovs_assert(!route_notifier) in
 * route_table_register() holds on a later re-registration. */
191 route_notifier
= NULL
;
196 hmap_destroy(&route_map
);
201 /* Run periodically to update the locally maintained routing table. */
203 route_table_run(void)
/* Link notifications are driven from here as well; name_table_change() is
 * registered with the rtnetlink link module in name_table_init(). */
206 rtnetlink_link_run();
211 /* Causes poll_block() to wake up when route_table updates are required. */
213 route_table_wait(void)
/* Also waits on the rtnetlink link module, which is the source of
 * name_table_change() notifications. */
216 rtnetlink_link_wait();
/* Issues an RTM_GETROUTE Netlink dump request for AF_INET and passes every
 * reply that route_table_parse() accepts to route_table_handle_msg().
 *
 * Returns the value of nl_dump_done(). */
222 route_table_reset(void)
/* NOTE(review): this local 'rtmsg' (struct rtgenmsg *) shadows the
 * file-scope 'rtmsg' (struct route_table_msg). */
225 struct rtgenmsg
*rtmsg
;
226 struct ofpbuf request
, reply
;
/* The table is flagged valid before the dump is issued. */
229 route_table_valid
= true;
231 ofpbuf_init(&request
, 0);
/* Request layout: a Netlink header followed by a zeroed struct rtgenmsg
 * whose family is set to AF_INET. */
233 nl_msg_put_nlmsghdr(&request
, sizeof *rtmsg
, RTM_GETROUTE
, NLM_F_REQUEST
);
235 rtmsg
= ofpbuf_put_zeros(&request
, sizeof *rtmsg
);
236 rtmsg
->rtgen_family
= AF_INET
;
/* The request buffer is released right after the dump has been started. */
238 nl_dump_start(&dump
, NETLINK_ROUTE
, &request
);
239 ofpbuf_uninit(&request
);
241 while (nl_dump_next(&dump
, &reply
)) {
/* A stack-local message is used here rather than the file-scope
 * route_table_msg. */
242 struct route_table_msg msg
;
244 if (route_table_parse(&reply
, &msg
)) {
245 route_table_handle_msg(&msg
);
249 return nl_dump_done(&dump
);
/* Digests the rtnetlink route message in 'buf' into 'change'.
 *
 * Declared to return bool (see the prototype near the top of the file); the
 * callers treat a true result as "'change' was filled in". */
254 route_table_parse(struct ofpbuf
*buf
, struct route_table_msg
*change
)
/* Attribute policy: RTA_DST may be absent, RTA_OIF may not.
 * NOTE(review): presumably a message lacking RTA_OIF therefore fails
 * nl_policy_parse() and ends up on the "unparseable" path below -- confirm. */
258 static const struct nl_policy policy
[] = {
259 [RTA_DST
] = { .type
= NL_A_U32
, .optional
= true },
260 [RTA_OIF
] = { .type
= NL_A_U32
, .optional
= false },
263 struct nlattr
*attrs
[ARRAY_SIZE(policy
)];
/* Attributes are parsed starting after the Netlink header and the
 * struct rtmsg that follows it. */
265 parsed
= nl_policy_parse(buf
, NLMSG_HDRLEN
+ sizeof(struct rtmsg
),
266 policy
, attrs
, ARRAY_SIZE(policy
));
269 const struct rtmsg
*rtm
;
270 const struct nlmsghdr
*nlmsg
;
/* The struct rtmsg sits immediately after the Netlink header. */
273 rtm
= ofpbuf_at(buf
, NLMSG_HDRLEN
, sizeof *rtm
);
275 if (rtm
->rtm_family
!= AF_INET
) {
276 VLOG_DBG_RL(&rl
, "received non AF_INET rtnetlink route message");
/* Zeroing the whole object also leaves rd.rta_dst at 0 when RTA_DST is
 * absent, and clears every byte that hash_bytes() and memcmp() later read
 * from 'rd'. */
280 memset(change
, 0, sizeof *change
);
281 change
->relevant
= true;
/* Routes with scope RT_SCOPE_NOWHERE are marked as not relevant. */
283 if (rtm
->rtm_scope
== RT_SCOPE_NOWHERE
) {
284 change
->relevant
= false;
/* Only RTN_UNICAST and RTN_LOCAL routes stay relevant. */
287 if (rtm
->rtm_type
!= RTN_UNICAST
&&
288 rtm
->rtm_type
!= RTN_LOCAL
) {
289 change
->relevant
= false;
292 change
->nlmsg_type
= nlmsg
->nlmsg_type
;
293 change
->rd
.rtm_dst_len
= rtm
->rtm_dst_len
;
294 change
->rd
.rta_oif
= nl_attr_get_u32(attrs
[RTA_OIF
]);
/* The destination arrives as a big-endian 32-bit value and is stored in
 * host byte order. */
296 if (attrs
[RTA_DST
]) {
297 change
->rd
.rta_dst
= ntohl(nl_attr_get_be32(attrs
[RTA_DST
]));
301 VLOG_DBG_RL(&rl
, "received unparseable rtnetlink route message");
308 route_table_change(const struct route_table_msg
*change OVS_UNUSED
,
309 void *aux OVS_UNUSED
)
311 route_table_valid
= false;
315 route_table_handle_msg(const struct route_table_msg
*change
)
317 if (change
->relevant
&& change
->nlmsg_type
== RTM_NEWROUTE
&&
318 !route_node_lookup(&change
->rd
)) {
319 struct route_node
*rn
;
321 rn
= xzalloc(sizeof *rn
);
322 memcpy(&rn
->rd
, &change
->rd
, sizeof change
->rd
);
324 hmap_insert(&route_map
, &rn
->node
, hash_route_data(&rn
->rd
));
/* Searches 'route_map' for an entry whose route data is bytewise identical
 * to '*rd'.  Only the bucket selected by hash_route_data(rd) is visited. */
328 static struct route_node
*
329 route_node_lookup(const struct route_data
*rd
)
331 struct route_node
*rn
;
333 HMAP_FOR_EACH_WITH_HASH(rn
, node
, hash_route_data(rd
), &route_map
) {
/* Equality is a memcmp() over the entire struct, so every byte of the
 * object takes part, not just the named members. */
334 if (!memcmp(&rn
->rd
, rd
, sizeof *rd
)) {
342 static struct route_node
*
343 route_node_lookup_by_ip(uint32_t ip
)
346 struct route_node
*rn
, *rn_ret
;
351 HMAP_FOR_EACH(rn
, node
, &route_map
) {
352 uint32_t mask
= 0xffffffff << (32 - rn
->rd
.rtm_dst_len
);
354 if (rn
->rd
.rta_dst
== 0 && rn
->rd
.rtm_dst_len
== 0) {
359 if (rn
->rd
.rtm_dst_len
> dst_len
&&
360 (ip
& mask
) == (rn
->rd
.rta_dst
& mask
)) {
362 dst_len
= rn
->rd
.rtm_dst_len
;
/* Empties 'route_map' by unlinking every route_node from it. */
370 route_map_clear(void)
372 struct route_node
*rn
, *rn_next
;
/* The _SAFE iterator is used because nodes are removed while iterating. */
374 HMAP_FOR_EACH_SAFE(rn
, rn_next
, node
, &route_map
) {
375 hmap_remove(&route_map
, &rn
->node
);
381 hash_route_data(const struct route_data
*rd
)
383 return hash_bytes(rd
, sizeof *rd
, 0);
389 name_table_init(void)
391 hmap_init(&name_map
);
392 name_notifier
= rtnetlink_link_notifier_create(name_table_change
, NULL
);
393 name_table_valid
= false;
/* Undoes name_table_init(): unregisters the link notifier and destroys
 * 'name_map'. */
397 name_table_uninit(void)
399 rtnetlink_link_notifier_destroy(name_notifier
);
400 name_notifier
= NULL
;
402 hmap_destroy(&name_map
);
/* Issues an RTM_GETLINK Netlink dump request and adds a name_node to
 * 'name_map' for every RTM_NEWLINK reply whose interface index is not in the
 * map yet.
 *
 * Returns the value of nl_dump_done(). */
406 name_table_reset(void)
/* NOTE(review): this local 'rtmsg' (struct rtgenmsg *) shadows the
 * file-scope 'rtmsg' (struct route_table_msg). */
409 struct rtgenmsg
*rtmsg
;
410 struct ofpbuf request
, reply
;
/* The table is flagged valid before the dump is issued. */
412 name_table_valid
= true;
415 ofpbuf_init(&request
, 0);
416 nl_msg_put_nlmsghdr(&request
, sizeof *rtmsg
, RTM_GETLINK
, NLM_F_REQUEST
);
417 rtmsg
= ofpbuf_put_zeros(&request
, sizeof *rtmsg
);
418 rtmsg
->rtgen_family
= AF_INET
;
/* The request buffer is released right after the dump has been started. */
420 nl_dump_start(&dump
, NETLINK_ROUTE
, &request
);
421 ofpbuf_uninit(&request
);
423 while (nl_dump_next(&dump
, &reply
)) {
424 struct rtnetlink_link_change change
;
/* Replies that fail to parse, are not RTM_NEWLINK, or repeat an interface
 * index that is already cached are skipped. */
426 if (rtnetlink_link_parse(&reply
, &change
)
427 && change
.nlmsg_type
== RTM_NEWLINK
428 && !name_node_lookup(change
.ifi_index
)) {
429 struct name_node
*nn
;
431 nn
= xzalloc(sizeof *nn
);
432 nn
->ifi_index
= change
.ifi_index
;
433 ovs_strlcpy(nn
->ifname
, change
.ifname
, IFNAMSIZ
);
/* Same hash expression as name_node_lookup(): hash_int(index, 0). */
434 hmap_insert(&name_map
, &nn
->node
, hash_int(nn
->ifi_index
, 0));
437 return nl_dump_done(&dump
);
441 name_table_change(const struct rtnetlink_link_change
*change OVS_UNUSED
,
442 void *aux OVS_UNUSED
)
444 /* Changes to interface status can cause routing table changes that some
445 * versions of the linux kernel do not advertise for some reason. */
446 route_table_valid
= false;
447 name_table_valid
= false;
/* Searches 'name_map' for the entry whose interface index equals
 * 'ifi_index'.  Only the bucket selected by hash_int(ifi_index, 0) is
 * visited. */
450 static struct name_node
*
451 name_node_lookup(int ifi_index
)
453 struct name_node
*nn
;
455 HMAP_FOR_EACH_WITH_HASH(nn
, node
, hash_int(ifi_index
, 0), &name_map
) {
/* NOTE(review): name_node's 'ifi_index' member is declared uint32_t while the
 * parameter is int, so this comparison mixes signed and unsigned operands. */
456 if (nn
->ifi_index
== ifi_index
) {
467 struct name_node
*nn
, *nn_next
;
469 HMAP_FOR_EACH_SAFE(nn
, nn_next
, node
, &name_map
) {
470 hmap_remove(&name_map
, &nn
->node
);