]>
Commit | Line | Data |
---|---|---|
a132aa96 | 1 | /* |
37cd552e | 2 | * Copyright (c) 2011, 2012, 2013 Nicira, Inc. |
a132aa96 EJ |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | */ | |
16 | ||
17 | #include <config.h> | |
18 | ||
19 | #include "route-table.h" | |
20 | ||
a132aa96 EJ |
21 | #include <arpa/inet.h> |
22 | #include <sys/socket.h> | |
23 | #include <linux/rtnetlink.h> | |
24 | #include <net/if.h> | |
25 | ||
26 | #include "hash.h" | |
27 | #include "hmap.h" | |
28 | #include "netlink.h" | |
45c8d3a1 | 29 | #include "netlink-notifier.h" |
a132aa96 EJ |
30 | #include "netlink-socket.h" |
31 | #include "ofpbuf.h" | |
b46ccdf5 | 32 | #include "rtnetlink-link.h" |
a132aa96 EJ |
33 | #include "vlog.h" |
34 | ||
35 | VLOG_DEFINE_THIS_MODULE(route_table); | |
36 | ||
/* One IPv4 route as kept in route_map.  Lookups hash and memcmp() the whole
 * object byte for byte, which is why the code in this file zeroes instances
 * (memset()/xzalloc()) before filling in the fields. */
37 | struct route_data { | |
38 | /* Copied from struct rtmsg. */ | |
39 | unsigned char rtm_dst_len; /* Destination prefix length, in bits. */ | |
40 | ||
41 | /* Extracted from Netlink attributes. */ | |
42 | uint32_t rta_dst; /* Destination in host byte order. 0 if missing. */ | |
43 | int rta_oif; /* Output interface index. */ | |
44 | }; | |
45 | ||
46 | /* A digested version of a route message sent down by the kernel to indicate | |
47 | * that a route has changed.  Filled in by route_table_parse(). */ | |
48 | struct route_table_msg { | |
db2dede4 | 49 | bool relevant; /* False if scope is RT_SCOPE_NOWHERE or type is neither RTN_UNICAST nor RTN_LOCAL. */ |
a132aa96 EJ |
50 | int nlmsg_type; /* Netlink message type, e.g. RTM_NEWROUTE, RTM_DELROUTE. */ |
51 | struct route_data rd; /* Route data parsed from this message. */ | |
52 | }; | |
53 | ||
/* A route stored in route_map, hashed with hash_route_data() on 'rd'. */
54 | struct route_node { | |
55 | struct hmap_node node; /* Node in route_map. */ | |
56 | struct route_data rd; /* The route itself; also the lookup key. */ | |
57 | }; | |
58 | ||
b46ccdf5 EJ |
/* Maps a kernel interface index to its name.  Stored in name_map, hashed on
 * 'ifi_index'. */
59 | struct name_node { | |
60 | struct hmap_node node; /* Node in name_map. */ | |
61 | uint32_t ifi_index; /* Kernel interface index (hash key). */ | |
62 | ||
63 | char ifname[IFNAMSIZ]; /* Interface name copied from the parsed link message. */ | |
64 | }; | |
65 | ||
a132aa96 EJ |
/* Module-wide state.  The two *_valid flags are cleared by the notifier
 * callbacks and set again by the corresponding *_reset() function, so the
 * tables are rebuilt on the next lookup rather than on every notification. */
66 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); |
67 | ||
68 | static unsigned int register_count = 0; /* Outstanding route_table_register() calls. */ | |
0a811051 | 69 | static struct nln *nln = NULL; /* Route notifier handle; NULL while unregistered. */ |
a132aa96 | 70 | static struct route_table_msg rtmsg; /* Parse target passed to nln_create(). */ |
2ee6545f EJ |
71 | static struct nln_notifier *route_notifier = NULL; /* Invokes route_table_change(). */ |
72 | static struct nln_notifier *name_notifier = NULL; /* Invokes name_table_change(). */ | |
f0e167f0 EJ |
73 | |
74 | static bool route_table_valid = false; /* Is route_map up to date? */ | |
b46ccdf5 | 75 | static bool name_table_valid = false; /* Is name_map up to date? */ |
a132aa96 | 76 | static struct hmap route_map; /* Contains "struct route_node"s. */ |
b46ccdf5 | 77 | static struct hmap name_map; /* Contains "struct name_node"s. */ |
a132aa96 EJ |
78 | |
79 | static int route_table_reset(void); | |
f0e167f0 | 80 | static void route_table_handle_msg(const struct route_table_msg *); |
a132aa96 EJ |
81 | static bool route_table_parse(struct ofpbuf *, struct route_table_msg *); |
82 | static void route_table_change(const struct route_table_msg *, void *); | |
83 | static struct route_node *route_node_lookup(const struct route_data *); | |
84 | static struct route_node *route_node_lookup_by_ip(uint32_t ip); | |
85 | static void route_map_clear(void); | |
86 | static uint32_t hash_route_data(const struct route_data *); | |
87 | ||
b46ccdf5 EJ |
88 | static void name_table_init(void); |
89 | static void name_table_uninit(void); | |
90 | static int name_table_reset(void); | |
91 | static void name_table_change(const struct rtnetlink_link_change *, void *); | |
92 | static void name_map_clear(void); | |
93 | static struct name_node *name_node_lookup(int ifi_index); | |
94 | ||
95 | /* Populates 'name' with the name of the interface traffic destined for 'ip' | |
96 | * is likely to egress out of (see route_table_get_ifindex). | |
97 | * | |
98 | * Returns true if successful, otherwise false. */ | |
99 | bool | |
100 | route_table_get_name(ovs_be32 ip, char name[IFNAMSIZ]) | |
101 | { | |
102 | int ifindex; | |
103 | ||
104 | if (!name_table_valid) { | |
105 | name_table_reset(); | |
106 | } | |
107 | ||
108 | if (route_table_get_ifindex(ip, &ifindex)) { | |
109 | struct name_node *nn; | |
110 | ||
111 | nn = name_node_lookup(ifindex); | |
112 | if (nn) { | |
e868fb3d | 113 | ovs_strlcpy(name, nn->ifname, IFNAMSIZ); |
b46ccdf5 EJ |
114 | return true; |
115 | } | |
116 | } | |
117 | ||
118 | return false; | |
119 | } | |
120 | ||
a132aa96 EJ |
121 | /* Populates 'ifindex' with the interface index traffic destined for 'ip' is |
122 | * likely to egress. There is no hard guarantee that traffic destined for 'ip' | |
123 | * will egress out the specified interface. 'ifindex' may refer to an | |
124 | * interface which is not physical (such as a bridge port). | |
125 | * | |
126 | * Returns true if successful, otherwise false. */ | |
127 | bool | |
128 | route_table_get_ifindex(ovs_be32 ip_, int *ifindex) | |
129 | { | |
130 | struct route_node *rn; | |
131 | uint32_t ip = ntohl(ip_); | |
132 | ||
133 | *ifindex = 0; | |
134 | ||
f0e167f0 EJ |
135 | if (!route_table_valid) { |
136 | route_table_reset(); | |
137 | } | |
138 | ||
a132aa96 EJ |
139 | rn = route_node_lookup_by_ip(ip); |
140 | ||
141 | if (rn) { | |
142 | *ifindex = rn->rd.rta_oif; | |
143 | return true; | |
144 | } | |
145 | ||
146 | /* Choose a default route. */ | |
147 | HMAP_FOR_EACH(rn, node, &route_map) { | |
148 | if (rn->rd.rta_dst == 0 && rn->rd.rtm_dst_len == 0) { | |
149 | *ifindex = rn->rd.rta_oif; | |
150 | return true; | |
151 | } | |
152 | } | |
153 | ||
154 | return false; | |
155 | } | |
156 | ||
157 | /* Users of the route_table module should register themselves with this | |
158 | * function before making any other route_table function calls. */ | |
159 | void | |
160 | route_table_register(void) | |
161 | { | |
162 | if (!register_count) { | |
cb22974d BP |
163 | ovs_assert(!nln); |
164 | ovs_assert(!route_notifier); | |
a132aa96 | 165 | |
0a811051 EJ |
166 | nln = nln_create(NETLINK_ROUTE, RTNLGRP_IPV4_ROUTE, |
167 | (nln_parse_func *) route_table_parse, &rtmsg); | |
2ee6545f EJ |
168 | |
169 | route_notifier = | |
170 | nln_notifier_create(nln, (nln_notify_func *) route_table_change, | |
171 | NULL); | |
a132aa96 EJ |
172 | |
173 | hmap_init(&route_map); | |
174 | route_table_reset(); | |
b46ccdf5 | 175 | name_table_init(); |
a132aa96 EJ |
176 | } |
177 | ||
178 | register_count++; | |
179 | } | |
180 | ||
181 | /* Users of the route_table module should unregister themselves with this | |
182 | * function when they will no longer be making any more route_table fuction | |
183 | * calls. */ | |
184 | void | |
185 | route_table_unregister(void) | |
186 | { | |
187 | register_count--; | |
188 | ||
189 | if (!register_count) { | |
f4dc8c58 | 190 | nln_notifier_destroy(route_notifier); |
2f0dc471 | 191 | route_notifier = NULL; |
0a811051 EJ |
192 | nln_destroy(nln); |
193 | nln = NULL; | |
a132aa96 EJ |
194 | |
195 | route_map_clear(); | |
196 | hmap_destroy(&route_map); | |
b46ccdf5 | 197 | name_table_uninit(); |
a132aa96 EJ |
198 | } |
199 | } | |
200 | ||
201 | /* Run periodically to update the locally maintained routing table. */ | |
202 | void | |
203 | route_table_run(void) | |
204 | { | |
0a811051 | 205 | if (nln) { |
18a23781 EJ |
206 | rtnetlink_link_run(); |
207 | nln_run(nln); | |
a132aa96 EJ |
208 | } |
209 | } | |
210 | ||
211 | /* Causes poll_block() to wake up when route_table updates are required. */ | |
212 | void | |
213 | route_table_wait(void) | |
214 | { | |
0a811051 | 215 | if (nln) { |
18a23781 EJ |
216 | rtnetlink_link_wait(); |
217 | nln_wait(nln); | |
a132aa96 EJ |
218 | } |
219 | } | |
220 | ||
221 | static int | |
222 | route_table_reset(void) | |
223 | { | |
a132aa96 EJ |
224 | struct nl_dump dump; |
225 | struct rtgenmsg *rtmsg; | |
d57695d7 JS |
226 | uint64_t reply_stub[NL_DUMP_BUFSIZE / 8]; |
227 | struct ofpbuf request, reply, buf; | |
a132aa96 EJ |
228 | |
229 | route_map_clear(); | |
f0e167f0 | 230 | route_table_valid = true; |
a132aa96 | 231 | |
a132aa96 EJ |
232 | ofpbuf_init(&request, 0); |
233 | ||
234 | nl_msg_put_nlmsghdr(&request, sizeof *rtmsg, RTM_GETROUTE, NLM_F_REQUEST); | |
235 | ||
236 | rtmsg = ofpbuf_put_zeros(&request, sizeof *rtmsg); | |
237 | rtmsg->rtgen_family = AF_INET; | |
238 | ||
a88b4e04 | 239 | nl_dump_start(&dump, NETLINK_ROUTE, &request); |
896b3272 | 240 | ofpbuf_uninit(&request); |
a132aa96 | 241 | |
d57695d7 JS |
242 | ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub); |
243 | while (nl_dump_next(&dump, &reply, &buf)) { | |
a132aa96 EJ |
244 | struct route_table_msg msg; |
245 | ||
246 | if (route_table_parse(&reply, &msg)) { | |
f0e167f0 | 247 | route_table_handle_msg(&msg); |
a132aa96 EJ |
248 | } |
249 | } | |
d57695d7 | 250 | ofpbuf_uninit(&buf); |
a132aa96 | 251 | |
a88b4e04 | 252 | return nl_dump_done(&dump); |
a132aa96 EJ |
253 | } |
254 | ||
255 | ||
256 | static bool | |
257 | route_table_parse(struct ofpbuf *buf, struct route_table_msg *change) | |
258 | { | |
259 | bool parsed; | |
260 | ||
261 | static const struct nl_policy policy[] = { | |
262 | [RTA_DST] = { .type = NL_A_U32, .optional = true }, | |
263 | [RTA_OIF] = { .type = NL_A_U32, .optional = false }, | |
264 | }; | |
265 | ||
37cd552e | 266 | struct nlattr *attrs[ARRAY_SIZE(policy)]; |
a132aa96 EJ |
267 | |
268 | parsed = nl_policy_parse(buf, NLMSG_HDRLEN + sizeof(struct rtmsg), | |
269 | policy, attrs, ARRAY_SIZE(policy)); | |
270 | ||
271 | if (parsed) { | |
272 | const struct rtmsg *rtm; | |
273 | const struct nlmsghdr *nlmsg; | |
274 | ||
1f317cb5 | 275 | nlmsg = ofpbuf_data(buf); |
db5a1019 | 276 | rtm = ofpbuf_at(buf, NLMSG_HDRLEN, sizeof *rtm); |
a132aa96 EJ |
277 | |
278 | if (rtm->rtm_family != AF_INET) { | |
279 | VLOG_DBG_RL(&rl, "received non AF_INET rtnetlink route message"); | |
280 | return false; | |
281 | } | |
282 | ||
283 | memset(change, 0, sizeof *change); | |
db2dede4 EJ |
284 | change->relevant = true; |
285 | ||
286 | if (rtm->rtm_scope == RT_SCOPE_NOWHERE) { | |
287 | change->relevant = false; | |
288 | } | |
289 | ||
290 | if (rtm->rtm_type != RTN_UNICAST && | |
291 | rtm->rtm_type != RTN_LOCAL) { | |
292 | change->relevant = false; | |
293 | } | |
a132aa96 EJ |
294 | |
295 | change->nlmsg_type = nlmsg->nlmsg_type; | |
296 | change->rd.rtm_dst_len = rtm->rtm_dst_len; | |
297 | change->rd.rta_oif = nl_attr_get_u32(attrs[RTA_OIF]); | |
298 | ||
299 | if (attrs[RTA_DST]) { | |
300 | change->rd.rta_dst = ntohl(nl_attr_get_be32(attrs[RTA_DST])); | |
301 | } | |
302 | ||
303 | } else { | |
304 | VLOG_DBG_RL(&rl, "received unparseable rtnetlink route message"); | |
305 | } | |
306 | ||
307 | return parsed; | |
308 | } | |
309 | ||
310 | static void | |
f0e167f0 EJ |
311 | route_table_change(const struct route_table_msg *change OVS_UNUSED, |
312 | void *aux OVS_UNUSED) | |
a132aa96 | 313 | { |
f0e167f0 EJ |
314 | route_table_valid = false; |
315 | } | |
316 | ||
317 | static void | |
318 | route_table_handle_msg(const struct route_table_msg *change) | |
319 | { | |
320 | if (change->relevant && change->nlmsg_type == RTM_NEWROUTE && | |
321 | !route_node_lookup(&change->rd)) { | |
a132aa96 EJ |
322 | struct route_node *rn; |
323 | ||
f0e167f0 EJ |
324 | rn = xzalloc(sizeof *rn); |
325 | memcpy(&rn->rd, &change->rd, sizeof change->rd); | |
a132aa96 | 326 | |
f0e167f0 | 327 | hmap_insert(&route_map, &rn->node, hash_route_data(&rn->rd)); |
a132aa96 EJ |
328 | } |
329 | } | |
330 | ||
331 | static struct route_node * | |
332 | route_node_lookup(const struct route_data *rd) | |
333 | { | |
334 | struct route_node *rn; | |
335 | ||
336 | HMAP_FOR_EACH_WITH_HASH(rn, node, hash_route_data(rd), &route_map) { | |
337 | if (!memcmp(&rn->rd, rd, sizeof *rd)) { | |
338 | return rn; | |
339 | } | |
340 | } | |
341 | ||
342 | return NULL; | |
343 | } | |
344 | ||
345 | static struct route_node * | |
346 | route_node_lookup_by_ip(uint32_t ip) | |
347 | { | |
348 | int dst_len; | |
349 | struct route_node *rn, *rn_ret; | |
350 | ||
351 | dst_len = -1; | |
352 | rn_ret = NULL; | |
353 | ||
354 | HMAP_FOR_EACH(rn, node, &route_map) { | |
355 | uint32_t mask = 0xffffffff << (32 - rn->rd.rtm_dst_len); | |
356 | ||
357 | if (rn->rd.rta_dst == 0 && rn->rd.rtm_dst_len == 0) { | |
358 | /* Default route. */ | |
359 | continue; | |
360 | } | |
361 | ||
362 | if (rn->rd.rtm_dst_len > dst_len && | |
363 | (ip & mask) == (rn->rd.rta_dst & mask)) { | |
364 | rn_ret = rn; | |
365 | dst_len = rn->rd.rtm_dst_len; | |
366 | } | |
367 | } | |
368 | ||
369 | return rn_ret; | |
370 | } | |
371 | ||
372 | static void | |
373 | route_map_clear(void) | |
374 | { | |
375 | struct route_node *rn, *rn_next; | |
376 | ||
377 | HMAP_FOR_EACH_SAFE(rn, rn_next, node, &route_map) { | |
378 | hmap_remove(&route_map, &rn->node); | |
379 | free(rn); | |
380 | } | |
381 | } | |
382 | ||
383 | static uint32_t | |
384 | hash_route_data(const struct route_data *rd) | |
385 | { | |
386 | return hash_bytes(rd, sizeof *rd, 0); | |
387 | } | |
b46ccdf5 EJ |
388 | \f |
389 | /* name_table. */ | |
390 | ||
391 | static void | |
392 | name_table_init(void) | |
393 | { | |
394 | hmap_init(&name_map); | |
2ee6545f | 395 | name_notifier = rtnetlink_link_notifier_create(name_table_change, NULL); |
b46ccdf5 EJ |
396 | name_table_valid = false; |
397 | } | |
398 | ||
399 | static void | |
400 | name_table_uninit(void) | |
401 | { | |
2ee6545f EJ |
402 | rtnetlink_link_notifier_destroy(name_notifier); |
403 | name_notifier = NULL; | |
b46ccdf5 EJ |
404 | name_map_clear(); |
405 | hmap_destroy(&name_map); | |
406 | } | |
407 | ||
408 | static int | |
409 | name_table_reset(void) | |
410 | { | |
b46ccdf5 EJ |
411 | struct nl_dump dump; |
412 | struct rtgenmsg *rtmsg; | |
d57695d7 JS |
413 | uint64_t reply_stub[NL_DUMP_BUFSIZE / 8]; |
414 | struct ofpbuf request, reply, buf; | |
b46ccdf5 EJ |
415 | |
416 | name_table_valid = true; | |
417 | name_map_clear(); | |
b46ccdf5 EJ |
418 | |
419 | ofpbuf_init(&request, 0); | |
420 | nl_msg_put_nlmsghdr(&request, sizeof *rtmsg, RTM_GETLINK, NLM_F_REQUEST); | |
421 | rtmsg = ofpbuf_put_zeros(&request, sizeof *rtmsg); | |
422 | rtmsg->rtgen_family = AF_INET; | |
423 | ||
a88b4e04 | 424 | nl_dump_start(&dump, NETLINK_ROUTE, &request); |
896b3272 BP |
425 | ofpbuf_uninit(&request); |
426 | ||
d57695d7 JS |
427 | ofpbuf_use_stub(&buf, reply_stub, sizeof reply_stub); |
428 | while (nl_dump_next(&dump, &reply, &buf)) { | |
b46ccdf5 EJ |
429 | struct rtnetlink_link_change change; |
430 | ||
431 | if (rtnetlink_link_parse(&reply, &change) | |
432 | && change.nlmsg_type == RTM_NEWLINK | |
433 | && !name_node_lookup(change.ifi_index)) { | |
434 | struct name_node *nn; | |
435 | ||
436 | nn = xzalloc(sizeof *nn); | |
437 | nn->ifi_index = change.ifi_index; | |
e868fb3d | 438 | ovs_strlcpy(nn->ifname, change.ifname, IFNAMSIZ); |
b46ccdf5 EJ |
439 | hmap_insert(&name_map, &nn->node, hash_int(nn->ifi_index, 0)); |
440 | } | |
441 | } | |
d57695d7 | 442 | ofpbuf_uninit(&buf); |
b46ccdf5 EJ |
443 | return nl_dump_done(&dump); |
444 | } | |
445 | ||
446 | static void | |
447 | name_table_change(const struct rtnetlink_link_change *change OVS_UNUSED, | |
448 | void *aux OVS_UNUSED) | |
449 | { | |
450 | /* Changes to interface status can cause routing table changes that some | |
451 | * versions of the linux kernel do not advertise for some reason. */ | |
452 | route_table_valid = false; | |
453 | name_table_valid = false; | |
454 | } | |
455 | ||
456 | static struct name_node * | |
457 | name_node_lookup(int ifi_index) | |
458 | { | |
459 | struct name_node *nn; | |
460 | ||
461 | HMAP_FOR_EACH_WITH_HASH(nn, node, hash_int(ifi_index, 0), &name_map) { | |
462 | if (nn->ifi_index == ifi_index) { | |
463 | return nn; | |
464 | } | |
465 | } | |
466 | ||
467 | return NULL; | |
468 | } | |
469 | ||
470 | static void | |
471 | name_map_clear(void) | |
472 | { | |
473 | struct name_node *nn, *nn_next; | |
474 | ||
475 | HMAP_FOR_EACH_SAFE(nn, nn_next, node, &name_map) { | |
476 | hmap_remove(&name_map, &nn->node); | |
477 | free(nn); | |
478 | } | |
479 | } |