]> git.proxmox.com Git - ovs.git/blob - lib/route-table.c
tests: Fix deprecated use of qw.
[ovs.git] / lib / route-table.c
1 /*
2 * Copyright (c) 2011 Nicira Networks.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <config.h>
18
19 #include "route-table.h"
20
21 #include <assert.h>
22 #include <arpa/inet.h>
23 #include <sys/socket.h>
24 #include <linux/rtnetlink.h>
25 #include <net/if.h>
26
27 #include "hash.h"
28 #include "hmap.h"
29 #include "netlink.h"
30 #include "netlink-socket.h"
31 #include "ofpbuf.h"
32 #include "rtnetlink.h"
33 #include "rtnetlink-link.h"
34 #include "vlog.h"
35
36 VLOG_DEFINE_THIS_MODULE(route_table);
37
/* The parts of a kernel IPv4 route that this module keeps.
 *
 * The whole struct is hashed byte-by-byte (hash_route_data()) and compared
 * with memcmp() (route_node_lookup()), so instances must be zero-filled
 * before their members are set; otherwise any padding bytes could make two
 * otherwise identical routes look different. */
struct route_data {
    /* Copied from struct rtmsg. */
    unsigned char rtm_dst_len; /* Destination prefix length, in bits. */

    /* Extracted from Netlink attributes. */
    uint32_t rta_dst; /* Destination in host byte order. 0 if missing. */
    int rta_oif;      /* Output interface index. */
};
46
/* A digested version of a route message sent down by the kernel to indicate
 * that a route has changed.  Filled in by route_table_parse() and consumed by
 * route_table_handle_msg(). */
struct route_table_msg {
    bool relevant;        /* Should this message be processed? */
    int nlmsg_type;       /* e.g. RTM_NEWROUTE, RTM_DELROUTE. */
    struct route_data rd; /* Data parsed from this message. */
};
54
/* One cached route.  Nodes are heap-allocated by route_table_handle_msg() and
 * freed by route_map_clear(). */
struct route_node {
    struct hmap_node node; /* Node in route_map. */
    struct route_data rd;  /* Data associated with this node. */
};
59
/* One cached mapping from a kernel interface index to its name.  Nodes are
 * heap-allocated by name_table_reset() and freed by name_map_clear(). */
struct name_node {
    struct hmap_node node; /* Node in name_map. */
    uint32_t ifi_index;    /* Kernel interface index. */

    char ifname[IFNAMSIZ]; /* Interface name. */
};
66
/* Rate limiter shared by all log messages in this file. */
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);

/* Number of route_table_register() calls not yet matched by a
 * route_table_unregister(). */
static unsigned int register_count = 0;
/* rtnetlink handle for route notifications; NULL while nobody is
 * registered. */
static struct rtnetlink *rtn = NULL;
/* Passed to rtnetlink_create() together with route_table_parse(). */
static struct route_table_msg rtmsg;
/* Notifier that invokes route_table_change(). */
static struct rtnetlink_notifier route_notifier;
/* Notifier that invokes name_table_change(). */
static struct rtnetlink_notifier name_notifier;

/* When false, the corresponding map is rebuilt by the next lookup. */
static bool route_table_valid = false;
static bool name_table_valid = false;
static struct hmap route_map; /* Contains "struct route_node"s. */
static struct hmap name_map;  /* Contains "struct name_node"s. */
79
/* Route cache helpers. */
static int route_table_reset(void);
static void route_table_handle_msg(const struct route_table_msg *);
static bool route_table_parse(struct ofpbuf *, struct route_table_msg *);
static void route_table_change(const struct route_table_msg *, void *);
static struct route_node *route_node_lookup(const struct route_data *);
static struct route_node *route_node_lookup_by_ip(uint32_t ip);
static void route_map_clear(void);
static uint32_t hash_route_data(const struct route_data *);

/* Interface name cache helpers. */
static void name_table_init(void);
static void name_table_uninit(void);
static int name_table_reset(void);
static void name_table_change(const struct rtnetlink_link_change *, void *);
static void name_map_clear(void);
static struct name_node *name_node_lookup(int ifi_index);
95
96 /* Populates 'name' with the name of the interface traffic destined for 'ip'
97 * is likely to egress out of (see route_table_get_ifindex).
98 *
99 * Returns true if successful, otherwise false. */
100 bool
101 route_table_get_name(ovs_be32 ip, char name[IFNAMSIZ])
102 {
103 int ifindex;
104
105 if (!name_table_valid) {
106 name_table_reset();
107 }
108
109 if (route_table_get_ifindex(ip, &ifindex)) {
110 struct name_node *nn;
111
112 nn = name_node_lookup(ifindex);
113 if (nn) {
114 ovs_strlcpy(name, nn->ifname, IFNAMSIZ);
115 return true;
116 }
117 }
118
119 return false;
120 }
121
122 /* Populates 'ifindex' with the interface index traffic destined for 'ip' is
123 * likely to egress. There is no hard guarantee that traffic destined for 'ip'
124 * will egress out the specified interface. 'ifindex' may refer to an
125 * interface which is not physical (such as a bridge port).
126 *
127 * Returns true if successful, otherwise false. */
128 bool
129 route_table_get_ifindex(ovs_be32 ip_, int *ifindex)
130 {
131 struct route_node *rn;
132 uint32_t ip = ntohl(ip_);
133
134 *ifindex = 0;
135
136 if (!route_table_valid) {
137 route_table_reset();
138 }
139
140 rn = route_node_lookup_by_ip(ip);
141
142 if (rn) {
143 *ifindex = rn->rd.rta_oif;
144 return true;
145 }
146
147 /* Choose a default route. */
148 HMAP_FOR_EACH(rn, node, &route_map) {
149 if (rn->rd.rta_dst == 0 && rn->rd.rtm_dst_len == 0) {
150 *ifindex = rn->rd.rta_oif;
151 return true;
152 }
153 }
154
155 return false;
156 }
157
158 /* Users of the route_table module should register themselves with this
159 * function before making any other route_table function calls. */
160 void
161 route_table_register(void)
162 {
163 if (!register_count) {
164 rtnetlink_parse_func *pf;
165 rtnetlink_notify_func *nf;
166
167 assert(!rtn);
168
169 pf = (rtnetlink_parse_func *) route_table_parse;
170 nf = (rtnetlink_notify_func *) route_table_change;
171
172 rtn = rtnetlink_create(RTNLGRP_IPV4_ROUTE, pf, &rtmsg);
173 rtnetlink_notifier_register(rtn, &route_notifier, nf, NULL);
174
175 hmap_init(&route_map);
176 route_table_reset();
177 name_table_init();
178 }
179
180 register_count++;
181 }
182
183 /* Users of the route_table module should unregister themselves with this
184 * function when they will no longer be making any more route_table fuction
185 * calls. */
186 void
187 route_table_unregister(void)
188 {
189 register_count--;
190
191 if (!register_count) {
192 rtnetlink_destroy(rtn);
193 rtn = NULL;
194
195 route_map_clear();
196 hmap_destroy(&route_map);
197 name_table_uninit();
198 }
199 }
200
201 /* Run periodically to update the locally maintained routing table. */
202 void
203 route_table_run(void)
204 {
205 if (rtn) {
206 rtnetlink_link_notifier_run();
207 rtnetlink_notifier_run(rtn);
208 }
209 }
210
211 /* Causes poll_block() to wake up when route_table updates are required. */
212 void
213 route_table_wait(void)
214 {
215 if (rtn) {
216 rtnetlink_link_notifier_wait();
217 rtnetlink_notifier_wait(rtn);
218 }
219 }
220
221 static int
222 route_table_reset(void)
223 {
224 int error;
225 struct nl_dump dump;
226 struct rtgenmsg *rtmsg;
227 struct ofpbuf request, reply;
228 static struct nl_sock *rtnl_sock;
229
230 route_map_clear();
231 route_table_valid = true;
232
233 error = nl_sock_create(NETLINK_ROUTE, &rtnl_sock);
234 if (error) {
235 VLOG_WARN_RL(&rl, "failed to reset routing table, "
236 "cannot create RTNETLINK_ROUTE socket");
237 return error;
238 }
239
240 ofpbuf_init(&request, 0);
241
242 nl_msg_put_nlmsghdr(&request, sizeof *rtmsg, RTM_GETROUTE, NLM_F_REQUEST);
243
244 rtmsg = ofpbuf_put_zeros(&request, sizeof *rtmsg);
245 rtmsg->rtgen_family = AF_INET;
246
247 nl_dump_start(&dump, rtnl_sock, &request);
248
249 while (nl_dump_next(&dump, &reply)) {
250 struct route_table_msg msg;
251
252 if (route_table_parse(&reply, &msg)) {
253 route_table_handle_msg(&msg);
254 }
255 }
256
257 error = nl_dump_done(&dump);
258 nl_sock_destroy(rtnl_sock);
259
260 return error;
261 }
262
263
264 static bool
265 route_table_parse(struct ofpbuf *buf, struct route_table_msg *change)
266 {
267 bool parsed;
268
269 static const struct nl_policy policy[] = {
270 [RTA_DST] = { .type = NL_A_U32, .optional = true },
271 [RTA_OIF] = { .type = NL_A_U32, .optional = false },
272 };
273
274 static struct nlattr *attrs[ARRAY_SIZE(policy)];
275
276 parsed = nl_policy_parse(buf, NLMSG_HDRLEN + sizeof(struct rtmsg),
277 policy, attrs, ARRAY_SIZE(policy));
278
279 if (parsed) {
280 const struct rtmsg *rtm;
281 const struct nlmsghdr *nlmsg;
282
283 nlmsg = buf->data;
284 rtm = (const struct rtmsg *) ((const char *) buf->data + NLMSG_HDRLEN);
285
286 if (rtm->rtm_family != AF_INET) {
287 VLOG_DBG_RL(&rl, "received non AF_INET rtnetlink route message");
288 return false;
289 }
290
291 memset(change, 0, sizeof *change);
292 change->relevant = true;
293
294 if (rtm->rtm_scope == RT_SCOPE_NOWHERE) {
295 change->relevant = false;
296 }
297
298 if (rtm->rtm_type != RTN_UNICAST &&
299 rtm->rtm_type != RTN_LOCAL) {
300 change->relevant = false;
301 }
302
303 change->nlmsg_type = nlmsg->nlmsg_type;
304 change->rd.rtm_dst_len = rtm->rtm_dst_len;
305 change->rd.rta_oif = nl_attr_get_u32(attrs[RTA_OIF]);
306
307 if (attrs[RTA_DST]) {
308 change->rd.rta_dst = ntohl(nl_attr_get_be32(attrs[RTA_DST]));
309 }
310
311 } else {
312 VLOG_DBG_RL(&rl, "received unparseable rtnetlink route message");
313 }
314
315 return parsed;
316 }
317
318 static void
319 route_table_change(const struct route_table_msg *change OVS_UNUSED,
320 void *aux OVS_UNUSED)
321 {
322 route_table_valid = false;
323 }
324
325 static void
326 route_table_handle_msg(const struct route_table_msg *change)
327 {
328 if (change->relevant && change->nlmsg_type == RTM_NEWROUTE &&
329 !route_node_lookup(&change->rd)) {
330 struct route_node *rn;
331
332 rn = xzalloc(sizeof *rn);
333 memcpy(&rn->rd, &change->rd, sizeof change->rd);
334
335 hmap_insert(&route_map, &rn->node, hash_route_data(&rn->rd));
336 }
337 }
338
339 static struct route_node *
340 route_node_lookup(const struct route_data *rd)
341 {
342 struct route_node *rn;
343
344 HMAP_FOR_EACH_WITH_HASH(rn, node, hash_route_data(rd), &route_map) {
345 if (!memcmp(&rn->rd, rd, sizeof *rd)) {
346 return rn;
347 }
348 }
349
350 return NULL;
351 }
352
353 static struct route_node *
354 route_node_lookup_by_ip(uint32_t ip)
355 {
356 int dst_len;
357 struct route_node *rn, *rn_ret;
358
359 dst_len = -1;
360 rn_ret = NULL;
361
362 HMAP_FOR_EACH(rn, node, &route_map) {
363 uint32_t mask = 0xffffffff << (32 - rn->rd.rtm_dst_len);
364
365 if (rn->rd.rta_dst == 0 && rn->rd.rtm_dst_len == 0) {
366 /* Default route. */
367 continue;
368 }
369
370 if (rn->rd.rtm_dst_len > dst_len &&
371 (ip & mask) == (rn->rd.rta_dst & mask)) {
372 rn_ret = rn;
373 dst_len = rn->rd.rtm_dst_len;
374 }
375 }
376
377 return rn_ret;
378 }
379
380 static void
381 route_map_clear(void)
382 {
383 struct route_node *rn, *rn_next;
384
385 HMAP_FOR_EACH_SAFE(rn, rn_next, node, &route_map) {
386 hmap_remove(&route_map, &rn->node);
387 free(rn);
388 }
389 }
390
391 static uint32_t
392 hash_route_data(const struct route_data *rd)
393 {
394 return hash_bytes(rd, sizeof *rd, 0);
395 }
396 \f
/* name_table: cache of interface index to interface name mappings. */
398
399 static void
400 name_table_init(void)
401 {
402 hmap_init(&name_map);
403 rtnetlink_link_notifier_register(&name_notifier, name_table_change, NULL);
404 name_table_valid = false;
405 }
406
407 static void
408 name_table_uninit(void)
409 {
410 rtnetlink_link_notifier_unregister(&name_notifier);
411 name_map_clear();
412 hmap_destroy(&name_map);
413 }
414
415 static int
416 name_table_reset(void)
417 {
418 int error;
419 struct nl_dump dump;
420 struct rtgenmsg *rtmsg;
421 struct ofpbuf request, reply;
422 static struct nl_sock *rtnl_sock;
423
424 name_table_valid = true;
425 name_map_clear();
426 error = nl_sock_create(NETLINK_ROUTE, &rtnl_sock);
427 if (error) {
428 VLOG_WARN_RL(&rl, "failed to create NETLINK_ROUTE socket");
429 return error;
430 }
431
432 ofpbuf_init(&request, 0);
433 nl_msg_put_nlmsghdr(&request, sizeof *rtmsg, RTM_GETLINK, NLM_F_REQUEST);
434 rtmsg = ofpbuf_put_zeros(&request, sizeof *rtmsg);
435 rtmsg->rtgen_family = AF_INET;
436
437 nl_dump_start(&dump, rtnl_sock, &request);
438 while (nl_dump_next(&dump, &reply)) {
439 struct rtnetlink_link_change change;
440
441 if (rtnetlink_link_parse(&reply, &change)
442 && change.nlmsg_type == RTM_NEWLINK
443 && !name_node_lookup(change.ifi_index)) {
444 struct name_node *nn;
445
446 nn = xzalloc(sizeof *nn);
447 nn->ifi_index = change.ifi_index;
448 ovs_strlcpy(nn->ifname, change.ifname, IFNAMSIZ);
449 hmap_insert(&name_map, &nn->node, hash_int(nn->ifi_index, 0));
450 }
451 }
452 nl_sock_destroy(rtnl_sock);
453 return nl_dump_done(&dump);
454 }
455
456 static void
457 name_table_change(const struct rtnetlink_link_change *change OVS_UNUSED,
458 void *aux OVS_UNUSED)
459 {
460 /* Changes to interface status can cause routing table changes that some
461 * versions of the linux kernel do not advertise for some reason. */
462 route_table_valid = false;
463 name_table_valid = false;
464 }
465
466 static struct name_node *
467 name_node_lookup(int ifi_index)
468 {
469 struct name_node *nn;
470
471 HMAP_FOR_EACH_WITH_HASH(nn, node, hash_int(ifi_index, 0), &name_map) {
472 if (nn->ifi_index == ifi_index) {
473 return nn;
474 }
475 }
476
477 return NULL;
478 }
479
480 static void
481 name_map_clear(void)
482 {
483 struct name_node *nn, *nn_next;
484
485 HMAP_FOR_EACH_SAFE(nn, nn_next, node, &name_map) {
486 hmap_remove(&name_map, &nn->node);
487 free(nn);
488 }
489 }