]>
Commit | Line | Data |
---|---|---|
064af421 | 1 | /* |
7aec165d | 2 | * Copyright (c) 2008, 2009, 2010, 2011 Nicira Networks. |
064af421 | 3 | * |
a14bc59f BP |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
064af421 | 7 | * |
a14bc59f BP |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
064af421 BP |
15 | */ |
16 | ||
17 | #include <config.h> | |
18 | #include "in-band.h" | |
19 | #include <arpa/inet.h> | |
20 | #include <errno.h> | |
21 | #include <inttypes.h> | |
9d82ec47 | 22 | #include <sys/socket.h> |
064af421 BP |
23 | #include <net/if.h> |
24 | #include <string.h> | |
26d9fe3b | 25 | #include <stdlib.h> |
cf3fad8a | 26 | #include "classifier.h" |
0ad9b732 JP |
27 | #include "dhcp.h" |
28 | #include "dpif.h" | |
064af421 | 29 | #include "flow.h" |
064af421 | 30 | #include "netdev.h" |
cdee00fd | 31 | #include "netlink.h" |
064af421 | 32 | #include "odp-util.h" |
064af421 BP |
33 | #include "ofproto.h" |
34 | #include "ofpbuf.h" | |
35 | #include "openflow/openflow.h" | |
36 | #include "packets.h" | |
37 | #include "poll-loop.h" | |
d08a2e92 | 38 | #include "private.h" |
064af421 | 39 | #include "timeval.h" |
064af421 BP |
40 | #include "vlog.h" |
41 | ||
d98e6007 | 42 | VLOG_DEFINE_THIS_MODULE(in_band); |
5136ce49 | 43 | |
ee8b231c BP |
44 | /* In-band control allows a single network to be used for OpenFlow traffic and |
45 | * other data traffic. See ovs-vswitchd.conf.db(5) for a description of | |
46 | * configuring in-band control. | |
85088747 JP |
47 | * |
48 | * This comment is an attempt to describe how in-band control works at a | |
49 | * wire- and implementation-level. Correctly implementing in-band | |
50 | * control has proven difficult due to its many subtleties, and has thus | |
51 | * gone through many iterations. Please read through and understand the | |
52 | * reasoning behind the chosen rules before making modifications. | |
53 | * | |
d2ede7bc BP |
54 | * In Open vSwitch, in-band control is implemented as "hidden" flows (in that |
55 | * they are not visible through OpenFlow) and at a higher priority than | |
56 | * wildcarded flows can be set up through OpenFlow. This is done so that | |
57 | * the OpenFlow controller cannot interfere with them and possibly break | |
58 | * connectivity with its switches. It is possible to see all flows, including | |
59 | * in-band ones, with the ovs-appctl "bridge/dump-flows" command. | |
85088747 | 60 | * |
d2ede7bc BP |
61 | * The Open vSwitch implementation of in-band control can hide traffic to |
62 | * arbitrary "remotes", where each remote is one TCP port on one IP address. | |
63 | * Currently the remotes are automatically configured as the in-band OpenFlow | |
64 | * controllers plus the OVSDB managers, if any. (The latter is a requirement | |
65 | * because OVSDB managers are responsible for configuring OpenFlow controllers, | |
66 | * so if the manager cannot be reached then OpenFlow cannot be reconfigured.) | |
85088747 | 67 | * |
d2ede7bc BP |
68 | * The following rules (with the OFPP_NORMAL action) are set up on any bridge |
69 | * that has any remotes: | |
70 | * | |
71 | * (a) DHCP requests sent from the local port. | |
72 | * (b) ARP replies to the local port's MAC address. | |
73 | * (c) ARP requests from the local port's MAC address. | |
74 | * | |
75 | * In-band also sets up the following rules for each unique next-hop MAC | |
76 | * address for the remotes' IPs (the "next hop" is either the remote | |
77 | * itself, if it is on a local subnet, or the gateway to reach the remote): | |
d295e8e9 | 78 | * |
d2ede7bc BP |
79 | * (d) ARP replies to the next hop's MAC address. |
80 | * (e) ARP requests from the next hop's MAC address. | |
81 | * | |
82 | * In-band also sets up the following rules for each unique remote IP address: | |
83 | * | |
84 | * (f) ARP replies containing the remote's IP address as a target. | |
85 | * (g) ARP requests containing the remote's IP address as a source. | |
86 | * | |
87 | * In-band also sets up the following rules for each unique remote (IP,port) | |
88 | * pair: | |
89 | * | |
90 | * (h) TCP traffic to the remote's IP and port. | |
91 | * (i) TCP traffic from the remote's IP and port. | |
85088747 JP |
92 | * |
93 | * The goal of these rules is to be as narrow as possible to allow a | |
d2ede7bc BP |
94 | * switch to join a network and be able to communicate with the |
95 | * remotes. As mentioned earlier, these rules have higher priority | |
d295e8e9 | 96 | * than the controller's rules, so if they are too broad, they may |
85088747 JP |
97 | * prevent the controller from implementing its policy. As such, |
98 | * in-band actively monitors some aspects of flow and packet processing | |
99 | * so that the rules can be made more precise. | |
100 | * | |
101 | * In-band control monitors attempts to add flows into the datapath that | |
102 | * could interfere with its duties. The datapath only allows exact | |
103 | * match entries, so in-band control is able to be very precise about | |
104 | * the flows it prevents. Flows that miss in the datapath are sent to | |
105 | * userspace to be processed, so preventing these flows from being | |
d295e8e9 JP |
106 | * cached in the "fast path" does not affect correctness. The only type |
107 | * of flow that is currently prevented is one that would prevent DHCP | |
108 | * replies from being seen by the local port. For example, a rule that | |
109 | * forwarded all DHCP traffic to the controller would not be allowed, | |
85088747 JP |
110 | * but one that forwarded to all ports (including the local port) would. |
111 | * | |
112 | * As mentioned earlier, packets that miss in the datapath are sent to | |
113 | * the userspace for processing. The userspace has its own flow table, | |
d295e8e9 JP |
114 | * the "classifier", so in-band checks whether any special processing |
115 | * is needed before the classifier is consulted. If a packet is a DHCP | |
116 | * response to a request from the local port, the packet is forwarded to | |
117 | * the local port, regardless of the flow table. Note that this requires | |
118 | * L7 processing of DHCP replies to determine whether the 'chaddr' field | |
85088747 JP |
119 | * matches the MAC address of the local port. |
120 | * | |
121 | * It is interesting to note that for an L3-based in-band control | |
d295e8e9 JP |
122 | * mechanism, the majority of rules are devoted to ARP traffic. At first |
123 | * glance, some of these rules appear redundant. However, each serves an | |
124 | * important role. First, in order to determine the MAC address of the | |
125 | * remote side (controller or gateway) for other ARP rules, we must allow | |
126 | * ARP traffic for our local port with rules (b) and (c). If we are | |
127 | * between a switch and its connection to the remote, we have to | |
128 | * allow the other switch's ARP traffic to get through. This is done with | |
85088747 | 129 | * rules (d) and (e), since we do not know the addresses of the other |
d295e8e9 JP |
130 | * switches a priori, but do know the remote's or gateway's. Finally, |
131 | * if the remote is running in a local guest VM that is not reached | |
132 | * through the local port, the switch that is connected to the VM must | |
133 | * allow ARP traffic based on the remote's IP address, since it will | |
134 | * not know the MAC address of the local port that is sending the traffic | |
d2ede7bc | 135 | * or the MAC address of the remote in the guest VM. |
85088747 JP |
136 | * |
137 | * With a few notable exceptions below, in-band should work in most | |
138 | * network setups. The following are considered "supported" in the | |
d295e8e9 | 139 | * current implementation: |
85088747 | 140 | * |
d2ede7bc | 141 | * - Locally Connected. The switch and remote are on the same |
85088747 JP |
142 | * subnet. This uses rules (a), (b), (c), (h), and (i). |
143 | * | |
d2ede7bc | 144 | * - Reached through Gateway. The switch and remote are on |
85088747 JP |
145 | * different subnets and must go through a gateway. This uses |
146 | * rules (a), (b), (c), (h), and (i). | |
147 | * | |
d2ede7bc BP |
148 | * - Between Switch and Remote. This switch is between another |
149 | * switch and the remote, and we want to allow the other | |
85088747 JP |
150 | * switch's traffic through. This uses rules (d), (e), (h), and |
151 | * (i). It uses (b) and (c) indirectly in order to know the MAC | |
152 | * address for rules (d) and (e). Note that DHCP for the other | |
d2ede7bc | 153 | * switch will not work unless an OpenFlow controller explicitly lets this |
85088747 JP |
154 | * switch pass the traffic. |
155 | * | |
156 | * - Between Switch and Gateway. This switch is between another | |
157 | * switch and the gateway, and we want to allow the other switch's | |
158 | * traffic through. This uses the same rules and logic as the | |
d2ede7bc | 159 | * "Between Switch and Remote" configuration described earlier. |
85088747 | 160 | * |
d2ede7bc | 161 | * - Remote on Local VM. The remote is a guest VM on the |
d295e8e9 | 162 | * system running in-band control. This uses rules (a), (b), (c), |
85088747 JP |
163 | * (h), and (i). |
164 | * | |
d2ede7bc | 165 | * - Remote on Local VM with Different Networks. The remote |
85088747 | 166 | * is a guest VM on the system running in-band control, but the |
d2ede7bc | 167 | * local port is not used to connect to the remote. For |
85088747 | 168 | * example, an IP address is configured on eth0 of the switch. The |
d2ede7bc | 169 | * remote's VM is connected through eth1 of the switch, but an |
85088747 | 170 | * IP address has not been configured for that port on the switch. |
d2ede7bc | 171 | * As such, the switch will use eth0 to connect to the remote, |
85088747 | 172 | * and eth1's rules about the local port will not work. In the |
d295e8e9 JP |
173 | * example, the switch attached to eth0 would use rules (a), (b), |
174 | * (c), (h), and (i) on eth0. The switch attached to eth1 would use | |
85088747 JP |
175 | * rules (f), (g), (h), and (i). |
176 | * | |
177 | * The following are explicitly *not* supported by in-band control: | |
178 | * | |
d295e8e9 | 179 | * - Specify Remote by Name. Currently, the remote must be |
85088747 JP |
180 | * identified by IP address. A naive approach would be to permit |
181 | * all DNS traffic. Unfortunately, this would prevent the | |
182 | * controller from defining any policy over DNS. Since switches | |
d295e8e9 | 183 | * that are located behind us need to connect to the remote, |
85088747 JP |
184 | * in-band cannot simply add a rule that allows DNS traffic from |
185 | * the local port. The "correct" way to support this is to parse | |
186 | * DNS requests to allow all traffic related to a request for the | |
d2ede7bc | 187 | * remote's name through. Due to the potential security |
85088747 JP |
188 | * problems and amount of processing, we decided to hold off for |
189 | * the time being. | |
190 | * | |
d2ede7bc | 191 | * - Differing Remotes for Switches. All switches must know |
d295e8e9 | 192 | * the L3 addresses for all the remotes that other switches |
d6fbec6d | 193 | * may use, since rules need to be set up to allow traffic related |
d2ede7bc | 194 | * to those remotes through. See rules (f), (g), (h), and (i). |
85088747 | 195 | * |
d295e8e9 JP |
196 | * - Differing Routes for Switches. In order for the switch to |
197 | * allow other switches to connect to a remote through a | |
85088747 | 198 | * gateway, it allows the gateway's traffic through with rules (d) |
d2ede7bc | 199 | * and (e). If the routes to the remote differ for the two |
d295e8e9 | 200 | * switches, we will not know the MAC address of the alternate |
85088747 JP |
201 | * gateway. |
202 | */ | |
203 | ||
0ade584e BP |
/* Classifier priorities for the flows that in-band control installs.
 *
 * All of them are above anything that may be set with OpenFlow ("18" is meant
 * to resemble "IB").  Every in-band rule carries the same action, so the
 * relative order of these values does not matter for forwarding; distinct
 * values exist only to make each kind of flow easy to spot while debugging. */
enum {
    /* Installed once per bridge. */
    IBR_FROM_LOCAL_DHCP = 180000, /* (a) DHCP sent from the local port. */
    IBR_TO_LOCAL_ARP,             /* (b) ARP to the local port. */
    IBR_FROM_LOCAL_ARP,           /* (c) ARP from the local port. */

    /* Installed once per distinct next-hop MAC. */
    IBR_TO_NEXT_HOP_ARP,          /* (d) ARP to the next-hop MAC. */
    IBR_FROM_NEXT_HOP_ARP,        /* (e) ARP from the next-hop MAC. */

    /* Installed once per distinct remote IP address. */
    IBR_TO_REMOTE_ARP,            /* (f) ARP targeting the remote IP. */
    IBR_FROM_REMOTE_ARP,          /* (g) ARP sourced from the remote IP. */

    /* Installed once per distinct remote (IP, port) pair. */
    IBR_TO_REMOTE_TCP,            /* (h) TCP to the remote IP and port. */
    IBR_FROM_REMOTE_TCP           /* (i) TCP from the remote IP and port. */
};
227 | ||
0ade584e BP |
228 | /* Track one remote IP and next hop information. */ |
229 | struct in_band_remote { | |
d2ede7bc | 230 | struct sockaddr_in remote_addr; /* IP address, in network byte order. */ |
0ade584e BP |
231 | uint8_t remote_mac[ETH_ADDR_LEN]; /* Next-hop MAC, all-zeros if unknown. */ |
232 | uint8_t last_remote_mac[ETH_ADDR_LEN]; /* Previous nonzero next-hop MAC. */ | |
233 | struct netdev *remote_netdev; /* Device to send to next-hop MAC. */ | |
234 | }; | |
235 | ||
064af421 BP |
236 | struct in_band { |
237 | struct ofproto *ofproto; | |
b1da6250 | 238 | int queue_id, prev_queue_id; |
064af421 | 239 | |
0ade584e BP |
240 | /* Remote information. */ |
241 | time_t next_remote_refresh; /* Refresh timer. */ | |
242 | struct in_band_remote *remotes; | |
243 | size_t n_remotes; | |
244 | ||
245 | /* Local information. */ | |
246 | time_t next_local_refresh; /* Refresh timer. */ | |
247 | uint8_t local_mac[ETH_ADDR_LEN]; /* Current MAC. */ | |
248 | struct netdev *local_netdev; /* Local port's network device. */ | |
249 | ||
250 | /* Local and remote addresses that are installed as flows. */ | |
251 | uint8_t installed_local_mac[ETH_ADDR_LEN]; | |
d2ede7bc BP |
252 | struct sockaddr_in *remote_addrs; |
253 | size_t n_remote_addrs; | |
0ade584e BP |
254 | uint8_t *remote_macs; |
255 | size_t n_remote_macs; | |
064af421 BP |
256 | }; |
257 | ||
/* Rate limiter shared by this file's rate-limited (*_RL) log messages. */
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(60, 60);
259 | ||
0ade584e BP |
260 | static int |
261 | refresh_remote(struct in_band *ib, struct in_band_remote *r) | |
064af421 | 262 | { |
0ade584e | 263 | struct in_addr next_hop_inaddr; |
f1acd62b | 264 | char *next_hop_dev; |
0ade584e | 265 | int retval; |
064af421 | 266 | |
0ade584e | 267 | /* Find the next-hop IP address. */ |
d2ede7bc BP |
268 | memset(r->remote_mac, 0, sizeof r->remote_mac); |
269 | retval = netdev_get_next_hop(ib->local_netdev, &r->remote_addr.sin_addr, | |
0ade584e BP |
270 | &next_hop_inaddr, &next_hop_dev); |
271 | if (retval) { | |
272 | VLOG_WARN("cannot find route for controller ("IP_FMT"): %s", | |
d2ede7bc | 273 | IP_ARGS(&r->remote_addr.sin_addr), strerror(retval)); |
0ade584e BP |
274 | return 1; |
275 | } | |
276 | if (!next_hop_inaddr.s_addr) { | |
d2ede7bc | 277 | next_hop_inaddr = r->remote_addr.sin_addr; |
0ade584e | 278 | } |
c752217a | 279 | |
d2ede7bc | 280 | /* Open the next-hop network device. */ |
0ade584e BP |
281 | if (!r->remote_netdev |
282 | || strcmp(netdev_get_name(r->remote_netdev), next_hop_dev)) | |
283 | { | |
284 | netdev_close(r->remote_netdev); | |
064af421 | 285 | |
0ade584e | 286 | retval = netdev_open_default(next_hop_dev, &r->remote_netdev); |
0ad9b732 | 287 | if (retval) { |
0ade584e BP |
288 | VLOG_WARN_RL(&rl, "cannot open netdev %s (next hop " |
289 | "to controller "IP_FMT"): %s", | |
d2ede7bc | 290 | next_hop_dev, IP_ARGS(&r->remote_addr.sin_addr), |
0ade584e BP |
291 | strerror(retval)); |
292 | free(next_hop_dev); | |
293 | return 1; | |
064af421 | 294 | } |
0ade584e BP |
295 | } |
296 | free(next_hop_dev); | |
297 | ||
298 | /* Look up the MAC address of the next-hop IP address. */ | |
299 | retval = netdev_arp_lookup(r->remote_netdev, next_hop_inaddr.s_addr, | |
300 | r->remote_mac); | |
301 | if (retval) { | |
302 | VLOG_DBG_RL(&rl, "cannot look up remote MAC address ("IP_FMT"): %s", | |
303 | IP_ARGS(&next_hop_inaddr.s_addr), strerror(retval)); | |
064af421 | 304 | } |
0ad9b732 | 305 | |
6dee2066 BP |
306 | /* If we don't have a MAC address, then refresh quickly, since we probably |
307 | * will get a MAC address soon (via ARP). Otherwise, we can afford to wait | |
308 | * a little while. */ | |
309 | return eth_addr_is_zero(r->remote_mac) ? 1 : 10; | |
064af421 BP |
310 | } |
311 | ||
0ade584e BP |
312 | static bool |
313 | refresh_remotes(struct in_band *ib) | |
064af421 | 314 | { |
0ade584e BP |
315 | struct in_band_remote *r; |
316 | bool any_changes; | |
0ade584e BP |
317 | |
318 | if (time_now() < ib->next_remote_refresh) { | |
319 | return false; | |
320 | } | |
321 | ||
322 | any_changes = false; | |
5dbdfff7 | 323 | ib->next_remote_refresh = TIME_MAX; |
0ade584e BP |
324 | for (r = ib->remotes; r < &ib->remotes[ib->n_remotes]; r++) { |
325 | uint8_t old_remote_mac[ETH_ADDR_LEN]; | |
5dbdfff7 | 326 | time_t next_refresh; |
0ade584e | 327 | |
6dee2066 | 328 | /* Save old MAC. */ |
0ade584e BP |
329 | memcpy(old_remote_mac, r->remote_mac, ETH_ADDR_LEN); |
330 | ||
331 | /* Refresh remote information. */ | |
5dbdfff7 BP |
332 | next_refresh = refresh_remote(ib, r) + time_now(); |
333 | ib->next_remote_refresh = MIN(ib->next_remote_refresh, next_refresh); | |
0ade584e | 334 | |
6dee2066 | 335 | /* If the MAC changed, log the changes. */ |
0ade584e BP |
336 | if (!eth_addr_equals(r->remote_mac, old_remote_mac)) { |
337 | any_changes = true; | |
338 | if (!eth_addr_is_zero(r->remote_mac) | |
339 | && !eth_addr_equals(r->last_remote_mac, r->remote_mac)) { | |
340 | VLOG_DBG("remote MAC address changed from "ETH_ADDR_FMT | |
341 | " to "ETH_ADDR_FMT, | |
342 | ETH_ADDR_ARGS(r->last_remote_mac), | |
343 | ETH_ADDR_ARGS(r->remote_mac)); | |
344 | memcpy(r->last_remote_mac, r->remote_mac, ETH_ADDR_LEN); | |
345 | } | |
064af421 | 346 | } |
064af421 | 347 | } |
0ade584e BP |
348 | |
349 | return any_changes; | |
064af421 BP |
350 | } |
351 | ||
0ade584e BP |
352 | /* Refreshes the MAC address of the local port into ib->local_mac, if it is due |
353 | * for a refresh. Returns true if anything changed, otherwise false. */ | |
354 | static bool | |
355 | refresh_local(struct in_band *ib) | |
064af421 | 356 | { |
0ade584e BP |
357 | uint8_t ea[ETH_ADDR_LEN]; |
358 | time_t now; | |
064af421 | 359 | |
0ade584e BP |
360 | now = time_now(); |
361 | if (now < ib->next_local_refresh) { | |
362 | return false; | |
064af421 | 363 | } |
0ade584e | 364 | ib->next_local_refresh = now + 1; |
064af421 | 365 | |
0ade584e BP |
366 | if (netdev_get_etheraddr(ib->local_netdev, ea) |
367 | || eth_addr_equals(ea, ib->local_mac)) { | |
368 | return false; | |
064af421 | 369 | } |
064af421 | 370 | |
0ade584e BP |
371 | memcpy(ib->local_mac, ea, ETH_ADDR_LEN); |
372 | return true; | |
064af421 BP |
373 | } |
374 | ||
0ad9b732 | 375 | /* Returns true if 'packet' should be sent to the local port regardless |
d295e8e9 | 376 | * of the flow table. */ |
0ad9b732 | 377 | bool |
ae412e7d | 378 | in_band_msg_in_hook(struct in_band *in_band, const struct flow *flow, |
0ad9b732 JP |
379 | const struct ofpbuf *packet) |
380 | { | |
0ad9b732 JP |
381 | /* Regardless of how the flow table is configured, we want to be |
382 | * able to see replies to our DHCP requests. */ | |
383 | if (flow->dl_type == htons(ETH_TYPE_IP) | |
6767a2cc | 384 | && flow->nw_proto == IPPROTO_UDP |
0ad9b732 JP |
385 | && flow->tp_src == htons(DHCP_SERVER_PORT) |
386 | && flow->tp_dst == htons(DHCP_CLIENT_PORT) | |
387 | && packet->l7) { | |
388 | struct dhcp_header *dhcp; | |
0ad9b732 JP |
389 | |
390 | dhcp = ofpbuf_at(packet, (char *)packet->l7 - (char *)packet->data, | |
391 | sizeof *dhcp); | |
392 | if (!dhcp) { | |
393 | return false; | |
394 | } | |
395 | ||
0ade584e BP |
396 | refresh_local(in_band); |
397 | if (!eth_addr_is_zero(in_band->local_mac) | |
398 | && eth_addr_equals(dhcp->chaddr, in_band->local_mac)) { | |
0ad9b732 JP |
399 | return true; |
400 | } | |
401 | } | |
402 | ||
403 | return false; | |
404 | } | |
405 | ||
d295e8e9 | 406 | /* Returns true if the rule that would match 'flow' with 'actions' is |
0ad9b732 JP |
407 | * allowed to be set up in the datapath. */ |
408 | bool | |
19a87e36 | 409 | in_band_rule_check(const struct flow *flow, |
cf22f8cb | 410 | const struct nlattr *actions, size_t actions_len) |
0ad9b732 | 411 | { |
0ad9b732 JP |
412 | /* Don't allow flows that would prevent DHCP replies from being seen |
413 | * by the local port. */ | |
414 | if (flow->dl_type == htons(ETH_TYPE_IP) | |
6767a2cc | 415 | && flow->nw_proto == IPPROTO_UDP |
d295e8e9 | 416 | && flow->tp_src == htons(DHCP_SERVER_PORT) |
0ad9b732 | 417 | && flow->tp_dst == htons(DHCP_CLIENT_PORT)) { |
cdee00fd BP |
418 | const struct nlattr *a; |
419 | unsigned int left; | |
0ad9b732 | 420 | |
cdee00fd | 421 | NL_ATTR_FOR_EACH_UNSAFE (a, left, actions, actions_len) { |
7aec165d | 422 | if (nl_attr_type(a) == ODP_ACTION_ATTR_OUTPUT |
cdee00fd | 423 | && nl_attr_get_u32(a) == ODPP_LOCAL) { |
0ad9b732 | 424 | return true; |
d295e8e9 | 425 | } |
0ad9b732 JP |
426 | } |
427 | return false; | |
428 | } | |
429 | ||
430 | return true; | |
431 | } | |
432 | ||
0ade584e BP |
433 | static void |
434 | make_rules(struct in_band *ib, | |
cf3fad8a | 435 | void (*cb)(struct in_band *, const struct cls_rule *)) |
0ade584e | 436 | { |
cf3fad8a | 437 | struct cls_rule rule; |
0ade584e BP |
438 | size_t i; |
439 | ||
440 | if (!eth_addr_is_zero(ib->installed_local_mac)) { | |
d2ede7bc | 441 | /* (a) Allow DHCP requests sent from the local port. */ |
cf3fad8a | 442 | cls_rule_init_catchall(&rule, IBR_FROM_LOCAL_DHCP); |
64420dfa BP |
443 | cls_rule_set_in_port(&rule, ODPP_LOCAL); |
444 | cls_rule_set_dl_type(&rule, htons(ETH_TYPE_IP)); | |
445 | cls_rule_set_dl_src(&rule, ib->installed_local_mac); | |
6767a2cc | 446 | cls_rule_set_nw_proto(&rule, IPPROTO_UDP); |
64420dfa BP |
447 | cls_rule_set_tp_src(&rule, htons(DHCP_CLIENT_PORT)); |
448 | cls_rule_set_tp_dst(&rule, htons(DHCP_SERVER_PORT)); | |
0ade584e | 449 | cb(ib, &rule); |
0ad9b732 | 450 | |
d2ede7bc | 451 | /* (b) Allow ARP replies to the local port's MAC address. */ |
cf3fad8a | 452 | cls_rule_init_catchall(&rule, IBR_TO_LOCAL_ARP); |
64420dfa BP |
453 | cls_rule_set_dl_type(&rule, htons(ETH_TYPE_ARP)); |
454 | cls_rule_set_dl_dst(&rule, ib->installed_local_mac); | |
455 | cls_rule_set_nw_proto(&rule, ARP_OP_REPLY); | |
0ade584e | 456 | cb(ib, &rule); |
26d9fe3b | 457 | |
d2ede7bc | 458 | /* (c) Allow ARP requests from the local port's MAC address. */ |
cf3fad8a | 459 | cls_rule_init_catchall(&rule, IBR_FROM_LOCAL_ARP); |
64420dfa BP |
460 | cls_rule_set_dl_type(&rule, htons(ETH_TYPE_ARP)); |
461 | cls_rule_set_dl_src(&rule, ib->installed_local_mac); | |
462 | cls_rule_set_nw_proto(&rule, ARP_OP_REQUEST); | |
0ade584e | 463 | cb(ib, &rule); |
0ad9b732 | 464 | } |
a5f37a2d | 465 | |
0ade584e BP |
466 | for (i = 0; i < ib->n_remote_macs; i++) { |
467 | const uint8_t *remote_mac = &ib->remote_macs[i * ETH_ADDR_LEN]; | |
468 | ||
469 | if (i > 0) { | |
470 | const uint8_t *prev_mac = &ib->remote_macs[(i - 1) * ETH_ADDR_LEN]; | |
471 | if (eth_addr_equals(remote_mac, prev_mac)) { | |
472 | /* Skip duplicates. */ | |
473 | continue; | |
474 | } | |
475 | } | |
476 | ||
d2ede7bc | 477 | /* (d) Allow ARP replies to the next hop's MAC address. */ |
cf3fad8a | 478 | cls_rule_init_catchall(&rule, IBR_TO_NEXT_HOP_ARP); |
64420dfa BP |
479 | cls_rule_set_dl_type(&rule, htons(ETH_TYPE_ARP)); |
480 | cls_rule_set_dl_dst(&rule, remote_mac); | |
481 | cls_rule_set_nw_proto(&rule, ARP_OP_REPLY); | |
0ade584e BP |
482 | cb(ib, &rule); |
483 | ||
d2ede7bc | 484 | /* (e) Allow ARP requests from the next hop's MAC address. */ |
cf3fad8a | 485 | cls_rule_init_catchall(&rule, IBR_FROM_NEXT_HOP_ARP); |
64420dfa BP |
486 | cls_rule_set_dl_type(&rule, htons(ETH_TYPE_ARP)); |
487 | cls_rule_set_dl_src(&rule, remote_mac); | |
488 | cls_rule_set_nw_proto(&rule, ARP_OP_REQUEST); | |
0ade584e | 489 | cb(ib, &rule); |
064af421 BP |
490 | } |
491 | ||
d2ede7bc BP |
492 | for (i = 0; i < ib->n_remote_addrs; i++) { |
493 | const struct sockaddr_in *a = &ib->remote_addrs[i]; | |
494 | ||
495 | if (!i || a->sin_addr.s_addr != a[-1].sin_addr.s_addr) { | |
496 | /* (f) Allow ARP replies containing the remote's IP address as a | |
497 | * target. */ | |
cf3fad8a | 498 | cls_rule_init_catchall(&rule, IBR_TO_REMOTE_ARP); |
64420dfa BP |
499 | cls_rule_set_dl_type(&rule, htons(ETH_TYPE_ARP)); |
500 | cls_rule_set_nw_proto(&rule, ARP_OP_REPLY); | |
501 | cls_rule_set_nw_dst(&rule, a->sin_addr.s_addr); | |
d2ede7bc BP |
502 | cb(ib, &rule); |
503 | ||
504 | /* (g) Allow ARP requests containing the remote's IP address as a | |
505 | * source. */ | |
cf3fad8a | 506 | cls_rule_init_catchall(&rule, IBR_FROM_REMOTE_ARP); |
64420dfa BP |
507 | cls_rule_set_dl_type(&rule, htons(ETH_TYPE_ARP)); |
508 | cls_rule_set_nw_proto(&rule, ARP_OP_REQUEST); | |
509 | cls_rule_set_nw_src(&rule, a->sin_addr.s_addr); | |
d2ede7bc | 510 | cb(ib, &rule); |
0ade584e BP |
511 | } |
512 | ||
d2ede7bc BP |
513 | if (!i |
514 | || a->sin_addr.s_addr != a[-1].sin_addr.s_addr | |
515 | || a->sin_port != a[-1].sin_port) { | |
516 | /* (h) Allow TCP traffic to the remote's IP and port. */ | |
cf3fad8a | 517 | cls_rule_init_catchall(&rule, IBR_TO_REMOTE_TCP); |
64420dfa | 518 | cls_rule_set_dl_type(&rule, htons(ETH_TYPE_IP)); |
6767a2cc | 519 | cls_rule_set_nw_proto(&rule, IPPROTO_TCP); |
64420dfa BP |
520 | cls_rule_set_nw_dst(&rule, a->sin_addr.s_addr); |
521 | cls_rule_set_tp_dst(&rule, a->sin_port); | |
d2ede7bc BP |
522 | cb(ib, &rule); |
523 | ||
524 | /* (i) Allow TCP traffic from the remote's IP and port. */ | |
cf3fad8a | 525 | cls_rule_init_catchall(&rule, IBR_FROM_REMOTE_TCP); |
64420dfa | 526 | cls_rule_set_dl_type(&rule, htons(ETH_TYPE_IP)); |
6767a2cc | 527 | cls_rule_set_nw_proto(&rule, IPPROTO_TCP); |
64420dfa BP |
528 | cls_rule_set_nw_src(&rule, a->sin_addr.s_addr); |
529 | cls_rule_set_tp_src(&rule, a->sin_port); | |
d2ede7bc BP |
530 | cb(ib, &rule); |
531 | } | |
064af421 BP |
532 | } |
533 | } | |
534 | ||
0ade584e | 535 | static void |
cf3fad8a | 536 | drop_rule(struct in_band *ib, const struct cls_rule *rule) |
0ade584e | 537 | { |
cf3fad8a | 538 | ofproto_delete_flow(ib->ofproto, rule); |
0ade584e BP |
539 | } |
540 | ||
c16e55cf BP |
541 | /* Drops from the flow table all of the flows set up by 'ib', then clears out |
542 | * the information about the installed flows so that they can be filled in | |
543 | * again if necessary. */ | |
0ade584e BP |
544 | static void |
545 | drop_rules(struct in_band *ib) | |
546 | { | |
c16e55cf | 547 | /* Drop rules. */ |
0ade584e | 548 | make_rules(ib, drop_rule); |
c16e55cf BP |
549 | |
550 | /* Clear out state. */ | |
551 | memset(ib->installed_local_mac, 0, sizeof ib->installed_local_mac); | |
552 | ||
d2ede7bc BP |
553 | free(ib->remote_addrs); |
554 | ib->remote_addrs = NULL; | |
555 | ib->n_remote_addrs = 0; | |
c16e55cf BP |
556 | |
557 | free(ib->remote_macs); | |
558 | ib->remote_macs = NULL; | |
559 | ib->n_remote_macs = 0; | |
0ade584e BP |
560 | } |
561 | ||
562 | static void | |
cf3fad8a | 563 | add_rule(struct in_band *ib, const struct cls_rule *rule) |
0ade584e | 564 | { |
b1da6250 BP |
565 | struct { |
566 | struct nx_action_set_queue nxsq; | |
567 | struct ofp_action_output oao; | |
568 | } actions; | |
569 | ||
570 | memset(&actions, 0, sizeof actions); | |
571 | ||
572 | actions.oao.type = htons(OFPAT_OUTPUT); | |
573 | actions.oao.len = htons(sizeof actions.oao); | |
574 | actions.oao.port = htons(OFPP_NORMAL); | |
575 | actions.oao.max_len = htons(0); | |
576 | ||
577 | if (ib->queue_id < 0) { | |
578 | ofproto_add_flow(ib->ofproto, rule, | |
579 | (union ofp_action *) &actions.oao, 1); | |
580 | } else { | |
581 | actions.nxsq.type = htons(OFPAT_VENDOR); | |
582 | actions.nxsq.len = htons(sizeof actions.nxsq); | |
583 | actions.nxsq.vendor = htonl(NX_VENDOR_ID); | |
584 | actions.nxsq.subtype = htons(NXAST_SET_QUEUE); | |
585 | actions.nxsq.queue_id = htonl(ib->queue_id); | |
586 | ||
587 | ofproto_add_flow(ib->ofproto, rule, (union ofp_action *) &actions, | |
588 | sizeof actions / sizeof(union ofp_action)); | |
589 | } | |
0ade584e BP |
590 | } |
591 | ||
c16e55cf | 592 | /* Inserts flows into the flow table for the current state of 'ib'. */ |
0ade584e BP |
593 | static void |
594 | add_rules(struct in_band *ib) | |
595 | { | |
596 | make_rules(ib, add_rule); | |
597 | } | |
598 | ||
/* qsort() comparison function for 'struct sockaddr_in' elements.
 *
 * Orders by the raw bytes of the IP address first and by the raw bytes of the
 * port second, so that equal addresses, and equal (address, port) pairs, end
 * up adjacent.  Returns a negative, zero, or positive value in the manner of
 * memcmp(). */
static int
compare_addrs(const void *a_, const void *b_)
{
    const struct sockaddr_in *lhs = a_;
    const struct sockaddr_in *rhs = b_;
    int ip_order = memcmp(&lhs->sin_addr.s_addr, &rhs->sin_addr.s_addr,
                          sizeof lhs->sin_addr.s_addr);

    return (ip_order
            ? ip_order
            : memcmp(&lhs->sin_port, &rhs->sin_port, sizeof lhs->sin_port));
}
614 | ||
/* qsort() comparison function for Ethernet addresses stored back to back;
 * defers entirely to eth_addr_compare_3way(). */
static int
compare_macs(const void *a, const void *b)
{
    const uint8_t *lhs = a;
    const uint8_t *rhs = b;

    return eth_addr_compare_3way(lhs, rhs);
}
620 | ||
621 | void | |
622 | in_band_run(struct in_band *ib) | |
623 | { | |
b1da6250 | 624 | bool local_change, remote_change, queue_id_change; |
0ade584e BP |
625 | struct in_band_remote *r; |
626 | ||
fba0d699 BP |
627 | local_change = refresh_local(ib); |
628 | remote_change = refresh_remotes(ib); | |
b1da6250 BP |
629 | queue_id_change = ib->queue_id != ib->prev_queue_id; |
630 | if (!local_change && !remote_change && !queue_id_change) { | |
0ade584e BP |
631 | /* Nothing changed, nothing to do. */ |
632 | return; | |
633 | } | |
b1da6250 | 634 | ib->prev_queue_id = ib->queue_id; |
0ade584e BP |
635 | |
636 | /* Drop old rules. */ | |
637 | drop_rules(ib); | |
638 | ||
639 | /* Figure out new rules. */ | |
640 | memcpy(ib->installed_local_mac, ib->local_mac, ETH_ADDR_LEN); | |
d2ede7bc BP |
641 | ib->remote_addrs = xmalloc(ib->n_remotes * sizeof *ib->remote_addrs); |
642 | ib->n_remote_addrs = 0; | |
0ade584e BP |
643 | ib->remote_macs = xmalloc(ib->n_remotes * ETH_ADDR_LEN); |
644 | ib->n_remote_macs = 0; | |
645 | for (r = ib->remotes; r < &ib->remotes[ib->n_remotes]; r++) { | |
d2ede7bc | 646 | ib->remote_addrs[ib->n_remote_addrs++] = r->remote_addr; |
0ade584e BP |
647 | if (!eth_addr_is_zero(r->remote_mac)) { |
648 | memcpy(&ib->remote_macs[ib->n_remote_macs * ETH_ADDR_LEN], | |
649 | r->remote_mac, ETH_ADDR_LEN); | |
650 | ib->n_remote_macs++; | |
651 | } | |
652 | } | |
653 | ||
654 | /* Sort, to allow make_rules() to easily skip duplicates. */ | |
d2ede7bc BP |
655 | qsort(ib->remote_addrs, ib->n_remote_addrs, sizeof *ib->remote_addrs, |
656 | compare_addrs); | |
0ade584e BP |
657 | qsort(ib->remote_macs, ib->n_remote_macs, ETH_ADDR_LEN, compare_macs); |
658 | ||
659 | /* Add new rules. */ | |
660 | add_rules(ib); | |
661 | } | |
662 | ||
064af421 BP |
663 | void |
664 | in_band_wait(struct in_band *in_band) | |
665 | { | |
7cf8b266 | 666 | long long int wakeup |
0ad9b732 | 667 | = MIN(in_band->next_remote_refresh, in_band->next_local_refresh); |
7cf8b266 | 668 | poll_timer_wait_until(wakeup * 1000); |
064af421 BP |
669 | } |
670 | ||
87472751 BP |
/* Callback for use after ofproto has flushed every flow from the flow table:
 * reinstalls the rules that 'in_band' needs by adding them again. */
void
in_band_flushed(struct in_band *in_band)
{
    add_rules(in_band);
}
678 | ||
f1acd62b | 679 | int |
19a87e36 | 680 | in_band_create(struct ofproto *ofproto, const char *local_name, |
9b45d7f5 | 681 | struct in_band **in_bandp) |
064af421 BP |
682 | { |
683 | struct in_band *in_band; | |
f1acd62b | 684 | struct netdev *local_netdev; |
0ad9b732 | 685 | int error; |
064af421 | 686 | |
928ef386 | 687 | *in_bandp = NULL; |
149f577a | 688 | error = netdev_open_default(local_name, &local_netdev); |
f1acd62b BP |
689 | if (error) { |
690 | VLOG_ERR("failed to initialize in-band control: cannot open " | |
691 | "datapath local port %s (%s)", local_name, strerror(error)); | |
692 | return error; | |
693 | } | |
064af421 | 694 | |
ec6fde61 | 695 | in_band = xzalloc(sizeof *in_band); |
064af421 | 696 | in_band->ofproto = ofproto; |
b1da6250 | 697 | in_band->queue_id = in_band->prev_queue_id = -1; |
f1acd62b | 698 | in_band->next_remote_refresh = TIME_MIN; |
0ade584e BP |
699 | in_band->next_local_refresh = TIME_MIN; |
700 | in_band->local_netdev = local_netdev; | |
064af421 BP |
701 | |
702 | *in_bandp = in_band; | |
f1acd62b BP |
703 | |
704 | return 0; | |
064af421 BP |
705 | } |
706 | ||
707 | void | |
0ade584e | 708 | in_band_destroy(struct in_band *ib) |
064af421 | 709 | { |
0ade584e BP |
710 | if (ib) { |
711 | drop_rules(ib); | |
712 | in_band_set_remotes(ib, NULL, 0); | |
0ade584e BP |
713 | netdev_close(ib->local_netdev); |
714 | free(ib); | |
715 | } | |
716 | } | |
f7de2cdf | 717 | |
a3c5ac70 | 718 | static bool |
d2ede7bc BP |
719 | any_addresses_changed(struct in_band *ib, |
720 | const struct sockaddr_in *addresses, size_t n) | |
a3c5ac70 BP |
721 | { |
722 | size_t i; | |
723 | ||
724 | if (n != ib->n_remotes) { | |
725 | return true; | |
726 | } | |
727 | ||
728 | for (i = 0; i < n; i++) { | |
d2ede7bc BP |
729 | const struct sockaddr_in *old = &ib->remotes[i].remote_addr; |
730 | const struct sockaddr_in *new = &addresses[i]; | |
731 | ||
732 | if (old->sin_addr.s_addr != new->sin_addr.s_addr || | |
733 | old->sin_port != new->sin_port) { | |
a3c5ac70 BP |
734 | return true; |
735 | } | |
736 | } | |
737 | ||
738 | return false; | |
739 | } | |
740 | ||
0ade584e | 741 | void |
d2ede7bc BP |
742 | in_band_set_remotes(struct in_band *ib, |
743 | const struct sockaddr_in *addresses, size_t n) | |
0ade584e BP |
744 | { |
745 | size_t i; | |
746 | ||
d2ede7bc | 747 | if (!any_addresses_changed(ib, addresses, n)) { |
0ade584e | 748 | return; |
0ade584e BP |
749 | } |
750 | ||
a3c5ac70 | 751 | /* Clear old remotes. */ |
0ade584e | 752 | for (i = 0; i < ib->n_remotes; i++) { |
0ade584e | 753 | netdev_close(ib->remotes[i].remote_netdev); |
064af421 | 754 | } |
0ade584e | 755 | free(ib->remotes); |
064af421 | 756 | |
a3c5ac70 | 757 | /* Set up new remotes. */ |
bad0c371 | 758 | ib->remotes = n ? xzalloc(n * sizeof *ib->remotes) : NULL; |
0ade584e BP |
759 | ib->n_remotes = n; |
760 | for (i = 0; i < n; i++) { | |
d2ede7bc | 761 | ib->remotes[i].remote_addr = addresses[i]; |
0ade584e | 762 | } |
a3c5ac70 BP |
763 | |
764 | /* Force refresh in next call to in_band_run(). */ | |
765 | ib->next_remote_refresh = TIME_MIN; | |
0ade584e | 766 | } |
b1da6250 BP |
767 | |
768 | /* Sets the OpenFlow queue used by flows set up by 'ib' to 'queue_id'. If | |
769 | * 'queue_id' is negative, 'ib' will not set any queue (which is also the | |
770 | * default). */ | |
771 | void | |
772 | in_band_set_queue(struct in_band *ib, int queue_id) | |
773 | { | |
774 | ib->queue_id = queue_id; | |
775 | } | |
776 |