]>
Commit | Line | Data |
---|---|---|
064af421 | 1 | /* |
149f577a | 2 | * Copyright (c) 2008, 2009, 2010 Nicira Networks. |
064af421 | 3 | * |
a14bc59f BP |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
064af421 | 7 | * |
a14bc59f BP |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
064af421 BP |
15 | */ |
16 | ||
17 | #include <config.h> | |
18 | #include "in-band.h" | |
19 | #include <arpa/inet.h> | |
20 | #include <errno.h> | |
21 | #include <inttypes.h> | |
9d82ec47 | 22 | #include <sys/socket.h> |
064af421 BP |
23 | #include <net/if.h> |
24 | #include <string.h> | |
26d9fe3b | 25 | #include <stdlib.h> |
cf3fad8a | 26 | #include "classifier.h" |
0ad9b732 JP |
27 | #include "dhcp.h" |
28 | #include "dpif.h" | |
064af421 | 29 | #include "flow.h" |
064af421 BP |
30 | #include "netdev.h" |
31 | #include "odp-util.h" | |
064af421 BP |
32 | #include "ofproto.h" |
33 | #include "ofpbuf.h" | |
34 | #include "openflow/openflow.h" | |
35 | #include "packets.h" | |
36 | #include "poll-loop.h" | |
064af421 BP |
37 | #include "status.h" |
38 | #include "timeval.h" | |
064af421 BP |
39 | #include "vlog.h" |
40 | ||
d98e6007 | 41 | VLOG_DEFINE_THIS_MODULE(in_band); |
5136ce49 | 42 | |
85088747 | 43 | /* In-band control allows a single network to be used for OpenFlow |
d295e8e9 | 44 | * traffic and other data traffic. Refer to ovs-vswitchd.conf(5) and |
85088747 JP |
45 | * secchan(8) for a description of configuring in-band control. |
46 | * | |
47 | * This comment is an attempt to describe how in-band control works at a | |
48 | * wire- and implementation-level. Correctly implementing in-band | |
49 | * control has proven difficult due to its many subtleties, and has thus | |
50 | * gone through many iterations. Please read through and understand the | |
51 | * reasoning behind the chosen rules before making modifications. | |
52 | * | |
d2ede7bc BP |
53 | * In Open vSwitch, in-band control is implemented as "hidden" flows (in that |
54 | * they are not visible through OpenFlow) and at a higher priority than | |
55 | * wildcarded flows can be set up through OpenFlow. This is done so that | |
56 | * the OpenFlow controller cannot interfere with them and possibly break | |
57 | * connectivity with its switches. It is possible to see all flows, including | |
58 | * in-band ones, with the ovs-appctl "bridge/dump-flows" command. | |
85088747 | 59 | * |
d2ede7bc BP |
60 | * The Open vSwitch implementation of in-band control can hide traffic to |
61 | * arbitrary "remotes", where each remote is one TCP port on one IP address. | |
62 | * Currently the remotes are automatically configured as the in-band OpenFlow | |
63 | * controllers plus the OVSDB managers, if any. (The latter is a requirement | |
64 | * because OVSDB managers are responsible for configuring OpenFlow controllers, | |
65 | * so if the manager cannot be reached then OpenFlow cannot be reconfigured.) | |
85088747 | 66 | * |
d2ede7bc BP |
67 | * The following rules (with the OFPP_NORMAL action) are set up on any bridge |
68 | * that has any remotes: | |
69 | * | |
70 | * (a) DHCP requests sent from the local port. | |
71 | * (b) ARP replies to the local port's MAC address. | |
72 | * (c) ARP requests from the local port's MAC address. | |
73 | * | |
74 | * In-band also sets up the following rules for each unique next-hop MAC | |
75 | * address for the remotes' IPs (the "next hop" is either the remote | |
76 | * itself, if it is on a local subnet, or the gateway to reach the remote): | |
d295e8e9 | 77 | * |
d2ede7bc BP |
78 | * (d) ARP replies to the next hop's MAC address. |
79 | * (e) ARP requests from the next hop's MAC address. | |
80 | * | |
81 | * In-band also sets up the following rules for each unique remote IP address: | |
82 | * | |
83 | * (f) ARP replies containing the remote's IP address as a target. | |
84 | * (g) ARP requests containing the remote's IP address as a source. | |
85 | * | |
86 | * In-band also sets up the following rules for each unique remote (IP,port) | |
87 | * pair: | |
88 | * | |
89 | * (h) TCP traffic to the remote's IP and port. | |
90 | * (i) TCP traffic from the remote's IP and port. | |
85088747 JP |
91 | * |
92 | * The goal of these rules is to be as narrow as possible to allow a | |
d2ede7bc BP |
93 | * switch to join a network and be able to communicate with the |
94 | * remotes. As mentioned earlier, these rules have higher priority | |
d295e8e9 | 95 | * than the controller's rules, so if they are too broad, they may |
85088747 JP |
96 | * prevent the controller from implementing its policy. As such, |
97 | * in-band actively monitors some aspects of flow and packet processing | |
98 | * so that the rules can be made more precise. | |
99 | * | |
100 | * In-band control monitors attempts to add flows into the datapath that | |
101 | * could interfere with its duties. The datapath only allows exact | |
102 | * match entries, so in-band control is able to be very precise about | |
103 | * the flows it prevents. Flows that miss in the datapath are sent to | |
104 | * userspace to be processed, so preventing these flows from being | |
d295e8e9 JP |
105 | * cached in the "fast path" does not affect correctness. The only type |
106 | * of flow that is currently prevented is one that would prevent DHCP | |
107 | * replies from being seen by the local port. For example, a rule that | |
108 | * forwarded all DHCP traffic to the controller would not be allowed, | |
85088747 JP |
109 | * but one that forwarded to all ports (including the local port) would. |
110 | * | |
111 | * As mentioned earlier, packets that miss in the datapath are sent to | |
112 | * the userspace for processing. The userspace has its own flow table, | |
d295e8e9 JP |
113 | * the "classifier", so in-band checks whether any special processing |
114 | * is needed before the classifier is consulted. If a packet is a DHCP | |
115 | * response to a request from the local port, the packet is forwarded to | |
116 | * the local port, regardless of the flow table. Note that this requires | |
117 | * L7 processing of DHCP replies to determine whether the 'chaddr' field | |
85088747 JP |
118 | * matches the MAC address of the local port. |
119 | * | |
120 | * It is interesting to note that for an L3-based in-band control | |
d295e8e9 JP |
121 | * mechanism, the majority of rules are devoted to ARP traffic. At first |
122 | * glance, some of these rules appear redundant. However, each serves an | |
123 | * important role. First, in order to determine the MAC address of the | |
124 | * remote side (controller or gateway) for other ARP rules, we must allow | |
125 | * ARP traffic for our local port with rules (b) and (c). If we are | |
126 | * between a switch and its connection to the remote, we have to | |
127 | * allow the other switch's ARP traffic through. This is done with | |
85088747 | 128 | * rules (d) and (e), since we do not know the addresses of the other |
d295e8e9 JP |
129 | * switches a priori, but do know the remote's or gateway's. Finally, |
130 | * if the remote is running in a local guest VM that is not reached | |
131 | * through the local port, the switch that is connected to the VM must | |
132 | * allow ARP traffic based on the remote's IP address, since it will | |
133 | * not know the MAC address of the local port that is sending the traffic | |
d2ede7bc | 134 | * or the MAC address of the remote in the guest VM. |
85088747 JP |
135 | * |
136 | * With a few notable exceptions below, in-band should work in most | |
137 | * network setups. The following are considered "supported" in the | |
d295e8e9 | 138 | * current implementation: |
85088747 | 139 | * |
d2ede7bc | 140 | * - Locally Connected. The switch and remote are on the same |
85088747 JP |
141 | * subnet. This uses rules (a), (b), (c), (h), and (i). |
142 | * | |
d2ede7bc | 143 | * - Reached through Gateway. The switch and remote are on |
85088747 JP |
144 | * different subnets and must go through a gateway. This uses |
145 | * rules (a), (b), (c), (h), and (i). | |
146 | * | |
d2ede7bc BP |
147 | * - Between Switch and Remote. This switch is between another |
148 | * switch and the remote, and we want to allow the other | |
85088747 JP |
149 | * switch's traffic through. This uses rules (d), (e), (h), and |
150 | * (i). It uses (b) and (c) indirectly in order to know the MAC | |
151 | * address for rules (d) and (e). Note that DHCP for the other | |
d2ede7bc | 152 | * switch will not work unless an OpenFlow controller explicitly lets this |
85088747 JP |
153 | * switch pass the traffic. |
154 | * | |
155 | * - Between Switch and Gateway. This switch is between another | |
156 | * switch and the gateway, and we want to allow the other switch's | |
157 | * traffic through. This uses the same rules and logic as the | |
d2ede7bc | 158 | * "Between Switch and Remote" configuration described earlier. |
85088747 | 159 | * |
d2ede7bc | 160 | * - Remote on Local VM. The remote is a guest VM on the |
d295e8e9 | 161 | * system running in-band control. This uses rules (a), (b), (c), |
85088747 JP |
162 | * (h), and (i). |
163 | * | |
d2ede7bc | 164 | * - Remote on Local VM with Different Networks. The remote |
85088747 | 165 | * is a guest VM on the system running in-band control, but the |
d2ede7bc | 166 | * local port is not used to connect to the remote. For |
85088747 | 167 | * example, an IP address is configured on eth0 of the switch. The |
d2ede7bc | 168 | * remote's VM is connected through eth1 of the switch, but an |
85088747 | 169 | * IP address has not been configured for that port on the switch. |
d2ede7bc | 170 | * As such, the switch will use eth0 to connect to the remote, |
85088747 | 171 | * and eth1's rules about the local port will not work. In the |
d295e8e9 JP |
172 | * example, the switch attached to eth0 would use rules (a), (b), |
173 | * (c), (h), and (i) on eth0. The switch attached to eth1 would use | |
85088747 JP |
174 | * rules (f), (g), (h), and (i). |
175 | * | |
176 | * The following are explicitly *not* supported by in-band control: | |
177 | * | |
d295e8e9 | 178 | * - Specify Remote by Name. Currently, the remote must be |
85088747 JP |
179 | * identified by IP address. A naive approach would be to permit |
180 | * all DNS traffic. Unfortunately, this would prevent the | |
181 | * controller from defining any policy over DNS. Since switches | |
d295e8e9 | 182 | * that are located behind us need to connect to the remote, |
85088747 JP |
183 | * in-band cannot simply add a rule that allows DNS traffic from |
184 | * the local port. The "correct" way to support this is to parse | |
185 | * DNS requests to allow all traffic related to a request for the | |
d2ede7bc | 186 | * remote's name through. Due to the potential security |
85088747 JP |
187 | * problems and amount of processing, we decided to hold off for |
188 | * the time-being. | |
189 | * | |
d2ede7bc | 190 | * - Differing Remotes for Switches. All switches must know |
d295e8e9 | 191 | * the L3 addresses for all the remotes that other switches |
d6fbec6d | 192 | * may use, since rules need to be set up to allow traffic related |
d2ede7bc | 193 | * to those remotes through. See rules (f), (g), (h), and (i). |
85088747 | 194 | * |
d295e8e9 JP |
195 | * - Differing Routes for Switches. In order for the switch to |
196 | * allow other switches to connect to a remote through a | |
85088747 | 197 | * gateway, it allows the gateway's traffic through with rules (d) |
d2ede7bc | 198 | * and (e). If the routes to the remote differ for the two |
d295e8e9 | 199 | * switches, we will not know the MAC address of the alternate |
85088747 JP |
200 | * gateway. |
201 | */ | |
202 | ||
0ade584e BP |
203 | /* Priorities used in classifier for in-band rules. These values are higher |
204 | * than any that may be set with OpenFlow, and "18" kind of looks like "IB". | |
205 | * The ordering of priorities is not important because all of the rules set up | |
206 | * by in-band control have the same action. The only reason to use more than | |
207 | * one priority is to make the kind of flow easier to see during debugging. */ | |
enum {
    /* Installed once per bridge. */
    IBR_FROM_LOCAL_DHCP = 180000,   /* (a) DHCP sent from the local port. */
    IBR_TO_LOCAL_ARP = 180001,      /* (b) ARP to the local port. */
    IBR_FROM_LOCAL_ARP = 180002,    /* (c) ARP from the local port. */

    /* Installed once per unique next-hop MAC address. */
    IBR_TO_NEXT_HOP_ARP = 180003,   /* (d) ARP to the next-hop MAC. */
    IBR_FROM_NEXT_HOP_ARP = 180004, /* (e) ARP from the next-hop MAC. */

    /* Installed once per unique remote IP address. */
    IBR_TO_REMOTE_ARP = 180005,     /* (f) ARP to the remote IP. */
    IBR_FROM_REMOTE_ARP = 180006,   /* (g) ARP from the remote IP. */

    /* Installed once per unique remote (IP,port) pair. */
    IBR_TO_REMOTE_TCP = 180007,     /* (h) TCP to the remote IP and port. */
    IBR_FROM_REMOTE_TCP = 180008    /* (i) TCP from the remote IP and port. */
};
226 | ||
0ade584e BP |
227 | /* Track one remote IP and next hop information. */ |
228 | struct in_band_remote { | |
d2ede7bc | 229 | struct sockaddr_in remote_addr; /* IP address, in network byte order. */ |
0ade584e BP |
230 | uint8_t remote_mac[ETH_ADDR_LEN]; /* Next-hop MAC, all-zeros if unknown. */ |
231 | uint8_t last_remote_mac[ETH_ADDR_LEN]; /* Previous nonzero next-hop MAC. */ | |
232 | struct netdev *remote_netdev; /* Device to send to next-hop MAC. */ | |
233 | }; | |
234 | ||
064af421 BP |
235 | struct in_band { |
236 | struct ofproto *ofproto; | |
064af421 BP |
237 | struct status_category *ss_cat; |
238 | ||
0ade584e BP |
239 | /* Remote information. */ |
240 | time_t next_remote_refresh; /* Refresh timer. */ | |
241 | struct in_band_remote *remotes; | |
242 | size_t n_remotes; | |
243 | ||
244 | /* Local information. */ | |
245 | time_t next_local_refresh; /* Refresh timer. */ | |
246 | uint8_t local_mac[ETH_ADDR_LEN]; /* Current MAC. */ | |
247 | struct netdev *local_netdev; /* Local port's network device. */ | |
248 | ||
249 | /* Local and remote addresses that are installed as flows. */ | |
250 | uint8_t installed_local_mac[ETH_ADDR_LEN]; | |
d2ede7bc BP |
251 | struct sockaddr_in *remote_addrs; |
252 | size_t n_remote_addrs; | |
0ade584e BP |
253 | uint8_t *remote_macs; |
254 | size_t n_remote_macs; | |
064af421 BP |
255 | }; |
256 | ||
257 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(60, 60); | |
258 | ||
0ade584e BP |
259 | static int |
260 | refresh_remote(struct in_band *ib, struct in_band_remote *r) | |
064af421 | 261 | { |
0ade584e | 262 | struct in_addr next_hop_inaddr; |
f1acd62b | 263 | char *next_hop_dev; |
0ade584e | 264 | int retval; |
064af421 | 265 | |
0ade584e | 266 | /* Find the next-hop IP address. */ |
d2ede7bc BP |
267 | memset(r->remote_mac, 0, sizeof r->remote_mac); |
268 | retval = netdev_get_next_hop(ib->local_netdev, &r->remote_addr.sin_addr, | |
0ade584e BP |
269 | &next_hop_inaddr, &next_hop_dev); |
270 | if (retval) { | |
271 | VLOG_WARN("cannot find route for controller ("IP_FMT"): %s", | |
d2ede7bc | 272 | IP_ARGS(&r->remote_addr.sin_addr), strerror(retval)); |
0ade584e BP |
273 | return 1; |
274 | } | |
275 | if (!next_hop_inaddr.s_addr) { | |
d2ede7bc | 276 | next_hop_inaddr = r->remote_addr.sin_addr; |
0ade584e | 277 | } |
c752217a | 278 | |
d2ede7bc | 279 | /* Open the next-hop network device. */ |
0ade584e BP |
280 | if (!r->remote_netdev |
281 | || strcmp(netdev_get_name(r->remote_netdev), next_hop_dev)) | |
282 | { | |
283 | netdev_close(r->remote_netdev); | |
064af421 | 284 | |
0ade584e | 285 | retval = netdev_open_default(next_hop_dev, &r->remote_netdev); |
0ad9b732 | 286 | if (retval) { |
0ade584e BP |
287 | VLOG_WARN_RL(&rl, "cannot open netdev %s (next hop " |
288 | "to controller "IP_FMT"): %s", | |
d2ede7bc | 289 | next_hop_dev, IP_ARGS(&r->remote_addr.sin_addr), |
0ade584e BP |
290 | strerror(retval)); |
291 | free(next_hop_dev); | |
292 | return 1; | |
064af421 | 293 | } |
0ade584e BP |
294 | } |
295 | free(next_hop_dev); | |
296 | ||
297 | /* Look up the MAC address of the next-hop IP address. */ | |
298 | retval = netdev_arp_lookup(r->remote_netdev, next_hop_inaddr.s_addr, | |
299 | r->remote_mac); | |
300 | if (retval) { | |
301 | VLOG_DBG_RL(&rl, "cannot look up remote MAC address ("IP_FMT"): %s", | |
302 | IP_ARGS(&next_hop_inaddr.s_addr), strerror(retval)); | |
064af421 | 303 | } |
0ad9b732 | 304 | |
6dee2066 BP |
305 | /* If we don't have a MAC address, then refresh quickly, since we probably |
306 | * will get a MAC address soon (via ARP). Otherwise, we can afford to wait | |
307 | * a little while. */ | |
308 | return eth_addr_is_zero(r->remote_mac) ? 1 : 10; | |
064af421 BP |
309 | } |
310 | ||
0ade584e BP |
311 | static bool |
312 | refresh_remotes(struct in_band *ib) | |
064af421 | 313 | { |
0ade584e BP |
314 | struct in_band_remote *r; |
315 | bool any_changes; | |
0ade584e BP |
316 | |
317 | if (time_now() < ib->next_remote_refresh) { | |
318 | return false; | |
319 | } | |
320 | ||
321 | any_changes = false; | |
5dbdfff7 | 322 | ib->next_remote_refresh = TIME_MAX; |
0ade584e BP |
323 | for (r = ib->remotes; r < &ib->remotes[ib->n_remotes]; r++) { |
324 | uint8_t old_remote_mac[ETH_ADDR_LEN]; | |
5dbdfff7 | 325 | time_t next_refresh; |
0ade584e | 326 | |
6dee2066 | 327 | /* Save old MAC. */ |
0ade584e BP |
328 | memcpy(old_remote_mac, r->remote_mac, ETH_ADDR_LEN); |
329 | ||
330 | /* Refresh remote information. */ | |
5dbdfff7 BP |
331 | next_refresh = refresh_remote(ib, r) + time_now(); |
332 | ib->next_remote_refresh = MIN(ib->next_remote_refresh, next_refresh); | |
0ade584e | 333 | |
6dee2066 | 334 | /* If the MAC changed, log the changes. */ |
0ade584e BP |
335 | if (!eth_addr_equals(r->remote_mac, old_remote_mac)) { |
336 | any_changes = true; | |
337 | if (!eth_addr_is_zero(r->remote_mac) | |
338 | && !eth_addr_equals(r->last_remote_mac, r->remote_mac)) { | |
339 | VLOG_DBG("remote MAC address changed from "ETH_ADDR_FMT | |
340 | " to "ETH_ADDR_FMT, | |
341 | ETH_ADDR_ARGS(r->last_remote_mac), | |
342 | ETH_ADDR_ARGS(r->remote_mac)); | |
343 | memcpy(r->last_remote_mac, r->remote_mac, ETH_ADDR_LEN); | |
344 | } | |
064af421 | 345 | } |
064af421 | 346 | } |
0ade584e BP |
347 | |
348 | return any_changes; | |
064af421 BP |
349 | } |
350 | ||
0ade584e BP |
351 | /* Refreshes the MAC address of the local port into ib->local_mac, if it is due |
352 | * for a refresh. Returns true if anything changed, otherwise false. */ | |
353 | static bool | |
354 | refresh_local(struct in_band *ib) | |
064af421 | 355 | { |
0ade584e BP |
356 | uint8_t ea[ETH_ADDR_LEN]; |
357 | time_t now; | |
064af421 | 358 | |
0ade584e BP |
359 | now = time_now(); |
360 | if (now < ib->next_local_refresh) { | |
361 | return false; | |
064af421 | 362 | } |
0ade584e | 363 | ib->next_local_refresh = now + 1; |
064af421 | 364 | |
0ade584e BP |
365 | if (netdev_get_etheraddr(ib->local_netdev, ea) |
366 | || eth_addr_equals(ea, ib->local_mac)) { | |
367 | return false; | |
064af421 | 368 | } |
064af421 | 369 | |
0ade584e BP |
370 | memcpy(ib->local_mac, ea, ETH_ADDR_LEN); |
371 | return true; | |
064af421 BP |
372 | } |
373 | ||
064af421 | 374 | static void |
0ade584e | 375 | in_band_status_cb(struct status_reply *sr, void *in_band_) |
064af421 | 376 | { |
0ade584e | 377 | struct in_band *in_band = in_band_; |
064af421 | 378 | |
0ade584e BP |
379 | if (!eth_addr_is_zero(in_band->local_mac)) { |
380 | status_reply_put(sr, "local-mac="ETH_ADDR_FMT, | |
381 | ETH_ADDR_ARGS(in_band->local_mac)); | |
382 | } | |
064af421 | 383 | |
0ade584e BP |
384 | if (in_band->n_remotes |
385 | && !eth_addr_is_zero(in_band->remotes[0].remote_mac)) { | |
386 | status_reply_put(sr, "remote-mac="ETH_ADDR_FMT, | |
387 | ETH_ADDR_ARGS(in_band->remotes[0].remote_mac)); | |
064af421 BP |
388 | } |
389 | } | |
390 | ||
0ad9b732 | 391 | /* Returns true if 'packet' should be sent to the local port regardless |
d295e8e9 | 392 | * of the flow table. */ |
0ad9b732 | 393 | bool |
ae412e7d | 394 | in_band_msg_in_hook(struct in_band *in_band, const struct flow *flow, |
0ad9b732 JP |
395 | const struct ofpbuf *packet) |
396 | { | |
397 | if (!in_band) { | |
398 | return false; | |
399 | } | |
400 | ||
401 | /* Regardless of how the flow table is configured, we want to be | |
402 | * able to see replies to our DHCP requests. */ | |
403 | if (flow->dl_type == htons(ETH_TYPE_IP) | |
404 | && flow->nw_proto == IP_TYPE_UDP | |
405 | && flow->tp_src == htons(DHCP_SERVER_PORT) | |
406 | && flow->tp_dst == htons(DHCP_CLIENT_PORT) | |
407 | && packet->l7) { | |
408 | struct dhcp_header *dhcp; | |
0ad9b732 JP |
409 | |
410 | dhcp = ofpbuf_at(packet, (char *)packet->l7 - (char *)packet->data, | |
411 | sizeof *dhcp); | |
412 | if (!dhcp) { | |
413 | return false; | |
414 | } | |
415 | ||
0ade584e BP |
416 | refresh_local(in_band); |
417 | if (!eth_addr_is_zero(in_band->local_mac) | |
418 | && eth_addr_equals(dhcp->chaddr, in_band->local_mac)) { | |
0ad9b732 JP |
419 | return true; |
420 | } | |
421 | } | |
422 | ||
423 | return false; | |
424 | } | |
425 | ||
d295e8e9 | 426 | /* Returns true if the rule that would match 'flow' with 'actions' is |
0ad9b732 JP |
427 | * allowed to be set up in the datapath. */ |
428 | bool | |
ae412e7d | 429 | in_band_rule_check(struct in_band *in_band, const struct flow *flow, |
0ad9b732 JP |
430 | const struct odp_actions *actions) |
431 | { | |
432 | if (!in_band) { | |
433 | return true; | |
434 | } | |
435 | ||
436 | /* Don't allow flows that would prevent DHCP replies from being seen | |
437 | * by the local port. */ | |
438 | if (flow->dl_type == htons(ETH_TYPE_IP) | |
439 | && flow->nw_proto == IP_TYPE_UDP | |
d295e8e9 | 440 | && flow->tp_src == htons(DHCP_SERVER_PORT) |
0ad9b732 JP |
441 | && flow->tp_dst == htons(DHCP_CLIENT_PORT)) { |
442 | int i; | |
443 | ||
444 | for (i=0; i<actions->n_actions; i++) { | |
d295e8e9 | 445 | if (actions->actions[i].output.type == ODPAT_OUTPUT |
0ad9b732 JP |
446 | && actions->actions[i].output.port == ODPP_LOCAL) { |
447 | return true; | |
d295e8e9 | 448 | } |
0ad9b732 JP |
449 | } |
450 | return false; | |
451 | } | |
452 | ||
453 | return true; | |
454 | } | |
455 | ||
0ade584e BP |
456 | static void |
457 | make_rules(struct in_band *ib, | |
cf3fad8a | 458 | void (*cb)(struct in_band *, const struct cls_rule *)) |
0ade584e | 459 | { |
cf3fad8a | 460 | struct cls_rule rule; |
0ade584e BP |
461 | size_t i; |
462 | ||
463 | if (!eth_addr_is_zero(ib->installed_local_mac)) { | |
d2ede7bc | 464 | /* (a) Allow DHCP requests sent from the local port. */ |
cf3fad8a | 465 | cls_rule_init_catchall(&rule, IBR_FROM_LOCAL_DHCP); |
64420dfa BP |
466 | cls_rule_set_in_port(&rule, ODPP_LOCAL); |
467 | cls_rule_set_dl_type(&rule, htons(ETH_TYPE_IP)); | |
468 | cls_rule_set_dl_src(&rule, ib->installed_local_mac); | |
469 | cls_rule_set_nw_proto(&rule, IP_TYPE_UDP); | |
470 | cls_rule_set_tp_src(&rule, htons(DHCP_CLIENT_PORT)); | |
471 | cls_rule_set_tp_dst(&rule, htons(DHCP_SERVER_PORT)); | |
0ade584e | 472 | cb(ib, &rule); |
0ad9b732 | 473 | |
d2ede7bc | 474 | /* (b) Allow ARP replies to the local port's MAC address. */ |
cf3fad8a | 475 | cls_rule_init_catchall(&rule, IBR_TO_LOCAL_ARP); |
64420dfa BP |
476 | cls_rule_set_dl_type(&rule, htons(ETH_TYPE_ARP)); |
477 | cls_rule_set_dl_dst(&rule, ib->installed_local_mac); | |
478 | cls_rule_set_nw_proto(&rule, ARP_OP_REPLY); | |
0ade584e | 479 | cb(ib, &rule); |
26d9fe3b | 480 | |
d2ede7bc | 481 | /* (c) Allow ARP requests from the local port's MAC address. */ |
cf3fad8a | 482 | cls_rule_init_catchall(&rule, IBR_FROM_LOCAL_ARP); |
64420dfa BP |
483 | cls_rule_set_dl_type(&rule, htons(ETH_TYPE_ARP)); |
484 | cls_rule_set_dl_src(&rule, ib->installed_local_mac); | |
485 | cls_rule_set_nw_proto(&rule, ARP_OP_REQUEST); | |
0ade584e | 486 | cb(ib, &rule); |
0ad9b732 | 487 | } |
a5f37a2d | 488 | |
0ade584e BP |
489 | for (i = 0; i < ib->n_remote_macs; i++) { |
490 | const uint8_t *remote_mac = &ib->remote_macs[i * ETH_ADDR_LEN]; | |
491 | ||
492 | if (i > 0) { | |
493 | const uint8_t *prev_mac = &ib->remote_macs[(i - 1) * ETH_ADDR_LEN]; | |
494 | if (eth_addr_equals(remote_mac, prev_mac)) { | |
495 | /* Skip duplicates. */ | |
496 | continue; | |
497 | } | |
498 | } | |
499 | ||
d2ede7bc | 500 | /* (d) Allow ARP replies to the next hop's MAC address. */ |
cf3fad8a | 501 | cls_rule_init_catchall(&rule, IBR_TO_NEXT_HOP_ARP); |
64420dfa BP |
502 | cls_rule_set_dl_type(&rule, htons(ETH_TYPE_ARP)); |
503 | cls_rule_set_dl_dst(&rule, remote_mac); | |
504 | cls_rule_set_nw_proto(&rule, ARP_OP_REPLY); | |
0ade584e BP |
505 | cb(ib, &rule); |
506 | ||
d2ede7bc | 507 | /* (e) Allow ARP requests from the next hop's MAC address. */ |
cf3fad8a | 508 | cls_rule_init_catchall(&rule, IBR_FROM_NEXT_HOP_ARP); |
64420dfa BP |
509 | cls_rule_set_dl_type(&rule, htons(ETH_TYPE_ARP)); |
510 | cls_rule_set_dl_src(&rule, remote_mac); | |
511 | cls_rule_set_nw_proto(&rule, ARP_OP_REQUEST); | |
0ade584e | 512 | cb(ib, &rule); |
064af421 BP |
513 | } |
514 | ||
d2ede7bc BP |
515 | for (i = 0; i < ib->n_remote_addrs; i++) { |
516 | const struct sockaddr_in *a = &ib->remote_addrs[i]; | |
517 | ||
518 | if (!i || a->sin_addr.s_addr != a[-1].sin_addr.s_addr) { | |
519 | /* (f) Allow ARP replies containing the remote's IP address as a | |
520 | * target. */ | |
cf3fad8a | 521 | cls_rule_init_catchall(&rule, IBR_TO_REMOTE_ARP); |
64420dfa BP |
522 | cls_rule_set_dl_type(&rule, htons(ETH_TYPE_ARP)); |
523 | cls_rule_set_nw_proto(&rule, ARP_OP_REPLY); | |
524 | cls_rule_set_nw_dst(&rule, a->sin_addr.s_addr); | |
d2ede7bc BP |
525 | cb(ib, &rule); |
526 | ||
527 | /* (g) Allow ARP requests containing the remote's IP address as a | |
528 | * source. */ | |
cf3fad8a | 529 | cls_rule_init_catchall(&rule, IBR_FROM_REMOTE_ARP); |
64420dfa BP |
530 | cls_rule_set_dl_type(&rule, htons(ETH_TYPE_ARP)); |
531 | cls_rule_set_nw_proto(&rule, ARP_OP_REQUEST); | |
532 | cls_rule_set_nw_src(&rule, a->sin_addr.s_addr); | |
d2ede7bc | 533 | cb(ib, &rule); |
0ade584e BP |
534 | } |
535 | ||
d2ede7bc BP |
536 | if (!i |
537 | || a->sin_addr.s_addr != a[-1].sin_addr.s_addr | |
538 | || a->sin_port != a[-1].sin_port) { | |
539 | /* (h) Allow TCP traffic to the remote's IP and port. */ | |
cf3fad8a | 540 | cls_rule_init_catchall(&rule, IBR_TO_REMOTE_TCP); |
64420dfa BP |
541 | cls_rule_set_dl_type(&rule, htons(ETH_TYPE_IP)); |
542 | cls_rule_set_nw_proto(&rule, IP_TYPE_TCP); | |
543 | cls_rule_set_nw_dst(&rule, a->sin_addr.s_addr); | |
544 | cls_rule_set_tp_dst(&rule, a->sin_port); | |
d2ede7bc BP |
545 | cb(ib, &rule); |
546 | ||
547 | /* (i) Allow TCP traffic from the remote's IP and port. */ | |
cf3fad8a | 548 | cls_rule_init_catchall(&rule, IBR_FROM_REMOTE_TCP); |
64420dfa BP |
549 | cls_rule_set_dl_type(&rule, htons(ETH_TYPE_IP)); |
550 | cls_rule_set_nw_proto(&rule, IP_TYPE_TCP); | |
551 | cls_rule_set_nw_src(&rule, a->sin_addr.s_addr); | |
552 | cls_rule_set_tp_src(&rule, a->sin_port); | |
d2ede7bc BP |
553 | cb(ib, &rule); |
554 | } | |
064af421 BP |
555 | } |
556 | } | |
557 | ||
0ade584e | 558 | static void |
cf3fad8a | 559 | drop_rule(struct in_band *ib, const struct cls_rule *rule) |
0ade584e | 560 | { |
cf3fad8a | 561 | ofproto_delete_flow(ib->ofproto, rule); |
0ade584e BP |
562 | } |
563 | ||
c16e55cf BP |
564 | /* Drops from the flow table all of the flows set up by 'ib', then clears out |
565 | * the information about the installed flows so that they can be filled in | |
566 | * again if necessary. */ | |
0ade584e BP |
567 | static void |
568 | drop_rules(struct in_band *ib) | |
569 | { | |
c16e55cf | 570 | /* Drop rules. */ |
0ade584e | 571 | make_rules(ib, drop_rule); |
c16e55cf BP |
572 | |
573 | /* Clear out state. */ | |
574 | memset(ib->installed_local_mac, 0, sizeof ib->installed_local_mac); | |
575 | ||
d2ede7bc BP |
576 | free(ib->remote_addrs); |
577 | ib->remote_addrs = NULL; | |
578 | ib->n_remote_addrs = 0; | |
c16e55cf BP |
579 | |
580 | free(ib->remote_macs); | |
581 | ib->remote_macs = NULL; | |
582 | ib->n_remote_macs = 0; | |
0ade584e BP |
583 | } |
584 | ||
585 | static void | |
cf3fad8a | 586 | add_rule(struct in_band *ib, const struct cls_rule *rule) |
0ade584e BP |
587 | { |
588 | union ofp_action action; | |
589 | ||
590 | action.type = htons(OFPAT_OUTPUT); | |
591 | action.output.len = htons(sizeof action); | |
592 | action.output.port = htons(OFPP_NORMAL); | |
593 | action.output.max_len = htons(0); | |
fa8b054f | 594 | ofproto_add_flow(ib->ofproto, rule, &action, 1); |
0ade584e BP |
595 | } |
596 | ||
c16e55cf | 597 | /* Inserts flows into the flow table for the current state of 'ib'. */ |
0ade584e BP |
598 | static void |
599 | add_rules(struct in_band *ib) | |
600 | { | |
601 | make_rules(ib, add_rule); | |
602 | } | |
603 | ||
/* qsort() comparison function for 'struct sockaddr_in' elements.  Orders by
 * the raw bytes of the IP address first, then by the raw bytes of the port,
 * so that equal addresses (and equal address/port pairs) end up adjacent. */
static int
compare_addrs(const void *a_, const void *b_)
{
    const struct sockaddr_in *lhs = a_;
    const struct sockaddr_in *rhs = b_;
    int by_ip = memcmp(&lhs->sin_addr.s_addr, &rhs->sin_addr.s_addr,
                       sizeof lhs->sin_addr.s_addr);

    return by_ip
           ? by_ip
           : memcmp(&lhs->sin_port, &rhs->sin_port, sizeof lhs->sin_port);
}
619 | ||
620 | static int | |
621 | compare_macs(const void *a, const void *b) | |
622 | { | |
623 | return memcmp(a, b, ETH_ADDR_LEN); | |
624 | } | |
625 | ||
626 | void | |
627 | in_band_run(struct in_band *ib) | |
628 | { | |
629 | struct in_band_remote *r; | |
fba0d699 | 630 | bool local_change, remote_change; |
0ade584e | 631 | |
fba0d699 BP |
632 | local_change = refresh_local(ib); |
633 | remote_change = refresh_remotes(ib); | |
634 | if (!local_change && !remote_change) { | |
0ade584e BP |
635 | /* Nothing changed, nothing to do. */ |
636 | return; | |
637 | } | |
638 | ||
639 | /* Drop old rules. */ | |
640 | drop_rules(ib); | |
641 | ||
642 | /* Figure out new rules. */ | |
643 | memcpy(ib->installed_local_mac, ib->local_mac, ETH_ADDR_LEN); | |
d2ede7bc BP |
644 | ib->remote_addrs = xmalloc(ib->n_remotes * sizeof *ib->remote_addrs); |
645 | ib->n_remote_addrs = 0; | |
0ade584e BP |
646 | ib->remote_macs = xmalloc(ib->n_remotes * ETH_ADDR_LEN); |
647 | ib->n_remote_macs = 0; | |
648 | for (r = ib->remotes; r < &ib->remotes[ib->n_remotes]; r++) { | |
d2ede7bc | 649 | ib->remote_addrs[ib->n_remote_addrs++] = r->remote_addr; |
0ade584e BP |
650 | if (!eth_addr_is_zero(r->remote_mac)) { |
651 | memcpy(&ib->remote_macs[ib->n_remote_macs * ETH_ADDR_LEN], | |
652 | r->remote_mac, ETH_ADDR_LEN); | |
653 | ib->n_remote_macs++; | |
654 | } | |
655 | } | |
656 | ||
657 | /* Sort, to allow make_rules() to easily skip duplicates. */ | |
d2ede7bc BP |
658 | qsort(ib->remote_addrs, ib->n_remote_addrs, sizeof *ib->remote_addrs, |
659 | compare_addrs); | |
0ade584e BP |
660 | qsort(ib->remote_macs, ib->n_remote_macs, ETH_ADDR_LEN, compare_macs); |
661 | ||
662 | /* Add new rules. */ | |
663 | add_rules(ib); | |
664 | } | |
665 | ||
064af421 BP |
666 | void |
667 | in_band_wait(struct in_band *in_band) | |
668 | { | |
7cf8b266 | 669 | long long int wakeup |
0ad9b732 | 670 | = MIN(in_band->next_remote_refresh, in_band->next_local_refresh); |
7cf8b266 | 671 | poll_timer_wait_until(wakeup * 1000); |
064af421 BP |
672 | } |
673 | ||
87472751 BP |
/* Callback from ofproto after it has flushed every flow from the flow table.
 * Reinstalls the in-band flows from the installed state still recorded in
 * 'in_band'. */
void
in_band_flushed(struct in_band *in_band)
{
    struct in_band *ib = in_band;

    add_rules(ib);
}
681 | ||
f1acd62b | 682 | int |
0ad9b732 | 683 | in_band_create(struct ofproto *ofproto, struct dpif *dpif, |
0ade584e | 684 | struct switch_status *ss, struct in_band **in_bandp) |
064af421 BP |
685 | { |
686 | struct in_band *in_band; | |
f1acd62b BP |
687 | char local_name[IF_NAMESIZE]; |
688 | struct netdev *local_netdev; | |
0ad9b732 | 689 | int error; |
064af421 | 690 | |
928ef386 | 691 | *in_bandp = NULL; |
f1acd62b BP |
692 | error = dpif_port_get_name(dpif, ODPP_LOCAL, |
693 | local_name, sizeof local_name); | |
0ad9b732 | 694 | if (error) { |
f1acd62b BP |
695 | VLOG_ERR("failed to initialize in-band control: cannot get name " |
696 | "of datapath local port (%s)", strerror(error)); | |
697 | return error; | |
0ad9b732 JP |
698 | } |
699 | ||
149f577a | 700 | error = netdev_open_default(local_name, &local_netdev); |
f1acd62b BP |
701 | if (error) { |
702 | VLOG_ERR("failed to initialize in-band control: cannot open " | |
703 | "datapath local port %s (%s)", local_name, strerror(error)); | |
704 | return error; | |
705 | } | |
064af421 | 706 | |
ec6fde61 | 707 | in_band = xzalloc(sizeof *in_band); |
064af421 | 708 | in_band->ofproto = ofproto; |
064af421 BP |
709 | in_band->ss_cat = switch_status_register(ss, "in-band", |
710 | in_band_status_cb, in_band); | |
f1acd62b | 711 | in_band->next_remote_refresh = TIME_MIN; |
0ade584e BP |
712 | in_band->next_local_refresh = TIME_MIN; |
713 | in_band->local_netdev = local_netdev; | |
064af421 BP |
714 | |
715 | *in_bandp = in_band; | |
f1acd62b BP |
716 | |
717 | return 0; | |
064af421 BP |
718 | } |
719 | ||
720 | void | |
0ade584e | 721 | in_band_destroy(struct in_band *ib) |
064af421 | 722 | { |
0ade584e BP |
723 | if (ib) { |
724 | drop_rules(ib); | |
725 | in_band_set_remotes(ib, NULL, 0); | |
726 | switch_status_unregister(ib->ss_cat); | |
727 | netdev_close(ib->local_netdev); | |
728 | free(ib); | |
729 | } | |
730 | } | |
f7de2cdf | 731 | |
a3c5ac70 | 732 | static bool |
d2ede7bc BP |
733 | any_addresses_changed(struct in_band *ib, |
734 | const struct sockaddr_in *addresses, size_t n) | |
a3c5ac70 BP |
735 | { |
736 | size_t i; | |
737 | ||
738 | if (n != ib->n_remotes) { | |
739 | return true; | |
740 | } | |
741 | ||
742 | for (i = 0; i < n; i++) { | |
d2ede7bc BP |
743 | const struct sockaddr_in *old = &ib->remotes[i].remote_addr; |
744 | const struct sockaddr_in *new = &addresses[i]; | |
745 | ||
746 | if (old->sin_addr.s_addr != new->sin_addr.s_addr || | |
747 | old->sin_port != new->sin_port) { | |
a3c5ac70 BP |
748 | return true; |
749 | } | |
750 | } | |
751 | ||
752 | return false; | |
753 | } | |
754 | ||
0ade584e | 755 | void |
d2ede7bc BP |
756 | in_band_set_remotes(struct in_band *ib, |
757 | const struct sockaddr_in *addresses, size_t n) | |
0ade584e BP |
758 | { |
759 | size_t i; | |
760 | ||
d2ede7bc | 761 | if (!any_addresses_changed(ib, addresses, n)) { |
0ade584e | 762 | return; |
0ade584e BP |
763 | } |
764 | ||
a3c5ac70 | 765 | /* Clear old remotes. */ |
0ade584e | 766 | for (i = 0; i < ib->n_remotes; i++) { |
0ade584e | 767 | netdev_close(ib->remotes[i].remote_netdev); |
064af421 | 768 | } |
0ade584e | 769 | free(ib->remotes); |
064af421 | 770 | |
a3c5ac70 | 771 | /* Set up new remotes. */ |
bad0c371 | 772 | ib->remotes = n ? xzalloc(n * sizeof *ib->remotes) : NULL; |
0ade584e BP |
773 | ib->n_remotes = n; |
774 | for (i = 0; i < n; i++) { | |
d2ede7bc | 775 | ib->remotes[i].remote_addr = addresses[i]; |
0ade584e | 776 | } |
a3c5ac70 BP |
777 | |
778 | /* Force refresh in next call to in_band_run(). */ | |
779 | ib->next_remote_refresh = TIME_MIN; | |
0ade584e | 780 | } |