]> git.proxmox.com Git - ovs.git/blame - ofproto/in-band.c
ofproto: Drop ofproto_rule_lookup().
[ovs.git] / ofproto / in-band.c
CommitLineData
064af421 1/*
7aec165d 2 * Copyright (c) 2008, 2009, 2010, 2011 Nicira Networks.
064af421 3 *
a14bc59f
BP
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
064af421 7 *
a14bc59f
BP
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
064af421
BP
15 */
16
17#include <config.h>
18#include "in-band.h"
19#include <arpa/inet.h>
20#include <errno.h>
21#include <inttypes.h>
9d82ec47 22#include <sys/socket.h>
064af421
BP
23#include <net/if.h>
24#include <string.h>
26d9fe3b 25#include <stdlib.h>
cf3fad8a 26#include "classifier.h"
0ad9b732
JP
27#include "dhcp.h"
28#include "dpif.h"
064af421 29#include "flow.h"
064af421 30#include "netdev.h"
cdee00fd 31#include "netlink.h"
064af421 32#include "odp-util.h"
064af421
BP
33#include "ofproto.h"
34#include "ofpbuf.h"
35#include "openflow/openflow.h"
36#include "packets.h"
37#include "poll-loop.h"
d08a2e92 38#include "private.h"
064af421 39#include "timeval.h"
064af421
BP
40#include "vlog.h"
41
d98e6007 42VLOG_DEFINE_THIS_MODULE(in_band);
5136ce49 43
ee8b231c
BP
44/* In-band control allows a single network to be used for OpenFlow traffic and
45 * other data traffic. See ovs-vswitchd.conf.db(5) for a description of
46 * configuring in-band control.
85088747
JP
47 *
48 * This comment is an attempt to describe how in-band control works at a
49 * wire- and implementation-level. Correctly implementing in-band
50 * control has proven difficult due to its many subtleties, and has thus
51 * gone through many iterations. Please read through and understand the
52 * reasoning behind the chosen rules before making modifications.
53 *
d2ede7bc
BP
54 * In Open vSwitch, in-band control is implemented as "hidden" flows (in that
55 * they are not visible through OpenFlow) and at a higher priority than
56 * wildcarded flows can be set up through OpenFlow. This is done so that
57 * the OpenFlow controller cannot interfere with them and possibly break
58 * connectivity with its switches. It is possible to see all flows, including
59 * in-band ones, with the ovs-appctl "bridge/dump-flows" command.
85088747 60 *
d2ede7bc
BP
61 * The Open vSwitch implementation of in-band control can hide traffic to
62 * arbitrary "remotes", where each remote is one TCP port on one IP address.
63 * Currently the remotes are automatically configured as the in-band OpenFlow
64 * controllers plus the OVSDB managers, if any. (The latter is a requirement
65 * because OVSDB managers are responsible for configuring OpenFlow controllers,
66 * so if the manager cannot be reached then OpenFlow cannot be reconfigured.)
85088747 67 *
d2ede7bc
BP
68 * The following rules (with the OFPP_NORMAL action) are set up on any bridge
69 * that has any remotes:
70 *
71 * (a) DHCP requests sent from the local port.
72 * (b) ARP replies to the local port's MAC address.
73 * (c) ARP requests from the local port's MAC address.
74 *
75 * In-band also sets up the following rules for each unique next-hop MAC
76 * address for the remotes' IPs (the "next hop" is either the remote
77 * itself, if it is on a local subnet, or the gateway to reach the remote):
d295e8e9 78 *
d2ede7bc
BP
79 * (d) ARP replies to the next hop's MAC address.
80 * (e) ARP requests from the next hop's MAC address.
81 *
82 * In-band also sets up the following rules for each unique remote IP address:
83 *
84 * (f) ARP replies containing the remote's IP address as a target.
85 * (g) ARP requests containing the remote's IP address as a source.
86 *
87 * In-band also sets up the following rules for each unique remote (IP,port)
88 * pair:
89 *
90 * (h) TCP traffic to the remote's IP and port.
91 * (i) TCP traffic from the remote's IP and port.
85088747
JP
92 *
93 * The goal of these rules is to be as narrow as possible to allow a
d2ede7bc
BP
94 * switch to join a network and be able to communicate with the
95 * remotes. As mentioned earlier, these rules have higher priority
d295e8e9 96 * than the controller's rules, so if they are too broad, they may
85088747
JP
97 * prevent the controller from implementing its policy. As such,
98 * in-band actively monitors some aspects of flow and packet processing
99 * so that the rules can be made more precise.
100 *
101 * In-band control monitors attempts to add flows into the datapath that
102 * could interfere with its duties. The datapath only allows exact
103 * match entries, so in-band control is able to be very precise about
104 * the flows it prevents. Flows that miss in the datapath are sent to
105 * userspace to be processed, so preventing these flows from being
d295e8e9
JP
106 * cached in the "fast path" does not affect correctness. The only type
107 * of flow that is currently prevented is one that would prevent DHCP
108 * replies from being seen by the local port. For example, a rule that
109 * forwarded all DHCP traffic to the controller would not be allowed,
85088747
JP
110 * but one that forwarded to all ports (including the local port) would.
111 *
112 * As mentioned earlier, packets that miss in the datapath are sent to
113 * the userspace for processing. The userspace has its own flow table,
d295e8e9
JP
114 * the "classifier", so in-band checks whether any special processing
115 * is needed before the classifier is consulted. If a packet is a DHCP
116 * response to a request from the local port, the packet is forwarded to
117 * the local port, regardless of the flow table. Note that this requires
118 * L7 processing of DHCP replies to determine whether the 'chaddr' field
85088747
JP
119 * matches the MAC address of the local port.
120 *
121 * It is interesting to note that for an L3-based in-band control
d295e8e9
JP
122 * mechanism, the majority of rules are devoted to ARP traffic. At first
123 * glance, some of these rules appear redundant. However, each serves an
124 * important role. First, in order to determine the MAC address of the
125 * remote side (controller or gateway) for other ARP rules, we must allow
126 * ARP traffic for our local port with rules (b) and (c). If we are
127 * between a switch and its connection to the remote, we have to
128 * allow the other switch's ARP traffic through. This is done with
85088747 129 * rules (d) and (e), since we do not know the addresses of the other
d295e8e9
JP
130 * switches a priori, but do know the remote's or gateway's. Finally,
131 * if the remote is running in a local guest VM that is not reached
132 * through the local port, the switch that is connected to the VM must
133 * allow ARP traffic based on the remote's IP address, since it will
134 * not know the MAC address of the local port that is sending the traffic
d2ede7bc 135 * or the MAC address of the remote in the guest VM.
85088747
JP
136 *
137 * With a few notable exceptions below, in-band should work in most
138 * network setups. The following are considered "supported" in the
d295e8e9 139 * current implementation:
85088747 140 *
d2ede7bc 141 * - Locally Connected. The switch and remote are on the same
85088747
JP
142 * subnet. This uses rules (a), (b), (c), (h), and (i).
143 *
d2ede7bc 144 * - Reached through Gateway. The switch and remote are on
85088747
JP
145 * different subnets and must go through a gateway. This uses
146 * rules (a), (b), (c), (h), and (i).
147 *
d2ede7bc
BP
148 * - Between Switch and Remote. This switch is between another
149 * switch and the remote, and we want to allow the other
85088747
JP
150 * switch's traffic through. This uses rules (d), (e), (h), and
151 * (i). It uses (b) and (c) indirectly in order to know the MAC
152 * address for rules (d) and (e). Note that DHCP for the other
d2ede7bc 153 * switch will not work unless an OpenFlow controller explicitly lets this
85088747
JP
154 * switch pass the traffic.
155 *
156 * - Between Switch and Gateway. This switch is between another
157 * switch and the gateway, and we want to allow the other switch's
158 * traffic through. This uses the same rules and logic as the
d2ede7bc 159 * "Between Switch and Remote" configuration described earlier.
85088747 160 *
d2ede7bc 161 * - Remote on Local VM. The remote is a guest VM on the
d295e8e9 162 * system running in-band control. This uses rules (a), (b), (c),
85088747
JP
163 * (h), and (i).
164 *
d2ede7bc 165 * - Remote on Local VM with Different Networks. The remote
85088747 166 * is a guest VM on the system running in-band control, but the
d2ede7bc 167 * local port is not used to connect to the remote. For
85088747 168 * example, an IP address is configured on eth0 of the switch. The
d2ede7bc 169 * remote's VM is connected through eth1 of the switch, but an
85088747 170 * IP address has not been configured for that port on the switch.
d2ede7bc 171 * As such, the switch will use eth0 to connect to the remote,
85088747 172 * and eth1's rules about the local port will not work. In the
d295e8e9
JP
173 * example, the switch attached to eth0 would use rules (a), (b),
174 * (c), (h), and (i) on eth0. The switch attached to eth1 would use
85088747
JP
175 * rules (f), (g), (h), and (i).
176 *
177 * The following are explicitly *not* supported by in-band control:
178 *
d295e8e9 179 * - Specify Remote by Name. Currently, the remote must be
85088747
JP
180 * identified by IP address. A naive approach would be to permit
181 * all DNS traffic. Unfortunately, this would prevent the
182 * controller from defining any policy over DNS. Since switches
d295e8e9 183 * that are located behind us need to connect to the remote,
85088747
JP
184 * in-band cannot simply add a rule that allows DNS traffic from
185 * the local port. The "correct" way to support this is to parse
186 * DNS requests to allow all traffic related to a request for the
d2ede7bc 187 * remote's name through. Due to the potential security
85088747
JP
188 * problems and amount of processing, we decided to hold off for
189 * the time-being.
190 *
d2ede7bc 191 * - Differing Remotes for Switches. All switches must know
d295e8e9 192 * the L3 addresses for all the remotes that other switches
d6fbec6d 193 * may use, since rules need to be set up to allow traffic related
d2ede7bc 194 * to those remotes through. See rules (f), (g), (h), and (i).
85088747 195 *
d295e8e9
JP
196 * - Differing Routes for Switches. In order for the switch to
197 * allow other switches to connect to a remote through a
85088747 198 * gateway, it allows the gateway's traffic through with rules (d)
d2ede7bc 199 * and (e). If the routes to the remote differ for the two
d295e8e9 200 * switches, we will not know the MAC address of the alternate
85088747
JP
201 * gateway.
202 */
203
/* Classifier priorities for the rules installed by in-band control.  All of
 * them are higher than any priority that may be set with OpenFlow, and "18"
 * kind of looks like "IB".
 *
 * Every in-band rule has the same action, so the relative order of these
 * values is not important.  Distinct priorities are used only so that each
 * kind of flow is easy to tell apart during debugging; the values are spelled
 * out explicitly so that they can be matched against a flow dump. */
enum {
    /* One set per bridge. */
    IBR_FROM_LOCAL_DHCP = 180000,   /* (a) DHCP from the local port. */
    IBR_TO_LOCAL_ARP = 180001,      /* (b) ARP to the local port. */
    IBR_FROM_LOCAL_ARP = 180002,    /* (c) ARP from the local port. */

    /* One set per unique next-hop MAC. */
    IBR_TO_NEXT_HOP_ARP = 180003,   /* (d) ARP to the next-hop MAC. */
    IBR_FROM_NEXT_HOP_ARP = 180004, /* (e) ARP from the next-hop MAC. */

    /* One set per unique remote IP address. */
    IBR_TO_REMOTE_ARP = 180005,     /* (f) ARP to the remote IP. */
    IBR_FROM_REMOTE_ARP = 180006,   /* (g) ARP from the remote IP. */

    /* One set per unique remote (IP,port) pair. */
    IBR_TO_REMOTE_TCP = 180007,     /* (h) TCP to the remote IP and port. */
    IBR_FROM_REMOTE_TCP = 180008    /* (i) TCP from the remote IP and port. */
};
227
0ade584e
BP
228/* Track one remote IP and next hop information. */
229struct in_band_remote {
d2ede7bc 230 struct sockaddr_in remote_addr; /* IP address, in network byte order. */
0ade584e
BP
231 uint8_t remote_mac[ETH_ADDR_LEN]; /* Next-hop MAC, all-zeros if unknown. */
232 uint8_t last_remote_mac[ETH_ADDR_LEN]; /* Previous nonzero next-hop MAC. */
233 struct netdev *remote_netdev; /* Device to send to next-hop MAC. */
234};
235
064af421
BP
236struct in_band {
237 struct ofproto *ofproto;
b1da6250 238 int queue_id, prev_queue_id;
064af421 239
0ade584e
BP
240 /* Remote information. */
241 time_t next_remote_refresh; /* Refresh timer. */
242 struct in_band_remote *remotes;
243 size_t n_remotes;
244
245 /* Local information. */
246 time_t next_local_refresh; /* Refresh timer. */
247 uint8_t local_mac[ETH_ADDR_LEN]; /* Current MAC. */
248 struct netdev *local_netdev; /* Local port's network device. */
249
250 /* Local and remote addresses that are installed as flows. */
251 uint8_t installed_local_mac[ETH_ADDR_LEN];
d2ede7bc
BP
252 struct sockaddr_in *remote_addrs;
253 size_t n_remote_addrs;
0ade584e
BP
254 uint8_t *remote_macs;
255 size_t n_remote_macs;
064af421
BP
256};
257
/* Rate limiter shared by the rate-limited (VLOG_*_RL) log messages in this
 * file. */
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(60, 60);
259
0ade584e
BP
260static int
261refresh_remote(struct in_band *ib, struct in_band_remote *r)
064af421 262{
0ade584e 263 struct in_addr next_hop_inaddr;
f1acd62b 264 char *next_hop_dev;
0ade584e 265 int retval;
064af421 266
0ade584e 267 /* Find the next-hop IP address. */
d2ede7bc
BP
268 memset(r->remote_mac, 0, sizeof r->remote_mac);
269 retval = netdev_get_next_hop(ib->local_netdev, &r->remote_addr.sin_addr,
0ade584e
BP
270 &next_hop_inaddr, &next_hop_dev);
271 if (retval) {
272 VLOG_WARN("cannot find route for controller ("IP_FMT"): %s",
d2ede7bc 273 IP_ARGS(&r->remote_addr.sin_addr), strerror(retval));
0ade584e
BP
274 return 1;
275 }
276 if (!next_hop_inaddr.s_addr) {
d2ede7bc 277 next_hop_inaddr = r->remote_addr.sin_addr;
0ade584e 278 }
c752217a 279
d2ede7bc 280 /* Open the next-hop network device. */
0ade584e
BP
281 if (!r->remote_netdev
282 || strcmp(netdev_get_name(r->remote_netdev), next_hop_dev))
283 {
284 netdev_close(r->remote_netdev);
064af421 285
0ade584e 286 retval = netdev_open_default(next_hop_dev, &r->remote_netdev);
0ad9b732 287 if (retval) {
0ade584e
BP
288 VLOG_WARN_RL(&rl, "cannot open netdev %s (next hop "
289 "to controller "IP_FMT"): %s",
d2ede7bc 290 next_hop_dev, IP_ARGS(&r->remote_addr.sin_addr),
0ade584e
BP
291 strerror(retval));
292 free(next_hop_dev);
293 return 1;
064af421 294 }
0ade584e
BP
295 }
296 free(next_hop_dev);
297
298 /* Look up the MAC address of the next-hop IP address. */
299 retval = netdev_arp_lookup(r->remote_netdev, next_hop_inaddr.s_addr,
300 r->remote_mac);
301 if (retval) {
302 VLOG_DBG_RL(&rl, "cannot look up remote MAC address ("IP_FMT"): %s",
303 IP_ARGS(&next_hop_inaddr.s_addr), strerror(retval));
064af421 304 }
0ad9b732 305
6dee2066
BP
306 /* If we don't have a MAC address, then refresh quickly, since we probably
307 * will get a MAC address soon (via ARP). Otherwise, we can afford to wait
308 * a little while. */
309 return eth_addr_is_zero(r->remote_mac) ? 1 : 10;
064af421
BP
310}
311
0ade584e
BP
312static bool
313refresh_remotes(struct in_band *ib)
064af421 314{
0ade584e
BP
315 struct in_band_remote *r;
316 bool any_changes;
0ade584e
BP
317
318 if (time_now() < ib->next_remote_refresh) {
319 return false;
320 }
321
322 any_changes = false;
5dbdfff7 323 ib->next_remote_refresh = TIME_MAX;
0ade584e
BP
324 for (r = ib->remotes; r < &ib->remotes[ib->n_remotes]; r++) {
325 uint8_t old_remote_mac[ETH_ADDR_LEN];
5dbdfff7 326 time_t next_refresh;
0ade584e 327
6dee2066 328 /* Save old MAC. */
0ade584e
BP
329 memcpy(old_remote_mac, r->remote_mac, ETH_ADDR_LEN);
330
331 /* Refresh remote information. */
5dbdfff7
BP
332 next_refresh = refresh_remote(ib, r) + time_now();
333 ib->next_remote_refresh = MIN(ib->next_remote_refresh, next_refresh);
0ade584e 334
6dee2066 335 /* If the MAC changed, log the changes. */
0ade584e
BP
336 if (!eth_addr_equals(r->remote_mac, old_remote_mac)) {
337 any_changes = true;
338 if (!eth_addr_is_zero(r->remote_mac)
339 && !eth_addr_equals(r->last_remote_mac, r->remote_mac)) {
340 VLOG_DBG("remote MAC address changed from "ETH_ADDR_FMT
341 " to "ETH_ADDR_FMT,
342 ETH_ADDR_ARGS(r->last_remote_mac),
343 ETH_ADDR_ARGS(r->remote_mac));
344 memcpy(r->last_remote_mac, r->remote_mac, ETH_ADDR_LEN);
345 }
064af421 346 }
064af421 347 }
0ade584e
BP
348
349 return any_changes;
064af421
BP
350}
351
0ade584e
BP
352/* Refreshes the MAC address of the local port into ib->local_mac, if it is due
353 * for a refresh. Returns true if anything changed, otherwise false. */
354static bool
355refresh_local(struct in_band *ib)
064af421 356{
0ade584e
BP
357 uint8_t ea[ETH_ADDR_LEN];
358 time_t now;
064af421 359
0ade584e
BP
360 now = time_now();
361 if (now < ib->next_local_refresh) {
362 return false;
064af421 363 }
0ade584e 364 ib->next_local_refresh = now + 1;
064af421 365
0ade584e
BP
366 if (netdev_get_etheraddr(ib->local_netdev, ea)
367 || eth_addr_equals(ea, ib->local_mac)) {
368 return false;
064af421 369 }
064af421 370
0ade584e
BP
371 memcpy(ib->local_mac, ea, ETH_ADDR_LEN);
372 return true;
064af421
BP
373}
374
0ad9b732 375/* Returns true if 'packet' should be sent to the local port regardless
d295e8e9 376 * of the flow table. */
0ad9b732 377bool
ae412e7d 378in_band_msg_in_hook(struct in_band *in_band, const struct flow *flow,
0ad9b732
JP
379 const struct ofpbuf *packet)
380{
0ad9b732
JP
381 /* Regardless of how the flow table is configured, we want to be
382 * able to see replies to our DHCP requests. */
383 if (flow->dl_type == htons(ETH_TYPE_IP)
6767a2cc 384 && flow->nw_proto == IPPROTO_UDP
0ad9b732
JP
385 && flow->tp_src == htons(DHCP_SERVER_PORT)
386 && flow->tp_dst == htons(DHCP_CLIENT_PORT)
387 && packet->l7) {
388 struct dhcp_header *dhcp;
0ad9b732
JP
389
390 dhcp = ofpbuf_at(packet, (char *)packet->l7 - (char *)packet->data,
391 sizeof *dhcp);
392 if (!dhcp) {
393 return false;
394 }
395
0ade584e
BP
396 refresh_local(in_band);
397 if (!eth_addr_is_zero(in_band->local_mac)
398 && eth_addr_equals(dhcp->chaddr, in_band->local_mac)) {
0ad9b732
JP
399 return true;
400 }
401 }
402
403 return false;
404}
405
d295e8e9 406/* Returns true if the rule that would match 'flow' with 'actions' is
0ad9b732
JP
407 * allowed to be set up in the datapath. */
408bool
19a87e36 409in_band_rule_check(const struct flow *flow,
cf22f8cb 410 const struct nlattr *actions, size_t actions_len)
0ad9b732 411{
0ad9b732
JP
412 /* Don't allow flows that would prevent DHCP replies from being seen
413 * by the local port. */
414 if (flow->dl_type == htons(ETH_TYPE_IP)
6767a2cc 415 && flow->nw_proto == IPPROTO_UDP
d295e8e9 416 && flow->tp_src == htons(DHCP_SERVER_PORT)
0ad9b732 417 && flow->tp_dst == htons(DHCP_CLIENT_PORT)) {
cdee00fd
BP
418 const struct nlattr *a;
419 unsigned int left;
0ad9b732 420
cdee00fd 421 NL_ATTR_FOR_EACH_UNSAFE (a, left, actions, actions_len) {
7aec165d 422 if (nl_attr_type(a) == ODP_ACTION_ATTR_OUTPUT
cdee00fd 423 && nl_attr_get_u32(a) == ODPP_LOCAL) {
0ad9b732 424 return true;
d295e8e9 425 }
0ad9b732
JP
426 }
427 return false;
428 }
429
430 return true;
431}
432
0ade584e
BP
433static void
434make_rules(struct in_band *ib,
cf3fad8a 435 void (*cb)(struct in_band *, const struct cls_rule *))
0ade584e 436{
cf3fad8a 437 struct cls_rule rule;
0ade584e
BP
438 size_t i;
439
440 if (!eth_addr_is_zero(ib->installed_local_mac)) {
d2ede7bc 441 /* (a) Allow DHCP requests sent from the local port. */
cf3fad8a 442 cls_rule_init_catchall(&rule, IBR_FROM_LOCAL_DHCP);
64420dfa
BP
443 cls_rule_set_in_port(&rule, ODPP_LOCAL);
444 cls_rule_set_dl_type(&rule, htons(ETH_TYPE_IP));
445 cls_rule_set_dl_src(&rule, ib->installed_local_mac);
6767a2cc 446 cls_rule_set_nw_proto(&rule, IPPROTO_UDP);
64420dfa
BP
447 cls_rule_set_tp_src(&rule, htons(DHCP_CLIENT_PORT));
448 cls_rule_set_tp_dst(&rule, htons(DHCP_SERVER_PORT));
0ade584e 449 cb(ib, &rule);
0ad9b732 450
d2ede7bc 451 /* (b) Allow ARP replies to the local port's MAC address. */
cf3fad8a 452 cls_rule_init_catchall(&rule, IBR_TO_LOCAL_ARP);
64420dfa
BP
453 cls_rule_set_dl_type(&rule, htons(ETH_TYPE_ARP));
454 cls_rule_set_dl_dst(&rule, ib->installed_local_mac);
455 cls_rule_set_nw_proto(&rule, ARP_OP_REPLY);
0ade584e 456 cb(ib, &rule);
26d9fe3b 457
d2ede7bc 458 /* (c) Allow ARP requests from the local port's MAC address. */
cf3fad8a 459 cls_rule_init_catchall(&rule, IBR_FROM_LOCAL_ARP);
64420dfa
BP
460 cls_rule_set_dl_type(&rule, htons(ETH_TYPE_ARP));
461 cls_rule_set_dl_src(&rule, ib->installed_local_mac);
462 cls_rule_set_nw_proto(&rule, ARP_OP_REQUEST);
0ade584e 463 cb(ib, &rule);
0ad9b732 464 }
a5f37a2d 465
0ade584e
BP
466 for (i = 0; i < ib->n_remote_macs; i++) {
467 const uint8_t *remote_mac = &ib->remote_macs[i * ETH_ADDR_LEN];
468
469 if (i > 0) {
470 const uint8_t *prev_mac = &ib->remote_macs[(i - 1) * ETH_ADDR_LEN];
471 if (eth_addr_equals(remote_mac, prev_mac)) {
472 /* Skip duplicates. */
473 continue;
474 }
475 }
476
d2ede7bc 477 /* (d) Allow ARP replies to the next hop's MAC address. */
cf3fad8a 478 cls_rule_init_catchall(&rule, IBR_TO_NEXT_HOP_ARP);
64420dfa
BP
479 cls_rule_set_dl_type(&rule, htons(ETH_TYPE_ARP));
480 cls_rule_set_dl_dst(&rule, remote_mac);
481 cls_rule_set_nw_proto(&rule, ARP_OP_REPLY);
0ade584e
BP
482 cb(ib, &rule);
483
d2ede7bc 484 /* (e) Allow ARP requests from the next hop's MAC address. */
cf3fad8a 485 cls_rule_init_catchall(&rule, IBR_FROM_NEXT_HOP_ARP);
64420dfa
BP
486 cls_rule_set_dl_type(&rule, htons(ETH_TYPE_ARP));
487 cls_rule_set_dl_src(&rule, remote_mac);
488 cls_rule_set_nw_proto(&rule, ARP_OP_REQUEST);
0ade584e 489 cb(ib, &rule);
064af421
BP
490 }
491
d2ede7bc
BP
492 for (i = 0; i < ib->n_remote_addrs; i++) {
493 const struct sockaddr_in *a = &ib->remote_addrs[i];
494
495 if (!i || a->sin_addr.s_addr != a[-1].sin_addr.s_addr) {
496 /* (f) Allow ARP replies containing the remote's IP address as a
497 * target. */
cf3fad8a 498 cls_rule_init_catchall(&rule, IBR_TO_REMOTE_ARP);
64420dfa
BP
499 cls_rule_set_dl_type(&rule, htons(ETH_TYPE_ARP));
500 cls_rule_set_nw_proto(&rule, ARP_OP_REPLY);
501 cls_rule_set_nw_dst(&rule, a->sin_addr.s_addr);
d2ede7bc
BP
502 cb(ib, &rule);
503
504 /* (g) Allow ARP requests containing the remote's IP address as a
505 * source. */
cf3fad8a 506 cls_rule_init_catchall(&rule, IBR_FROM_REMOTE_ARP);
64420dfa
BP
507 cls_rule_set_dl_type(&rule, htons(ETH_TYPE_ARP));
508 cls_rule_set_nw_proto(&rule, ARP_OP_REQUEST);
509 cls_rule_set_nw_src(&rule, a->sin_addr.s_addr);
d2ede7bc 510 cb(ib, &rule);
0ade584e
BP
511 }
512
d2ede7bc
BP
513 if (!i
514 || a->sin_addr.s_addr != a[-1].sin_addr.s_addr
515 || a->sin_port != a[-1].sin_port) {
516 /* (h) Allow TCP traffic to the remote's IP and port. */
cf3fad8a 517 cls_rule_init_catchall(&rule, IBR_TO_REMOTE_TCP);
64420dfa 518 cls_rule_set_dl_type(&rule, htons(ETH_TYPE_IP));
6767a2cc 519 cls_rule_set_nw_proto(&rule, IPPROTO_TCP);
64420dfa
BP
520 cls_rule_set_nw_dst(&rule, a->sin_addr.s_addr);
521 cls_rule_set_tp_dst(&rule, a->sin_port);
d2ede7bc
BP
522 cb(ib, &rule);
523
524 /* (i) Allow TCP traffic from the remote's IP and port. */
cf3fad8a 525 cls_rule_init_catchall(&rule, IBR_FROM_REMOTE_TCP);
64420dfa 526 cls_rule_set_dl_type(&rule, htons(ETH_TYPE_IP));
6767a2cc 527 cls_rule_set_nw_proto(&rule, IPPROTO_TCP);
64420dfa
BP
528 cls_rule_set_nw_src(&rule, a->sin_addr.s_addr);
529 cls_rule_set_tp_src(&rule, a->sin_port);
d2ede7bc
BP
530 cb(ib, &rule);
531 }
064af421
BP
532 }
533}
534
0ade584e 535static void
cf3fad8a 536drop_rule(struct in_band *ib, const struct cls_rule *rule)
0ade584e 537{
cf3fad8a 538 ofproto_delete_flow(ib->ofproto, rule);
0ade584e
BP
539}
540
c16e55cf
BP
541/* Drops from the flow table all of the flows set up by 'ib', then clears out
542 * the information about the installed flows so that they can be filled in
543 * again if necessary. */
0ade584e
BP
544static void
545drop_rules(struct in_band *ib)
546{
c16e55cf 547 /* Drop rules. */
0ade584e 548 make_rules(ib, drop_rule);
c16e55cf
BP
549
550 /* Clear out state. */
551 memset(ib->installed_local_mac, 0, sizeof ib->installed_local_mac);
552
d2ede7bc
BP
553 free(ib->remote_addrs);
554 ib->remote_addrs = NULL;
555 ib->n_remote_addrs = 0;
c16e55cf
BP
556
557 free(ib->remote_macs);
558 ib->remote_macs = NULL;
559 ib->n_remote_macs = 0;
0ade584e
BP
560}
561
562static void
cf3fad8a 563add_rule(struct in_band *ib, const struct cls_rule *rule)
0ade584e 564{
b1da6250
BP
565 struct {
566 struct nx_action_set_queue nxsq;
567 struct ofp_action_output oao;
568 } actions;
569
570 memset(&actions, 0, sizeof actions);
571
572 actions.oao.type = htons(OFPAT_OUTPUT);
573 actions.oao.len = htons(sizeof actions.oao);
574 actions.oao.port = htons(OFPP_NORMAL);
575 actions.oao.max_len = htons(0);
576
577 if (ib->queue_id < 0) {
578 ofproto_add_flow(ib->ofproto, rule,
579 (union ofp_action *) &actions.oao, 1);
580 } else {
581 actions.nxsq.type = htons(OFPAT_VENDOR);
582 actions.nxsq.len = htons(sizeof actions.nxsq);
583 actions.nxsq.vendor = htonl(NX_VENDOR_ID);
584 actions.nxsq.subtype = htons(NXAST_SET_QUEUE);
585 actions.nxsq.queue_id = htonl(ib->queue_id);
586
587 ofproto_add_flow(ib->ofproto, rule, (union ofp_action *) &actions,
588 sizeof actions / sizeof(union ofp_action));
589 }
0ade584e
BP
590}
591
/* Inserts flows into the flow table for the current state of 'ib', that is,
 * one flow for every rule that make_rules() composes from the installed
 * local MAC, remote MACs, and remote addresses recorded in 'ib'. */
static void
add_rules(struct in_band *ib)
{
    make_rules(ib, add_rule);
}
598
/* qsort() comparison function for 'struct sockaddr_in' elements.  Orders by
 * the bytes of the IP address first and by the bytes of the port second, so
 * that equal IPs, and equal (IP,port) pairs, end up adjacent. */
static int
compare_addrs(const void *a_, const void *b_)
{
    const struct sockaddr_in *lhs = a_;
    const struct sockaddr_in *rhs = b_;
    int ip_order = memcmp(&lhs->sin_addr.s_addr, &rhs->sin_addr.s_addr,
                          sizeof lhs->sin_addr.s_addr);

    return ip_order
           ? ip_order
           : memcmp(&lhs->sin_port, &rhs->sin_port, sizeof lhs->sin_port);
}
614
/* qsort() comparison function for Ethernet addresses stored as consecutive
 * ETH_ADDR_LEN-byte elements; the ordering is that of
 * eth_addr_compare_3way(). */
static int
compare_macs(const void *a, const void *b)
{
    const uint8_t *mac_a = a;
    const uint8_t *mac_b = b;

    return eth_addr_compare_3way(mac_a, mac_b);
}
620
621void
622in_band_run(struct in_band *ib)
623{
b1da6250 624 bool local_change, remote_change, queue_id_change;
0ade584e
BP
625 struct in_band_remote *r;
626
fba0d699
BP
627 local_change = refresh_local(ib);
628 remote_change = refresh_remotes(ib);
b1da6250
BP
629 queue_id_change = ib->queue_id != ib->prev_queue_id;
630 if (!local_change && !remote_change && !queue_id_change) {
0ade584e
BP
631 /* Nothing changed, nothing to do. */
632 return;
633 }
b1da6250 634 ib->prev_queue_id = ib->queue_id;
0ade584e
BP
635
636 /* Drop old rules. */
637 drop_rules(ib);
638
639 /* Figure out new rules. */
640 memcpy(ib->installed_local_mac, ib->local_mac, ETH_ADDR_LEN);
d2ede7bc
BP
641 ib->remote_addrs = xmalloc(ib->n_remotes * sizeof *ib->remote_addrs);
642 ib->n_remote_addrs = 0;
0ade584e
BP
643 ib->remote_macs = xmalloc(ib->n_remotes * ETH_ADDR_LEN);
644 ib->n_remote_macs = 0;
645 for (r = ib->remotes; r < &ib->remotes[ib->n_remotes]; r++) {
d2ede7bc 646 ib->remote_addrs[ib->n_remote_addrs++] = r->remote_addr;
0ade584e
BP
647 if (!eth_addr_is_zero(r->remote_mac)) {
648 memcpy(&ib->remote_macs[ib->n_remote_macs * ETH_ADDR_LEN],
649 r->remote_mac, ETH_ADDR_LEN);
650 ib->n_remote_macs++;
651 }
652 }
653
654 /* Sort, to allow make_rules() to easily skip duplicates. */
d2ede7bc
BP
655 qsort(ib->remote_addrs, ib->n_remote_addrs, sizeof *ib->remote_addrs,
656 compare_addrs);
0ade584e
BP
657 qsort(ib->remote_macs, ib->n_remote_macs, ETH_ADDR_LEN, compare_macs);
658
659 /* Add new rules. */
660 add_rules(ib);
661}
662
064af421
BP
663void
664in_band_wait(struct in_band *in_band)
665{
7cf8b266 666 long long int wakeup
0ad9b732 667 = MIN(in_band->next_remote_refresh, in_band->next_local_refresh);
7cf8b266 668 poll_timer_wait_until(wakeup * 1000);
064af421
BP
669}
670
/* ofproto has flushed all flows from the flow table and it is calling us back
 * to allow us to reinstall the ones that are important to us.
 *
 * The flows are re-added from the installed state already recorded in
 * 'in_band'; nothing is refreshed here. */
void
in_band_flushed(struct in_band *in_band)
{
    add_rules(in_band);
}
678
f1acd62b 679int
19a87e36 680in_band_create(struct ofproto *ofproto, const char *local_name,
9b45d7f5 681 struct in_band **in_bandp)
064af421
BP
682{
683 struct in_band *in_band;
f1acd62b 684 struct netdev *local_netdev;
0ad9b732 685 int error;
064af421 686
928ef386 687 *in_bandp = NULL;
149f577a 688 error = netdev_open_default(local_name, &local_netdev);
f1acd62b
BP
689 if (error) {
690 VLOG_ERR("failed to initialize in-band control: cannot open "
691 "datapath local port %s (%s)", local_name, strerror(error));
692 return error;
693 }
064af421 694
ec6fde61 695 in_band = xzalloc(sizeof *in_band);
064af421 696 in_band->ofproto = ofproto;
b1da6250 697 in_band->queue_id = in_band->prev_queue_id = -1;
f1acd62b 698 in_band->next_remote_refresh = TIME_MIN;
0ade584e
BP
699 in_band->next_local_refresh = TIME_MIN;
700 in_band->local_netdev = local_netdev;
064af421
BP
701
702 *in_bandp = in_band;
f1acd62b
BP
703
704 return 0;
064af421
BP
705}
706
707void
0ade584e 708in_band_destroy(struct in_band *ib)
064af421 709{
0ade584e
BP
710 if (ib) {
711 drop_rules(ib);
712 in_band_set_remotes(ib, NULL, 0);
0ade584e
BP
713 netdev_close(ib->local_netdev);
714 free(ib);
715 }
716}
f7de2cdf 717
a3c5ac70 718static bool
d2ede7bc
BP
719any_addresses_changed(struct in_band *ib,
720 const struct sockaddr_in *addresses, size_t n)
a3c5ac70
BP
721{
722 size_t i;
723
724 if (n != ib->n_remotes) {
725 return true;
726 }
727
728 for (i = 0; i < n; i++) {
d2ede7bc
BP
729 const struct sockaddr_in *old = &ib->remotes[i].remote_addr;
730 const struct sockaddr_in *new = &addresses[i];
731
732 if (old->sin_addr.s_addr != new->sin_addr.s_addr ||
733 old->sin_port != new->sin_port) {
a3c5ac70
BP
734 return true;
735 }
736 }
737
738 return false;
739}
740
0ade584e 741void
d2ede7bc
BP
742in_band_set_remotes(struct in_band *ib,
743 const struct sockaddr_in *addresses, size_t n)
0ade584e
BP
744{
745 size_t i;
746
d2ede7bc 747 if (!any_addresses_changed(ib, addresses, n)) {
0ade584e 748 return;
0ade584e
BP
749 }
750
a3c5ac70 751 /* Clear old remotes. */
0ade584e 752 for (i = 0; i < ib->n_remotes; i++) {
0ade584e 753 netdev_close(ib->remotes[i].remote_netdev);
064af421 754 }
0ade584e 755 free(ib->remotes);
064af421 756
a3c5ac70 757 /* Set up new remotes. */
bad0c371 758 ib->remotes = n ? xzalloc(n * sizeof *ib->remotes) : NULL;
0ade584e
BP
759 ib->n_remotes = n;
760 for (i = 0; i < n; i++) {
d2ede7bc 761 ib->remotes[i].remote_addr = addresses[i];
0ade584e 762 }
a3c5ac70
BP
763
764 /* Force refresh in next call to in_band_run(). */
765 ib->next_remote_refresh = TIME_MIN;
0ade584e 766}
b1da6250
BP
767
/* Sets the OpenFlow queue used by flows set up by 'ib' to 'queue_id'.  If
 * 'queue_id' is negative, 'ib' will not set any queue (which is also the
 * default).
 *
 * Only the setting is recorded here; the flows are rebuilt by in_band_run()
 * when it notices that the queue differs from the one last used. */
void
in_band_set_queue(struct in_band *ib, int queue_id)
{
    ib->queue_id = queue_id;
}
776