]>
Commit | Line | Data |
---|---|---|
064af421 | 1 | /* |
67a4917b | 2 | * Copyright (c) 2008, 2009, 2010 Nicira Networks. |
064af421 | 3 | * |
a14bc59f BP |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
064af421 | 7 | * |
a14bc59f BP |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
064af421 BP |
15 | */ |
16 | ||
17 | #include <config.h> | |
18 | #include "learning-switch.h" | |
19 | ||
20 | #include <errno.h> | |
21 | #include <inttypes.h> | |
22 | #include <netinet/in.h> | |
23 | #include <stdlib.h> | |
24 | #include <time.h> | |
25 | ||
10a24935 | 26 | #include "byte-order.h" |
064af421 | 27 | #include "flow.h" |
d4cdc6b4 | 28 | #include "hmap.h" |
064af421 BP |
29 | #include "mac-learning.h" |
30 | #include "ofpbuf.h" | |
aaaa7553 | 31 | #include "ofp-parse.h" |
064af421 | 32 | #include "ofp-print.h" |
fa37b408 | 33 | #include "ofp-util.h" |
064af421 BP |
34 | #include "openflow/openflow.h" |
35 | #include "poll-loop.h" | |
36 | #include "queue.h" | |
37 | #include "rconn.h" | |
d4cdc6b4 | 38 | #include "shash.h" |
064af421 BP |
39 | #include "timeval.h" |
40 | #include "vconn.h" | |
5136ce49 | 41 | #include "vlog.h" |
064af421 | 42 | |
d98e6007 | 43 | VLOG_DEFINE_THIS_MODULE(learning_switch); |
064af421 | 44 | |
d4cdc6b4 BP |
45 | struct lswitch_port { |
46 | struct hmap_node hmap_node; /* Hash node for port number. */ | |
47 | uint16_t port_no; /* OpenFlow port number, in host byte order. */ | |
48 | uint32_t queue_id; /* OpenFlow queue number. */ | |
49 | }; | |
50 | ||
064af421 BP |
51 | struct lswitch { |
52 | /* If nonnegative, the switch sets up flows that expire after the given | |
53 | * number of seconds (or never expire, if the value is OFP_FLOW_PERMANENT). | |
54 | * Otherwise, the switch processes every packet. */ | |
55 | int max_idle; | |
56 | ||
57 | unsigned long long int datapath_id; | |
064af421 BP |
58 | time_t last_features_request; |
59 | struct mac_learning *ml; /* NULL to act as hub instead of switch. */ | |
52f4c6fa | 60 | uint32_t wildcards; /* Wildcards to apply to flows. */ |
9af9e2e8 | 61 | bool action_normal; /* Use OFPP_NORMAL? */ |
d4cdc6b4 BP |
62 | |
63 | /* Queue distribution. */ | |
64 | uint32_t default_queue; /* Default OpenFlow queue, or UINT32_MAX. */ | |
65 | struct hmap queue_numbers; /* Map from port number to lswitch_port. */ | |
66 | struct shash queue_names; /* Map from port name to lswitch_port. */ | |
064af421 BP |
67 | |
68 | /* Number of outgoing queued packets on the rconn. */ | |
69 | struct rconn_packet_counter *queued; | |
064af421 BP |
70 | }; |
71 | ||
72 | /* The log messages here could actually be useful in debugging, so keep the | |
73 | * rate limit relatively high. */ | |
74 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(30, 300); | |
75 | ||
76 | static void queue_tx(struct lswitch *, struct rconn *, struct ofpbuf *); | |
77 | static void send_features_request(struct lswitch *, struct rconn *); | |
064af421 BP |
78 | |
79 | typedef void packet_handler_func(struct lswitch *, struct rconn *, void *); | |
80 | static packet_handler_func process_switch_features; | |
81 | static packet_handler_func process_packet_in; | |
82 | static packet_handler_func process_echo_request; | |
064af421 | 83 | |
ad67e568 BP |
84 | /* Creates and returns a new learning switch whose configuration is given by |
85 | * 'cfg'. | |
aaaa7553 | 86 | * |
064af421 BP |
87 | * 'rconn' is used to send out an OpenFlow features request. */ |
88 | struct lswitch * | |
ad67e568 | 89 | lswitch_create(struct rconn *rconn, const struct lswitch_config *cfg) |
064af421 | 90 | { |
09913dfd | 91 | const struct ofpbuf *b; |
064af421 | 92 | struct lswitch *sw; |
064af421 | 93 | |
ec6fde61 | 94 | sw = xzalloc(sizeof *sw); |
ad67e568 | 95 | sw->max_idle = cfg->max_idle; |
064af421 BP |
96 | sw->datapath_id = 0; |
97 | sw->last_features_request = time_now() - 1; | |
ad67e568 BP |
98 | sw->ml = cfg->mode == LSW_LEARN ? mac_learning_create() : NULL; |
99 | sw->action_normal = cfg->mode == LSW_NORMAL; | |
100 | if (cfg->exact_flows) { | |
52f4c6fa BP |
101 | /* Exact match. */ |
102 | sw->wildcards = 0; | |
103 | } else { | |
104 | /* We cannot wildcard all fields. | |
105 | * We need in_port to detect moves. | |
106 | * We need both SA and DA to do learning. */ | |
107 | sw->wildcards = (OFPFW_DL_TYPE | OFPFW_NW_SRC_MASK | OFPFW_NW_DST_MASK | |
108 | | OFPFW_NW_PROTO | OFPFW_TP_SRC | OFPFW_TP_DST); | |
109 | } | |
d4cdc6b4 BP |
110 | |
111 | sw->default_queue = cfg->default_queue; | |
112 | hmap_init(&sw->queue_numbers); | |
113 | shash_init(&sw->queue_names); | |
114 | if (cfg->port_queues) { | |
115 | struct shash_node *node; | |
116 | ||
117 | SHASH_FOR_EACH (node, cfg->port_queues) { | |
118 | struct lswitch_port *port = xmalloc(sizeof *port); | |
119 | hmap_node_nullify(&port->hmap_node); | |
120 | port->queue_id = (uintptr_t) node->data; | |
121 | shash_add(&sw->queue_names, node->name, port); | |
122 | } | |
123 | } | |
124 | ||
064af421 | 125 | sw->queued = rconn_packet_counter_create(); |
064af421 | 126 | send_features_request(sw, rconn); |
09913dfd | 127 | |
ad67e568 | 128 | for (b = cfg->default_flows; b; b = b->next) { |
09913dfd | 129 | queue_tx(sw, rconn, ofpbuf_clone(b)); |
aaaa7553 | 130 | } |
09913dfd | 131 | |
064af421 BP |
132 | return sw; |
133 | } | |
134 | ||
135 | /* Destroys 'sw'. */ | |
136 | void | |
137 | lswitch_destroy(struct lswitch *sw) | |
138 | { | |
139 | if (sw) { | |
d4cdc6b4 BP |
140 | struct lswitch_port *node, *next; |
141 | ||
142 | HMAP_FOR_EACH_SAFE (node, next, hmap_node, &sw->queue_numbers) { | |
143 | hmap_remove(&sw->queue_numbers, &node->hmap_node); | |
144 | free(node); | |
145 | } | |
146 | shash_destroy(&sw->queue_names); | |
064af421 BP |
147 | mac_learning_destroy(sw->ml); |
148 | rconn_packet_counter_destroy(sw->queued); | |
149 | free(sw); | |
150 | } | |
151 | } | |
152 | ||
153 | /* Takes care of necessary 'sw' activity, except for receiving packets (which | |
154 | * the caller must do). */ | |
155 | void | |
ba186119 | 156 | lswitch_run(struct lswitch *sw) |
064af421 | 157 | { |
064af421 BP |
158 | if (sw->ml) { |
159 | mac_learning_run(sw->ml, NULL); | |
160 | } | |
064af421 BP |
161 | } |
162 | ||
163 | void | |
164 | lswitch_wait(struct lswitch *sw) | |
165 | { | |
166 | if (sw->ml) { | |
167 | mac_learning_wait(sw->ml); | |
168 | } | |
064af421 BP |
169 | } |
170 | ||
171 | /* Processes 'msg', which should be an OpenFlow received on 'rconn', according | |
172 | * to the learning switch state in 'sw'. The most likely result of processing | |
173 | * is that flow-setup and packet-out OpenFlow messages will be sent out on | |
174 | * 'rconn'. */ | |
175 | void | |
176 | lswitch_process_packet(struct lswitch *sw, struct rconn *rconn, | |
177 | const struct ofpbuf *msg) | |
178 | { | |
179 | struct processor { | |
180 | uint8_t type; | |
181 | size_t min_size; | |
182 | packet_handler_func *handler; | |
183 | }; | |
184 | static const struct processor processors[] = { | |
185 | { | |
186 | OFPT_ECHO_REQUEST, | |
187 | sizeof(struct ofp_header), | |
188 | process_echo_request | |
189 | }, | |
190 | { | |
191 | OFPT_FEATURES_REPLY, | |
192 | sizeof(struct ofp_switch_features), | |
193 | process_switch_features | |
194 | }, | |
195 | { | |
196 | OFPT_PACKET_IN, | |
197 | offsetof(struct ofp_packet_in, data), | |
198 | process_packet_in | |
199 | }, | |
064af421 | 200 | { |
ca069229 JP |
201 | OFPT_FLOW_REMOVED, |
202 | sizeof(struct ofp_flow_removed), | |
064af421 BP |
203 | NULL |
204 | }, | |
205 | }; | |
206 | const size_t n_processors = ARRAY_SIZE(processors); | |
207 | const struct processor *p; | |
208 | struct ofp_header *oh; | |
209 | ||
210 | oh = msg->data; | |
211 | if (sw->datapath_id == 0 | |
212 | && oh->type != OFPT_ECHO_REQUEST | |
213 | && oh->type != OFPT_FEATURES_REPLY) { | |
214 | send_features_request(sw, rconn); | |
215 | return; | |
216 | } | |
217 | ||
218 | for (p = processors; p < &processors[n_processors]; p++) { | |
219 | if (oh->type == p->type) { | |
220 | if (msg->size < p->min_size) { | |
b123cc3c | 221 | VLOG_WARN_RL(&rl, "%016llx: %s: too short (%zu bytes) for " |
064af421 BP |
222 | "type %"PRIu8" (min %zu)", sw->datapath_id, |
223 | rconn_get_name(rconn), msg->size, oh->type, | |
224 | p->min_size); | |
225 | return; | |
226 | } | |
227 | if (p->handler) { | |
228 | (p->handler)(sw, rconn, msg->data); | |
229 | } | |
230 | return; | |
231 | } | |
232 | } | |
233 | if (VLOG_IS_DBG_ENABLED()) { | |
2a022368 | 234 | char *s = ofp_to_string(msg->data, msg->size, 2); |
b123cc3c | 235 | VLOG_DBG_RL(&rl, "%016llx: OpenFlow packet ignored: %s", |
2a022368 BP |
236 | sw->datapath_id, s); |
237 | free(s); | |
064af421 BP |
238 | } |
239 | } | |
240 | \f | |
241 | static void | |
242 | send_features_request(struct lswitch *sw, struct rconn *rconn) | |
243 | { | |
244 | time_t now = time_now(); | |
245 | if (now >= sw->last_features_request + 1) { | |
246 | struct ofpbuf *b; | |
247 | struct ofp_switch_config *osc; | |
248 | ||
249 | /* Send OFPT_FEATURES_REQUEST. */ | |
250 | make_openflow(sizeof(struct ofp_header), OFPT_FEATURES_REQUEST, &b); | |
251 | queue_tx(sw, rconn, b); | |
252 | ||
253 | /* Send OFPT_SET_CONFIG. */ | |
254 | osc = make_openflow(sizeof *osc, OFPT_SET_CONFIG, &b); | |
064af421 BP |
255 | osc->miss_send_len = htons(OFP_DEFAULT_MISS_SEND_LEN); |
256 | queue_tx(sw, rconn, b); | |
257 | ||
258 | sw->last_features_request = now; | |
259 | } | |
260 | } | |
261 | ||
262 | static void | |
263 | queue_tx(struct lswitch *sw, struct rconn *rconn, struct ofpbuf *b) | |
264 | { | |
265 | int retval = rconn_send_with_limit(rconn, b, sw->queued, 10); | |
266 | if (retval && retval != ENOTCONN) { | |
267 | if (retval == EAGAIN) { | |
b123cc3c | 268 | VLOG_INFO_RL(&rl, "%016llx: %s: tx queue overflow", |
064af421 BP |
269 | sw->datapath_id, rconn_get_name(rconn)); |
270 | } else { | |
b123cc3c | 271 | VLOG_WARN_RL(&rl, "%016llx: %s: send: %s", |
064af421 BP |
272 | sw->datapath_id, rconn_get_name(rconn), |
273 | strerror(retval)); | |
274 | } | |
275 | } | |
276 | } | |
277 | ||
278 | static void | |
ba186119 BP |
279 | process_switch_features(struct lswitch *sw, struct rconn *rconn OVS_UNUSED, |
280 | void *osf_) | |
064af421 BP |
281 | { |
282 | struct ofp_switch_features *osf = osf_; | |
d4cdc6b4 BP |
283 | size_t n_ports; |
284 | size_t i; | |
285 | ||
286 | if (check_ofp_message_array(&osf->header, OFPT_FEATURES_REPLY, | |
287 | sizeof *osf, sizeof *osf->ports, &n_ports)) { | |
288 | return; | |
289 | } | |
064af421 BP |
290 | |
291 | sw->datapath_id = ntohll(osf->datapath_id); | |
d4cdc6b4 BP |
292 | |
293 | for (i = 0; i < n_ports; i++) { | |
294 | struct ofp_phy_port *opp = &osf->ports[i]; | |
295 | struct lswitch_port *lp; | |
296 | ||
297 | opp->name[OFP_MAX_PORT_NAME_LEN - 1] = '\0'; | |
298 | lp = shash_find_data(&sw->queue_names, (char *) opp->name); | |
299 | if (lp && hmap_node_is_null(&lp->hmap_node)) { | |
300 | lp->port_no = ntohs(opp->port_no); | |
301 | hmap_insert(&sw->queue_numbers, &lp->hmap_node, | |
302 | hash_int(lp->port_no, 0)); | |
303 | } | |
304 | } | |
064af421 BP |
305 | } |
306 | ||
81f3cad4 | 307 | static uint16_t |
ae412e7d | 308 | lswitch_choose_destination(struct lswitch *sw, const struct flow *flow) |
064af421 | 309 | { |
81f3cad4 | 310 | uint16_t out_port; |
064af421 | 311 | |
81f3cad4 | 312 | /* Learn the source MAC. */ |
ba186119 | 313 | if (sw->ml) { |
81f3cad4 | 314 | if (mac_learning_learn(sw->ml, flow->dl_src, 0, flow->in_port, |
7febb910 | 315 | GRAT_ARP_LOCK_NONE)) { |
b123cc3c | 316 | VLOG_DBG_RL(&rl, "%016llx: learned that "ETH_ADDR_FMT" is on " |
064af421 | 317 | "port %"PRIu16, sw->datapath_id, |
81f3cad4 | 318 | ETH_ADDR_ARGS(flow->dl_src), flow->in_port); |
064af421 BP |
319 | } |
320 | } | |
321 | ||
5a003f60 | 322 | /* Drop frames for reserved multicast addresses. */ |
81f3cad4 BP |
323 | if (eth_addr_is_reserved(flow->dl_dst)) { |
324 | return OFPP_NONE; | |
064af421 BP |
325 | } |
326 | ||
81f3cad4 | 327 | out_port = OFPP_FLOOD; |
064af421 | 328 | if (sw->ml) { |
81f3cad4 | 329 | int learned_port = mac_learning_lookup(sw->ml, flow->dl_dst, 0, NULL); |
ba186119 | 330 | if (learned_port >= 0) { |
064af421 | 331 | out_port = learned_port; |
81f3cad4 BP |
332 | if (out_port == flow->in_port) { |
333 | /* Don't send a packet back out its input port. */ | |
334 | return OFPP_NONE; | |
335 | } | |
064af421 BP |
336 | } |
337 | } | |
338 | ||
81f3cad4 BP |
339 | /* Check if we need to use "NORMAL" action. */ |
340 | if (sw->action_normal && out_port != OFPP_FLOOD) { | |
341 | return OFPP_NORMAL; | |
342 | } | |
343 | ||
344 | return out_port; | |
345 | } | |
346 | ||
d4cdc6b4 BP |
347 | static uint32_t |
348 | get_queue_id(const struct lswitch *sw, uint16_t in_port) | |
349 | { | |
350 | const struct lswitch_port *port; | |
351 | ||
352 | HMAP_FOR_EACH_WITH_HASH (port, hmap_node, hash_int(in_port, 0), | |
353 | &sw->queue_numbers) { | |
354 | if (port->port_no == in_port) { | |
355 | return port->queue_id; | |
356 | } | |
357 | } | |
358 | ||
359 | return sw->default_queue; | |
360 | } | |
361 | ||
81f3cad4 BP |
362 | static void |
363 | process_packet_in(struct lswitch *sw, struct rconn *rconn, void *opi_) | |
364 | { | |
365 | struct ofp_packet_in *opi = opi_; | |
366 | uint16_t in_port = ntohs(opi->in_port); | |
d4cdc6b4 | 367 | uint32_t queue_id; |
81f3cad4 BP |
368 | uint16_t out_port; |
369 | ||
c71270b7 BP |
370 | struct ofp_action_header actions[2]; |
371 | size_t actions_len; | |
372 | ||
81f3cad4 BP |
373 | size_t pkt_ofs, pkt_len; |
374 | struct ofpbuf pkt; | |
ae412e7d | 375 | struct flow flow; |
81f3cad4 | 376 | |
6699af68 BP |
377 | /* Ignore packets sent via output to OFPP_CONTROLLER. This library never |
378 | * uses such an action. You never know what experiments might be going on, | |
379 | * though, and it seems best not to interfere with them. */ | |
380 | if (opi->reason != OFPR_NO_MATCH) { | |
381 | return; | |
382 | } | |
383 | ||
81f3cad4 BP |
384 | /* Extract flow data from 'opi' into 'flow'. */ |
385 | pkt_ofs = offsetof(struct ofp_packet_in, data); | |
386 | pkt_len = ntohs(opi->header.length) - pkt_ofs; | |
387 | pkt.data = opi->data; | |
388 | pkt.size = pkt_len; | |
389 | flow_extract(&pkt, 0, in_port, &flow); | |
390 | ||
391 | /* Choose output port. */ | |
392 | out_port = lswitch_choose_destination(sw, &flow); | |
393 | ||
c71270b7 | 394 | /* Make actions. */ |
d4cdc6b4 | 395 | queue_id = get_queue_id(sw, in_port); |
c71270b7 BP |
396 | if (out_port == OFPP_NONE) { |
397 | actions_len = 0; | |
d4cdc6b4 | 398 | } else if (queue_id == UINT32_MAX || out_port >= OFPP_MAX) { |
3a929702 BP |
399 | struct ofp_action_output oao; |
400 | ||
401 | memset(&oao, 0, sizeof oao); | |
402 | oao.type = htons(OFPAT_OUTPUT); | |
403 | oao.len = htons(sizeof oao); | |
404 | oao.port = htons(out_port); | |
405 | ||
406 | memcpy(actions, &oao, sizeof oao); | |
407 | actions_len = sizeof oao; | |
c71270b7 | 408 | } else { |
3a929702 BP |
409 | struct ofp_action_enqueue oae; |
410 | ||
411 | memset(&oae, 0, sizeof oae); | |
412 | oae.type = htons(OFPAT_ENQUEUE); | |
413 | oae.len = htons(sizeof oae); | |
414 | oae.port = htons(out_port); | |
d4cdc6b4 | 415 | oae.queue_id = htonl(queue_id); |
3a929702 BP |
416 | |
417 | memcpy(actions, &oae, sizeof oae); | |
418 | actions_len = sizeof oae; | |
c71270b7 BP |
419 | } |
420 | assert(actions_len <= sizeof actions); | |
421 | ||
81f3cad4 BP |
422 | /* Send the packet, and possibly the whole flow, to the output port. */ |
423 | if (sw->max_idle >= 0 && (!sw->ml || out_port != OFPP_FLOOD)) { | |
9af9e2e8 JT |
424 | struct ofpbuf *buffer; |
425 | struct ofp_flow_mod *ofm; | |
9af9e2e8 | 426 | |
064af421 BP |
427 | /* The output port is known, or we always flood everything, so add a |
428 | * new flow. */ | |
c71270b7 BP |
429 | buffer = make_add_flow(&flow, ntohl(opi->buffer_id), |
430 | sw->max_idle, actions_len); | |
431 | ofpbuf_put(buffer, actions, actions_len); | |
9af9e2e8 | 432 | ofm = buffer->data; |
52f4c6fa | 433 | ofm->match.wildcards = htonl(sw->wildcards); |
9af9e2e8 | 434 | queue_tx(sw, rconn, buffer); |
064af421 BP |
435 | |
436 | /* If the switch didn't buffer the packet, we need to send a copy. */ | |
c71270b7 | 437 | if (ntohl(opi->buffer_id) == UINT32_MAX && actions_len > 0) { |
064af421 | 438 | queue_tx(sw, rconn, |
c71270b7 BP |
439 | make_packet_out(&pkt, UINT32_MAX, in_port, |
440 | actions, actions_len / sizeof *actions)); | |
064af421 BP |
441 | } |
442 | } else { | |
443 | /* We don't know that MAC, or we don't set up flows. Send along the | |
444 | * packet without setting up a flow. */ | |
c71270b7 | 445 | if (ntohl(opi->buffer_id) != UINT32_MAX || actions_len > 0) { |
81f3cad4 | 446 | queue_tx(sw, rconn, |
c71270b7 BP |
447 | make_packet_out(&pkt, ntohl(opi->buffer_id), in_port, |
448 | actions, actions_len / sizeof *actions)); | |
064af421 | 449 | } |
064af421 | 450 | } |
064af421 BP |
451 | } |
452 | ||
/* Answers the OFPT_ECHO_REQUEST in 'rq_' by queueing the message built by
 * make_echo_reply() on 'rconn'. */
static void
process_echo_request(struct lswitch *sw, struct rconn *rconn, void *rq_)
{
    struct ofp_header *request = rq_;
    struct ofpbuf *reply = make_echo_reply(request);

    queue_tx(sw, rconn, reply);
}