/*
 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "learning-switch.h"

#include <netinet/in.h>
#include <stdlib.h>
#include <string.h>

#include "byte-order.h"
#include "classifier.h"
#include "mac-learning.h"
#include "ofp-actions.h"
#include "ofp-errors.h"
#include "ofp-parse.h"
#include "ofp-print.h"
#include "openflow/openflow.h"
#include "poll-loop.h"
/* Log messages emitted by this file belong to the "learning_switch" vlog
 * module. */
VLOG_DEFINE_THIS_MODULE(learning_switch);
50 struct hmap_node hmap_node
; /* Hash node for port number. */
51 ofp_port_t port_no
; /* OpenFlow port number. */
52 uint32_t queue_id
; /* OpenFlow queue number. */
/* Progress of the connection between the learning switch and its peer. */
enum lswitch_state {
    S_CONNECTING,               /* Waiting for connection to complete. */
    S_FEATURES_REPLY,           /* Waiting for features reply. */
    S_SWITCHING,                /* Switching flows. */
};
63 enum lswitch_state state
;
65 /* If nonnegative, the switch sets up flows that expire after the given
66 * number of seconds (or never expire, if the value is OFP_FLOW_PERMANENT).
67 * Otherwise, the switch processes every packet. */
70 enum ofputil_protocol protocol
;
71 unsigned long long int datapath_id
;
72 struct mac_learning
*ml
; /* NULL to act as hub instead of switch. */
73 struct flow_wildcards wc
; /* Wildcards to apply to flows. */
74 bool action_normal
; /* Use OFPP_NORMAL? */
76 /* Queue distribution. */
77 uint32_t default_queue
; /* Default OpenFlow queue, or UINT32_MAX. */
78 struct hmap queue_numbers
; /* Map from port number to lswitch_port. */
79 struct shash queue_names
; /* Map from port name to lswitch_port. */
81 /* Number of outgoing queued packets on the rconn. */
82 struct rconn_packet_counter
*queued
;
84 /* If true, do not reply to any messages from the switch (for debugging
88 /* Optional "flow mod" requests to send to the switch at connection time,
89 * to set up the flow table. */
90 const struct ofputil_flow_mod
*default_flows
;
91 size_t n_default_flows
;
92 enum ofputil_protocol usable_protocols
;
95 /* The log messages here could actually be useful in debugging, so keep the
96 * rate limit relatively high. */
97 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(30, 300);
99 static void queue_tx(struct lswitch
*, struct ofpbuf
*);
100 static void send_features_request(struct lswitch
*);
102 static void lswitch_process_packet(struct lswitch
*, const struct ofpbuf
*);
103 static enum ofperr
process_switch_features(struct lswitch
*,
104 struct ofp_header
*);
105 static void process_packet_in(struct lswitch
*, const struct ofp_header
*);
106 static void process_echo_request(struct lswitch
*, const struct ofp_header
*);
108 /* Creates and returns a new learning switch whose configuration is given by
111 * 'rconn' is used to send out an OpenFlow features request. */
113 lswitch_create(struct rconn
*rconn
, const struct lswitch_config
*cfg
)
118 sw
= xzalloc(sizeof *sw
);
120 sw
->state
= S_CONNECTING
;
121 sw
->max_idle
= cfg
->max_idle
;
123 sw
->ml
= (cfg
->mode
== LSW_LEARN
124 ? mac_learning_create(MAC_ENTRY_DEFAULT_IDLE_TIME
)
126 sw
->action_normal
= cfg
->mode
== LSW_NORMAL
;
128 switch (cfg
->wildcards
) {
134 /* Try to wildcard as many fields as possible, but we cannot
135 * wildcard all fields. We need in_port to detect moves. We need
136 * Ethernet source and dest and VLAN VID to do L2 learning. */
137 ofpfw
= (OFPFW10_DL_TYPE
| OFPFW10_DL_VLAN_PCP
138 | OFPFW10_NW_SRC_ALL
| OFPFW10_NW_DST_ALL
139 | OFPFW10_NW_TOS
| OFPFW10_NW_PROTO
140 | OFPFW10_TP_SRC
| OFPFW10_TP_DST
);
144 ofpfw
= cfg
->wildcards
;
147 ofputil_wildcard_from_ofpfw10(ofpfw
, &sw
->wc
);
149 sw
->default_queue
= cfg
->default_queue
;
150 hmap_init(&sw
->queue_numbers
);
151 shash_init(&sw
->queue_names
);
152 if (cfg
->port_queues
) {
153 struct simap_node
*node
;
155 SIMAP_FOR_EACH (node
, cfg
->port_queues
) {
156 struct lswitch_port
*port
= xmalloc(sizeof *port
);
157 hmap_node_nullify(&port
->hmap_node
);
158 port
->queue_id
= node
->data
;
159 shash_add(&sw
->queue_names
, node
->name
, port
);
163 sw
->default_flows
= cfg
->default_flows
;
164 sw
->n_default_flows
= cfg
->n_default_flows
;
165 sw
->usable_protocols
= cfg
->usable_protocols
;
167 sw
->queued
= rconn_packet_counter_create();
173 lswitch_handshake(struct lswitch
*sw
)
175 enum ofputil_protocol protocol
;
176 enum ofp_version version
;
178 send_features_request(sw
);
180 version
= rconn_get_version(sw
->rconn
);
181 protocol
= ofputil_protocol_from_ofp_version(version
);
182 if (version
>= OFP13_VERSION
) {
183 /* OpenFlow 1.3 and later by default drop packets that miss in the flow
184 * table. Set up a flow to send packets to the controller by
186 struct ofputil_flow_mod fm
;
187 struct ofpact_output output
;
191 ofpact_init_OUTPUT(&output
);
192 output
.port
= OFPP_CONTROLLER
;
193 output
.max_len
= OFP_DEFAULT_MISS_SEND_LEN
;
195 match_init_catchall(&fm
.match
);
200 fm
.modify_cookie
= false;
202 fm
.command
= OFPFC_ADD
;
206 fm
.buffer_id
= UINT32_MAX
;
207 fm
.out_port
= OFPP_NONE
;
208 fm
.out_group
= OFPG_ANY
;
210 fm
.ofpacts
= &output
.ofpact
;
211 fm
.ofpacts_len
= sizeof output
;
212 fm
.delete_reason
= 0;
214 msg
= ofputil_encode_flow_mod(&fm
, protocol
);
215 error
= rconn_send(sw
->rconn
, msg
, NULL
);
217 VLOG_INFO_RL(&rl
, "%s: failed to add default flow (%s)",
218 rconn_get_name(sw
->rconn
), ovs_strerror(error
));
221 if (sw
->default_flows
) {
222 struct ofpbuf
*msg
= NULL
;
226 /* If the initial protocol isn't good enough for default_flows, then
227 * pick one that will work and encode messages to set up that
230 * This could be improved by actually negotiating a mutually acceptable
231 * flow format with the switch, but that would require an asynchronous
232 * state machine. This version ought to work fine in practice. */
233 if (!(protocol
& sw
->usable_protocols
)) {
234 enum ofputil_protocol want
= rightmost_1bit(sw
->usable_protocols
);
236 msg
= ofputil_encode_set_protocol(protocol
, want
, &protocol
);
240 error
= rconn_send(sw
->rconn
, msg
, NULL
);
243 if (protocol
& sw
->usable_protocols
) {
244 for (i
= 0; !error
&& i
< sw
->n_default_flows
; i
++) {
245 msg
= ofputil_encode_flow_mod(&sw
->default_flows
[i
], protocol
);
246 error
= rconn_send(sw
->rconn
, msg
, NULL
);
250 VLOG_INFO_RL(&rl
, "%s: failed to queue default flows (%s)",
251 rconn_get_name(sw
->rconn
), ovs_strerror(error
));
254 VLOG_INFO_RL(&rl
, "%s: failed to set usable protocol",
255 rconn_get_name(sw
->rconn
));
258 sw
->protocol
= protocol
;
262 lswitch_is_alive(const struct lswitch
*sw
)
264 return rconn_is_alive(sw
->rconn
);
269 lswitch_destroy(struct lswitch
*sw
)
272 struct lswitch_port
*node
, *next
;
274 rconn_destroy(sw
->rconn
);
275 HMAP_FOR_EACH_SAFE (node
, next
, hmap_node
, &sw
->queue_numbers
) {
276 hmap_remove(&sw
->queue_numbers
, &node
->hmap_node
);
279 shash_destroy(&sw
->queue_names
);
280 mac_learning_unref(sw
->ml
);
281 rconn_packet_counter_destroy(sw
->queued
);
286 /* Takes care of necessary 'sw' activity, except for receiving packets (which
287 * the caller must do). */
289 lswitch_run(struct lswitch
*sw
)
294 ovs_rwlock_wrlock(&sw
->ml
->rwlock
);
295 mac_learning_run(sw
->ml
);
296 ovs_rwlock_unlock(&sw
->ml
->rwlock
);
299 rconn_run(sw
->rconn
);
301 if (sw
->state
== S_CONNECTING
) {
302 if (rconn_get_version(sw
->rconn
) != -1) {
303 lswitch_handshake(sw
);
304 sw
->state
= S_FEATURES_REPLY
;
309 for (i
= 0; i
< 50; i
++) {
312 msg
= rconn_recv(sw
->rconn
);
318 lswitch_process_packet(sw
, msg
);
325 lswitch_wait(struct lswitch
*sw
)
328 ovs_rwlock_rdlock(&sw
->ml
->rwlock
);
329 mac_learning_wait(sw
->ml
);
330 ovs_rwlock_unlock(&sw
->ml
->rwlock
);
332 rconn_run_wait(sw
->rconn
);
333 rconn_recv_wait(sw
->rconn
);
336 /* Processes 'msg', which should be an OpenFlow received on 'rconn', according
337 * to the learning switch state in 'sw'. The most likely result of processing
338 * is that flow-setup and packet-out OpenFlow messages will be sent out on
341 lswitch_process_packet(struct lswitch
*sw
, const struct ofpbuf
*msg
)
347 if (ofptype_pull(&type
, &b
)) {
351 if (sw
->state
== S_FEATURES_REPLY
352 && type
!= OFPTYPE_ECHO_REQUEST
353 && type
!= OFPTYPE_FEATURES_REPLY
) {
358 case OFPTYPE_ECHO_REQUEST
:
359 process_echo_request(sw
, ofpbuf_data(msg
));
362 case OFPTYPE_FEATURES_REPLY
:
363 if (sw
->state
== S_FEATURES_REPLY
) {
364 if (!process_switch_features(sw
, ofpbuf_data(msg
))) {
365 sw
->state
= S_SWITCHING
;
367 rconn_disconnect(sw
->rconn
);
372 case OFPTYPE_PACKET_IN
:
373 process_packet_in(sw
, ofpbuf_data(msg
));
376 case OFPTYPE_FLOW_REMOVED
:
382 case OFPTYPE_ECHO_REPLY
:
383 case OFPTYPE_FEATURES_REQUEST
:
384 case OFPTYPE_GET_CONFIG_REQUEST
:
385 case OFPTYPE_GET_CONFIG_REPLY
:
386 case OFPTYPE_SET_CONFIG
:
387 case OFPTYPE_PORT_STATUS
:
388 case OFPTYPE_PACKET_OUT
:
389 case OFPTYPE_FLOW_MOD
:
390 case OFPTYPE_GROUP_MOD
:
391 case OFPTYPE_PORT_MOD
:
392 case OFPTYPE_TABLE_MOD
:
393 case OFPTYPE_BARRIER_REQUEST
:
394 case OFPTYPE_BARRIER_REPLY
:
395 case OFPTYPE_QUEUE_GET_CONFIG_REQUEST
:
396 case OFPTYPE_QUEUE_GET_CONFIG_REPLY
:
397 case OFPTYPE_DESC_STATS_REQUEST
:
398 case OFPTYPE_DESC_STATS_REPLY
:
399 case OFPTYPE_FLOW_STATS_REQUEST
:
400 case OFPTYPE_FLOW_STATS_REPLY
:
401 case OFPTYPE_AGGREGATE_STATS_REQUEST
:
402 case OFPTYPE_AGGREGATE_STATS_REPLY
:
403 case OFPTYPE_TABLE_STATS_REQUEST
:
404 case OFPTYPE_TABLE_STATS_REPLY
:
405 case OFPTYPE_PORT_STATS_REQUEST
:
406 case OFPTYPE_PORT_STATS_REPLY
:
407 case OFPTYPE_QUEUE_STATS_REQUEST
:
408 case OFPTYPE_QUEUE_STATS_REPLY
:
409 case OFPTYPE_PORT_DESC_STATS_REQUEST
:
410 case OFPTYPE_PORT_DESC_STATS_REPLY
:
411 case OFPTYPE_ROLE_REQUEST
:
412 case OFPTYPE_ROLE_REPLY
:
413 case OFPTYPE_ROLE_STATUS
:
414 case OFPTYPE_SET_FLOW_FORMAT
:
415 case OFPTYPE_FLOW_MOD_TABLE_ID
:
416 case OFPTYPE_SET_PACKET_IN_FORMAT
:
417 case OFPTYPE_FLOW_AGE
:
418 case OFPTYPE_SET_CONTROLLER_ID
:
419 case OFPTYPE_FLOW_MONITOR_STATS_REQUEST
:
420 case OFPTYPE_FLOW_MONITOR_STATS_REPLY
:
421 case OFPTYPE_FLOW_MONITOR_CANCEL
:
422 case OFPTYPE_FLOW_MONITOR_PAUSED
:
423 case OFPTYPE_FLOW_MONITOR_RESUMED
:
424 case OFPTYPE_GET_ASYNC_REQUEST
:
425 case OFPTYPE_GET_ASYNC_REPLY
:
426 case OFPTYPE_SET_ASYNC_CONFIG
:
427 case OFPTYPE_METER_MOD
:
428 case OFPTYPE_GROUP_STATS_REQUEST
:
429 case OFPTYPE_GROUP_STATS_REPLY
:
430 case OFPTYPE_GROUP_DESC_STATS_REQUEST
:
431 case OFPTYPE_GROUP_DESC_STATS_REPLY
:
432 case OFPTYPE_GROUP_FEATURES_STATS_REQUEST
:
433 case OFPTYPE_GROUP_FEATURES_STATS_REPLY
:
434 case OFPTYPE_METER_STATS_REQUEST
:
435 case OFPTYPE_METER_STATS_REPLY
:
436 case OFPTYPE_METER_CONFIG_STATS_REQUEST
:
437 case OFPTYPE_METER_CONFIG_STATS_REPLY
:
438 case OFPTYPE_METER_FEATURES_STATS_REQUEST
:
439 case OFPTYPE_METER_FEATURES_STATS_REPLY
:
440 case OFPTYPE_TABLE_FEATURES_STATS_REQUEST
:
441 case OFPTYPE_TABLE_FEATURES_STATS_REPLY
:
442 case OFPTYPE_BUNDLE_CONTROL
:
443 case OFPTYPE_BUNDLE_ADD_MESSAGE
:
445 if (VLOG_IS_DBG_ENABLED()) {
446 char *s
= ofp_to_string(ofpbuf_data(msg
), ofpbuf_size(msg
), 2);
447 VLOG_DBG_RL(&rl
, "%016llx: OpenFlow packet ignored: %s",
455 send_features_request(struct lswitch
*sw
)
458 struct ofp_switch_config
*osc
;
459 int ofp_version
= rconn_get_version(sw
->rconn
);
461 ovs_assert(ofp_version
> 0 && ofp_version
< 0xff);
463 /* Send OFPT_FEATURES_REQUEST. */
464 b
= ofpraw_alloc(OFPRAW_OFPT_FEATURES_REQUEST
, ofp_version
, 0);
467 /* Send OFPT_SET_CONFIG. */
468 b
= ofpraw_alloc(OFPRAW_OFPT_SET_CONFIG
, ofp_version
, sizeof *osc
);
469 osc
= ofpbuf_put_zeros(b
, sizeof *osc
);
470 osc
->miss_send_len
= htons(OFP_DEFAULT_MISS_SEND_LEN
);
475 queue_tx(struct lswitch
*sw
, struct ofpbuf
*b
)
477 int retval
= rconn_send_with_limit(sw
->rconn
, b
, sw
->queued
, 10);
478 if (retval
&& retval
!= ENOTCONN
) {
479 if (retval
== EAGAIN
) {
480 VLOG_INFO_RL(&rl
, "%016llx: %s: tx queue overflow",
481 sw
->datapath_id
, rconn_get_name(sw
->rconn
));
483 VLOG_WARN_RL(&rl
, "%016llx: %s: send: %s",
484 sw
->datapath_id
, rconn_get_name(sw
->rconn
),
485 ovs_strerror(retval
));
491 process_switch_features(struct lswitch
*sw
, struct ofp_header
*oh
)
493 struct ofputil_switch_features features
;
494 struct ofputil_phy_port port
;
498 error
= ofputil_decode_switch_features(oh
, &features
, &b
);
500 VLOG_ERR("received invalid switch feature reply (%s)",
501 ofperr_to_string(error
));
505 sw
->datapath_id
= features
.datapath_id
;
507 while (!ofputil_pull_phy_port(oh
->version
, &b
, &port
)) {
508 struct lswitch_port
*lp
= shash_find_data(&sw
->queue_names
, port
.name
);
509 if (lp
&& hmap_node_is_null(&lp
->hmap_node
)) {
510 lp
->port_no
= port
.port_no
;
511 hmap_insert(&sw
->queue_numbers
, &lp
->hmap_node
,
512 hash_ofp_port(lp
->port_no
));
519 lswitch_choose_destination(struct lswitch
*sw
, const struct flow
*flow
)
523 /* Learn the source MAC. */
525 ovs_rwlock_wrlock(&sw
->ml
->rwlock
);
526 if (mac_learning_may_learn(sw
->ml
, flow
->dl_src
, 0)) {
527 struct mac_entry
*mac
= mac_learning_insert(sw
->ml
, flow
->dl_src
,
529 if (mac
->port
.ofp_port
!= flow
->in_port
.ofp_port
) {
530 VLOG_DBG_RL(&rl
, "%016llx: learned that "ETH_ADDR_FMT
" is on "
531 "port %"PRIu16
, sw
->datapath_id
,
532 ETH_ADDR_ARGS(flow
->dl_src
),
533 flow
->in_port
.ofp_port
);
535 mac
->port
.ofp_port
= flow
->in_port
.ofp_port
;
536 mac_learning_changed(sw
->ml
);
539 ovs_rwlock_unlock(&sw
->ml
->rwlock
);
542 /* Drop frames for reserved multicast addresses. */
543 if (eth_addr_is_reserved(flow
->dl_dst
)) {
547 out_port
= OFPP_FLOOD
;
549 struct mac_entry
*mac
;
551 ovs_rwlock_rdlock(&sw
->ml
->rwlock
);
552 mac
= mac_learning_lookup(sw
->ml
, flow
->dl_dst
, 0);
554 out_port
= mac
->port
.ofp_port
;
555 if (out_port
== flow
->in_port
.ofp_port
) {
556 /* Don't send a packet back out its input port. */
557 ovs_rwlock_unlock(&sw
->ml
->rwlock
);
561 ovs_rwlock_unlock(&sw
->ml
->rwlock
);
564 /* Check if we need to use "NORMAL" action. */
565 if (sw
->action_normal
&& out_port
!= OFPP_FLOOD
) {
573 get_queue_id(const struct lswitch
*sw
, ofp_port_t in_port
)
575 const struct lswitch_port
*port
;
577 HMAP_FOR_EACH_WITH_HASH (port
, hmap_node
, hash_ofp_port(in_port
),
578 &sw
->queue_numbers
) {
579 if (port
->port_no
== in_port
) {
580 return port
->queue_id
;
584 return sw
->default_queue
;
588 process_packet_in(struct lswitch
*sw
, const struct ofp_header
*oh
)
590 struct ofputil_packet_in pi
;
594 uint64_t ofpacts_stub
[64 / 8];
595 struct ofpbuf ofpacts
;
597 struct ofputil_packet_out po
;
603 error
= ofputil_decode_packet_in(&pi
, oh
);
605 VLOG_WARN_RL(&rl
, "failed to decode packet-in: %s",
606 ofperr_to_string(error
));
610 /* Ignore packets sent via output to OFPP_CONTROLLER. This library never
611 * uses such an action. You never know what experiments might be going on,
612 * though, and it seems best not to interfere with them. */
613 if (pi
.reason
!= OFPR_NO_MATCH
) {
617 /* Extract flow data from 'opi' into 'flow'. */
618 ofpbuf_use_const(&pkt
, pi
.packet
, pi
.packet_len
);
619 flow_extract(&pkt
, NULL
, &flow
);
620 flow
.in_port
.ofp_port
= pi
.fmd
.in_port
;
621 flow
.tunnel
.tun_id
= pi
.fmd
.tun_id
;
623 /* Choose output port. */
624 out_port
= lswitch_choose_destination(sw
, &flow
);
627 queue_id
= get_queue_id(sw
, pi
.fmd
.in_port
);
628 ofpbuf_use_stack(&ofpacts
, ofpacts_stub
, sizeof ofpacts_stub
);
629 if (out_port
== OFPP_NONE
) {
631 } else if (queue_id
== UINT32_MAX
632 || ofp_to_u16(out_port
) >= ofp_to_u16(OFPP_MAX
)) {
633 ofpact_put_OUTPUT(&ofpacts
)->port
= out_port
;
635 struct ofpact_enqueue
*enqueue
= ofpact_put_ENQUEUE(&ofpacts
);
636 enqueue
->port
= out_port
;
637 enqueue
->queue
= queue_id
;
639 ofpact_pad(&ofpacts
);
641 /* Prepare packet_out in case we need one. */
642 po
.buffer_id
= pi
.buffer_id
;
643 if (po
.buffer_id
== UINT32_MAX
) {
644 po
.packet
= ofpbuf_data(&pkt
);
645 po
.packet_len
= ofpbuf_size(&pkt
);
650 po
.in_port
= pi
.fmd
.in_port
;
651 po
.ofpacts
= ofpbuf_data(&ofpacts
);
652 po
.ofpacts_len
= ofpbuf_size(&ofpacts
);
654 /* Send the packet, and possibly the whole flow, to the output port. */
655 if (sw
->max_idle
>= 0 && (!sw
->ml
|| out_port
!= OFPP_FLOOD
)) {
656 struct ofputil_flow_mod fm
;
657 struct ofpbuf
*buffer
;
659 /* The output port is known, or we always flood everything, so add a
661 memset(&fm
, 0, sizeof fm
);
662 match_init(&fm
.match
, &flow
, &sw
->wc
);
663 ofputil_normalize_match_quiet(&fm
.match
);
664 fm
.priority
= 1; /* Must be > 0 because of table-miss flow entry. */
666 fm
.command
= OFPFC_ADD
;
667 fm
.idle_timeout
= sw
->max_idle
;
668 fm
.buffer_id
= pi
.buffer_id
;
669 fm
.out_port
= OFPP_NONE
;
670 fm
.ofpacts
= ofpbuf_data(&ofpacts
);
671 fm
.ofpacts_len
= ofpbuf_size(&ofpacts
);
672 buffer
= ofputil_encode_flow_mod(&fm
, sw
->protocol
);
674 queue_tx(sw
, buffer
);
676 /* If the switch didn't buffer the packet, we need to send a copy. */
677 if (pi
.buffer_id
== UINT32_MAX
&& out_port
!= OFPP_NONE
) {
678 queue_tx(sw
, ofputil_encode_packet_out(&po
, sw
->protocol
));
681 /* We don't know that MAC, or we don't set up flows. Send along the
682 * packet without setting up a flow. */
683 if (pi
.buffer_id
!= UINT32_MAX
|| out_port
!= OFPP_NONE
) {
684 queue_tx(sw
, ofputil_encode_packet_out(&po
, sw
->protocol
));
/* Answers the echo request 'rq' by queueing the reply built by
 * make_echo_reply() on 'sw''s connection. */
static void
process_echo_request(struct lswitch *sw, const struct ofp_header *rq)
{
    struct ofpbuf *reply = make_echo_reply(rq);

    queue_tx(sw, reply);
}