2 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 #include "learning-switch.h"
22 #include <netinet/in.h>
26 #include "byte-order.h"
27 #include "classifier.h"
30 #include "mac-learning.h"
32 #include "ofp-actions.h"
33 #include "ofp-errors.h"
35 #include "ofp-parse.h"
36 #include "ofp-print.h"
38 #include "openflow/openflow.h"
39 #include "poll-loop.h"
/* Names this file's logging module "learning_switch".
 * NOTE(review): presumably this is what the VLOG_* calls later in the file
 * log under; the macro's definition is not visible here -- confirm. */
47 VLOG_DEFINE_THIS_MODULE(learning_switch
);
/* Members of the per-port queue record that the rest of this file uses as
 * 'struct lswitch_port' (the opening and closing lines of the definition are
 * not visible in this listing).
 *
 * lswitch_create() allocates one record per entry in cfg->port_queues and
 * stores it in sw->queue_names under the port name; process_switch_features()
 * later fills in 'port_no' and links the record into sw->queue_numbers. */
50 struct hmap_node hmap_node
; /* Hash node for port number. */
51 ofp_port_t port_no
; /* OpenFlow port number. */
52 uint32_t queue_id
; /* OpenFlow queue number. */
/* Enumerators stored in the switch's 'state' member (declared in this file
 * as 'enum lswitch_state state'; the enum's opening and closing lines are
 * not visible in this listing).
 *
 * Transitions visible in this file:
 *   S_CONNECTING     -> S_FEATURES_REPLY  in lswitch_run(), after
 *                                         lswitch_handshake() has been called.
 *   S_FEATURES_REPLY -> S_SWITCHING       in lswitch_process_packet(), when
 *                                         process_switch_features() returns 0. */
56 S_CONNECTING
, /* Waiting for connection to complete. */
57 S_FEATURES_REPLY
, /* Waiting for features reply. */
58 S_SWITCHING
, /* Switching flows. */
/* Members of the learning-switch object that the functions below access
 * through 'sw' (the opening and closing lines of the definition are not
 * visible in this listing).
 *
 * NOTE(review): other code in this file also reads sw->rconn and
 * sw->max_idle, whose declarations are not visible here; the comment about
 * flow expiry below appears to document 'max_idle'. */

/* Current handshake/operating state; see the S_* enumerators. */
63 enum lswitch_state state
;
65 /* If nonnegative, the switch sets up flows that expire after the given
66 * number of seconds (or never expire, if the value is OFP_FLOW_PERMANENT).
67 * Otherwise, the switch processes every packet. */
/* Protocol used to encode flow mods and packet-outs; assigned at the end of
 * lswitch_handshake(). */
70 enum ofputil_protocol protocol
;
/* Datapath ID; assigned from the decoded features reply in
 * process_switch_features() and printed in log messages. */
71 unsigned long long int datapath_id
;
72 struct mac_learning
*ml
; /* NULL to act as hub instead of switch. */
73 struct flow_wildcards wc
; /* Wildcards to apply to flows. */
74 bool action_normal
; /* Use OFPP_NORMAL? */
76 /* Queue distribution. */
77 uint32_t default_queue
; /* Default OpenFlow queue, or UINT32_MAX. */
78 struct hmap queue_numbers
; /* Map from port number to lswitch_port. */
79 struct shash queue_names
; /* Map from port name to lswitch_port. */
81 /* Number of outgoing queued packets on the rconn. */
82 struct rconn_packet_counter
*queued
;
84 /* If true, do not reply to any messages from the switch (for debugging
88 /* Optional "flow mod" requests to send to the switch at connection time,
89 * to set up the flow table. */
90 const struct ofputil_flow_mod
*default_flows
;
/* Number of elements in 'default_flows' (used as the loop bound in
 * lswitch_handshake()). */
91 size_t n_default_flows
;
/* Bitmask of protocols acceptable for sending 'default_flows'; tested with
 * '&' against the current protocol in lswitch_handshake(). */
92 enum ofputil_protocol usable_protocols
;
95 /* The log messages here could actually be useful in debugging, so keep the
 * rate limit relatively high.
 *
 * This single limiter is shared by every VLOG_*_RL() call in this file.
 * NOTE(review): the meaning of the two initializer arguments (30, 300) is
 * defined by VLOG_RATE_LIMIT_INIT, which is not visible here -- confirm. */
97 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(30, 300);
/* Forward declarations for file-local helpers that are defined further down
 * but used before their definitions. */

/* Transmit path: queues an already-encoded message on the switch
 * connection. */
99 static void queue_tx(struct lswitch
*, struct ofpbuf
*);
100 static void send_features_request(struct lswitch
*);
/* Receive path: dispatches one received message by type, then the per-type
 * handlers. */
102 static void lswitch_process_packet(struct lswitch
*, const struct ofpbuf
*);
103 static enum ofperr
process_switch_features(struct lswitch
*,
104 struct ofp_header
*);
105 static void process_packet_in(struct lswitch
*, const struct ofp_header
*);
106 static void process_echo_request(struct lswitch
*, const struct ofp_header
*);
108 /* Creates and returns a new learning switch whose configuration is given by
 * 'cfg'.
 *
 * 'rconn' is used to send out an OpenFlow features request.
 *
 * What the visible code sets up:
 *   - The object is allocated with xzalloc() and starts in S_CONNECTING.
 *   - A MAC learning table is created only when cfg->mode is LSW_LEARN.
 *   - 'action_normal' is true exactly when cfg->mode is LSW_NORMAL.
 *   - The OpenFlow 1.0 wildcard bits chosen from cfg->wildcards are
 *     converted into sw->wc.
 *   - One lswitch_port is allocated per entry of cfg->port_queues and stored
 *     in sw->queue_names by port name; it is not yet in sw->queue_numbers.
 *   - cfg->default_flows is stored by pointer (not copied), together with
 *     its count and the usable protocol mask.
 *
 * NOTE(review): several original lines of this function are not visible in
 * this listing (return type, local declarations, some 'case' labels, the
 * tail of the conditional expression, and the return statement). */
113 lswitch_create(struct rconn
*rconn
, const struct lswitch_config
*cfg
)
118 sw
= xzalloc(sizeof *sw
);
/* The handshake has not started yet; lswitch_run() advances the state. */
120 sw
->state
= S_CONNECTING
;
121 sw
->max_idle
= cfg
->max_idle
;
/* Only LSW_LEARN mode gets a MAC learning table. */
123 sw
->ml
= (cfg
->mode
== LSW_LEARN
124 ? mac_learning_create(MAC_ENTRY_DEFAULT_IDLE_TIME
)
126 sw
->action_normal
= cfg
->mode
== LSW_NORMAL
;
/* Pick the OpenFlow 1.0 wildcard bits ('ofpfw') from the configured value.
 * The 'case' labels selecting between the branches below are not visible in
 * this listing. */
128 switch (cfg
->wildcards
) {
134 /* Try to wildcard as many fields as possible, but we cannot
135 * wildcard all fields. We need in_port to detect moves. We need
136 * Ethernet source and dest and VLAN VID to do L2 learning. */
137 ofpfw
= (OFPFW10_DL_TYPE
| OFPFW10_DL_VLAN_PCP
138 | OFPFW10_NW_SRC_ALL
| OFPFW10_NW_DST_ALL
139 | OFPFW10_NW_TOS
| OFPFW10_NW_PROTO
140 | OFPFW10_TP_SRC
| OFPFW10_TP_DST
);
/* Otherwise the configured wildcard bits are used as given. */
144 ofpfw
= cfg
->wildcards
;
147 ofputil_wildcard_from_ofpfw10(ofpfw
, &sw
->wc
);
/* Queue distribution: start with empty maps, then pre-register one record
 * per configured port name. */
149 sw
->default_queue
= cfg
->default_queue
;
150 hmap_init(&sw
->queue_numbers
);
151 shash_init(&sw
->queue_names
);
152 if (cfg
->port_queues
) {
153 struct simap_node
*node
;
155 SIMAP_FOR_EACH (node
, cfg
->port_queues
) {
156 struct lswitch_port
*port
= xmalloc(sizeof *port
);
/* A nullified hmap node marks the record as "port number not known yet";
 * process_switch_features() tests this with hmap_node_is_null(). */
157 hmap_node_nullify(&port
->hmap_node
);
158 port
->queue_id
= node
->data
;
159 shash_add(&sw
->queue_names
, node
->name
, port
);
/* Default flows are referenced, not duplicated. */
163 sw
->default_flows
= cfg
->default_flows
;
164 sw
->n_default_flows
= cfg
->n_default_flows
;
165 sw
->usable_protocols
= cfg
->usable_protocols
;
/* Counter passed to rconn_send_with_limit() in queue_tx(). */
167 sw
->queued
= rconn_packet_counter_create();
/* Starts the OpenFlow handshake on sw->rconn and, if default flows were
 * configured, tries to send them.
 *
 * Steps visible in this listing:
 *   1. send_features_request(sw).
 *   2. Derive the working protocol from the connection's OpenFlow version.
 *   3. If default flows exist and the working protocol is not in
 *      sw->usable_protocols, take rightmost_1bit(sw->usable_protocols) as
 *      the target and send the message(s) produced by
 *      ofputil_encode_set_protocol(), which also updates 'protocol'.
 *   4. If the protocol is now usable, encode and send each default flow,
 *      stopping at the first send error.
 *   5. Store the resulting protocol in sw->protocol.
 *
 * The visible failure paths only log at INFO level (rate-limited).
 *
 * NOTE(review): the return type, the declarations of 'error' and 'i', and
 * the loop/condition lines around the set-protocol exchange are not visible
 * in this listing. */
173 lswitch_handshake(struct lswitch
*sw
)
175 enum ofputil_protocol protocol
;
177 send_features_request(sw
);
179 protocol
= ofputil_protocol_from_ofp_version(rconn_get_version(sw
->rconn
));
180 if (sw
->default_flows
) {
181 struct ofpbuf
*msg
= NULL
;
185 /* If the initial protocol isn't good enough for default_flows, then
186 * pick one that will work and encode messages to set up that
189 * This could be improved by actually negotiating a mutually acceptable
190 * flow format with the switch, but that would require an asynchronous
191 * state machine. This version ought to work fine in practice. */
192 if (!(protocol
& sw
->usable_protocols
)) {
193 enum ofputil_protocol want
= rightmost_1bit(sw
->usable_protocols
);
/* 'protocol' is passed both as the current value and as the output
 * argument, so it is updated by the encoder. */
195 msg
= ofputil_encode_set_protocol(protocol
, want
, &protocol
);
199 error
= rconn_send(sw
->rconn
, msg
, NULL
);
/* Only send the default flows if a usable protocol was reached. */
202 if (protocol
& sw
->usable_protocols
) {
203 for (i
= 0; !error
&& i
< sw
->n_default_flows
; i
++) {
204 msg
= ofputil_encode_flow_mod(&sw
->default_flows
[i
], protocol
);
205 error
= rconn_send(sw
->rconn
, msg
, NULL
);
209 VLOG_INFO_RL(&rl
, "%s: failed to queue default flows (%s)",
210 rconn_get_name(sw
->rconn
), ovs_strerror(error
));
213 VLOG_INFO_RL(&rl
, "%s: failed to set usable protocol",
214 rconn_get_name(sw
->rconn
));
/* Later flow mods and packet-outs are encoded with this protocol. */
217 sw
->protocol
= protocol
;
/* Returns the result of rconn_is_alive() for the connection owned by 'sw'.
 * The switch keeps no liveness state of its own. */
221 lswitch_is_alive(const struct lswitch
*sw
)
223 return rconn_is_alive(sw
->rconn
);
/* Tears down 'sw'.
 *
 * Visible steps: destroys the connection, unlinks every lswitch_port from
 * sw->queue_numbers, destroys sw->queue_names, drops the reference on the
 * MAC learning table, and destroys the queued-packet counter.
 *
 * NOTE(review): the lines that would free each port record and 'sw' itself
 * (and any NULL check on 'sw') are not visible in this listing -- confirm
 * against the full file.
 *
 * NOTE(review): port records created in lswitch_create() are only linked
 * into sw->queue_numbers once process_switch_features() finds a matching
 * port name.  Records that never matched are reachable only through
 * sw->queue_names, so verify that they are released here too; otherwise they
 * leak. */
228 lswitch_destroy(struct lswitch
*sw
)
231 struct lswitch_port
*node
, *next
;
233 rconn_destroy(sw
->rconn
);
/* The _SAFE iterator is needed because each node is removed while
 * iterating. */
234 HMAP_FOR_EACH_SAFE (node
, next
, hmap_node
, &sw
->queue_numbers
) {
235 hmap_remove(&sw
->queue_numbers
, &node
->hmap_node
);
238 shash_destroy(&sw
->queue_names
);
239 mac_learning_unref(sw
->ml
);
240 rconn_packet_counter_destroy(sw
->queued
);
245 /* Performs one round of periodic processing for 'sw'.
 *
 * NOTE(review): the previous comment here said that receiving packets is
 * left to the caller.  The code below calls rconn_recv() and dispatches the
 * result itself, so that statement no longer matches the implementation.
 *
 * Visible steps:
 *   1. Run MAC learning maintenance while holding the table's write lock.
 *   2. Run the connection.
 *   3. While in S_CONNECTING: once rconn_get_version() no longer returns
 *      -1, perform the handshake and move to S_FEATURES_REPLY.
 *   4. Otherwise receive and process messages, at most 50 per call.
 *
 * NOTE(review): the guards around steps 1, 3 and 4 (for example a check
 * that sw->ml is non-null, an early return, the end-of-queue test and the
 * release of each message) are not visible in this listing. */
248 lswitch_run(struct lswitch
*sw
)
253 ovs_rwlock_wrlock(&sw
->ml
->rwlock
);
254 mac_learning_run(sw
->ml
);
255 ovs_rwlock_unlock(&sw
->ml
->rwlock
);
258 rconn_run(sw
->rconn
);
260 if (sw
->state
== S_CONNECTING
) {
261 if (rconn_get_version(sw
->rconn
) != -1) {
262 lswitch_handshake(sw
);
263 sw
->state
= S_FEATURES_REPLY
;
/* Bound the work done per call to 50 received messages. */
268 for (i
= 0; i
< 50; i
++) {
271 msg
= rconn_recv(sw
->rconn
);
277 lswitch_process_packet(sw
, msg
);
/* Registers the wake-up conditions for 'sw': MAC learning maintenance
 * (queried under the table's read lock), connection activity, and message
 * arrival on the connection.  This is the counterpart of lswitch_run().
 *
 * NOTE(review): any guard on sw->ml being non-null is not visible in this
 * listing. */
284 lswitch_wait(struct lswitch
*sw
)
/* A read lock is taken here, whereas lswitch_run() takes the write lock. */
287 ovs_rwlock_rdlock(&sw
->ml
->rwlock
);
288 mac_learning_wait(sw
->ml
);
289 ovs_rwlock_unlock(&sw
->ml
->rwlock
);
291 rconn_run_wait(sw
->rconn
);
292 rconn_recv_wait(sw
->rconn
);
295 /* Processes 'msg', which should be an OpenFlow message received on 'rconn',
296 * according to the learning switch state in 'sw'. The most likely result of
297 * processing is that flow-setup and packet-out OpenFlow messages will be sent
298 * out on 'rconn'. */
/* Dispatches one received OpenFlow message by its decoded type.
 *
 * - If the type cannot be decoded (ofptype_pull() returns nonzero) the
 *   message takes the error branch.
 * - While waiting for the features reply, only echo requests and features
 *   replies pass the state filter.
 * - Echo requests, features replies and packet-ins go to their handlers.
 * - The long list of remaining types shares one branch that logs the
 *   message at debug level as ignored.
 *
 * NOTE(review): the return type, the local declarations, the 'switch' line,
 * the 'break'/'return' statements and the bodies of the two early-exit
 * branches are not visible in this listing. */
300 lswitch_process_packet(struct lswitch
*sw
, const struct ofpbuf
*msg
)
306 if (ofptype_pull(&type
, &b
)) {
/* Before the features reply arrives, anything other than an echo request or
 * the features reply itself is filtered out here. */
310 if (sw
->state
== S_FEATURES_REPLY
311 && type
!= OFPTYPE_ECHO_REQUEST
312 && type
!= OFPTYPE_FEATURES_REPLY
) {
317 case OFPTYPE_ECHO_REQUEST
:
318 process_echo_request(sw
, msg
->data
);
321 case OFPTYPE_FEATURES_REPLY
:
/* A features reply is only acted on while one is expected.  A successful
 * decode moves the switch to S_SWITCHING; the visible rconn_disconnect()
 * belongs to the other outcome. */
322 if (sw
->state
== S_FEATURES_REPLY
) {
323 if (!process_switch_features(sw
, msg
->data
)) {
324 sw
->state
= S_SWITCHING
;
326 rconn_disconnect(sw
->rconn
);
331 case OFPTYPE_PACKET_IN
:
332 process_packet_in(sw
, msg
->data
);
335 case OFPTYPE_FLOW_REMOVED
:
/* Every type listed below falls through to the shared "ignored" logging
 * branch at the end. */
341 case OFPTYPE_ECHO_REPLY
:
342 case OFPTYPE_FEATURES_REQUEST
:
343 case OFPTYPE_GET_CONFIG_REQUEST
:
344 case OFPTYPE_GET_CONFIG_REPLY
:
345 case OFPTYPE_SET_CONFIG
:
346 case OFPTYPE_PORT_STATUS
:
347 case OFPTYPE_PACKET_OUT
:
348 case OFPTYPE_FLOW_MOD
:
349 case OFPTYPE_GROUP_MOD
:
350 case OFPTYPE_PORT_MOD
:
351 case OFPTYPE_TABLE_MOD
:
352 case OFPTYPE_BARRIER_REQUEST
:
353 case OFPTYPE_BARRIER_REPLY
:
354 case OFPTYPE_QUEUE_GET_CONFIG_REQUEST
:
355 case OFPTYPE_QUEUE_GET_CONFIG_REPLY
:
356 case OFPTYPE_DESC_STATS_REQUEST
:
357 case OFPTYPE_DESC_STATS_REPLY
:
358 case OFPTYPE_FLOW_STATS_REQUEST
:
359 case OFPTYPE_FLOW_STATS_REPLY
:
360 case OFPTYPE_AGGREGATE_STATS_REQUEST
:
361 case OFPTYPE_AGGREGATE_STATS_REPLY
:
362 case OFPTYPE_TABLE_STATS_REQUEST
:
363 case OFPTYPE_TABLE_STATS_REPLY
:
364 case OFPTYPE_PORT_STATS_REQUEST
:
365 case OFPTYPE_PORT_STATS_REPLY
:
366 case OFPTYPE_QUEUE_STATS_REQUEST
:
367 case OFPTYPE_QUEUE_STATS_REPLY
:
368 case OFPTYPE_PORT_DESC_STATS_REQUEST
:
369 case OFPTYPE_PORT_DESC_STATS_REPLY
:
370 case OFPTYPE_ROLE_REQUEST
:
371 case OFPTYPE_ROLE_REPLY
:
372 case OFPTYPE_ROLE_STATUS
:
373 case OFPTYPE_SET_FLOW_FORMAT
:
374 case OFPTYPE_FLOW_MOD_TABLE_ID
:
375 case OFPTYPE_SET_PACKET_IN_FORMAT
:
376 case OFPTYPE_FLOW_AGE
:
377 case OFPTYPE_SET_CONTROLLER_ID
:
378 case OFPTYPE_FLOW_MONITOR_STATS_REQUEST
:
379 case OFPTYPE_FLOW_MONITOR_STATS_REPLY
:
380 case OFPTYPE_FLOW_MONITOR_CANCEL
:
381 case OFPTYPE_FLOW_MONITOR_PAUSED
:
382 case OFPTYPE_FLOW_MONITOR_RESUMED
:
383 case OFPTYPE_GET_ASYNC_REQUEST
:
384 case OFPTYPE_GET_ASYNC_REPLY
:
385 case OFPTYPE_SET_ASYNC_CONFIG
:
386 case OFPTYPE_METER_MOD
:
387 case OFPTYPE_GROUP_STATS_REQUEST
:
388 case OFPTYPE_GROUP_STATS_REPLY
:
389 case OFPTYPE_GROUP_DESC_STATS_REQUEST
:
390 case OFPTYPE_GROUP_DESC_STATS_REPLY
:
391 case OFPTYPE_GROUP_FEATURES_STATS_REQUEST
:
392 case OFPTYPE_GROUP_FEATURES_STATS_REPLY
:
393 case OFPTYPE_METER_STATS_REQUEST
:
394 case OFPTYPE_METER_STATS_REPLY
:
395 case OFPTYPE_METER_CONFIG_STATS_REQUEST
:
396 case OFPTYPE_METER_CONFIG_STATS_REPLY
:
397 case OFPTYPE_METER_FEATURES_STATS_REQUEST
:
398 case OFPTYPE_METER_FEATURES_STATS_REPLY
:
399 case OFPTYPE_TABLE_FEATURES_STATS_REQUEST
:
400 case OFPTYPE_TABLE_FEATURES_STATS_REPLY
:
/* The message is formatted only when debug logging is enabled, so the
 * string conversion is skipped in normal operation. */
402 if (VLOG_IS_DBG_ENABLED()) {
403 char *s
= ofp_to_string(msg
->data
, msg
->size
, 2);
404 VLOG_DBG_RL(&rl
, "%016llx: OpenFlow packet ignored: %s",
/* Builds the two messages that open the conversation with the switch: an
 * OFPT_FEATURES_REQUEST and an OFPT_SET_CONFIG whose miss_send_len is
 * OFP_DEFAULT_MISS_SEND_LEN.
 *
 * Both messages are allocated for the OpenFlow version currently reported
 * by the connection, which is asserted to lie strictly between 0 and 0xff.
 *
 * NOTE(review): the return type, the declaration of 'b' and the statements
 * that hand each buffer to the transmit path are not visible in this
 * listing. */
412 send_features_request(struct lswitch
*sw
)
415 struct ofp_switch_config
*osc
;
416 int ofp_version
= rconn_get_version(sw
->rconn
);
418 ovs_assert(ofp_version
> 0 && ofp_version
< 0xff);
420 /* Send OFPT_FEATURES_REQUEST. */
421 b
= ofpraw_alloc(OFPRAW_OFPT_FEATURES_REQUEST
, ofp_version
, 0);
424 /* Send OFPT_SET_CONFIG. */
425 b
= ofpraw_alloc(OFPRAW_OFPT_SET_CONFIG
, ofp_version
, sizeof *osc
);
/* The config body is zero-filled first; only miss_send_len is then set, in
 * network byte order. */
426 osc
= ofpbuf_put_zeros(b
, sizeof *osc
);
427 osc
->miss_send_len
= htons(OFP_DEFAULT_MISS_SEND_LEN
);
/* Sends 'b' on the switch connection using rconn_send_with_limit(), counted
 * against sw->queued with a limit of 10.
 *
 * Error reporting:
 *   - 0 and ENOTCONN: nothing is logged.
 *   - EAGAIN: logged at INFO level as a tx queue overflow.
 *   - The remaining visible log call reports the error text at WARN level.
 * All log calls are rate-limited through 'rl'.
 *
 * NOTE(review): ownership of 'b' after the call is decided by
 * rconn_send_with_limit(), which is not visible here -- confirm before
 * reusing or freeing 'b' in callers. */
432 queue_tx(struct lswitch
*sw
, struct ofpbuf
*b
)
434 int retval
= rconn_send_with_limit(sw
->rconn
, b
, sw
->queued
, 10);
435 if (retval
&& retval
!= ENOTCONN
) {
436 if (retval
== EAGAIN
) {
437 VLOG_INFO_RL(&rl
, "%016llx: %s: tx queue overflow",
438 sw
->datapath_id
, rconn_get_name(sw
->rconn
));
440 VLOG_WARN_RL(&rl
, "%016llx: %s: send: %s",
441 sw
->datapath_id
, rconn_get_name(sw
->rconn
),
442 ovs_strerror(retval
));
/* Handles a features reply 'oh' from the switch.
 *
 * Decodes the reply, records the datapath ID in 'sw', then walks the
 * physical ports carried in the message.  For each port whose name has a
 * pre-registered queue record that is not yet linked by number, the port
 * number is stored and the record is inserted into sw->queue_numbers.
 *
 * The caller treats a zero return as success (see lswitch_process_packet()).
 *
 * NOTE(review): the return type, the declarations of 'error' and 'b', the
 * error test around the VLOG_ERR call and the return statements are not
 * visible in this listing. */
448 process_switch_features(struct lswitch
*sw
, struct ofp_header
*oh
)
450 struct ofputil_switch_features features
;
451 struct ofputil_phy_port port
;
455 error
= ofputil_decode_switch_features(oh
, &features
, &b
);
457 VLOG_ERR("received invalid switch feature reply (%s)",
458 ofperr_to_string(error
));
462 sw
->datapath_id
= features
.datapath_id
;
/* Keep pulling ports until ofputil_pull_phy_port() reports nonzero. */
464 while (!ofputil_pull_phy_port(oh
->version
, &b
, &port
)) {
465 struct lswitch_port
*lp
= shash_find_data(&sw
->queue_names
, port
.name
);
/* The null-node check ensures a record is inserted at most once. */
466 if (lp
&& hmap_node_is_null(&lp
->hmap_node
)) {
467 lp
->port_no
= port
.port_no
;
468 hmap_insert(&sw
->queue_numbers
, &lp
->hmap_node
,
469 hash_ofp_port(lp
->port_no
));
/* Chooses the output port for a packet described by 'flow', learning the
 * source address along the way.
 *
 * Visible behavior:
 *   - Learning: under the MAC table write lock, if the source address may
 *     be learned, its entry is inserted or looked up; when the recorded port
 *     differs from the input port the change is logged at debug level, the
 *     entry is updated and the table is marked changed.
 *   - Reserved destination addresses get their own branch (a drop,
 *     according to the existing comment).
 *   - The output port starts as OFPP_FLOOD; under the read lock a lookup of
 *     the destination address can replace it with the learned port.  A
 *     learned port equal to the input port takes an early-exit branch that
 *     releases the lock first.
 *   - With 'action_normal' set, a non-flood result takes a final branch
 *     whose body is not visible here.
 *
 * NOTE(review): the return type, the declaration of 'out_port', the guards
 * on sw->ml, the tail of the mac_learning_insert() call, the null check on
 * the lookup result and all return statements are not visible in this
 * listing. */
476 lswitch_choose_destination(struct lswitch
*sw
, const struct flow
*flow
)
480 /* Learn the source MAC. */
482 ovs_rwlock_wrlock(&sw
->ml
->rwlock
);
483 if (mac_learning_may_learn(sw
->ml
, flow
->dl_src
, 0)) {
484 struct mac_entry
*mac
= mac_learning_insert(sw
->ml
, flow
->dl_src
,
/* Only a change of port is logged and recorded. */
486 if (mac
->port
.ofp_port
!= flow
->in_port
.ofp_port
) {
487 VLOG_DBG_RL(&rl
, "%016llx: learned that "ETH_ADDR_FMT
" is on "
488 "port %"PRIu16
, sw
->datapath_id
,
489 ETH_ADDR_ARGS(flow
->dl_src
),
490 flow
->in_port
.ofp_port
);
492 mac
->port
.ofp_port
= flow
->in_port
.ofp_port
;
493 mac_learning_changed(sw
->ml
);
496 ovs_rwlock_unlock(&sw
->ml
->rwlock
);
499 /* Drop frames for reserved multicast addresses. */
500 if (eth_addr_is_reserved(flow
->dl_dst
)) {
/* Flood unless the destination turns out to be known. */
504 out_port
= OFPP_FLOOD
;
506 struct mac_entry
*mac
;
508 ovs_rwlock_rdlock(&sw
->ml
->rwlock
);
509 mac
= mac_learning_lookup(sw
->ml
, flow
->dl_dst
, 0);
511 out_port
= mac
->port
.ofp_port
;
512 if (out_port
== flow
->in_port
.ofp_port
) {
513 /* Don't send a packet back out its input port. */
/* The lock is released on this early-exit path as well as on the normal
 * path below, so it stays balanced. */
514 ovs_rwlock_unlock(&sw
->ml
->rwlock
);
518 ovs_rwlock_unlock(&sw
->ml
->rwlock
);
521 /* Check if we need to use "NORMAL" action. */
522 if (sw
->action_normal
&& out_port
!= OFPP_FLOOD
) {
/* Returns the queue ID configured for 'in_port'.
 *
 * Searches the sw->queue_numbers bucket for hash_ofp_port(in_port) and
 * returns the queue_id of the record whose port_no equals 'in_port'.  If no
 * record matches, returns sw->default_queue. */
530 get_queue_id(const struct lswitch
*sw
, ofp_port_t in_port
)
532 const struct lswitch_port
*port
;
534 HMAP_FOR_EACH_WITH_HASH (port
, hmap_node
, hash_ofp_port(in_port
),
535 &sw
->queue_numbers
) {
/* Records with the same hash can belong to different ports, so compare the
 * port number itself. */
536 if (port
->port_no
== in_port
) {
537 return port
->queue_id
;
541 return sw
->default_queue
;
/* Handles a packet-in message 'oh': decides where the packet should go and
 * tells the switch, either by installing a flow or by sending the packet
 * out directly.
 *
 * Visible steps:
 *   1. Decode the packet-in; a decode failure is logged at WARN level.
 *   2. Only packet-ins whose reason is OFPR_NO_MATCH are acted on.
 *   3. Extract the flow from the packet and copy the input port and tunnel
 *      ID from the packet-in metadata.
 *   4. Choose the output port with lswitch_choose_destination().
 *   5. Build the action list: nothing for OFPP_NONE; a plain output when
 *      there is no queue (queue ID UINT32_MAX) or the port number is not
 *      below OFPP_MAX; otherwise an enqueue on the selected queue.
 *   6. Prepare a packet-out that carries the packet data only when the
 *      switch did not buffer it (buffer ID UINT32_MAX).
 *   7. If flows are being set up (max_idle >= 0) and either there is no MAC
 *      table or the port is not OFPP_FLOOD, send a flow mod, plus a
 *      packet-out when the packet was unbuffered and has a destination.
 *      Otherwise send just the packet-out when there is something to send.
 *
 * NOTE(review): the return type, several local declarations, the error
 * test, the early returns, the 'else' lines and two assignments inside the
 * flow-mod setup are not visible in this listing. */
545 process_packet_in(struct lswitch
*sw
, const struct ofp_header
*oh
)
547 struct ofputil_packet_in pi
;
/* Small stack-backed buffer for the action list built below. */
551 uint64_t ofpacts_stub
[64 / 8];
552 struct ofpbuf ofpacts
;
554 struct ofputil_packet_out po
;
560 error
= ofputil_decode_packet_in(&pi
, oh
);
562 VLOG_WARN_RL(&rl
, "failed to decode packet-in: %s",
563 ofperr_to_string(error
));
567 /* Ignore packets sent via output to OFPP_CONTROLLER. This library never
568 * uses such an action. You never know what experiments might be going on,
569 * though, and it seems best not to interfere with them. */
570 if (pi
.reason
!= OFPR_NO_MATCH
) {
574 /* Extract flow data from 'pi' into 'flow'. */
575 ofpbuf_use_const(&pkt
, pi
.packet
, pi
.packet_len
);
576 flow_extract(&pkt
, NULL
, &flow
);
577 flow
.in_port
.ofp_port
= pi
.fmd
.in_port
;
578 flow
.tunnel
.tun_id
= pi
.fmd
.tun_id
;
580 /* Choose output port. */
581 out_port
= lswitch_choose_destination(sw
, &flow
);
/* Build the action list for the chosen port and queue. */
584 queue_id
= get_queue_id(sw
, pi
.fmd
.in_port
);
585 ofpbuf_use_stack(&ofpacts
, ofpacts_stub
, sizeof ofpacts_stub
);
586 if (out_port
== OFPP_NONE
) {
/* No queue configured, or the port number is not below OFPP_MAX: use a
 * plain output action instead of an enqueue. */
588 } else if (queue_id
== UINT32_MAX
589 || ofp_to_u16(out_port
) >= ofp_to_u16(OFPP_MAX
)) {
590 ofpact_put_OUTPUT(&ofpacts
)->port
= out_port
;
592 struct ofpact_enqueue
*enqueue
= ofpact_put_ENQUEUE(&ofpacts
);
593 enqueue
->port
= out_port
;
594 enqueue
->queue
= queue_id
;
596 ofpact_pad(&ofpacts
);
598 /* Prepare packet_out in case we need one. */
599 po
.buffer_id
= pi
.buffer_id
;
/* An unbuffered packet has to be carried in the packet-out itself. */
600 if (po
.buffer_id
== UINT32_MAX
) {
601 po
.packet
= pkt
.data
;
602 po
.packet_len
= pkt
.size
;
607 po
.in_port
= pi
.fmd
.in_port
;
608 po
.ofpacts
= ofpacts
.data
;
609 po
.ofpacts_len
= ofpacts
.size
;
611 /* Send the packet, and possibly the whole flow, to the output port. */
612 if (sw
->max_idle
>= 0 && (!sw
->ml
|| out_port
!= OFPP_FLOOD
)) {
613 struct ofputil_flow_mod fm
;
614 struct ofpbuf
*buffer
;
616 /* The output port is known, or we always flood everything, so add a
618 memset(&fm
, 0, sizeof fm
);
619 match_init(&fm
.match
, &flow
, &sw
->wc
);
620 ofputil_normalize_match_quiet(&fm
.match
);
623 fm
.command
= OFPFC_ADD
;
624 fm
.idle_timeout
= sw
->max_idle
;
625 fm
.buffer_id
= pi
.buffer_id
;
626 fm
.out_port
= OFPP_NONE
;
627 fm
.ofpacts
= ofpacts
.data
;
628 fm
.ofpacts_len
= ofpacts
.size
;
629 buffer
= ofputil_encode_flow_mod(&fm
, sw
->protocol
);
631 queue_tx(sw
, buffer
);
633 /* If the switch didn't buffer the packet, we need to send a copy. */
634 if (pi
.buffer_id
== UINT32_MAX
&& out_port
!= OFPP_NONE
) {
635 queue_tx(sw
, ofputil_encode_packet_out(&po
, sw
->protocol
));
638 /* We don't know that MAC, or we don't set up flows. Send along the
639 * packet without setting up a flow. */
/* Nothing is sent only when the packet is unbuffered and has no
 * destination. */
640 if (pi
.buffer_id
!= UINT32_MAX
|| out_port
!= OFPP_NONE
) {
641 queue_tx(sw
, ofputil_encode_packet_out(&po
, sw
->protocol
));
/* Answers the echo request 'rq' by queuing the message built by
 * make_echo_reply(rq) on the switch connection through queue_tx(). */
647 process_echo_request(struct lswitch
*sw
, const struct ofp_header
*rq
)
649 queue_tx(sw
, make_echo_reply(rq
));