2 * Copyright (c) 2008, 2009, 2010, 2011, 2012 Nicira, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 #include "learning-switch.h"
22 #include <netinet/in.h>
26 #include "byte-order.h"
27 #include "classifier.h"
30 #include "mac-learning.h"
32 #include "ofp-actions.h"
33 #include "ofp-errors.h"
35 #include "ofp-parse.h"
36 #include "ofp-print.h"
38 #include "openflow/openflow.h"
39 #include "poll-loop.h"
/* Declares the "learning_switch" log module used by the VLOG_* calls in this
 * file. */
VLOG_DEFINE_THIS_MODULE(learning_switch);
50 struct hmap_node hmap_node
; /* Hash node for port number. */
51 uint16_t port_no
; /* OpenFlow port number, in host byte order. */
52 uint32_t queue_id
; /* OpenFlow queue number. */
/* Connection state of a learning switch.  lswitch_run() moves from
 * S_CONNECTING to S_FEATURES_REPLY after the handshake is sent, and
 * lswitch_process_packet() moves to S_SWITCHING once a features reply has
 * been processed successfully. */
enum lswitch_state {
    S_CONNECTING,               /* Waiting for connection to complete. */
    S_FEATURES_REPLY,           /* Waiting for features reply. */
    S_SWITCHING,                /* Switching flows. */
};
63 enum lswitch_state state
;
65 /* If nonnegative, the switch sets up flows that expire after the given
66 * number of seconds (or never expire, if the value is OFP_FLOW_PERMANENT).
67 * Otherwise, the switch processes every packet. */
70 enum ofputil_protocol protocol
;
71 unsigned long long int datapath_id
;
72 struct mac_learning
*ml
; /* NULL to act as hub instead of switch. */
73 struct flow_wildcards wc
; /* Wildcards to apply to flows. */
74 bool action_normal
; /* Use OFPP_NORMAL? */
76 /* Queue distribution. */
77 uint32_t default_queue
; /* Default OpenFlow queue, or UINT32_MAX. */
78 struct hmap queue_numbers
; /* Map from port number to lswitch_port. */
79 struct shash queue_names
; /* Map from port name to lswitch_port. */
81 /* Number of outgoing queued packets on the rconn. */
82 struct rconn_packet_counter
*queued
;
84 /* If true, do not reply to any messages from the switch (for debugging
88 /* Optional "flow mod" requests to send to the switch at connection time,
89 * to set up the flow table. */
90 const struct ofputil_flow_mod
*default_flows
;
91 size_t n_default_flows
;
94 /* The log messages here could actually be useful in debugging, so keep the
95 * rate limit relatively high. */
96 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(30, 300);
/* Forward declarations for helpers that are used before they are defined. */
static void queue_tx(struct lswitch *sw, struct ofpbuf *b);
static void send_features_request(struct lswitch *sw);

static void lswitch_process_packet(struct lswitch *sw,
                                   const struct ofpbuf *msg);
static enum ofperr process_switch_features(struct lswitch *sw,
                                           struct ofp_header *oh);
static void process_packet_in(struct lswitch *sw,
                              const struct ofp_header *oh);
static void process_echo_request(struct lswitch *sw,
                                 const struct ofp_header *rq);
107 /* Creates and returns a new learning switch whose configuration is given by
110 * 'rconn' is used to send out an OpenFlow features request. */
112 lswitch_create(struct rconn
*rconn
, const struct lswitch_config
*cfg
)
117 sw
= xzalloc(sizeof *sw
);
119 sw
->state
= S_CONNECTING
;
120 sw
->max_idle
= cfg
->max_idle
;
122 sw
->ml
= (cfg
->mode
== LSW_LEARN
123 ? mac_learning_create(MAC_ENTRY_DEFAULT_IDLE_TIME
)
125 sw
->action_normal
= cfg
->mode
== LSW_NORMAL
;
127 switch (cfg
->wildcards
) {
133 /* Try to wildcard as many fields as possible, but we cannot
134 * wildcard all fields. We need in_port to detect moves. We need
135 * Ethernet source and dest and VLAN VID to do L2 learning. */
136 ofpfw
= (OFPFW10_DL_TYPE
| OFPFW10_DL_VLAN_PCP
137 | OFPFW10_NW_SRC_ALL
| OFPFW10_NW_DST_ALL
138 | OFPFW10_NW_TOS
| OFPFW10_NW_PROTO
139 | OFPFW10_TP_SRC
| OFPFW10_TP_DST
);
143 ofpfw
= cfg
->wildcards
;
146 ofputil_wildcard_from_ofpfw10(ofpfw
, &sw
->wc
);
148 sw
->default_queue
= cfg
->default_queue
;
149 hmap_init(&sw
->queue_numbers
);
150 shash_init(&sw
->queue_names
);
151 if (cfg
->port_queues
) {
152 struct simap_node
*node
;
154 SIMAP_FOR_EACH (node
, cfg
->port_queues
) {
155 struct lswitch_port
*port
= xmalloc(sizeof *port
);
156 hmap_node_nullify(&port
->hmap_node
);
157 port
->queue_id
= node
->data
;
158 shash_add(&sw
->queue_names
, node
->name
, port
);
162 sw
->default_flows
= cfg
->default_flows
;
163 sw
->n_default_flows
= cfg
->n_default_flows
;
165 sw
->queued
= rconn_packet_counter_create();
171 lswitch_handshake(struct lswitch
*sw
)
173 enum ofputil_protocol protocol
;
175 send_features_request(sw
);
177 protocol
= ofputil_protocol_from_ofp_version(rconn_get_version(sw
->rconn
));
178 if (sw
->default_flows
) {
179 enum ofputil_protocol usable_protocols
;
180 struct ofpbuf
*msg
= NULL
;
184 /* If the initial protocol isn't good enough for default_flows, then
185 * pick one that will work and encode messages to set up that
188 * This could be improved by actually negotiating a mutually acceptable
189 * flow format with the switch, but that would require an asynchronous
190 * state machine. This version ought to work fine in practice. */
191 usable_protocols
= ofputil_flow_mod_usable_protocols(
192 sw
->default_flows
, sw
->n_default_flows
);
193 if (!(protocol
& usable_protocols
)) {
194 enum ofputil_protocol want
= rightmost_1bit(usable_protocols
);
196 msg
= ofputil_encode_set_protocol(protocol
, want
, &protocol
);
200 error
= rconn_send(sw
->rconn
, msg
, NULL
);
204 for (i
= 0; !error
&& i
< sw
->n_default_flows
; i
++) {
205 msg
= ofputil_encode_flow_mod(&sw
->default_flows
[i
], protocol
);
206 error
= rconn_send(sw
->rconn
, msg
, NULL
);
210 VLOG_INFO_RL(&rl
, "%s: failed to queue default flows (%s)",
211 rconn_get_name(sw
->rconn
), strerror(error
));
214 sw
->protocol
= protocol
;
218 lswitch_is_alive(const struct lswitch
*sw
)
220 return rconn_is_alive(sw
->rconn
);
225 lswitch_destroy(struct lswitch
*sw
)
228 struct lswitch_port
*node
, *next
;
230 rconn_destroy(sw
->rconn
);
231 HMAP_FOR_EACH_SAFE (node
, next
, hmap_node
, &sw
->queue_numbers
) {
232 hmap_remove(&sw
->queue_numbers
, &node
->hmap_node
);
235 shash_destroy(&sw
->queue_names
);
236 mac_learning_destroy(sw
->ml
);
237 rconn_packet_counter_destroy(sw
->queued
);
242 /* Takes care of necessary 'sw' activity, except for receiving packets (which
243 * the caller must do). */
245 lswitch_run(struct lswitch
*sw
)
250 mac_learning_run(sw
->ml
, NULL
);
253 rconn_run(sw
->rconn
);
255 if (sw
->state
== S_CONNECTING
) {
256 if (rconn_get_version(sw
->rconn
) != -1) {
257 lswitch_handshake(sw
);
258 sw
->state
= S_FEATURES_REPLY
;
263 for (i
= 0; i
< 50; i
++) {
266 msg
= rconn_recv(sw
->rconn
);
272 lswitch_process_packet(sw
, msg
);
279 lswitch_wait(struct lswitch
*sw
)
282 mac_learning_wait(sw
->ml
);
284 rconn_run_wait(sw
->rconn
);
285 rconn_recv_wait(sw
->rconn
);
288 /* Processes 'msg', which should be an OpenFlow received on 'rconn', according
289 * to the learning switch state in 'sw'. The most likely result of processing
290 * is that flow-setup and packet-out OpenFlow messages will be sent out on
293 lswitch_process_packet(struct lswitch
*sw
, const struct ofpbuf
*msg
)
299 if (ofptype_pull(&type
, &b
)) {
303 if (sw
->state
== S_FEATURES_REPLY
304 && type
!= OFPTYPE_ECHO_REQUEST
305 && type
!= OFPTYPE_FEATURES_REPLY
) {
310 case OFPTYPE_ECHO_REQUEST
:
311 process_echo_request(sw
, msg
->data
);
314 case OFPTYPE_FEATURES_REPLY
:
315 if (sw
->state
== S_FEATURES_REPLY
) {
316 if (!process_switch_features(sw
, msg
->data
)) {
317 sw
->state
= S_SWITCHING
;
319 rconn_disconnect(sw
->rconn
);
324 case OFPTYPE_PACKET_IN
:
325 process_packet_in(sw
, msg
->data
);
328 case OFPTYPE_FLOW_REMOVED
:
334 case OFPTYPE_ECHO_REPLY
:
335 case OFPTYPE_FEATURES_REQUEST
:
336 case OFPTYPE_GET_CONFIG_REQUEST
:
337 case OFPTYPE_GET_CONFIG_REPLY
:
338 case OFPTYPE_SET_CONFIG
:
339 case OFPTYPE_PORT_STATUS
:
340 case OFPTYPE_PACKET_OUT
:
341 case OFPTYPE_FLOW_MOD
:
342 case OFPTYPE_PORT_MOD
:
343 case OFPTYPE_BARRIER_REQUEST
:
344 case OFPTYPE_BARRIER_REPLY
:
345 case OFPTYPE_DESC_STATS_REQUEST
:
346 case OFPTYPE_DESC_STATS_REPLY
:
347 case OFPTYPE_FLOW_STATS_REQUEST
:
348 case OFPTYPE_FLOW_STATS_REPLY
:
349 case OFPTYPE_AGGREGATE_STATS_REQUEST
:
350 case OFPTYPE_AGGREGATE_STATS_REPLY
:
351 case OFPTYPE_TABLE_STATS_REQUEST
:
352 case OFPTYPE_TABLE_STATS_REPLY
:
353 case OFPTYPE_PORT_STATS_REQUEST
:
354 case OFPTYPE_PORT_STATS_REPLY
:
355 case OFPTYPE_QUEUE_STATS_REQUEST
:
356 case OFPTYPE_QUEUE_STATS_REPLY
:
357 case OFPTYPE_PORT_DESC_STATS_REQUEST
:
358 case OFPTYPE_PORT_DESC_STATS_REPLY
:
359 case OFPTYPE_ROLE_REQUEST
:
360 case OFPTYPE_ROLE_REPLY
:
361 case OFPTYPE_SET_FLOW_FORMAT
:
362 case OFPTYPE_FLOW_MOD_TABLE_ID
:
363 case OFPTYPE_SET_PACKET_IN_FORMAT
:
364 case OFPTYPE_FLOW_AGE
:
365 case OFPTYPE_SET_ASYNC_CONFIG
:
366 case OFPTYPE_SET_CONTROLLER_ID
:
367 case OFPTYPE_FLOW_MONITOR_STATS_REQUEST
:
368 case OFPTYPE_FLOW_MONITOR_STATS_REPLY
:
369 case OFPTYPE_FLOW_MONITOR_CANCEL
:
370 case OFPTYPE_FLOW_MONITOR_PAUSED
:
371 case OFPTYPE_FLOW_MONITOR_RESUMED
:
373 if (VLOG_IS_DBG_ENABLED()) {
374 char *s
= ofp_to_string(msg
->data
, msg
->size
, 2);
375 VLOG_DBG_RL(&rl
, "%016llx: OpenFlow packet ignored: %s",
383 send_features_request(struct lswitch
*sw
)
386 struct ofp_switch_config
*osc
;
387 int ofp_version
= rconn_get_version(sw
->rconn
);
389 assert(ofp_version
> 0 && ofp_version
< 0xff);
391 /* Send OFPT_FEATURES_REQUEST. */
392 b
= ofpraw_alloc(OFPRAW_OFPT_FEATURES_REQUEST
, ofp_version
, 0);
395 /* Send OFPT_SET_CONFIG. */
396 b
= ofpraw_alloc(OFPRAW_OFPT_SET_CONFIG
, ofp_version
, sizeof *osc
);
397 osc
= ofpbuf_put_zeros(b
, sizeof *osc
);
398 osc
->miss_send_len
= htons(OFP_DEFAULT_MISS_SEND_LEN
);
403 queue_tx(struct lswitch
*sw
, struct ofpbuf
*b
)
405 int retval
= rconn_send_with_limit(sw
->rconn
, b
, sw
->queued
, 10);
406 if (retval
&& retval
!= ENOTCONN
) {
407 if (retval
== EAGAIN
) {
408 VLOG_INFO_RL(&rl
, "%016llx: %s: tx queue overflow",
409 sw
->datapath_id
, rconn_get_name(sw
->rconn
));
411 VLOG_WARN_RL(&rl
, "%016llx: %s: send: %s",
412 sw
->datapath_id
, rconn_get_name(sw
->rconn
),
419 process_switch_features(struct lswitch
*sw
, struct ofp_header
*oh
)
421 struct ofputil_switch_features features
;
422 struct ofputil_phy_port port
;
426 error
= ofputil_decode_switch_features(oh
, &features
, &b
);
428 VLOG_ERR("received invalid switch feature reply (%s)",
429 ofperr_to_string(error
));
433 sw
->datapath_id
= features
.datapath_id
;
435 while (!ofputil_pull_phy_port(oh
->version
, &b
, &port
)) {
436 struct lswitch_port
*lp
= shash_find_data(&sw
->queue_names
, port
.name
);
437 if (lp
&& hmap_node_is_null(&lp
->hmap_node
)) {
438 lp
->port_no
= port
.port_no
;
439 hmap_insert(&sw
->queue_numbers
, &lp
->hmap_node
,
440 hash_int(lp
->port_no
, 0));
447 lswitch_choose_destination(struct lswitch
*sw
, const struct flow
*flow
)
451 /* Learn the source MAC. */
452 if (mac_learning_may_learn(sw
->ml
, flow
->dl_src
, 0)) {
453 struct mac_entry
*mac
= mac_learning_insert(sw
->ml
, flow
->dl_src
, 0);
454 if (mac_entry_is_new(mac
) || mac
->port
.i
!= flow
->in_port
) {
455 VLOG_DBG_RL(&rl
, "%016llx: learned that "ETH_ADDR_FMT
" is on "
456 "port %"PRIu16
, sw
->datapath_id
,
457 ETH_ADDR_ARGS(flow
->dl_src
), flow
->in_port
);
459 mac
->port
.i
= flow
->in_port
;
460 mac_learning_changed(sw
->ml
, mac
);
464 /* Drop frames for reserved multicast addresses. */
465 if (eth_addr_is_reserved(flow
->dl_dst
)) {
469 out_port
= OFPP_FLOOD
;
471 struct mac_entry
*mac
;
473 mac
= mac_learning_lookup(sw
->ml
, flow
->dl_dst
, 0, NULL
);
475 out_port
= mac
->port
.i
;
476 if (out_port
== flow
->in_port
) {
477 /* Don't send a packet back out its input port. */
483 /* Check if we need to use "NORMAL" action. */
484 if (sw
->action_normal
&& out_port
!= OFPP_FLOOD
) {
492 get_queue_id(const struct lswitch
*sw
, uint16_t in_port
)
494 const struct lswitch_port
*port
;
496 HMAP_FOR_EACH_WITH_HASH (port
, hmap_node
, hash_int(in_port
, 0),
497 &sw
->queue_numbers
) {
498 if (port
->port_no
== in_port
) {
499 return port
->queue_id
;
503 return sw
->default_queue
;
507 process_packet_in(struct lswitch
*sw
, const struct ofp_header
*oh
)
509 struct ofputil_packet_in pi
;
513 uint64_t ofpacts_stub
[64 / 8];
514 struct ofpbuf ofpacts
;
516 struct ofputil_packet_out po
;
522 error
= ofputil_decode_packet_in(&pi
, oh
);
524 VLOG_WARN_RL(&rl
, "failed to decode packet-in: %s",
525 ofperr_to_string(error
));
529 /* Ignore packets sent via output to OFPP_CONTROLLER. This library never
530 * uses such an action. You never know what experiments might be going on,
531 * though, and it seems best not to interfere with them. */
532 if (pi
.reason
!= OFPR_NO_MATCH
) {
536 /* Extract flow data from 'opi' into 'flow'. */
537 ofpbuf_use_const(&pkt
, pi
.packet
, pi
.packet_len
);
538 flow_extract(&pkt
, 0, NULL
, pi
.fmd
.in_port
, &flow
);
539 flow
.tunnel
.tun_id
= pi
.fmd
.tun_id
;
541 /* Choose output port. */
542 out_port
= lswitch_choose_destination(sw
, &flow
);
545 queue_id
= get_queue_id(sw
, pi
.fmd
.in_port
);
546 ofpbuf_use_stack(&ofpacts
, ofpacts_stub
, sizeof ofpacts_stub
);
547 if (out_port
== OFPP_NONE
) {
549 } else if (queue_id
== UINT32_MAX
|| out_port
>= OFPP_MAX
) {
550 ofpact_put_OUTPUT(&ofpacts
)->port
= out_port
;
552 struct ofpact_enqueue
*enqueue
= ofpact_put_ENQUEUE(&ofpacts
);
553 enqueue
->port
= out_port
;
554 enqueue
->queue
= queue_id
;
556 ofpact_pad(&ofpacts
);
558 /* Prepare packet_out in case we need one. */
559 po
.buffer_id
= pi
.buffer_id
;
560 if (po
.buffer_id
== UINT32_MAX
) {
561 po
.packet
= pkt
.data
;
562 po
.packet_len
= pkt
.size
;
567 po
.in_port
= pi
.fmd
.in_port
;
568 po
.ofpacts
= ofpacts
.data
;
569 po
.ofpacts_len
= ofpacts
.size
;
571 /* Send the packet, and possibly the whole flow, to the output port. */
572 if (sw
->max_idle
>= 0 && (!sw
->ml
|| out_port
!= OFPP_FLOOD
)) {
573 struct ofputil_flow_mod fm
;
574 struct ofpbuf
*buffer
;
576 /* The output port is known, or we always flood everything, so add a
578 memset(&fm
, 0, sizeof fm
);
579 match_init(&fm
.match
, &flow
, &sw
->wc
);
580 ofputil_normalize_match_quiet(&fm
.match
);
583 fm
.command
= OFPFC_ADD
;
584 fm
.idle_timeout
= sw
->max_idle
;
585 fm
.buffer_id
= pi
.buffer_id
;
586 fm
.out_port
= OFPP_NONE
;
587 fm
.ofpacts
= ofpacts
.data
;
588 fm
.ofpacts_len
= ofpacts
.size
;
589 buffer
= ofputil_encode_flow_mod(&fm
, sw
->protocol
);
591 queue_tx(sw
, buffer
);
593 /* If the switch didn't buffer the packet, we need to send a copy. */
594 if (pi
.buffer_id
== UINT32_MAX
&& out_port
!= OFPP_NONE
) {
595 queue_tx(sw
, ofputil_encode_packet_out(&po
, sw
->protocol
));
598 /* We don't know that MAC, or we don't set up flows. Send along the
599 * packet without setting up a flow. */
600 if (pi
.buffer_id
!= UINT32_MAX
|| out_port
!= OFPP_NONE
) {
601 queue_tx(sw
, ofputil_encode_packet_out(&po
, sw
->protocol
));
/* Answers the echo request 'rq' by queueing the reply built by
 * make_echo_reply() on the connection of 'sw'. */
static void
process_echo_request(struct lswitch *sw, const struct ofp_header *rq)
{
    queue_tx(sw, make_echo_reply(rq));
}