2 * Copyright (c) 2008, 2009, 2010 Nicira Networks.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 #include "learning-switch.h"
22 #include <netinet/in.h>
27 #include "mac-learning.h"
29 #include "ofp-parse.h"
30 #include "ofp-print.h"
32 #include "openflow/openflow.h"
33 #include "poll-loop.h"
42 VLOG_DEFINE_THIS_MODULE(learning_switch
)
48 P_FORWARDING
= 1 << 3,
53 /* If nonnegative, the switch sets up flows that expire after the given
54 * number of seconds (or never expire, if the value is OFP_FLOW_PERMANENT).
55 * Otherwise, the switch processes every packet. */
58 unsigned long long int datapath_id
;
59 uint32_t capabilities
;
60 time_t last_features_request
;
61 struct mac_learning
*ml
; /* NULL to act as hub instead of switch. */
62 uint32_t wildcards
; /* Wildcards to apply to flows. */
63 bool action_normal
; /* Use OFPP_NORMAL? */
64 uint32_t queue
; /* OpenFlow queue to use, or UINT32_MAX. */
66 /* Number of outgoing queued packets on the rconn. */
67 struct rconn_packet_counter
*queued
;
69 /* Spanning tree protocol implementation.
71 * We implement STP states by, whenever a port's STP state changes,
72 * querying all the flows on the switch and then deleting any of them that
73 * are inappropriate for a port's STP state. */
74 long long int next_query
; /* Next time at which to query all flows. */
75 long long int last_query
; /* Last time we sent a query. */
76 long long int last_reply
; /* Last time we received a query reply. */
77 unsigned int port_states
[STP_MAX_PORTS
];
78 uint32_t query_xid
; /* XID used for query. */
79 int n_flows
, n_no_recv
, n_no_send
;
82 /* The log messages here could actually be useful in debugging, so keep the
83 * rate limit relatively high. */
84 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(30, 300);
86 static void queue_tx(struct lswitch
*, struct rconn
*, struct ofpbuf
*);
87 static void send_features_request(struct lswitch
*, struct rconn
*);
88 static void send_default_flows(struct lswitch
*sw
, struct rconn
*rconn
,
90 static void schedule_query(struct lswitch
*, long long int delay
);
91 static bool may_learn(const struct lswitch
*, uint16_t port_no
);
92 static bool may_recv(const struct lswitch
*, uint16_t port_no
,
94 static bool may_send(const struct lswitch
*, uint16_t port_no
);
/* Common signature for the OpenFlow message handlers declared below.  Each
 * handler receives the switch state, the connection, and an untyped pointer
 * that the handler converts to its own message structure. */
96 typedef void packet_handler_func(struct lswitch
*, struct rconn
*, void *);
97 static packet_handler_func process_switch_features
;
98 static packet_handler_func process_packet_in
;
99 static packet_handler_func process_echo_request
;
100 static packet_handler_func process_port_status
;
101 static packet_handler_func process_phy_port
;
102 static packet_handler_func process_stats_reply
;
104 /* Creates and returns a new learning switch.
106 * If 'learn_macs' is true, the new switch will learn the ports on which MAC
107 * addresses appear. Otherwise, the new switch will flood all packets.
109 * If 'max_idle' is nonnegative, the new switch will set up flows that expire
110 * after the given number of seconds (or never expire, if 'max_idle' is
111 * OFP_FLOW_PERMANENT). Otherwise, the new switch will process every packet.
113 * The caller may provide the file stream 'default_flows' that defines
114 * default flows that should be pushed when a switch connects. Each
115 * line is a flow entry in the format described for "add-flows" command
116 * in the Flow Syntax section of the ovs-ofctl(8) man page. The caller
117 * is responsible for closing the stream.
119 * 'rconn' is used to send out an OpenFlow features request. */
121 lswitch_create(struct rconn
*rconn
, bool learn_macs
,
122 bool exact_flows
, int max_idle
, bool action_normal
,
128 sw
= xzalloc(sizeof *sw
);
129 sw
->max_idle
= max_idle
;
131 sw
->last_features_request
= time_now() - 1;
132 sw
->ml
= learn_macs
? mac_learning_create() : NULL
;
133 sw
->action_normal
= action_normal
;
138 /* We cannot wildcard all fields.
139 * We need in_port to detect moves.
140 * We need both SA and DA to do learning. */
141 sw
->wildcards
= (OFPFW_DL_TYPE
| OFPFW_NW_SRC_MASK
| OFPFW_NW_DST_MASK
142 | OFPFW_NW_PROTO
| OFPFW_TP_SRC
| OFPFW_TP_DST
);
144 sw
->queue
= UINT32_MAX
;
145 sw
->queued
= rconn_packet_counter_create();
146 sw
->next_query
= LLONG_MIN
;
147 sw
->last_query
= LLONG_MIN
;
148 sw
->last_reply
= LLONG_MIN
;
149 for (i
= 0; i
< STP_MAX_PORTS
; i
++) {
150 sw
->port_states
[i
] = P_DISABLED
;
152 send_features_request(sw
, rconn
);
154 send_default_flows(sw
, rconn
, default_flows
);
161 lswitch_destroy(struct lswitch
*sw
)
164 mac_learning_destroy(sw
->ml
);
165 rconn_packet_counter_destroy(sw
->queued
);
170 /* Sets 'queue' as the OpenFlow queue used by packets and flows set up by 'sw'.
171 * Specify UINT32_MAX to avoid specifying a particular queue, which is also the
172 * default if this function is never called for 'sw'. */
174 lswitch_set_queue(struct lswitch
*sw
, uint32_t queue
)
179 /* Takes care of necessary 'sw' activity, except for receiving packets (which
180 * the caller must do). */
182 lswitch_run(struct lswitch
*sw
, struct rconn
*rconn
)
184 long long int now
= time_msec();
187 mac_learning_run(sw
->ml
, NULL
);
190 /* If we're waiting for more replies, keep waiting for up to 10 s. */
191 if (sw
->last_reply
!= LLONG_MIN
) {
192 if (now
- sw
->last_reply
> 10000) {
193 VLOG_ERR_RL(&rl
, "%016llx: No more flow stat replies last 10 s",
195 sw
->last_reply
= LLONG_MIN
;
196 sw
->last_query
= LLONG_MIN
;
197 schedule_query(sw
, 0);
203 /* If we're waiting for any reply at all, keep waiting for up to 10 s. */
204 if (sw
->last_query
!= LLONG_MIN
) {
205 if (now
- sw
->last_query
> 10000) {
206 VLOG_ERR_RL(&rl
, "%016llx: No flow stat replies in last 10 s",
208 sw
->last_query
= LLONG_MIN
;
209 schedule_query(sw
, 0);
215 /* If it's time to send another query, do so. */
216 if (sw
->next_query
!= LLONG_MIN
&& now
>= sw
->next_query
) {
217 sw
->next_query
= LLONG_MIN
;
218 if (!rconn_is_connected(rconn
)) {
219 schedule_query(sw
, 1000);
221 struct ofp_stats_request
*osr
;
222 struct ofp_flow_stats_request
*ofsr
;
226 VLOG_DBG("%016llx: Sending flow stats request to implement STP",
229 sw
->last_query
= now
;
230 sw
->query_xid
= random_uint32();
234 osr
= make_openflow_xid(sizeof *osr
+ sizeof *ofsr
,
235 OFPT_STATS_REQUEST
, sw
->query_xid
, &b
);
236 osr
->type
= htons(OFPST_FLOW
);
237 osr
->flags
= htons(0);
238 ofsr
= (struct ofp_flow_stats_request
*) osr
->body
;
239 ofsr
->match
.wildcards
= htonl(OFPFW_ALL
);
240 ofsr
->table_id
= 0xff;
241 ofsr
->out_port
= htons(OFPP_NONE
);
243 error
= rconn_send(rconn
, b
, NULL
);
245 VLOG_WARN_RL(&rl
, "%016llx: sending flow stats request "
246 "failed: %s", sw
->datapath_id
, strerror(error
));
248 schedule_query(sw
, 1000);
255 wait_timeout(long long int started
)
257 poll_timer_wait_until(started
+ 10000);
261 lswitch_wait(struct lswitch
*sw
)
264 mac_learning_wait(sw
->ml
);
267 if (sw
->last_reply
!= LLONG_MIN
) {
268 wait_timeout(sw
->last_reply
);
269 } else if (sw
->last_query
!= LLONG_MIN
) {
270 wait_timeout(sw
->last_query
);
274 /* Processes 'msg', which should be an OpenFlow message received on 'rconn', according
275 * to the learning switch state in 'sw'. The most likely result of processing
276 * is that flow-setup and packet-out OpenFlow messages will be sent out on
279 lswitch_process_packet(struct lswitch
*sw
, struct rconn
*rconn
,
280 const struct ofpbuf
*msg
)
285 packet_handler_func
*handler
;
287 static const struct processor processors
[] = {
290 sizeof(struct ofp_header
),
295 sizeof(struct ofp_switch_features
),
296 process_switch_features
300 offsetof(struct ofp_packet_in
, data
),
305 sizeof(struct ofp_port_status
),
310 offsetof(struct ofp_stats_reply
, body
),
315 sizeof(struct ofp_flow_removed
),
319 const size_t n_processors
= ARRAY_SIZE(processors
);
320 const struct processor
*p
;
321 struct ofp_header
*oh
;
324 if (sw
->datapath_id
== 0
325 && oh
->type
!= OFPT_ECHO_REQUEST
326 && oh
->type
!= OFPT_FEATURES_REPLY
) {
327 send_features_request(sw
, rconn
);
331 for (p
= processors
; p
< &processors
[n_processors
]; p
++) {
332 if (oh
->type
== p
->type
) {
333 if (msg
->size
< p
->min_size
) {
334 VLOG_WARN_RL(&rl
, "%016llx: %s: too short (%zu bytes) for "
335 "type %"PRIu8
" (min %zu)", sw
->datapath_id
,
336 rconn_get_name(rconn
), msg
->size
, oh
->type
,
341 (p
->handler
)(sw
, rconn
, msg
->data
);
346 if (VLOG_IS_DBG_ENABLED()) {
347 char *p
= ofp_to_string(msg
->data
, msg
->size
, 2);
348 VLOG_DBG_RL(&rl
, "%016llx: OpenFlow packet ignored: %s",
355 send_features_request(struct lswitch
*sw
, struct rconn
*rconn
)
357 time_t now
= time_now();
358 if (now
>= sw
->last_features_request
+ 1) {
360 struct ofp_switch_config
*osc
;
362 /* Send OFPT_FEATURES_REQUEST. */
363 make_openflow(sizeof(struct ofp_header
), OFPT_FEATURES_REQUEST
, &b
);
364 queue_tx(sw
, rconn
, b
);
366 /* Send OFPT_SET_CONFIG. */
367 osc
= make_openflow(sizeof *osc
, OFPT_SET_CONFIG
, &b
);
368 osc
->miss_send_len
= htons(OFP_DEFAULT_MISS_SEND_LEN
);
369 queue_tx(sw
, rconn
, b
);
371 sw
->last_features_request
= now
;
376 send_default_flows(struct lswitch
*sw
, struct rconn
*rconn
,
381 while (fgets(line
, sizeof line
, default_flows
)) {
383 struct ofp_flow_mod
*ofm
;
384 uint16_t priority
, idle_timeout
, hard_timeout
;
386 struct ofp_match match
;
390 /* Delete comments. */
391 comment
= strchr(line
, '#');
396 /* Drop empty lines. */
397 if (line
[strspn(line
, " \t\n")] == '\0') {
401 /* Parse and send. parse_ofp_str() will expand and reallocate the data
402 * in 'b', so we can't keep pointers to it across the parse_ofp_str()
404 make_openflow(sizeof *ofm
, OFPT_FLOW_MOD
, &b
);
405 parse_ofp_str(line
, &match
, b
,
406 NULL
, NULL
, &priority
, &idle_timeout
, &hard_timeout
,
410 ofm
->command
= htons(OFPFC_ADD
);
411 ofm
->cookie
= htonll(cookie
);
412 ofm
->idle_timeout
= htons(idle_timeout
);
413 ofm
->hard_timeout
= htons(hard_timeout
);
414 ofm
->buffer_id
= htonl(UINT32_MAX
);
415 ofm
->priority
= htons(priority
);
417 update_openflow_length(b
);
418 queue_tx(sw
, rconn
, b
);
423 queue_tx(struct lswitch
*sw
, struct rconn
*rconn
, struct ofpbuf
*b
)
425 int retval
= rconn_send_with_limit(rconn
, b
, sw
->queued
, 10);
426 if (retval
&& retval
!= ENOTCONN
) {
427 if (retval
== EAGAIN
) {
428 VLOG_INFO_RL(&rl
, "%016llx: %s: tx queue overflow",
429 sw
->datapath_id
, rconn_get_name(rconn
));
431 VLOG_WARN_RL(&rl
, "%016llx: %s: send: %s",
432 sw
->datapath_id
, rconn_get_name(rconn
),
439 schedule_query(struct lswitch
*sw
, long long int delay
)
441 long long int now
= time_msec();
442 if (sw
->next_query
== LLONG_MIN
|| sw
->next_query
> now
+ delay
) {
443 sw
->next_query
= now
+ delay
;
448 process_switch_features(struct lswitch
*sw
, struct rconn
*rconn
, void *osf_
)
450 struct ofp_switch_features
*osf
= osf_
;
451 size_t n_ports
= ((ntohs(osf
->header
.length
)
452 - offsetof(struct ofp_switch_features
, ports
))
453 / sizeof *osf
->ports
);
456 sw
->datapath_id
= ntohll(osf
->datapath_id
);
457 sw
->capabilities
= ntohl(osf
->capabilities
);
458 for (i
= 0; i
< n_ports
; i
++) {
459 process_phy_port(sw
, rconn
, &osf
->ports
[i
]);
461 if (sw
->capabilities
& OFPC_STP
) {
462 schedule_query(sw
, 1000);
467 lswitch_choose_destination(struct lswitch
*sw
, const flow_t
*flow
)
471 /* Learn the source MAC. */
472 if (may_learn(sw
, flow
->in_port
) && sw
->ml
) {
473 if (mac_learning_learn(sw
->ml
, flow
->dl_src
, 0, flow
->in_port
,
474 GRAT_ARP_LOCK_NONE
)) {
475 VLOG_DBG_RL(&rl
, "%016llx: learned that "ETH_ADDR_FMT
" is on "
476 "port %"PRIu16
, sw
->datapath_id
,
477 ETH_ADDR_ARGS(flow
->dl_src
), flow
->in_port
);
481 /* Drop frames for reserved multicast addresses. */
482 if (eth_addr_is_reserved(flow
->dl_dst
)) {
486 if (!may_recv(sw
, flow
->in_port
, false)) {
487 /* STP prevents receiving anything on this port. */
491 out_port
= OFPP_FLOOD
;
493 int learned_port
= mac_learning_lookup(sw
->ml
, flow
->dl_dst
, 0, NULL
);
494 if (learned_port
>= 0 && may_send(sw
, learned_port
)) {
495 out_port
= learned_port
;
496 if (out_port
== flow
->in_port
) {
497 /* Don't send a packet back out its input port. */
503 /* Check if we need to use "NORMAL" action. */
504 if (sw
->action_normal
&& out_port
!= OFPP_FLOOD
) {
512 process_packet_in(struct lswitch
*sw
, struct rconn
*rconn
, void *opi_
)
514 struct ofp_packet_in
*opi
= opi_
;
515 uint16_t in_port
= ntohs(opi
->in_port
);
518 struct ofp_action_header actions
[2];
521 size_t pkt_ofs
, pkt_len
;
525 /* Extract flow data from 'opi' into 'flow'. */
526 pkt_ofs
= offsetof(struct ofp_packet_in
, data
);
527 pkt_len
= ntohs(opi
->header
.length
) - pkt_ofs
;
528 pkt
.data
= opi
->data
;
530 flow_extract(&pkt
, 0, in_port
, &flow
);
532 /* Choose output port. */
533 out_port
= lswitch_choose_destination(sw
, &flow
);
536 if (out_port
== OFPP_NONE
) {
538 } else if (sw
->queue
== UINT32_MAX
|| out_port
>= OFPP_MAX
) {
539 struct ofp_action_output oao
;
541 memset(&oao
, 0, sizeof oao
);
542 oao
.type
= htons(OFPAT_OUTPUT
);
543 oao
.len
= htons(sizeof oao
);
544 oao
.port
= htons(out_port
);
546 memcpy(actions
, &oao
, sizeof oao
);
547 actions_len
= sizeof oao
;
549 struct ofp_action_enqueue oae
;
551 memset(&oae
, 0, sizeof oae
);
552 oae
.type
= htons(OFPAT_ENQUEUE
);
553 oae
.len
= htons(sizeof oae
);
554 oae
.port
= htons(out_port
);
555 oae
.queue_id
= htonl(sw
->queue
);
557 memcpy(actions
, &oae
, sizeof oae
);
558 actions_len
= sizeof oae
;
560 assert(actions_len
<= sizeof actions
);
562 /* Send the packet, and possibly the whole flow, to the output port. */
563 if (sw
->max_idle
>= 0 && (!sw
->ml
|| out_port
!= OFPP_FLOOD
)) {
564 struct ofpbuf
*buffer
;
565 struct ofp_flow_mod
*ofm
;
567 /* The output port is known, or we always flood everything, so add a
569 buffer
= make_add_flow(&flow
, ntohl(opi
->buffer_id
),
570 sw
->max_idle
, actions_len
);
571 ofpbuf_put(buffer
, actions
, actions_len
);
573 ofm
->match
.wildcards
= htonl(sw
->wildcards
);
574 queue_tx(sw
, rconn
, buffer
);
576 /* If the switch didn't buffer the packet, we need to send a copy. */
577 if (ntohl(opi
->buffer_id
) == UINT32_MAX
&& actions_len
> 0) {
579 make_packet_out(&pkt
, UINT32_MAX
, in_port
,
580 actions
, actions_len
/ sizeof *actions
));
583 /* We don't know that MAC, or we don't set up flows. Send along the
584 * packet without setting up a flow. */
585 if (ntohl(opi
->buffer_id
) != UINT32_MAX
|| actions_len
> 0) {
587 make_packet_out(&pkt
, ntohl(opi
->buffer_id
), in_port
,
588 actions
, actions_len
/ sizeof *actions
));
594 process_echo_request(struct lswitch
*sw
, struct rconn
*rconn
, void *rq_
)
596 struct ofp_header
*rq
= rq_
;
597 queue_tx(sw
, rconn
, make_echo_reply(rq
));
601 process_port_status(struct lswitch
*sw
, struct rconn
*rconn
, void *ops_
)
603 struct ofp_port_status
*ops
= ops_
;
604 process_phy_port(sw
, rconn
, &ops
->desc
);
608 process_phy_port(struct lswitch
*sw
, struct rconn
*rconn OVS_UNUSED
,
611 const struct ofp_phy_port
*opp
= opp_
;
612 uint16_t port_no
= ntohs(opp
->port_no
);
613 if (sw
->capabilities
& OFPC_STP
&& port_no
< STP_MAX_PORTS
) {
614 uint32_t config
= ntohl(opp
->config
);
615 uint32_t state
= ntohl(opp
->state
);
616 unsigned int *port_state
= &sw
->port_states
[port_no
];
617 unsigned int new_port_state
;
619 if (!(config
& (OFPPC_NO_STP
| OFPPC_PORT_DOWN
))
620 && !(state
& OFPPS_LINK_DOWN
))
622 switch (state
& OFPPS_STP_MASK
) {
623 case OFPPS_STP_LISTEN
:
624 new_port_state
= P_LISTENING
;
626 case OFPPS_STP_LEARN
:
627 new_port_state
= P_LEARNING
;
629 case OFPPS_STP_FORWARD
:
630 new_port_state
= P_FORWARDING
;
632 case OFPPS_STP_BLOCK
:
633 new_port_state
= P_BLOCKING
;
636 new_port_state
= P_DISABLED
;
640 new_port_state
= P_FORWARDING
;
642 if (*port_state
!= new_port_state
) {
643 *port_state
= new_port_state
;
644 schedule_query(sw
, 1000);
650 get_port_state(const struct lswitch
*sw
, uint16_t port_no
)
652 return (port_no
>= STP_MAX_PORTS
|| !(sw
->capabilities
& OFPC_STP
)
654 : sw
->port_states
[port_no
]);
658 may_learn(const struct lswitch
*sw
, uint16_t port_no
)
660 return get_port_state(sw
, port_no
) & (P_LEARNING
| P_FORWARDING
);
664 may_recv(const struct lswitch
*sw
, uint16_t port_no
, bool any_actions
)
666 unsigned int state
= get_port_state(sw
, port_no
);
668 ? state
& (P_DISABLED
| P_LISTENING
| P_BLOCKING
)
669 : state
& (P_DISABLED
| P_LISTENING
| P_BLOCKING
| P_LEARNING
));
673 may_send(const struct lswitch
*sw
, uint16_t port_no
)
675 return get_port_state(sw
, port_no
) & P_FORWARDING
;
679 process_flow_stats(struct lswitch
*sw
, struct rconn
*rconn
,
680 const struct ofp_flow_stats
*ofs
)
682 const char *end
= (char *) ofs
+ ntohs(ofs
->length
);
685 /* Decide to delete the flow if it matches on an STP-disabled physical
686 * port. But don't delete it if the flow just drops all received packets,
687 * because that's a perfectly reasonable thing to do for disabled physical
689 if (!(ofs
->match
.wildcards
& htonl(OFPFW_IN_PORT
))) {
690 if (!may_recv(sw
, ntohs(ofs
->match
.in_port
),
691 end
> (char *) ofs
->actions
)) {
697 /* Decide to delete the flow if it forwards to an STP-disabled physical
700 const struct ofp_action_header
*a
;
703 for (a
= ofs
->actions
; (char *) a
< end
; a
+= len
/ 8) {
705 if (len
> end
- (char *) a
) {
706 VLOG_DBG_RL(&rl
, "%016llx: action exceeds available space "
708 sw
->datapath_id
, len
, end
- (char *) a
);
710 } else if (len
% 8) {
711 VLOG_DBG_RL(&rl
, "%016llx: action length (%zu) not multiple "
712 "of 8 bytes", sw
->datapath_id
, len
);
716 if (a
->type
== htons(OFPAT_OUTPUT
)) {
717 struct ofp_action_output
*oao
= (struct ofp_action_output
*) a
;
718 if (!may_send(sw
, ntohs(oao
->port
))) {
727 /* Delete the flow. */
729 struct ofp_flow_mod
*ofm
;
732 ofm
= make_openflow(offsetof(struct ofp_flow_mod
, actions
),
734 ofm
->match
= ofs
->match
;
735 ofm
->command
= OFPFC_DELETE_STRICT
;
736 rconn_send(rconn
, b
, NULL
);
741 process_stats_reply(struct lswitch
*sw
, struct rconn
*rconn
, void *osr_
)
743 struct ofp_stats_reply
*osr
= osr_
;
744 struct flow_stats_iterator i
;
745 const struct ofp_flow_stats
*fs
;
747 if (sw
->last_query
== LLONG_MIN
748 || osr
->type
!= htons(OFPST_FLOW
)
749 || osr
->header
.xid
!= sw
->query_xid
) {
752 for (fs
= flow_stats_first(&i
, osr
); fs
; fs
= flow_stats_next(&i
)) {
754 process_flow_stats(sw
, rconn
, fs
);
756 if (!(osr
->flags
& htons(OFPSF_REPLY_MORE
))) {
757 VLOG_DBG("%016llx: Deleted %d of %d received flows to "
758 "implement STP, %d because of no-recv, %d because of "
759 "no-send", sw
->datapath_id
,
760 sw
->n_no_recv
+ sw
->n_no_send
, sw
->n_flows
,
761 sw
->n_no_recv
, sw
->n_no_send
);
762 sw
->last_query
= LLONG_MIN
;
763 sw
->last_reply
= LLONG_MIN
;
765 sw
->last_reply
= time_msec();