2 * Copyright (c) 2008-2017 Nicira, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 #include "learning-switch.h"
22 #include <sys/types.h>
23 #include <netinet/in.h>
27 #include "byte-order.h"
28 #include "classifier.h"
29 #include "dp-packet.h"
31 #include "openvswitch/hmap.h"
32 #include "mac-learning.h"
33 #include "openflow/openflow.h"
34 #include "openvswitch/ofp-actions.h"
35 #include "openvswitch/ofp-connection.h"
36 #include "openvswitch/ofp-errors.h"
37 #include "openvswitch/ofp-flow.h"
38 #include "openvswitch/ofp-match.h"
39 #include "openvswitch/ofp-msgs.h"
40 #include "openvswitch/ofp-print.h"
41 #include "openvswitch/ofp-util.h"
42 #include "openvswitch/ofp-packet.h"
43 #include "openvswitch/ofp-port.h"
44 #include "openvswitch/ofp-switch.h"
45 #include "openvswitch/ofpbuf.h"
46 #include "openvswitch/vconn.h"
47 #include "openvswitch/vlog.h"
48 #include "openvswitch/poll-loop.h"
49 #include "openvswitch/rconn.h"
50 #include "openvswitch/shash.h"
/* Declares this file's logging module, named "learning_switch".  Presumably
 * this is what the VLOG_*() calls elsewhere in the file log under; confirm
 * against the vlog header. */
54 VLOG_DEFINE_THIS_MODULE(learning_switch
);
/* Per-port queue record.  These members belong to an aggregate whose opening
 * line is not visible in this listing; later code refers to it as
 * 'struct lswitch_port'.  lswitch_create() fills in 'queue_id' and registers
 * the record under the port's name; process_switch_features() later fills in
 * 'port_no' and links 'hmap_node' into the switch's port-number map. */
57 struct hmap_node hmap_node
; /* Hash node for port number. */
58 ofp_port_t port_no
; /* OpenFlow port number. */
59 uint32_t queue_id
; /* OpenFlow queue number. */
/* Connection states of a learning switch, in the order they are entered:
 * lswitch_create() starts in S_CONNECTING, lswitch_run() moves to
 * S_FEATURES_REPLY after calling lswitch_handshake(), and a features reply
 * that process_switch_features() accepts moves to S_SWITCHING.  The enclosing
 * 'enum' line is not visible in this listing; 'enum lswitch_state' is the
 * type used for the switch's 'state' member. */
63 S_CONNECTING
, /* Waiting for connection to complete. */
64 S_FEATURES_REPLY
, /* Waiting for features reply. */
65 S_SWITCHING
, /* Switching flows. */
/* State of one learning switch instance; the rest of the file reaches these
 * members as 'sw->...'.
 *
 * NOTE(review): the aggregate's opening line and some members that the code
 * uses ('sw->rconn', 'sw->max_idle') are not visible in this listing.  The
 * "If nonnegative ..." comment below presumably documents 'max_idle', and the
 * "If true, do not reply ..." comment is cut off before its member; confirm
 * against the full file. */
70 enum lswitch_state state
;
72 /* If nonnegative, the switch sets up flows that expire after the given
73 * number of seconds (or never expire, if the value is OFP_FLOW_PERMANENT).
74 * Otherwise, the switch processes every packet. */
/* Protocol used when encoding flow mods and packet-outs in
 * process_packet_in(); assigned at the end of lswitch_handshake(). */
77 enum ofputil_protocol protocol
;
/* Copied from the features reply in process_switch_features(); used as a
 * prefix in several of this file's log messages. */
78 unsigned long long int datapath_id
;
79 struct mac_learning
*ml
; /* NULL to act as hub instead of switch. */
80 struct flow_wildcards wc
; /* Wildcards to apply to flows. */
81 bool action_normal
; /* Use OFPP_NORMAL? */
83 /* Queue distribution. */
84 uint32_t default_queue
; /* Default OpenFlow queue, or UINT32_MAX. */
85 struct hmap queue_numbers
; /* Map from port number to lswitch_port. */
86 struct shash queue_names
; /* Map from port name to lswitch_port. */
88 /* Number of outgoing queued packets on the rconn. */
89 struct rconn_packet_counter
*queued
;
91 /* If true, do not reply to any messages from the switch (for debugging
95 /* Optional "flow mod" requests to send to the switch at connection time,
96 * to set up the flow table. */
97 const struct ofputil_flow_mod
*default_flows
;
98 size_t n_default_flows
;
99 enum ofputil_protocol usable_protocols
;
/* Rate limiter passed to every VLOG_*_RL() call in this file.  The meaning
 * of the two arguments (30, 300) is defined by VLOG_RATE_LIMIT_INIT;
 * presumably a rate and a burst size -- confirm against the vlog header. */
102 /* The log messages here could actually be useful in debugging, so keep the
103 * rate limit relatively high. */
104 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(30, 300);
/* Forward declarations of the file-local helpers defined further down.
 *
 * The last two declarations carry lock annotations that name 'ml->rwlock':
 * reading a MAC entry's port is annotated OVS_REQ_RDLOCK and writing it is
 * annotated OVS_REQ_WRLOCK.  Presumably these are compiler-checked
 * thread-safety annotations -- confirm against the header that defines
 * them. */
106 static void queue_tx(struct lswitch
*, struct ofpbuf
*);
107 static void send_features_request(struct lswitch
*);
109 static void lswitch_process_packet(struct lswitch
*, const struct ofpbuf
*);
110 static enum ofperr
process_switch_features(struct lswitch
*,
111 struct ofp_header
*);
112 static void process_packet_in(struct lswitch
*, const struct ofp_header
*);
113 static void process_echo_request(struct lswitch
*, const struct ofp_header
*);
115 static ofp_port_t
get_mac_entry_ofp_port(const struct mac_learning
*ml
,
116 const struct mac_entry
*)
117 OVS_REQ_RDLOCK(ml
->rwlock
);
118 static void set_mac_entry_ofp_port(struct mac_learning
*ml
,
119 struct mac_entry
*, ofp_port_t
)
120 OVS_REQ_WRLOCK(ml
->rwlock
);
/* lswitch_create(): allocates a zero-filled switch with xzalloc(), copies the
 * settings from 'cfg' into it and leaves it in state S_CONNECTING.
 *
 * Visible steps: choose between MAC learning and the other modes from
 * cfg->mode, turn cfg->wildcards into a flow_wildcards value, create one
 * lswitch_port per entry of cfg->port_queues (keyed by port name), remember
 * the default-flow array by pointer (no copy is made here), and create the
 * packet counter used for transmit limiting.
 *
 * NOTE(review): the return type, local declarations, the store of 'rconn'
 * into the switch, the 'case' labels of the switch statement and the return
 * statement are not visible in this listing. */
122 /* Creates and returns a new learning switch whose configuration is given by
125 * 'rconn' is used to send out an OpenFlow features request. */
127 lswitch_create(struct rconn
*rconn
, const struct lswitch_config
*cfg
)
132 sw
= xzalloc(sizeof *sw
);
134 sw
->state
= S_CONNECTING
;
135 sw
->max_idle
= cfg
->max_idle
;
/* A MAC table is created only in LSW_LEARN mode.  The other arm of this
 * conditional is not visible here; the 'ml' member comment says NULL means
 * the switch acts as a hub. */
137 sw
->ml
= (cfg
->mode
== LSW_LEARN
138 ? mac_learning_create(MAC_ENTRY_DEFAULT_IDLE_TIME
)
140 sw
->action_normal
= cfg
->mode
== LSW_NORMAL
;
/* Pick the OpenFlow 1.0 wildcard bits.  Of the switch arms, only the
 * fixed mask and the pass-through of cfg->wildcards are visible. */
142 switch (cfg
->wildcards
) {
148 /* Try to wildcard as many fields as possible, but we cannot
149 * wildcard all fields. We need in_port to detect moves. We need
150 * Ethernet source and dest and VLAN VID to do L2 learning. */
151 ofpfw
= (OFPFW10_DL_TYPE
| OFPFW10_DL_VLAN_PCP
152 | OFPFW10_NW_SRC_ALL
| OFPFW10_NW_DST_ALL
153 | OFPFW10_NW_TOS
| OFPFW10_NW_PROTO
154 | OFPFW10_TP_SRC
| OFPFW10_TP_DST
);
158 ofpfw
= cfg
->wildcards
;
161 ofputil_wildcard_from_ofpfw10(ofpfw
, &sw
->wc
);
163 sw
->default_queue
= cfg
->default_queue
;
164 hmap_init(&sw
->queue_numbers
);
165 shash_init(&sw
->queue_names
);
/* Pre-register one record per configured port name.  The hash node is
 * nullified so that process_switch_features() can tell that the port
 * number has not been learned yet. */
166 if (cfg
->port_queues
) {
167 struct simap_node
*node
;
169 SIMAP_FOR_EACH (node
, cfg
->port_queues
) {
170 struct lswitch_port
*port
= xmalloc(sizeof *port
);
171 hmap_node_nullify(&port
->hmap_node
);
172 port
->queue_id
= node
->data
;
173 shash_add(&sw
->queue_names
, node
->name
, port
);
177 sw
->default_flows
= cfg
->default_flows
;
178 sw
->n_default_flows
= cfg
->n_default_flows
;
179 sw
->usable_protocols
= cfg
->usable_protocols
;
/* Counter that queue_tx() hands to rconn_send_with_limit(). */
181 sw
->queued
= rconn_packet_counter_create();
/* lswitch_handshake(): sends the initial OpenFlow messages on sw->rconn.
 * lswitch_run() calls it once the connection is reported as connected.
 *
 * Visible steps:
 *   1. send_features_request().
 *   2. For OpenFlow 1.3 or later, an OFPFC_ADD flow mod with a catch-all
 *      match whose single action outputs to OFPP_CONTROLLER with max_len
 *      OFP_DEFAULT_MISS_SEND_LEN.
 *   3. If default flows were configured: when the current protocol is not in
 *      sw->usable_protocols, a set-protocol request is sent first; then the
 *      default flows are sent while the protocol is usable.
 *   4. The resulting protocol is stored in sw->protocol.
 *
 * Send failures are only reported through VLOG_INFO_RL(); nothing visible
 * here propagates them to the caller.
 *
 * NOTE(review): several lines (return type, braces, declarations of 'msg',
 * 'error' and 'i', and the conditions guarding the log calls) are not visible
 * in this listing, and the comment that begins "OpenFlow 1.3 and later" has
 * lost its terminator. */
187 lswitch_handshake(struct lswitch
*sw
)
189 enum ofputil_protocol protocol
;
190 enum ofp_version version
;
192 send_features_request(sw
);
/* Start from the protocol implied by the OpenFlow version that the
 * connection reports. */
194 version
= rconn_get_version(sw
->rconn
);
195 protocol
= ofputil_protocol_from_ofp_version(version
);
196 if (version
>= OFP13_VERSION
) {
197 /* OpenFlow 1.3 and later by default drop packets that miss in the flow
198 * table. Set up a flow to send packets to the controller by
200 struct ofpact_output output
;
204 ofpact_init_OUTPUT(&output
);
205 output
.port
= OFPP_CONTROLLER
;
206 output
.max_len
= OFP_DEFAULT_MISS_SEND_LEN
;
208 struct ofputil_flow_mod fm
= {
211 .command
= OFPFC_ADD
,
212 .buffer_id
= UINT32_MAX
,
213 .out_port
= OFPP_NONE
,
214 .out_group
= OFPG_ANY
,
215 .ofpacts
= &output
.ofpact
,
216 .ofpacts_len
= sizeof output
,
218 minimatch_init_catchall(&fm
.match
);
219 msg
= ofputil_encode_flow_mod(&fm
, protocol
);
220 minimatch_destroy(&fm
.match
);
222 error
= rconn_send(sw
->rconn
, msg
, NULL
);
224 VLOG_INFO_RL(&rl
, "%s: failed to add default flow (%s)",
225 rconn_get_name(sw
->rconn
), ovs_strerror(error
));
228 if (sw
->default_flows
) {
229 struct ofpbuf
*msg
= NULL
;
233 /* If the initial protocol isn't good enough for default_flows, then
234 * pick one that will work and encode messages to set up that
237 * This could be improved by actually negotiating a mutually acceptable
238 * flow format with the switch, but that would require an asynchronous
239 * state machine. This version ought to work fine in practice. */
/* 'want' is the lowest set bit of usable_protocols.  The encoder receives
 * &protocol as its last argument, presumably to report the protocol in
 * effect after the request -- confirm against its documentation. */
240 if (!(protocol
& sw
->usable_protocols
)) {
241 enum ofputil_protocol want
= rightmost_1bit(sw
->usable_protocols
);
243 msg
= ofputil_encode_set_protocol(protocol
, want
, &protocol
);
247 error
= rconn_send(sw
->rconn
, msg
, NULL
);
/* Send the default flows one at a time, stopping at the first send error. */
250 if (protocol
& sw
->usable_protocols
) {
251 for (i
= 0; !error
&& i
< sw
->n_default_flows
; i
++) {
252 msg
= ofputil_encode_flow_mod(&sw
->default_flows
[i
], protocol
);
253 error
= rconn_send(sw
->rconn
, msg
, NULL
);
257 VLOG_INFO_RL(&rl
, "%s: failed to queue default flows (%s)",
258 rconn_get_name(sw
->rconn
), ovs_strerror(error
));
261 VLOG_INFO_RL(&rl
, "%s: failed to set usable protocol",
262 rconn_get_name(sw
->rconn
));
265 sw
->protocol
= protocol
;
/* lswitch_is_alive(): returns whatever rconn_is_alive() reports for the
 * switch's connection.  'sw' is not modified.  (The return type line is not
 * visible in this listing.) */
269 lswitch_is_alive(const struct lswitch
*sw
)
271 return rconn_is_alive(sw
->rconn
);
/* lswitch_destroy(): releases what 'sw' owns.  Visible steps, in order:
 * destroy the connection, pop every lswitch_port out of queue_numbers,
 * destroy the queue_names map, drop the reference to the MAC table, and
 * destroy the packet counter.
 *
 * NOTE(review): the body of the pop loop, any NULL check on 'sw' and the
 * final release of 'sw' itself are not visible in this listing.  Also confirm
 * how port records that never received a port number are released: they are
 * stored only in queue_names (see lswitch_create()), the loop here walks only
 * queue_numbers, and no step that frees the data held in queue_names is
 * visible. */
276 lswitch_destroy(struct lswitch
*sw
)
279 struct lswitch_port
*node
;
281 rconn_destroy(sw
->rconn
);
282 HMAP_FOR_EACH_POP (node
, hmap_node
, &sw
->queue_numbers
) {
285 shash_destroy(&sw
->queue_names
);
286 mac_learning_unref(sw
->ml
);
287 rconn_packet_counter_destroy(sw
->queued
);
/* lswitch_run(): one iteration of periodic work for 'sw'.  Visible steps:
 * run the MAC-learning table while holding its write lock, run the
 * connection, perform the handshake the first time the connection is seen
 * connected (S_CONNECTING -> S_FEATURES_REPLY), then receive and process
 * messages in a loop bounded at 50 iterations per call.
 *
 * NOTE(review): sw->ml is dereferenced to take the lock, yet the 'ml' member
 * comment says it may be NULL (hub mode); no guard is visible in this listing
 * -- confirm one exists.  The loop's exit when no message is pending and the
 * release of each received message are likewise not visible. */
292 /* Takes care of necessary 'sw' activity, except for receiving packets (which
293 * the caller must do). */
295 lswitch_run(struct lswitch
*sw
)
300 ovs_rwlock_wrlock(&sw
->ml
->rwlock
);
301 mac_learning_run(sw
->ml
);
302 ovs_rwlock_unlock(&sw
->ml
->rwlock
);
305 rconn_run(sw
->rconn
);
307 if (sw
->state
== S_CONNECTING
) {
308 if (rconn_is_connected(sw
->rconn
)) {
309 lswitch_handshake(sw
);
310 sw
->state
= S_FEATURES_REPLY
;
/* Bound the number of messages handled per call. */
315 for (i
= 0; i
< 50; i
++) {
318 msg
= rconn_recv(sw
->rconn
);
324 lswitch_process_packet(sw
, msg
);
/* lswitch_wait(): counterpart of lswitch_run().  Calls the *_wait() function
 * of each component that lswitch_run() drives: the MAC-learning table (under
 * its read lock) and the connection's run and receive paths.  Presumably
 * these register wake-up conditions with the poll loop -- confirm against the
 * callee documentation.
 *
 * NOTE(review): as in lswitch_run(), sw->ml is dereferenced without a visible
 * NULL guard; confirm one exists in the full file. */
331 lswitch_wait(struct lswitch
*sw
)
334 ovs_rwlock_rdlock(&sw
->ml
->rwlock
);
335 mac_learning_wait(sw
->ml
);
336 ovs_rwlock_unlock(&sw
->ml
->rwlock
);
338 rconn_run_wait(sw
->rconn
);
339 rconn_recv_wait(sw
->rconn
);
/* lswitch_process_packet(): dispatches one received OpenFlow message by type.
 *
 *   - OFPTYPE_ECHO_REQUEST: answered via process_echo_request().
 *   - OFPTYPE_FEATURES_REPLY: parsed only while in S_FEATURES_REPLY; a zero
 *     result from process_switch_features() moves the switch to S_SWITCHING.
 *     The rconn_disconnect() call that follows is presumably the failure
 *     path (the line selecting it is not visible in this listing).
 *   - OFPTYPE_PACKET_IN: handed to process_packet_in().
 *   - OFPTYPE_FLOW_REMOVED: has its own branch, with no visible action.
 *   - anything else: formatted with ofp_to_string() and logged, but only when
 *     debug logging is enabled.
 *
 * While in S_FEATURES_REPLY, message types other than echo request and
 * features reply take a separate early branch whose body is not visible
 * here, as is the body taken when ofptype_pull() reports an error.
 *
 * NOTE(review): the original header comment below has lost its last line and
 * terminator in this listing. */
342 /* Processes 'msg', which should be an OpenFlow received on 'rconn', according
343 * to the learning switch state in 'sw'. The most likely result of processing
344 * is that flow-setup and packet-out OpenFlow messages will be sent out on
347 lswitch_process_packet(struct lswitch
*sw
, const struct ofpbuf
*msg
)
353 if (ofptype_pull(&type
, &b
)) {
357 if (sw
->state
== S_FEATURES_REPLY
358 && type
!= OFPTYPE_ECHO_REQUEST
359 && type
!= OFPTYPE_FEATURES_REPLY
) {
363 if (type
== OFPTYPE_ECHO_REQUEST
) {
364 process_echo_request(sw
, msg
->data
);
365 } else if (type
== OFPTYPE_FEATURES_REPLY
) {
366 if (sw
->state
== S_FEATURES_REPLY
) {
367 if (!process_switch_features(sw
, msg
->data
)) {
368 sw
->state
= S_SWITCHING
;
370 rconn_disconnect(sw
->rconn
);
373 } else if (type
== OFPTYPE_PACKET_IN
) {
374 process_packet_in(sw
, msg
->data
);
375 } else if (type
== OFPTYPE_FLOW_REMOVED
) {
377 } else if (VLOG_IS_DBG_ENABLED()) {
378 char *s
= ofp_to_string(msg
->data
, msg
->size
, NULL
, NULL
, 2);
379 VLOG_DBG_RL(&rl
, "%016llx: OpenFlow packet ignored: %s",
/* send_features_request(): builds the first two requests sent to the switch,
 * using the OpenFlow version that the connection reports: an
 * OFPT_FEATURES_REQUEST and an OFPT_SET_CONFIG whose miss_send_len is
 * OFP_DEFAULT_MISS_SEND_LEN.  The SET_CONFIG is handed to queue_tx().
 *
 * NOTE(review): the line that transmits the features request buffer 'b' and
 * the end of the 'config' initializer are not visible in this listing. */
386 send_features_request(struct lswitch
*sw
)
389 int ofp_version
= rconn_get_version(sw
->rconn
);
/* The version must lie strictly between 0 and 0xff before it is used to
 * build messages. */
391 ovs_assert(ofp_version
> 0 && ofp_version
< 0xff);
393 /* Send OFPT_FEATURES_REQUEST. */
394 b
= ofpraw_alloc(OFPRAW_OFPT_FEATURES_REQUEST
, ofp_version
, 0);
397 /* Send OFPT_SET_CONFIG. */
398 struct ofputil_switch_config config
= {
399 .miss_send_len
= OFP_DEFAULT_MISS_SEND_LEN
401 queue_tx(sw
, ofputil_encode_set_config(&config
, ofp_version
));
/* queue_tx(): sends 'b' on the switch's connection through
 * rconn_send_with_limit(), passing the switch's 'queued' counter and a limit
 * of 10 (presumably the maximum number of packets outstanding on the counter
 * -- confirm against the rconn documentation).
 *
 * Result handling: success and ENOTCONN are silent; EAGAIN is logged at INFO
 * level as a transmit queue overflow; any other error is logged at WARN level
 * with its ovs_strerror() text.  Both messages are rate-limited and prefixed
 * with the datapath ID.  Nothing is reported back to the caller. */
405 queue_tx(struct lswitch
*sw
, struct ofpbuf
*b
)
407 int retval
= rconn_send_with_limit(sw
->rconn
, b
, sw
->queued
, 10);
408 if (retval
&& retval
!= ENOTCONN
) {
409 if (retval
== EAGAIN
) {
410 VLOG_INFO_RL(&rl
, "%016llx: %s: tx queue overflow",
411 sw
->datapath_id
, rconn_get_name(sw
->rconn
));
413 VLOG_WARN_RL(&rl
, "%016llx: %s: send: %s",
414 sw
->datapath_id
, rconn_get_name(sw
->rconn
),
415 ovs_strerror(retval
));
/* process_switch_features(): handles the features reply 'oh'.
 *
 * Pulls the switch features out of the message, stores the datapath ID in
 * 'sw', then walks the port descriptions that follow.  For each port whose
 * name has a record in queue_names and whose record is not yet linked (its
 * hash node is still null), the port number is recorded and the record is
 * inserted into queue_numbers, hashed by port number.
 *
 * The caller treats a zero result as success (see lswitch_process_packet()).
 *
 * NOTE(review): the condition guarding the VLOG_ERR() call and the return
 * statements are not visible in this listing. */
421 process_switch_features(struct lswitch
*sw
, struct ofp_header
*oh
)
423 struct ofputil_switch_features features
;
424 struct ofputil_phy_port port
;
426 struct ofpbuf b
= ofpbuf_const_initializer(oh
, ntohs(oh
->length
));
427 enum ofperr error
= ofputil_pull_switch_features(&b
, &features
);
429 VLOG_ERR("received invalid switch feature reply (%s)",
430 ofperr_to_string(error
));
434 sw
->datapath_id
= features
.datapath_id
;
/* Loop until ofputil_pull_phy_port() returns nonzero. */
436 while (!ofputil_pull_phy_port(oh
->version
, &b
, &port
)) {
437 struct lswitch_port
*lp
= shash_find_data(&sw
->queue_names
, port
.name
);
438 if (lp
&& hmap_node_is_null(&lp
->hmap_node
)) {
439 lp
->port_no
= port
.port_no
;
440 hmap_insert(&sw
->queue_numbers
, &lp
->hmap_node
,
441 hash_ofp_port(lp
->port_no
));
/* lswitch_choose_destination(): picks the output port for 'flow'.
 *
 * Learning: under the MAC table's write lock, if mac_learning_may_learn()
 * accepts the source address, the entry for flow->dl_src is inserted or
 * fetched; when its stored port differs from the input port, the change is
 * logged at debug level and the entry is updated to the input port.
 *
 * Forwarding: frames to reserved multicast addresses get their own branch
 * (body not visible; the comment says they are dropped).  Otherwise the
 * choice starts at OFPP_FLOOD and the destination MAC is looked up under the
 * read lock.  If the learned port equals the input port, the lock is released
 * on a separate early path (body not visible; the comment says the packet is
 * not sent back).  Finally there is a branch for 'action_normal' when the
 * choice is not OFPP_FLOOD; its body is not visible, presumably selecting
 * OFPP_NORMAL.
 *
 * NOTE(review): the return type, the return statements and any guards on
 * sw->ml or on a failed lookup are not visible in this listing. */
448 lswitch_choose_destination(struct lswitch
*sw
, const struct flow
*flow
)
452 /* Learn the source MAC. */
454 ovs_rwlock_wrlock(&sw
->ml
->rwlock
);
455 if (mac_learning_may_learn(sw
->ml
, flow
->dl_src
, 0)) {
456 struct mac_entry
*mac
= mac_learning_insert(sw
->ml
, flow
->dl_src
,
458 if (get_mac_entry_ofp_port(sw
->ml
, mac
)
459 != flow
->in_port
.ofp_port
) {
460 VLOG_DBG_RL(&rl
, "%016llx: learned that "ETH_ADDR_FMT
" is on "
461 "port %"PRIu32
, sw
->datapath_id
,
462 ETH_ADDR_ARGS(flow
->dl_src
),
463 flow
->in_port
.ofp_port
);
465 set_mac_entry_ofp_port(sw
->ml
, mac
, flow
->in_port
.ofp_port
);
468 ovs_rwlock_unlock(&sw
->ml
->rwlock
);
471 /* Drop frames for reserved multicast addresses. */
472 if (eth_addr_is_reserved(flow
->dl_dst
)) {
/* Flood unless the lookup below finds the destination MAC. */
476 out_port
= OFPP_FLOOD
;
478 struct mac_entry
*mac
;
480 ovs_rwlock_rdlock(&sw
->ml
->rwlock
);
481 mac
= mac_learning_lookup(sw
->ml
, flow
->dl_dst
, 0);
483 out_port
= get_mac_entry_ofp_port(sw
->ml
, mac
);
484 if (out_port
== flow
->in_port
.ofp_port
) {
485 /* Don't send a packet back out its input port. */
486 ovs_rwlock_unlock(&sw
->ml
->rwlock
);
490 ovs_rwlock_unlock(&sw
->ml
->rwlock
);
493 /* Check if we need to use "NORMAL" action. */
494 if (sw
->action_normal
&& out_port
!= OFPP_FLOOD
) {
/* get_queue_id(): returns the queue configured for 'in_port'.
 *
 * Searches the hash bucket of queue_numbers for a record whose port number
 * equals 'in_port' and returns its queue_id; if there is none, returns
 * sw->default_queue.  Only ports that process_switch_features() has linked
 * into queue_numbers can match.  (The return type line is not visible in this
 * listing.) */
502 get_queue_id(const struct lswitch
*sw
, ofp_port_t in_port
)
504 const struct lswitch_port
*port
;
506 HMAP_FOR_EACH_WITH_HASH (port
, hmap_node
, hash_ofp_port(in_port
),
507 &sw
->queue_numbers
) {
508 if (port
->port_no
== in_port
) {
509 return port
->queue_id
;
513 return sw
->default_queue
;
/* process_packet_in(): handles one packet-in message 'oh'.
 *
 * Visible steps:
 *   1. Decode the packet-in; a decode failure is logged at WARN level.
 *   2. Packet-ins whose reason is not OFPR_NO_MATCH take a separate branch
 *      (body not visible; the comment says they are ignored).
 *   3. Extract a flow from the packet and copy the input port and tunnel ID
 *      from the packet-in metadata.
 *   4. Choose the output port and look up the queue for the input port.
 *   5. Build the action list: the OFPP_NONE branch has no visible action; a
 *      plain output action is used when no queue applies (UINT32_MAX) or the
 *      port number is at or above OFPP_MAX; otherwise an enqueue action.
 *   6. Fill in a packet-out, attaching the packet data only when the switch
 *      did not buffer it (buffer_id == UINT32_MAX).
 *   7. If flows are being installed (max_idle >= 0) and either there is no
 *      MAC table or the port is not OFPP_FLOOD, send a flow mod with priority
 *      1 and idle timeout max_idle, plus a packet-out when the packet was not
 *      buffered and there is an output port.  Otherwise send only the
 *      packet-out, unless the packet is unbuffered and has no output port.
 *
 * NOTE(review): the assignment of 'buffer_id', several declarations, and the
 * bodies and 'else' lines of some branches are not visible in this listing. */
517 process_packet_in(struct lswitch
*sw
, const struct ofp_header
*oh
)
519 struct ofputil_packet_in pi
;
524 uint64_t ofpacts_stub
[64 / 8];
525 struct ofpbuf ofpacts
;
527 struct ofputil_packet_out po
;
530 struct dp_packet pkt
;
533 error
= ofputil_decode_packet_in(oh
, true, NULL
, NULL
, &pi
, NULL
,
536 VLOG_WARN_RL(&rl
, "failed to decode packet-in: %s",
537 ofperr_to_string(error
));
541 /* Ignore packets sent via output to OFPP_CONTROLLER. This library never
542 * uses such an action. You never know what experiments might be going on,
543 * though, and it seems best not to interfere with them. */
544 if (pi
.reason
!= OFPR_NO_MATCH
) {
548 /* Extract flow data from 'pi' into 'flow'. */
549 dp_packet_use_const(&pkt
, pi
.packet
, pi
.packet_len
);
550 flow_extract(&pkt
, &flow
);
551 flow
.in_port
.ofp_port
= pi
.flow_metadata
.flow
.in_port
.ofp_port
;
552 flow
.tunnel
.tun_id
= pi
.flow_metadata
.flow
.tunnel
.tun_id
;
554 /* Choose output port. */
555 out_port
= lswitch_choose_destination(sw
, &flow
);
558 queue_id
= get_queue_id(sw
, pi
.flow_metadata
.flow
.in_port
.ofp_port
);
/* The action list is built in the 64-byte on-stack stub declared above. */
559 ofpbuf_use_stack(&ofpacts
, ofpacts_stub
, sizeof ofpacts_stub
);
560 if (out_port
== OFPP_NONE
) {
562 } else if (queue_id
== UINT32_MAX
563 || ofp_to_u16(out_port
) >= ofp_to_u16(OFPP_MAX
)) {
564 ofpact_put_OUTPUT(&ofpacts
)->port
= out_port
;
566 struct ofpact_enqueue
*enqueue
= ofpact_put_ENQUEUE(&ofpacts
);
567 enqueue
->port
= out_port
;
568 enqueue
->queue
= queue_id
;
571 /* Prepare packet_out in case we need one. */
572 po
.buffer_id
= buffer_id
;
573 if (buffer_id
== UINT32_MAX
) {
574 po
.packet
= dp_packet_data(&pkt
);
575 po
.packet_len
= dp_packet_size(&pkt
);
580 match_set_in_port(&po
.flow_metadata
,
581 pi
.flow_metadata
.flow
.in_port
.ofp_port
);
582 po
.ofpacts
= ofpacts
.data
;
583 po
.ofpacts_len
= ofpacts
.size
;
585 /* Send the packet, and possibly the whole flow, to the output port. */
586 if (sw
->max_idle
>= 0 && (!sw
->ml
|| out_port
!= OFPP_FLOOD
)) {
587 /* The output port is known, or we always flood everything, so add a
589 struct ofputil_flow_mod fm
= {
590 .priority
= 1, /* Must be > 0 because of table-miss flow entry. */
592 .command
= OFPFC_ADD
,
593 .idle_timeout
= sw
->max_idle
,
594 .buffer_id
= buffer_id
,
595 .out_port
= OFPP_NONE
,
596 .ofpacts
= ofpacts
.data
,
597 .ofpacts_len
= ofpacts
.size
,
601 match_init(&match
, &flow
, &sw
->wc
);
602 ofputil_normalize_match_quiet(&match
);
603 minimatch_init(&fm
.match
, &match
);
605 struct ofpbuf
*buffer
= ofputil_encode_flow_mod(&fm
, sw
->protocol
);
607 minimatch_destroy(&fm
.match
);
609 queue_tx(sw
, buffer
);
611 /* If the switch didn't buffer the packet, we need to send a copy. */
612 if (buffer_id
== UINT32_MAX
&& out_port
!= OFPP_NONE
) {
613 queue_tx(sw
, ofputil_encode_packet_out(&po
, sw
->protocol
));
616 /* We don't know that MAC, or we don't set up flows. Send along the
617 * packet without setting up a flow. */
618 if (buffer_id
!= UINT32_MAX
|| out_port
!= OFPP_NONE
) {
619 queue_tx(sw
, ofputil_encode_packet_out(&po
, sw
->protocol
));
/* process_echo_request(): answers echo request 'rq' by encoding an echo reply
 * from it and handing the reply to queue_tx(), which logs any send error. */
625 process_echo_request(struct lswitch
*sw
, const struct ofp_header
*rq
)
627 queue_tx(sw
, ofputil_encode_echo_reply(rq
));
/* get_mac_entry_ofp_port(): returns the OpenFlow port stored in MAC entry
 * 'e'.
 *
 * The MAC table hands back the entry's port as an opaque pointer; this file
 * keeps the port number itself in that pointer (see
 * set_mac_entry_ofp_port()), so the value is recovered by converting the
 * pointer to uintptr_t and then to ofp_port_t.  The function is annotated
 * OVS_REQ_RDLOCK on ml->rwlock. */
631 get_mac_entry_ofp_port(const struct mac_learning
*ml
,
632 const struct mac_entry
*e
)
633 OVS_REQ_RDLOCK(ml
->rwlock
)
635 void *port
= mac_entry_get_port(ml
, e
);
636 return (OVS_FORCE ofp_port_t
) (uintptr_t) port
;
/* set_mac_entry_ofp_port(): stores 'ofp_port' as the port of MAC entry 'e'.
 *
 * The port number is converted to uintptr_t and then to a void pointer, so
 * that it fits the opaque pointer slot the MAC table keeps per entry;
 * get_mac_entry_ofp_port() reverses the conversion.  The function is
 * annotated OVS_REQ_WRLOCK on ml->rwlock. */
640 set_mac_entry_ofp_port(struct mac_learning
*ml
,
641 struct mac_entry
*e
, ofp_port_t ofp_port
)
642 OVS_REQ_WRLOCK(ml
->rwlock
)
644 mac_entry_set_port(ml
, e
, (void *) (OVS_FORCE
uintptr_t) ofp_port
);