/*
 * Copyright (c) 2008-2017 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <config.h>
#include "learning-switch.h"

#include <errno.h>
#include <inttypes.h>
#include <netinet/in.h>
#include <stdlib.h>

#include "byte-order.h"
#include "classifier.h"
#include "dp-packet.h"
#include "flow.h"
#include "openvswitch/hmap.h"
#include "mac-learning.h"
#include "openflow/openflow.h"
#include "openvswitch/ofp-actions.h"
#include "openvswitch/ofp-errors.h"
#include "openvswitch/ofp-msgs.h"
#include "openvswitch/ofp-print.h"
#include "openvswitch/ofp-util.h"
#include "openvswitch/ofp-parse.h"
#include "openvswitch/ofpbuf.h"
#include "openvswitch/vconn.h"
#include "openvswitch/vlog.h"
#include "poll-loop.h"
#include "rconn.h"
#include "openvswitch/shash.h"
#include "simap.h"
/* Registers this file's log messages under the "learning_switch" module. */
VLOG_DEFINE_THIS_MODULE(learning_switch);
51 struct hmap_node hmap_node
; /* Hash node for port number. */
52 ofp_port_t port_no
; /* OpenFlow port number. */
53 uint32_t queue_id
; /* OpenFlow queue number. */
/* Connection state machine of a learning switch. */
enum lswitch_state {
    S_CONNECTING,               /* Waiting for connection to complete. */
    S_FEATURES_REPLY,           /* Waiting for features reply. */
    S_SWITCHING,                /* Switching flows. */
};
64 enum lswitch_state state
;
66 /* If nonnegative, the switch sets up flows that expire after the given
67 * number of seconds (or never expire, if the value is OFP_FLOW_PERMANENT).
68 * Otherwise, the switch processes every packet. */
71 enum ofputil_protocol protocol
;
72 unsigned long long int datapath_id
;
73 struct mac_learning
*ml
; /* NULL to act as hub instead of switch. */
74 struct flow_wildcards wc
; /* Wildcards to apply to flows. */
75 bool action_normal
; /* Use OFPP_NORMAL? */
77 /* Queue distribution. */
78 uint32_t default_queue
; /* Default OpenFlow queue, or UINT32_MAX. */
79 struct hmap queue_numbers
; /* Map from port number to lswitch_port. */
80 struct shash queue_names
; /* Map from port name to lswitch_port. */
82 /* Number of outgoing queued packets on the rconn. */
83 struct rconn_packet_counter
*queued
;
85 /* If true, do not reply to any messages from the switch (for debugging
89 /* Optional "flow mod" requests to send to the switch at connection time,
90 * to set up the flow table. */
91 const struct ofputil_flow_mod
*default_flows
;
92 size_t n_default_flows
;
93 enum ofputil_protocol usable_protocols
;
96 /* The log messages here could actually be useful in debugging, so keep the
97 * rate limit relatively high. */
98 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(30, 300);
100 static void queue_tx(struct lswitch
*, struct ofpbuf
*);
101 static void send_features_request(struct lswitch
*);
103 static void lswitch_process_packet(struct lswitch
*, const struct ofpbuf
*);
104 static enum ofperr
process_switch_features(struct lswitch
*,
105 struct ofp_header
*);
106 static void process_packet_in(struct lswitch
*, const struct ofp_header
*);
107 static void process_echo_request(struct lswitch
*, const struct ofp_header
*);
109 static ofp_port_t
get_mac_entry_ofp_port(const struct mac_learning
*ml
,
110 const struct mac_entry
*)
111 OVS_REQ_RDLOCK(ml
->rwlock
);
112 static void set_mac_entry_ofp_port(struct mac_learning
*ml
,
113 struct mac_entry
*, ofp_port_t
)
114 OVS_REQ_WRLOCK(ml
->rwlock
);
116 /* Creates and returns a new learning switch whose configuration is given by
119 * 'rconn' is used to send out an OpenFlow features request. */
121 lswitch_create(struct rconn
*rconn
, const struct lswitch_config
*cfg
)
126 sw
= xzalloc(sizeof *sw
);
128 sw
->state
= S_CONNECTING
;
129 sw
->max_idle
= cfg
->max_idle
;
131 sw
->ml
= (cfg
->mode
== LSW_LEARN
132 ? mac_learning_create(MAC_ENTRY_DEFAULT_IDLE_TIME
)
134 sw
->action_normal
= cfg
->mode
== LSW_NORMAL
;
136 switch (cfg
->wildcards
) {
142 /* Try to wildcard as many fields as possible, but we cannot
143 * wildcard all fields. We need in_port to detect moves. We need
144 * Ethernet source and dest and VLAN VID to do L2 learning. */
145 ofpfw
= (OFPFW10_DL_TYPE
| OFPFW10_DL_VLAN_PCP
146 | OFPFW10_NW_SRC_ALL
| OFPFW10_NW_DST_ALL
147 | OFPFW10_NW_TOS
| OFPFW10_NW_PROTO
148 | OFPFW10_TP_SRC
| OFPFW10_TP_DST
);
152 ofpfw
= cfg
->wildcards
;
155 ofputil_wildcard_from_ofpfw10(ofpfw
, &sw
->wc
);
157 sw
->default_queue
= cfg
->default_queue
;
158 hmap_init(&sw
->queue_numbers
);
159 shash_init(&sw
->queue_names
);
160 if (cfg
->port_queues
) {
161 struct simap_node
*node
;
163 SIMAP_FOR_EACH (node
, cfg
->port_queues
) {
164 struct lswitch_port
*port
= xmalloc(sizeof *port
);
165 hmap_node_nullify(&port
->hmap_node
);
166 port
->queue_id
= node
->data
;
167 shash_add(&sw
->queue_names
, node
->name
, port
);
171 sw
->default_flows
= cfg
->default_flows
;
172 sw
->n_default_flows
= cfg
->n_default_flows
;
173 sw
->usable_protocols
= cfg
->usable_protocols
;
175 sw
->queued
= rconn_packet_counter_create();
181 lswitch_handshake(struct lswitch
*sw
)
183 enum ofputil_protocol protocol
;
184 enum ofp_version version
;
186 send_features_request(sw
);
188 version
= rconn_get_version(sw
->rconn
);
189 protocol
= ofputil_protocol_from_ofp_version(version
);
190 if (version
>= OFP13_VERSION
) {
191 /* OpenFlow 1.3 and later by default drop packets that miss in the flow
192 * table. Set up a flow to send packets to the controller by
194 struct ofpact_output output
;
198 ofpact_init_OUTPUT(&output
);
199 output
.port
= OFPP_CONTROLLER
;
200 output
.max_len
= OFP_DEFAULT_MISS_SEND_LEN
;
202 struct ofputil_flow_mod fm
= {
203 .match
= MATCH_CATCHALL_INITIALIZER
,
206 .command
= OFPFC_ADD
,
207 .buffer_id
= UINT32_MAX
,
208 .out_port
= OFPP_NONE
,
209 .out_group
= OFPG_ANY
,
210 .ofpacts
= &output
.ofpact
,
211 .ofpacts_len
= sizeof output
,
214 msg
= ofputil_encode_flow_mod(&fm
, protocol
);
215 error
= rconn_send(sw
->rconn
, msg
, NULL
);
217 VLOG_INFO_RL(&rl
, "%s: failed to add default flow (%s)",
218 rconn_get_name(sw
->rconn
), ovs_strerror(error
));
221 if (sw
->default_flows
) {
222 struct ofpbuf
*msg
= NULL
;
226 /* If the initial protocol isn't good enough for default_flows, then
227 * pick one that will work and encode messages to set up that
230 * This could be improved by actually negotiating a mutually acceptable
231 * flow format with the switch, but that would require an asynchronous
232 * state machine. This version ought to work fine in practice. */
233 if (!(protocol
& sw
->usable_protocols
)) {
234 enum ofputil_protocol want
= rightmost_1bit(sw
->usable_protocols
);
236 msg
= ofputil_encode_set_protocol(protocol
, want
, &protocol
);
240 error
= rconn_send(sw
->rconn
, msg
, NULL
);
243 if (protocol
& sw
->usable_protocols
) {
244 for (i
= 0; !error
&& i
< sw
->n_default_flows
; i
++) {
245 msg
= ofputil_encode_flow_mod(&sw
->default_flows
[i
], protocol
);
246 error
= rconn_send(sw
->rconn
, msg
, NULL
);
250 VLOG_INFO_RL(&rl
, "%s: failed to queue default flows (%s)",
251 rconn_get_name(sw
->rconn
), ovs_strerror(error
));
254 VLOG_INFO_RL(&rl
, "%s: failed to set usable protocol",
255 rconn_get_name(sw
->rconn
));
258 sw
->protocol
= protocol
;
262 lswitch_is_alive(const struct lswitch
*sw
)
264 return rconn_is_alive(sw
->rconn
);
269 lswitch_destroy(struct lswitch
*sw
)
272 struct lswitch_port
*node
;
274 rconn_destroy(sw
->rconn
);
275 HMAP_FOR_EACH_POP (node
, hmap_node
, &sw
->queue_numbers
) {
278 shash_destroy(&sw
->queue_names
);
279 mac_learning_unref(sw
->ml
);
280 rconn_packet_counter_destroy(sw
->queued
);
285 /* Takes care of necessary 'sw' activity, except for receiving packets (which
286 * the caller must do). */
288 lswitch_run(struct lswitch
*sw
)
293 ovs_rwlock_wrlock(&sw
->ml
->rwlock
);
294 mac_learning_run(sw
->ml
);
295 ovs_rwlock_unlock(&sw
->ml
->rwlock
);
298 rconn_run(sw
->rconn
);
300 if (sw
->state
== S_CONNECTING
) {
301 if (rconn_get_version(sw
->rconn
) != -1) {
302 lswitch_handshake(sw
);
303 sw
->state
= S_FEATURES_REPLY
;
308 for (i
= 0; i
< 50; i
++) {
311 msg
= rconn_recv(sw
->rconn
);
317 lswitch_process_packet(sw
, msg
);
324 lswitch_wait(struct lswitch
*sw
)
327 ovs_rwlock_rdlock(&sw
->ml
->rwlock
);
328 mac_learning_wait(sw
->ml
);
329 ovs_rwlock_unlock(&sw
->ml
->rwlock
);
331 rconn_run_wait(sw
->rconn
);
332 rconn_recv_wait(sw
->rconn
);
335 /* Processes 'msg', which should be an OpenFlow received on 'rconn', according
336 * to the learning switch state in 'sw'. The most likely result of processing
337 * is that flow-setup and packet-out OpenFlow messages will be sent out on
340 lswitch_process_packet(struct lswitch
*sw
, const struct ofpbuf
*msg
)
346 if (ofptype_pull(&type
, &b
)) {
350 if (sw
->state
== S_FEATURES_REPLY
351 && type
!= OFPTYPE_ECHO_REQUEST
352 && type
!= OFPTYPE_FEATURES_REPLY
) {
356 if (type
== OFPTYPE_ECHO_REQUEST
) {
357 process_echo_request(sw
, msg
->data
);
358 } else if (type
== OFPTYPE_FEATURES_REPLY
) {
359 if (sw
->state
== S_FEATURES_REPLY
) {
360 if (!process_switch_features(sw
, msg
->data
)) {
361 sw
->state
= S_SWITCHING
;
363 rconn_disconnect(sw
->rconn
);
366 } else if (type
== OFPTYPE_PACKET_IN
) {
367 process_packet_in(sw
, msg
->data
);
368 } else if (type
== OFPTYPE_FLOW_REMOVED
) {
370 } else if (VLOG_IS_DBG_ENABLED()) {
371 char *s
= ofp_to_string(msg
->data
, msg
->size
, NULL
, 2);
372 VLOG_DBG_RL(&rl
, "%016llx: OpenFlow packet ignored: %s",
379 send_features_request(struct lswitch
*sw
)
382 int ofp_version
= rconn_get_version(sw
->rconn
);
384 ovs_assert(ofp_version
> 0 && ofp_version
< 0xff);
386 /* Send OFPT_FEATURES_REQUEST. */
387 b
= ofpraw_alloc(OFPRAW_OFPT_FEATURES_REQUEST
, ofp_version
, 0);
390 /* Send OFPT_SET_CONFIG. */
391 struct ofputil_switch_config config
= {
392 .miss_send_len
= OFP_DEFAULT_MISS_SEND_LEN
394 queue_tx(sw
, ofputil_encode_set_config(&config
, ofp_version
));
398 queue_tx(struct lswitch
*sw
, struct ofpbuf
*b
)
400 int retval
= rconn_send_with_limit(sw
->rconn
, b
, sw
->queued
, 10);
401 if (retval
&& retval
!= ENOTCONN
) {
402 if (retval
== EAGAIN
) {
403 VLOG_INFO_RL(&rl
, "%016llx: %s: tx queue overflow",
404 sw
->datapath_id
, rconn_get_name(sw
->rconn
));
406 VLOG_WARN_RL(&rl
, "%016llx: %s: send: %s",
407 sw
->datapath_id
, rconn_get_name(sw
->rconn
),
408 ovs_strerror(retval
));
414 process_switch_features(struct lswitch
*sw
, struct ofp_header
*oh
)
416 struct ofputil_switch_features features
;
417 struct ofputil_phy_port port
;
419 struct ofpbuf b
= ofpbuf_const_initializer(oh
, ntohs(oh
->length
));
420 enum ofperr error
= ofputil_pull_switch_features(&b
, &features
);
422 VLOG_ERR("received invalid switch feature reply (%s)",
423 ofperr_to_string(error
));
427 sw
->datapath_id
= features
.datapath_id
;
429 while (!ofputil_pull_phy_port(oh
->version
, &b
, &port
)) {
430 struct lswitch_port
*lp
= shash_find_data(&sw
->queue_names
, port
.name
);
431 if (lp
&& hmap_node_is_null(&lp
->hmap_node
)) {
432 lp
->port_no
= port
.port_no
;
433 hmap_insert(&sw
->queue_numbers
, &lp
->hmap_node
,
434 hash_ofp_port(lp
->port_no
));
441 lswitch_choose_destination(struct lswitch
*sw
, const struct flow
*flow
)
445 /* Learn the source MAC. */
447 ovs_rwlock_wrlock(&sw
->ml
->rwlock
);
448 if (mac_learning_may_learn(sw
->ml
, flow
->dl_src
, 0)) {
449 struct mac_entry
*mac
= mac_learning_insert(sw
->ml
, flow
->dl_src
,
451 if (get_mac_entry_ofp_port(sw
->ml
, mac
)
452 != flow
->in_port
.ofp_port
) {
453 VLOG_DBG_RL(&rl
, "%016llx: learned that "ETH_ADDR_FMT
" is on "
454 "port %"PRIu32
, sw
->datapath_id
,
455 ETH_ADDR_ARGS(flow
->dl_src
),
456 flow
->in_port
.ofp_port
);
458 set_mac_entry_ofp_port(sw
->ml
, mac
, flow
->in_port
.ofp_port
);
461 ovs_rwlock_unlock(&sw
->ml
->rwlock
);
464 /* Drop frames for reserved multicast addresses. */
465 if (eth_addr_is_reserved(flow
->dl_dst
)) {
469 out_port
= OFPP_FLOOD
;
471 struct mac_entry
*mac
;
473 ovs_rwlock_rdlock(&sw
->ml
->rwlock
);
474 mac
= mac_learning_lookup(sw
->ml
, flow
->dl_dst
, 0);
476 out_port
= get_mac_entry_ofp_port(sw
->ml
, mac
);
477 if (out_port
== flow
->in_port
.ofp_port
) {
478 /* Don't send a packet back out its input port. */
479 ovs_rwlock_unlock(&sw
->ml
->rwlock
);
483 ovs_rwlock_unlock(&sw
->ml
->rwlock
);
486 /* Check if we need to use "NORMAL" action. */
487 if (sw
->action_normal
&& out_port
!= OFPP_FLOOD
) {
495 get_queue_id(const struct lswitch
*sw
, ofp_port_t in_port
)
497 const struct lswitch_port
*port
;
499 HMAP_FOR_EACH_WITH_HASH (port
, hmap_node
, hash_ofp_port(in_port
),
500 &sw
->queue_numbers
) {
501 if (port
->port_no
== in_port
) {
502 return port
->queue_id
;
506 return sw
->default_queue
;
510 process_packet_in(struct lswitch
*sw
, const struct ofp_header
*oh
)
512 struct ofputil_packet_in pi
;
517 uint64_t ofpacts_stub
[64 / 8];
518 struct ofpbuf ofpacts
;
520 struct ofputil_packet_out po
;
523 struct dp_packet pkt
;
526 error
= ofputil_decode_packet_in(oh
, true, NULL
, NULL
, &pi
, NULL
,
529 VLOG_WARN_RL(&rl
, "failed to decode packet-in: %s",
530 ofperr_to_string(error
));
534 /* Ignore packets sent via output to OFPP_CONTROLLER. This library never
535 * uses such an action. You never know what experiments might be going on,
536 * though, and it seems best not to interfere with them. */
537 if (pi
.reason
!= OFPR_NO_MATCH
) {
541 /* Extract flow data from 'pi' into 'flow'. */
542 dp_packet_use_const(&pkt
, pi
.packet
, pi
.packet_len
);
543 flow_extract(&pkt
, &flow
);
544 flow
.in_port
.ofp_port
= pi
.flow_metadata
.flow
.in_port
.ofp_port
;
545 flow
.tunnel
.tun_id
= pi
.flow_metadata
.flow
.tunnel
.tun_id
;
547 /* Choose output port. */
548 out_port
= lswitch_choose_destination(sw
, &flow
);
551 queue_id
= get_queue_id(sw
, pi
.flow_metadata
.flow
.in_port
.ofp_port
);
552 ofpbuf_use_stack(&ofpacts
, ofpacts_stub
, sizeof ofpacts_stub
);
553 if (out_port
== OFPP_NONE
) {
555 } else if (queue_id
== UINT32_MAX
556 || ofp_to_u16(out_port
) >= ofp_to_u16(OFPP_MAX
)) {
557 ofpact_put_OUTPUT(&ofpacts
)->port
= out_port
;
559 struct ofpact_enqueue
*enqueue
= ofpact_put_ENQUEUE(&ofpacts
);
560 enqueue
->port
= out_port
;
561 enqueue
->queue
= queue_id
;
564 /* Prepare packet_out in case we need one. */
565 po
.buffer_id
= buffer_id
;
566 if (buffer_id
== UINT32_MAX
) {
567 po
.packet
= dp_packet_data(&pkt
);
568 po
.packet_len
= dp_packet_size(&pkt
);
573 match_set_in_port(&po
.flow_metadata
,
574 pi
.flow_metadata
.flow
.in_port
.ofp_port
);
575 po
.ofpacts
= ofpacts
.data
;
576 po
.ofpacts_len
= ofpacts
.size
;
578 /* Send the packet, and possibly the whole flow, to the output port. */
579 if (sw
->max_idle
>= 0 && (!sw
->ml
|| out_port
!= OFPP_FLOOD
)) {
580 /* The output port is known, or we always flood everything, so add a
582 struct ofputil_flow_mod fm
= {
583 .priority
= 1, /* Must be > 0 because of table-miss flow entry. */
585 .command
= OFPFC_ADD
,
586 .idle_timeout
= sw
->max_idle
,
587 .buffer_id
= buffer_id
,
588 .out_port
= OFPP_NONE
,
589 .ofpacts
= ofpacts
.data
,
590 .ofpacts_len
= ofpacts
.size
,
592 match_init(&fm
.match
, &flow
, &sw
->wc
);
593 ofputil_normalize_match_quiet(&fm
.match
);
595 struct ofpbuf
*buffer
= ofputil_encode_flow_mod(&fm
, sw
->protocol
);
597 queue_tx(sw
, buffer
);
599 /* If the switch didn't buffer the packet, we need to send a copy. */
600 if (buffer_id
== UINT32_MAX
&& out_port
!= OFPP_NONE
) {
601 queue_tx(sw
, ofputil_encode_packet_out(&po
, sw
->protocol
));
604 /* We don't know that MAC, or we don't set up flows. Send along the
605 * packet without setting up a flow. */
606 if (buffer_id
!= UINT32_MAX
|| out_port
!= OFPP_NONE
) {
607 queue_tx(sw
, ofputil_encode_packet_out(&po
, sw
->protocol
));
/* Answers the echo request 'rq' by queueing the reply built by
 * make_echo_reply() on 'sw'. */
static void
process_echo_request(struct lswitch *sw, const struct ofp_header *rq)
{
    queue_tx(sw, make_echo_reply(rq));
}
619 get_mac_entry_ofp_port(const struct mac_learning
*ml
,
620 const struct mac_entry
*e
)
621 OVS_REQ_RDLOCK(ml
->rwlock
)
623 void *port
= mac_entry_get_port(ml
, e
);
624 return (OVS_FORCE ofp_port_t
) (uintptr_t) port
;
628 set_mac_entry_ofp_port(struct mac_learning
*ml
,
629 struct mac_entry
*e
, ofp_port_t ofp_port
)
630 OVS_REQ_WRLOCK(ml
->rwlock
)
632 mac_entry_set_port(ml
, e
, (void *) (OVS_FORCE
uintptr_t) ofp_port
);