2 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2019 Nicira, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 #include "openvswitch/rconn.h"
24 #include "openflow/openflow.h"
25 #include "openvswitch/ofp-msgs.h"
26 #include "openvswitch/ofp-util.h"
27 #include "openvswitch/ofpbuf.h"
28 #include "openvswitch/vconn.h"
29 #include "openvswitch/vlog.h"
30 #include "openvswitch/poll-loop.h"
35 #include "ovs-thread.h"
37 VLOG_DEFINE_THIS_MODULE(rconn
);
39 COVERAGE_DEFINE(rconn_discarded
);
40 COVERAGE_DEFINE(rconn_overflow
);
41 COVERAGE_DEFINE(rconn_queued
);
42 COVERAGE_DEFINE(rconn_sent
);
/* The connection states have the following meanings:
 *
 *    - S_VOID: No connection information is configured.
 *
 *    - S_BACKOFF: Waiting for a period of time before reconnecting.
 *
 *    - S_CONNECTING: A connection attempt is in progress and has not yet
 *      succeeded or failed.
 *
 *    - S_ACTIVE: A connection has been established and appears to be healthy.
 *
 *    - S_IDLE: A connection has been established but has been idle for some
 *      time.  An echo request has been sent, but no reply has yet been
 *      received.
 *
 *    - S_DISCONNECTED: An unreliable connection has disconnected and cannot be
 *      automatically retried.
 */
64 STATE(BACKOFF, 1 << 1) \
65 STATE(CONNECTING, 1 << 2) \
66 STATE(ACTIVE, 1 << 3) \
68 STATE(DISCONNECTED, 1 << 5)
70 #define STATE(NAME, VALUE) S_##NAME = VALUE,
76 state_name(enum state state
)
79 #define STATE(NAME, VALUE) case S_##NAME: return #NAME;
86 /* A reliable connection to an OpenFlow switch or controller.
88 * Members of type 'long long int' are times in milliseconds on the monotonic
89 * clock, as returned by time_msec(). Other times are durations in seconds.
91 * See the large comment in rconn.h for more information. */
93 struct ovs_mutex mutex
;
96 long long int state_entered
;
99 char *name
; /* Human-readable descriptive name. */
100 char *target
; /* vconn name, passed to vconn_open(). */
103 struct ovs_list txq
; /* Contains "struct ofpbuf"s. */
105 long long int backoff
; /* Current backoff, in milliseconds. */
106 long long int max_backoff
; /* Limit for backoff, in milliseconds. */
107 long long int backoff_deadline
;
108 long long int last_connected
;
109 long long int last_disconnected
;
113 /* In S_ACTIVE and S_IDLE, probably_admitted reports whether we believe
114 * that the peer has made a (positive) admission control decision on our
115 * connection. If we have not yet been (probably) admitted, then the
116 * connection does not reset the timer used for deciding whether the switch
117 * should go into fail-open mode.
119 * last_admitted reports the last time we believe such a positive admission
120 * control decision was made. */
121 bool probably_admitted
;
122 long long int last_admitted
; /* Milliseconds on monotonic clock. */
124 /* Throughout this file, "probe" is shorthand for "inactivity probe". When
125 * no activity has been observed from the peer for a while, we send out an
126 * echo request as an inactivity probe packet. We should receive back a
129 * "Activity" is defined as either receiving an OpenFlow message from the
130 * peer or successfully sending a message that had been in 'txq'. */
131 int probe_interval
; /* Secs of inactivity before sending probe. */
132 long long int last_activity
; /* Last time we saw some activity. */
136 /* Messages sent or received are copied to the monitor connections. */
137 #define MAXIMUM_MONITORS 8
138 struct vconn
*monitors
[MAXIMUM_MONITORS
];
141 uint32_t allowed_versions
; /* Acceptable OpenFlow versions. */
142 int version
; /* Current or most recent version. */
145 /* Counts packets and bytes queued into an rconn by a given source. */
146 struct rconn_packet_counter
{
147 struct ovs_mutex mutex
;
148 unsigned int n_packets OVS_GUARDED
; /* Number of packets queued. */
149 unsigned int n_bytes OVS_GUARDED
; /* Number of bytes queued. */
150 int ref_cnt OVS_GUARDED
; /* Number of owners. */
153 uint32_t rconn_get_allowed_versions(const struct rconn
*rconn
)
155 return rconn
->allowed_versions
;
158 static long long int elapsed_in_this_state(const struct rconn
*rc
)
159 OVS_REQUIRES(rc
->mutex
);
160 static long long int timeout(const struct rconn
*rc
) OVS_REQUIRES(rc
->mutex
);
161 static bool timed_out(const struct rconn
*rc
) OVS_REQUIRES(rc
->mutex
);
162 static void state_transition(struct rconn
*rc
, enum state
)
163 OVS_REQUIRES(rc
->mutex
);
164 static void rconn_set_target__(struct rconn
*rc
,
165 const char *target
, const char *name
)
166 OVS_REQUIRES(rc
->mutex
);
167 static int rconn_send__(struct rconn
*rc
, struct ofpbuf
*,
168 struct rconn_packet_counter
*)
169 OVS_REQUIRES(rc
->mutex
);
170 static int try_send(struct rconn
*rc
) OVS_REQUIRES(rc
->mutex
);
171 static void reconnect(struct rconn
*rc
) OVS_REQUIRES(rc
->mutex
);
172 static void report_error(struct rconn
*rc
, int error
) OVS_REQUIRES(rc
->mutex
);
173 static void rconn_disconnect__(struct rconn
*rc
) OVS_REQUIRES(rc
->mutex
);
174 static void disconnect(struct rconn
*rc
, int error
) OVS_REQUIRES(rc
->mutex
);
175 static void flush_queue(struct rconn
*rc
) OVS_REQUIRES(rc
->mutex
);
176 static void close_monitor(struct rconn
*rc
, size_t idx
, int retval
)
177 OVS_REQUIRES(rc
->mutex
);
178 static void copy_to_monitor(struct rconn
*, const struct ofpbuf
*);
179 static bool is_connected_state(enum state
);
180 static bool is_admitted_msg(const struct ofpbuf
*);
181 static bool rconn_logging_connection_attempts__(const struct rconn
*rc
)
182 OVS_REQUIRES(rc
->mutex
);
184 /* The following prototypes duplicate those in rconn.h, but there we weren't
185 * able to add the OVS_EXCLUDED annotations because the definition of struct
186 * rconn was not visible. */
188 void rconn_set_max_backoff(struct rconn
*rc
, int max_backoff
)
189 OVS_EXCLUDED(rc
->mutex
);
190 void rconn_connect(struct rconn
*rc
, const char *target
, const char *name
)
191 OVS_EXCLUDED(rc
->mutex
);
192 void rconn_connect_unreliably(struct rconn
*rc
,
193 struct vconn
*vconn
, const char *name
)
194 OVS_EXCLUDED(rc
->mutex
);
195 void rconn_reconnect(struct rconn
*rc
) OVS_EXCLUDED(rc
->mutex
);
196 void rconn_disconnect(struct rconn
*rc
) OVS_EXCLUDED(rc
->mutex
);
197 void rconn_run(struct rconn
*rc
) OVS_EXCLUDED(rc
->mutex
);
198 void rconn_run_wait(struct rconn
*rc
) OVS_EXCLUDED(rc
->mutex
);
199 struct ofpbuf
*rconn_recv(struct rconn
*rc
) OVS_EXCLUDED(rc
->mutex
);
200 void rconn_recv_wait(struct rconn
*rc
) OVS_EXCLUDED(rc
->mutex
);
201 int rconn_send(struct rconn
*rc
, struct ofpbuf
*b
,
202 struct rconn_packet_counter
*counter
)
203 OVS_EXCLUDED(rc
->mutex
);
204 int rconn_send_with_limit(struct rconn
*rc
, struct ofpbuf
*b
,
205 struct rconn_packet_counter
*counter
,
207 OVS_EXCLUDED(rc
->mutex
);
208 void rconn_add_monitor(struct rconn
*rc
, struct vconn
*vconn
)
209 OVS_EXCLUDED(rc
->mutex
);
210 void rconn_set_name(struct rconn
*rc
, const char *new_name
)
211 OVS_EXCLUDED(rc
->mutex
);
212 bool rconn_is_admitted(const struct rconn
*rconn
) OVS_EXCLUDED(rconn
->mutex
);
213 int rconn_failure_duration(const struct rconn
*rconn
)
214 OVS_EXCLUDED(rconn
->mutex
);
215 ovs_be16
rconn_get_local_port(const struct rconn
*rconn
)
216 OVS_EXCLUDED(rconn
->mutex
);
217 int rconn_get_version(const struct rconn
*rconn
) OVS_EXCLUDED(rconn
->mutex
);
218 unsigned int rconn_count_txqlen(const struct rconn
*rc
)
219 OVS_EXCLUDED(rc
->mutex
);
222 /* Creates and returns a new rconn.
224 * 'probe_interval' is a number of seconds. If the interval passes once
225 * without an OpenFlow message being received from the peer, the rconn sends
226 * out an "echo request" message. If the interval passes again without a
227 * message being received, the rconn disconnects and re-connects to the peer.
228 * Setting 'probe_interval' to 0 disables this behavior.
230 * 'max_backoff' is the maximum number of seconds between attempts to connect
231 * to the peer. The actual interval starts at 1 second and doubles on each
232 * failure until it reaches 'max_backoff'. If 0 is specified, the default of
235 * The new rconn is initially unconnected. Use rconn_connect() or
236 * rconn_connect_unreliably() to connect it.
238 * Connections made by the rconn will automatically negotiate an OpenFlow
239 * protocol version acceptable to both peers on the connection. The version
240 * negotiated will be one of those in the 'allowed_versions' bitmap: version
241 * 'x' is allowed if allowed_versions & (1 << x) is nonzero. (The underlying
242 * vconn will treat an 'allowed_versions' of 0 as OFPUTIL_DEFAULT_VERSIONS.)
245 rconn_create(int probe_interval
, int max_backoff
, uint8_t dscp
,
246 uint32_t allowed_versions
)
248 struct rconn
*rc
= xzalloc(sizeof *rc
);
250 ovs_mutex_init(&rc
->mutex
);
253 rc
->state_entered
= time_msec();
256 rc
->name
= xstrdup("void");
257 rc
->target
= xstrdup("void");
258 rc
->reliable
= false;
260 ovs_list_init(&rc
->txq
);
263 rc
->max_backoff
= max_backoff
? llsat_mul(1000, max_backoff
) : 8000;
264 rc
->backoff_deadline
= LLONG_MIN
;
265 rc
->last_connected
= LLONG_MIN
;
266 rc
->last_disconnected
= LLONG_MIN
;
269 rc
->probably_admitted
= false;
270 rc
->last_admitted
= time_msec();
272 rc
->last_activity
= time_msec();
274 rconn_set_probe_interval(rc
, probe_interval
);
275 rconn_set_dscp(rc
, dscp
);
279 rc
->allowed_versions
= allowed_versions
;
286 rconn_set_max_backoff(struct rconn
*rc
, int max_backoff
)
287 OVS_EXCLUDED(rc
->mutex
)
289 ovs_mutex_lock(&rc
->mutex
);
290 rc
->max_backoff
= llsat_mul(1000, MAX(1, max_backoff
));
291 if (rc
->state
== S_BACKOFF
&& rc
->backoff
> rc
->max_backoff
) {
292 rc
->backoff
= rc
->max_backoff
;
294 long long int max_deadline
= llsat_add(time_msec(), rc
->max_backoff
);
295 if (rc
->backoff_deadline
> max_deadline
) {
296 rc
->backoff_deadline
= max_deadline
;
299 ovs_mutex_unlock(&rc
->mutex
);
303 rconn_get_max_backoff(const struct rconn
*rc
)
305 /* rc->max_backoff is 1000 times some 'int', so dividing by 1000 will yield
306 * a value in the range of 'int', therefore this is safe. */
307 return rc
->max_backoff
/ 1000;
311 rconn_set_dscp(struct rconn
*rc
, uint8_t dscp
)
317 rconn_get_dscp(const struct rconn
*rc
)
323 rconn_set_probe_interval(struct rconn
*rc
, int probe_interval
)
325 rc
->probe_interval
= probe_interval
? MAX(5, probe_interval
) : 0;
329 rconn_get_probe_interval(const struct rconn
*rc
)
331 return rc
->probe_interval
;
334 /* Drops any existing connection on 'rc', then sets up 'rc' to connect to
335 * 'target' and reconnect as needed. 'target' should be a remote OpenFlow
336 * target in a form acceptable to vconn_open().
338 * If 'name' is nonnull, then it is used in log messages in place of 'target'.
339 * It should presumably give more information to a human reader than 'target',
340 * but it need not be acceptable to vconn_open(). */
342 rconn_connect(struct rconn
*rc
, const char *target
, const char *name
)
343 OVS_EXCLUDED(rc
->mutex
)
345 ovs_mutex_lock(&rc
->mutex
);
346 rconn_disconnect__(rc
);
347 rconn_set_target__(rc
, target
, name
);
349 if (!stream_or_pstream_needs_probes(target
)) {
350 rc
->probe_interval
= 0;
353 ovs_mutex_unlock(&rc
->mutex
);
356 /* Drops any existing connection on 'rc', then configures 'rc' to use
357 * 'vconn'. If the connection on 'vconn' drops, 'rc' will not reconnect on it
360 * By default, the target obtained from vconn_get_name(vconn) is used in log
361 * messages. If 'name' is nonnull, then it is used instead. It should
362 * presumably give more information to a human reader than the target, but it
363 * need not be acceptable to vconn_open(). */
365 rconn_connect_unreliably(struct rconn
*rc
,
366 struct vconn
*vconn
, const char *name
)
367 OVS_EXCLUDED(rc
->mutex
)
369 ovs_assert(vconn
!= NULL
);
371 ovs_mutex_lock(&rc
->mutex
);
372 rconn_disconnect__(rc
);
373 rconn_set_target__(rc
, vconn_get_name(vconn
), name
);
374 rc
->reliable
= false;
376 state_transition(rc
, S_CONNECTING
);
377 ovs_mutex_unlock(&rc
->mutex
);
380 /* If 'rc' is connected, forces it to drop the connection and reconnect. */
382 rconn_reconnect(struct rconn
*rc
)
383 OVS_EXCLUDED(rc
->mutex
)
385 ovs_mutex_lock(&rc
->mutex
);
386 if (rc
->state
& (S_ACTIVE
| S_IDLE
)) {
387 VLOG_INFO("%s: disconnecting", rc
->name
);
390 ovs_mutex_unlock(&rc
->mutex
);
394 rconn_disconnect__(struct rconn
*rc
)
395 OVS_REQUIRES(rc
->mutex
)
397 if (rc
->state
!= S_VOID
) {
399 vconn_close(rc
->vconn
);
402 rconn_set_target__(rc
, "void", NULL
);
403 rc
->reliable
= false;
406 rc
->backoff_deadline
= LLONG_MIN
;
408 state_transition(rc
, S_VOID
);
413 rconn_disconnect(struct rconn
*rc
)
414 OVS_EXCLUDED(rc
->mutex
)
416 ovs_mutex_lock(&rc
->mutex
);
417 rconn_disconnect__(rc
);
418 ovs_mutex_unlock(&rc
->mutex
);
421 /* Disconnects 'rc' and frees the underlying storage. */
423 rconn_destroy(struct rconn
*rc
)
428 ovs_mutex_lock(&rc
->mutex
);
431 vconn_close(rc
->vconn
);
433 ofpbuf_list_delete(&rc
->txq
);
434 for (i
= 0; i
< rc
->n_monitors
; i
++) {
435 vconn_close(rc
->monitors
[i
]);
437 ovs_mutex_unlock(&rc
->mutex
);
438 ovs_mutex_destroy(&rc
->mutex
);
445 timeout_VOID(const struct rconn
*rc OVS_UNUSED
)
446 OVS_REQUIRES(rc
->mutex
)
452 run_VOID(struct rconn
*rc OVS_UNUSED
)
453 OVS_REQUIRES(rc
->mutex
)
459 reconnect(struct rconn
*rc
)
460 OVS_REQUIRES(rc
->mutex
)
464 if (rconn_logging_connection_attempts__(rc
)) {
465 VLOG_INFO("%s: connecting...", rc
->name
);
467 retval
= vconn_open(rc
->target
, rc
->allowed_versions
, rc
->dscp
,
470 rc
->backoff_deadline
= llsat_add(time_msec(), rc
->backoff
);
471 state_transition(rc
, S_CONNECTING
);
473 VLOG_WARN("%s: connection failed (%s)",
474 rc
->name
, ovs_strerror(retval
));
475 rc
->backoff_deadline
= LLONG_MAX
; /* Prevent resetting backoff. */
476 disconnect(rc
, retval
);
481 timeout_BACKOFF(const struct rconn
*rc
)
482 OVS_REQUIRES(rc
->mutex
)
488 run_BACKOFF(struct rconn
*rc
)
489 OVS_REQUIRES(rc
->mutex
)
497 timeout_CONNECTING(const struct rconn
*rc
)
498 OVS_REQUIRES(rc
->mutex
)
500 return MAX(1000, rc
->backoff
);
504 run_CONNECTING(struct rconn
*rc
)
505 OVS_REQUIRES(rc
->mutex
)
507 int retval
= vconn_connect(rc
->vconn
);
509 VLOG(rc
->reliable
? VLL_INFO
: VLL_DBG
, "%s: connected", rc
->name
);
510 state_transition(rc
, S_ACTIVE
);
511 rc
->version
= vconn_get_version(rc
->vconn
);
512 rc
->last_connected
= rc
->state_entered
;
513 } else if (retval
!= EAGAIN
) {
514 if (rconn_logging_connection_attempts__(rc
)) {
515 VLOG_INFO("%s: connection failed (%s)",
516 rc
->name
, ovs_strerror(retval
));
518 disconnect(rc
, retval
);
519 } else if (timed_out(rc
)) {
520 if (rconn_logging_connection_attempts__(rc
)) {
521 VLOG_INFO("%s: connection timed out", rc
->name
);
523 rc
->backoff_deadline
= LLONG_MAX
; /* Prevent resetting backoff. */
524 disconnect(rc
, ETIMEDOUT
);
529 do_tx_work(struct rconn
*rc
)
530 OVS_REQUIRES(rc
->mutex
)
532 if (ovs_list_is_empty(&rc
->txq
)) {
535 while (!ovs_list_is_empty(&rc
->txq
)) {
536 int error
= try_send(rc
);
540 rc
->last_activity
= time_msec();
542 if (ovs_list_is_empty(&rc
->txq
)) {
543 poll_immediate_wake();
548 timeout_ACTIVE(const struct rconn
*rc
)
549 OVS_REQUIRES(rc
->mutex
)
551 if (rc
->probe_interval
) {
552 long long int base
= MAX(rc
->last_activity
, rc
->state_entered
);
553 long long int probe
= llsat_mul(rc
->probe_interval
, 1000);
554 return llsat_sub(llsat_add(base
, probe
), rc
->state_entered
);
560 run_ACTIVE(struct rconn
*rc
)
561 OVS_REQUIRES(rc
->mutex
)
564 long long int base
= MAX(rc
->last_activity
, rc
->state_entered
);
565 VLOG_DBG("%s: idle %lld seconds, sending inactivity probe",
566 rc
->name
, (time_msec() - base
) / 1000);
568 /* Ordering is important here: rconn_send() can transition to BACKOFF,
569 * and we don't want to transition back to IDLE if so, because then we
570 * can end up queuing a packet with vconn == NULL and then *boom*. */
571 state_transition(rc
, S_IDLE
);
573 /* Send an echo request. */
574 rconn_send__(rc
, ofputil_encode_echo_request(rc
->version
), NULL
);
583 timeout_IDLE(const struct rconn
*rc
)
584 OVS_REQUIRES(rc
->mutex
)
586 return llsat_mul(rc
->probe_interval
, 1000);
590 run_IDLE(struct rconn
*rc
)
591 OVS_REQUIRES(rc
->mutex
)
594 VLOG_ERR("%s: no response to inactivity probe after %lld "
595 "seconds, disconnecting",
596 rc
->name
, elapsed_in_this_state(rc
) / 1000);
597 disconnect(rc
, ETIMEDOUT
);
604 timeout_DISCONNECTED(const struct rconn
*rc OVS_UNUSED
)
605 OVS_REQUIRES(rc
->mutex
)
611 run_DISCONNECTED(struct rconn
*rc OVS_UNUSED
)
612 OVS_REQUIRES(rc
->mutex
)
617 /* Performs whatever activities are necessary to maintain 'rc': if 'rc' is
618 * disconnected, attempts to (re)connect, backing off as necessary; if 'rc' is
619 * connected, attempts to send packets in the send queue, if any. */
621 rconn_run(struct rconn
*rc
)
622 OVS_EXCLUDED(rc
->mutex
)
627 ovs_mutex_lock(&rc
->mutex
);
631 vconn_run(rc
->vconn
);
633 error
= vconn_get_status(rc
->vconn
);
635 report_error(rc
, error
);
636 disconnect(rc
, error
);
639 for (i
= 0; i
< rc
->n_monitors
; ) {
643 vconn_run(rc
->monitors
[i
]);
645 /* Drain any stray message that came in on the monitor connection. */
646 retval
= vconn_recv(rc
->monitors
[i
], &msg
);
649 } else if (retval
!= EAGAIN
) {
650 close_monitor(rc
, i
, retval
);
657 old_state
= rc
->state
;
659 #define STATE(NAME, VALUE) case S_##NAME: run_##NAME(rc); break;
665 } while (rc
->state
!= old_state
);
666 ovs_mutex_unlock(&rc
->mutex
);
669 /* Causes the next call to poll_block() to wake up when rconn_run() should be
672 rconn_run_wait(struct rconn
*rc
)
673 OVS_EXCLUDED(rc
->mutex
)
675 ovs_mutex_lock(&rc
->mutex
);
677 vconn_run_wait(rc
->vconn
);
678 if ((rc
->state
& (S_ACTIVE
| S_IDLE
)) && !ovs_list_is_empty(&rc
->txq
)) {
679 vconn_wait(rc
->vconn
, WAIT_SEND
);
682 for (size_t i
= 0; i
< rc
->n_monitors
; i
++) {
683 vconn_run_wait(rc
->monitors
[i
]);
684 vconn_recv_wait(rc
->monitors
[i
]);
687 poll_timer_wait_until(llsat_add(rc
->state_entered
, timeout(rc
)));
688 ovs_mutex_unlock(&rc
->mutex
);
691 /* Attempts to receive a packet from 'rc'. If successful, returns the packet;
692 * otherwise, returns a null pointer. The caller is responsible for freeing
693 * the packet (with ofpbuf_delete()). */
695 rconn_recv(struct rconn
*rc
)
696 OVS_EXCLUDED(rc
->mutex
)
698 struct ofpbuf
*buffer
= NULL
;
700 ovs_mutex_lock(&rc
->mutex
);
701 if (rc
->state
& (S_ACTIVE
| S_IDLE
)) {
702 int error
= vconn_recv(rc
->vconn
, &buffer
);
704 copy_to_monitor(rc
, buffer
);
705 if (rc
->probably_admitted
|| is_admitted_msg(buffer
)
706 || time_msec() - rc
->last_connected
>= 30 * 1000) {
707 rc
->probably_admitted
= true;
708 rc
->last_admitted
= time_msec();
710 rc
->last_activity
= time_msec();
711 if (rc
->state
== S_IDLE
) {
712 state_transition(rc
, S_ACTIVE
);
714 } else if (error
!= EAGAIN
) {
715 report_error(rc
, error
);
716 disconnect(rc
, error
);
719 ovs_mutex_unlock(&rc
->mutex
);
724 /* Causes the next call to poll_block() to wake up when a packet may be ready
725 * to be received by vconn_recv() on 'rc'. */
727 rconn_recv_wait(struct rconn
*rc
)
728 OVS_EXCLUDED(rc
->mutex
)
730 ovs_mutex_lock(&rc
->mutex
);
732 vconn_wait(rc
->vconn
, WAIT_RECV
);
734 ovs_mutex_unlock(&rc
->mutex
);
738 rconn_send__(struct rconn
*rc
, struct ofpbuf
*b
,
739 struct rconn_packet_counter
*counter
)
740 OVS_REQUIRES(rc
->mutex
)
742 if (rconn_is_connected(rc
)) {
743 COVERAGE_INC(rconn_queued
);
744 copy_to_monitor(rc
, b
);
747 rconn_packet_counter_inc(counter
, b
->size
);
750 /* Reuse 'frame' as a private pointer while 'b' is in txq. */
753 ovs_list_push_back(&rc
->txq
, &b
->list_node
);
755 /* If the queue was empty before we added 'b', try to send some
756 * packets. (But if the queue had packets in it, it's because the
757 * vconn is backlogged and there's no point in stuffing more into it
758 * now. We'll get back to that in rconn_run().) */
759 if (rc
->txq
.next
== &b
->list_node
) {
769 /* Sends 'b' on 'rc'. Returns 0 if successful, or ENOTCONN if 'rc' is not
770 * currently connected. Takes ownership of 'b'.
772 * If 'counter' is non-null, then 'counter' will be incremented while the
773 * packet is in flight, then decremented when it has been sent (or discarded
774 * due to disconnection). Because 'b' may be sent (or discarded) before this
775 * function returns, the caller may not be able to observe any change in
778 * There is no rconn_send_wait() function: an rconn has a send queue that it
779 * takes care of sending if you call rconn_run(), which will have the side
780 * effect of waking up poll_block(). */
782 rconn_send(struct rconn
*rc
, struct ofpbuf
*b
,
783 struct rconn_packet_counter
*counter
)
784 OVS_EXCLUDED(rc
->mutex
)
788 ovs_mutex_lock(&rc
->mutex
);
789 error
= rconn_send__(rc
, b
, counter
);
790 ovs_mutex_unlock(&rc
->mutex
);
795 /* Sends 'b' on 'rc'. Increments 'counter' while the packet is in flight; it
796 * will be decremented when it has been sent (or discarded due to
797 * disconnection). Returns 0 if successful, EAGAIN if 'counter->n' is already
798 * at least as large as 'queue_limit', or ENOTCONN if 'rc' is not currently
799 * connected. Regardless of return value, 'b' is destroyed.
801 * Because 'b' may be sent (or discarded) before this function returns, the
802 * caller may not be able to observe any change in 'counter'.
804 * There is no rconn_send_wait() function: an rconn has a send queue that it
805 * takes care of sending if you call rconn_run(), which will have the side
806 * effect of waking up poll_block(). */
808 rconn_send_with_limit(struct rconn
*rc
, struct ofpbuf
*b
,
809 struct rconn_packet_counter
*counter
, int queue_limit
)
810 OVS_EXCLUDED(rc
->mutex
)
814 ovs_mutex_lock(&rc
->mutex
);
815 if (rconn_packet_counter_n_packets(counter
) < queue_limit
) {
816 error
= rconn_send__(rc
, b
, counter
);
818 COVERAGE_INC(rconn_overflow
);
822 ovs_mutex_unlock(&rc
->mutex
);
827 /* Adds 'vconn' to 'rc' as a monitoring connection, to which all messages sent
828 * and received on 'rconn' will be copied. 'rc' takes ownership of 'vconn'. */
830 rconn_add_monitor(struct rconn
*rc
, struct vconn
*vconn
)
831 OVS_EXCLUDED(rc
->mutex
)
833 ovs_mutex_lock(&rc
->mutex
);
834 if (rc
->n_monitors
< ARRAY_SIZE(rc
->monitors
)) {
835 VLOG_INFO("new monitor connection from %s", vconn_get_name(vconn
));
836 rc
->monitors
[rc
->n_monitors
++] = vconn
;
838 VLOG_DBG("too many monitor connections, discarding %s",
839 vconn_get_name(vconn
));
842 ovs_mutex_unlock(&rc
->mutex
);
845 /* Returns 'rc''s name. This is a name for human consumption, appropriate for
846 * use in log messages. It is not necessarily a name that may be passed
847 * directly to, e.g., vconn_open(). */
849 rconn_get_name(const struct rconn
*rc
)
854 /* Sets 'rc''s name to 'new_name'. */
856 rconn_set_name(struct rconn
*rc
, const char *new_name
)
857 OVS_EXCLUDED(rc
->mutex
)
859 ovs_mutex_lock(&rc
->mutex
);
861 rc
->name
= xstrdup(new_name
);
862 ovs_mutex_unlock(&rc
->mutex
);
865 /* Returns 'rc''s target. This is intended to be a string that may be passed
866 * directly to, e.g., vconn_open(). */
868 rconn_get_target(const struct rconn
*rc
)
873 /* Returns true if 'rconn' will reconnect if it disconnects. */
875 rconn_is_reliable(const struct rconn
*rconn
)
877 return rconn
->reliable
;
880 /* Returns true if 'rconn' is connected or in the process of reconnecting,
881 * false if 'rconn' is disconnected and will not reconnect on its own. */
883 rconn_is_alive(const struct rconn
*rconn
)
885 return rconn
->state
!= S_VOID
&& rconn
->state
!= S_DISCONNECTED
;
888 /* Returns true if 'rconn' is connected, false otherwise. */
890 rconn_is_connected(const struct rconn
*rconn
)
892 return is_connected_state(rconn
->state
);
896 rconn_is_admitted__(const struct rconn
*rconn
)
897 OVS_REQUIRES(rconn
->mutex
)
899 return (rconn_is_connected(rconn
)
900 && rconn
->last_admitted
>= rconn
->last_connected
);
903 /* Returns true if 'rconn' is connected and thought to have been accepted by
904 * the peer's admission-control policy. */
906 rconn_is_admitted(const struct rconn
*rconn
)
907 OVS_EXCLUDED(rconn
->mutex
)
911 ovs_mutex_lock(&rconn
->mutex
);
912 admitted
= rconn_is_admitted__(rconn
);
913 ovs_mutex_unlock(&rconn
->mutex
);
918 /* Returns 0 if 'rconn' is currently connected and considered to have been
919 * accepted by the peer's admission-control policy, otherwise the number of
920 * seconds since 'rconn' was last in such a state. */
922 rconn_failure_duration(const struct rconn
*rconn
)
923 OVS_EXCLUDED(rconn
->mutex
)
927 ovs_mutex_lock(&rconn
->mutex
);
928 duration
= (rconn_is_admitted__(rconn
)
930 : (time_msec() - rconn
->last_admitted
) / 1000);
931 ovs_mutex_unlock(&rconn
->mutex
);
936 /* Returns the OpenFlow version most recently negotiated with a peer, or -1 if
937 * no version has ever been negotiated.
939 * If 'rconn' is connected (that is, if 'rconn_is_connected(rconn)' would
940 * return true), then the return value is guaranteed to be the OpenFlow version
941 * in use for the connection. The converse is not true: when the return value
942 * is not -1, 'rconn' might be disconnected. */
944 rconn_get_version(const struct rconn
*rconn
)
945 OVS_EXCLUDED(rconn
->mutex
)
947 ovs_mutex_lock(&rconn
->mutex
);
948 int version
= rconn
->version
;
949 ovs_mutex_unlock(&rconn
->mutex
);
954 /* Returns a string representing the internal state of 'rc'. The caller must
955 * not modify or free the string. */
957 rconn_get_state(const struct rconn
*rc
)
959 return state_name(rc
->state
);
962 /* Returns the time at which the last successful connection was made by
963 * 'rc'. Returns LLONG_MIN if never connected. */
965 rconn_get_last_connection(const struct rconn
*rc
)
967 return rc
->last_connected
;
970 /* Returns the time at which 'rc' was last disconnected. Returns LLONG_MIN
971 * if never disconnected. */
973 rconn_get_last_disconnect(const struct rconn
*rc
)
975 return rc
->last_disconnected
;
978 /* Returns 'rc''s current connection sequence number, a number that changes
979 * every time that 'rconn' connects or disconnects. */
981 rconn_get_connection_seqno(const struct rconn
*rc
)
986 /* Returns a value that explains why 'rc' last disconnected:
988 * - 0 means that the last disconnection was caused by a call to
989 * rconn_disconnect(), or that 'rc' is new and has not yet completed its
990 * initial connection or connection attempt.
992 * - EOF means that the connection was closed in the normal way by the peer.
994 * - A positive integer is an errno value that represents the error.
997 rconn_get_last_error(const struct rconn
*rc
)
999 return rc
->last_error
;
1002 /* Returns the number of messages queued for transmission on 'rc'. */
1004 rconn_count_txqlen(const struct rconn
*rc
)
1005 OVS_EXCLUDED(rc
->mutex
)
1009 ovs_mutex_lock(&rc
->mutex
);
1010 len
= ovs_list_size(&rc
->txq
);
1011 ovs_mutex_unlock(&rc
->mutex
);
1016 struct rconn_packet_counter
*
1017 rconn_packet_counter_create(void)
1019 struct rconn_packet_counter
*c
= xzalloc(sizeof *c
);
1020 ovs_mutex_init(&c
->mutex
);
1021 ovs_mutex_lock(&c
->mutex
);
1023 ovs_mutex_unlock(&c
->mutex
);
1028 rconn_packet_counter_destroy(struct rconn_packet_counter
*c
)
1033 ovs_mutex_lock(&c
->mutex
);
1034 ovs_assert(c
->ref_cnt
> 0);
1035 dead
= !--c
->ref_cnt
&& !c
->n_packets
;
1036 ovs_mutex_unlock(&c
->mutex
);
1039 ovs_mutex_destroy(&c
->mutex
);
1046 rconn_packet_counter_inc(struct rconn_packet_counter
*c
, unsigned int n_bytes
)
1048 ovs_mutex_lock(&c
->mutex
);
1050 c
->n_bytes
+= n_bytes
;
1051 ovs_mutex_unlock(&c
->mutex
);
1055 rconn_packet_counter_dec(struct rconn_packet_counter
*c
, unsigned int n_bytes
)
1059 ovs_mutex_lock(&c
->mutex
);
1060 ovs_assert(c
->n_packets
> 0);
1061 ovs_assert(c
->n_packets
== 1
1062 ? c
->n_bytes
== n_bytes
1063 : c
->n_bytes
> n_bytes
);
1065 c
->n_bytes
-= n_bytes
;
1066 dead
= !c
->n_packets
&& !c
->ref_cnt
;
1067 ovs_mutex_unlock(&c
->mutex
);
1070 ovs_mutex_destroy(&c
->mutex
);
1076 rconn_packet_counter_n_packets(const struct rconn_packet_counter
*c
)
1080 ovs_mutex_lock(&c
->mutex
);
1082 ovs_mutex_unlock(&c
->mutex
);
1088 rconn_packet_counter_n_bytes(const struct rconn_packet_counter
*c
)
1092 ovs_mutex_lock(&c
->mutex
);
1094 ovs_mutex_unlock(&c
->mutex
);
1099 /* Set rc->target and rc->name to 'target' and 'name', respectively. If 'name'
1100 * is null, 'target' is used. */
1102 rconn_set_target__(struct rconn
*rc
, const char *target
, const char *name
)
1103 OVS_REQUIRES(rc
->mutex
)
1106 rc
->name
= xstrdup(name
? name
: target
);
1108 rc
->target
= xstrdup(target
);
1111 /* Tries to send a packet from 'rc''s send buffer. Returns 0 if successful,
1112 * otherwise a positive errno value. */
1114 try_send(struct rconn
*rc
)
1115 OVS_REQUIRES(rc
->mutex
)
1117 struct ofpbuf
*msg
= ofpbuf_from_list(rc
->txq
.next
);
1118 unsigned int n_bytes
= msg
->size
;
1119 struct rconn_packet_counter
*counter
= msg
->header
;
1122 /* Eagerly remove 'msg' from the txq. We can't remove it from the list
1123 * after sending, if sending is successful, because it is then owned by the
1124 * vconn, which might have freed it already. */
1125 ovs_list_remove(&msg
->list_node
);
1128 retval
= vconn_send(rc
->vconn
, msg
);
1130 msg
->header
= counter
;
1131 ovs_list_push_front(&rc
->txq
, &msg
->list_node
);
1132 if (retval
!= EAGAIN
) {
1133 report_error(rc
, retval
);
1134 disconnect(rc
, retval
);
1138 COVERAGE_INC(rconn_sent
);
1140 rconn_packet_counter_dec(counter
, n_bytes
);
1145 /* Reports that 'error' caused 'rc' to disconnect. 'error' may be a positive
1146 * errno value, or it may be EOF to indicate that the connection was closed
1149 report_error(struct rconn
*rc
, int error
)
1150 OVS_REQUIRES(rc
->mutex
)
1152 /* On Windows, when a peer terminates without calling a closesocket()
1153 * on socket fd, we get WSAECONNRESET. Don't print warning messages
1157 || error
== WSAECONNRESET
1160 /* If 'rc' isn't reliable, then we don't really expect this connection
1161 * to last forever anyway (probably it's a connection that we received
1162 * via accept()), so use DBG level to avoid cluttering the logs. */
1163 enum vlog_level level
= rc
->reliable
? VLL_INFO
: VLL_DBG
;
1164 VLOG(level
, "%s: connection closed by peer", rc
->name
);
1166 VLOG_WARN("%s: connection dropped (%s)",
1167 rc
->name
, ovs_strerror(error
));
1171 /* Disconnects 'rc' and records 'error' as the error that caused 'rc''s last
1174 * - 0 means that this disconnection is due to a request by 'rc''s client,
1175 * not due to any kind of network error.
1177 * - EOF means that the connection was closed in the normal way by the peer.
1179 * - A positive integer is an errno value that represents the error.
1182 disconnect(struct rconn
*rc
, int error
)
1183 OVS_REQUIRES(rc
->mutex
)
1185 rc
->last_error
= error
;
1187 vconn_close(rc
->vconn
);
1191 long long int now
= time_msec();
1193 if (rc
->state
& (S_CONNECTING
| S_ACTIVE
| S_IDLE
)) {
1194 rc
->last_disconnected
= now
;
1198 if (now
>= rc
->backoff_deadline
) {
1200 } else if (rc
->backoff
< rc
->max_backoff
/ 2) {
1201 rc
->backoff
= MAX(1000, 2 * rc
->backoff
);
1202 VLOG_INFO("%s: waiting %lld seconds before reconnect",
1203 rc
->name
, rc
->backoff
/ 1000);
1205 if (rconn_logging_connection_attempts__(rc
)) {
1206 VLOG_INFO("%s: continuing to retry connections in the "
1207 "background but suppressing further logging",
1210 rc
->backoff
= rc
->max_backoff
;
1212 rc
->backoff_deadline
= llsat_add(now
, rc
->backoff
);
1213 state_transition(rc
, S_BACKOFF
);
1215 rc
->last_disconnected
= now
;
1216 state_transition(rc
, S_DISCONNECTED
);
1220 /* Drops all the packets from 'rc''s send queue and decrements their queue
1223 flush_queue(struct rconn
*rc
)
1224 OVS_REQUIRES(rc
->mutex
)
1226 if (ovs_list_is_empty(&rc
->txq
)) {
1229 while (!ovs_list_is_empty(&rc
->txq
)) {
1230 struct ofpbuf
*b
= ofpbuf_from_list(ovs_list_pop_front(&rc
->txq
));
1231 struct rconn_packet_counter
*counter
= b
->header
;
1233 rconn_packet_counter_dec(counter
, b
->size
);
1235 COVERAGE_INC(rconn_discarded
);
1238 poll_immediate_wake();
1241 static long long int
1242 elapsed_in_this_state(const struct rconn
*rc
)
1243 OVS_REQUIRES(rc
->mutex
)
1245 return time_msec() - rc
->state_entered
;
1248 static long long int
1249 timeout(const struct rconn
*rc
)
1250 OVS_REQUIRES(rc
->mutex
)
1252 switch (rc
->state
) {
1253 #define STATE(NAME, VALUE) case S_##NAME: return timeout_##NAME(rc);
1262 timed_out(const struct rconn
*rc
)
1263 OVS_REQUIRES(rc
->mutex
)
1265 return time_msec() >= llsat_add(rc
->state_entered
, timeout(rc
));
1269 state_transition(struct rconn
*rc
, enum state state
)
1270 OVS_REQUIRES(rc
->mutex
)
1272 rc
->seqno
+= is_connected_state(rc
->state
) != is_connected_state(state
);
1273 if (is_connected_state(state
) && !is_connected_state(rc
->state
)) {
1274 rc
->probably_admitted
= false;
1276 VLOG_DBG("%s: entering %s", rc
->name
, state_name(state
));
1278 rc
->state_entered
= time_msec();
1282 close_monitor(struct rconn
*rc
, size_t idx
, int retval
)
1283 OVS_REQUIRES(rc
->mutex
)
1285 VLOG_DBG("%s: closing monitor connection to %s: %s",
1286 rconn_get_name(rc
), vconn_get_name(rc
->monitors
[idx
]),
1287 ovs_retval_to_string(retval
));
1288 rc
->monitors
[idx
] = rc
->monitors
[--rc
->n_monitors
];
1292 copy_to_monitor(struct rconn
*rc
, const struct ofpbuf
*b
)
1293 OVS_REQUIRES(rc
->mutex
)
1295 struct ofpbuf
*clone
= NULL
;
1299 for (i
= 0; i
< rc
->n_monitors
; ) {
1300 struct vconn
*vconn
= rc
->monitors
[i
];
1303 clone
= ofpbuf_clone(b
);
1305 retval
= vconn_send(vconn
, clone
);
1308 } else if (retval
!= EAGAIN
) {
1309 close_monitor(rc
, i
, retval
);
1314 ofpbuf_delete(clone
);
1318 is_connected_state(enum state state
)
1320 return (state
& (S_ACTIVE
| S_IDLE
)) != 0;
1323 /* When a switch initially connects to a controller, the controller may spend a
1324 * little time examining the switch, looking at, for example, its datapath ID,
1325 * before it decides whether it is willing to control that switch. At that
1326 * point, it either disconnects or starts controlling the switch.
1328 * This function returns a guess to its caller about whether 'b' is OpenFlow
1329 * message that indicates that the controller has decided to control the
1330 * switch. It returns false if the message is one that a controller typically
1331 * uses to determine whether a switch is admissible, true if the message is one
1332 * that would typically be used only after the controller has admitted the
1335 is_admitted_msg(const struct ofpbuf
*b
)
1340 error
= ofptype_decode(&type
, b
->data
);
1348 case OFPTYPE_ECHO_REQUEST
:
1349 case OFPTYPE_ECHO_REPLY
:
1350 case OFPTYPE_FEATURES_REQUEST
:
1351 case OFPTYPE_FEATURES_REPLY
:
1352 case OFPTYPE_GET_CONFIG_REQUEST
:
1353 case OFPTYPE_GET_CONFIG_REPLY
:
1354 case OFPTYPE_SET_CONFIG
:
1355 case OFPTYPE_QUEUE_GET_CONFIG_REQUEST
:
1356 case OFPTYPE_QUEUE_GET_CONFIG_REPLY
:
1357 case OFPTYPE_GET_ASYNC_REQUEST
:
1358 case OFPTYPE_GET_ASYNC_REPLY
:
1359 case OFPTYPE_GROUP_STATS_REQUEST
:
1360 case OFPTYPE_GROUP_STATS_REPLY
:
1361 case OFPTYPE_GROUP_DESC_STATS_REQUEST
:
1362 case OFPTYPE_GROUP_DESC_STATS_REPLY
:
1363 case OFPTYPE_GROUP_FEATURES_STATS_REQUEST
:
1364 case OFPTYPE_GROUP_FEATURES_STATS_REPLY
:
1365 case OFPTYPE_TABLE_FEATURES_STATS_REQUEST
:
1366 case OFPTYPE_TABLE_FEATURES_STATS_REPLY
:
1367 case OFPTYPE_TABLE_DESC_REQUEST
:
1368 case OFPTYPE_TABLE_DESC_REPLY
:
1371 case OFPTYPE_PACKET_IN
:
1372 case OFPTYPE_FLOW_REMOVED
:
1373 case OFPTYPE_PORT_STATUS
:
1374 case OFPTYPE_PACKET_OUT
:
1375 case OFPTYPE_FLOW_MOD
:
1376 case OFPTYPE_GROUP_MOD
:
1377 case OFPTYPE_PORT_MOD
:
1378 case OFPTYPE_TABLE_MOD
:
1379 case OFPTYPE_METER_MOD
:
1380 case OFPTYPE_BARRIER_REQUEST
:
1381 case OFPTYPE_BARRIER_REPLY
:
1382 case OFPTYPE_DESC_STATS_REQUEST
:
1383 case OFPTYPE_DESC_STATS_REPLY
:
1384 case OFPTYPE_FLOW_STATS_REQUEST
:
1385 case OFPTYPE_FLOW_STATS_REPLY
:
1386 case OFPTYPE_AGGREGATE_STATS_REQUEST
:
1387 case OFPTYPE_AGGREGATE_STATS_REPLY
:
1388 case OFPTYPE_TABLE_STATS_REQUEST
:
1389 case OFPTYPE_TABLE_STATS_REPLY
:
1390 case OFPTYPE_PORT_STATS_REQUEST
:
1391 case OFPTYPE_PORT_STATS_REPLY
:
1392 case OFPTYPE_QUEUE_STATS_REQUEST
:
1393 case OFPTYPE_QUEUE_STATS_REPLY
:
1394 case OFPTYPE_PORT_DESC_STATS_REQUEST
:
1395 case OFPTYPE_PORT_DESC_STATS_REPLY
:
1396 case OFPTYPE_METER_STATS_REQUEST
:
1397 case OFPTYPE_METER_STATS_REPLY
:
1398 case OFPTYPE_METER_CONFIG_STATS_REQUEST
:
1399 case OFPTYPE_METER_CONFIG_STATS_REPLY
:
1400 case OFPTYPE_METER_FEATURES_STATS_REQUEST
:
1401 case OFPTYPE_METER_FEATURES_STATS_REPLY
:
1402 case OFPTYPE_ROLE_REQUEST
:
1403 case OFPTYPE_ROLE_REPLY
:
1404 case OFPTYPE_ROLE_STATUS
:
1405 case OFPTYPE_REQUESTFORWARD
:
1406 case OFPTYPE_TABLE_STATUS
:
1407 case OFPTYPE_SET_FLOW_FORMAT
:
1408 case OFPTYPE_FLOW_MOD_TABLE_ID
:
1409 case OFPTYPE_SET_PACKET_IN_FORMAT
:
1410 case OFPTYPE_FLOW_AGE
:
1411 case OFPTYPE_SET_ASYNC_CONFIG
:
1412 case OFPTYPE_SET_CONTROLLER_ID
:
1413 case OFPTYPE_FLOW_MONITOR_STATS_REQUEST
:
1414 case OFPTYPE_FLOW_MONITOR_STATS_REPLY
:
1415 case OFPTYPE_FLOW_MONITOR_CANCEL
:
1416 case OFPTYPE_FLOW_MONITOR_PAUSED
:
1417 case OFPTYPE_FLOW_MONITOR_RESUMED
:
1418 case OFPTYPE_BUNDLE_CONTROL
:
1419 case OFPTYPE_BUNDLE_ADD_MESSAGE
:
1420 case OFPTYPE_NXT_TLV_TABLE_MOD
:
1421 case OFPTYPE_NXT_TLV_TABLE_REQUEST
:
1422 case OFPTYPE_NXT_TLV_TABLE_REPLY
:
1423 case OFPTYPE_NXT_RESUME
:
1424 case OFPTYPE_IPFIX_BRIDGE_STATS_REQUEST
:
1425 case OFPTYPE_IPFIX_BRIDGE_STATS_REPLY
:
1426 case OFPTYPE_IPFIX_FLOW_STATS_REQUEST
:
1427 case OFPTYPE_IPFIX_FLOW_STATS_REPLY
:
1428 case OFPTYPE_CT_FLUSH_ZONE
:
1434 /* Returns true if 'rc' is currently logging information about connection
1435 * attempts, false if logging should be suppressed because 'rc' hasn't
1436 * successuflly connected in too long. */
1438 rconn_logging_connection_attempts__(const struct rconn
*rc
)
1439 OVS_REQUIRES(rc
->mutex
)
1441 return rc
->backoff
< rc
->max_backoff
;