/*
 * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <config.h>
#include "poll-loop.h"
#include <errno.h>
#include <inttypes.h>
#include <limits.h>
#include <poll.h>
#include <stdlib.h>
#include <string.h>
#include "coverage.h"
#include "openvswitch/dynamic-string.h"
#include "fatal-signal.h"
#include "openvswitch/list.h"
#include "ovs-thread.h"
#include "seq.h"
#include "socket-util.h"
#include "timeval.h"
#include "openvswitch/vlog.h"
#include "openvswitch/hmap.h"
36 VLOG_DEFINE_THIS_MODULE(poll_loop
);
38 COVERAGE_DEFINE(poll_create_node
);
39 COVERAGE_DEFINE(poll_zero_timeout
);
42 struct hmap_node hmap_node
;
43 struct pollfd pollfd
; /* Events to pass to time_poll(). */
44 HANDLE wevent
; /* Events for WaitForMultipleObjects(). */
45 const char *where
; /* Where poll_node was created. */
49 /* All active poll waiters. */
50 struct hmap poll_nodes
;
52 /* Time at which to wake up the next call to poll_block(), LLONG_MIN to
53 * wake up immediately, or LLONG_MAX to wait forever. */
54 long long int timeout_when
; /* In msecs as returned by time_msec(). */
55 const char *timeout_where
; /* Where 'timeout_when' was set. */
58 static struct poll_loop
*poll_loop(void);
60 /* Look up the node with same fd or wevent. */
61 static struct poll_node
*
62 find_poll_node(struct poll_loop
*loop
, int fd
, HANDLE wevent
)
64 struct poll_node
*node
;
66 /* Both 'fd' and 'wevent' cannot be set. */
67 ovs_assert(!fd
!= !wevent
);
69 HMAP_FOR_EACH_WITH_HASH (node
, hmap_node
,
70 hash_2words(fd
, (uint32_t)wevent
),
72 if ((fd
&& node
->pollfd
.fd
== fd
)
73 || (wevent
&& node
->wevent
== wevent
)) {
80 /* On Unix based systems:
82 * Registers 'fd' as waiting for the specified 'events' (which should be
83 * POLLIN or POLLOUT or POLLIN | POLLOUT). The following call to
84 * poll_block() will wake up when 'fd' becomes ready for one or more of the
85 * requested events. The 'fd's are given to poll() function later.
89 * If 'fd' is specified, create a new 'wevent'. Association of 'fd' and
90 * 'wevent' for 'events' happens in poll_block(). If 'wevent' is specified,
91 * it is assumed that it is unrelated to any sockets and poll_block()
92 * will wake up on any event on that 'wevent'. It is an error to pass
93 * both 'wevent' and 'fd'.
95 * The event registration is one-shot: only the following call to
96 * poll_block() is affected. The event will need to be re-registered after
97 * poll_block() is called if it is to persist.
99 * ('where' is used in debug logging. Commonly one would use poll_fd_wait() to
100 * automatically provide the caller's source file and line number for
103 poll_create_node(int fd
, HANDLE wevent
, short int events
, const char *where
)
105 struct poll_loop
*loop
= poll_loop();
106 struct poll_node
*node
;
108 COVERAGE_INC(poll_create_node
);
110 /* Both 'fd' and 'wevent' cannot be set. */
111 ovs_assert(!fd
!= !wevent
);
113 /* Check for duplicate. If found, "or" the events. */
114 node
= find_poll_node(loop
, fd
, wevent
);
116 node
->pollfd
.events
|= events
;
118 node
= xzalloc(sizeof *node
);
119 hmap_insert(&loop
->poll_nodes
, &node
->hmap_node
,
120 hash_2words(fd
, (uint32_t)wevent
));
121 node
->pollfd
.fd
= fd
;
122 node
->pollfd
.events
= events
;
125 wevent
= CreateEvent(NULL
, FALSE
, FALSE
, NULL
);
128 node
->wevent
= wevent
;
133 /* Registers 'fd' as waiting for the specified 'events' (which should be POLLIN
134 * or POLLOUT or POLLIN | POLLOUT). The following call to poll_block() will
135 * wake up when 'fd' becomes ready for one or more of the requested events.
137 * On Windows, 'fd' must be a socket.
139 * The event registration is one-shot: only the following call to poll_block()
140 * is affected. The event will need to be re-registered after poll_block() is
141 * called if it is to persist.
143 * ('where' is used in debug logging. Commonly one would use poll_fd_wait() to
144 * automatically provide the caller's source file and line number for
147 poll_fd_wait_at(int fd
, short int events
, const char *where
)
149 poll_create_node(fd
, 0, events
, where
);
153 /* Registers for the next call to poll_block() to wake up when 'wevent' is
156 * The event registration is one-shot: only the following call to poll_block()
157 * is affected. The event will need to be re-registered after poll_block() is
158 * called if it is to persist.
160 * ('where' is used in debug logging. Commonly one would use
161 * poll_wevent_wait() to automatically provide the caller's source file and
162 * line number for 'where'.) */
164 poll_wevent_wait_at(HANDLE wevent
, const char *where
)
166 poll_create_node(0, wevent
, 0, where
);
170 /* Causes the following call to poll_block() to block for no more than 'msec'
171 * milliseconds. If 'msec' is nonpositive, the following call to poll_block()
172 * will not block at all.
174 * The timer registration is one-shot: only the following call to poll_block()
175 * is affected. The timer will need to be re-registered after poll_block() is
176 * called if it is to persist.
178 * ('where' is used in debug logging. Commonly one would use poll_timer_wait()
179 * to automatically provide the caller's source file and line number for
182 poll_timer_wait_at(long long int msec
, const char *where
)
184 long long int now
= time_msec();
188 /* Wake up immediately. */
190 } else if ((unsigned long long int) now
+ msec
<= LLONG_MAX
) {
194 /* now + msec would overflow. */
198 poll_timer_wait_until_at(when
, where
);
201 /* Causes the following call to poll_block() to wake up when the current time,
202 * as returned by time_msec(), reaches 'when' or later. If 'when' is earlier
203 * than the current time, the following call to poll_block() will not block at
206 * The timer registration is one-shot: only the following call to poll_block()
207 * is affected. The timer will need to be re-registered after poll_block() is
208 * called if it is to persist.
210 * ('where' is used in debug logging. Commonly one would use
211 * poll_timer_wait_until() to automatically provide the caller's source file
212 * and line number for 'where'.) */
214 poll_timer_wait_until_at(long long int when
, const char *where
)
216 struct poll_loop
*loop
= poll_loop();
217 if (when
< loop
->timeout_when
) {
218 loop
->timeout_when
= when
;
219 loop
->timeout_where
= where
;
223 /* Causes the following call to poll_block() to wake up immediately, without
226 * ('where' is used in debug logging. Commonly one would use
227 * poll_immediate_wake() to automatically provide the caller's source file and
228 * line number for 'where'.) */
230 poll_immediate_wake_at(const char *where
)
232 poll_timer_wait_at(0, where
);
235 /* Logs, if appropriate, that the poll loop was awakened by an event
236 * registered at 'where' (typically a source file and line number). The other
237 * arguments have two possible interpretations:
239 * - If 'pollfd' is nonnull then it should be the "struct pollfd" that caused
240 * the wakeup. 'timeout' is ignored.
242 * - If 'pollfd' is NULL then 'timeout' is the number of milliseconds after
243 * which the poll loop woke up.
246 log_wakeup(const char *where
, const struct pollfd
*pollfd
, int timeout
)
248 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(10, 10);
249 enum vlog_level level
;
253 cpu_usage
= get_cpu_usage();
254 if (VLOG_IS_DBG_ENABLED()) {
256 } else if (cpu_usage
> 50
258 && !VLOG_DROP_INFO(&rl
)) {
265 ds_put_cstr(&s
, "wakeup due to ");
267 char *description
= describe_fd(pollfd
->fd
);
268 if (pollfd
->revents
& POLLIN
) {
269 ds_put_cstr(&s
, "[POLLIN]");
271 if (pollfd
->revents
& POLLOUT
) {
272 ds_put_cstr(&s
, "[POLLOUT]");
274 if (pollfd
->revents
& POLLERR
) {
275 ds_put_cstr(&s
, "[POLLERR]");
277 if (pollfd
->revents
& POLLHUP
) {
278 ds_put_cstr(&s
, "[POLLHUP]");
280 if (pollfd
->revents
& POLLNVAL
) {
281 ds_put_cstr(&s
, "[POLLNVAL]");
283 ds_put_format(&s
, " on fd %d (%s)", pollfd
->fd
, description
);
286 ds_put_format(&s
, "%d-ms timeout", timeout
);
289 ds_put_format(&s
, " at %s", where
);
291 if (cpu_usage
>= 0) {
292 ds_put_format(&s
, " (%d%% CPU usage)", cpu_usage
);
294 VLOG(level
, "%s", ds_cstr(&s
));
299 free_poll_nodes(struct poll_loop
*loop
)
301 struct poll_node
*node
, *next
;
303 HMAP_FOR_EACH_SAFE (node
, next
, hmap_node
, &loop
->poll_nodes
) {
304 hmap_remove(&loop
->poll_nodes
, &node
->hmap_node
);
306 if (node
->wevent
&& node
->pollfd
.fd
) {
307 WSAEventSelect(node
->pollfd
.fd
, NULL
, 0);
308 CloseHandle(node
->wevent
);
315 /* Blocks until one or more of the events registered with poll_fd_wait()
316 * occurs, or until the minimum duration registered with poll_timer_wait()
317 * elapses, or not at all if poll_immediate_wake() has been called. */
321 struct poll_loop
*loop
= poll_loop();
322 struct poll_node
*node
;
323 struct pollfd
*pollfds
;
324 HANDLE
*wevents
= NULL
;
329 /* Register fatal signal events before actually doing any real work for
333 if (loop
->timeout_when
== LLONG_MIN
) {
334 COVERAGE_INC(poll_zero_timeout
);
338 pollfds
= xmalloc(hmap_count(&loop
->poll_nodes
) * sizeof *pollfds
);
341 wevents
= xmalloc(hmap_count(&loop
->poll_nodes
) * sizeof *wevents
);
344 /* Populate with all the fds and events. */
346 HMAP_FOR_EACH (node
, hmap_node
, &loop
->poll_nodes
) {
347 pollfds
[i
] = node
->pollfd
;
349 wevents
[i
] = node
->wevent
;
350 if (node
->pollfd
.fd
&& node
->wevent
) {
351 short int wsa_events
= 0;
352 if (node
->pollfd
.events
& POLLIN
) {
353 wsa_events
|= FD_READ
| FD_ACCEPT
| FD_CLOSE
;
355 if (node
->pollfd
.events
& POLLOUT
) {
356 wsa_events
|= FD_WRITE
| FD_CONNECT
| FD_CLOSE
;
358 WSAEventSelect(node
->pollfd
.fd
, node
->wevent
, wsa_events
);
364 retval
= time_poll(pollfds
, hmap_count(&loop
->poll_nodes
), wevents
,
365 loop
->timeout_when
, &elapsed
);
367 static struct vlog_rate_limit rl
= VLOG_RATE_LIMIT_INIT(1, 5);
368 VLOG_ERR_RL(&rl
, "poll: %s", ovs_strerror(-retval
));
369 } else if (!retval
) {
370 log_wakeup(loop
->timeout_where
, NULL
, elapsed
);
371 } else if (get_cpu_usage() > 50 || VLOG_IS_DBG_ENABLED()) {
373 HMAP_FOR_EACH (node
, hmap_node
, &loop
->poll_nodes
) {
374 if (pollfds
[i
].revents
) {
375 log_wakeup(node
->where
, &pollfds
[i
], 0);
381 free_poll_nodes(loop
);
382 loop
->timeout_when
= LLONG_MAX
;
383 loop
->timeout_where
= NULL
;
387 /* Handle any pending signals before doing anything else. */
394 free_poll_loop(void *loop_
)
396 struct poll_loop
*loop
= loop_
;
398 free_poll_nodes(loop
);
399 hmap_destroy(&loop
->poll_nodes
);
403 static struct poll_loop
*
406 static struct ovsthread_once once
= OVSTHREAD_ONCE_INITIALIZER
;
407 static pthread_key_t key
;
408 struct poll_loop
*loop
;
410 if (ovsthread_once_start(&once
)) {
411 xpthread_key_create(&key
, free_poll_loop
);
412 ovsthread_once_done(&once
);
415 loop
= pthread_getspecific(key
);
417 loop
= xzalloc(sizeof *loop
);
418 hmap_init(&loop
->poll_nodes
);
419 xpthread_setspecific(key
, loop
);