]>
Commit | Line | Data |
---|---|---|
064af421 | 1 | /* |
4ca828d7 | 2 | * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc. |
064af421 | 3 | * |
a14bc59f BP |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
064af421 | 7 | * |
a14bc59f BP |
8 | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
064af421 BP |
15 | */ |
16 | ||
17 | #include <config.h> | |
18 | #include "poll-loop.h" | |
064af421 | 19 | #include <errno.h> |
2886875a | 20 | #include <inttypes.h> |
064af421 BP |
21 | #include <poll.h> |
22 | #include <stdlib.h> | |
23 | #include <string.h> | |
064af421 | 24 | #include "coverage.h" |
3e8a2ad1 | 25 | #include "openvswitch/dynamic-string.h" |
d8b30702 | 26 | #include "fatal-signal.h" |
b19bab5b | 27 | #include "openvswitch/list.h" |
2c06a966 | 28 | #include "ovs-thread.h" |
55b40355 | 29 | #include "seq.h" |
f89ffb0e | 30 | #include "socket-util.h" |
064af421 | 31 | #include "timeval.h" |
e6211adc | 32 | #include "openvswitch/vlog.h" |
ee89ea7b | 33 | #include "openvswitch/hmap.h" |
4ca828d7 | 34 | #include "hash.h" |
064af421 | 35 | |
/* Names this file's logging module "poll_loop" (presumably what the VLOG_*
 * calls further down log under -- confirm against vlog.h). */
VLOG_DEFINE_THIS_MODULE(poll_loop);

/* Coverage counters: 'poll_create_node' is bumped on every call to
 * poll_create_node(), 'poll_zero_timeout' on every poll_block() that was
 * asked to wake up immediately. */
COVERAGE_DEFINE(poll_create_node);
COVERAGE_DEFINE(poll_zero_timeout);
40 | ||
4ca828d7 LS |
41 | struct poll_node { |
42 | struct hmap_node hmap_node; | |
43 | struct pollfd pollfd; /* Events to pass to time_poll(). */ | |
44 | HANDLE wevent; /* Events for WaitForMultipleObjects(). */ | |
45 | const char *where; /* Where poll_node was created. */ | |
46 | }; | |
47 | ||
2c06a966 BP |
48 | struct poll_loop { |
49 | /* All active poll waiters. */ | |
4ca828d7 | 50 | struct hmap poll_nodes; |
064af421 | 51 | |
2c06a966 BP |
52 | /* Time at which to wake up the next call to poll_block(), LLONG_MIN to |
53 | * wake up immediately, or LLONG_MAX to wait forever. */ | |
54 | long long int timeout_when; /* In msecs as returned by time_msec(). */ | |
55 | const char *timeout_where; /* Where 'timeout_when' was set. */ | |
56 | }; | |
064af421 | 57 | |
/* Forward declaration; the definition appears further down in this file.
 * Returns the poll loop state belonging to the calling thread. */
static struct poll_loop *poll_loop(void);
064af421 | 59 | |
3ca600a3 | 60 | /* Look up the node with same fd or wevent. */ |
4ca828d7 | 61 | static struct poll_node * |
1ca3348e | 62 | find_poll_node(struct poll_loop *loop, int fd, HANDLE wevent) |
4ca828d7 LS |
63 | { |
64 | struct poll_node *node; | |
65 | ||
3ca600a3 GS |
66 | /* Both 'fd' and 'wevent' cannot be set. */ |
67 | ovs_assert(!fd != !wevent); | |
68 | ||
1ca3348e GS |
69 | HMAP_FOR_EACH_WITH_HASH (node, hmap_node, |
70 | hash_2words(fd, (uint32_t)wevent), | |
4ca828d7 | 71 | &loop->poll_nodes) { |
3ca600a3 GS |
72 | if ((fd && node->pollfd.fd == fd) |
73 | || (wevent && node->wevent == wevent)) { | |
4ca828d7 LS |
74 | return node; |
75 | } | |
76 | } | |
77 | return NULL; | |
78 | } | |
79 | ||
80 | /* On Unix based systems: | |
064af421 | 81 | * |
4ca828d7 LS |
82 | * Registers 'fd' as waiting for the specified 'events' (which should be |
83 | * POLLIN or POLLOUT or POLLIN | POLLOUT). The following call to | |
84 | * poll_block() will wake up when 'fd' becomes ready for one or more of the | |
1ca3348e | 85 | * requested events. The 'fd's are given to poll() function later. |
4ca828d7 LS |
86 | * |
87 | * On Windows system: | |
88 | * | |
1ca3348e GS |
89 | * If 'fd' is specified, create a new 'wevent'. Association of 'fd' and |
90 | * 'wevent' for 'events' happens in poll_block(). If 'wevent' is specified, | |
91 | * it is assumed that it is unrelated to any sockets and poll_block() | |
92 | * will wake up on any event on that 'wevent'. It is an error to pass | |
93 | * both 'wevent' and 'fd'. | |
94 | * | |
95 | * The event registration is one-shot: only the following call to | |
96 | * poll_block() is affected. The event will need to be re-registered after | |
97 | * poll_block() is called if it is to persist. | |
f89ffb0e | 98 | * |
5453ae20 BP |
99 | * ('where' is used in debug logging. Commonly one would use poll_fd_wait() to |
100 | * automatically provide the caller's source file and line number for | |
101 | * 'where'.) */ | |
1ca3348e GS |
102 | static void |
103 | poll_create_node(int fd, HANDLE wevent, short int events, const char *where) | |
064af421 | 104 | { |
2c06a966 | 105 | struct poll_loop *loop = poll_loop(); |
4ca828d7 | 106 | struct poll_node *node; |
2c06a966 | 107 | |
1ca3348e | 108 | COVERAGE_INC(poll_create_node); |
4ca828d7 | 109 | |
1ca3348e | 110 | /* Both 'fd' and 'wevent' cannot be set. */ |
18167ffe | 111 | ovs_assert(!fd != !wevent); |
2c06a966 | 112 | |
1ca3348e | 113 | /* Check for duplicate. If found, "or" the events. */ |
4ca828d7 LS |
114 | node = find_poll_node(loop, fd, wevent); |
115 | if (node) { | |
116 | node->pollfd.events |= events; | |
117 | } else { | |
118 | node = xzalloc(sizeof *node); | |
119 | hmap_insert(&loop->poll_nodes, &node->hmap_node, | |
1ca3348e | 120 | hash_2words(fd, (uint32_t)wevent)); |
4ca828d7 LS |
121 | node->pollfd.fd = fd; |
122 | node->pollfd.events = events; | |
1ca3348e GS |
123 | #ifdef _WIN32 |
124 | if (!wevent) { | |
125 | wevent = CreateEvent(NULL, FALSE, FALSE, NULL); | |
126 | } | |
127 | #endif | |
4ca828d7 LS |
128 | node->wevent = wevent; |
129 | node->where = where; | |
130 | } | |
064af421 BP |
131 | } |
132 | ||
1ca3348e GS |
133 | /* Registers 'fd' as waiting for the specified 'events' (which should be POLLIN |
134 | * or POLLOUT or POLLIN | POLLOUT). The following call to poll_block() will | |
135 | * wake up when 'fd' becomes ready for one or more of the requested events. | |
136 | * | |
137 | * On Windows, 'fd' must be a socket. | |
138 | * | |
139 | * The event registration is one-shot: only the following call to poll_block() | |
140 | * is affected. The event will need to be re-registered after poll_block() is | |
141 | * called if it is to persist. | |
142 | * | |
143 | * ('where' is used in debug logging. Commonly one would use poll_fd_wait() to | |
144 | * automatically provide the caller's source file and line number for | |
145 | * 'where'.) */ | |
146 | void | |
147 | poll_fd_wait_at(int fd, short int events, const char *where) | |
148 | { | |
149 | poll_create_node(fd, 0, events, where); | |
150 | } | |
151 | ||
#ifdef _WIN32
/* Asks the following call to poll_block() to wake up when 'wevent' is
 * signaled.
 *
 * This is a one-shot registration: it applies only to the following call to
 * poll_block() and must be repeated afterward if it is to persist.
 *
 * ('where' is used in debug logging.  Commonly one would use
 * poll_wevent_wait() to automatically provide the caller's source file and
 * line number for 'where'.) */
void
poll_wevent_wait_at(HANDLE wevent, const char *where)
{
    const int no_fd = 0;            /* Waiting on a handle, not an fd. */
    const short int no_events = 0;  /* No poll events are requested. */

    poll_create_node(no_fd, wevent, no_events, where);
}
#endif /* _WIN32 */
169 | ||
/* Limits the following call to poll_block() to blocking for at most 'msec'
 * milliseconds.  A nonpositive 'msec' means that poll_block() will not block
 * at all.
 *
 * This is a one-shot registration: it applies only to the following call to
 * poll_block() and must be repeated afterward if it is to persist.
 *
 * ('where' is used in debug logging.  Commonly one would use poll_timer_wait()
 * to automatically provide the caller's source file and line number for
 * 'where'.) */
void
poll_timer_wait_at(long long int msec, const char *where)
{
    long long int now = time_msec();

    /* Start from "wait forever", which is also the right answer when
     * now + msec would overflow. */
    long long int deadline = LLONG_MAX;

    if (msec <= 0) {
        /* Wake up immediately. */
        deadline = LLONG_MIN;
    } else if ((unsigned long long int) now + msec <= LLONG_MAX) {
        /* Normal case: the sum fits in a long long int. */
        deadline = now + msec;
    }

    poll_timer_wait_until_at(deadline, where);
}
200 | ||
7cf8b266 | 201 | /* Causes the following call to poll_block() to wake up when the current time, |
cee03df4 | 202 | * as returned by time_msec(), reaches 'when' or later. If 'when' is earlier |
7cf8b266 BP |
203 | * than the current time, the following call to poll_block() will not block at |
204 | * all. | |
205 | * | |
206 | * The timer registration is one-shot: only the following call to poll_block() | |
207 | * is affected. The timer will need to be re-registered after poll_block() is | |
f89ffb0e BP |
208 | * called if it is to persist. |
209 | * | |
5453ae20 BP |
210 | * ('where' is used in debug logging. Commonly one would use |
211 | * poll_timer_wait_until() to automatically provide the caller's source file | |
212 | * and line number for 'where'.) */ | |
7cf8b266 | 213 | void |
5453ae20 | 214 | poll_timer_wait_until_at(long long int when, const char *where) |
7cf8b266 | 215 | { |
2c06a966 BP |
216 | struct poll_loop *loop = poll_loop(); |
217 | if (when < loop->timeout_when) { | |
218 | loop->timeout_when = when; | |
219 | loop->timeout_where = where; | |
cee03df4 | 220 | } |
7cf8b266 BP |
221 | } |
222 | ||
/* Makes the following call to poll_block() return without blocking, by
 * registering a zero-millisecond timer.
 *
 * ('where' is used in debug logging.  Commonly one would use
 * poll_immediate_wake() to automatically provide the caller's source file and
 * line number for 'where'.) */
void
poll_immediate_wake_at(const char *where)
{
    const long long int no_delay = 0;

    poll_timer_wait_at(no_delay, where);
}
234 | ||
959ec62e BP |
235 | /* Logs, if appropriate, that the poll loop was awakened by an event |
236 | * registered at 'where' (typically a source file and line number). The other | |
237 | * arguments have two possible interpretations: | |
238 | * | |
239 | * - If 'pollfd' is nonnull then it should be the "struct pollfd" that caused | |
d19cedb2 | 240 | * the wakeup. 'timeout' is ignored. |
959ec62e | 241 | * |
d19cedb2 BP |
242 | * - If 'pollfd' is NULL then 'timeout' is the number of milliseconds after |
243 | * which the poll loop woke up. | |
959ec62e BP |
244 | */ |
245 | static void | |
246 | log_wakeup(const char *where, const struct pollfd *pollfd, int timeout) | |
064af421 | 247 | { |
cf1b8a92 | 248 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 10); |
959ec62e BP |
249 | enum vlog_level level; |
250 | int cpu_usage; | |
251 | struct ds s; | |
064af421 | 252 | |
959ec62e BP |
253 | cpu_usage = get_cpu_usage(); |
254 | if (VLOG_IS_DBG_ENABLED()) { | |
255 | level = VLL_DBG; | |
2f8932e8 IM |
256 | } else if (cpu_usage > 50 |
257 | && !thread_is_pmd() | |
258 | && !VLOG_DROP_INFO(&rl)) { | |
692bf61a | 259 | level = VLL_INFO; |
959ec62e BP |
260 | } else { |
261 | return; | |
262 | } | |
064af421 | 263 | |
959ec62e BP |
264 | ds_init(&s); |
265 | ds_put_cstr(&s, "wakeup due to "); | |
266 | if (pollfd) { | |
267 | char *description = describe_fd(pollfd->fd); | |
268 | if (pollfd->revents & POLLIN) { | |
269 | ds_put_cstr(&s, "[POLLIN]"); | |
270 | } | |
271 | if (pollfd->revents & POLLOUT) { | |
272 | ds_put_cstr(&s, "[POLLOUT]"); | |
273 | } | |
274 | if (pollfd->revents & POLLERR) { | |
275 | ds_put_cstr(&s, "[POLLERR]"); | |
276 | } | |
277 | if (pollfd->revents & POLLHUP) { | |
278 | ds_put_cstr(&s, "[POLLHUP]"); | |
279 | } | |
280 | if (pollfd->revents & POLLNVAL) { | |
281 | ds_put_cstr(&s, "[POLLNVAL]"); | |
282 | } | |
283 | ds_put_format(&s, " on fd %d (%s)", pollfd->fd, description); | |
284 | free(description); | |
285 | } else { | |
286 | ds_put_format(&s, "%d-ms timeout", timeout); | |
287 | } | |
f89ffb0e | 288 | if (where) { |
959ec62e | 289 | ds_put_format(&s, " at %s", where); |
064af421 | 290 | } |
959ec62e BP |
291 | if (cpu_usage >= 0) { |
292 | ds_put_format(&s, " (%d%% CPU usage)", cpu_usage); | |
293 | } | |
294 | VLOG(level, "%s", ds_cstr(&s)); | |
295 | ds_destroy(&s); | |
064af421 BP |
296 | } |
297 | ||
4ca828d7 LS |
298 | static void |
299 | free_poll_nodes(struct poll_loop *loop) | |
300 | { | |
301 | struct poll_node *node, *next; | |
302 | ||
303 | HMAP_FOR_EACH_SAFE (node, next, hmap_node, &loop->poll_nodes) { | |
304 | hmap_remove(&loop->poll_nodes, &node->hmap_node); | |
1ca3348e GS |
305 | #ifdef _WIN32 |
306 | if (node->wevent && node->pollfd.fd) { | |
307 | WSAEventSelect(node->pollfd.fd, NULL, 0); | |
308 | CloseHandle(node->wevent); | |
309 | } | |
310 | #endif | |
4ca828d7 LS |
311 | free(node); |
312 | } | |
313 | } | |
314 | ||
064af421 BP |
315 | /* Blocks until one or more of the events registered with poll_fd_wait() |
316 | * occurs, or until the minimum duration registered with poll_timer_wait() | |
d474bd01 | 317 | * elapses, or not at all if poll_immediate_wake() has been called. */ |
064af421 BP |
318 | void |
319 | poll_block(void) | |
320 | { | |
2c06a966 | 321 | struct poll_loop *loop = poll_loop(); |
4ca828d7 LS |
322 | struct poll_node *node; |
323 | struct pollfd *pollfds; | |
324 | HANDLE *wevents = NULL; | |
cee03df4 | 325 | int elapsed; |
064af421 | 326 | int retval; |
4ca828d7 | 327 | int i; |
064af421 | 328 | |
d8b30702 JG |
329 | /* Register fatal signal events before actually doing any real work for |
330 | * poll_block. */ | |
331 | fatal_signal_wait(); | |
332 | ||
2c06a966 | 333 | if (loop->timeout_when == LLONG_MIN) { |
064af421 BP |
334 | COVERAGE_INC(poll_zero_timeout); |
335 | } | |
2c06a966 | 336 | |
8661af79 | 337 | timewarp_run(); |
4ca828d7 LS |
338 | pollfds = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *pollfds); |
339 | ||
340 | #ifdef _WIN32 | |
341 | wevents = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *wevents); | |
342 | #endif | |
343 | ||
344 | /* Populate with all the fds and events. */ | |
345 | i = 0; | |
346 | HMAP_FOR_EACH (node, hmap_node, &loop->poll_nodes) { | |
347 | pollfds[i] = node->pollfd; | |
348 | #ifdef _WIN32 | |
349 | wevents[i] = node->wevent; | |
55489d31 GS |
350 | if (node->pollfd.fd && node->wevent) { |
351 | short int wsa_events = 0; | |
352 | if (node->pollfd.events & POLLIN) { | |
353 | wsa_events |= FD_READ | FD_ACCEPT | FD_CLOSE; | |
354 | } | |
355 | if (node->pollfd.events & POLLOUT) { | |
356 | wsa_events |= FD_WRITE | FD_CONNECT | FD_CLOSE; | |
357 | } | |
358 | WSAEventSelect(node->pollfd.fd, node->wevent, wsa_events); | |
359 | } | |
4ca828d7 LS |
360 | #endif |
361 | i++; | |
362 | } | |
363 | ||
364 | retval = time_poll(pollfds, hmap_count(&loop->poll_nodes), wevents, | |
2c06a966 | 365 | loop->timeout_when, &elapsed); |
064af421 BP |
366 | if (retval < 0) { |
367 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); | |
10a89ef0 | 368 | VLOG_ERR_RL(&rl, "poll: %s", ovs_strerror(-retval)); |
959ec62e | 369 | } else if (!retval) { |
2c06a966 | 370 | log_wakeup(loop->timeout_where, NULL, elapsed); |
8f6c3ad7 | 371 | } else if (get_cpu_usage() > 50 || VLOG_IS_DBG_ENABLED()) { |
4ca828d7 LS |
372 | i = 0; |
373 | HMAP_FOR_EACH (node, hmap_node, &loop->poll_nodes) { | |
374 | if (pollfds[i].revents) { | |
375 | log_wakeup(node->where, &pollfds[i], 0); | |
8f6c3ad7 | 376 | } |
4ca828d7 | 377 | i++; |
064af421 | 378 | } |
064af421 BP |
379 | } |
380 | ||
4ca828d7 | 381 | free_poll_nodes(loop); |
2c06a966 BP |
382 | loop->timeout_when = LLONG_MAX; |
383 | loop->timeout_where = NULL; | |
4ca828d7 LS |
384 | free(pollfds); |
385 | free(wevents); | |
d8b30702 JG |
386 | |
387 | /* Handle any pending signals before doing anything else. */ | |
388 | fatal_signal_run(); | |
55b40355 BP |
389 | |
390 | seq_woke(); | |
064af421 | 391 | } |
064af421 | 392 | \f |
8f6c3ad7 | 393 | static void |
2c06a966 | 394 | free_poll_loop(void *loop_) |
064af421 | 395 | { |
2c06a966 BP |
396 | struct poll_loop *loop = loop_; |
397 | ||
4ca828d7 LS |
398 | free_poll_nodes(loop); |
399 | hmap_destroy(&loop->poll_nodes); | |
2c06a966 BP |
400 | free(loop); |
401 | } | |
402 | ||
403 | static struct poll_loop * | |
404 | poll_loop(void) | |
405 | { | |
406 | static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; | |
407 | static pthread_key_t key; | |
408 | struct poll_loop *loop; | |
409 | ||
410 | if (ovsthread_once_start(&once)) { | |
411 | xpthread_key_create(&key, free_poll_loop); | |
412 | ovsthread_once_done(&once); | |
8f6c3ad7 BP |
413 | } |
414 | ||
2c06a966 BP |
415 | loop = pthread_getspecific(key); |
416 | if (!loop) { | |
417 | loop = xzalloc(sizeof *loop); | |
4ca828d7 | 418 | hmap_init(&loop->poll_nodes); |
9c4c45ed | 419 | xpthread_setspecific(key, loop); |
2c06a966 BP |
420 | } |
421 | return loop; | |
064af421 | 422 | } |
2c06a966 | 423 |