]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc. | |
3 | * | |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | */ | |
16 | ||
17 | #include <config.h> | |
18 | #include "openvswitch/poll-loop.h" | |
19 | #include <errno.h> | |
20 | #include <inttypes.h> | |
21 | #include <poll.h> | |
22 | #include <stdlib.h> | |
23 | #include <string.h> | |
24 | #include "coverage.h" | |
25 | #include "openvswitch/dynamic-string.h" | |
26 | #include "fatal-signal.h" | |
27 | #include "openvswitch/list.h" | |
28 | #include "ovs-thread.h" | |
29 | #include "seq.h" | |
30 | #include "socket-util.h" | |
31 | #include "timeval.h" | |
32 | #include "openvswitch/vlog.h" | |
33 | #include "openvswitch/hmap.h" | |
34 | #include "hash.h" | |
35 | ||
36 | VLOG_DEFINE_THIS_MODULE(poll_loop); | |
37 | ||
38 | COVERAGE_DEFINE(poll_create_node); | |
39 | COVERAGE_DEFINE(poll_zero_timeout); | |
40 | ||
41 | struct poll_node { | |
42 | struct hmap_node hmap_node; | |
43 | struct pollfd pollfd; /* Events to pass to time_poll(). */ | |
44 | HANDLE wevent; /* Events for WaitForMultipleObjects(). */ | |
45 | const char *where; /* Where poll_node was created. */ | |
46 | }; | |
47 | ||
48 | struct poll_loop { | |
49 | /* All active poll waiters. */ | |
50 | struct hmap poll_nodes; | |
51 | ||
52 | /* Time at which to wake up the next call to poll_block(), LLONG_MIN to | |
53 | * wake up immediately, or LLONG_MAX to wait forever. */ | |
54 | long long int timeout_when; /* In msecs as returned by time_msec(). */ | |
55 | const char *timeout_where; /* Where 'timeout_when' was set. */ | |
56 | }; | |
57 | ||
58 | static struct poll_loop *poll_loop(void); | |
59 | ||
60 | /* Look up the node with same fd or wevent. */ | |
61 | static struct poll_node * | |
62 | find_poll_node(struct poll_loop *loop, int fd, HANDLE wevent) | |
63 | { | |
64 | struct poll_node *node; | |
65 | ||
66 | /* Both 'fd' and 'wevent' cannot be set. */ | |
67 | ovs_assert(!fd != !wevent); | |
68 | ||
69 | HMAP_FOR_EACH_WITH_HASH (node, hmap_node, | |
70 | hash_2words(fd, (uint32_t)wevent), | |
71 | &loop->poll_nodes) { | |
72 | if ((fd && node->pollfd.fd == fd) | |
73 | || (wevent && node->wevent == wevent)) { | |
74 | return node; | |
75 | } | |
76 | } | |
77 | return NULL; | |
78 | } | |
79 | ||
80 | /* On Unix based systems: | |
81 | * | |
82 | * Registers 'fd' as waiting for the specified 'events' (which should be | |
83 | * POLLIN or POLLOUT or POLLIN | POLLOUT). The following call to | |
84 | * poll_block() will wake up when 'fd' becomes ready for one or more of the | |
85 | * requested events. The 'fd's are given to poll() function later. | |
86 | * | |
87 | * On Windows system: | |
88 | * | |
89 | * If 'fd' is specified, create a new 'wevent'. Association of 'fd' and | |
90 | * 'wevent' for 'events' happens in poll_block(). If 'wevent' is specified, | |
91 | * it is assumed that it is unrelated to any sockets and poll_block() | |
92 | * will wake up on any event on that 'wevent'. It is an error to pass | |
93 | * both 'wevent' and 'fd'. | |
94 | * | |
95 | * The event registration is one-shot: only the following call to | |
96 | * poll_block() is affected. The event will need to be re-registered after | |
97 | * poll_block() is called if it is to persist. | |
98 | * | |
99 | * ('where' is used in debug logging. Commonly one would use poll_fd_wait() to | |
100 | * automatically provide the caller's source file and line number for | |
101 | * 'where'.) */ | |
102 | static void | |
103 | poll_create_node(int fd, HANDLE wevent, short int events, const char *where) | |
104 | { | |
105 | struct poll_loop *loop = poll_loop(); | |
106 | struct poll_node *node; | |
107 | ||
108 | COVERAGE_INC(poll_create_node); | |
109 | ||
110 | /* Both 'fd' and 'wevent' cannot be set. */ | |
111 | ovs_assert(!fd != !wevent); | |
112 | ||
113 | /* Check for duplicate. If found, "or" the events. */ | |
114 | node = find_poll_node(loop, fd, wevent); | |
115 | if (node) { | |
116 | node->pollfd.events |= events; | |
117 | } else { | |
118 | node = xzalloc(sizeof *node); | |
119 | hmap_insert(&loop->poll_nodes, &node->hmap_node, | |
120 | hash_2words(fd, (uint32_t)wevent)); | |
121 | node->pollfd.fd = fd; | |
122 | node->pollfd.events = events; | |
123 | #ifdef _WIN32 | |
124 | if (!wevent) { | |
125 | wevent = CreateEvent(NULL, FALSE, FALSE, NULL); | |
126 | } | |
127 | #endif | |
128 | node->wevent = wevent; | |
129 | node->where = where; | |
130 | } | |
131 | } | |
132 | ||
/* Asks the following call to poll_block() to wake up when 'fd' becomes ready
 * for one or more of 'events', which should be POLLIN or POLLOUT or
 * POLLIN | POLLOUT.
 *
 * On Windows, 'fd' must be a socket.
 *
 * This is a one-shot registration: it applies only to the following call to
 * poll_block() and must be made again afterward if it is to persist.
 *
 * ('where' is used in debug logging.  Most callers use poll_fd_wait(), which
 * fills in the caller's source file and line number for 'where'.) */
void
poll_fd_wait_at(int fd, short int events, const char *where)
{
    /* No event handle: the wait is keyed on 'fd' alone. */
    poll_create_node(fd, 0, events, where);
}
151 | ||
#ifdef _WIN32
/* Asks the following call to poll_block() to wake up when 'wevent' is
 * signaled.
 *
 * This is a one-shot registration: it applies only to the following call to
 * poll_block() and must be made again afterward if it is to persist.
 *
 * ('where' is used in debug logging.  Most callers use poll_wevent_wait(),
 * which fills in the caller's source file and line number for 'where'.) */
void
poll_wevent_wait_at(HANDLE wevent, const char *where)
{
    /* No file descriptor and no poll events: the wait is keyed on 'wevent'
     * alone. */
    poll_create_node(0, wevent, 0, where);
}
#endif /* _WIN32 */
169 | ||
/* Limits the following call to poll_block() to blocking for at most 'msec'
 * milliseconds.  A nonpositive 'msec' makes the following call to
 * poll_block() not block at all.
 *
 * This is a one-shot registration: it applies only to the following call to
 * poll_block() and must be made again afterward if it is to persist.
 *
 * ('where' is used in debug logging.  Most callers use poll_timer_wait(),
 * which fills in the caller's source file and line number for 'where'.) */
void
poll_timer_wait_at(long long int msec, const char *where)
{
    long long int now = time_msec();
    /* Saturated deadline, kept when 'now + msec' would overflow. */
    long long int deadline = LLONG_MAX;

    if (msec <= 0) {
        /* Wake up immediately. */
        deadline = LLONG_MIN;
    } else if ((unsigned long long int) now + msec <= LLONG_MAX) {
        /* Normal case: the sum is representable. */
        deadline = now + msec;
    }

    poll_timer_wait_until_at(deadline, where);
}
200 | ||
201 | /* Causes the following call to poll_block() to wake up when the current time, | |
202 | * as returned by time_msec(), reaches 'when' or later. If 'when' is earlier | |
203 | * than the current time, the following call to poll_block() will not block at | |
204 | * all. | |
205 | * | |
206 | * The timer registration is one-shot: only the following call to poll_block() | |
207 | * is affected. The timer will need to be re-registered after poll_block() is | |
208 | * called if it is to persist. | |
209 | * | |
210 | * ('where' is used in debug logging. Commonly one would use | |
211 | * poll_timer_wait_until() to automatically provide the caller's source file | |
212 | * and line number for 'where'.) */ | |
213 | void | |
214 | poll_timer_wait_until_at(long long int when, const char *where) | |
215 | { | |
216 | struct poll_loop *loop = poll_loop(); | |
217 | if (when < loop->timeout_when) { | |
218 | loop->timeout_when = when; | |
219 | loop->timeout_where = where; | |
220 | } | |
221 | } | |
222 | ||
/* Makes the following call to poll_block() return immediately instead of
 * blocking.
 *
 * ('where' is used in debug logging.  Most callers use
 * poll_immediate_wake(), which fills in the caller's source file and line
 * number for 'where'.) */
void
poll_immediate_wake_at(const char *where)
{
    /* A zero-length timer is treated as "wake up immediately". */
    poll_timer_wait_at(0, where);
}
234 | ||
235 | /* Logs, if appropriate, that the poll loop was awakened by an event | |
236 | * registered at 'where' (typically a source file and line number). The other | |
237 | * arguments have two possible interpretations: | |
238 | * | |
239 | * - If 'pollfd' is nonnull then it should be the "struct pollfd" that caused | |
240 | * the wakeup. 'timeout' is ignored. | |
241 | * | |
242 | * - If 'pollfd' is NULL then 'timeout' is the number of milliseconds after | |
243 | * which the poll loop woke up. | |
244 | */ | |
245 | static void | |
246 | log_wakeup(const char *where, const struct pollfd *pollfd, int timeout) | |
247 | { | |
248 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(10, 10); | |
249 | enum vlog_level level; | |
250 | int cpu_usage; | |
251 | struct ds s; | |
252 | ||
253 | cpu_usage = get_cpu_usage(); | |
254 | if (VLOG_IS_DBG_ENABLED()) { | |
255 | level = VLL_DBG; | |
256 | } else if (cpu_usage > 50 | |
257 | && !thread_is_pmd() | |
258 | && !VLOG_DROP_INFO(&rl)) { | |
259 | level = VLL_INFO; | |
260 | } else { | |
261 | return; | |
262 | } | |
263 | ||
264 | ds_init(&s); | |
265 | ds_put_cstr(&s, "wakeup due to "); | |
266 | if (pollfd) { | |
267 | char *description = describe_fd(pollfd->fd); | |
268 | if (pollfd->revents & POLLIN) { | |
269 | ds_put_cstr(&s, "[POLLIN]"); | |
270 | } | |
271 | if (pollfd->revents & POLLOUT) { | |
272 | ds_put_cstr(&s, "[POLLOUT]"); | |
273 | } | |
274 | if (pollfd->revents & POLLERR) { | |
275 | ds_put_cstr(&s, "[POLLERR]"); | |
276 | } | |
277 | if (pollfd->revents & POLLHUP) { | |
278 | ds_put_cstr(&s, "[POLLHUP]"); | |
279 | } | |
280 | if (pollfd->revents & POLLNVAL) { | |
281 | ds_put_cstr(&s, "[POLLNVAL]"); | |
282 | } | |
283 | ds_put_format(&s, " on fd %d (%s)", pollfd->fd, description); | |
284 | free(description); | |
285 | } else { | |
286 | ds_put_format(&s, "%d-ms timeout", timeout); | |
287 | } | |
288 | if (where) { | |
289 | ds_put_format(&s, " at %s", where); | |
290 | } | |
291 | if (cpu_usage >= 0) { | |
292 | ds_put_format(&s, " (%d%% CPU usage)", cpu_usage); | |
293 | } | |
294 | VLOG(level, "%s", ds_cstr(&s)); | |
295 | ds_destroy(&s); | |
296 | } | |
297 | ||
298 | static void | |
299 | free_poll_nodes(struct poll_loop *loop) | |
300 | { | |
301 | struct poll_node *node, *next; | |
302 | ||
303 | HMAP_FOR_EACH_SAFE (node, next, hmap_node, &loop->poll_nodes) { | |
304 | hmap_remove(&loop->poll_nodes, &node->hmap_node); | |
305 | #ifdef _WIN32 | |
306 | if (node->wevent && node->pollfd.fd) { | |
307 | WSAEventSelect(node->pollfd.fd, NULL, 0); | |
308 | CloseHandle(node->wevent); | |
309 | } | |
310 | #endif | |
311 | free(node); | |
312 | } | |
313 | } | |
314 | ||
315 | /* Blocks until one or more of the events registered with poll_fd_wait() | |
316 | * occurs, or until the minimum duration registered with poll_timer_wait() | |
317 | * elapses, or not at all if poll_immediate_wake() has been called. */ | |
318 | void | |
319 | poll_block(void) | |
320 | { | |
321 | struct poll_loop *loop = poll_loop(); | |
322 | struct poll_node *node; | |
323 | struct pollfd *pollfds; | |
324 | HANDLE *wevents = NULL; | |
325 | int elapsed; | |
326 | int retval; | |
327 | int i; | |
328 | ||
329 | /* Register fatal signal events before actually doing any real work for | |
330 | * poll_block. */ | |
331 | fatal_signal_wait(); | |
332 | ||
333 | if (loop->timeout_when == LLONG_MIN) { | |
334 | COVERAGE_INC(poll_zero_timeout); | |
335 | } | |
336 | ||
337 | timewarp_run(); | |
338 | pollfds = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *pollfds); | |
339 | ||
340 | #ifdef _WIN32 | |
341 | wevents = xmalloc(hmap_count(&loop->poll_nodes) * sizeof *wevents); | |
342 | #endif | |
343 | ||
344 | /* Populate with all the fds and events. */ | |
345 | i = 0; | |
346 | HMAP_FOR_EACH (node, hmap_node, &loop->poll_nodes) { | |
347 | pollfds[i] = node->pollfd; | |
348 | #ifdef _WIN32 | |
349 | wevents[i] = node->wevent; | |
350 | if (node->pollfd.fd && node->wevent) { | |
351 | short int wsa_events = 0; | |
352 | if (node->pollfd.events & POLLIN) { | |
353 | wsa_events |= FD_READ | FD_ACCEPT | FD_CLOSE; | |
354 | } | |
355 | if (node->pollfd.events & POLLOUT) { | |
356 | wsa_events |= FD_WRITE | FD_CONNECT | FD_CLOSE; | |
357 | } | |
358 | WSAEventSelect(node->pollfd.fd, node->wevent, wsa_events); | |
359 | } | |
360 | #endif | |
361 | i++; | |
362 | } | |
363 | ||
364 | retval = time_poll(pollfds, hmap_count(&loop->poll_nodes), wevents, | |
365 | loop->timeout_when, &elapsed); | |
366 | if (retval < 0) { | |
367 | static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); | |
368 | VLOG_ERR_RL(&rl, "poll: %s", ovs_strerror(-retval)); | |
369 | } else if (!retval) { | |
370 | log_wakeup(loop->timeout_where, NULL, elapsed); | |
371 | } else if (get_cpu_usage() > 50 || VLOG_IS_DBG_ENABLED()) { | |
372 | i = 0; | |
373 | HMAP_FOR_EACH (node, hmap_node, &loop->poll_nodes) { | |
374 | if (pollfds[i].revents) { | |
375 | log_wakeup(node->where, &pollfds[i], 0); | |
376 | } | |
377 | i++; | |
378 | } | |
379 | } | |
380 | ||
381 | free_poll_nodes(loop); | |
382 | loop->timeout_when = LLONG_MAX; | |
383 | loop->timeout_where = NULL; | |
384 | free(pollfds); | |
385 | free(wevents); | |
386 | ||
387 | /* Handle any pending signals before doing anything else. */ | |
388 | fatal_signal_run(); | |
389 | ||
390 | seq_woke(); | |
391 | } | |
392 | \f | |
393 | static void | |
394 | free_poll_loop(void *loop_) | |
395 | { | |
396 | struct poll_loop *loop = loop_; | |
397 | ||
398 | free_poll_nodes(loop); | |
399 | hmap_destroy(&loop->poll_nodes); | |
400 | free(loop); | |
401 | } | |
402 | ||
403 | static struct poll_loop * | |
404 | poll_loop(void) | |
405 | { | |
406 | static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; | |
407 | static pthread_key_t key; | |
408 | struct poll_loop *loop; | |
409 | ||
410 | if (ovsthread_once_start(&once)) { | |
411 | xpthread_key_create(&key, free_poll_loop); | |
412 | ovsthread_once_done(&once); | |
413 | } | |
414 | ||
415 | loop = pthread_getspecific(key); | |
416 | if (!loop) { | |
417 | loop = xzalloc(sizeof *loop); | |
418 | hmap_init(&loop->poll_nodes); | |
419 | xpthread_setspecific(key, loop); | |
420 | } | |
421 | return loop; | |
422 | } | |
423 |