]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Simple trace backend | |
3 | * | |
4 | * Copyright IBM, Corp. 2010 | |
5 | * | |
6 | * This work is licensed under the terms of the GNU GPL, version 2. See | |
7 | * the COPYING file in the top-level directory. | |
8 | * | |
9 | */ | |
10 | ||
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#ifndef _WIN32
#include <signal.h>
#include <pthread.h>
#endif
#include "qemu-timer.h"
#include "trace.h"
#include "trace/control.h"
22 | ||
/** Trace file header event ID */
#define HEADER_EVENT_ID (~(uint64_t)0) /* avoids conflicting with TraceEventIDs */

/** Trace file magic number */
#define HEADER_MAGIC 0xf2b177cb0aa429b4ULL

/** Trace file version number, bump if format changes */
#define HEADER_VERSION 2

/** Records were dropped event ID */
#define DROPPED_EVENT_ID (~(uint64_t)0 - 1)

/** Trace record is valid (stored in the MSB of TraceRecord.event) */
#define TRACE_RECORD_VALID ((uint64_t)1 << 63)

/*
 * Trace records are written out by a dedicated thread. The thread waits for
 * records to become available, writes them out, and then waits again.
 */
static GStaticMutex trace_lock = G_STATIC_MUTEX_INIT;
/* Signaled (under trace_lock) when new records may be ready for writeout */
static GCond *trace_available_cond;
/* Signaled by the writeout thread each time it re-checks for work, waking
 * callers blocked in flush_trace_file(true) */
static GCond *trace_empty_cond;
/* True while unconsumed records may be pending in the buffer */
static bool trace_available;
/* False while the trace file is being switched; the writeout thread idles */
static bool trace_writeout_enabled;

enum {
    /* Ring buffer capacity in bytes */
    TRACE_BUF_LEN = 4096 * 64,
    /* Writeout is kicked once this many unwritten bytes accumulate */
    TRACE_BUF_FLUSH_THRESHOLD = TRACE_BUF_LEN / 4,
};

/* Ring buffer holding variable-length TraceRecords */
uint8_t trace_buf[TRACE_BUF_LEN];
/* Producer cursor; grows without bound and is used modulo TRACE_BUF_LEN */
static unsigned int trace_idx;
/* Consumer cursor maintained by the writeout thread (same convention) */
static unsigned int writeout_idx;
/* Count of events lost while the buffer was full */
static uint64_t dropped_events;
/* Open trace file, or NULL when file output is disabled */
static FILE *trace_fp;
static char *trace_file_name;
59 | ||
/** Trace buffer entry */
typedef struct {
    uint64_t event;        /* TraceEventID; MSB doubles as TRACE_RECORD_VALID */
    uint64_t timestamp_ns;
    uint32_t length;       /* in bytes, including this header */
    uint32_t reserved;     /* unused */
    uint8_t arguments[];   /* variable-length event arguments */
} TraceRecord;
68 | ||
/** On-disk trace file header, written once when the file is opened */
typedef struct {
    uint64_t header_event_id; /* HEADER_EVENT_ID */
    uint64_t header_magic;    /* HEADER_MAGIC */
    uint64_t header_version;  /* HEADER_VERSION */
} TraceLogHeader;


/* Ring-buffer accessors, defined below */
static void read_from_buffer(unsigned int idx, void *dataptr, size_t size);
static unsigned int write_to_buffer(unsigned int idx, void *dataptr, size_t size);
78 | ||
79 | static void clear_buffer_range(unsigned int idx, size_t len) | |
80 | { | |
81 | uint32_t num = 0; | |
82 | while (num < len) { | |
83 | if (idx >= TRACE_BUF_LEN) { | |
84 | idx = idx % TRACE_BUF_LEN; | |
85 | } | |
86 | trace_buf[idx++] = 0; | |
87 | num++; | |
88 | } | |
89 | } | |
90 | /** | |
91 | * Read a trace record from the trace buffer | |
92 | * | |
93 | * @idx Trace buffer index | |
94 | * @record Trace record to fill | |
95 | * | |
96 | * Returns false if the record is not valid. | |
97 | */ | |
98 | static bool get_trace_record(unsigned int idx, TraceRecord **recordptr) | |
99 | { | |
100 | uint64_t event_flag = 0; | |
101 | TraceRecord record; | |
102 | /* read the event flag to see if its a valid record */ | |
103 | read_from_buffer(idx, &record, sizeof(event_flag)); | |
104 | ||
105 | if (!(record.event & TRACE_RECORD_VALID)) { | |
106 | return false; | |
107 | } | |
108 | ||
109 | smp_rmb(); /* read memory barrier before accessing record */ | |
110 | /* read the record header to know record length */ | |
111 | read_from_buffer(idx, &record, sizeof(TraceRecord)); | |
112 | *recordptr = malloc(record.length); /* dont use g_malloc, can deadlock when traced */ | |
113 | /* make a copy of record to avoid being overwritten */ | |
114 | read_from_buffer(idx, *recordptr, record.length); | |
115 | smp_rmb(); /* memory barrier before clearing valid flag */ | |
116 | (*recordptr)->event &= ~TRACE_RECORD_VALID; | |
117 | /* clear the trace buffer range for consumed record otherwise any byte | |
118 | * with its MSB set may be considered as a valid event id when the writer | |
119 | * thread crosses this range of buffer again. | |
120 | */ | |
121 | clear_buffer_range(idx, record.length); | |
122 | return true; | |
123 | } | |
124 | ||
/**
 * Kick writeout thread
 *
 * @wait Whether to wait for writeout thread to complete
 *
 * Marks records as available and signals the writeout thread.  With
 * @wait true, blocks on trace_empty_cond until the writeout thread signals
 * that it has drained the buffer.
 */
static void flush_trace_file(bool wait)
{
    g_static_mutex_lock(&trace_lock);
    trace_available = true;
    g_cond_signal(trace_available_cond);

    if (wait) {
        /* Woken by wait_for_trace_records_available() once the writeout
         * thread loops back looking for more work */
        g_cond_wait(trace_empty_cond, g_static_mutex_get_mutex(&trace_lock));
    }

    g_static_mutex_unlock(&trace_lock);
}
142 | ||
/**
 * Block the writeout thread until records are available AND writeout is
 * enabled.
 *
 * On each pass of the wait loop, trace_empty_cond is signaled so that a
 * thread blocked in flush_trace_file(true) is released: reaching this loop
 * means the previous batch of records has been consumed.
 */
static void wait_for_trace_records_available(void)
{
    g_static_mutex_lock(&trace_lock);
    while (!(trace_available && trace_writeout_enabled)) {
        g_cond_signal(trace_empty_cond);
        g_cond_wait(trace_available_cond,
                    g_static_mutex_get_mutex(&trace_lock));
    }
    trace_available = false;
    g_static_mutex_unlock(&trace_lock);
}
154 | ||
155 | static gpointer writeout_thread(gpointer opaque) | |
156 | { | |
157 | TraceRecord *recordptr; | |
158 | union { | |
159 | TraceRecord rec; | |
160 | uint8_t bytes[sizeof(TraceRecord) + sizeof(uint64_t)]; | |
161 | } dropped; | |
162 | unsigned int idx = 0; | |
163 | uint64_t dropped_count; | |
164 | size_t unused __attribute__ ((unused)); | |
165 | ||
166 | for (;;) { | |
167 | wait_for_trace_records_available(); | |
168 | ||
169 | if (dropped_events) { | |
170 | dropped.rec.event = DROPPED_EVENT_ID, | |
171 | dropped.rec.timestamp_ns = get_clock(); | |
172 | dropped.rec.length = sizeof(TraceRecord) + sizeof(dropped_events), | |
173 | dropped.rec.reserved = 0; | |
174 | while (1) { | |
175 | dropped_count = dropped_events; | |
176 | if (g_atomic_int_compare_and_exchange((gint *)&dropped_events, | |
177 | dropped_count, 0)) { | |
178 | break; | |
179 | } | |
180 | } | |
181 | memcpy(dropped.rec.arguments, &dropped_count, sizeof(uint64_t)); | |
182 | unused = fwrite(&dropped.rec, dropped.rec.length, 1, trace_fp); | |
183 | } | |
184 | ||
185 | while (get_trace_record(idx, &recordptr)) { | |
186 | unused = fwrite(recordptr, recordptr->length, 1, trace_fp); | |
187 | writeout_idx += recordptr->length; | |
188 | free(recordptr); /* dont use g_free, can deadlock when traced */ | |
189 | idx = writeout_idx % TRACE_BUF_LEN; | |
190 | } | |
191 | ||
192 | fflush(trace_fp); | |
193 | } | |
194 | return NULL; | |
195 | } | |
196 | ||
197 | void trace_record_write_u64(TraceBufferRecord *rec, uint64_t val) | |
198 | { | |
199 | rec->rec_off = write_to_buffer(rec->rec_off, &val, sizeof(uint64_t)); | |
200 | } | |
201 | ||
202 | void trace_record_write_str(TraceBufferRecord *rec, const char *s, uint32_t slen) | |
203 | { | |
204 | /* Write string length first */ | |
205 | rec->rec_off = write_to_buffer(rec->rec_off, &slen, sizeof(slen)); | |
206 | /* Write actual string now */ | |
207 | rec->rec_off = write_to_buffer(rec->rec_off, (void*)s, slen); | |
208 | } | |
209 | ||
/**
 * Reserve space in the trace buffer and write the record header
 *
 * @rec      Filled in with the reservation (buffer index and write offset)
 * @event    Trace event ID stored in the record header
 * @datasize Number of argument bytes that will follow the header
 *
 * Returns 0 on success, -ENOSPC when the ring buffer is full (the event is
 * counted in dropped_events instead).
 *
 * The record is NOT marked valid here; trace_record_finish() publishes it
 * once the caller has written the arguments.
 */
int trace_record_start(TraceBufferRecord *rec, TraceEventID event, size_t datasize)
{
    unsigned int idx, rec_off, old_idx, new_idx;
    uint32_t rec_len = sizeof(TraceRecord) + datasize;
    uint64_t timestamp_ns = get_clock();

    /* Lock-free reservation: atomically advance trace_idx by rec_len */
    while (1) {
        old_idx = trace_idx;
        smp_rmb();
        new_idx = old_idx + rec_len;

        /* Cursors grow unbounded; their difference is the number of
         * unconsumed bytes in the ring */
        if (new_idx - writeout_idx > TRACE_BUF_LEN) {
            /* Trace Buffer Full, Event dropped ! */
            /* NOTE(review): 32-bit atomic on 64-bit dropped_events — see
             * matching note in writeout_thread; confirm on 64-bit hosts. */
            g_atomic_int_inc((gint *)&dropped_events);
            return -ENOSPC;
        }

        if (g_atomic_int_compare_and_exchange((gint *)&trace_idx,
                                              old_idx, new_idx)) {
            break;
        }
    }

    idx = old_idx % TRACE_BUF_LEN;

    /* Write the header in place (event, timestamp, length); the reserved
     * field is skipped and keeps whatever bytes were in the buffer */
    rec_off = idx;
    rec_off = write_to_buffer(rec_off, &event, sizeof(event));
    rec_off = write_to_buffer(rec_off, &timestamp_ns, sizeof(timestamp_ns));
    rec_off = write_to_buffer(rec_off, &rec_len, sizeof(rec_len));

    rec->tbuf_idx = idx;
    rec->rec_off = (idx + sizeof(TraceRecord)) % TRACE_BUF_LEN;
    return 0;
}
244 | ||
245 | static void read_from_buffer(unsigned int idx, void *dataptr, size_t size) | |
246 | { | |
247 | uint8_t *data_ptr = dataptr; | |
248 | uint32_t x = 0; | |
249 | while (x < size) { | |
250 | if (idx >= TRACE_BUF_LEN) { | |
251 | idx = idx % TRACE_BUF_LEN; | |
252 | } | |
253 | data_ptr[x++] = trace_buf[idx++]; | |
254 | } | |
255 | } | |
256 | ||
257 | static unsigned int write_to_buffer(unsigned int idx, void *dataptr, size_t size) | |
258 | { | |
259 | uint8_t *data_ptr = dataptr; | |
260 | uint32_t x = 0; | |
261 | while (x < size) { | |
262 | if (idx >= TRACE_BUF_LEN) { | |
263 | idx = idx % TRACE_BUF_LEN; | |
264 | } | |
265 | trace_buf[idx++] = data_ptr[x++]; | |
266 | } | |
267 | return idx; /* most callers wants to know where to write next */ | |
268 | } | |
269 | ||
/**
 * Mark a reserved trace record complete and publish it to the writeout thread
 *
 * @rec Reservation returned by trace_record_start()
 *
 * Sets TRACE_RECORD_VALID in the record's event field so get_trace_record()
 * will accept it, then kicks the writeout thread once enough unwritten bytes
 * have accumulated.
 */
void trace_record_finish(TraceBufferRecord *rec)
{
    TraceRecord record;
    /* Read back the header written by trace_record_start() */
    read_from_buffer(rec->tbuf_idx, &record, sizeof(TraceRecord));
    smp_wmb(); /* write barrier before marking as valid */
    record.event |= TRACE_RECORD_VALID;
    write_to_buffer(rec->tbuf_idx, &record, sizeof(TraceRecord));

    /* Kick writeout when a quarter of the ring is pending */
    if ((trace_idx - writeout_idx) > TRACE_BUF_FLUSH_THRESHOLD) {
        flush_trace_file(false);
    }
}
282 | ||
/**
 * Enable or disable trace file output
 *
 * @enable true opens trace_file_name and starts writeout; false closes it
 *
 * No-op when the requested state matches the current one.  The writeout
 * thread is quiesced before the file is touched.  On open failure, or if the
 * header cannot be written, trace_fp stays NULL and writeout stays disabled.
 */
void st_set_trace_file_enabled(bool enable)
{
    if (enable == !!trace_fp) {
        return; /* no change */
    }

    /* Halt trace writeout */
    flush_trace_file(true);
    trace_writeout_enabled = false;
    flush_trace_file(true);
    /* NOTE(review): flushed twice, presumably so records that became
     * available during the first drain are also written — confirm. */

    if (enable) {
        static const TraceLogHeader header = {
            .header_event_id = HEADER_EVENT_ID,
            .header_magic = HEADER_MAGIC,
            /* Older log readers will check for version at next location */
            .header_version = HEADER_VERSION,
        };

        trace_fp = fopen(trace_file_name, "wb");
        if (!trace_fp) {
            return;
        }

        if (fwrite(&header, sizeof header, 1, trace_fp) != 1) {
            fclose(trace_fp);
            trace_fp = NULL;
            return;
        }

        /* Resume trace writeout */
        trace_writeout_enabled = true;
        flush_trace_file(false);
    } else {
        fclose(trace_fp);
        trace_fp = NULL;
    }
}
321 | ||
322 | /** | |
323 | * Set the name of a trace file | |
324 | * | |
325 | * @file The trace file name or NULL for the default name-<pid> set at | |
326 | * config time | |
327 | */ | |
328 | bool st_set_trace_file(const char *file) | |
329 | { | |
330 | st_set_trace_file_enabled(false); | |
331 | ||
332 | g_free(trace_file_name); | |
333 | ||
334 | if (!file) { | |
335 | trace_file_name = g_strdup_printf(CONFIG_TRACE_FILE, getpid()); | |
336 | } else { | |
337 | trace_file_name = g_strdup_printf("%s", file); | |
338 | } | |
339 | ||
340 | st_set_trace_file_enabled(true); | |
341 | return true; | |
342 | } | |
343 | ||
344 | void st_print_trace_file_status(FILE *stream, int (*stream_printf)(FILE *stream, const char *fmt, ...)) | |
345 | { | |
346 | stream_printf(stream, "Trace file \"%s\" %s.\n", | |
347 | trace_file_name, trace_fp ? "on" : "off"); | |
348 | } | |
349 | ||
/**
 * Synchronously flush pending trace records to the file.
 *
 * Registered with atexit() in trace_backend_init() so buffered records are
 * not lost on normal process exit.
 */
void st_flush_trace_buffer(void)
{
    flush_trace_file(true);
}
354 | ||
355 | void trace_print_events(FILE *stream, fprintf_function stream_printf) | |
356 | { | |
357 | unsigned int i; | |
358 | ||
359 | for (i = 0; i < NR_TRACE_EVENTS; i++) { | |
360 | stream_printf(stream, "%s [Event ID %u] : state %u\n", | |
361 | trace_list[i].tp_name, i, trace_list[i].state); | |
362 | } | |
363 | } | |
364 | ||
365 | bool trace_event_set_state(const char *name, bool state) | |
366 | { | |
367 | unsigned int i; | |
368 | unsigned int len; | |
369 | bool wildcard = false; | |
370 | bool matched = false; | |
371 | ||
372 | len = strlen(name); | |
373 | if (len > 0 && name[len - 1] == '*') { | |
374 | wildcard = true; | |
375 | len -= 1; | |
376 | } | |
377 | for (i = 0; i < NR_TRACE_EVENTS; i++) { | |
378 | if (wildcard) { | |
379 | if (!strncmp(trace_list[i].tp_name, name, len)) { | |
380 | trace_list[i].state = state; | |
381 | matched = true; | |
382 | } | |
383 | continue; | |
384 | } | |
385 | if (!strcmp(trace_list[i].tp_name, name)) { | |
386 | trace_list[i].state = state; | |
387 | return true; | |
388 | } | |
389 | } | |
390 | return matched; | |
391 | } | |
392 | ||
/* Helper function to create a thread with signals blocked. Use glib's
 * portable threads since QEMU abstractions cannot be used due to reentrancy in
 * the tracer. Also note the signal masking on POSIX hosts so that the thread
 * does not steal signals when the rest of the program wants them blocked.
 *
 * @fn Thread entry point; returns the new GThread or NULL on failure.
 */
static GThread *trace_thread_create(GThreadFunc fn)
{
    GThread *thread;
#ifndef _WIN32
    sigset_t set, oldset;

    /* Block all signals so the new thread inherits a full mask; the caller's
     * mask is restored immediately after creation */
    sigfillset(&set);
    pthread_sigmask(SIG_SETMASK, &set, &oldset);
#endif
    /* FALSE: not joinable — the writeout thread runs for the process lifetime */
    thread = g_thread_create(fn, NULL, FALSE, NULL);
#ifndef _WIN32
    pthread_sigmask(SIG_SETMASK, &oldset, NULL);
#endif

    return thread;
}
414 | ||
/**
 * Initialize the simple trace backend
 *
 * @events Event specification forwarded to trace_backend_init_events()
 * @file   Trace output file name, or NULL for the configured default
 *
 * Returns false if the writeout thread could not be created.
 */
bool trace_backend_init(const char *events, const char *file)
{
    GThread *thread;

    if (!g_thread_supported()) {
#if !GLIB_CHECK_VERSION(2, 31, 0)
        g_thread_init(NULL);
#else
        /* glib >= 2.31 initializes threading automatically; reaching this
         * branch means glib itself is broken */
        fprintf(stderr, "glib threading failed to initialize.\n");
        exit(1);
#endif
    }

    trace_available_cond = g_cond_new();
    trace_empty_cond = g_cond_new();

    thread = trace_thread_create(writeout_thread);
    if (!thread) {
        fprintf(stderr, "warning: unable to initialize simple trace backend\n");
        return false;
    }

    /* Drain any buffered records on normal process exit */
    atexit(st_flush_trace_buffer);
    trace_backend_init_events(events);
    st_set_trace_file(file);
    return true;
}