]>
Commit | Line | Data |
---|---|---|
97e1c18e MD |
1 | /* |
2 | * Copyright (C) 2008 Mathieu Desnoyers | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or modify | |
5 | * it under the terms of the GNU General Public License as published by | |
6 | * the Free Software Foundation; either version 2 of the License, or | |
7 | * (at your option) any later version. | |
8 | * | |
9 | * This program is distributed in the hope that it will be useful, | |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | * GNU General Public License for more details. | |
13 | * | |
14 | * You should have received a copy of the GNU General Public License | |
15 | * along with this program; if not, write to the Free Software | |
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
17 | */ | |
18 | #include <linux/module.h> | |
19 | #include <linux/mutex.h> | |
20 | #include <linux/types.h> | |
21 | #include <linux/jhash.h> | |
22 | #include <linux/list.h> | |
23 | #include <linux/rcupdate.h> | |
24 | #include <linux/tracepoint.h> | |
25 | #include <linux/err.h> | |
26 | #include <linux/slab.h> | |
27 | ||
/*
 * First element and one-past-the-last element of the core kernel tracepoint
 * array. Defined outside this file (presumably by the linker script -- TODO
 * confirm).
 */
extern struct tracepoint __start___tracepoints[];
extern struct tracepoint __stop___tracepoints[];

/*
 * Set to 1 to enable tracepoint debug output. With no initializer this
 * static object is zero, so debug_print_probes() returns without printing.
 */
static const int tracepoint_debug;
33 | ||
/*
 * tracepoints_mutex nests inside module_mutex. It protects the builtin and
 * module tracepoints and the hash table: every lookup, insertion and removal
 * in tracepoint_table below is expected to run with this mutex held.
 */
static DEFINE_MUTEX(tracepoints_mutex);
39 | ||
/*
 * Tracepoint hash table, containing the active tracepoints.
 * Protected by tracepoints_mutex.
 *
 * The table has 1 << TRACEPOINT_HASH_BITS (64) buckets. The size is a power
 * of two so that a bucket can be selected with
 * "hash & (TRACEPOINT_TABLE_SIZE - 1)".
 */
#define TRACEPOINT_HASH_BITS 6
#define TRACEPOINT_TABLE_SIZE (1 << TRACEPOINT_HASH_BITS)
46 | ||
47 | /* | |
48 | * Note about RCU : | |
49 | * It is used to to delay the free of multiple probes array until a quiescent | |
50 | * state is reached. | |
51 | * Tracepoint entries modifications are protected by the tracepoints_mutex. | |
52 | */ | |
53 | struct tracepoint_entry { | |
54 | struct hlist_node hlist; | |
55 | void **funcs; | |
56 | int refcount; /* Number of times armed. 0 if disarmed. */ | |
57 | struct rcu_head rcu; | |
58 | void *oldptr; | |
59 | unsigned char rcu_pending:1; | |
60 | char name[0]; | |
61 | }; | |
62 | ||
/* Hash buckets of struct tracepoint_entry, indexed by jhash() of the name. */
static struct hlist_head tracepoint_table[TRACEPOINT_TABLE_SIZE];
64 | ||
65 | static void free_old_closure(struct rcu_head *head) | |
66 | { | |
67 | struct tracepoint_entry *entry = container_of(head, | |
68 | struct tracepoint_entry, rcu); | |
69 | kfree(entry->oldptr); | |
70 | /* Make sure we free the data before setting the pending flag to 0 */ | |
71 | smp_wmb(); | |
72 | entry->rcu_pending = 0; | |
73 | } | |
74 | ||
75 | static void tracepoint_entry_free_old(struct tracepoint_entry *entry, void *old) | |
76 | { | |
77 | if (!old) | |
78 | return; | |
79 | entry->oldptr = old; | |
80 | entry->rcu_pending = 1; | |
81 | /* write rcu_pending before calling the RCU callback */ | |
82 | smp_wmb(); | |
ca2db6cf | 83 | call_rcu_sched(&entry->rcu, free_old_closure); |
97e1c18e MD |
84 | } |
85 | ||
86 | static void debug_print_probes(struct tracepoint_entry *entry) | |
87 | { | |
88 | int i; | |
89 | ||
90 | if (!tracepoint_debug) | |
91 | return; | |
92 | ||
93 | for (i = 0; entry->funcs[i]; i++) | |
94 | printk(KERN_DEBUG "Probe %d : %p\n", i, entry->funcs[i]); | |
95 | } | |
96 | ||
97 | static void * | |
98 | tracepoint_entry_add_probe(struct tracepoint_entry *entry, void *probe) | |
99 | { | |
100 | int nr_probes = 0; | |
101 | void **old, **new; | |
102 | ||
103 | WARN_ON(!probe); | |
104 | ||
105 | debug_print_probes(entry); | |
106 | old = entry->funcs; | |
107 | if (old) { | |
108 | /* (N -> N+1), (N != 0, 1) probes */ | |
109 | for (nr_probes = 0; old[nr_probes]; nr_probes++) | |
110 | if (old[nr_probes] == probe) | |
111 | return ERR_PTR(-EEXIST); | |
112 | } | |
113 | /* + 2 : one for new probe, one for NULL func */ | |
114 | new = kzalloc((nr_probes + 2) * sizeof(void *), GFP_KERNEL); | |
115 | if (new == NULL) | |
116 | return ERR_PTR(-ENOMEM); | |
117 | if (old) | |
118 | memcpy(new, old, nr_probes * sizeof(void *)); | |
119 | new[nr_probes] = probe; | |
120 | entry->refcount = nr_probes + 1; | |
121 | entry->funcs = new; | |
122 | debug_print_probes(entry); | |
123 | return old; | |
124 | } | |
125 | ||
126 | static void * | |
127 | tracepoint_entry_remove_probe(struct tracepoint_entry *entry, void *probe) | |
128 | { | |
129 | int nr_probes = 0, nr_del = 0, i; | |
130 | void **old, **new; | |
131 | ||
132 | old = entry->funcs; | |
133 | ||
134 | debug_print_probes(entry); | |
135 | /* (N -> M), (N > 1, M >= 0) probes */ | |
136 | for (nr_probes = 0; old[nr_probes]; nr_probes++) { | |
137 | if ((!probe || old[nr_probes] == probe)) | |
138 | nr_del++; | |
139 | } | |
140 | ||
141 | if (nr_probes - nr_del == 0) { | |
142 | /* N -> 0, (N > 1) */ | |
143 | entry->funcs = NULL; | |
144 | entry->refcount = 0; | |
145 | debug_print_probes(entry); | |
146 | return old; | |
147 | } else { | |
148 | int j = 0; | |
149 | /* N -> M, (N > 1, M > 0) */ | |
150 | /* + 1 for NULL */ | |
151 | new = kzalloc((nr_probes - nr_del + 1) | |
152 | * sizeof(void *), GFP_KERNEL); | |
153 | if (new == NULL) | |
154 | return ERR_PTR(-ENOMEM); | |
155 | for (i = 0; old[i]; i++) | |
156 | if ((probe && old[i] != probe)) | |
157 | new[j++] = old[i]; | |
158 | entry->refcount = nr_probes - nr_del; | |
159 | entry->funcs = new; | |
160 | } | |
161 | debug_print_probes(entry); | |
162 | return old; | |
163 | } | |
164 | ||
165 | /* | |
166 | * Get tracepoint if the tracepoint is present in the tracepoint hash table. | |
167 | * Must be called with tracepoints_mutex held. | |
168 | * Returns NULL if not present. | |
169 | */ | |
170 | static struct tracepoint_entry *get_tracepoint(const char *name) | |
171 | { | |
172 | struct hlist_head *head; | |
173 | struct hlist_node *node; | |
174 | struct tracepoint_entry *e; | |
175 | u32 hash = jhash(name, strlen(name), 0); | |
176 | ||
9795302a | 177 | head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)]; |
97e1c18e MD |
178 | hlist_for_each_entry(e, node, head, hlist) { |
179 | if (!strcmp(name, e->name)) | |
180 | return e; | |
181 | } | |
182 | return NULL; | |
183 | } | |
184 | ||
185 | /* | |
186 | * Add the tracepoint to the tracepoint hash table. Must be called with | |
187 | * tracepoints_mutex held. | |
188 | */ | |
189 | static struct tracepoint_entry *add_tracepoint(const char *name) | |
190 | { | |
191 | struct hlist_head *head; | |
192 | struct hlist_node *node; | |
193 | struct tracepoint_entry *e; | |
194 | size_t name_len = strlen(name) + 1; | |
195 | u32 hash = jhash(name, name_len-1, 0); | |
196 | ||
9795302a | 197 | head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)]; |
97e1c18e MD |
198 | hlist_for_each_entry(e, node, head, hlist) { |
199 | if (!strcmp(name, e->name)) { | |
200 | printk(KERN_NOTICE | |
201 | "tracepoint %s busy\n", name); | |
202 | return ERR_PTR(-EEXIST); /* Already there */ | |
203 | } | |
204 | } | |
205 | /* | |
206 | * Using kmalloc here to allocate a variable length element. Could | |
207 | * cause some memory fragmentation if overused. | |
208 | */ | |
209 | e = kmalloc(sizeof(struct tracepoint_entry) + name_len, GFP_KERNEL); | |
210 | if (!e) | |
211 | return ERR_PTR(-ENOMEM); | |
212 | memcpy(&e->name[0], name, name_len); | |
213 | e->funcs = NULL; | |
214 | e->refcount = 0; | |
215 | e->rcu_pending = 0; | |
216 | hlist_add_head(&e->hlist, head); | |
217 | return e; | |
218 | } | |
219 | ||
220 | /* | |
221 | * Remove the tracepoint from the tracepoint hash table. Must be called with | |
222 | * mutex_lock held. | |
223 | */ | |
224 | static int remove_tracepoint(const char *name) | |
225 | { | |
226 | struct hlist_head *head; | |
227 | struct hlist_node *node; | |
228 | struct tracepoint_entry *e; | |
229 | int found = 0; | |
230 | size_t len = strlen(name) + 1; | |
231 | u32 hash = jhash(name, len-1, 0); | |
232 | ||
9795302a | 233 | head = &tracepoint_table[hash & (TRACEPOINT_TABLE_SIZE - 1)]; |
97e1c18e MD |
234 | hlist_for_each_entry(e, node, head, hlist) { |
235 | if (!strcmp(name, e->name)) { | |
236 | found = 1; | |
237 | break; | |
238 | } | |
239 | } | |
240 | if (!found) | |
241 | return -ENOENT; | |
242 | if (e->refcount) | |
243 | return -EBUSY; | |
244 | hlist_del(&e->hlist); | |
ca2db6cf | 245 | /* Make sure the call_rcu_sched has been executed */ |
97e1c18e | 246 | if (e->rcu_pending) |
ca2db6cf | 247 | rcu_barrier_sched(); |
97e1c18e MD |
248 | kfree(e); |
249 | return 0; | |
250 | } | |
251 | ||
252 | /* | |
253 | * Sets the probe callback corresponding to one tracepoint. | |
254 | */ | |
255 | static void set_tracepoint(struct tracepoint_entry **entry, | |
256 | struct tracepoint *elem, int active) | |
257 | { | |
258 | WARN_ON(strcmp((*entry)->name, elem->name) != 0); | |
259 | ||
260 | /* | |
261 | * rcu_assign_pointer has a smp_wmb() which makes sure that the new | |
262 | * probe callbacks array is consistent before setting a pointer to it. | |
263 | * This array is referenced by __DO_TRACE from | |
264 | * include/linux/tracepoints.h. A matching smp_read_barrier_depends() | |
265 | * is used. | |
266 | */ | |
267 | rcu_assign_pointer(elem->funcs, (*entry)->funcs); | |
268 | elem->state = active; | |
269 | } | |
270 | ||
/*
 * Disable a tracepoint site by clearing its state flag. elem->funcs is left
 * untouched.
 * Note: only waiting an RCU period after disabling ensures that the original
 * callback is not used anymore. This is ensured by preempt_disable around the
 * call site.
 */
static void disable_tracepoint(struct tracepoint *elem)
{
	elem->state = 0;
}
281 | ||
282 | /** | |
283 | * tracepoint_update_probe_range - Update a probe range | |
284 | * @begin: beginning of the range | |
285 | * @end: end of the range | |
286 | * | |
287 | * Updates the probe callback corresponding to a range of tracepoints. | |
288 | */ | |
289 | void tracepoint_update_probe_range(struct tracepoint *begin, | |
290 | struct tracepoint *end) | |
291 | { | |
292 | struct tracepoint *iter; | |
293 | struct tracepoint_entry *mark_entry; | |
294 | ||
295 | mutex_lock(&tracepoints_mutex); | |
296 | for (iter = begin; iter < end; iter++) { | |
297 | mark_entry = get_tracepoint(iter->name); | |
298 | if (mark_entry) { | |
299 | set_tracepoint(&mark_entry, iter, | |
300 | !!mark_entry->refcount); | |
301 | } else { | |
302 | disable_tracepoint(iter); | |
303 | } | |
304 | } | |
305 | mutex_unlock(&tracepoints_mutex); | |
306 | } | |
307 | ||
/*
 * Update probes on every tracepoint site: first the core kernel range, then
 * the tracepoints in modules. tracepoint_update_probe_range() takes
 * tracepoints_mutex itself, so callers in this file drop the mutex before
 * calling this function.
 */
static void tracepoint_update_probes(void)
{
	/* Core kernel tracepoints */
	tracepoint_update_probe_range(__start___tracepoints,
		__stop___tracepoints);
	/* tracepoints in modules. */
	module_update_tracepoints();
}
319 | ||
320 | /** | |
321 | * tracepoint_probe_register - Connect a probe to a tracepoint | |
322 | * @name: tracepoint name | |
323 | * @probe: probe handler | |
324 | * | |
325 | * Returns 0 if ok, error value on error. | |
326 | * The probe address must at least be aligned on the architecture pointer size. | |
327 | */ | |
328 | int tracepoint_probe_register(const char *name, void *probe) | |
329 | { | |
330 | struct tracepoint_entry *entry; | |
331 | int ret = 0; | |
332 | void *old; | |
333 | ||
334 | mutex_lock(&tracepoints_mutex); | |
335 | entry = get_tracepoint(name); | |
336 | if (!entry) { | |
337 | entry = add_tracepoint(name); | |
338 | if (IS_ERR(entry)) { | |
339 | ret = PTR_ERR(entry); | |
340 | goto end; | |
341 | } | |
342 | } | |
343 | /* | |
ca2db6cf | 344 | * If we detect that a call_rcu_sched is pending for this tracepoint, |
97e1c18e MD |
345 | * make sure it's executed now. |
346 | */ | |
347 | if (entry->rcu_pending) | |
ca2db6cf | 348 | rcu_barrier_sched(); |
97e1c18e MD |
349 | old = tracepoint_entry_add_probe(entry, probe); |
350 | if (IS_ERR(old)) { | |
351 | ret = PTR_ERR(old); | |
352 | goto end; | |
353 | } | |
354 | mutex_unlock(&tracepoints_mutex); | |
355 | tracepoint_update_probes(); /* may update entry */ | |
356 | mutex_lock(&tracepoints_mutex); | |
357 | entry = get_tracepoint(name); | |
358 | WARN_ON(!entry); | |
9a1e9693 MD |
359 | if (entry->rcu_pending) |
360 | rcu_barrier_sched(); | |
97e1c18e MD |
361 | tracepoint_entry_free_old(entry, old); |
362 | end: | |
363 | mutex_unlock(&tracepoints_mutex); | |
364 | return ret; | |
365 | } | |
366 | EXPORT_SYMBOL_GPL(tracepoint_probe_register); | |
367 | ||
368 | /** | |
369 | * tracepoint_probe_unregister - Disconnect a probe from a tracepoint | |
370 | * @name: tracepoint name | |
371 | * @probe: probe function pointer | |
372 | * | |
373 | * We do not need to call a synchronize_sched to make sure the probes have | |
374 | * finished running before doing a module unload, because the module unload | |
375 | * itself uses stop_machine(), which insures that every preempt disabled section | |
376 | * have finished. | |
377 | */ | |
378 | int tracepoint_probe_unregister(const char *name, void *probe) | |
379 | { | |
380 | struct tracepoint_entry *entry; | |
381 | void *old; | |
382 | int ret = -ENOENT; | |
383 | ||
384 | mutex_lock(&tracepoints_mutex); | |
385 | entry = get_tracepoint(name); | |
386 | if (!entry) | |
387 | goto end; | |
388 | if (entry->rcu_pending) | |
ca2db6cf | 389 | rcu_barrier_sched(); |
97e1c18e MD |
390 | old = tracepoint_entry_remove_probe(entry, probe); |
391 | mutex_unlock(&tracepoints_mutex); | |
392 | tracepoint_update_probes(); /* may update entry */ | |
393 | mutex_lock(&tracepoints_mutex); | |
394 | entry = get_tracepoint(name); | |
395 | if (!entry) | |
396 | goto end; | |
9a1e9693 MD |
397 | if (entry->rcu_pending) |
398 | rcu_barrier_sched(); | |
97e1c18e MD |
399 | tracepoint_entry_free_old(entry, old); |
400 | remove_tracepoint(name); /* Ignore busy error message */ | |
401 | ret = 0; | |
402 | end: | |
403 | mutex_unlock(&tracepoints_mutex); | |
404 | return ret; | |
405 | } | |
406 | EXPORT_SYMBOL_GPL(tracepoint_probe_unregister); | |
407 | ||
408 | /** | |
409 | * tracepoint_get_iter_range - Get a next tracepoint iterator given a range. | |
410 | * @tracepoint: current tracepoints (in), next tracepoint (out) | |
411 | * @begin: beginning of the range | |
412 | * @end: end of the range | |
413 | * | |
414 | * Returns whether a next tracepoint has been found (1) or not (0). | |
415 | * Will return the first tracepoint in the range if the input tracepoint is | |
416 | * NULL. | |
417 | */ | |
418 | int tracepoint_get_iter_range(struct tracepoint **tracepoint, | |
419 | struct tracepoint *begin, struct tracepoint *end) | |
420 | { | |
421 | if (!*tracepoint && begin != end) { | |
422 | *tracepoint = begin; | |
423 | return 1; | |
424 | } | |
425 | if (*tracepoint >= begin && *tracepoint < end) | |
426 | return 1; | |
427 | return 0; | |
428 | } | |
429 | EXPORT_SYMBOL_GPL(tracepoint_get_iter_range); | |
430 | ||
431 | static void tracepoint_get_iter(struct tracepoint_iter *iter) | |
432 | { | |
433 | int found = 0; | |
434 | ||
435 | /* Core kernel tracepoints */ | |
436 | if (!iter->module) { | |
437 | found = tracepoint_get_iter_range(&iter->tracepoint, | |
438 | __start___tracepoints, __stop___tracepoints); | |
439 | if (found) | |
440 | goto end; | |
441 | } | |
442 | /* tracepoints in modules. */ | |
443 | found = module_get_iter_tracepoints(iter); | |
444 | end: | |
445 | if (!found) | |
446 | tracepoint_iter_reset(iter); | |
447 | } | |
448 | ||
/*
 * Begin (or resume) an iteration: positions @iter on a valid tracepoint via
 * tracepoint_get_iter(), which resets the iterator if none is found.
 */
void tracepoint_iter_start(struct tracepoint_iter *iter)
{
	tracepoint_get_iter(iter);
}
EXPORT_SYMBOL_GPL(tracepoint_iter_start);
454 | ||
/*
 * Advance @iter to the next tracepoint. The iterator is reset by
 * tracepoint_get_iter() when no further tracepoint is found.
 */
void tracepoint_iter_next(struct tracepoint_iter *iter)
{
	iter->tracepoint++;
	/*
	 * iter->tracepoint may be invalid because we blindly incremented it.
	 * Make sure it is valid by marshalling on the tracepoints, getting the
	 * tracepoints from following modules if necessary.
	 */
	tracepoint_get_iter(iter);
}
EXPORT_SYMBOL_GPL(tracepoint_iter_next);
466 | ||
/* End an iteration. Currently a no-op: there is nothing to release. */
void tracepoint_iter_stop(struct tracepoint_iter *iter)
{
}
EXPORT_SYMBOL_GPL(tracepoint_iter_stop);
471 | ||
/*
 * Rewind @iter: with both fields NULL, the next tracepoint_get_iter() call
 * starts again from the first core kernel tracepoint.
 */
void tracepoint_iter_reset(struct tracepoint_iter *iter)
{
	iter->module = NULL;
	iter->tracepoint = NULL;
}
EXPORT_SYMBOL_GPL(tracepoint_iter_reset);