/*
 * Split spinlock implementation out into its own file, so it can be
 * compiled in a FTRACE-compatible way.
 */
#include <linux/kernel_stat.h>
#include <linux/spinlock.h>
#include <linux/debugfs.h>
#include <linux/log2.h>
#include <linux/gfp.h>
#include <linux/slab.h>

#include <asm/paravirt.h>

#include <xen/interface/xen.h>
#include <xen/events.h>

#include "xen-ops.h"
#include "debugfs.h"

#ifdef CONFIG_XEN_DEBUG_FS
static struct xen_spinlock_stats
{
	u64 taken;
	u32 taken_slow;
	u32 taken_slow_nested;
	u32 taken_slow_pickup;
	u32 taken_slow_spurious;
	u32 taken_slow_irqenable;

	u64 released;
	u32 released_slow;
	u32 released_slow_kicked;

#define HISTO_BUCKETS	30
	u32 histo_spin_total[HISTO_BUCKETS+1];
	u32 histo_spin_spinning[HISTO_BUCKETS+1];
	u32 histo_spin_blocked[HISTO_BUCKETS+1];

	u64 time_total;
	u64 time_spinning;
	u64 time_blocked;
} spinlock_stats;

static u8 zero_stats;

static unsigned lock_timeout = 1 << 10;
#define TIMEOUT lock_timeout

static inline void check_zero(void)
{
	if (unlikely(zero_stats)) {
		memset(&spinlock_stats, 0, sizeof(spinlock_stats));
		zero_stats = 0;
	}
}

#define ADD_STATS(elem, val)			\
	do { check_zero(); spinlock_stats.elem += (val); } while(0)

static inline u64 spin_time_start(void)
{
	return xen_clocksource_read();
}

static void __spin_time_accum(u64 delta, u32 *array)
{
	unsigned index = ilog2(delta);

	check_zero();

	if (index < HISTO_BUCKETS)
		array[index]++;
	else
		array[HISTO_BUCKETS]++;
}
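
/*
 * Worked example of the bucketing above: each histogram slot covers a
 * power-of-two range of deltas (nominally nanoseconds, per
 * xen_clocksource_read()).  ilog2(100) == 6, so a 100ns delta lands
 * in bucket 6 (the [64, 128) range); anything with ilog2(delta) >=
 * HISTO_BUCKETS falls into the final overflow slot.
 */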

static inline void spin_time_accum_spinning(u64 start)
{
	u32 delta = xen_clocksource_read() - start;

	__spin_time_accum(delta, spinlock_stats.histo_spin_spinning);
	spinlock_stats.time_spinning += delta;
}

static inline void spin_time_accum_total(u64 start)
{
	u32 delta = xen_clocksource_read() - start;

	__spin_time_accum(delta, spinlock_stats.histo_spin_total);
	spinlock_stats.time_total += delta;
}

static inline void spin_time_accum_blocked(u64 start)
{
	u32 delta = xen_clocksource_read() - start;

	__spin_time_accum(delta, spinlock_stats.histo_spin_blocked);
	spinlock_stats.time_blocked += delta;
}
#else  /* !CONFIG_XEN_DEBUG_FS */
#define TIMEOUT			(1 << 10)
#define ADD_STATS(elem, val)	do { (void)(val); } while(0)

static inline u64 spin_time_start(void)
{
	return 0;
}

static inline void spin_time_accum_total(u64 start)
{
}
static inline void spin_time_accum_spinning(u64 start)
{
}
static inline void spin_time_accum_blocked(u64 start)
{
}
#endif  /* CONFIG_XEN_DEBUG_FS */

/*
 * Size struct xen_spinlock so it's the same as arch_spinlock_t.
 */
#if NR_CPUS < 256
typedef u8 xen_spinners_t;
# define inc_spinners(xl) \
	asm(LOCK_PREFIX " incb %0" : "+m" ((xl)->spinners) : : "memory");
# define dec_spinners(xl) \
	asm(LOCK_PREFIX " decb %0" : "+m" ((xl)->spinners) : : "memory");
#else
typedef u16 xen_spinners_t;
# define inc_spinners(xl) \
	asm(LOCK_PREFIX " incw %0" : "+m" ((xl)->spinners) : : "memory");
# define dec_spinners(xl) \
	asm(LOCK_PREFIX " decw %0" : "+m" ((xl)->spinners) : : "memory");
#endif

struct xen_spinlock {
	unsigned char lock;		/* 0 -> free; 1 -> locked */
	xen_spinners_t spinners;	/* count of waiting cpus */
};
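
/*
 * The u8/u16 choice for xen_spinners_t mirrors the ticket-size split
 * arch_spinlock_t itself makes at NR_CPUS < 256, so (assuming the
 * usual structure padding) the two types come out the same size; the
 * BUILD_BUG_ON() in xen_init_spinlocks() below is the authoritative
 * check.  The LOCK_PREFIX on inc_spinners()/dec_spinners() keeps the
 * waiter count atomic, since several CPUs can enter or leave the
 * slow path at once.
 */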

static int xen_spin_is_locked(struct arch_spinlock *lock)
{
	struct xen_spinlock *xl = (struct xen_spinlock *)lock;

	return xl->lock != 0;
}

static int xen_spin_is_contended(struct arch_spinlock *lock)
{
	struct xen_spinlock *xl = (struct xen_spinlock *)lock;

	/* Not strictly true; this is only the count of contended
	   lock-takers entering the slow path. */
	return xl->spinners != 0;
}

static int xen_spin_trylock(struct arch_spinlock *lock)
{
	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
	u8 old = 1;

	asm("xchgb %b0,%1"
	    : "+q" (old), "+m" (xl->lock) : : "memory");

	return old == 0;
}
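
/*
 * The xchgb above writes 1 into xl->lock and leaves the previous
 * value in 'old'; this CPU acquired the lock iff that previous value
 * was 0.  No LOCK_PREFIX is needed: on x86, xchg with a memory
 * operand is implicitly locked, so the single instruction is both the
 * test and the atomic set.
 */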

static DEFINE_PER_CPU(char *, irq_name);
static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners);

/*
 * Mark a cpu as interested in a lock.  Returns the CPU's previous
 * lock of interest, in case we got preempted by an interrupt.
 */
static inline struct xen_spinlock *spinning_lock(struct xen_spinlock *xl)
{
	struct xen_spinlock *prev;

	prev = __this_cpu_read(lock_spinners);
	__this_cpu_write(lock_spinners, xl);

	wmb();			/* set lock of interest before count */

	inc_spinners(xl);

	return prev;
}

/*
 * Mark a cpu as no longer interested in a lock.  Restores previous
 * lock of interest (NULL for none).
 */
static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock *prev)
{
	dec_spinners(xl);
	wmb();			/* decrement count before restoring lock */
	__this_cpu_write(lock_spinners, prev);
}
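
/*
 * The wmb() calls in spinning_lock()/unspinning_lock() mirror each
 * other: a waiter publishes its lock of interest before bumping
 * xl->spinners, and drops the count before withdrawing the pointer.
 * The unlock path reads in the opposite order (xl->spinners first,
 * then each CPU's lock_spinners), so whenever it sees a nonzero count
 * the published pointer is already visible and the kick is not lost.
 */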

static noinline int xen_spin_lock_slow(struct arch_spinlock *lock, bool irq_enable)
{
	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
	struct xen_spinlock *prev;
	int irq = __this_cpu_read(lock_kicker_irq);
	int ret;
	u64 start;

	/* If kicker interrupts not initialized yet, just spin */
	if (irq == -1)
		return 0;

	start = spin_time_start();

	/* announce we're spinning */
	prev = spinning_lock(xl);

	ADD_STATS(taken_slow, 1);
	ADD_STATS(taken_slow_nested, prev != NULL);

	do {
		unsigned long flags;

		/* clear pending */
		xen_clear_irq_pending(irq);

		/* check again to make sure the lock didn't become free
		   while we weren't looking */
		ret = xen_spin_trylock(lock);
		if (ret) {
			ADD_STATS(taken_slow_pickup, 1);

			/*
			 * If we interrupted another spinlock while it
			 * was blocking, make sure it doesn't block
			 * without rechecking the lock.
			 */
			if (prev != NULL)
				xen_set_irq_pending(irq);
			goto out;
		}

		flags = arch_local_save_flags();
		if (irq_enable) {
			ADD_STATS(taken_slow_irqenable, 1);
			raw_local_irq_enable();
		}

		/*
		 * Block until the irq becomes pending.  If we're
		 * interrupted at this point (after the trylock but
		 * before entering the block), then the nested lock
		 * handler guarantees that the irq will be left
		 * pending if there's any chance the lock became free;
		 * xen_poll_irq() returns immediately if the irq is
		 * pending.
		 */
		xen_poll_irq(irq);

		raw_local_irq_restore(flags);

		ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq));
	} while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */

	kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq));

out:
	unspinning_lock(xl, prev);
	spin_time_accum_blocked(start);

	return ret;
}
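
/*
 * The loop above is a sleep/kick protocol built on a per-CPU Xen
 * event channel: clear the event, retry the lock once, then let
 * xen_poll_irq() block this vCPU in the hypervisor until the unlocker
 * sends XEN_SPIN_UNLOCK_VECTOR (any other wakeup shows up as
 * "spurious" and just loops).  Blocking instead of spinning is the
 * whole point of PV spinlocks: a spinning vCPU can burn its entire
 * timeslice waiting on a lock whose holder isn't even scheduled.
 */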

static inline void __xen_spin_lock(struct arch_spinlock *lock, bool irq_enable)
{
	struct xen_spinlock *xl = (struct xen_spinlock *)lock;
	unsigned timeout;
	u8 oldval;
	u64 start_spin;

	ADD_STATS(taken, 1);

	start_spin = spin_time_start();

	do {
		u64 start_spin_fast = spin_time_start();

		timeout = TIMEOUT;

		asm("1: xchgb %1,%0\n"
		    "   testb %1,%1\n"
		    "   jz 3f\n"
		    "2: rep;nop\n"
		    "   cmpb $0,%0\n"
		    "   je 1b\n"
		    "   dec %2\n"
		    "   jnz 2b\n"
		    "3:\n"
		    : "+m" (xl->lock), "=q" (oldval), "+r" (timeout)
		    : "1" (1)
		    : "memory");

		spin_time_accum_spinning(start_spin_fast);

	} while (unlikely(oldval != 0 &&
			  (TIMEOUT == ~0 || !xen_spin_lock_slow(lock, irq_enable))));

	spin_time_accum_total(start_spin);
}
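
/*
 * A C-ish sketch of what the fast-path asm above does (cpu_relax()
 * standing in for the rep;nop pause hint):
 *
 *	oldval = 1;
 * retry:
 *	oldval = xchg(&xl->lock, oldval);	// label 1: grab attempt
 *	if (oldval != 0) {			// still held by someone
 *		do {				// label 2: spin politely
 *			cpu_relax();
 *			if (xl->lock == 0)
 *				goto retry;	// looks free, try again
 *		} while (--timeout != 0);
 *	}
 *	// label 3: either acquired (oldval == 0) or timed out
 *
 * Once the timeout expires, the outer do/while drops into
 * xen_spin_lock_slow() -- unless TIMEOUT == ~0, which disables the
 * slow path and spins forever.
 */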

static void xen_spin_lock(struct arch_spinlock *lock)
{
	__xen_spin_lock(lock, false);
}

static void xen_spin_lock_flags(struct arch_spinlock *lock, unsigned long flags)
{
	__xen_spin_lock(lock, !raw_irqs_disabled_flags(flags));
}

static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl)
{
	int cpu;

	ADD_STATS(released_slow, 1);

	for_each_online_cpu(cpu) {
		/* XXX should mix up next cpu selection */
		if (per_cpu(lock_spinners, cpu) == xl) {
			ADD_STATS(released_slow_kicked, 1);
			xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
		}
	}
}

static void xen_spin_unlock(struct arch_spinlock *lock)
{
	struct xen_spinlock *xl = (struct xen_spinlock *)lock;

	ADD_STATS(released, 1);

	smp_wmb();		/* make sure no writes get moved after unlock */
	xl->lock = 0;		/* release lock */

	/*
	 * Make sure unlock happens before checking for waiting
	 * spinners.  We need a strong barrier to enforce the
	 * write-read ordering to different memory locations, as the
	 * CPU makes no implied guarantees about their ordering.
	 */
	mb();

	if (unlikely(xl->spinners))
		xen_spin_unlock_slow(xl);
}
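
/*
 * Note the release itself is a plain byte store: only the holder ever
 * writes 0, and smp_wmb() keeps the critical section's writes from
 * leaking past it.  The mb() matters because x86 can satisfy the
 * later read of xl->spinners before the store of 0 is visible to
 * other CPUs; without it, a waiter that just failed its trylock could
 * park in xen_poll_irq() an instant after we scanned a stale zero
 * spinner count, and nobody would be left to kick it.
 */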

static irqreturn_t dummy_handler(int irq, void *dev_id)
{
	BUG();
	return IRQ_HANDLED;
}

void __cpuinit xen_init_lock_cpu(int cpu)
{
	int irq;
	char *name;

	WARN(per_cpu(lock_kicker_irq, cpu) >= 0, "spinlock on CPU%d exists on IRQ%d!\n",
	     cpu, per_cpu(lock_kicker_irq, cpu));

	/*
	 * See git commit f10cd522c5fbfec9ae3cc01967868c9c2401ed23
	 * (xen: disable PV spinlocks on HVM)
	 */
	if (xen_hvm_domain())
		return;

	name = kasprintf(GFP_KERNEL, "spinlock%d", cpu);
	irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR,
				     cpu,
				     dummy_handler,
				     IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
				     name,
				     NULL);

	if (irq >= 0) {
		disable_irq(irq); /* make sure it's never delivered */
		per_cpu(lock_kicker_irq, cpu) = irq;
		per_cpu(irq_name, cpu) = name;
	}

	printk("cpu %d spinlock event irq %d\n", cpu, irq);
}
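
/*
 * The "kicker" irq bound above is never meant to fire as a real
 * interrupt: dummy_handler() BUG()s if it ever runs, and
 * disable_irq() keeps it masked.  The binding exists purely so each
 * CPU owns an event channel that xen_poll_irq() can sleep on and that
 * xen_spin_unlock_slow() can target via xen_send_IPI_one().
 */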

void xen_uninit_lock_cpu(int cpu)
{
	/*
	 * See git commit f10cd522c5fbfec9ae3cc01967868c9c2401ed23
	 * (xen: disable PV spinlocks on HVM)
	 */
	if (xen_hvm_domain())
		return;

	unbind_from_irqhandler(per_cpu(lock_kicker_irq, cpu), NULL);
	per_cpu(lock_kicker_irq, cpu) = -1;
	kfree(per_cpu(irq_name, cpu));
	per_cpu(irq_name, cpu) = NULL;
}

void __init xen_init_spinlocks(void)
{
	/*
	 * See git commit f10cd522c5fbfec9ae3cc01967868c9c2401ed23
	 * (xen: disable PV spinlocks on HVM)
	 */
	if (xen_hvm_domain())
		return;

	BUILD_BUG_ON(sizeof(struct xen_spinlock) > sizeof(arch_spinlock_t));

	pv_lock_ops.spin_is_locked = xen_spin_is_locked;
	pv_lock_ops.spin_is_contended = xen_spin_is_contended;
	pv_lock_ops.spin_lock = xen_spin_lock;
	pv_lock_ops.spin_lock_flags = xen_spin_lock_flags;
	pv_lock_ops.spin_trylock = xen_spin_trylock;
	pv_lock_ops.spin_unlock = xen_spin_unlock;
}
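
/*
 * Hooking pv_lock_ops routes every spin_lock()/spin_unlock() in the
 * kernel through the functions above via the paravirt-ops
 * indirection, with no change at the call sites; on bare metal the
 * same hooks point at the native ticket-lock implementation instead.
 */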

#ifdef CONFIG_XEN_DEBUG_FS

static struct dentry *d_spin_debug;

static int __init xen_spinlock_debugfs(void)
{
	struct dentry *d_xen = xen_init_debugfs();

	if (d_xen == NULL)
		return -ENOMEM;

	d_spin_debug = debugfs_create_dir("spinlocks", d_xen);

	debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats);

	debugfs_create_u32("timeout", 0644, d_spin_debug, &lock_timeout);

	debugfs_create_u64("taken", 0444, d_spin_debug, &spinlock_stats.taken);
	debugfs_create_u32("taken_slow", 0444, d_spin_debug,
			   &spinlock_stats.taken_slow);
	debugfs_create_u32("taken_slow_nested", 0444, d_spin_debug,
			   &spinlock_stats.taken_slow_nested);
	debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug,
			   &spinlock_stats.taken_slow_pickup);
	debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug,
			   &spinlock_stats.taken_slow_spurious);
	debugfs_create_u32("taken_slow_irqenable", 0444, d_spin_debug,
			   &spinlock_stats.taken_slow_irqenable);

	debugfs_create_u64("released", 0444, d_spin_debug, &spinlock_stats.released);
	debugfs_create_u32("released_slow", 0444, d_spin_debug,
			   &spinlock_stats.released_slow);
	debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug,
			   &spinlock_stats.released_slow_kicked);

	debugfs_create_u64("time_spinning", 0444, d_spin_debug,
			   &spinlock_stats.time_spinning);
	debugfs_create_u64("time_blocked", 0444, d_spin_debug,
			   &spinlock_stats.time_blocked);
	debugfs_create_u64("time_total", 0444, d_spin_debug,
			   &spinlock_stats.time_total);

	debugfs_create_u32_array("histo_total", 0444, d_spin_debug,
				 spinlock_stats.histo_spin_total, HISTO_BUCKETS + 1);
	debugfs_create_u32_array("histo_spinning", 0444, d_spin_debug,
				 spinlock_stats.histo_spin_spinning, HISTO_BUCKETS + 1);
	debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
				 spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);

	return 0;
}
fs_initcall(xen_spinlock_debugfs);
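
/*
 * With CONFIG_XEN_DEBUG_FS enabled, the counters appear under debugfs
 * (paths below assume the conventional mount point /sys/kernel/debug):
 *
 *	cat /sys/kernel/debug/xen/spinlocks/taken_slow
 *	echo 1 > /sys/kernel/debug/xen/spinlocks/zero_stats	# reset stats
 *
 * "timeout" is writable too, and tunes how many fast-path iterations
 * are spent spinning before a waiter blocks.
 */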

#endif	/* CONFIG_XEN_DEBUG_FS */