]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blob - arch/i386/kernel/timers/timer_tsc.c
47675bbbb316eedb7edd0ba6a0bd7982d8d982a0
[mirror_ubuntu-artful-kernel.git] / arch / i386 / kernel / timers / timer_tsc.c
1 /*
2 * This code largely moved from arch/i386/kernel/time.c.
3 * See comments there for proper credits.
4 *
5 * 2004-06-25 Jesper Juhl
6 * moved mark_offset_tsc below cpufreq_delayed_get to avoid gcc 3.4
7 * failing to inline.
8 */
9
10 #include <linux/spinlock.h>
11 #include <linux/init.h>
12 #include <linux/timex.h>
13 #include <linux/errno.h>
14 #include <linux/cpufreq.h>
15 #include <linux/string.h>
16 #include <linux/jiffies.h>
17
18 #include <asm/timer.h>
19 #include <asm/io.h>
/* processor.h for the tsc_disable flag */
21 #include <asm/processor.h>
22
23 #include "io_ports.h"
24 #include "mach_timer.h"
25
26 #include <asm/hpet.h>
27 #include <asm/i8253.h>
28
29 #ifdef CONFIG_HPET_TIMER
30 static unsigned long hpet_usec_quotient;
31 static unsigned long hpet_last;
32 static struct timer_opts timer_tsc;
33 #endif
34
35 static inline void cpufreq_delayed_get(void);
36
37 int tsc_disable __devinitdata = 0;
38
39 static int use_tsc;
40 /* Number of usecs that the last interrupt was delayed */
41 static int delay_at_last_interrupt;
42
43 static unsigned long last_tsc_low; /* lsb 32 bits of Time Stamp Counter */
44 static unsigned long last_tsc_high; /* msb 32 bits of Time Stamp Counter */
45 static unsigned long long monotonic_base;
46 static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
47
/* convert from cycles(64bits) => nanoseconds (64bits)
 *  basic equation:
 *		ns = cycles / (freq / ns_per_sec)
 *		ns = cycles * (ns_per_sec / freq)
 *		ns = cycles * (10^9 / (cpu_khz * 10^3))
 *		ns = cycles * (10^6 / cpu_khz)
 *
 *	Then we use scaling math (suggested by george@mvista.com) to get:
 *		ns = cycles * (10^6 * SC / cpu_khz) / SC
 *		ns = cycles * cyc2ns_scale / SC
 *
 *	And since SC is a constant power of two, we can convert the div
 *  into a shift.
 *
 *  We can use khz divisor instead of mhz to keep a better precision, since
 *  cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
 *  (mathieu.desnoyers@polymtl.ca)
 *
 *			-johnstul@us.ibm.com "math is hard, lets go shopping!"
 */
static unsigned long cyc2ns_scale;
#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */

/*
 * Recompute the cycles -> nanoseconds multiplier for a CPU clock of
 * @cpu_khz kHz:
 *
 *	cyc2ns_scale = (10^6 << CYC2NS_SCALE_FACTOR) / cpu_khz
 *
 * A zero frequency would make the division trap, so in that case the
 * call is ignored and the previously computed scale stays in effect.
 */
static inline void set_cyc2ns_scale(unsigned long cpu_khz)
{
	if (!cpu_khz)
		return;

	cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR) / cpu_khz;
}
75
76 static inline unsigned long long cycles_2_ns(unsigned long long cyc)
77 {
78 return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
79 }
80
/* PIT count remembered across calls by mark_offset_tsc() */
static int count2;

/*
 * Cached *multiplier* used to turn TSC counts into microseconds
 * (the equation is spelled out in get_offset_tsc()).
 * Equal to 2^32 * (1 / (clocks per usec)).
 * Set by init_tsc() from the calibration result.
 */
static unsigned long fast_gettimeoffset_quotient;
89
90 static unsigned long get_offset_tsc(void)
91 {
92 register unsigned long eax, edx;
93
94 /* Read the Time Stamp Counter */
95
96 rdtsc(eax,edx);
97
98 /* .. relative to previous jiffy (32 bits is enough) */
99 eax -= last_tsc_low; /* tsc_low delta */
100
101 /*
102 * Time offset = (tsc_low delta) * fast_gettimeoffset_quotient
103 * = (tsc_low delta) * (usecs_per_clock)
104 * = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy)
105 *
106 * Using a mull instead of a divl saves up to 31 clock cycles
107 * in the critical path.
108 */
109
110 __asm__("mull %2"
111 :"=a" (eax), "=d" (edx)
112 :"rm" (fast_gettimeoffset_quotient),
113 "0" (eax));
114
115 /* our adjusted time offset in microseconds */
116 return delay_at_last_interrupt + edx;
117 }
118
119 static unsigned long long monotonic_clock_tsc(void)
120 {
121 unsigned long long last_offset, this_offset, base;
122 unsigned seq;
123
124 /* atomically read monotonic base & last_offset */
125 do {
126 seq = read_seqbegin(&monotonic_lock);
127 last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
128 base = monotonic_base;
129 } while (read_seqretry(&monotonic_lock, seq));
130
131 /* Read the Time Stamp Counter */
132 rdtscll(this_offset);
133
134 /* return the value in ns */
135 return base + cycles_2_ns(this_offset - last_offset);
136 }
137
138 /*
139 * Scheduler clock - returns current time in nanosec units.
140 */
141 unsigned long long sched_clock(void)
142 {
143 unsigned long long this_offset;
144
145 /*
146 * In the NUMA case we dont use the TSC as they are not
147 * synchronized across all CPUs.
148 */
149 #ifndef CONFIG_NUMA
150 if (!use_tsc)
151 #endif
152 /* no locking but a rare wrong value is not a big deal */
153 return jiffies_64 * (1000000000 / HZ);
154
155 /* Read the Time Stamp Counter */
156 rdtscll(this_offset);
157
158 /* return the value in ns */
159 return cycles_2_ns(this_offset);
160 }
161
/*
 * Busy-wait until the low 32 bits of the TSC have advanced by at least
 * @loops cycles. The loop body always runs once before the check.
 */
static void delay_tsc(unsigned long loops)
{
	unsigned long start, cur;

	rdtscl(start);
	for (;;) {
		rep_nop();
		rdtscl(cur);
		if ((cur - start) >= loops)
			break;
	}
}
173
174 #ifdef CONFIG_HPET_TIMER
175 static void mark_offset_tsc_hpet(void)
176 {
177 unsigned long long this_offset, last_offset;
178 unsigned long offset, temp, hpet_current;
179
180 write_seqlock(&monotonic_lock);
181 last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
182 /*
183 * It is important that these two operations happen almost at
184 * the same time. We do the RDTSC stuff first, since it's
185 * faster. To avoid any inconsistencies, we need interrupts
186 * disabled locally.
187 */
188 /*
189 * Interrupts are just disabled locally since the timer irq
190 * has the SA_INTERRUPT flag set. -arca
191 */
192 /* read Pentium cycle counter */
193
194 hpet_current = hpet_readl(HPET_COUNTER);
195 rdtsc(last_tsc_low, last_tsc_high);
196
197 /* lost tick compensation */
198 offset = hpet_readl(HPET_T0_CMP) - hpet_tick;
199 if (unlikely(((offset - hpet_last) > hpet_tick) && (hpet_last != 0))) {
200 int lost_ticks = (offset - hpet_last) / hpet_tick;
201 jiffies_64 += lost_ticks;
202 }
203 hpet_last = hpet_current;
204
205 /* update the monotonic base value */
206 this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
207 monotonic_base += cycles_2_ns(this_offset - last_offset);
208 write_sequnlock(&monotonic_lock);
209
210 /* calculate delay_at_last_interrupt */
211 /*
212 * Time offset = (hpet delta) * ( usecs per HPET clock )
213 * = (hpet delta) * ( usecs per tick / HPET clocks per tick)
214 * = (hpet delta) * ( hpet_usec_quotient ) / (2^32)
215 * Where,
216 * hpet_usec_quotient = (2^32 * usecs per tick)/HPET clocks per tick
217 */
218 delay_at_last_interrupt = hpet_current - offset;
219 ASM_MUL64_REG(temp, delay_at_last_interrupt,
220 hpet_usec_quotient, delay_at_last_interrupt);
221 }
222 #endif
223
224
225 #ifdef CONFIG_CPU_FREQ
226 #include <linux/workqueue.h>
227
228 static unsigned int cpufreq_delayed_issched = 0;
229 static unsigned int cpufreq_init = 0;
230 static struct work_struct cpufreq_delayed_get_work;
231
232 static void handle_cpufreq_delayed_get(void *v)
233 {
234 unsigned int cpu;
235 for_each_online_cpu(cpu) {
236 cpufreq_get(cpu);
237 }
238 cpufreq_delayed_issched = 0;
239 }
240
241 /* if we notice lost ticks, schedule a call to cpufreq_get() as it tries
242 * to verify the CPU frequency the timing core thinks the CPU is running
243 * at is still correct.
244 */
245 static inline void cpufreq_delayed_get(void)
246 {
247 if (cpufreq_init && !cpufreq_delayed_issched) {
248 cpufreq_delayed_issched = 1;
249 printk(KERN_DEBUG "Losing some ticks... checking if CPU frequency changed.\n");
250 schedule_work(&cpufreq_delayed_get_work);
251 }
252 }
253
254 /* If the CPU frequency is scaled, TSC-based delays will need a different
255 * loops_per_jiffy value to function properly.
256 */
257
258 static unsigned int ref_freq = 0;
259 static unsigned long loops_per_jiffy_ref = 0;
260
261 #ifndef CONFIG_SMP
262 static unsigned long fast_gettimeoffset_ref = 0;
263 static unsigned int cpu_khz_ref = 0;
264 #endif
265
266 static int
267 time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
268 void *data)
269 {
270 struct cpufreq_freqs *freq = data;
271
272 if (val != CPUFREQ_RESUMECHANGE)
273 write_seqlock_irq(&xtime_lock);
274 if (!ref_freq) {
275 ref_freq = freq->old;
276 loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy;
277 #ifndef CONFIG_SMP
278 fast_gettimeoffset_ref = fast_gettimeoffset_quotient;
279 cpu_khz_ref = cpu_khz;
280 #endif
281 }
282
283 if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
284 (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
285 (val == CPUFREQ_RESUMECHANGE)) {
286 if (!(freq->flags & CPUFREQ_CONST_LOOPS))
287 cpu_data[freq->cpu].loops_per_jiffy = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
288 #ifndef CONFIG_SMP
289 if (cpu_khz)
290 cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new);
291 if (use_tsc) {
292 if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
293 fast_gettimeoffset_quotient = cpufreq_scale(fast_gettimeoffset_ref, freq->new, ref_freq);
294 set_cyc2ns_scale(cpu_khz);
295 }
296 }
297 #endif
298 }
299
300 if (val != CPUFREQ_RESUMECHANGE)
301 write_sequnlock_irq(&xtime_lock);
302
303 return 0;
304 }
305
306 static struct notifier_block time_cpufreq_notifier_block = {
307 .notifier_call = time_cpufreq_notifier
308 };
309
310
311 static int __init cpufreq_tsc(void)
312 {
313 int ret;
314 INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get, NULL);
315 ret = cpufreq_register_notifier(&time_cpufreq_notifier_block,
316 CPUFREQ_TRANSITION_NOTIFIER);
317 if (!ret)
318 cpufreq_init = 1;
319 return ret;
320 }
321 core_initcall(cpufreq_tsc);
322
323 #else /* CONFIG_CPU_FREQ */
/* without CONFIG_CPU_FREQ there is nothing to re-check: empty stub */
static inline void cpufreq_delayed_get(void)
{
}
325 #endif
326
327 int recalibrate_cpu_khz(void)
328 {
329 #ifndef CONFIG_SMP
330 unsigned int cpu_khz_old = cpu_khz;
331
332 if (cpu_has_tsc) {
333 local_irq_disable();
334 init_cpu_khz();
335 local_irq_enable();
336 cpu_data[0].loops_per_jiffy =
337 cpufreq_scale(cpu_data[0].loops_per_jiffy,
338 cpu_khz_old,
339 cpu_khz);
340 return 0;
341 } else
342 return -ENODEV;
343 #else
344 return -ENODEV;
345 #endif
346 }
347 EXPORT_SYMBOL(recalibrate_cpu_khz);
348
349 static void mark_offset_tsc(void)
350 {
351 unsigned long lost,delay;
352 unsigned long delta = last_tsc_low;
353 int count;
354 int countmp;
355 static int count1 = 0;
356 unsigned long long this_offset, last_offset;
357 static int lost_count = 0;
358
359 write_seqlock(&monotonic_lock);
360 last_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
361 /*
362 * It is important that these two operations happen almost at
363 * the same time. We do the RDTSC stuff first, since it's
364 * faster. To avoid any inconsistencies, we need interrupts
365 * disabled locally.
366 */
367
368 /*
369 * Interrupts are just disabled locally since the timer irq
370 * has the SA_INTERRUPT flag set. -arca
371 */
372
373 /* read Pentium cycle counter */
374
375 rdtsc(last_tsc_low, last_tsc_high);
376
377 spin_lock(&i8253_lock);
378 outb_p(0x00, PIT_MODE); /* latch the count ASAP */
379
380 count = inb_p(PIT_CH0); /* read the latched count */
381 count |= inb(PIT_CH0) << 8;
382
383 /*
384 * VIA686a test code... reset the latch if count > max + 1
385 * from timer_pit.c - cjb
386 */
387 if (count > LATCH) {
388 outb_p(0x34, PIT_MODE);
389 outb_p(LATCH & 0xff, PIT_CH0);
390 outb(LATCH >> 8, PIT_CH0);
391 count = LATCH - 1;
392 }
393
394 spin_unlock(&i8253_lock);
395
396 if (pit_latch_buggy) {
397 /* get center value of last 3 time lutch */
398 if ((count2 >= count && count >= count1)
399 || (count1 >= count && count >= count2)) {
400 count2 = count1; count1 = count;
401 } else if ((count1 >= count2 && count2 >= count)
402 || (count >= count2 && count2 >= count1)) {
403 countmp = count;count = count2;
404 count2 = count1;count1 = countmp;
405 } else {
406 count2 = count1; count1 = count; count = count1;
407 }
408 }
409
410 /* lost tick compensation */
411 delta = last_tsc_low - delta;
412 {
413 register unsigned long eax, edx;
414 eax = delta;
415 __asm__("mull %2"
416 :"=a" (eax), "=d" (edx)
417 :"rm" (fast_gettimeoffset_quotient),
418 "0" (eax));
419 delta = edx;
420 }
421 delta += delay_at_last_interrupt;
422 lost = delta/(1000000/HZ);
423 delay = delta%(1000000/HZ);
424 if (lost >= 2) {
425 jiffies_64 += lost-1;
426
427 /* sanity check to ensure we're not always losing ticks */
428 if (lost_count++ > 100) {
429 printk(KERN_WARNING "Losing too many ticks!\n");
430 printk(KERN_WARNING "TSC cannot be used as a timesource. \n");
431 printk(KERN_WARNING "Possible reasons for this are:\n");
432 printk(KERN_WARNING " You're running with Speedstep,\n");
433 printk(KERN_WARNING " You don't have DMA enabled for your hard disk (see hdparm),\n");
434 printk(KERN_WARNING " Incorrect TSC synchronization on an SMP system (see dmesg).\n");
435 printk(KERN_WARNING "Falling back to a sane timesource now.\n");
436
437 clock_fallback();
438 }
439 /* ... but give the TSC a fair chance */
440 if (lost_count > 25)
441 cpufreq_delayed_get();
442 } else
443 lost_count = 0;
444 /* update the monotonic base value */
445 this_offset = ((unsigned long long)last_tsc_high<<32)|last_tsc_low;
446 monotonic_base += cycles_2_ns(this_offset - last_offset);
447 write_sequnlock(&monotonic_lock);
448
449 /* calculate delay_at_last_interrupt */
450 count = ((LATCH-1) - count) * TICK_SIZE;
451 delay_at_last_interrupt = (count + LATCH/2) / LATCH;
452
453 /* catch corner case where tick rollover occured
454 * between tsc and pit reads (as noted when
455 * usec delta is > 90% # of usecs/tick)
456 */
457 if (lost && abs(delay - delay_at_last_interrupt) > (900000/HZ))
458 jiffies_64++;
459 }
460
461 static int __init init_tsc(char* override)
462 {
463
464 /* check clock override */
465 if (override[0] && strncmp(override,"tsc",3)) {
466 #ifdef CONFIG_HPET_TIMER
467 if (is_hpet_enabled()) {
468 printk(KERN_ERR "Warning: clock= override failed. Defaulting to tsc\n");
469 } else
470 #endif
471 {
472 return -ENODEV;
473 }
474 }
475
476 /*
477 * If we have APM enabled or the CPU clock speed is variable
478 * (CPU stops clock on HLT or slows clock to save power)
479 * then the TSC timestamps may diverge by up to 1 jiffy from
480 * 'real time' but nothing will break.
481 * The most frequent case is that the CPU is "woken" from a halt
482 * state by the timer interrupt itself, so we get 0 error. In the
483 * rare cases where a driver would "wake" the CPU and request a
484 * timestamp, the maximum error is < 1 jiffy. But timestamps are
485 * still perfectly ordered.
486 * Note that the TSC counter will be reset if APM suspends
487 * to disk; this won't break the kernel, though, 'cuz we're
488 * smart. See arch/i386/kernel/apm.c.
489 */
490 /*
491 * Firstly we have to do a CPU check for chips with
492 * a potentially buggy TSC. At this point we haven't run
493 * the ident/bugs checks so we must run this hook as it
494 * may turn off the TSC flag.
495 *
496 * NOTE: this doesn't yet handle SMP 486 machines where only
497 * some CPU's have a TSC. Thats never worked and nobody has
498 * moaned if you have the only one in the world - you fix it!
499 */
500
501 count2 = LATCH; /* initialize counter for mark_offset_tsc() */
502
503 if (cpu_has_tsc) {
504 unsigned long tsc_quotient;
505 #ifdef CONFIG_HPET_TIMER
506 if (is_hpet_enabled() && hpet_use_timer) {
507 unsigned long result, remain;
508 printk("Using TSC for gettimeofday\n");
509 tsc_quotient = calibrate_tsc_hpet(NULL);
510 timer_tsc.mark_offset = &mark_offset_tsc_hpet;
511 /*
512 * Math to calculate hpet to usec multiplier
513 * Look for the comments at get_offset_tsc_hpet()
514 */
515 ASM_DIV64_REG(result, remain, hpet_tick,
516 0, KERNEL_TICK_USEC);
517 if (remain > (hpet_tick >> 1))
518 result++; /* rounding the result */
519
520 hpet_usec_quotient = result;
521 } else
522 #endif
523 {
524 tsc_quotient = calibrate_tsc();
525 }
526
527 if (tsc_quotient) {
528 fast_gettimeoffset_quotient = tsc_quotient;
529 use_tsc = 1;
530 /*
531 * We could be more selective here I suspect
532 * and just enable this for the next intel chips ?
533 */
534 /* report CPU clock rate in Hz.
535 * The formula is (10^6 * 2^32) / (2^32 * 1 / (clocks/us)) =
536 * clock/second. Our precision is about 100 ppm.
537 */
538 { unsigned long eax=0, edx=1000;
539 __asm__("divl %2"
540 :"=a" (cpu_khz), "=d" (edx)
541 :"r" (tsc_quotient),
542 "0" (eax), "1" (edx));
543 printk("Detected %u.%03u MHz processor.\n",
544 cpu_khz / 1000, cpu_khz % 1000);
545 }
546 set_cyc2ns_scale(cpu_khz);
547 return 0;
548 }
549 }
550 return -ENODEV;
551 }
552
553 static int tsc_resume(void)
554 {
555 write_seqlock(&monotonic_lock);
556 /* Assume this is the last mark offset time */
557 rdtsc(last_tsc_low, last_tsc_high);
558 #ifdef CONFIG_HPET_TIMER
559 if (is_hpet_enabled() && hpet_use_timer)
560 hpet_last = hpet_readl(HPET_COUNTER);
561 #endif
562 write_sequnlock(&monotonic_lock);
563 return 0;
564 }
565
566 #ifndef CONFIG_X86_TSC
567 /* disable flag for tsc. Takes effect by clearing the TSC cpu flag
568 * in cpu/common.c */
569 static int __init tsc_setup(char *str)
570 {
571 tsc_disable = 1;
572 return 1;
573 }
574 #else
575 static int __init tsc_setup(char *str)
576 {
577 printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
578 "cannot disable TSC.\n");
579 return 1;
580 }
581 #endif
582 __setup("notsc", tsc_setup);
583
584
585
586 /************************************************************/
587
588 /* tsc timer_opts struct */
589 static struct timer_opts timer_tsc = {
590 .name = "tsc",
591 .mark_offset = mark_offset_tsc,
592 .get_offset = get_offset_tsc,
593 .monotonic_clock = monotonic_clock_tsc,
594 .delay = delay_tsc,
595 .read_timer = read_timer_tsc,
596 .resume = tsc_resume,
597 };
598
599 struct init_timer_opts __initdata timer_tsc_init = {
600 .init = init_tsc,
601 .opts = &timer_tsc,
602 };