/*
 * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
 * Copyright 2003 Andi Kleen, SuSE Labs.
 *
 * Thanks to hpa@transmeta.com for some useful hints.
 * Special thanks to Ingo Molnar for his early experience with
 * a different vsyscall implementation for Linux/IA32 and for the name.
 *
 * vsyscall 1 is located at -10Mbyte, vsyscall 2 is located
 * at virtual address -10Mbyte+1024bytes etc... There are at most 4
 * vsyscalls. One vsyscall can reserve more than 1 slot to avoid
 * jumping out of line if necessary. We cannot add more with this
 * mechanism because older kernels won't return -ENOSYS.
 * If we want more than four we need a vDSO.
 *
 * Note: the concept clashes with user mode linux. If you use UML and
 * want per guest time just set the kernel.vsyscall64 sysctl to 0.
 */
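
/*
 * Illustrative sketch (not part of this file): user space can call a
 * vsyscall directly at its fixed address.  With the standard -10MB
 * base described above, slot 0 (vgettimeofday) lives at
 * 0xffffffffff600000:
 *
 *	struct timeval tv;
 *	long (*vgtod)(struct timeval *, struct timezone *) =
 *		(void *)0xffffffffff600000UL;
 *	vgtod(&tv, NULL);
 *
 * glibc normally hides this behind the ordinary gettimeofday() call.
 */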

#include <linux/time.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/timer.h>
#include <linux/seqlock.h>
#include <linux/jiffies.h>
#include <linux/sysctl.h>
#include <linux/clocksource.h>
#include <linux/getcpu.h>
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/notifier.h>

#include <asm/vsyscall.h>
#include <asm/pgtable.h>
#include <asm/page.h>
#include <asm/unistd.h>
#include <asm/fixmap.h>
#include <asm/errno.h>
#include <asm/io.h>
#include <asm/segment.h>
#include <asm/desc.h>
#include <asm/topology.h>
#include <asm/vgtod.h>

#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
#define __syscall_clobber "r11","cx","memory"

/*
 * vsyscall_gtod_data contains data that is:
 * - readonly from vsyscalls
 * - written by timer interrupt or sysctl (/proc/sys/kernel/vsyscall64)
 * Try to keep this structure as small as possible to avoid cache line ping-pongs.
 */
int __vgetcpu_mode __section_vgetcpu_mode;

struct vsyscall_gtod_data __vsyscall_gtod_data __section_vsyscall_gtod_data =
{
	.lock = SEQLOCK_UNLOCKED,
	.sysctl_enabled = 1,
};

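/*
 * Writer side: the timekeeping core calls the two functions below to
 * publish updated timezone and time data to the vsyscall-visible
 * structure under the seqlock, so the lockless readers further down
 * always see a consistent snapshot.
 */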
void update_vsyscall_tz(void)
{
	unsigned long flags;

	write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
	/* sys_tz has changed */
	vsyscall_gtod_data.sys_tz = sys_tz;
	write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
}

void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
{
	unsigned long flags;

	write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
	/* copy vsyscall data */
	vsyscall_gtod_data.clock.vread = clock->vread;
	vsyscall_gtod_data.clock.cycle_last = clock->cycle_last;
	vsyscall_gtod_data.clock.mask = clock->mask;
	vsyscall_gtod_data.clock.mult = clock->mult;
	vsyscall_gtod_data.clock.shift = clock->shift;
	vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
	vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
	vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic;
	write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
}

/* RED-PEN: may want to re-add seq locking, but then the variable should be
 * write-once.
 */
static __always_inline void do_get_tz(struct timezone *tz)
{
	*tz = __vsyscall_gtod_data.sys_tz;
}

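/*
 * Fallback paths below issue the real syscall.  Per the x86-64 syscall
 * ABI the syscall number goes in %rax ("0"/"=a"), the arguments in
 * %rdi ("D") and %rsi ("S"), and the kernel clobbers %rcx and %r11
 * (hence __syscall_clobber above).
 */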
static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz)
{
	int ret;
	asm volatile("syscall"
		: "=a" (ret)
		: "0" (__NR_gettimeofday), "D" (tv), "S" (tz)
		: __syscall_clobber);
	return ret;
}

static __always_inline long time_syscall(long *t)
{
	long secs;
	asm volatile("syscall"
		: "=a" (secs)
		: "0" (__NR_time), "D" (t) : __syscall_clobber);
	return secs;
}

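/*
 * Lockless reader: snapshot every timekeeping field inside a seqlock
 * read section and retry if update_vsyscall() raced with us.  The
 * result is then computed outside the loop as
 *
 *	ns = wall_time_nsec + (((now - cycle_last) & mask) * mult) >> shift
 *
 * i.e. the wall time at the last update plus the clocksource delta
 * scaled to nanoseconds.
 */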
static __always_inline void do_vgettimeofday(struct timeval *tv)
{
	cycle_t now, base, mask, cycle_delta;
	unsigned seq;
	unsigned long mult, shift, nsec;
	cycle_t (*vread)(void);
	do {
		seq = read_seqbegin(&__vsyscall_gtod_data.lock);

		vread = __vsyscall_gtod_data.clock.vread;
		if (unlikely(!__vsyscall_gtod_data.sysctl_enabled || !vread)) {
			gettimeofday(tv, NULL);
			return;
		}
		now = vread();
		base = __vsyscall_gtod_data.clock.cycle_last;
		mask = __vsyscall_gtod_data.clock.mask;
		mult = __vsyscall_gtod_data.clock.mult;
		shift = __vsyscall_gtod_data.clock.shift;

		tv->tv_sec = __vsyscall_gtod_data.wall_time_sec;
		nsec = __vsyscall_gtod_data.wall_time_nsec;
	} while (read_seqretry(&__vsyscall_gtod_data.lock, seq));

	/* calculate interval: */
	cycle_delta = (now - base) & mask;
	/* convert to nsecs: */
	nsec += (cycle_delta * mult) >> shift;

	while (nsec >= NSEC_PER_SEC) {
		tv->tv_sec += 1;
		nsec -= NSEC_PER_SEC;
	}
	tv->tv_usec = nsec / NSEC_PER_USEC;
}

int __vsyscall(0) vgettimeofday(struct timeval *tv, struct timezone *tz)
{
	if (tv)
		do_vgettimeofday(tv);
	if (tz)
		do_get_tz(tz);
	return 0;
}

/* This will break when the xtime seconds get inaccurate, but that is
 * unlikely */
time_t __vsyscall(1) vtime(time_t *t)
{
	struct timeval tv;
	time_t result;
	if (unlikely(!__vsyscall_gtod_data.sysctl_enabled))
		return time_syscall(t);

	vgettimeofday(&tv, NULL);
	result = tv.tv_sec;
	if (t)
		*t = result;
	return result;
}

/* Fast way to get the current CPU and node.
   This helps to do per-node and per-CPU caches in user space.
   The result is not guaranteed without CPU affinity, but usually
   works out because the scheduler tries to keep a thread on the same
   CPU.

   tcache must point to a two-element long array.
   All arguments can be NULL. */
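/*
 * Illustrative user-space call (a sketch, not part of this file; the
 * address assumes the standard -10MB vsyscall base, slot 2):
 *
 *	unsigned cpu, node;
 *	long (*vgetcpu_p)(unsigned *, unsigned *, struct getcpu_cache *) =
 *		(void *)0xffffffffff600800UL;
 *	vgetcpu_p(&cpu, &node, NULL);
 */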
long __vsyscall(2)
vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
{
	unsigned int p;
	unsigned long j = 0;

	/* Fast cache - only recompute the value once per jiffy and avoid
	   the relatively costly rdtscp/cpuid otherwise.
	   This works because the scheduler usually keeps the process
	   on the same CPU and this syscall doesn't guarantee its
	   results anyway.
	   We do this here because otherwise user space would do it on
	   its own in a likely inferior way (no access to jiffies).
	   If you don't like it pass NULL. */
	if (tcache && tcache->blob[0] == (j = __jiffies)) {
		p = tcache->blob[1];
	} else if (__vgetcpu_mode == VGETCPU_RDTSCP) {
		/* Load per CPU data from RDTSCP */
		native_read_tscp(&p);
	} else {
		/* Load per CPU data from GDT */
		asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
	}
	if (tcache) {
		tcache->blob[0] = j;
		tcache->blob[1] = p;
	}
	if (cpu)
		*cpu = p & 0xfff;
	if (node)
		*node = p >> 12;
	return 0;
}

static long __vsyscall(3) venosys_1(void)
{
	return -ENOSYS;
}

#ifdef CONFIG_SYSCTL

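/*
 * /proc/sys/kernel/vsyscall64: writing 0 clears sysctl_enabled, which
 * makes vgettimeofday() and vtime() above fall back to real syscalls.
 */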
static int
vsyscall_sysctl_change(ctl_table *ctl, int write, struct file *filp,
		       void __user *buffer, size_t *lenp, loff_t *ppos)
{
	return proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
}

static ctl_table kernel_table2[] = {
	{ .procname = "vsyscall64",
	  .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int),
	  .mode = 0644,
	  .proc_handler = vsyscall_sysctl_change },
	{}
};

static ctl_table kernel_root_table2[] = {
	{ .ctl_name = CTL_KERN, .procname = "kernel", .mode = 0555,
	  .child = kernel_table2 },
	{}
};
#endif

/* Assume __initcall executes before all user space. Hopefully kmod
   doesn't violate that. We'll find out if it does. */
static void __cpuinit vsyscall_set_cpu(int cpu)
{
	unsigned long *d;
	unsigned long node = 0;
#ifdef CONFIG_NUMA
	node = cpu_to_node(cpu);
#endif
	if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
		write_rdtscp_aux((node << 12) | cpu);

	/* Store the cpu number in the segment limit so that it can be
	   loaded quickly in user space by vgetcpu:
	   12 bits for the CPU and 8 bits for the node. */
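	/*
	 * 0x0f40000000000 sets up a present (P=1), DPL-3 descriptor with
	 * a zero limit; the cpu/node value is then merged into the limit
	 * field (descriptor bits 0-15 and 48-51, i.e. limit bits 0-19),
	 * where user space can read it back with a single LSL.
	 */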
	d = (unsigned long *)(get_cpu_gdt_table(cpu) + GDT_ENTRY_PER_CPU);
	*d = 0x0f40000000000ULL;
	*d |= cpu;			/* CPU in limit bits 0-11 */
	*d |= (node & 0xf) << 12;	/* low node bits in limit bits 12-15 */
	*d |= (node >> 4) << 48;	/* high node bits in limit bits 16-19 */
}

static void __cpuinit cpu_vsyscall_init(void *arg)
{
	/* preemption should be already off */
	vsyscall_set_cpu(raw_smp_processor_id());
}

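/*
 * CPU hotplug: re-run the per-cpu GDT/TSC_AUX setup whenever a CPU
 * comes (back) online so that vgetcpu keeps returning correct data.
 */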
static int __cpuinit
cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
{
	long cpu = (long)arg;
	if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
		smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1);
	return NOTIFY_DONE;
}

void __init map_vsyscall(void)
{
	extern char __vsyscall_0;
	unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0);

	/* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */
	__set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL);
}

static int __init vsyscall_init(void)
{
	BUG_ON(((unsigned long) &vgettimeofday !=
		VSYSCALL_ADDR(__NR_vgettimeofday)));
	BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime));
	BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)));
	BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu));
#ifdef CONFIG_SYSCTL
	register_sysctl_table(kernel_root_table2);
#endif
	on_each_cpu(cpu_vsyscall_init, NULL, 0, 1);
	hotcpu_notifier(cpu_vsyscall_notifier, 0);
	return 0;
}

__initcall(vsyscall_init);