]>
Commit | Line | Data |
---|---|---|
2aae950b AK |
1 | /* |
2 | * Copyright 2006 Andi Kleen, SUSE Labs. | |
3 | * Subject to the GNU Public License, v.2 | |
4 | * | |
f144a6b4 | 5 | * Fast user context implementation of clock_gettime, gettimeofday, and time. |
2aae950b | 6 | * |
7a59ed41 SS |
7 | * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net> |
8 | * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany | |
9 | * | |
2aae950b AK |
10 | * The code should have no internal unresolved relocations. |
11 | * Check with readelf after changing. | |
2aae950b AK |
12 | */ |
13 | ||
7a59ed41 | 14 | #include <uapi/linux/time.h> |
2aae950b | 15 | #include <asm/vgtod.h> |
7c03156f | 16 | #include <asm/vvar.h> |
2aae950b | 17 | #include <asm/unistd.h> |
7c03156f | 18 | #include <asm/msr.h> |
76480a6a | 19 | #include <asm/pvclock.h> |
90b20432 | 20 | #include <asm/mshyperv.h> |
7c03156f SS |
21 | #include <linux/math64.h> |
22 | #include <linux/time.h> | |
76480a6a | 23 | #include <linux/kernel.h> |
2aae950b | 24 | |
8c49d9a7 | 25 | #define gtod (&VVAR(vsyscall_gtod_data)) |
2aae950b | 26 | |
7a59ed41 SS |
27 | extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts); |
28 | extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz); | |
29 | extern time_t __vdso_time(time_t *t); | |
30 | ||
dac16fba AL |
31 | #ifdef CONFIG_PARAVIRT_CLOCK |
32 | extern u8 pvclock_page | |
33 | __attribute__((visibility("hidden"))); | |
34 | #endif | |
35 | ||
90b20432 VK |
36 | #ifdef CONFIG_HYPERV_TSCPAGE |
37 | extern u8 hvclock_page | |
38 | __attribute__((visibility("hidden"))); | |
39 | #endif | |
40 | ||
7a59ed41 SS |
41 | #ifndef BUILD_VDSO32 |
42 | ||
411f790c SS |
43 | notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) |
44 | { | |
45 | long ret; | |
46 | asm("syscall" : "=a" (ret) : | |
47 | "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory"); | |
48 | return ret; | |
98d0ac38 AL |
49 | } |
50 | ||
411f790c | 51 | notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) |
98d0ac38 | 52 | { |
411f790c SS |
53 | long ret; |
54 | ||
55 | asm("syscall" : "=a" (ret) : | |
56 | "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory"); | |
57 | return ret; | |
98d0ac38 AL |
58 | } |
59 | ||
51c19b4f | 60 | |
76480a6a AL |
61 | #else |
62 | ||
63 | notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) | |
64 | { | |
65 | long ret; | |
66 | ||
67 | asm( | |
68 | "mov %%ebx, %%edx \n" | |
69 | "mov %2, %%ebx \n" | |
70 | "call __kernel_vsyscall \n" | |
71 | "mov %%edx, %%ebx \n" | |
72 | : "=a" (ret) | |
73 | : "0" (__NR_clock_gettime), "g" (clock), "c" (ts) | |
74 | : "memory", "edx"); | |
75 | return ret; | |
76 | } | |
77 | ||
78 | notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) | |
79 | { | |
80 | long ret; | |
81 | ||
82 | asm( | |
83 | "mov %%ebx, %%edx \n" | |
84 | "mov %2, %%ebx \n" | |
85 | "call __kernel_vsyscall \n" | |
86 | "mov %%edx, %%ebx \n" | |
87 | : "=a" (ret) | |
88 | : "0" (__NR_gettimeofday), "g" (tv), "c" (tz) | |
89 | : "memory", "edx"); | |
90 | return ret; | |
91 | } | |
92 | ||
93 | #endif | |
94 | ||
95 | #ifdef CONFIG_PARAVIRT_CLOCK | |
dac16fba | 96 | static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void) |
51c19b4f | 97 | { |
dac16fba | 98 | return (const struct pvclock_vsyscall_time_info *)&pvclock_page; |
51c19b4f MT |
99 | } |
100 | ||
a5a1d1c2 | 101 | static notrace u64 vread_pvclock(int *mode) |
51c19b4f | 102 | { |
dac16fba | 103 | const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti; |
a5a1d1c2 | 104 | u64 ret; |
abe9efa7 PB |
105 | u64 last; |
106 | u32 version; | |
51c19b4f MT |
107 | |
108 | /* | |
6b078f5d AL |
109 | * Note: The kernel and hypervisor must guarantee that cpu ID |
110 | * number maps 1:1 to per-CPU pvclock time info. | |
111 | * | |
112 | * Because the hypervisor is entirely unaware of guest userspace | |
113 | * preemption, it cannot guarantee that per-CPU pvclock time | |
114 | * info is updated if the underlying CPU changes or that that | |
115 | * version is increased whenever underlying CPU changes. | |
116 | * | |
117 | * On KVM, we are guaranteed that pvti updates for any vCPU are | |
118 | * atomic as seen by *all* vCPUs. This is an even stronger | |
119 | * guarantee than we get with a normal seqlock. | |
73459e2a | 120 | * |
6b078f5d AL |
121 | * On Xen, we don't appear to have that guarantee, but Xen still |
122 | * supplies a valid seqlock using the version field. | |
78fd8c72 | 123 | * |
6b078f5d AL |
124 | * We only do pvclock vdso timing at all if |
125 | * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to | |
126 | * mean that all vCPUs have matching pvti and that the TSC is | |
127 | * synced, so we can just look at vCPU 0's pvti. | |
51c19b4f | 128 | */ |
6b078f5d | 129 | |
6b078f5d | 130 | do { |
3aed64f6 | 131 | version = pvclock_read_begin(pvti); |
6b078f5d | 132 | |
78fd8c72 AL |
133 | if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) { |
134 | *mode = VCLOCK_NONE; | |
135 | return 0; | |
136 | } | |
137 | ||
108b249c | 138 | ret = __pvclock_read_cycles(pvti, rdtsc_ordered()); |
3aed64f6 | 139 | } while (pvclock_read_retry(pvti, version)); |
6b078f5d | 140 | |
76480a6a | 141 | /* refer to vread_tsc() comment for rationale */ |
7c03156f | 142 | last = gtod->cycle_last; |
51c19b4f MT |
143 | |
144 | if (likely(ret >= last)) | |
145 | return ret; | |
146 | ||
147 | return last; | |
148 | } | |
149 | #endif | |
90b20432 VK |
150 | #ifdef CONFIG_HYPERV_TSCPAGE |
151 | static notrace u64 vread_hvclock(int *mode) | |
152 | { | |
153 | const struct ms_hyperv_tsc_page *tsc_pg = | |
154 | (const struct ms_hyperv_tsc_page *)&hvclock_page; | |
155 | u64 current_tick = hv_read_tsc_page(tsc_pg); | |
156 | ||
157 | if (current_tick != U64_MAX) | |
158 | return current_tick; | |
159 | ||
160 | *mode = VCLOCK_NONE; | |
161 | return 0; | |
162 | } | |
163 | #endif | |
51c19b4f | 164 | |
a5a1d1c2 | 165 | notrace static u64 vread_tsc(void) |
2aae950b | 166 | { |
a5a1d1c2 | 167 | u64 ret = (u64)rdtsc_ordered(); |
03b9730b | 168 | u64 last = gtod->cycle_last; |
a939e817 | 169 | |
411f790c SS |
170 | if (likely(ret >= last)) |
171 | return ret; | |
172 | ||
173 | /* | |
174 | * GCC likes to generate cmov here, but this branch is extremely | |
6a6256f9 | 175 | * predictable (it's just a function of time and the likely is |
411f790c SS |
176 | * very likely) and there's a data dependence, so force GCC |
177 | * to generate a branch instead. I don't barrier() because | |
178 | * we don't actually need a barrier, and if this function | |
179 | * ever gets inlined it will generate worse code. | |
180 | */ | |
181 | asm volatile (""); | |
182 | return last; | |
183 | } | |
a939e817 | 184 | |
51c19b4f | 185 | notrace static inline u64 vgetsns(int *mode) |
2aae950b | 186 | { |
7a59ed41 | 187 | u64 v; |
98d0ac38 | 188 | cycles_t cycles; |
7c03156f SS |
189 | |
190 | if (gtod->vclock_mode == VCLOCK_TSC) | |
98d0ac38 | 191 | cycles = vread_tsc(); |
51c19b4f | 192 | #ifdef CONFIG_PARAVIRT_CLOCK |
7c03156f | 193 | else if (gtod->vclock_mode == VCLOCK_PVCLOCK) |
51c19b4f | 194 | cycles = vread_pvclock(mode); |
90b20432 VK |
195 | #endif |
196 | #ifdef CONFIG_HYPERV_TSCPAGE | |
197 | else if (gtod->vclock_mode == VCLOCK_HVCLOCK) | |
198 | cycles = vread_hvclock(mode); | |
51c19b4f | 199 | #endif |
a939e817 JS |
200 | else |
201 | return 0; | |
7c03156f SS |
202 | v = (cycles - gtod->cycle_last) & gtod->mask; |
203 | return v * gtod->mult; | |
2aae950b AK |
204 | } |
205 | ||
5f293474 AL |
206 | /* Code size doesn't matter (vdso is 4k anyway) and this is faster. */ |
207 | notrace static int __always_inline do_realtime(struct timespec *ts) | |
2aae950b | 208 | { |
650ea024 JS |
209 | unsigned long seq; |
210 | u64 ns; | |
a939e817 JS |
211 | int mode; |
212 | ||
2aae950b | 213 | do { |
7c03156f SS |
214 | seq = gtod_read_begin(gtod); |
215 | mode = gtod->vclock_mode; | |
2aae950b | 216 | ts->tv_sec = gtod->wall_time_sec; |
650ea024 | 217 | ns = gtod->wall_time_snsec; |
51c19b4f | 218 | ns += vgetsns(&mode); |
7c03156f SS |
219 | ns >>= gtod->shift; |
220 | } while (unlikely(gtod_read_retry(gtod, seq))); | |
221 | ||
222 | ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); | |
223 | ts->tv_nsec = ns; | |
a939e817 | 224 | |
a939e817 | 225 | return mode; |
2aae950b AK |
226 | } |
227 | ||
7a59ed41 | 228 | notrace static int __always_inline do_monotonic(struct timespec *ts) |
2aae950b | 229 | { |
650ea024 JS |
230 | unsigned long seq; |
231 | u64 ns; | |
a939e817 JS |
232 | int mode; |
233 | ||
2aae950b | 234 | do { |
7c03156f SS |
235 | seq = gtod_read_begin(gtod); |
236 | mode = gtod->vclock_mode; | |
91ec87d5 | 237 | ts->tv_sec = gtod->monotonic_time_sec; |
650ea024 | 238 | ns = gtod->monotonic_time_snsec; |
51c19b4f | 239 | ns += vgetsns(&mode); |
7c03156f SS |
240 | ns >>= gtod->shift; |
241 | } while (unlikely(gtod_read_retry(gtod, seq))); | |
242 | ||
243 | ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); | |
244 | ts->tv_nsec = ns; | |
0f51f285 | 245 | |
a939e817 | 246 | return mode; |
2aae950b AK |
247 | } |
248 | ||
ce39c640 | 249 | notrace static void do_realtime_coarse(struct timespec *ts) |
da15cfda JS |
250 | { |
251 | unsigned long seq; | |
252 | do { | |
7c03156f SS |
253 | seq = gtod_read_begin(gtod); |
254 | ts->tv_sec = gtod->wall_time_coarse_sec; | |
255 | ts->tv_nsec = gtod->wall_time_coarse_nsec; | |
256 | } while (unlikely(gtod_read_retry(gtod, seq))); | |
da15cfda JS |
257 | } |
258 | ||
ce39c640 | 259 | notrace static void do_monotonic_coarse(struct timespec *ts) |
da15cfda | 260 | { |
91ec87d5 | 261 | unsigned long seq; |
da15cfda | 262 | do { |
7c03156f SS |
263 | seq = gtod_read_begin(gtod); |
264 | ts->tv_sec = gtod->monotonic_time_coarse_sec; | |
265 | ts->tv_nsec = gtod->monotonic_time_coarse_nsec; | |
266 | } while (unlikely(gtod_read_retry(gtod, seq))); | |
da15cfda JS |
267 | } |
268 | ||
23adec55 | 269 | notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) |
2aae950b | 270 | { |
0d7b8547 AL |
271 | switch (clock) { |
272 | case CLOCK_REALTIME: | |
ce39c640 SS |
273 | if (do_realtime(ts) == VCLOCK_NONE) |
274 | goto fallback; | |
0d7b8547 AL |
275 | break; |
276 | case CLOCK_MONOTONIC: | |
ce39c640 SS |
277 | if (do_monotonic(ts) == VCLOCK_NONE) |
278 | goto fallback; | |
0d7b8547 AL |
279 | break; |
280 | case CLOCK_REALTIME_COARSE: | |
ce39c640 SS |
281 | do_realtime_coarse(ts); |
282 | break; | |
0d7b8547 | 283 | case CLOCK_MONOTONIC_COARSE: |
ce39c640 SS |
284 | do_monotonic_coarse(ts); |
285 | break; | |
286 | default: | |
287 | goto fallback; | |
0d7b8547 AL |
288 | } |
289 | ||
a939e817 | 290 | return 0; |
ce39c640 SS |
291 | fallback: |
292 | return vdso_fallback_gettime(clock, ts); | |
2aae950b AK |
293 | } |
294 | int clock_gettime(clockid_t, struct timespec *) | |
295 | __attribute__((weak, alias("__vdso_clock_gettime"))); | |
296 | ||
23adec55 | 297 | notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) |
2aae950b | 298 | { |
a939e817 | 299 | if (likely(tv != NULL)) { |
0df1ea2b SS |
300 | if (unlikely(do_realtime((struct timespec *)tv) == VCLOCK_NONE)) |
301 | return vdso_fallback_gtod(tv, tz); | |
a939e817 | 302 | tv->tv_usec /= 1000; |
2aae950b | 303 | } |
a939e817 | 304 | if (unlikely(tz != NULL)) { |
7c03156f SS |
305 | tz->tz_minuteswest = gtod->tz_minuteswest; |
306 | tz->tz_dsttime = gtod->tz_dsttime; | |
a939e817 JS |
307 | } |
308 | ||
a939e817 | 309 | return 0; |
2aae950b AK |
310 | } |
311 | int gettimeofday(struct timeval *, struct timezone *) | |
312 | __attribute__((weak, alias("__vdso_gettimeofday"))); | |
f144a6b4 | 313 | |
0d7b8547 AL |
314 | /* |
315 | * This will break when the xtime seconds get inaccurate, but that is | |
316 | * unlikely | |
317 | */ | |
f144a6b4 AL |
318 | notrace time_t __vdso_time(time_t *t) |
319 | { | |
7a59ed41 | 320 | /* This is atomic on x86 so we don't need any locks. */ |
6aa7de05 | 321 | time_t result = READ_ONCE(gtod->wall_time_sec); |
f144a6b4 AL |
322 | |
323 | if (t) | |
324 | *t = result; | |
325 | return result; | |
326 | } | |
88edb57d | 327 | time_t time(time_t *t) |
f144a6b4 | 328 | __attribute__((weak, alias("__vdso_time"))); |