]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - arch/x86/entry/vdso/vclock_gettime.c
Merge branch 'work.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
[mirror_ubuntu-jammy-kernel.git] / arch / x86 / entry / vdso / vclock_gettime.c
CommitLineData
2aae950b
AK
1/*
2 * Copyright 2006 Andi Kleen, SUSE Labs.
3 * Subject to the GNU Public License, v.2
4 *
f144a6b4 5 * Fast user context implementation of clock_gettime, gettimeofday, and time.
2aae950b 6 *
7a59ed41
SS
7 * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net>
8 * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
9 *
2aae950b
AK
10 * The code should have no internal unresolved relocations.
11 * Check with readelf after changing.
2aae950b
AK
12 */
13
7a59ed41 14#include <uapi/linux/time.h>
2aae950b 15#include <asm/vgtod.h>
7c03156f 16#include <asm/vvar.h>
2aae950b 17#include <asm/unistd.h>
7c03156f 18#include <asm/msr.h>
76480a6a 19#include <asm/pvclock.h>
90b20432 20#include <asm/mshyperv.h>
7c03156f
SS
21#include <linux/math64.h>
22#include <linux/time.h>
76480a6a 23#include <linux/kernel.h>
2aae950b 24
8c49d9a7 25#define gtod (&VVAR(vsyscall_gtod_data))
2aae950b 26
7a59ed41
SS
27extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts);
28extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
29extern time_t __vdso_time(time_t *t);
30
dac16fba
AL
31#ifdef CONFIG_PARAVIRT_CLOCK
32extern u8 pvclock_page
33 __attribute__((visibility("hidden")));
34#endif
35
90b20432
VK
36#ifdef CONFIG_HYPERV_TSCPAGE
37extern u8 hvclock_page
38 __attribute__((visibility("hidden")));
39#endif
40
7a59ed41
SS
41#ifndef BUILD_VDSO32
42
411f790c
SS
43notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
44{
45 long ret;
46 asm("syscall" : "=a" (ret) :
47 "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory");
48 return ret;
98d0ac38
AL
49}
50
411f790c 51notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
98d0ac38 52{
411f790c
SS
53 long ret;
54
55 asm("syscall" : "=a" (ret) :
56 "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory");
57 return ret;
98d0ac38
AL
58}
59
51c19b4f 60
76480a6a
AL
61#else
62
63notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
64{
65 long ret;
66
67 asm(
68 "mov %%ebx, %%edx \n"
69 "mov %2, %%ebx \n"
70 "call __kernel_vsyscall \n"
71 "mov %%edx, %%ebx \n"
72 : "=a" (ret)
73 : "0" (__NR_clock_gettime), "g" (clock), "c" (ts)
74 : "memory", "edx");
75 return ret;
76}
77
78notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
79{
80 long ret;
81
82 asm(
83 "mov %%ebx, %%edx \n"
84 "mov %2, %%ebx \n"
85 "call __kernel_vsyscall \n"
86 "mov %%edx, %%ebx \n"
87 : "=a" (ret)
88 : "0" (__NR_gettimeofday), "g" (tv), "c" (tz)
89 : "memory", "edx");
90 return ret;
91}
92
93#endif
94
95#ifdef CONFIG_PARAVIRT_CLOCK
dac16fba 96static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void)
51c19b4f 97{
dac16fba 98 return (const struct pvclock_vsyscall_time_info *)&pvclock_page;
51c19b4f
MT
99}
100
a5a1d1c2 101static notrace u64 vread_pvclock(int *mode)
51c19b4f 102{
dac16fba 103 const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti;
a5a1d1c2 104 u64 ret;
abe9efa7
PB
105 u64 last;
106 u32 version;
51c19b4f
MT
107
108 /*
6b078f5d
AL
109 * Note: The kernel and hypervisor must guarantee that cpu ID
110 * number maps 1:1 to per-CPU pvclock time info.
111 *
112 * Because the hypervisor is entirely unaware of guest userspace
113 * preemption, it cannot guarantee that per-CPU pvclock time
114 * info is updated if the underlying CPU changes or that that
115 * version is increased whenever underlying CPU changes.
116 *
117 * On KVM, we are guaranteed that pvti updates for any vCPU are
118 * atomic as seen by *all* vCPUs. This is an even stronger
119 * guarantee than we get with a normal seqlock.
73459e2a 120 *
6b078f5d
AL
121 * On Xen, we don't appear to have that guarantee, but Xen still
122 * supplies a valid seqlock using the version field.
78fd8c72 123 *
6b078f5d
AL
124 * We only do pvclock vdso timing at all if
125 * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to
126 * mean that all vCPUs have matching pvti and that the TSC is
127 * synced, so we can just look at vCPU 0's pvti.
51c19b4f 128 */
6b078f5d 129
6b078f5d 130 do {
3aed64f6 131 version = pvclock_read_begin(pvti);
6b078f5d 132
78fd8c72
AL
133 if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) {
134 *mode = VCLOCK_NONE;
135 return 0;
136 }
137
108b249c 138 ret = __pvclock_read_cycles(pvti, rdtsc_ordered());
3aed64f6 139 } while (pvclock_read_retry(pvti, version));
6b078f5d 140
76480a6a 141 /* refer to vread_tsc() comment for rationale */
7c03156f 142 last = gtod->cycle_last;
51c19b4f
MT
143
144 if (likely(ret >= last))
145 return ret;
146
147 return last;
148}
149#endif
90b20432
VK
150#ifdef CONFIG_HYPERV_TSCPAGE
151static notrace u64 vread_hvclock(int *mode)
152{
153 const struct ms_hyperv_tsc_page *tsc_pg =
154 (const struct ms_hyperv_tsc_page *)&hvclock_page;
155 u64 current_tick = hv_read_tsc_page(tsc_pg);
156
157 if (current_tick != U64_MAX)
158 return current_tick;
159
160 *mode = VCLOCK_NONE;
161 return 0;
162}
163#endif
51c19b4f 164
a5a1d1c2 165notrace static u64 vread_tsc(void)
2aae950b 166{
a5a1d1c2 167 u64 ret = (u64)rdtsc_ordered();
03b9730b 168 u64 last = gtod->cycle_last;
a939e817 169
411f790c
SS
170 if (likely(ret >= last))
171 return ret;
172
173 /*
174 * GCC likes to generate cmov here, but this branch is extremely
6a6256f9 175 * predictable (it's just a function of time and the likely is
411f790c
SS
176 * very likely) and there's a data dependence, so force GCC
177 * to generate a branch instead. I don't barrier() because
178 * we don't actually need a barrier, and if this function
179 * ever gets inlined it will generate worse code.
180 */
181 asm volatile ("");
182 return last;
183}
a939e817 184
51c19b4f 185notrace static inline u64 vgetsns(int *mode)
2aae950b 186{
7a59ed41 187 u64 v;
98d0ac38 188 cycles_t cycles;
7c03156f
SS
189
190 if (gtod->vclock_mode == VCLOCK_TSC)
98d0ac38 191 cycles = vread_tsc();
51c19b4f 192#ifdef CONFIG_PARAVIRT_CLOCK
7c03156f 193 else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
51c19b4f 194 cycles = vread_pvclock(mode);
90b20432
VK
195#endif
196#ifdef CONFIG_HYPERV_TSCPAGE
197 else if (gtod->vclock_mode == VCLOCK_HVCLOCK)
198 cycles = vread_hvclock(mode);
51c19b4f 199#endif
a939e817
JS
200 else
201 return 0;
7c03156f
SS
202 v = (cycles - gtod->cycle_last) & gtod->mask;
203 return v * gtod->mult;
2aae950b
AK
204}
205
5f293474
AL
206/* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
207notrace static int __always_inline do_realtime(struct timespec *ts)
2aae950b 208{
650ea024
JS
209 unsigned long seq;
210 u64 ns;
a939e817
JS
211 int mode;
212
2aae950b 213 do {
7c03156f
SS
214 seq = gtod_read_begin(gtod);
215 mode = gtod->vclock_mode;
2aae950b 216 ts->tv_sec = gtod->wall_time_sec;
650ea024 217 ns = gtod->wall_time_snsec;
51c19b4f 218 ns += vgetsns(&mode);
7c03156f
SS
219 ns >>= gtod->shift;
220 } while (unlikely(gtod_read_retry(gtod, seq)));
221
222 ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
223 ts->tv_nsec = ns;
a939e817 224
a939e817 225 return mode;
2aae950b
AK
226}
227
7a59ed41 228notrace static int __always_inline do_monotonic(struct timespec *ts)
2aae950b 229{
650ea024
JS
230 unsigned long seq;
231 u64 ns;
a939e817
JS
232 int mode;
233
2aae950b 234 do {
7c03156f
SS
235 seq = gtod_read_begin(gtod);
236 mode = gtod->vclock_mode;
91ec87d5 237 ts->tv_sec = gtod->monotonic_time_sec;
650ea024 238 ns = gtod->monotonic_time_snsec;
51c19b4f 239 ns += vgetsns(&mode);
7c03156f
SS
240 ns >>= gtod->shift;
241 } while (unlikely(gtod_read_retry(gtod, seq)));
242
243 ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
244 ts->tv_nsec = ns;
0f51f285 245
a939e817 246 return mode;
2aae950b
AK
247}
248
ce39c640 249notrace static void do_realtime_coarse(struct timespec *ts)
da15cfda
JS
250{
251 unsigned long seq;
252 do {
7c03156f
SS
253 seq = gtod_read_begin(gtod);
254 ts->tv_sec = gtod->wall_time_coarse_sec;
255 ts->tv_nsec = gtod->wall_time_coarse_nsec;
256 } while (unlikely(gtod_read_retry(gtod, seq)));
da15cfda
JS
257}
258
ce39c640 259notrace static void do_monotonic_coarse(struct timespec *ts)
da15cfda 260{
91ec87d5 261 unsigned long seq;
da15cfda 262 do {
7c03156f
SS
263 seq = gtod_read_begin(gtod);
264 ts->tv_sec = gtod->monotonic_time_coarse_sec;
265 ts->tv_nsec = gtod->monotonic_time_coarse_nsec;
266 } while (unlikely(gtod_read_retry(gtod, seq)));
da15cfda
JS
267}
268
23adec55 269notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
2aae950b 270{
0d7b8547
AL
271 switch (clock) {
272 case CLOCK_REALTIME:
ce39c640
SS
273 if (do_realtime(ts) == VCLOCK_NONE)
274 goto fallback;
0d7b8547
AL
275 break;
276 case CLOCK_MONOTONIC:
ce39c640
SS
277 if (do_monotonic(ts) == VCLOCK_NONE)
278 goto fallback;
0d7b8547
AL
279 break;
280 case CLOCK_REALTIME_COARSE:
ce39c640
SS
281 do_realtime_coarse(ts);
282 break;
0d7b8547 283 case CLOCK_MONOTONIC_COARSE:
ce39c640
SS
284 do_monotonic_coarse(ts);
285 break;
286 default:
287 goto fallback;
0d7b8547
AL
288 }
289
a939e817 290 return 0;
ce39c640
SS
291fallback:
292 return vdso_fallback_gettime(clock, ts);
2aae950b
AK
293}
294int clock_gettime(clockid_t, struct timespec *)
295 __attribute__((weak, alias("__vdso_clock_gettime")));
296
23adec55 297notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
2aae950b 298{
a939e817 299 if (likely(tv != NULL)) {
0df1ea2b
SS
300 if (unlikely(do_realtime((struct timespec *)tv) == VCLOCK_NONE))
301 return vdso_fallback_gtod(tv, tz);
a939e817 302 tv->tv_usec /= 1000;
2aae950b 303 }
a939e817 304 if (unlikely(tz != NULL)) {
7c03156f
SS
305 tz->tz_minuteswest = gtod->tz_minuteswest;
306 tz->tz_dsttime = gtod->tz_dsttime;
a939e817
JS
307 }
308
a939e817 309 return 0;
2aae950b
AK
310}
311int gettimeofday(struct timeval *, struct timezone *)
312 __attribute__((weak, alias("__vdso_gettimeofday")));
f144a6b4 313
0d7b8547
AL
314/*
315 * This will break when the xtime seconds get inaccurate, but that is
316 * unlikely
317 */
f144a6b4
AL
318notrace time_t __vdso_time(time_t *t)
319{
7a59ed41 320 /* This is atomic on x86 so we don't need any locks. */
6aa7de05 321 time_t result = READ_ONCE(gtod->wall_time_sec);
f144a6b4
AL
322
323 if (t)
324 *t = result;
325 return result;
326}
88edb57d 327time_t time(time_t *t)
f144a6b4 328 __attribute__((weak, alias("__vdso_time")));