Message ID | 20190621095252.32307-5-vincenzo.frascino@arm.com (mailing list archive)
---|---
State | New
Series | Unify vDSOs across more architectures
Hi Vincenzo, On Fri, Jun 21, 2019 at 10:52:31AM +0100, Vincenzo Frascino wrote: > To take advantage of the commonly defined vdso interface for > gettimeofday the architectural code requires an adaptation. > > Re-implement the gettimeofday vdso in C in order to use lib/vdso. > > With the new implementation arm64 gains support for CLOCK_BOOTTIME > and CLOCK_TAI. > > Cc: Catalin Marinas <catalin.marinas@arm.com> > Cc: Will Deacon <will.deacon@arm.com> > Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com> > Tested-by: Shijith Thotton <sthotton@marvell.com> > Tested-by: Andre Przywara <andre.przywara@arm.com> > --- > arch/arm64/Kconfig | 2 + > arch/arm64/include/asm/vdso/gettimeofday.h | 86 ++++++ > arch/arm64/include/asm/vdso/vsyscall.h | 53 ++++ > arch/arm64/include/asm/vdso_datapage.h | 48 --- > arch/arm64/kernel/asm-offsets.c | 33 +- > arch/arm64/kernel/vdso.c | 51 +--- > arch/arm64/kernel/vdso/Makefile | 34 ++- > arch/arm64/kernel/vdso/gettimeofday.S | 334 --------------------- > arch/arm64/kernel/vdso/vgettimeofday.c | 28 ++ I'm concerned about an apparent semantic change introduced by your patch: > +static __always_inline u64 __arch_get_hw_counter(s32 clock_mode) > +{ > + u64 res; > + > + asm volatile("mrs %0, cntvct_el0" : "=r" (res) :: "memory"); > + > + return res; > +} vs: > - .macro get_clock_shifted_nsec res, cycle_last, mult > - /* Read the virtual counter. */ > - isb > - mrs x_tmp, cntvct_el0 > - /* Calculate cycle delta and convert to ns. */ > - sub \res, x_tmp, \cycle_last > - /* We can only guarantee 56 bits of precision. */ > - movn x_tmp, #0xff00, lsl #48 > - and \res, x_tmp, \res > - mul \res, \res, \mult > - /* > - * Fake address dependency from the value computed from the counter > - * register to subsequent data page accesses so that the sequence > - * locking also orders the read of the counter. > - */ > - and x_tmp, \res, xzr > - add vdso_data, vdso_data, x_tmp > - .endm It looks like you're dropping both the preceding ISB (allowing the counter value to be speculated) and also the subsequent dependency (allowing the seq lock to be speculated). If I've missed them, apologies, but I couldn't spot them elsewhere in this patch. __arch_get_hw_counter should probably be identical to __arch_counter_get_cntvct to avoid these problems. I guess we don't need to care about the case where the counter is unstable, since we'll just disable the vDSO altogether on such systems? Will
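For context, a minimal sketch of what preserving the ordering Will describes could look like in the C version is shown below: a barrier before the counter read so the value cannot be speculated, and a barrier after it so the seqlock re-read cannot be speculated past it. This is an illustration of the requirement only, not a claim about the fix that was eventually posted; it assumes isb() from asm/barrier.h is usable in the vDSO objects, and mirroring the fake address dependency from the removed assembly (as __arch_counter_get_cntvct does) would be an equally valid alternative.

static __always_inline u64 __arch_get_hw_counter(s32 clock_mode)
{
	u64 res;

	/* Prevent the counter read from being speculated ahead of the seqlock read. */
	isb();
	asm volatile("mrs %0, cntvct_el0" : "=r" (res) :: "memory");
	/* Keep the subsequent seqlock re-read from being speculated past the counter read. */
	isb();

	return res;
}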
On Fri, Jun 21, 2019 at 10:52:31AM +0100, Vincenzo Frascino wrote: > diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c > index 947e39896e28..9e4b7ccbab2f 100644 > --- a/arch/arm64/kernel/asm-offsets.c > +++ b/arch/arm64/kernel/asm-offsets.c > @@ -25,13 +25,13 @@ > #include <linux/kvm_host.h> > #include <linux/preempt.h> > #include <linux/suspend.h> > +#include <vdso/datapage.h> > #include <asm/cpufeature.h> > #include <asm/fixmap.h> > #include <asm/thread_info.h> > #include <asm/memory.h> > #include <asm/smp_plat.h> > #include <asm/suspend.h> > -#include <asm/vdso_datapage.h> > #include <linux/kbuild.h> > #include <linux/arm-smccc.h> > > @@ -100,17 +100,28 @@ int main(void) > DEFINE(CLOCK_COARSE_RES, LOW_RES_NSEC); > DEFINE(NSEC_PER_SEC, NSEC_PER_SEC); > BLANK(); > - DEFINE(VDSO_CS_CYCLE_LAST, offsetof(struct vdso_data, cs_cycle_last)); > - DEFINE(VDSO_RAW_TIME_SEC, offsetof(struct vdso_data, raw_time_sec)); > - DEFINE(VDSO_XTIME_CLK_SEC, offsetof(struct vdso_data, xtime_clock_sec)); > - DEFINE(VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec)); > - DEFINE(VDSO_XTIME_CRS_NSEC, offsetof(struct vdso_data, xtime_coarse_nsec)); > - DEFINE(VDSO_WTM_CLK_SEC, offsetof(struct vdso_data, wtm_clock_sec)); > - DEFINE(VDSO_TB_SEQ_COUNT, offsetof(struct vdso_data, tb_seq_count)); > - DEFINE(VDSO_CS_MONO_MULT, offsetof(struct vdso_data, cs_mono_mult)); > - DEFINE(VDSO_CS_SHIFT, offsetof(struct vdso_data, cs_shift)); > + DEFINE(VDSO_SEQ, offsetof(struct vdso_data, seq)); > + DEFINE(VDSO_CLK_MODE, offsetof(struct vdso_data, clock_mode)); > + DEFINE(VDSO_CYCLE_LAST, offsetof(struct vdso_data, cycle_last)); > + DEFINE(VDSO_MASK, offsetof(struct vdso_data, mask)); > + DEFINE(VDSO_MULT, offsetof(struct vdso_data, mult)); > + DEFINE(VDSO_SHIFT, offsetof(struct vdso_data, shift)); > + DEFINE(VDSO_REALTIME_SEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME].sec)); > + DEFINE(VDSO_REALTIME_NSEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME].nsec)); > + DEFINE(VDSO_MONO_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC].sec)); > + DEFINE(VDSO_MONO_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC].nsec)); > + DEFINE(VDSO_MONO_RAW_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_RAW].sec)); > + DEFINE(VDSO_MONO_RAW_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_RAW].nsec)); > + DEFINE(VDSO_BOOTTIME_SEC, offsetof(struct vdso_data, basetime[CLOCK_BOOTTIME].sec)); > + DEFINE(VDSO_BOOTTIME_NSEC, offsetof(struct vdso_data, basetime[CLOCK_BOOTTIME].nsec)); > + DEFINE(VDSO_TAI_SEC, offsetof(struct vdso_data, basetime[CLOCK_TAI].sec)); > + DEFINE(VDSO_TAI_NSEC, offsetof(struct vdso_data, basetime[CLOCK_TAI].nsec)); > + DEFINE(VDSO_RT_COARSE_SEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME_COARSE].sec)); > + DEFINE(VDSO_RT_COARSE_NSEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME_COARSE].nsec)); > + DEFINE(VDSO_MONO_COARSE_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_COARSE].sec)); > + DEFINE(VDSO_MONO_COARSE_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_COARSE].nsec)); > DEFINE(VDSO_TZ_MINWEST, offsetof(struct vdso_data, tz_minuteswest)); > - DEFINE(VDSO_USE_SYSCALL, offsetof(struct vdso_data, use_syscall)); > + DEFINE(VDSO_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime)); > BLANK(); > DEFINE(TVAL_TV_SEC, offsetof(struct timeval, tv_sec)); > DEFINE(TSPEC_TV_SEC, offsetof(struct timespec, tv_sec)); Now that we are moving this to C, do we actually need the asm-offsets? 
If not, here's a clean-up patch: ---------------8<-------------------------------------- From 7e818178a8b225b522fe547cf00ba8508d4cdcf0 Mon Sep 17 00:00:00 2001 From: Catalin Marinas <catalin.marinas@arm.com> Date: Mon, 24 Jun 2019 14:12:48 +0100 Subject: [PATCH] arm64: vdso: Remove unnecessary asm-offsets.c definitions Since the VDSO code is moving to C from assembly, there is no need to define and maintain the corresponding asm offsets. Fixes: 28b1a824a4f4 ("arm64: vdso: Substitute gettimeofday() with C implementation") Signed-off-by: Catalin Marinas <catalin.marinas@arm.com> --- arch/arm64/kernel/asm-offsets.c | 39 --------------------------------- 1 file changed, 39 deletions(-) diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index e6f7409a78a4..214685760e1c 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -14,7 +14,6 @@ #include <linux/kvm_host.h> #include <linux/preempt.h> #include <linux/suspend.h> -#include <vdso/datapage.h> #include <asm/cpufeature.h> #include <asm/fixmap.h> #include <asm/thread_info.h> @@ -86,44 +85,6 @@ int main(void) BLANK(); DEFINE(PREEMPT_DISABLE_OFFSET, PREEMPT_DISABLE_OFFSET); BLANK(); - DEFINE(CLOCK_REALTIME, CLOCK_REALTIME); - DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC); - DEFINE(CLOCK_MONOTONIC_RAW, CLOCK_MONOTONIC_RAW); - DEFINE(CLOCK_REALTIME_RES, offsetof(struct vdso_data, hrtimer_res)); - DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE); - DEFINE(CLOCK_MONOTONIC_COARSE,CLOCK_MONOTONIC_COARSE); - DEFINE(CLOCK_COARSE_RES, LOW_RES_NSEC); - DEFINE(NSEC_PER_SEC, NSEC_PER_SEC); - BLANK(); - DEFINE(VDSO_SEQ, offsetof(struct vdso_data, seq)); - DEFINE(VDSO_CLK_MODE, offsetof(struct vdso_data, clock_mode)); - DEFINE(VDSO_CYCLE_LAST, offsetof(struct vdso_data, cycle_last)); - DEFINE(VDSO_MASK, offsetof(struct vdso_data, mask)); - DEFINE(VDSO_MULT, offsetof(struct vdso_data, mult)); - DEFINE(VDSO_SHIFT, offsetof(struct vdso_data, shift)); - DEFINE(VDSO_REALTIME_SEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME].sec)); - DEFINE(VDSO_REALTIME_NSEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME].nsec)); - DEFINE(VDSO_MONO_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC].sec)); - DEFINE(VDSO_MONO_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC].nsec)); - DEFINE(VDSO_MONO_RAW_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_RAW].sec)); - DEFINE(VDSO_MONO_RAW_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_RAW].nsec)); - DEFINE(VDSO_BOOTTIME_SEC, offsetof(struct vdso_data, basetime[CLOCK_BOOTTIME].sec)); - DEFINE(VDSO_BOOTTIME_NSEC, offsetof(struct vdso_data, basetime[CLOCK_BOOTTIME].nsec)); - DEFINE(VDSO_TAI_SEC, offsetof(struct vdso_data, basetime[CLOCK_TAI].sec)); - DEFINE(VDSO_TAI_NSEC, offsetof(struct vdso_data, basetime[CLOCK_TAI].nsec)); - DEFINE(VDSO_RT_COARSE_SEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME_COARSE].sec)); - DEFINE(VDSO_RT_COARSE_NSEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME_COARSE].nsec)); - DEFINE(VDSO_MONO_COARSE_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_COARSE].sec)); - DEFINE(VDSO_MONO_COARSE_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_COARSE].nsec)); - DEFINE(VDSO_TZ_MINWEST, offsetof(struct vdso_data, tz_minuteswest)); - DEFINE(VDSO_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime)); - BLANK(); - DEFINE(TVAL_TV_SEC, offsetof(struct timeval, tv_sec)); - DEFINE(TSPEC_TV_SEC, offsetof(struct timespec, tv_sec)); - BLANK(); - DEFINE(TZ_MINWEST, offsetof(struct timezone, 
tz_minuteswest)); - DEFINE(TZ_DSTTIME, offsetof(struct timezone, tz_dsttime)); - BLANK(); DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack)); DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task)); BLANK();
On 24/06/2019 14:36, Will Deacon wrote: > Hi Vincenzo, > > On Fri, Jun 21, 2019 at 10:52:31AM +0100, Vincenzo Frascino wrote: >> To take advantage of the commonly defined vdso interface for >> gettimeofday the architectural code requires an adaptation. >> >> Re-implement the gettimeofday vdso in C in order to use lib/vdso. >> >> With the new implementation arm64 gains support for CLOCK_BOOTTIME >> and CLOCK_TAI. >> >> Cc: Catalin Marinas <catalin.marinas@arm.com> >> Cc: Will Deacon <will.deacon@arm.com> >> Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com> >> Tested-by: Shijith Thotton <sthotton@marvell.com> >> Tested-by: Andre Przywara <andre.przywara@arm.com> >> --- >> arch/arm64/Kconfig | 2 + >> arch/arm64/include/asm/vdso/gettimeofday.h | 86 ++++++ >> arch/arm64/include/asm/vdso/vsyscall.h | 53 ++++ >> arch/arm64/include/asm/vdso_datapage.h | 48 --- >> arch/arm64/kernel/asm-offsets.c | 33 +- >> arch/arm64/kernel/vdso.c | 51 +--- >> arch/arm64/kernel/vdso/Makefile | 34 ++- >> arch/arm64/kernel/vdso/gettimeofday.S | 334 --------------------- >> arch/arm64/kernel/vdso/vgettimeofday.c | 28 ++ > > I'm concerned about an apparent semantic change introduced by your patch: > >> +static __always_inline u64 __arch_get_hw_counter(s32 clock_mode) >> +{ >> + u64 res; >> + >> + asm volatile("mrs %0, cntvct_el0" : "=r" (res) :: "memory"); >> + >> + return res; >> +} > > vs: > >> - .macro get_clock_shifted_nsec res, cycle_last, mult >> - /* Read the virtual counter. */ >> - isb >> - mrs x_tmp, cntvct_el0 >> - /* Calculate cycle delta and convert to ns. */ >> - sub \res, x_tmp, \cycle_last >> - /* We can only guarantee 56 bits of precision. */ >> - movn x_tmp, #0xff00, lsl #48 >> - and \res, x_tmp, \res >> - mul \res, \res, \mult >> - /* >> - * Fake address dependency from the value computed from the counter >> - * register to subsequent data page accesses so that the sequence >> - * locking also orders the read of the counter. >> - */ >> - and x_tmp, \res, xzr >> - add vdso_data, vdso_data, x_tmp >> - .endm > > It looks like you're dropping both the preceding ISB (allowing the counter > value to be speculated) and also the subsequent dependency (allowing the > seq lock to be speculated). If I've missed them, apologies, but I couldn't > spot them elsewhere in this patch. > > __arch_get_hw_counter should probably be identical to __arch_counter_get_cntvct > to avoid these problems. I guess we don't need to care about the case where > the counter is unstable, since we'll just disable the vDSO altogether on > such systems? > Oops, I forgot to mirror your patch that introduces this change. I will post a fix in reply to this email. > Will >
On Fri, Jun 21, 2019 at 10:52:31AM +0100, Vincenzo Frascino wrote: > To take advantage of the commonly defined vdso interface for > gettimeofday the architectural code requires an adaptation. > > Re-implement the gettimeofday vdso in C in order to use lib/vdso. > > With the new implementation arm64 gains support for CLOCK_BOOTTIME > and CLOCK_TAI. > > Cc: Catalin Marinas <catalin.marinas@arm.com> > Cc: Will Deacon <will.deacon@arm.com> > Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com> > Tested-by: Shijith Thotton <sthotton@marvell.com> > Tested-by: Andre Przywara <andre.przywara@arm.com> [...] > diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h > new file mode 100644 > index 000000000000..bc3cb6738051 > --- /dev/null > +++ b/arch/arm64/include/asm/vdso/gettimeofday.h > @@ -0,0 +1,86 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +/* > + * Copyright (C) 2018 ARM Limited > + */ > +#ifndef __ASM_VDSO_GETTIMEOFDAY_H > +#define __ASM_VDSO_GETTIMEOFDAY_H > + > +#ifndef __ASSEMBLY__ > + > +#include <asm/unistd.h> > +#include <uapi/linux/time.h> > + > +#define VDSO_HAS_CLOCK_GETRES 1 > + > +static __always_inline int gettimeofday_fallback( > + struct __kernel_old_timeval *_tv, > + struct timezone *_tz) Out of interest, does this need to be __always_inline? > +{ > + register struct timezone *tz asm("x1") = _tz; > + register struct __kernel_old_timeval *tv asm("x0") = _tv; > + register long ret asm ("x0"); > + register long nr asm("x8") = __NR_gettimeofday; > + > + asm volatile( > + " svc #0\n" Can inlining of this function result in non-trivial expressions being substituted for _tz or _tv? A function call can clobber register asm vars that are assigned to the caller-save registers or that the PCS uses for function arguments, and the situations where this can happen are poorly defined AFAICT. There's also no reliable way to detect at build time whether the compiler has done this, and no robust way to stop if happening. (IMHO the compiler is wrong to do this, but it's been that way for ever, and I think I saw GCC 9 show this behaviour recently when I was investigating something related.) To be safe, it's better to put this out of line, or remove the reg asm() specifiers, mark x0-x18 and lr as clobbered here (so that the compiler doesn't map arguments to them), and put movs in the asm to move things into the right registers. The syscall number can be passed with an "i" constraint. (And yes, this sucks.) If the code this is inlined in is simple enough though, we can be fairly confident of getting away with it. [...] Cheers ---Dave
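For reference, a rough, untested sketch of the more defensive option Dave outlines (drop the register asm() bindings, clobber x0-x18 and lr so the compiler keeps the operands in callee-saved registers, do the final moves inside the asm, and pass the syscall number as an "i" immediate) might look like the following. The constraints and layout here are illustrative only, not taken from the patch under review.

static __always_inline int gettimeofday_fallback(
				struct __kernel_old_timeval *_tv,
				struct timezone *_tz)
{
	long ret;

	asm volatile(
	"	mov	x0, %1\n"
	"	mov	x1, %2\n"
	"	mov	x8, %3\n"
	"	svc	#0\n"
	"	mov	%0, x0\n"
	: "=r" (ret)
	: "r" (_tv), "r" (_tz), "i" (__NR_gettimeofday)
	: "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9",
	  "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x18",
	  "x30", "memory");

	return ret;
}

The price is a few extra moves and some register pressure around the call site, which is the "this sucks" part of the trade-off Dave mentions.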
Hi Dave, On 25/06/2019 16:33, Dave Martin wrote: > On Fri, Jun 21, 2019 at 10:52:31AM +0100, Vincenzo Frascino wrote: >> To take advantage of the commonly defined vdso interface for >> gettimeofday the architectural code requires an adaptation. >> >> Re-implement the gettimeofday vdso in C in order to use lib/vdso. >> >> With the new implementation arm64 gains support for CLOCK_BOOTTIME >> and CLOCK_TAI. >> >> Cc: Catalin Marinas <catalin.marinas@arm.com> >> Cc: Will Deacon <will.deacon@arm.com> >> Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com> >> Tested-by: Shijith Thotton <sthotton@marvell.com> >> Tested-by: Andre Przywara <andre.przywara@arm.com> > > [...] > >> diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h >> new file mode 100644 >> index 000000000000..bc3cb6738051 >> --- /dev/null >> +++ b/arch/arm64/include/asm/vdso/gettimeofday.h >> @@ -0,0 +1,86 @@ >> +/* SPDX-License-Identifier: GPL-2.0 */ >> +/* >> + * Copyright (C) 2018 ARM Limited >> + */ >> +#ifndef __ASM_VDSO_GETTIMEOFDAY_H >> +#define __ASM_VDSO_GETTIMEOFDAY_H >> + >> +#ifndef __ASSEMBLY__ >> + >> +#include <asm/unistd.h> >> +#include <uapi/linux/time.h> >> + >> +#define VDSO_HAS_CLOCK_GETRES 1 >> + >> +static __always_inline int gettimeofday_fallback( >> + struct __kernel_old_timeval *_tv, >> + struct timezone *_tz) > > Out of interest, does this need to be __always_inline? > It is a design choice. Philosophically, I prefer to control and reduce the scope of the decisions the compiler has to make in order to not have surprises. >> +{ >> + register struct timezone *tz asm("x1") = _tz; >> + register struct __kernel_old_timeval *tv asm("x0") = _tv; >> + register long ret asm ("x0"); >> + register long nr asm("x8") = __NR_gettimeofday; >> + >> + asm volatile( >> + " svc #0\n" > > Can inlining of this function result in non-trivial expressions being > substituted for _tz or _tv? > > A function call can clobber register asm vars that are assigned to the > caller-save registers or that the PCS uses for function arguments, and > the situations where this can happen are poorly defined AFAICT. There's > also no reliable way to detect at build time whether the compiler has > done this, and no robust way to stop if happening. > > (IMHO the compiler is wrong to do this, but it's been that way for ever, > and I think I saw GCC 9 show this behaviour recently when I was > investigating something related.) > > > To be safe, it's better to put this out of line, or remove the reg asm() > specifiers, mark x0-x18 and lr as clobbered here (so that the compiler > doesn't map arguments to them), and put movs in the asm to move things > into the right registers. The syscall number can be passed with an "i" > constraint. (And yes, this sucks.) > > If the code this is inlined in is simple enough though, we can be fairly > confident of getting away with it. > I took very seriously what you are mentioning here because I think that robustness of the code comes before than everything especially in the kernel and I carried on some experiments to try to verify if in this case is safe to assume that the compiler is doing the right thing. Based on my investigation and on previous observations of the generation of the vDSO library, I can conclude that the approach seems safe due to the fact that the usage of this code is very limited, the code itself is simple enough and that gcc would inline this code anyway based on the current compilation options. 
The experiment that I did was to define some self-contained code that tries to mimic what you are describing and compile it with 3 different versions of gcc (6.4, 8.1 and 8.3) and in all the tree cases the behavior seems correct. Code: ===== typedef int ssize_t; typedef int size_t; static int my_strlen(const char *s) { int i = 0; while (s[i] == '\0') i++; return i; } static inline ssize_t my_syscall(int fd, const void *buf, size_t count) { register ssize_t arg1 asm ("x0") = fd; register const void *arg2 asm ("x1") = buf; register size_t arg3 asm ("x2") = count; __asm__ volatile ( "mov x8, #64\n" "svc #0\n" : "=&r" (arg1) : "r" (arg2), "r" (arg3) : "x8" ); return arg1; } void sys_caller(const char *s) { my_syscall(1, s, my_strlen(s)); } GCC 8.3.0: ========== main.8.3.0.o: file format elf64-littleaarch64 Disassembly of section .text: 0000000000000000 <sys_caller>: 0: 39400001 ldrb w1, [x0] 4: 35000161 cbnz w1, 30 <sys_caller+0x30> 8: d2800023 mov x3, #0x1 // #1 c: d1000404 sub x4, x0, #0x1 10: 2a0303e2 mov w2, w3 14: 91000463 add x3, x3, #0x1 18: 38636881 ldrb w1, [x4, x3] 1c: 34ffffa1 cbz w1, 10 <sys_caller+0x10> 20: aa0003e1 mov x1, x0 24: d2800808 mov x8, #0x40 // #64 28: d4000001 svc #0x0 2c: d65f03c0 ret 30: 52800002 mov w2, #0x0 // #0 34: 17fffffb b 20 <sys_caller+0x20> GCC 8.1.0: ========== main.8.1.0.o: file format elf64-littleaarch64 Disassembly of section .text: 0000000000000000 <sys_caller>: 0: 39400001 ldrb w1, [x0] 4: 35000161 cbnz w1, 30 <sys_caller+0x30> 8: d2800023 mov x3, #0x1 // #1 c: d1000404 sub x4, x0, #0x1 10: 2a0303e2 mov w2, w3 14: 91000463 add x3, x3, #0x1 18: 38636881 ldrb w1, [x4, x3] 1c: 34ffffa1 cbz w1, 10 <sys_caller+0x10> 20: aa0003e1 mov x1, x0 24: d2800808 mov x8, #0x40 // #64 28: d4000001 svc #0x0 2c: d65f03c0 ret 30: 52800002 mov w2, #0x0 // #0 34: 17fffffb b 20 <sys_caller+0x20> GCC 6.4.0: ========== main.6.4.0.o: file format elf64-littleaarch64 Disassembly of section .text: 0000000000000000 <sys_caller>: 0: 39400001 ldrb w1, [x0] 4: 35000161 cbnz w1, 30 <sys_caller+0x30> 8: d2800023 mov x3, #0x1 // #1 c: d1000404 sub x4, x0, #0x1 10: 2a0303e2 mov w2, w3 14: 91000463 add x3, x3, #0x1 18: 38636881 ldrb w1, [x4, x3] 1c: 34ffffa1 cbz w1, 10 <sys_caller+0x10> 20: aa0003e1 mov x1, x0 24: d2800808 mov x8, #0x40 // #64 28: d4000001 svc #0x0 2c: d65f03c0 ret 30: 52800002 mov w2, #0x0 // #0 34: 17fffffb b 20 <sys_caller+0x20> > [...] > > Cheers > ---Dave >
On Wed, Jun 26, 2019 at 02:27:59PM +0100, Vincenzo Frascino wrote: > Hi Dave, > > On 25/06/2019 16:33, Dave Martin wrote: > > On Fri, Jun 21, 2019 at 10:52:31AM +0100, Vincenzo Frascino wrote: > >> To take advantage of the commonly defined vdso interface for > >> gettimeofday the architectural code requires an adaptation. > >> > >> Re-implement the gettimeofday vdso in C in order to use lib/vdso. > >> > >> With the new implementation arm64 gains support for CLOCK_BOOTTIME > >> and CLOCK_TAI. > >> > >> Cc: Catalin Marinas <catalin.marinas@arm.com> > >> Cc: Will Deacon <will.deacon@arm.com> > >> Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com> > >> Tested-by: Shijith Thotton <sthotton@marvell.com> > >> Tested-by: Andre Przywara <andre.przywara@arm.com> > > > > [...] > > > >> diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h > >> new file mode 100644 > >> index 000000000000..bc3cb6738051 > >> --- /dev/null > >> +++ b/arch/arm64/include/asm/vdso/gettimeofday.h > >> @@ -0,0 +1,86 @@ > >> +/* SPDX-License-Identifier: GPL-2.0 */ > >> +/* > >> + * Copyright (C) 2018 ARM Limited > >> + */ > >> +#ifndef __ASM_VDSO_GETTIMEOFDAY_H > >> +#define __ASM_VDSO_GETTIMEOFDAY_H > >> + > >> +#ifndef __ASSEMBLY__ > >> + > >> +#include <asm/unistd.h> > >> +#include <uapi/linux/time.h> > >> + > >> +#define VDSO_HAS_CLOCK_GETRES 1 > >> + > >> +static __always_inline int gettimeofday_fallback( > >> + struct __kernel_old_timeval *_tv, > >> + struct timezone *_tz) > > > > Out of interest, does this need to be __always_inline? > > > > It is a design choice. Philosophically, I prefer to control and reduce the scope > of the decisions the compiler has to make in order to not have surprises. > > >> +{ > >> + register struct timezone *tz asm("x1") = _tz; > >> + register struct __kernel_old_timeval *tv asm("x0") = _tv; > >> + register long ret asm ("x0"); > >> + register long nr asm("x8") = __NR_gettimeofday; > >> + > >> + asm volatile( > >> + " svc #0\n" > > > > Can inlining of this function result in non-trivial expressions being > > substituted for _tz or _tv? > > > > A function call can clobber register asm vars that are assigned to the > > caller-save registers or that the PCS uses for function arguments, and > > the situations where this can happen are poorly defined AFAICT. There's > > also no reliable way to detect at build time whether the compiler has > > done this, and no robust way to stop if happening. > > > > (IMHO the compiler is wrong to do this, but it's been that way for ever, > > and I think I saw GCC 9 show this behaviour recently when I was > > investigating something related.) > > > > > > To be safe, it's better to put this out of line, or remove the reg asm() > > specifiers, mark x0-x18 and lr as clobbered here (so that the compiler > > doesn't map arguments to them), and put movs in the asm to move things > > into the right registers. The syscall number can be passed with an "i" > > constraint. (And yes, this sucks.) > > > > If the code this is inlined in is simple enough though, we can be fairly > > confident of getting away with it. > > > > I took very seriously what you are mentioning here because I think > that robustness of the code comes before than everything especially > in the kernel and I carried on some experiments to try to verify if > in this case is safe to assume that the compiler is doing the right > thing. 
> > Based on my investigation and on previous observations of the > generation of the vDSO library, I can conclude that the approach > seems safe due to the fact that the usage of this code is very > limited, the code itself is simple enough and that gcc would inline > this code anyway based on the current compilation options. I'd caution about "seems safe". A lot of subtly wrong code not only seems safe, but _is_ safe in its original context, in practice. Add some code to the vdso over time though, or tweak the compilation options at some point in the future, or use a different compiler, and things could still go wrong. (Further comments below.) > The experiment that I did was to define some self-contained code that > tries to mimic what you are describing and compile it with 3 > different versions of gcc (6.4, 8.1 and 8.3) and in all the tree > cases the behavior seems correct. > > Code: > ===== > > typedef int ssize_t; > typedef int size_t; > > static int my_strlen(const char *s) > { > int i = 0; > > while (s[i] == '\0') > i++; > > return i; > } > > static inline ssize_t my_syscall(int fd, const void *buf, size_t count) > { > register ssize_t arg1 asm ("x0") = fd; > register const void *arg2 asm ("x1") = buf; > register size_t arg3 asm ("x2") = count; > > __asm__ volatile ( > "mov x8, #64\n" > "svc #0\n" > : "=&r" (arg1) > : "r" (arg2), "r" (arg3) > : "x8" > ); > > return arg1; > } > > void sys_caller(const char *s) > { > my_syscall(1, s, my_strlen(s)); > } > > > GCC 8.3.0: > ========== > > main.8.3.0.o: file format elf64-littleaarch64 > > > Disassembly of section .text: > > 0000000000000000 <sys_caller>: > 0: 39400001 ldrb w1, [x0] > 4: 35000161 cbnz w1, 30 <sys_caller+0x30> > 8: d2800023 mov x3, #0x1 // #1 > c: d1000404 sub x4, x0, #0x1 > 10: 2a0303e2 mov w2, w3 > 14: 91000463 add x3, x3, #0x1 > 18: 38636881 ldrb w1, [x4, x3] > 1c: 34ffffa1 cbz w1, 10 <sys_caller+0x10> > 20: aa0003e1 mov x1, x0 > 24: d2800808 mov x8, #0x40 // #64 > 28: d4000001 svc #0x0 > 2c: d65f03c0 ret > 30: 52800002 mov w2, #0x0 // #0 > 34: 17fffffb b 20 <sys_caller+0x20> > > > GCC 8.1.0: > ========== > > main.8.1.0.o: file format elf64-littleaarch64 > > > Disassembly of section .text: > > 0000000000000000 <sys_caller>: > 0: 39400001 ldrb w1, [x0] > 4: 35000161 cbnz w1, 30 <sys_caller+0x30> > 8: d2800023 mov x3, #0x1 // #1 > c: d1000404 sub x4, x0, #0x1 > 10: 2a0303e2 mov w2, w3 > 14: 91000463 add x3, x3, #0x1 > 18: 38636881 ldrb w1, [x4, x3] > 1c: 34ffffa1 cbz w1, 10 <sys_caller+0x10> > 20: aa0003e1 mov x1, x0 > 24: d2800808 mov x8, #0x40 // #64 > 28: d4000001 svc #0x0 > 2c: d65f03c0 ret > 30: 52800002 mov w2, #0x0 // #0 > 34: 17fffffb b 20 <sys_caller+0x20> > > > > GCC 6.4.0: > ========== > > main.6.4.0.o: file format elf64-littleaarch64 > > > Disassembly of section .text: > > 0000000000000000 <sys_caller>: > 0: 39400001 ldrb w1, [x0] > 4: 35000161 cbnz w1, 30 <sys_caller+0x30> > 8: d2800023 mov x3, #0x1 // #1 > c: d1000404 sub x4, x0, #0x1 > 10: 2a0303e2 mov w2, w3 > 14: 91000463 add x3, x3, #0x1 > 18: 38636881 ldrb w1, [x4, x3] > 1c: 34ffffa1 cbz w1, 10 <sys_caller+0x10> > 20: aa0003e1 mov x1, x0 > 24: d2800808 mov x8, #0x40 // #64 > 28: d4000001 svc #0x0 > 2c: d65f03c0 ret > 30: 52800002 mov w2, #0x0 // #0 > 34: 17fffffb b 20 <sys_caller+0x20> Thanks for having a go at this. If the compiler can show the problematic behaviour, it looks like your could could probably trigger it, and as you observe, it doesn't trigger. 
I am sure I have seen it in the past, but today I am struggling to tickle the compiler in the right way. My original reproducer may have involved LTO, but either way I don't still have it :( The classic example of this (triggered directly and not due to inlining) would be something like: int bar(int, int); void foo(int x, int y) { register int x_ asm("r0") = x; register int y_ asm("r1") = bar(x, y); asm volatile ( "svc #0" :: "r" (x_), "r" (y_) : "memory" ); } -> 0000000000000000 <foo>: 0: a9bf7bfd stp x29, x30, [sp, #-16]! 4: 910003fd mov x29, sp 8: 94000000 bl 0 <bar> c: 2a0003e1 mov w1, w0 10: d4000001 svc #0x0 14: a8c17bfd ldp x29, x30, [sp], #16 18: d65f03c0 ret The gcc documentation is vague and ambiguous about precisely whan this can happen and about how to avoid it. The case where this behaviour is triggered by inlining an expression that involves a (possibly implicit) function call seems hard to reproduce. However, the workaround is cheap, and to avoid the chance of subtle intermittent code gen bugs it may be worth it: void foo(int x, int y) { asm volatile ( "mov x0, %0\n\t" "mov x1, %1\n\t" "svc #0" :: "r" (x), "r" (bar(x, y)) : "r0", "r1", "memory" ); } -> 0000000000000000 <foo>: 0: a9be7bfd stp x29, x30, [sp, #-32]! 4: 910003fd mov x29, sp 8: f9000bf3 str x19, [sp, #16] c: 2a0003f3 mov w19, w0 10: 94000000 bl 0 <bar> 14: 2a0003e2 mov w2, w0 18: aa1303e0 mov x0, x19 1c: aa0203e1 mov x1, x2 20: d4000001 svc #0x0 24: f9400bf3 ldr x19, [sp, #16] 28: a8c27bfd ldp x29, x30, [sp], #32 2c: d65f03c0 ret What do you think? Cheers ---Dave
Hi Dave, thank you for the quick turn around. On 6/26/19 5:14 PM, Dave Martin wrote: > On Wed, Jun 26, 2019 at 02:27:59PM +0100, Vincenzo Frascino wrote: >> Hi Dave, >> >> On 25/06/2019 16:33, Dave Martin wrote: >>> On Fri, Jun 21, 2019 at 10:52:31AM +0100, Vincenzo Frascino wrote: >>>> To take advantage of the commonly defined vdso interface for >>>> gettimeofday the architectural code requires an adaptation. >>>> >>>> Re-implement the gettimeofday vdso in C in order to use lib/vdso. >>>> >>>> With the new implementation arm64 gains support for CLOCK_BOOTTIME >>>> and CLOCK_TAI. >>>> >>>> Cc: Catalin Marinas <catalin.marinas@arm.com> >>>> Cc: Will Deacon <will.deacon@arm.com> >>>> Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com> >>>> Tested-by: Shijith Thotton <sthotton@marvell.com> >>>> Tested-by: Andre Przywara <andre.przywara@arm.com> >>> >>> [...] >>> >>>> diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h >>>> new file mode 100644 >>>> index 000000000000..bc3cb6738051 >>>> --- /dev/null >>>> +++ b/arch/arm64/include/asm/vdso/gettimeofday.h >>>> @@ -0,0 +1,86 @@ >>>> +/* SPDX-License-Identifier: GPL-2.0 */ >>>> +/* >>>> + * Copyright (C) 2018 ARM Limited >>>> + */ >>>> +#ifndef __ASM_VDSO_GETTIMEOFDAY_H >>>> +#define __ASM_VDSO_GETTIMEOFDAY_H >>>> + >>>> +#ifndef __ASSEMBLY__ >>>> + >>>> +#include <asm/unistd.h> >>>> +#include <uapi/linux/time.h> >>>> + >>>> +#define VDSO_HAS_CLOCK_GETRES 1 >>>> + >>>> +static __always_inline int gettimeofday_fallback( >>>> + struct __kernel_old_timeval *_tv, >>>> + struct timezone *_tz) >>> >>> Out of interest, does this need to be __always_inline? >>> >> >> It is a design choice. Philosophically, I prefer to control and reduce the scope >> of the decisions the compiler has to make in order to not have surprises. >> >>>> +{ >>>> + register struct timezone *tz asm("x1") = _tz; >>>> + register struct __kernel_old_timeval *tv asm("x0") = _tv; >>>> + register long ret asm ("x0"); >>>> + register long nr asm("x8") = __NR_gettimeofday; >>>> + >>>> + asm volatile( >>>> + " svc #0\n" >>> >>> Can inlining of this function result in non-trivial expressions being >>> substituted for _tz or _tv? >>> >>> A function call can clobber register asm vars that are assigned to the >>> caller-save registers or that the PCS uses for function arguments, and >>> the situations where this can happen are poorly defined AFAICT. There's >>> also no reliable way to detect at build time whether the compiler has >>> done this, and no robust way to stop if happening. >>> >>> (IMHO the compiler is wrong to do this, but it's been that way for ever, >>> and I think I saw GCC 9 show this behaviour recently when I was >>> investigating something related.) >>> >>> >>> To be safe, it's better to put this out of line, or remove the reg asm() >>> specifiers, mark x0-x18 and lr as clobbered here (so that the compiler >>> doesn't map arguments to them), and put movs in the asm to move things >>> into the right registers. The syscall number can be passed with an "i" >>> constraint. (And yes, this sucks.) >>> >>> If the code this is inlined in is simple enough though, we can be fairly >>> confident of getting away with it. >>> >> >> I took very seriously what you are mentioning here because I think >> that robustness of the code comes before than everything especially >> in the kernel and I carried on some experiments to try to verify if >> in this case is safe to assume that the compiler is doing the right >> thing. 
>> >> Based on my investigation and on previous observations of the >> generation of the vDSO library, I can conclude that the approach >> seems safe due to the fact that the usage of this code is very >> limited, the code itself is simple enough and that gcc would inline >> this code anyway based on the current compilation options. > > I'd caution about "seems safe". A lot of subtly wrong code not only > seems safe, but _is_ safe in its original context, in practice. Add > some code to the vdso over time though, or tweak the compilation options > at some point in the future, or use a different compiler, and things > could still go wrong. > > (Further comments below.) > Allow me to provide a clarification on "seems safe" vs "is safe": my approach "seems safe" because I am providing empirical evidence to support my thesis, but I guess we both know that there is no simple way to prove in one way or another that the problem has a complete solution. The proposed problem involves suppositions on potential future code additions and changes of behavior of the compiler that I can't either control or prevent. In other words, I can comment and propose solutions only based on the current status of the things, and it is what my analysis targets, not on what will happen in future. I will reply point by point below. >> The experiment that I did was to define some self-contained code that >> tries to mimic what you are describing and compile it with 3 >> different versions of gcc (6.4, 8.1 and 8.3) and in all the tree >> cases the behavior seems correct. >> >> Code: >> ===== >> >> typedef int ssize_t; >> typedef int size_t; >> >> static int my_strlen(const char *s) >> { >> int i = 0; >> >> while (s[i] == '\0') >> i++; >> >> return i; >> } >> >> static inline ssize_t my_syscall(int fd, const void *buf, size_t count) >> { >> register ssize_t arg1 asm ("x0") = fd; >> register const void *arg2 asm ("x1") = buf; >> register size_t arg3 asm ("x2") = count; >> >> __asm__ volatile ( >> "mov x8, #64\n" >> "svc #0\n" >> : "=&r" (arg1) >> : "r" (arg2), "r" (arg3) >> : "x8" >> ); >> >> return arg1; >> } >> >> void sys_caller(const char *s) >> { >> my_syscall(1, s, my_strlen(s)); >> } >> >> >> GCC 8.3.0: >> ========== >> >> main.8.3.0.o: file format elf64-littleaarch64 >> >> >> Disassembly of section .text: >> >> 0000000000000000 <sys_caller>: >> 0: 39400001 ldrb w1, [x0] >> 4: 35000161 cbnz w1, 30 <sys_caller+0x30> >> 8: d2800023 mov x3, #0x1 // #1 >> c: d1000404 sub x4, x0, #0x1 >> 10: 2a0303e2 mov w2, w3 >> 14: 91000463 add x3, x3, #0x1 >> 18: 38636881 ldrb w1, [x4, x3] >> 1c: 34ffffa1 cbz w1, 10 <sys_caller+0x10> >> 20: aa0003e1 mov x1, x0 >> 24: d2800808 mov x8, #0x40 // #64 >> 28: d4000001 svc #0x0 >> 2c: d65f03c0 ret >> 30: 52800002 mov w2, #0x0 // #0 >> 34: 17fffffb b 20 <sys_caller+0x20> >> >> >> GCC 8.1.0: >> ========== >> >> main.8.1.0.o: file format elf64-littleaarch64 >> >> >> Disassembly of section .text: >> >> 0000000000000000 <sys_caller>: >> 0: 39400001 ldrb w1, [x0] >> 4: 35000161 cbnz w1, 30 <sys_caller+0x30> >> 8: d2800023 mov x3, #0x1 // #1 >> c: d1000404 sub x4, x0, #0x1 >> 10: 2a0303e2 mov w2, w3 >> 14: 91000463 add x3, x3, #0x1 >> 18: 38636881 ldrb w1, [x4, x3] >> 1c: 34ffffa1 cbz w1, 10 <sys_caller+0x10> >> 20: aa0003e1 mov x1, x0 >> 24: d2800808 mov x8, #0x40 // #64 >> 28: d4000001 svc #0x0 >> 2c: d65f03c0 ret >> 30: 52800002 mov w2, #0x0 // #0 >> 34: 17fffffb b 20 <sys_caller+0x20> >> >> >> >> GCC 6.4.0: >> ========== >> >> main.6.4.0.o: file format elf64-littleaarch64 >> >> >> Disassembly 
of section .text: >> >> 0000000000000000 <sys_caller>: >> 0: 39400001 ldrb w1, [x0] >> 4: 35000161 cbnz w1, 30 <sys_caller+0x30> >> 8: d2800023 mov x3, #0x1 // #1 >> c: d1000404 sub x4, x0, #0x1 >> 10: 2a0303e2 mov w2, w3 >> 14: 91000463 add x3, x3, #0x1 >> 18: 38636881 ldrb w1, [x4, x3] >> 1c: 34ffffa1 cbz w1, 10 <sys_caller+0x10> >> 20: aa0003e1 mov x1, x0 >> 24: d2800808 mov x8, #0x40 // #64 >> 28: d4000001 svc #0x0 >> 2c: d65f03c0 ret >> 30: 52800002 mov w2, #0x0 // #0 >> 34: 17fffffb b 20 <sys_caller+0x20> > > Thanks for having a go at this. If the compiler can show the > problematic behaviour, it looks like your could could probably trigger > it, and as you observe, it doesn't trigger. > > I am sure I have seen it in the past, but today I am struggling > to tickle the compiler in the right way. My original reproducer may > have involved LTO, but either way I don't still have it :( > vDSO library is a shared object not compiled with LTO as far as I can see, hence if this involved LTO should not applicable in this case. > > The classic example of this (triggered directly and not due to inlining) > would be something like: > > int bar(int, int); > > void foo(int x, int y) > { > register int x_ asm("r0") = x; > register int y_ asm("r1") = bar(x, y); > > asm volatile ( > "svc #0" > :: "r" (x_), "r" (y_) > : "memory" > ); > } > > -> > > 0000000000000000 <foo>: > 0: a9bf7bfd stp x29, x30, [sp, #-16]! > 4: 910003fd mov x29, sp > 8: 94000000 bl 0 <bar> > c: 2a0003e1 mov w1, w0 > 10: d4000001 svc #0x0 > 14: a8c17bfd ldp x29, x30, [sp], #16 > 18: d65f03c0 ret > Contextualized to what my vdso fallback functions do, this should not be a concern because in no case a function result is directly set to a variable declared as register. Since the vdso fallback functions serve a very specific and limited purpose, I do not expect that that code is going to change much in future. The only thing that can happen is something similar to what I wrote in my example, which as I empirically proved does not trigger the problematic behavior. > > The gcc documentation is vague and ambiguous about precisely whan this > can happen and about how to avoid it. > On this I agree, it is not very clear, but this seems more something to raise with the gcc folks in order to have a more "explicit" description that leaves no room to the interpretation. ... > > However, the workaround is cheap, and to avoid the chance of subtle > intermittent code gen bugs it may be worth it: > > void foo(int x, int y) > { > asm volatile ( > "mov x0, %0\n\t" > "mov x1, %1\n\t" > "svc #0" > :: "r" (x), "r" (bar(x, y)) > : "r0", "r1", "memory" > ); > } > > -> > > 0000000000000000 <foo>: > 0: a9be7bfd stp x29, x30, [sp, #-32]! > 4: 910003fd mov x29, sp > 8: f9000bf3 str x19, [sp, #16] > c: 2a0003f3 mov w19, w0 > 10: 94000000 bl 0 <bar> > 14: 2a0003e2 mov w2, w0 > 18: aa1303e0 mov x0, x19 > 1c: aa0203e1 mov x1, x2 > 20: d4000001 svc #0x0 > 24: f9400bf3 ldr x19, [sp, #16] > 28: a8c27bfd ldp x29, x30, [sp], #32 > 2c: d65f03c0 ret > > > What do you think? > The solution seems ok, thanks for providing it, but IMHO I think we should find a workaround for something that is broken, which, unless I am missing something major, this seems not the case. > Cheers > ---Dave >
On Wed, Jun 26, 2019 at 08:01:58PM +0100, Vincenzo Frascino wrote: [...] > On 6/26/19 5:14 PM, Dave Martin wrote: > > On Wed, Jun 26, 2019 at 02:27:59PM +0100, Vincenzo Frascino wrote: > >> Hi Dave, > >> > >> On 25/06/2019 16:33, Dave Martin wrote: > >>> On Fri, Jun 21, 2019 at 10:52:31AM +0100, Vincenzo Frascino wrote: > >>>> To take advantage of the commonly defined vdso interface for > >>>> gettimeofday the architectural code requires an adaptation. > >>>> > >>>> Re-implement the gettimeofday vdso in C in order to use lib/vdso. > >>>> > >>>> With the new implementation arm64 gains support for CLOCK_BOOTTIME > >>>> and CLOCK_TAI. > >>>> > >>>> Cc: Catalin Marinas <catalin.marinas@arm.com> > >>>> Cc: Will Deacon <will.deacon@arm.com> > >>>> Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com> > >>>> Tested-by: Shijith Thotton <sthotton@marvell.com> > >>>> Tested-by: Andre Przywara <andre.przywara@arm.com> > >>> > >>> [...] > >>> > >>>> diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h > >>>> new file mode 100644 > >>>> index 000000000000..bc3cb6738051 > >>>> --- /dev/null > >>>> +++ b/arch/arm64/include/asm/vdso/gettimeofday.h > >>>> @@ -0,0 +1,86 @@ > >>>> +/* SPDX-License-Identifier: GPL-2.0 */ > >>>> +/* > >>>> + * Copyright (C) 2018 ARM Limited > >>>> + */ > >>>> +#ifndef __ASM_VDSO_GETTIMEOFDAY_H > >>>> +#define __ASM_VDSO_GETTIMEOFDAY_H > >>>> + > >>>> +#ifndef __ASSEMBLY__ > >>>> + > >>>> +#include <asm/unistd.h> > >>>> +#include <uapi/linux/time.h> > >>>> + > >>>> +#define VDSO_HAS_CLOCK_GETRES 1 > >>>> + > >>>> +static __always_inline int gettimeofday_fallback( > >>>> + struct __kernel_old_timeval *_tv, > >>>> + struct timezone *_tz) > >>> > >>> Out of interest, does this need to be __always_inline? > >>> > >> > >> It is a design choice. Philosophically, I prefer to control and reduce the scope > >> of the decisions the compiler has to make in order to not have surprises. > >> > >>>> +{ > >>>> + register struct timezone *tz asm("x1") = _tz; > >>>> + register struct __kernel_old_timeval *tv asm("x0") = _tv; > >>>> + register long ret asm ("x0"); > >>>> + register long nr asm("x8") = __NR_gettimeofday; > >>>> + > >>>> + asm volatile( > >>>> + " svc #0\n" > >>> > >>> Can inlining of this function result in non-trivial expressions being > >>> substituted for _tz or _tv? > >>> > >>> A function call can clobber register asm vars that are assigned to the > >>> caller-save registers or that the PCS uses for function arguments, and > >>> the situations where this can happen are poorly defined AFAICT. There's > >>> also no reliable way to detect at build time whether the compiler has > >>> done this, and no robust way to stop if happening. > >>> > >>> (IMHO the compiler is wrong to do this, but it's been that way for ever, > >>> and I think I saw GCC 9 show this behaviour recently when I was > >>> investigating something related.) > >>> > >>> > >>> To be safe, it's better to put this out of line, or remove the reg asm() > >>> specifiers, mark x0-x18 and lr as clobbered here (so that the compiler > >>> doesn't map arguments to them), and put movs in the asm to move things > >>> into the right registers. The syscall number can be passed with an "i" > >>> constraint. (And yes, this sucks.) > >>> > >>> If the code this is inlined in is simple enough though, we can be fairly > >>> confident of getting away with it. 
> >>> > >> > >> I took very seriously what you are mentioning here because I think > >> that robustness of the code comes before than everything especially > >> in the kernel and I carried on some experiments to try to verify if > >> in this case is safe to assume that the compiler is doing the right > >> thing. > >> > >> Based on my investigation and on previous observations of the > >> generation of the vDSO library, I can conclude that the approach > >> seems safe due to the fact that the usage of this code is very > >> limited, the code itself is simple enough and that gcc would inline > >> this code anyway based on the current compilation options. > > > > I'd caution about "seems safe". A lot of subtly wrong code not only > > seems safe, but _is_ safe in its original context, in practice. Add > > some code to the vdso over time though, or tweak the compilation options > > at some point in the future, or use a different compiler, and things > > could still go wrong. > > > > (Further comments below.) > > > > Allow me to provide a clarification on "seems safe" vs "is safe": my approach > "seems safe" because I am providing empirical evidence to support my thesis, but > I guess we both know that there is no simple way to prove in one way or another > that the problem has a complete solution. > The proposed problem involves suppositions on potential future code additions > and changes of behavior of the compiler that I can't either control or prevent. > In other words, I can comment and propose solutions only based on the current > status of the things, and it is what my analysis targets, not on what will > happen in future. > > I will reply point by point below. > > >> The experiment that I did was to define some self-contained code that > >> tries to mimic what you are describing and compile it with 3 > >> different versions of gcc (6.4, 8.1 and 8.3) and in all the tree > >> cases the behavior seems correct. 
> >> > >> Code: > >> ===== > >> > >> typedef int ssize_t; > >> typedef int size_t; > >> > >> static int my_strlen(const char *s) > >> { > >> int i = 0; > >> > >> while (s[i] == '\0') > >> i++; > >> > >> return i; > >> } > >> > >> static inline ssize_t my_syscall(int fd, const void *buf, size_t count) > >> { > >> register ssize_t arg1 asm ("x0") = fd; > >> register const void *arg2 asm ("x1") = buf; > >> register size_t arg3 asm ("x2") = count; > >> > >> __asm__ volatile ( > >> "mov x8, #64\n" > >> "svc #0\n" > >> : "=&r" (arg1) > >> : "r" (arg2), "r" (arg3) > >> : "x8" > >> ); > >> > >> return arg1; > >> } > >> > >> void sys_caller(const char *s) > >> { > >> my_syscall(1, s, my_strlen(s)); > >> } > >> > >> > >> GCC 8.3.0: > >> ========== > >> > >> main.8.3.0.o: file format elf64-littleaarch64 > >> > >> > >> Disassembly of section .text: > >> > >> 0000000000000000 <sys_caller>: > >> 0: 39400001 ldrb w1, [x0] > >> 4: 35000161 cbnz w1, 30 <sys_caller+0x30> > >> 8: d2800023 mov x3, #0x1 // #1 > >> c: d1000404 sub x4, x0, #0x1 > >> 10: 2a0303e2 mov w2, w3 > >> 14: 91000463 add x3, x3, #0x1 > >> 18: 38636881 ldrb w1, [x4, x3] > >> 1c: 34ffffa1 cbz w1, 10 <sys_caller+0x10> > >> 20: aa0003e1 mov x1, x0 > >> 24: d2800808 mov x8, #0x40 // #64 > >> 28: d4000001 svc #0x0 > >> 2c: d65f03c0 ret > >> 30: 52800002 mov w2, #0x0 // #0 > >> 34: 17fffffb b 20 <sys_caller+0x20> > >> > >> > >> GCC 8.1.0: > >> ========== > >> > >> main.8.1.0.o: file format elf64-littleaarch64 > >> > >> > >> Disassembly of section .text: > >> > >> 0000000000000000 <sys_caller>: > >> 0: 39400001 ldrb w1, [x0] > >> 4: 35000161 cbnz w1, 30 <sys_caller+0x30> > >> 8: d2800023 mov x3, #0x1 // #1 > >> c: d1000404 sub x4, x0, #0x1 > >> 10: 2a0303e2 mov w2, w3 > >> 14: 91000463 add x3, x3, #0x1 > >> 18: 38636881 ldrb w1, [x4, x3] > >> 1c: 34ffffa1 cbz w1, 10 <sys_caller+0x10> > >> 20: aa0003e1 mov x1, x0 > >> 24: d2800808 mov x8, #0x40 // #64 > >> 28: d4000001 svc #0x0 > >> 2c: d65f03c0 ret > >> 30: 52800002 mov w2, #0x0 // #0 > >> 34: 17fffffb b 20 <sys_caller+0x20> > >> > >> > >> > >> GCC 6.4.0: > >> ========== > >> > >> main.6.4.0.o: file format elf64-littleaarch64 > >> > >> > >> Disassembly of section .text: > >> > >> 0000000000000000 <sys_caller>: > >> 0: 39400001 ldrb w1, [x0] > >> 4: 35000161 cbnz w1, 30 <sys_caller+0x30> > >> 8: d2800023 mov x3, #0x1 // #1 > >> c: d1000404 sub x4, x0, #0x1 > >> 10: 2a0303e2 mov w2, w3 > >> 14: 91000463 add x3, x3, #0x1 > >> 18: 38636881 ldrb w1, [x4, x3] > >> 1c: 34ffffa1 cbz w1, 10 <sys_caller+0x10> > >> 20: aa0003e1 mov x1, x0 > >> 24: d2800808 mov x8, #0x40 // #64 > >> 28: d4000001 svc #0x0 > >> 2c: d65f03c0 ret > >> 30: 52800002 mov w2, #0x0 // #0 > >> 34: 17fffffb b 20 <sys_caller+0x20> > > > > Thanks for having a go at this. If the compiler can show the > > problematic behaviour, it looks like your could could probably trigger > > it, and as you observe, it doesn't trigger. > > > > I am sure I have seen it in the past, but today I am struggling > > to tickle the compiler in the right way. My original reproducer may > > have involved LTO, but either way I don't still have it :( > > > > vDSO library is a shared object not compiled with LTO as far as I can > see, hence if this involved LTO should not applicable in this case. That turned to be a spurious hypothesis on my part -- LTO isn't the smoking gun. (See below.) 
> > The classic example of this (triggered directly and not due to inlining) > > would be something like: > > > > int bar(int, int); > > > > void foo(int x, int y) > > { > > register int x_ asm("r0") = x; > > register int y_ asm("r1") = bar(x, y); > > > > asm volatile ( > > "svc #0" > > :: "r" (x_), "r" (y_) > > : "memory" > > ); > > } > > > > -> > > > > 0000000000000000 <foo>: > > 0: a9bf7bfd stp x29, x30, [sp, #-16]! > > 4: 910003fd mov x29, sp > > 8: 94000000 bl 0 <bar> > > c: 2a0003e1 mov w1, w0 > > 10: d4000001 svc #0x0 > > 14: a8c17bfd ldp x29, x30, [sp], #16 > > 18: d65f03c0 ret > > > > Contextualized to what my vdso fallback functions do, this should not be a > concern because in no case a function result is directly set to a variable > declared as register. > > Since the vdso fallback functions serve a very specific and limited purpose, I > do not expect that that code is going to change much in future. > > The only thing that can happen is something similar to what I wrote in my > example, which as I empirically proved does not trigger the problematic behavior. > > > > > The gcc documentation is vague and ambiguous about precisely whan this > > can happen and about how to avoid it. > > > > On this I agree, it is not very clear, but this seems more something to raise > with the gcc folks in order to have a more "explicit" description that leaves no > room to the interpretation. > > ... > > > > > However, the workaround is cheap, and to avoid the chance of subtle > > intermittent code gen bugs it may be worth it: > > > > void foo(int x, int y) > > { > > asm volatile ( > > "mov x0, %0\n\t" > > "mov x1, %1\n\t" > > "svc #0" > > :: "r" (x), "r" (bar(x, y)) > > : "r0", "r1", "memory" > > ); > > } > > > > -> > > > > 0000000000000000 <foo>: > > 0: a9be7bfd stp x29, x30, [sp, #-32]! > > 4: 910003fd mov x29, sp > > 8: f9000bf3 str x19, [sp, #16] > > c: 2a0003f3 mov w19, w0 > > 10: 94000000 bl 0 <bar> > > 14: 2a0003e2 mov w2, w0 > > 18: aa1303e0 mov x0, x19 > > 1c: aa0203e1 mov x1, x2 > > 20: d4000001 svc #0x0 > > 24: f9400bf3 ldr x19, [sp, #16] > > 28: a8c27bfd ldp x29, x30, [sp], #32 > > 2c: d65f03c0 ret > > > > > > What do you think? > > > > The solution seems ok, thanks for providing it, but IMHO I think we > should find a workaround for something that is broken, which, unless > I am missing something major, this seems not the case. So, after a bit of further experimentation, I found that I could trigger it with implicit function calls on an older compiler. I couldn't show it with explicit function calls (as in your example). 
With the following code, inlining if an expression that causes an implicit call to a libgcc helper can trigger this issue, but I had to try an older compiler: int foo(int x, int y) { register int res asm("r0"); register const int x_ asm("r0") = x; register const int y_ asm("r1") = y; asm volatile ( "svc #0" : "=r" (res) : "r" (x_), "r" (y_) : "memory" ); return res; } int bar(int x, int y) { return foo(x, x / y); } -> (arm-linux-gnueabihf-gcc 9.1 -O2) 00000000 <foo>: 0: df00 svc 0 2: 4770 bx lr 00000004 <bar>: 4: b510 push {r4, lr} 6: 4604 mov r4, r0 8: f7ff fffe bl 0 <__aeabi_idiv> c: 4601 mov r1, r0 e: 4620 mov r0, r4 10: df00 svc 0 12: bd10 pop {r4, pc} -> (arm-linux-gnueabihf-gcc 5.1 -O2) 00000000 <foo>: 0: df00 svc 0 2: 4770 bx lr 00000004 <bar>: 4: b508 push {r3, lr} 6: f7ff fffe bl 0 <__aeabi_idiv> a: 4601 mov r1, r0 c: df00 svc 0 e: bd08 pop {r3, pc} I was struggling to find a way to emit an implicit function call for AArch64, except for 128-bit divide, which would complicate things since uint128_t doesn't fit in a single register anyway. Maybe this was considered a bug and fixed sometime after GCC 5, but I think the GCC documentation is still quite unclear on the semantics of register asm vars that alias call-clobbered registers in the PCS. If we can get a promise out of the GCC folks that this will not happen with any future compiler, then maybe we could just require a new enough compiler to be used. Then of course there is clang. Cheers ---Dave
Hi Dave, Overall, I want to thank you for bringing out the topic. It helped me to question some decisions and make sure that we have no holes left in the approach. [...] >> >> vDSO library is a shared object not compiled with LTO as far as I can >> see, hence if this involved LTO should not applicable in this case. > > That turned to be a spurious hypothesis on my part -- LTO isn't the > smoking gun. (See below.) > Ok. >>> The classic example of this (triggered directly and not due to inlining) >>> would be something like: >>> >>> int bar(int, int); >>> >>> void foo(int x, int y) >>> { >>> register int x_ asm("r0") = x; >>> register int y_ asm("r1") = bar(x, y); >>> >>> asm volatile ( >>> "svc #0" >>> :: "r" (x_), "r" (y_) >>> : "memory" >>> ); >>> } >>> >>> -> >>> >>> 0000000000000000 <foo>: >>> 0: a9bf7bfd stp x29, x30, [sp, #-16]! >>> 4: 910003fd mov x29, sp >>> 8: 94000000 bl 0 <bar> >>> c: 2a0003e1 mov w1, w0 >>> 10: d4000001 svc #0x0 >>> 14: a8c17bfd ldp x29, x30, [sp], #16 >>> 18: d65f03c0 ret >>> >> >> Contextualized to what my vdso fallback functions do, this should not be a >> concern because in no case a function result is directly set to a variable >> declared as register. >> >> Since the vdso fallback functions serve a very specific and limited purpose, I >> do not expect that that code is going to change much in future. >> >> The only thing that can happen is something similar to what I wrote in my >> example, which as I empirically proved does not trigger the problematic behavior. >> >>> >>> The gcc documentation is vague and ambiguous about precisely whan this >>> can happen and about how to avoid it. >>> >> >> On this I agree, it is not very clear, but this seems more something to raise >> with the gcc folks in order to have a more "explicit" description that leaves no >> room to the interpretation. >> >> ... >> >>> >>> However, the workaround is cheap, and to avoid the chance of subtle >>> intermittent code gen bugs it may be worth it: >>> >>> void foo(int x, int y) >>> { >>> asm volatile ( >>> "mov x0, %0\n\t" >>> "mov x1, %1\n\t" >>> "svc #0" >>> :: "r" (x), "r" (bar(x, y)) >>> : "r0", "r1", "memory" >>> ); >>> } >>> >>> -> >>> >>> 0000000000000000 <foo>: >>> 0: a9be7bfd stp x29, x30, [sp, #-32]! >>> 4: 910003fd mov x29, sp >>> 8: f9000bf3 str x19, [sp, #16] >>> c: 2a0003f3 mov w19, w0 >>> 10: 94000000 bl 0 <bar> >>> 14: 2a0003e2 mov w2, w0 >>> 18: aa1303e0 mov x0, x19 >>> 1c: aa0203e1 mov x1, x2 >>> 20: d4000001 svc #0x0 >>> 24: f9400bf3 ldr x19, [sp, #16] >>> 28: a8c27bfd ldp x29, x30, [sp], #32 >>> 2c: d65f03c0 ret >>> >>> >>> What do you think? >>> >> >> The solution seems ok, thanks for providing it, but IMHO I think we >> should find a workaround for something that is broken, which, unless >> I am missing something major, this seems not the case. > > So, after a bit of further experimentation, I found that I could trigger > it with implicit function calls on an older compiler. I couldn't show > it with explicit function calls (as in your example). 
> > With the following code, inlining if an expression that causes an > implicit call to a libgcc helper can trigger this issue, but I had to > try an older compiler: > > int foo(int x, int y) > { > register int res asm("r0"); > register const int x_ asm("r0") = x; > register const int y_ asm("r1") = y; > > asm volatile ( > "svc #0" > : "=r" (res) > : "r" (x_), "r" (y_) > : "memory" > ); > > return res; > } > > int bar(int x, int y) > { > return foo(x, x / y); > } > > -> (arm-linux-gnueabihf-gcc 9.1 -O2) > > 00000000 <foo>: > 0: df00 svc 0 > 2: 4770 bx lr > > 00000004 <bar>: > 4: b510 push {r4, lr} > 6: 4604 mov r4, r0 > 8: f7ff fffe bl 0 <__aeabi_idiv> > c: 4601 mov r1, r0 > e: 4620 mov r0, r4 > 10: df00 svc 0 > 12: bd10 pop {r4, pc} > > -> (arm-linux-gnueabihf-gcc 5.1 -O2) > > 00000000 <foo>: > 0: df00 svc 0 > 2: 4770 bx lr > > 00000004 <bar>: > 4: b508 push {r3, lr} > 6: f7ff fffe bl 0 <__aeabi_idiv> > a: 4601 mov r1, r0 > c: df00 svc 0 > e: bd08 pop {r3, pc} > Thanks for reporting this. I had a go with gcc-5.1 on the vDSO library and seems Ok, but it was worth trying. For obvious reasons I am not reporting the objdump here :) > I was struggling to find a way to emit an implicit function call for > AArch64, except for 128-bit divide, which would complicate things since > uint128_t doesn't fit in a single register anyway. > > Maybe this was considered a bug and fixed sometime after GCC 5, but I > think the GCC documentation is still quite unclear on the semantics of > register asm vars that alias call-clobbered registers in the PCS. > > If we can get a promise out of the GCC folks that this will not happen > with any future compiler, then maybe we could just require a new enough > compiler to be used. > On this I fully agree, the compiler should never change an "expected" behavior. If the issue comes from a gray area in the documentation, we have to address it and have it fixed there. The minimum version of the compiler from linux-4.19 is 4.6, hence I had to try that the vDSO lib does not break with 5.1 [1]. [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=cafa0010cd51fb711fdcb50fc55f394c5f167a0a > Then of course there is clang. > I could not help myself and I tried clang.8 and clang.7 as well with my example, just to make sure that we are fine even in that case. Please find below the results (pretty identical). main.clang.7.o: file format ELF64-aarch64-little Disassembly of section .text: 0000000000000000 show_it: 0: e8 03 1f aa mov x8, xzr 4: 09 68 68 38 ldrb w9, [x0, x8] 8: 08 05 00 91 add x8, x8, #1 c: c9 ff ff 34 cbz w9, #-8 <show_it+0x4> 10: 02 05 00 51 sub w2, w8, #1 14: e1 03 00 aa mov x1, x0 18: 08 08 80 d2 mov x8, #64 1c: 01 00 00 d4 svc #0 20: c0 03 5f d6 ret main.clang.8.o: file format ELF64-aarch64-little Disassembly of section .text: 0000000000000000 show_it: 0: e8 03 1f aa mov x8, xzr 4: 09 68 68 38 ldrb w9, [x0, x8] 8: 08 05 00 91 add x8, x8, #1 c: c9 ff ff 34 cbz w9, #-8 <show_it+0x4> 10: 02 05 00 51 sub w2, w8, #1 14: e1 03 00 aa mov x1, x0 18: 08 08 80 d2 mov x8, #64 1c: 01 00 00 d4 svc #0 20: c0 03 5f d6 ret Commands used: $ clang -target aarch64-linux-gnueabi main.c -O -c -o main.clang.<x>.o $ llvm-objdump -d main.clang.<x>.o > Cheers > ---Dave >
On Thu, Jun 27, 2019 at 11:57:36AM +0100, Vincenzo Frascino wrote: > Hi Dave, > > Overall, I want to thank you for bringing out the topic. It helped me to > question some decisions and make sure that we have no holes left in > the approach. Fair enough. This is really just a nasty compiler corner-case... the validity of the overall approach isn't affected. > >> > >> vDSO library is a shared object not compiled with LTO as far as I can > >> see, hence if this involved LTO should not applicable in this case. > > > > That turned to be a spurious hypothesis on my part -- LTO isn't the > > smoking gun. (See below.) > > > > Ok. > > >>> The classic example of this (triggered directly and not due to inlining) > >>> would be something like: > >>> > >>> int bar(int, int); > >>> > >>> void foo(int x, int y) > >>> { > >>> register int x_ asm("r0") = x; > >>> register int y_ asm("r1") = bar(x, y); > >>> > >>> asm volatile ( > >>> "svc #0" > >>> :: "r" (x_), "r" (y_) > >>> : "memory" > >>> ); > >>> } > >>> > >>> -> > >>> > >>> 0000000000000000 <foo>: > >>> 0: a9bf7bfd stp x29, x30, [sp, #-16]! > >>> 4: 910003fd mov x29, sp > >>> 8: 94000000 bl 0 <bar> > >>> c: 2a0003e1 mov w1, w0 > >>> 10: d4000001 svc #0x0 > >>> 14: a8c17bfd ldp x29, x30, [sp], #16 > >>> 18: d65f03c0 ret > >>> > >> > >> Contextualized to what my vdso fallback functions do, this should not be a > >> concern because in no case a function result is directly set to a variable > >> declared as register. > >> > >> Since the vdso fallback functions serve a very specific and limited purpose, I > >> do not expect that that code is going to change much in future. > >> > >> The only thing that can happen is something similar to what I wrote in my > >> example, which as I empirically proved does not trigger the problematic behavior. > >> > >>> > >>> The gcc documentation is vague and ambiguous about precisely whan this > >>> can happen and about how to avoid it. > >>> > >> > >> On this I agree, it is not very clear, but this seems more something to raise > >> with the gcc folks in order to have a more "explicit" description that leaves no > >> room to the interpretation. > >> > >> ... > >> > >>> > >>> However, the workaround is cheap, and to avoid the chance of subtle > >>> intermittent code gen bugs it may be worth it: > >>> > >>> void foo(int x, int y) > >>> { > >>> asm volatile ( > >>> "mov x0, %0\n\t" > >>> "mov x1, %1\n\t" > >>> "svc #0" > >>> :: "r" (x), "r" (bar(x, y)) > >>> : "r0", "r1", "memory" > >>> ); > >>> } > >>> > >>> -> > >>> > >>> 0000000000000000 <foo>: > >>> 0: a9be7bfd stp x29, x30, [sp, #-32]! > >>> 4: 910003fd mov x29, sp > >>> 8: f9000bf3 str x19, [sp, #16] > >>> c: 2a0003f3 mov w19, w0 > >>> 10: 94000000 bl 0 <bar> > >>> 14: 2a0003e2 mov w2, w0 > >>> 18: aa1303e0 mov x0, x19 > >>> 1c: aa0203e1 mov x1, x2 > >>> 20: d4000001 svc #0x0 > >>> 24: f9400bf3 ldr x19, [sp, #16] > >>> 28: a8c27bfd ldp x29, x30, [sp], #32 > >>> 2c: d65f03c0 ret > >>> > >>> > >>> What do you think? > >>> > >> > >> The solution seems ok, thanks for providing it, but IMHO I think we > >> should find a workaround for something that is broken, which, unless > >> I am missing something major, this seems not the case. > > > > So, after a bit of further experimentation, I found that I could trigger > > it with implicit function calls on an older compiler. I couldn't show > > it with explicit function calls (as in your example). 
> > > > With the following code, inlining if an expression that causes an > > implicit call to a libgcc helper can trigger this issue, but I had to > > try an older compiler: > > > > int foo(int x, int y) > > { > > register int res asm("r0"); > > register const int x_ asm("r0") = x; > > register const int y_ asm("r1") = y; > > > > asm volatile ( > > "svc #0" > > : "=r" (res) > > : "r" (x_), "r" (y_) > > : "memory" > > ); > > > > return res; > > } > > > > int bar(int x, int y) > > { > > return foo(x, x / y); > > } > > > > -> (arm-linux-gnueabihf-gcc 9.1 -O2) > > > > 00000000 <foo>: > > 0: df00 svc 0 > > 2: 4770 bx lr > > > > 00000004 <bar>: > > 4: b510 push {r4, lr} > > 6: 4604 mov r4, r0 > > 8: f7ff fffe bl 0 <__aeabi_idiv> > > c: 4601 mov r1, r0 > > e: 4620 mov r0, r4 > > 10: df00 svc 0 > > 12: bd10 pop {r4, pc} > > > > -> (arm-linux-gnueabihf-gcc 5.1 -O2) > > > > 00000000 <foo>: > > 0: df00 svc 0 > > 2: 4770 bx lr > > > > 00000004 <bar>: > > 4: b508 push {r3, lr} > > 6: f7ff fffe bl 0 <__aeabi_idiv> > > a: 4601 mov r1, r0 > > c: df00 svc 0 > > e: bd08 pop {r3, pc} > > > > Thanks for reporting this. I had a go with gcc-5.1 on the vDSO library and seems > Ok, but it was worth trying. > > For obvious reasons I am not reporting the objdump here :) > > > I was struggling to find a way to emit an implicit function call for > > AArch64, except for 128-bit divide, which would complicate things since > > uint128_t doesn't fit in a single register anyway. > > > > Maybe this was considered a bug and fixed sometime after GCC 5, but I > > think the GCC documentation is still quite unclear on the semantics of > > register asm vars that alias call-clobbered registers in the PCS. > > > > If we can get a promise out of the GCC folks that this will not happen > > with any future compiler, then maybe we could just require a new enough > > compiler to be used. > > > > On this I fully agree, the compiler should never change an "expected" behavior. > > If the issue comes from a gray area in the documentation, we have to address it > and have it fixed there. > > The minimum version of the compiler from linux-4.19 is 4.6, hence I had to try > that the vDSO lib does not break with 5.1 [1]. > > [1] > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=cafa0010cd51fb711fdcb50fc55f394c5f167a0a OK > > Then of course there is clang. > > > > I could not help myself and I tried clang.8 and clang.7 as well with my example, > just to make sure that we are fine even in that case. Please find below the > results (pretty identical). 
> > main.clang.7.o: file format ELF64-aarch64-little > > Disassembly of section .text: > 0000000000000000 show_it: > 0: e8 03 1f aa mov x8, xzr > 4: 09 68 68 38 ldrb w9, [x0, x8] > 8: 08 05 00 91 add x8, x8, #1 > c: c9 ff ff 34 cbz w9, #-8 <show_it+0x4> > 10: 02 05 00 51 sub w2, w8, #1 > 14: e1 03 00 aa mov x1, x0 > 18: 08 08 80 d2 mov x8, #64 > 1c: 01 00 00 d4 svc #0 > 20: c0 03 5f d6 ret > > main.clang.8.o: file format ELF64-aarch64-little > > Disassembly of section .text: > 0000000000000000 show_it: > 0: e8 03 1f aa mov x8, xzr > 4: 09 68 68 38 ldrb w9, [x0, x8] > 8: 08 05 00 91 add x8, x8, #1 > c: c9 ff ff 34 cbz w9, #-8 <show_it+0x4> > 10: 02 05 00 51 sub w2, w8, #1 > 14: e1 03 00 aa mov x1, x0 > 18: 08 08 80 d2 mov x8, #64 > 1c: 01 00 00 d4 svc #0 > 20: c0 03 5f d6 ret > > Commands used: > > $ clang -target aarch64-linux-gnueabi main.c -O -c -o main.clang.<x>.o > $ llvm-objdump -d main.clang.<x>.o Actually, I'm not sure this is comparable with the reproducer I quoted in my last reply. The compiler can see the definition of strlen and fully inlines it. I only ever saw the problem when the compiler emits an out-of-line implicit function call. What does clang do with my example on 32-bit? Cheers ---Dave
On 6/27/19 12:27 PM, Dave Martin wrote: > On Thu, Jun 27, 2019 at 11:57:36AM +0100, Vincenzo Frascino wrote: >> Hi Dave, >> >> Overall, I want to thank you for bringing out the topic. It helped me to >> question some decisions and make sure that we have no holes left in >> the approach. > > Fair enough. > > This is really just a nasty compiler corner-case... the validity of the > overall approach isn't affected. > >>>> >>>> vDSO library is a shared object not compiled with LTO as far as I can >>>> see, hence if this involved LTO should not applicable in this case. >>> >>> That turned to be a spurious hypothesis on my part -- LTO isn't the >>> smoking gun. (See below.) >>> >> >> Ok. >> >>>>> The classic example of this (triggered directly and not due to inlining) >>>>> would be something like: >>>>> >>>>> int bar(int, int); >>>>> >>>>> void foo(int x, int y) >>>>> { >>>>> register int x_ asm("r0") = x; >>>>> register int y_ asm("r1") = bar(x, y); >>>>> >>>>> asm volatile ( >>>>> "svc #0" >>>>> :: "r" (x_), "r" (y_) >>>>> : "memory" >>>>> ); >>>>> } >>>>> >>>>> -> >>>>> >>>>> 0000000000000000 <foo>: >>>>> 0: a9bf7bfd stp x29, x30, [sp, #-16]! >>>>> 4: 910003fd mov x29, sp >>>>> 8: 94000000 bl 0 <bar> >>>>> c: 2a0003e1 mov w1, w0 >>>>> 10: d4000001 svc #0x0 >>>>> 14: a8c17bfd ldp x29, x30, [sp], #16 >>>>> 18: d65f03c0 ret >>>>> >>>> >>>> Contextualized to what my vdso fallback functions do, this should not be a >>>> concern because in no case a function result is directly set to a variable >>>> declared as register. >>>> >>>> Since the vdso fallback functions serve a very specific and limited purpose, I >>>> do not expect that that code is going to change much in future. >>>> >>>> The only thing that can happen is something similar to what I wrote in my >>>> example, which as I empirically proved does not trigger the problematic behavior. >>>> >>>>> >>>>> The gcc documentation is vague and ambiguous about precisely whan this >>>>> can happen and about how to avoid it. >>>>> >>>> >>>> On this I agree, it is not very clear, but this seems more something to raise >>>> with the gcc folks in order to have a more "explicit" description that leaves no >>>> room to the interpretation. >>>> >>>> ... >>>> >>>>> >>>>> However, the workaround is cheap, and to avoid the chance of subtle >>>>> intermittent code gen bugs it may be worth it: >>>>> >>>>> void foo(int x, int y) >>>>> { >>>>> asm volatile ( >>>>> "mov x0, %0\n\t" >>>>> "mov x1, %1\n\t" >>>>> "svc #0" >>>>> :: "r" (x), "r" (bar(x, y)) >>>>> : "r0", "r1", "memory" >>>>> ); >>>>> } >>>>> >>>>> -> >>>>> >>>>> 0000000000000000 <foo>: >>>>> 0: a9be7bfd stp x29, x30, [sp, #-32]! >>>>> 4: 910003fd mov x29, sp >>>>> 8: f9000bf3 str x19, [sp, #16] >>>>> c: 2a0003f3 mov w19, w0 >>>>> 10: 94000000 bl 0 <bar> >>>>> 14: 2a0003e2 mov w2, w0 >>>>> 18: aa1303e0 mov x0, x19 >>>>> 1c: aa0203e1 mov x1, x2 >>>>> 20: d4000001 svc #0x0 >>>>> 24: f9400bf3 ldr x19, [sp, #16] >>>>> 28: a8c27bfd ldp x29, x30, [sp], #32 >>>>> 2c: d65f03c0 ret >>>>> >>>>> >>>>> What do you think? >>>>> >>>> >>>> The solution seems ok, thanks for providing it, but IMHO I think we >>>> should find a workaround for something that is broken, which, unless >>>> I am missing something major, this seems not the case. >>> >>> So, after a bit of further experimentation, I found that I could trigger >>> it with implicit function calls on an older compiler. I couldn't show >>> it with explicit function calls (as in your example). 
>>> >>> With the following code, inlining if an expression that causes an >>> implicit call to a libgcc helper can trigger this issue, but I had to >>> try an older compiler: >>> >>> int foo(int x, int y) >>> { >>> register int res asm("r0"); >>> register const int x_ asm("r0") = x; >>> register const int y_ asm("r1") = y; >>> >>> asm volatile ( >>> "svc #0" >>> : "=r" (res) >>> : "r" (x_), "r" (y_) >>> : "memory" >>> ); >>> >>> return res; >>> } >>> >>> int bar(int x, int y) >>> { >>> return foo(x, x / y); >>> } >>> >>> -> (arm-linux-gnueabihf-gcc 9.1 -O2) >>> >>> 00000000 <foo>: >>> 0: df00 svc 0 >>> 2: 4770 bx lr >>> >>> 00000004 <bar>: >>> 4: b510 push {r4, lr} >>> 6: 4604 mov r4, r0 >>> 8: f7ff fffe bl 0 <__aeabi_idiv> >>> c: 4601 mov r1, r0 >>> e: 4620 mov r0, r4 >>> 10: df00 svc 0 >>> 12: bd10 pop {r4, pc} >>> >>> -> (arm-linux-gnueabihf-gcc 5.1 -O2) >>> >>> 00000000 <foo>: >>> 0: df00 svc 0 >>> 2: 4770 bx lr >>> >>> 00000004 <bar>: >>> 4: b508 push {r3, lr} >>> 6: f7ff fffe bl 0 <__aeabi_idiv> >>> a: 4601 mov r1, r0 >>> c: df00 svc 0 >>> e: bd08 pop {r3, pc} >>> >> >> Thanks for reporting this. I had a go with gcc-5.1 on the vDSO library and seems >> Ok, but it was worth trying. >> >> For obvious reasons I am not reporting the objdump here :) >> >>> I was struggling to find a way to emit an implicit function call for >>> AArch64, except for 128-bit divide, which would complicate things since >>> uint128_t doesn't fit in a single register anyway. >>> >>> Maybe this was considered a bug and fixed sometime after GCC 5, but I >>> think the GCC documentation is still quite unclear on the semantics of >>> register asm vars that alias call-clobbered registers in the PCS. >>> >>> If we can get a promise out of the GCC folks that this will not happen >>> with any future compiler, then maybe we could just require a new enough >>> compiler to be used. >>> >> >> On this I fully agree, the compiler should never change an "expected" behavior. >> >> If the issue comes from a gray area in the documentation, we have to address it >> and have it fixed there. >> >> The minimum version of the compiler from linux-4.19 is 4.6, hence I had to try >> that the vDSO lib does not break with 5.1 [1]. >> >> [1] >> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=cafa0010cd51fb711fdcb50fc55f394c5f167a0a > > OK > >>> Then of course there is clang. >>> >> >> I could not help myself and I tried clang.8 and clang.7 as well with my example, >> just to make sure that we are fine even in that case. Please find below the >> results (pretty identical). 
>> >> main.clang.7.o: file format ELF64-aarch64-little >> >> Disassembly of section .text: >> 0000000000000000 show_it: >> 0: e8 03 1f aa mov x8, xzr >> 4: 09 68 68 38 ldrb w9, [x0, x8] >> 8: 08 05 00 91 add x8, x8, #1 >> c: c9 ff ff 34 cbz w9, #-8 <show_it+0x4> >> 10: 02 05 00 51 sub w2, w8, #1 >> 14: e1 03 00 aa mov x1, x0 >> 18: 08 08 80 d2 mov x8, #64 >> 1c: 01 00 00 d4 svc #0 >> 20: c0 03 5f d6 ret >> >> main.clang.8.o: file format ELF64-aarch64-little >> >> Disassembly of section .text: >> 0000000000000000 show_it: >> 0: e8 03 1f aa mov x8, xzr >> 4: 09 68 68 38 ldrb w9, [x0, x8] >> 8: 08 05 00 91 add x8, x8, #1 >> c: c9 ff ff 34 cbz w9, #-8 <show_it+0x4> >> 10: 02 05 00 51 sub w2, w8, #1 >> 14: e1 03 00 aa mov x1, x0 >> 18: 08 08 80 d2 mov x8, #64 >> 1c: 01 00 00 d4 svc #0 >> 20: c0 03 5f d6 ret >> >> Commands used: >> >> $ clang -target aarch64-linux-gnueabi main.c -O -c -o main.clang.<x>.o >> $ llvm-objdump -d main.clang.<x>.o > > Actually, I'm not sure this is comparable with the reproducer I quoted > in my last reply. > As explained in my previous email, this is the only case that can realistically happen. vDSO has no dependency on any other library (i.e. libgcc you were mentioning) and we are referring to the fallbacks which fall in this category. > The compiler can see the definition of strlen and fully inlines it. > I only ever saw the problem when the compiler emits an out-of-line > implicit function call. > > What does clang do with my example on 32-bit? When clang is selected compat vDSOs are currently disabled on arm64, will be introduced with a future patch series. Anyway since I am curious as well, this is what happens with your example with clang.8 target=arm-linux-gnueabihf: dave-code.clang.8.o: file format ELF32-arm-little Disassembly of section .text: 0000000000000000 foo: 0: 00 00 00 ef svc #0 4: 1e ff 2f e1 bx lr 0000000000000008 bar: 8: 10 4c 2d e9 push {r4, r10, r11, lr} c: 08 b0 8d e2 add r11, sp, #8 10: 00 40 a0 e1 mov r4, r0 14: fe ff ff eb bl #-8 <bar+0xc> 18: 00 10 a0 e1 mov r1, r0 1c: 04 00 a0 e1 mov r0, r4 20: 00 00 00 ef svc #0 24: 10 8c bd e8 pop {r4, r10, r11, pc} Compiled with -O2, -O3, -Os never inlines. Same thing happens for aarch64-linux-gnueabi: dave-code.clang.8.o: file format ELF64-aarch64-little Disassembly of section .text: 0000000000000000 foo: 0: e0 03 00 2a mov w0, w0 4: e1 03 01 2a mov w1, w1 8: 01 00 00 d4 svc #0 c: c0 03 5f d6 ret 0000000000000010 bar: 10: 01 0c c1 1a sdiv w1, w0, w1 14: e0 03 00 2a mov w0, w0 18: 01 00 00 d4 svc #0 1c: c0 03 5f d6 ret Based on this I think we can conclude our investigation. > > Cheers > ---Dave > > _______________________________________________ > linux-arm-kernel mailing list > linux-arm-kernel@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel >
On Thu, Jun 27, 2019 at 12:59:07PM +0100, Vincenzo Frascino wrote: > On 6/27/19 12:27 PM, Dave Martin wrote: > > On Thu, Jun 27, 2019 at 11:57:36AM +0100, Vincenzo Frascino wrote: [...] > >> Disassembly of section .text: > >> 0000000000000000 show_it: > >> 0: e8 03 1f aa mov x8, xzr > >> 4: 09 68 68 38 ldrb w9, [x0, x8] > >> 8: 08 05 00 91 add x8, x8, #1 > >> c: c9 ff ff 34 cbz w9, #-8 <show_it+0x4> > >> 10: 02 05 00 51 sub w2, w8, #1 > >> 14: e1 03 00 aa mov x1, x0 > >> 18: 08 08 80 d2 mov x8, #64 > >> 1c: 01 00 00 d4 svc #0 > >> 20: c0 03 5f d6 ret > >> > >> Commands used: > >> > >> $ clang -target aarch64-linux-gnueabi main.c -O -c -o main.clang.<x>.o > >> $ llvm-objdump -d main.clang.<x>.o > > > > Actually, I'm not sure this is comparable with the reproducer I quoted > > in my last reply. > > > > As explained in my previous email, this is the only case that can realistically > happen. vDSO has no dependency on any other library (i.e. libgcc you were > mentioning) and we are referring to the fallbacks which fall in this category. Outlining could also introduce a local function call where none exists explicitly in the program IIUC. My point is that the interaction between asm reg vars and machine-level procedure calls is at best ill-defined, and it is largely up to the compiler when to introduce such a call, even without LTO etc. So we should not be surprised to see variations in behaviour depending on compiler, compiler version and compiler flags. > > The compiler can see the definition of strlen and fully inlines it. > > I only ever saw the problem when the compiler emits an out-of-line > > implicit function call. > > > What does clang do with my example on 32-bit? > > When clang is selected compat vDSOs are currently disabled on arm64, will be > introduced with a future patch series. > > Anyway since I am curious as well, this is what happens with your example with > clang.8 target=arm-linux-gnueabihf: > > dave-code.clang.8.o: file format ELF32-arm-little > > Disassembly of section .text: > 0000000000000000 foo: > 0: 00 00 00 ef svc #0 > 4: 1e ff 2f e1 bx lr > > 0000000000000008 bar: > 8: 10 4c 2d e9 push {r4, r10, r11, lr} > c: 08 b0 8d e2 add r11, sp, #8 > 10: 00 40 a0 e1 mov r4, r0 > 14: fe ff ff eb bl #-8 <bar+0xc> > 18: 00 10 a0 e1 mov r1, r0 > 1c: 04 00 a0 e1 mov r0, r4 > 20: 00 00 00 ef svc #0 > 24: 10 8c bd e8 pop {r4, r10, r11, pc} > Compiled with -O2, -O3, -Os never inlines. Looks sane, and is the behaviour we want. > Same thing happens for aarch64-linux-gnueabi: > > dave-code.clang.8.o: file format ELF64-aarch64-little > > Disassembly of section .text: > 0000000000000000 foo: > 0: e0 03 00 2a mov w0, w0 > 4: e1 03 01 2a mov w1, w1 > 8: 01 00 00 d4 svc #0 > c: c0 03 5f d6 ret > > 0000000000000010 bar: > 10: 01 0c c1 1a sdiv w1, w0, w1 > 14: e0 03 00 2a mov w0, w0 > 18: 01 00 00 d4 svc #0 > 1c: c0 03 5f d6 ret Curious, clang seems to be inserting some seemingly redundant moves of its own here, though this shouldn't break anything. I suspect that clang might require an X-reg holding an int to have its top 32 bits zeroed for passing to an asm, whereas GCC does not. I think this comes under "we should not be surprised to see variations". GCC 9 does this instead: 0000000000000000 <foo>: 0: d4000001 svc #0x0 4: d65f03c0 ret 0000000000000008 <bar>: 8: 1ac10c01 sdiv w1, w0, w1 c: d4000001 svc #0x0 10: d65f03c0 ret > Based on this I think we can conclude our investigation. 
So we use non-reg vars and use the asm clobber list and explicit moves to get things into / out of the right registers? Cheers ---Dave
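A minimal sketch of that alternative, applied to the clock_gettime fallback: ordinary (non-register) variables, explicit moves inside the asm body, and x0/x1/x8 named in the clobber list. The function name and exact constraints below are an illustration of the idea, assuming the same types and __NR_clock_gettime definition as the fallbacks in the patch; it is not the code the series actually uses, which keeps the register-asm-variable form.

static __always_inline long clock_gettime_fallback_alt(clockid_t _clkid,
					struct __kernel_timespec *_ts)
{
	long ret;

	asm volatile(
	"	mov	x0, %1\n"
	"	mov	x1, %2\n"
	"	mov	x8, %3\n"
	"	svc	#0\n"
	"	mov	%0, x0\n"
	: "=r" (ret)
	: "r" ((long)_clkid), "r" (_ts), "r" ((long)__NR_clock_gettime)
	: "x0", "x1", "x8", "memory");

	return ret;
}

The cost is a few extra moves per fallback call, but the register set-up can no longer be separated from the svc by a compiler-introduced call, whatever the compiler decides to inline or outline.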
Hi Dave, On 6/27/19 3:38 PM, Dave Martin wrote: > On Thu, Jun 27, 2019 at 12:59:07PM +0100, Vincenzo Frascino wrote: >> On 6/27/19 12:27 PM, Dave Martin wrote: >>> On Thu, Jun 27, 2019 at 11:57:36AM +0100, Vincenzo Frascino wrote: > > [...] > >>>> Disassembly of section .text: >>>> 0000000000000000 show_it: >>>> 0: e8 03 1f aa mov x8, xzr >>>> 4: 09 68 68 38 ldrb w9, [x0, x8] >>>> 8: 08 05 00 91 add x8, x8, #1 >>>> c: c9 ff ff 34 cbz w9, #-8 <show_it+0x4> >>>> 10: 02 05 00 51 sub w2, w8, #1 >>>> 14: e1 03 00 aa mov x1, x0 >>>> 18: 08 08 80 d2 mov x8, #64 >>>> 1c: 01 00 00 d4 svc #0 >>>> 20: c0 03 5f d6 ret >>>> >>>> Commands used: >>>> >>>> $ clang -target aarch64-linux-gnueabi main.c -O -c -o main.clang.<x>.o >>>> $ llvm-objdump -d main.clang.<x>.o >>> >>> Actually, I'm not sure this is comparable with the reproducer I quoted >>> in my last reply. >>> >> >> As explained in my previous email, this is the only case that can realistically >> happen. vDSO has no dependency on any other library (i.e. libgcc you were >> mentioning) and we are referring to the fallbacks which fall in this category. > > Outlining could also introduce a local function call where none exists > explicitly in the program IIUC. > > My point is that the interaction between asm reg vars and machine-level > procedure calls is at best ill-defined, and it is largely up to the > compiler when to introduce such a call, even without LTO etc. > > So we should not be surprised to see variations in behaviour depending > on compiler, compiler version and compiler flags. > I tested 10 version of the compiler and a part gcc-5.1 that triggers the issue in a specific case and not in the vdso library, I could not find evidence of the problem. >>> The compiler can see the definition of strlen and fully inlines it. >>> I only ever saw the problem when the compiler emits an out-of-line >>> implicit function call. >>>> What does clang do with my example on 32-bit? >> >> When clang is selected compat vDSOs are currently disabled on arm64, will be >> introduced with a future patch series. >> >> Anyway since I am curious as well, this is what happens with your example with >> clang.8 target=arm-linux-gnueabihf: >> >> dave-code.clang.8.o: file format ELF32-arm-little >> >> Disassembly of section .text: >> 0000000000000000 foo: >> 0: 00 00 00 ef svc #0 >> 4: 1e ff 2f e1 bx lr >> >> 0000000000000008 bar: >> 8: 10 4c 2d e9 push {r4, r10, r11, lr} >> c: 08 b0 8d e2 add r11, sp, #8 >> 10: 00 40 a0 e1 mov r4, r0 >> 14: fe ff ff eb bl #-8 <bar+0xc> >> 18: 00 10 a0 e1 mov r1, r0 >> 1c: 04 00 a0 e1 mov r0, r4 >> 20: 00 00 00 ef svc #0 >> 24: 10 8c bd e8 pop {r4, r10, r11, pc} > >> Compiled with -O2, -O3, -Os never inlines. > > Looks sane, and is the behaviour we want. > >> Same thing happens for aarch64-linux-gnueabi: >> >> dave-code.clang.8.o: file format ELF64-aarch64-little >> >> Disassembly of section .text: >> 0000000000000000 foo: >> 0: e0 03 00 2a mov w0, w0 >> 4: e1 03 01 2a mov w1, w1 >> 8: 01 00 00 d4 svc #0 >> c: c0 03 5f d6 ret >> >> 0000000000000010 bar: >> 10: 01 0c c1 1a sdiv w1, w0, w1 >> 14: e0 03 00 2a mov w0, w0 >> 18: 01 00 00 d4 svc #0 >> 1c: c0 03 5f d6 ret > > Curious, clang seems to be inserting some seemingly redundant moves > of its own here, though this shouldn't break anything. > > I suspect that clang might require an X-reg holding an int to have its > top 32 bits zeroed for passing to an asm, whereas GCC does not. I think > this comes under "we should not be surprised to see variations". 
> > GCC 9 does this instead: > > 0000000000000000 <foo>: > 0: d4000001 svc #0x0 > 4: d65f03c0 ret > > 0000000000000008 <bar>: > 8: 1ac10c01 sdiv w1, w0, w1 > c: d4000001 svc #0x0 > 10: d65f03c0 ret > > >> Based on this I think we can conclude our investigation. > > So we use non-reg vars and use the asm clobber list and explicit moves > to get things into / out of the right registers? > Since I managed to provide enough evidence, based on the behavior of various compiler versions, that the library as it stands is consistent and does not suffer from any of the issues you reported, I will keep my code as is, at least for this release, and revisit it in the future if something happens. If you manage to prove that my library as it stands (no code additions or source modifications) has the issues you mentioned on some version of the compiler, that changes everything. Happy to hear from you. > Cheers > ---Dave >
Dear All, On 2019-06-21 11:52, Vincenzo Frascino wrote: > To take advantage of the commonly defined vdso interface for > gettimeofday the architectural code requires an adaptation. > > Re-implement the gettimeofday vdso in C in order to use lib/vdso. > > With the new implementation arm64 gains support for CLOCK_BOOTTIME > and CLOCK_TAI. > > Cc: Catalin Marinas <catalin.marinas@arm.com> > Cc: Will Deacon <will.deacon@arm.com> > Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com> > Tested-by: Shijith Thotton <sthotton@marvell.com> > Tested-by: Andre Przywara <andre.przywara@arm.com> > Signed-off-by: Catalin Marinas <catalin.marinas@arm.com> This patch causes serious regression on Samsung Exynos5433 SoC based TM2(e) boards. The time in userspace is always set to begin of the epoch: # date 062813152019 Fri Jun 28 13:15:00 UTC 2019 # date Thu Jan 1 00:00:00 UTC 1970 # date Thu Jan 1 00:00:00 UTC 1970 I've noticed that since the patch landed in Linux next-20190625 and bisect indeed pointed to this patch. > --- > arch/arm64/Kconfig | 2 + > arch/arm64/include/asm/vdso/gettimeofday.h | 86 ++++++ > arch/arm64/include/asm/vdso/vsyscall.h | 53 ++++ > arch/arm64/include/asm/vdso_datapage.h | 48 --- > arch/arm64/kernel/asm-offsets.c | 33 +- > arch/arm64/kernel/vdso.c | 51 +--- > arch/arm64/kernel/vdso/Makefile | 34 ++- > arch/arm64/kernel/vdso/gettimeofday.S | 334 --------------------- > arch/arm64/kernel/vdso/vgettimeofday.c | 28 ++ > 9 files changed, 223 insertions(+), 446 deletions(-) > create mode 100644 arch/arm64/include/asm/vdso/gettimeofday.h > create mode 100644 arch/arm64/include/asm/vdso/vsyscall.h > delete mode 100644 arch/arm64/include/asm/vdso_datapage.h > delete mode 100644 arch/arm64/kernel/vdso/gettimeofday.S > create mode 100644 arch/arm64/kernel/vdso/vgettimeofday.c > > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig > index 697ea0510729..952c9f8cf3b8 100644 > --- a/arch/arm64/Kconfig > +++ b/arch/arm64/Kconfig > @@ -107,6 +107,7 @@ config ARM64 > select GENERIC_STRNCPY_FROM_USER > select GENERIC_STRNLEN_USER > select GENERIC_TIME_VSYSCALL > + select GENERIC_GETTIMEOFDAY > select HANDLE_DOMAIN_IRQ > select HARDIRQS_SW_RESEND > select HAVE_PCI > @@ -160,6 +161,7 @@ config ARM64 > select HAVE_SYSCALL_TRACEPOINTS > select HAVE_KPROBES > select HAVE_KRETPROBES > + select HAVE_GENERIC_VDSO > select IOMMU_DMA if IOMMU_SUPPORT > select IRQ_DOMAIN > select IRQ_FORCED_THREADING > diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h > new file mode 100644 > index 000000000000..bc3cb6738051 > --- /dev/null > +++ b/arch/arm64/include/asm/vdso/gettimeofday.h > @@ -0,0 +1,86 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +/* > + * Copyright (C) 2018 ARM Limited > + */ > +#ifndef __ASM_VDSO_GETTIMEOFDAY_H > +#define __ASM_VDSO_GETTIMEOFDAY_H > + > +#ifndef __ASSEMBLY__ > + > +#include <asm/unistd.h> > +#include <uapi/linux/time.h> > + > +#define VDSO_HAS_CLOCK_GETRES 1 > + > +static __always_inline int gettimeofday_fallback( > + struct __kernel_old_timeval *_tv, > + struct timezone *_tz) > +{ > + register struct timezone *tz asm("x1") = _tz; > + register struct __kernel_old_timeval *tv asm("x0") = _tv; > + register long ret asm ("x0"); > + register long nr asm("x8") = __NR_gettimeofday; > + > + asm volatile( > + " svc #0\n" > + : "=r" (ret) > + : "r" (tv), "r" (tz), "r" (nr) > + : "memory"); > + > + return ret; > +} > + > +static __always_inline long clock_gettime_fallback( > + clockid_t _clkid, > + struct __kernel_timespec *_ts) > +{ > + 
register struct __kernel_timespec *ts asm("x1") = _ts; > + register clockid_t clkid asm("x0") = _clkid; > + register long ret asm ("x0"); > + register long nr asm("x8") = __NR_clock_gettime; > + > + asm volatile( > + " svc #0\n" > + : "=r" (ret) > + : "r" (clkid), "r" (ts), "r" (nr) > + : "memory"); > + > + return ret; > +} > + > +static __always_inline int clock_getres_fallback( > + clockid_t _clkid, > + struct __kernel_timespec *_ts) > +{ > + register struct __kernel_timespec *ts asm("x1") = _ts; > + register clockid_t clkid asm("x0") = _clkid; > + register long ret asm ("x0"); > + register long nr asm("x8") = __NR_clock_getres; > + > + asm volatile( > + " svc #0\n" > + : "=r" (ret) > + : "r" (clkid), "r" (ts), "r" (nr) > + : "memory"); > + > + return ret; > +} > + > +static __always_inline u64 __arch_get_hw_counter(s32 clock_mode) > +{ > + u64 res; > + > + asm volatile("mrs %0, cntvct_el0" : "=r" (res) :: "memory"); > + > + return res; > +} > + > +static __always_inline > +const struct vdso_data *__arch_get_vdso_data(void) > +{ > + return _vdso_data; > +} > + > +#endif /* !__ASSEMBLY__ */ > + > +#endif /* __ASM_VDSO_GETTIMEOFDAY_H */ > diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h > new file mode 100644 > index 000000000000..0c731bfc7c8c > --- /dev/null > +++ b/arch/arm64/include/asm/vdso/vsyscall.h > @@ -0,0 +1,53 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +#ifndef __ASM_VDSO_VSYSCALL_H > +#define __ASM_VDSO_VSYSCALL_H > + > +#ifndef __ASSEMBLY__ > + > +#include <linux/timekeeper_internal.h> > +#include <vdso/datapage.h> > + > +#define VDSO_PRECISION_MASK ~(0xFF00ULL<<48) > + > +extern struct vdso_data *vdso_data; > + > +/* > + * Update the vDSO data page to keep in sync with kernel timekeeping. > + */ > +static __always_inline > +struct vdso_data *__arm64_get_k_vdso_data(void) > +{ > + return vdso_data; > +} > +#define __arch_get_k_vdso_data __arm64_get_k_vdso_data > + > +static __always_inline > +int __arm64_get_clock_mode(struct timekeeper *tk) > +{ > + u32 use_syscall = !tk->tkr_mono.clock->archdata.vdso_direct; > + > + return use_syscall; > +} > +#define __arch_get_clock_mode __arm64_get_clock_mode > + > +static __always_inline > +int __arm64_use_vsyscall(struct vdso_data *vdata) > +{ > + return !vdata[CS_HRES_COARSE].clock_mode; > +} > +#define __arch_use_vsyscall __arm64_use_vsyscall > + > +static __always_inline > +void __arm64_update_vsyscall(struct vdso_data *vdata, struct timekeeper *tk) > +{ > + vdata[CS_HRES_COARSE].mask = VDSO_PRECISION_MASK; > + vdata[CS_RAW].mask = VDSO_PRECISION_MASK; > +} > +#define __arch_update_vsyscall __arm64_update_vsyscall > + > +/* The asm-generic header needs to be included after the definitions above */ > +#include <asm-generic/vdso/vsyscall.h> > + > +#endif /* !__ASSEMBLY__ */ > + > +#endif /* __ASM_VDSO_VSYSCALL_H */ > diff --git a/arch/arm64/include/asm/vdso_datapage.h b/arch/arm64/include/asm/vdso_datapage.h > deleted file mode 100644 > index f89263c8e11a..000000000000 > --- a/arch/arm64/include/asm/vdso_datapage.h > +++ /dev/null > @@ -1,48 +0,0 @@ > -/* > - * Copyright (C) 2012 ARM Limited > - * > - * This program is free software; you can redistribute it and/or modify > - * it under the terms of the GNU General Public License version 2 as > - * published by the Free Software Foundation. 
> - * > - * This program is distributed in the hope that it will be useful, > - * but WITHOUT ANY WARRANTY; without even the implied warranty of > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > - * GNU General Public License for more details. > - * > - * You should have received a copy of the GNU General Public License > - * along with this program. If not, see <http://www.gnu.org/licenses/>. > - */ > -#ifndef __ASM_VDSO_DATAPAGE_H > -#define __ASM_VDSO_DATAPAGE_H > - > -#ifdef __KERNEL__ > - > -#ifndef __ASSEMBLY__ > - > -struct vdso_data { > - __u64 cs_cycle_last; /* Timebase at clocksource init */ > - __u64 raw_time_sec; /* Raw time */ > - __u64 raw_time_nsec; > - __u64 xtime_clock_sec; /* Kernel time */ > - __u64 xtime_clock_nsec; > - __u64 xtime_coarse_sec; /* Coarse time */ > - __u64 xtime_coarse_nsec; > - __u64 wtm_clock_sec; /* Wall to monotonic time */ > - __u64 wtm_clock_nsec; > - __u32 tb_seq_count; /* Timebase sequence counter */ > - /* cs_* members must be adjacent and in this order (ldp accesses) */ > - __u32 cs_mono_mult; /* NTP-adjusted clocksource multiplier */ > - __u32 cs_shift; /* Clocksource shift (mono = raw) */ > - __u32 cs_raw_mult; /* Raw clocksource multiplier */ > - __u32 tz_minuteswest; /* Whacky timezone stuff */ > - __u32 tz_dsttime; > - __u32 use_syscall; > - __u32 hrtimer_res; > -}; > - > -#endif /* !__ASSEMBLY__ */ > - > -#endif /* __KERNEL__ */ > - > -#endif /* __ASM_VDSO_DATAPAGE_H */ > diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c > index 947e39896e28..9e4b7ccbab2f 100644 > --- a/arch/arm64/kernel/asm-offsets.c > +++ b/arch/arm64/kernel/asm-offsets.c > @@ -25,13 +25,13 @@ > #include <linux/kvm_host.h> > #include <linux/preempt.h> > #include <linux/suspend.h> > +#include <vdso/datapage.h> > #include <asm/cpufeature.h> > #include <asm/fixmap.h> > #include <asm/thread_info.h> > #include <asm/memory.h> > #include <asm/smp_plat.h> > #include <asm/suspend.h> > -#include <asm/vdso_datapage.h> > #include <linux/kbuild.h> > #include <linux/arm-smccc.h> > > @@ -100,17 +100,28 @@ int main(void) > DEFINE(CLOCK_COARSE_RES, LOW_RES_NSEC); > DEFINE(NSEC_PER_SEC, NSEC_PER_SEC); > BLANK(); > - DEFINE(VDSO_CS_CYCLE_LAST, offsetof(struct vdso_data, cs_cycle_last)); > - DEFINE(VDSO_RAW_TIME_SEC, offsetof(struct vdso_data, raw_time_sec)); > - DEFINE(VDSO_XTIME_CLK_SEC, offsetof(struct vdso_data, xtime_clock_sec)); > - DEFINE(VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec)); > - DEFINE(VDSO_XTIME_CRS_NSEC, offsetof(struct vdso_data, xtime_coarse_nsec)); > - DEFINE(VDSO_WTM_CLK_SEC, offsetof(struct vdso_data, wtm_clock_sec)); > - DEFINE(VDSO_TB_SEQ_COUNT, offsetof(struct vdso_data, tb_seq_count)); > - DEFINE(VDSO_CS_MONO_MULT, offsetof(struct vdso_data, cs_mono_mult)); > - DEFINE(VDSO_CS_SHIFT, offsetof(struct vdso_data, cs_shift)); > + DEFINE(VDSO_SEQ, offsetof(struct vdso_data, seq)); > + DEFINE(VDSO_CLK_MODE, offsetof(struct vdso_data, clock_mode)); > + DEFINE(VDSO_CYCLE_LAST, offsetof(struct vdso_data, cycle_last)); > + DEFINE(VDSO_MASK, offsetof(struct vdso_data, mask)); > + DEFINE(VDSO_MULT, offsetof(struct vdso_data, mult)); > + DEFINE(VDSO_SHIFT, offsetof(struct vdso_data, shift)); > + DEFINE(VDSO_REALTIME_SEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME].sec)); > + DEFINE(VDSO_REALTIME_NSEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME].nsec)); > + DEFINE(VDSO_MONO_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC].sec)); > + DEFINE(VDSO_MONO_NSEC, offsetof(struct vdso_data, 
basetime[CLOCK_MONOTONIC].nsec)); > + DEFINE(VDSO_MONO_RAW_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_RAW].sec)); > + DEFINE(VDSO_MONO_RAW_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_RAW].nsec)); > + DEFINE(VDSO_BOOTTIME_SEC, offsetof(struct vdso_data, basetime[CLOCK_BOOTTIME].sec)); > + DEFINE(VDSO_BOOTTIME_NSEC, offsetof(struct vdso_data, basetime[CLOCK_BOOTTIME].nsec)); > + DEFINE(VDSO_TAI_SEC, offsetof(struct vdso_data, basetime[CLOCK_TAI].sec)); > + DEFINE(VDSO_TAI_NSEC, offsetof(struct vdso_data, basetime[CLOCK_TAI].nsec)); > + DEFINE(VDSO_RT_COARSE_SEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME_COARSE].sec)); > + DEFINE(VDSO_RT_COARSE_NSEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME_COARSE].nsec)); > + DEFINE(VDSO_MONO_COARSE_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_COARSE].sec)); > + DEFINE(VDSO_MONO_COARSE_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_COARSE].nsec)); > DEFINE(VDSO_TZ_MINWEST, offsetof(struct vdso_data, tz_minuteswest)); > - DEFINE(VDSO_USE_SYSCALL, offsetof(struct vdso_data, use_syscall)); > + DEFINE(VDSO_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime)); > BLANK(); > DEFINE(TVAL_TV_SEC, offsetof(struct timeval, tv_sec)); > DEFINE(TSPEC_TV_SEC, offsetof(struct timespec, tv_sec)); > diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c > index 8074cbd3a3a8..23c38303a52a 100644 > --- a/arch/arm64/kernel/vdso.c > +++ b/arch/arm64/kernel/vdso.c > @@ -31,11 +31,13 @@ > #include <linux/slab.h> > #include <linux/timekeeper_internal.h> > #include <linux/vmalloc.h> > +#include <vdso/datapage.h> > +#include <vdso/helpers.h> > +#include <vdso/vsyscall.h> > > #include <asm/cacheflush.h> > #include <asm/signal32.h> > #include <asm/vdso.h> > -#include <asm/vdso_datapage.h> > > extern char vdso_start[], vdso_end[]; > static unsigned long vdso_pages __ro_after_init; > @@ -44,10 +46,10 @@ static unsigned long vdso_pages __ro_after_init; > * The vDSO data page. > */ > static union { > - struct vdso_data data; > + struct vdso_data data[CS_BASES]; > u8 page[PAGE_SIZE]; > } vdso_data_store __page_aligned_data; > -struct vdso_data *vdso_data = &vdso_data_store.data; > +struct vdso_data *vdso_data = vdso_data_store.data; > > #ifdef CONFIG_COMPAT > /* > @@ -280,46 +282,3 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, > up_write(&mm->mmap_sem); > return PTR_ERR(ret); > } > - > -/* > - * Update the vDSO data page to keep in sync with kernel timekeeping. 
> - */ > -void update_vsyscall(struct timekeeper *tk) > -{ > - u32 use_syscall = !tk->tkr_mono.clock->archdata.vdso_direct; > - > - ++vdso_data->tb_seq_count; > - smp_wmb(); > - > - vdso_data->use_syscall = use_syscall; > - vdso_data->xtime_coarse_sec = tk->xtime_sec; > - vdso_data->xtime_coarse_nsec = tk->tkr_mono.xtime_nsec >> > - tk->tkr_mono.shift; > - vdso_data->wtm_clock_sec = tk->wall_to_monotonic.tv_sec; > - vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec; > - > - /* Read without the seqlock held by clock_getres() */ > - WRITE_ONCE(vdso_data->hrtimer_res, hrtimer_resolution); > - > - if (!use_syscall) { > - /* tkr_mono.cycle_last == tkr_raw.cycle_last */ > - vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last; > - vdso_data->raw_time_sec = tk->raw_sec; > - vdso_data->raw_time_nsec = tk->tkr_raw.xtime_nsec; > - vdso_data->xtime_clock_sec = tk->xtime_sec; > - vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec; > - vdso_data->cs_mono_mult = tk->tkr_mono.mult; > - vdso_data->cs_raw_mult = tk->tkr_raw.mult; > - /* tkr_mono.shift == tkr_raw.shift */ > - vdso_data->cs_shift = tk->tkr_mono.shift; > - } > - > - smp_wmb(); > - ++vdso_data->tb_seq_count; > -} > - > -void update_vsyscall_tz(void) > -{ > - vdso_data->tz_minuteswest = sys_tz.tz_minuteswest; > - vdso_data->tz_dsttime = sys_tz.tz_dsttime; > -} > diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile > index fa230ff09aa1..3acfc813e966 100644 > --- a/arch/arm64/kernel/vdso/Makefile > +++ b/arch/arm64/kernel/vdso/Makefile > @@ -6,7 +6,12 @@ > # Heavily based on the vDSO Makefiles for other archs. > # > > -obj-vdso := gettimeofday.o note.o sigreturn.o > +# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before > +# the inclusion of generic Makefile. 
> +ARCH_REL_TYPE_ABS := R_AARCH64_JUMP_SLOT|R_AARCH64_GLOB_DAT|R_AARCH64_ABS64 > +include $(srctree)/lib/vdso/Makefile > + > +obj-vdso := vgettimeofday.o note.o sigreturn.o > > # Build rules > targets := $(obj-vdso) vdso.so vdso.so.dbg > @@ -15,6 +20,24 @@ obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) > ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \ > --build-id -n -T > > +ccflags-y := -fno-common -fno-builtin -fno-stack-protector > +ccflags-y += -DDISABLE_BRANCH_PROFILING > + > +VDSO_LDFLAGS := -Bsymbolic > + > +CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os > +KBUILD_CFLAGS += $(DISABLE_LTO) > +KASAN_SANITIZE := n > +UBSAN_SANITIZE := n > +OBJECT_FILES_NON_STANDARD := y > +KCOV_INSTRUMENT := n > + > +ifeq ($(c-gettimeofday-y),) > +CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny > +else > +CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -include $(c-gettimeofday-y) > +endif > + > # Disable gcov profiling for VDSO code > GCOV_PROFILE := n > > @@ -28,6 +51,7 @@ $(obj)/vdso.o : $(obj)/vdso.so > # Link rule for the .so file, .lds has to be first > $(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE > $(call if_changed,ld) > + $(call if_changed,vdso_check) > > # Strip rule for the .so file > $(obj)/%.so: OBJCOPYFLAGS := -S > @@ -42,13 +66,9 @@ quiet_cmd_vdsosym = VDSOSYM $@ > include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE > $(call if_changed,vdsosym) > > -# Assembly rules for the .S files > -$(obj-vdso): %.o: %.S FORCE > - $(call if_changed_dep,vdsoas) > - > # Actual build commands > -quiet_cmd_vdsoas = VDSOA $@ > - cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $< > +quiet_cmd_vdsocc = VDSOCC $@ > + cmd_vdsocc = $(CC) $(a_flags) $(c_flags) -c -o $@ $< > > # Install commands for the unstripped file > quiet_cmd_vdso_install = INSTALL $@ > diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S > deleted file mode 100644 > index 856fee6d3512..000000000000 > --- a/arch/arm64/kernel/vdso/gettimeofday.S > +++ /dev/null > @@ -1,334 +0,0 @@ > -/* > - * Userspace implementations of gettimeofday() and friends. > - * > - * Copyright (C) 2012 ARM Limited > - * > - * This program is free software; you can redistribute it and/or modify > - * it under the terms of the GNU General Public License version 2 as > - * published by the Free Software Foundation. > - * > - * This program is distributed in the hope that it will be useful, > - * but WITHOUT ANY WARRANTY; without even the implied warranty of > - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > - * GNU General Public License for more details. > - * > - * You should have received a copy of the GNU General Public License > - * along with this program. If not, see <http://www.gnu.org/licenses/>. > - * > - * Author: Will Deacon <will.deacon@arm.com> > - */ > - > -#include <linux/linkage.h> > -#include <asm/asm-offsets.h> > -#include <asm/unistd.h> > - > -#define NSEC_PER_SEC_LO16 0xca00 > -#define NSEC_PER_SEC_HI16 0x3b9a > - > -vdso_data .req x6 > -seqcnt .req w7 > -w_tmp .req w8 > -x_tmp .req x8 > - > -/* > - * Conventions for macro arguments: > - * - An argument is write-only if its name starts with "res". > - * - All other arguments are read-only, unless otherwise specified. 
> - */ > - > - .macro seqcnt_acquire > -9999: ldr seqcnt, [vdso_data, #VDSO_TB_SEQ_COUNT] > - tbnz seqcnt, #0, 9999b > - dmb ishld > - .endm > - > - .macro seqcnt_check fail > - dmb ishld > - ldr w_tmp, [vdso_data, #VDSO_TB_SEQ_COUNT] > - cmp w_tmp, seqcnt > - b.ne \fail > - .endm > - > - .macro syscall_check fail > - ldr w_tmp, [vdso_data, #VDSO_USE_SYSCALL] > - cbnz w_tmp, \fail > - .endm > - > - .macro get_nsec_per_sec res > - mov \res, #NSEC_PER_SEC_LO16 > - movk \res, #NSEC_PER_SEC_HI16, lsl #16 > - .endm > - > - /* > - * Returns the clock delta, in nanoseconds left-shifted by the clock > - * shift. > - */ > - .macro get_clock_shifted_nsec res, cycle_last, mult > - /* Read the virtual counter. */ > - isb > - mrs x_tmp, cntvct_el0 > - /* Calculate cycle delta and convert to ns. */ > - sub \res, x_tmp, \cycle_last > - /* We can only guarantee 56 bits of precision. */ > - movn x_tmp, #0xff00, lsl #48 > - and \res, x_tmp, \res > - mul \res, \res, \mult > - /* > - * Fake address dependency from the value computed from the counter > - * register to subsequent data page accesses so that the sequence > - * locking also orders the read of the counter. > - */ > - and x_tmp, \res, xzr > - add vdso_data, vdso_data, x_tmp > - .endm > - > - /* > - * Returns in res_{sec,nsec} the REALTIME timespec, based on the > - * "wall time" (xtime) and the clock_mono delta. > - */ > - .macro get_ts_realtime res_sec, res_nsec, \ > - clock_nsec, xtime_sec, xtime_nsec, nsec_to_sec > - add \res_nsec, \clock_nsec, \xtime_nsec > - udiv x_tmp, \res_nsec, \nsec_to_sec > - add \res_sec, \xtime_sec, x_tmp > - msub \res_nsec, x_tmp, \nsec_to_sec, \res_nsec > - .endm > - > - /* > - * Returns in res_{sec,nsec} the timespec based on the clock_raw delta, > - * used for CLOCK_MONOTONIC_RAW. > - */ > - .macro get_ts_clock_raw res_sec, res_nsec, clock_nsec, nsec_to_sec > - udiv \res_sec, \clock_nsec, \nsec_to_sec > - msub \res_nsec, \res_sec, \nsec_to_sec, \clock_nsec > - .endm > - > - /* sec and nsec are modified in place. */ > - .macro add_ts sec, nsec, ts_sec, ts_nsec, nsec_to_sec > - /* Add timespec. */ > - add \sec, \sec, \ts_sec > - add \nsec, \nsec, \ts_nsec > - > - /* Normalise the new timespec. */ > - cmp \nsec, \nsec_to_sec > - b.lt 9999f > - sub \nsec, \nsec, \nsec_to_sec > - add \sec, \sec, #1 > -9999: > - cmp \nsec, #0 > - b.ge 9998f > - add \nsec, \nsec, \nsec_to_sec > - sub \sec, \sec, #1 > -9998: > - .endm > - > - .macro clock_gettime_return, shift=0 > - .if \shift == 1 > - lsr x11, x11, x12 > - .endif > - stp x10, x11, [x1, #TSPEC_TV_SEC] > - mov x0, xzr > - ret > - .endm > - > - .macro jump_slot jumptable, index, label > - .if (. - \jumptable) != 4 * (\index) > - .error "Jump slot index mismatch" > - .endif > - b \label > - .endm > - > - .text > - > -/* int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); */ > -ENTRY(__kernel_gettimeofday) > - .cfi_startproc > - adr vdso_data, _vdso_data > - /* If tv is NULL, skip to the timezone code. */ > - cbz x0, 2f > - > - /* Compute the time of day. 
*/ > -1: seqcnt_acquire > - syscall_check fail=4f > - ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] > - /* w11 = cs_mono_mult, w12 = cs_shift */ > - ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] > - ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] > - > - get_nsec_per_sec res=x9 > - lsl x9, x9, x12 > - > - get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 > - seqcnt_check fail=1b > - get_ts_realtime res_sec=x10, res_nsec=x11, \ > - clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 > - > - /* Convert ns to us. */ > - mov x13, #1000 > - lsl x13, x13, x12 > - udiv x11, x11, x13 > - stp x10, x11, [x0, #TVAL_TV_SEC] > -2: > - /* If tz is NULL, return 0. */ > - cbz x1, 3f > - ldp w4, w5, [vdso_data, #VDSO_TZ_MINWEST] > - stp w4, w5, [x1, #TZ_MINWEST] > -3: > - mov x0, xzr > - ret > -4: > - /* Syscall fallback. */ > - mov x8, #__NR_gettimeofday > - svc #0 > - ret > - .cfi_endproc > -ENDPROC(__kernel_gettimeofday) > - > -#define JUMPSLOT_MAX CLOCK_MONOTONIC_COARSE > - > -/* int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp); */ > -ENTRY(__kernel_clock_gettime) > - .cfi_startproc > - cmp w0, #JUMPSLOT_MAX > - b.hi syscall > - adr vdso_data, _vdso_data > - adr x_tmp, jumptable > - add x_tmp, x_tmp, w0, uxtw #2 > - br x_tmp > - > - ALIGN > -jumptable: > - jump_slot jumptable, CLOCK_REALTIME, realtime > - jump_slot jumptable, CLOCK_MONOTONIC, monotonic > - b syscall > - b syscall > - jump_slot jumptable, CLOCK_MONOTONIC_RAW, monotonic_raw > - jump_slot jumptable, CLOCK_REALTIME_COARSE, realtime_coarse > - jump_slot jumptable, CLOCK_MONOTONIC_COARSE, monotonic_coarse > - > - .if (. - jumptable) != 4 * (JUMPSLOT_MAX + 1) > - .error "Wrong jumptable size" > - .endif > - > - ALIGN > -realtime: > - seqcnt_acquire > - syscall_check fail=syscall > - ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] > - /* w11 = cs_mono_mult, w12 = cs_shift */ > - ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] > - ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] > - > - /* All computations are done with left-shifted nsecs. */ > - get_nsec_per_sec res=x9 > - lsl x9, x9, x12 > - > - get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 > - seqcnt_check fail=realtime > - get_ts_realtime res_sec=x10, res_nsec=x11, \ > - clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 > - clock_gettime_return, shift=1 > - > - ALIGN > -monotonic: > - seqcnt_acquire > - syscall_check fail=syscall > - ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] > - /* w11 = cs_mono_mult, w12 = cs_shift */ > - ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] > - ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] > - ldp x3, x4, [vdso_data, #VDSO_WTM_CLK_SEC] > - > - /* All computations are done with left-shifted nsecs. */ > - lsl x4, x4, x12 > - get_nsec_per_sec res=x9 > - lsl x9, x9, x12 > - > - get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 > - seqcnt_check fail=monotonic > - get_ts_realtime res_sec=x10, res_nsec=x11, \ > - clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 > - > - add_ts sec=x10, nsec=x11, ts_sec=x3, ts_nsec=x4, nsec_to_sec=x9 > - clock_gettime_return, shift=1 > - > - ALIGN > -monotonic_raw: > - seqcnt_acquire > - syscall_check fail=syscall > - ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] > - /* w11 = cs_raw_mult, w12 = cs_shift */ > - ldp w12, w11, [vdso_data, #VDSO_CS_SHIFT] > - ldp x13, x14, [vdso_data, #VDSO_RAW_TIME_SEC] > - > - /* All computations are done with left-shifted nsecs. 
*/ > - get_nsec_per_sec res=x9 > - lsl x9, x9, x12 > - > - get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 > - seqcnt_check fail=monotonic_raw > - get_ts_clock_raw res_sec=x10, res_nsec=x11, \ > - clock_nsec=x15, nsec_to_sec=x9 > - > - add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9 > - clock_gettime_return, shift=1 > - > - ALIGN > -realtime_coarse: > - seqcnt_acquire > - ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC] > - seqcnt_check fail=realtime_coarse > - clock_gettime_return > - > - ALIGN > -monotonic_coarse: > - seqcnt_acquire > - ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC] > - ldp x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC] > - seqcnt_check fail=monotonic_coarse > - > - /* Computations are done in (non-shifted) nsecs. */ > - get_nsec_per_sec res=x9 > - add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9 > - clock_gettime_return > - > - ALIGN > -syscall: /* Syscall fallback. */ > - mov x8, #__NR_clock_gettime > - svc #0 > - ret > - .cfi_endproc > -ENDPROC(__kernel_clock_gettime) > - > -/* int __kernel_clock_getres(clockid_t clock_id, struct timespec *res); */ > -ENTRY(__kernel_clock_getres) > - .cfi_startproc > - cmp w0, #CLOCK_REALTIME > - ccmp w0, #CLOCK_MONOTONIC, #0x4, ne > - ccmp w0, #CLOCK_MONOTONIC_RAW, #0x4, ne > - b.ne 1f > - > - adr vdso_data, _vdso_data > - ldr w2, [vdso_data, #CLOCK_REALTIME_RES] > - b 2f > -1: > - cmp w0, #CLOCK_REALTIME_COARSE > - ccmp w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne > - b.ne 4f > - ldr x2, 5f > -2: > - cbz x1, 3f > - stp xzr, x2, [x1] > - > -3: /* res == NULL. */ > - mov w0, wzr > - ret > - > -4: /* Syscall fallback. */ > - mov x8, #__NR_clock_getres > - svc #0 > - ret > -5: > - .quad CLOCK_COARSE_RES > - .cfi_endproc > -ENDPROC(__kernel_clock_getres) > diff --git a/arch/arm64/kernel/vdso/vgettimeofday.c b/arch/arm64/kernel/vdso/vgettimeofday.c > new file mode 100644 > index 000000000000..3c58f19dbdf4 > --- /dev/null > +++ b/arch/arm64/kernel/vdso/vgettimeofday.c > @@ -0,0 +1,28 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * ARM64 userspace implementations of gettimeofday() and similar. > + * > + * Copyright (C) 2018 ARM Limited > + * > + */ > +#include <linux/time.h> > +#include <linux/types.h> > + > +int __kernel_clock_gettime(clockid_t clock, > + struct __kernel_timespec *ts) > +{ > + return __cvdso_clock_gettime(clock, ts); > +} > + > +int __kernel_gettimeofday(struct __kernel_old_timeval *tv, > + struct timezone *tz) > +{ > + return __cvdso_gettimeofday(tv, tz); > +} > + > +int __kernel_clock_getres(clockid_t clock_id, > + struct __kernel_timespec *res) > +{ > + return __cvdso_clock_getres(clock_id, res); > +} > + Best regards
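A quick userspace check that can help narrow down a report like this (an ad-hoc test program with assumed names, not anything from the series): compare the libc/vDSO path with a forced syscall for the same clock.

#include <stdio.h>
#include <time.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(void)
{
	struct timespec vdso_ts, sys_ts;

	/* Normally served by the vDSO's __kernel_clock_gettime. */
	clock_gettime(CLOCK_REALTIME, &vdso_ts);

	/* Force the syscall path, bypassing the vDSO. */
	syscall(SYS_clock_gettime, CLOCK_REALTIME, &sys_ts);

	printf("vdso:    %lld.%09ld\n", (long long)vdso_ts.tv_sec, vdso_ts.tv_nsec);
	printf("syscall: %lld.%09ld\n", (long long)sys_ts.tv_sec, sys_ts.tv_nsec);
	return 0;
}

If the first value stays at the epoch while the second tracks the wall clock, the regression is in the vDSO data page update or the new C implementation rather than in the core timekeeping code.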
Hi Marek, On 6/28/19 2:09 PM, Marek Szyprowski wrote: > Dear All, > > On 2019-06-21 11:52, Vincenzo Frascino wrote: >> To take advantage of the commonly defined vdso interface for >> gettimeofday the architectural code requires an adaptation. >> >> Re-implement the gettimeofday vdso in C in order to use lib/vdso. >> >> With the new implementation arm64 gains support for CLOCK_BOOTTIME >> and CLOCK_TAI. >> >> Cc: Catalin Marinas <catalin.marinas@arm.com> >> Cc: Will Deacon <will.deacon@arm.com> >> Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com> >> Tested-by: Shijith Thotton <sthotton@marvell.com> >> Tested-by: Andre Przywara <andre.przywara@arm.com> >> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com> > > This patch causes serious regression on Samsung Exynos5433 SoC based > TM2(e) boards. The time in userspace is always set to begin of the epoch: > > # date 062813152019 > Fri Jun 28 13:15:00 UTC 2019 > # date > Thu Jan 1 00:00:00 UTC 1970 > # date > Thu Jan 1 00:00:00 UTC 1970 > > I've noticed that since the patch landed in Linux next-20190625 and > bisect indeed pointed to this patch. > Thank you for reporting this, seems that the next that you posted is missing some fixes for arm64. Could you please try the tree below? git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/vdso Let us know if the functionality is restored. Otherwise the issue will require further investigation. >> --- >> arch/arm64/Kconfig | 2 + >> arch/arm64/include/asm/vdso/gettimeofday.h | 86 ++++++ >> arch/arm64/include/asm/vdso/vsyscall.h | 53 ++++ >> arch/arm64/include/asm/vdso_datapage.h | 48 --- >> arch/arm64/kernel/asm-offsets.c | 33 +- >> arch/arm64/kernel/vdso.c | 51 +--- >> arch/arm64/kernel/vdso/Makefile | 34 ++- >> arch/arm64/kernel/vdso/gettimeofday.S | 334 --------------------- >> arch/arm64/kernel/vdso/vgettimeofday.c | 28 ++ >> 9 files changed, 223 insertions(+), 446 deletions(-) >> create mode 100644 arch/arm64/include/asm/vdso/gettimeofday.h >> create mode 100644 arch/arm64/include/asm/vdso/vsyscall.h >> delete mode 100644 arch/arm64/include/asm/vdso_datapage.h >> delete mode 100644 arch/arm64/kernel/vdso/gettimeofday.S >> create mode 100644 arch/arm64/kernel/vdso/vgettimeofday.c >> >> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig >> index 697ea0510729..952c9f8cf3b8 100644 >> --- a/arch/arm64/Kconfig >> +++ b/arch/arm64/Kconfig >> @@ -107,6 +107,7 @@ config ARM64 >> select GENERIC_STRNCPY_FROM_USER >> select GENERIC_STRNLEN_USER >> select GENERIC_TIME_VSYSCALL >> + select GENERIC_GETTIMEOFDAY >> select HANDLE_DOMAIN_IRQ >> select HARDIRQS_SW_RESEND >> select HAVE_PCI >> @@ -160,6 +161,7 @@ config ARM64 >> select HAVE_SYSCALL_TRACEPOINTS >> select HAVE_KPROBES >> select HAVE_KRETPROBES >> + select HAVE_GENERIC_VDSO >> select IOMMU_DMA if IOMMU_SUPPORT >> select IRQ_DOMAIN >> select IRQ_FORCED_THREADING >> diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h >> new file mode 100644 >> index 000000000000..bc3cb6738051 >> --- /dev/null >> +++ b/arch/arm64/include/asm/vdso/gettimeofday.h >> @@ -0,0 +1,86 @@ >> +/* SPDX-License-Identifier: GPL-2.0 */ >> +/* >> + * Copyright (C) 2018 ARM Limited >> + */ >> +#ifndef __ASM_VDSO_GETTIMEOFDAY_H >> +#define __ASM_VDSO_GETTIMEOFDAY_H >> + >> +#ifndef __ASSEMBLY__ >> + >> +#include <asm/unistd.h> >> +#include <uapi/linux/time.h> >> + >> +#define VDSO_HAS_CLOCK_GETRES 1 >> + >> +static __always_inline int gettimeofday_fallback( >> + struct __kernel_old_timeval *_tv, >> + struct 
timezone *_tz) >> +{ >> + register struct timezone *tz asm("x1") = _tz; >> + register struct __kernel_old_timeval *tv asm("x0") = _tv; >> + register long ret asm ("x0"); >> + register long nr asm("x8") = __NR_gettimeofday; >> + >> + asm volatile( >> + " svc #0\n" >> + : "=r" (ret) >> + : "r" (tv), "r" (tz), "r" (nr) >> + : "memory"); >> + >> + return ret; >> +} >> + >> +static __always_inline long clock_gettime_fallback( >> + clockid_t _clkid, >> + struct __kernel_timespec *_ts) >> +{ >> + register struct __kernel_timespec *ts asm("x1") = _ts; >> + register clockid_t clkid asm("x0") = _clkid; >> + register long ret asm ("x0"); >> + register long nr asm("x8") = __NR_clock_gettime; >> + >> + asm volatile( >> + " svc #0\n" >> + : "=r" (ret) >> + : "r" (clkid), "r" (ts), "r" (nr) >> + : "memory"); >> + >> + return ret; >> +} >> + >> +static __always_inline int clock_getres_fallback( >> + clockid_t _clkid, >> + struct __kernel_timespec *_ts) >> +{ >> + register struct __kernel_timespec *ts asm("x1") = _ts; >> + register clockid_t clkid asm("x0") = _clkid; >> + register long ret asm ("x0"); >> + register long nr asm("x8") = __NR_clock_getres; >> + >> + asm volatile( >> + " svc #0\n" >> + : "=r" (ret) >> + : "r" (clkid), "r" (ts), "r" (nr) >> + : "memory"); >> + >> + return ret; >> +} >> + >> +static __always_inline u64 __arch_get_hw_counter(s32 clock_mode) >> +{ >> + u64 res; >> + >> + asm volatile("mrs %0, cntvct_el0" : "=r" (res) :: "memory"); >> + >> + return res; >> +} >> + >> +static __always_inline >> +const struct vdso_data *__arch_get_vdso_data(void) >> +{ >> + return _vdso_data; >> +} >> + >> +#endif /* !__ASSEMBLY__ */ >> + >> +#endif /* __ASM_VDSO_GETTIMEOFDAY_H */ >> diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h >> new file mode 100644 >> index 000000000000..0c731bfc7c8c >> --- /dev/null >> +++ b/arch/arm64/include/asm/vdso/vsyscall.h >> @@ -0,0 +1,53 @@ >> +/* SPDX-License-Identifier: GPL-2.0 */ >> +#ifndef __ASM_VDSO_VSYSCALL_H >> +#define __ASM_VDSO_VSYSCALL_H >> + >> +#ifndef __ASSEMBLY__ >> + >> +#include <linux/timekeeper_internal.h> >> +#include <vdso/datapage.h> >> + >> +#define VDSO_PRECISION_MASK ~(0xFF00ULL<<48) >> + >> +extern struct vdso_data *vdso_data; >> + >> +/* >> + * Update the vDSO data page to keep in sync with kernel timekeeping. 
>> + */ >> +static __always_inline >> +struct vdso_data *__arm64_get_k_vdso_data(void) >> +{ >> + return vdso_data; >> +} >> +#define __arch_get_k_vdso_data __arm64_get_k_vdso_data >> + >> +static __always_inline >> +int __arm64_get_clock_mode(struct timekeeper *tk) >> +{ >> + u32 use_syscall = !tk->tkr_mono.clock->archdata.vdso_direct; >> + >> + return use_syscall; >> +} >> +#define __arch_get_clock_mode __arm64_get_clock_mode >> + >> +static __always_inline >> +int __arm64_use_vsyscall(struct vdso_data *vdata) >> +{ >> + return !vdata[CS_HRES_COARSE].clock_mode; >> +} >> +#define __arch_use_vsyscall __arm64_use_vsyscall >> + >> +static __always_inline >> +void __arm64_update_vsyscall(struct vdso_data *vdata, struct timekeeper *tk) >> +{ >> + vdata[CS_HRES_COARSE].mask = VDSO_PRECISION_MASK; >> + vdata[CS_RAW].mask = VDSO_PRECISION_MASK; >> +} >> +#define __arch_update_vsyscall __arm64_update_vsyscall >> + >> +/* The asm-generic header needs to be included after the definitions above */ >> +#include <asm-generic/vdso/vsyscall.h> >> + >> +#endif /* !__ASSEMBLY__ */ >> + >> +#endif /* __ASM_VDSO_VSYSCALL_H */ >> diff --git a/arch/arm64/include/asm/vdso_datapage.h b/arch/arm64/include/asm/vdso_datapage.h >> deleted file mode 100644 >> index f89263c8e11a..000000000000 >> --- a/arch/arm64/include/asm/vdso_datapage.h >> +++ /dev/null >> @@ -1,48 +0,0 @@ >> -/* >> - * Copyright (C) 2012 ARM Limited >> - * >> - * This program is free software; you can redistribute it and/or modify >> - * it under the terms of the GNU General Public License version 2 as >> - * published by the Free Software Foundation. >> - * >> - * This program is distributed in the hope that it will be useful, >> - * but WITHOUT ANY WARRANTY; without even the implied warranty of >> - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >> - * GNU General Public License for more details. >> - * >> - * You should have received a copy of the GNU General Public License >> - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
>> - */ >> -#ifndef __ASM_VDSO_DATAPAGE_H >> -#define __ASM_VDSO_DATAPAGE_H >> - >> -#ifdef __KERNEL__ >> - >> -#ifndef __ASSEMBLY__ >> - >> -struct vdso_data { >> - __u64 cs_cycle_last; /* Timebase at clocksource init */ >> - __u64 raw_time_sec; /* Raw time */ >> - __u64 raw_time_nsec; >> - __u64 xtime_clock_sec; /* Kernel time */ >> - __u64 xtime_clock_nsec; >> - __u64 xtime_coarse_sec; /* Coarse time */ >> - __u64 xtime_coarse_nsec; >> - __u64 wtm_clock_sec; /* Wall to monotonic time */ >> - __u64 wtm_clock_nsec; >> - __u32 tb_seq_count; /* Timebase sequence counter */ >> - /* cs_* members must be adjacent and in this order (ldp accesses) */ >> - __u32 cs_mono_mult; /* NTP-adjusted clocksource multiplier */ >> - __u32 cs_shift; /* Clocksource shift (mono = raw) */ >> - __u32 cs_raw_mult; /* Raw clocksource multiplier */ >> - __u32 tz_minuteswest; /* Whacky timezone stuff */ >> - __u32 tz_dsttime; >> - __u32 use_syscall; >> - __u32 hrtimer_res; >> -}; >> - >> -#endif /* !__ASSEMBLY__ */ >> - >> -#endif /* __KERNEL__ */ >> - >> -#endif /* __ASM_VDSO_DATAPAGE_H */ >> diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c >> index 947e39896e28..9e4b7ccbab2f 100644 >> --- a/arch/arm64/kernel/asm-offsets.c >> +++ b/arch/arm64/kernel/asm-offsets.c >> @@ -25,13 +25,13 @@ >> #include <linux/kvm_host.h> >> #include <linux/preempt.h> >> #include <linux/suspend.h> >> +#include <vdso/datapage.h> >> #include <asm/cpufeature.h> >> #include <asm/fixmap.h> >> #include <asm/thread_info.h> >> #include <asm/memory.h> >> #include <asm/smp_plat.h> >> #include <asm/suspend.h> >> -#include <asm/vdso_datapage.h> >> #include <linux/kbuild.h> >> #include <linux/arm-smccc.h> >> >> @@ -100,17 +100,28 @@ int main(void) >> DEFINE(CLOCK_COARSE_RES, LOW_RES_NSEC); >> DEFINE(NSEC_PER_SEC, NSEC_PER_SEC); >> BLANK(); >> - DEFINE(VDSO_CS_CYCLE_LAST, offsetof(struct vdso_data, cs_cycle_last)); >> - DEFINE(VDSO_RAW_TIME_SEC, offsetof(struct vdso_data, raw_time_sec)); >> - DEFINE(VDSO_XTIME_CLK_SEC, offsetof(struct vdso_data, xtime_clock_sec)); >> - DEFINE(VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec)); >> - DEFINE(VDSO_XTIME_CRS_NSEC, offsetof(struct vdso_data, xtime_coarse_nsec)); >> - DEFINE(VDSO_WTM_CLK_SEC, offsetof(struct vdso_data, wtm_clock_sec)); >> - DEFINE(VDSO_TB_SEQ_COUNT, offsetof(struct vdso_data, tb_seq_count)); >> - DEFINE(VDSO_CS_MONO_MULT, offsetof(struct vdso_data, cs_mono_mult)); >> - DEFINE(VDSO_CS_SHIFT, offsetof(struct vdso_data, cs_shift)); >> + DEFINE(VDSO_SEQ, offsetof(struct vdso_data, seq)); >> + DEFINE(VDSO_CLK_MODE, offsetof(struct vdso_data, clock_mode)); >> + DEFINE(VDSO_CYCLE_LAST, offsetof(struct vdso_data, cycle_last)); >> + DEFINE(VDSO_MASK, offsetof(struct vdso_data, mask)); >> + DEFINE(VDSO_MULT, offsetof(struct vdso_data, mult)); >> + DEFINE(VDSO_SHIFT, offsetof(struct vdso_data, shift)); >> + DEFINE(VDSO_REALTIME_SEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME].sec)); >> + DEFINE(VDSO_REALTIME_NSEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME].nsec)); >> + DEFINE(VDSO_MONO_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC].sec)); >> + DEFINE(VDSO_MONO_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC].nsec)); >> + DEFINE(VDSO_MONO_RAW_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_RAW].sec)); >> + DEFINE(VDSO_MONO_RAW_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_RAW].nsec)); >> + DEFINE(VDSO_BOOTTIME_SEC, offsetof(struct vdso_data, basetime[CLOCK_BOOTTIME].sec)); >> + 
DEFINE(VDSO_BOOTTIME_NSEC, offsetof(struct vdso_data, basetime[CLOCK_BOOTTIME].nsec)); >> + DEFINE(VDSO_TAI_SEC, offsetof(struct vdso_data, basetime[CLOCK_TAI].sec)); >> + DEFINE(VDSO_TAI_NSEC, offsetof(struct vdso_data, basetime[CLOCK_TAI].nsec)); >> + DEFINE(VDSO_RT_COARSE_SEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME_COARSE].sec)); >> + DEFINE(VDSO_RT_COARSE_NSEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME_COARSE].nsec)); >> + DEFINE(VDSO_MONO_COARSE_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_COARSE].sec)); >> + DEFINE(VDSO_MONO_COARSE_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_COARSE].nsec)); >> DEFINE(VDSO_TZ_MINWEST, offsetof(struct vdso_data, tz_minuteswest)); >> - DEFINE(VDSO_USE_SYSCALL, offsetof(struct vdso_data, use_syscall)); >> + DEFINE(VDSO_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime)); >> BLANK(); >> DEFINE(TVAL_TV_SEC, offsetof(struct timeval, tv_sec)); >> DEFINE(TSPEC_TV_SEC, offsetof(struct timespec, tv_sec)); >> diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c >> index 8074cbd3a3a8..23c38303a52a 100644 >> --- a/arch/arm64/kernel/vdso.c >> +++ b/arch/arm64/kernel/vdso.c >> @@ -31,11 +31,13 @@ >> #include <linux/slab.h> >> #include <linux/timekeeper_internal.h> >> #include <linux/vmalloc.h> >> +#include <vdso/datapage.h> >> +#include <vdso/helpers.h> >> +#include <vdso/vsyscall.h> >> >> #include <asm/cacheflush.h> >> #include <asm/signal32.h> >> #include <asm/vdso.h> >> -#include <asm/vdso_datapage.h> >> >> extern char vdso_start[], vdso_end[]; >> static unsigned long vdso_pages __ro_after_init; >> @@ -44,10 +46,10 @@ static unsigned long vdso_pages __ro_after_init; >> * The vDSO data page. >> */ >> static union { >> - struct vdso_data data; >> + struct vdso_data data[CS_BASES]; >> u8 page[PAGE_SIZE]; >> } vdso_data_store __page_aligned_data; >> -struct vdso_data *vdso_data = &vdso_data_store.data; >> +struct vdso_data *vdso_data = vdso_data_store.data; >> >> #ifdef CONFIG_COMPAT >> /* >> @@ -280,46 +282,3 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, >> up_write(&mm->mmap_sem); >> return PTR_ERR(ret); >> } >> - >> -/* >> - * Update the vDSO data page to keep in sync with kernel timekeeping. 
>> - */ >> -void update_vsyscall(struct timekeeper *tk) >> -{ >> - u32 use_syscall = !tk->tkr_mono.clock->archdata.vdso_direct; >> - >> - ++vdso_data->tb_seq_count; >> - smp_wmb(); >> - >> - vdso_data->use_syscall = use_syscall; >> - vdso_data->xtime_coarse_sec = tk->xtime_sec; >> - vdso_data->xtime_coarse_nsec = tk->tkr_mono.xtime_nsec >> >> - tk->tkr_mono.shift; >> - vdso_data->wtm_clock_sec = tk->wall_to_monotonic.tv_sec; >> - vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec; >> - >> - /* Read without the seqlock held by clock_getres() */ >> - WRITE_ONCE(vdso_data->hrtimer_res, hrtimer_resolution); >> - >> - if (!use_syscall) { >> - /* tkr_mono.cycle_last == tkr_raw.cycle_last */ >> - vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last; >> - vdso_data->raw_time_sec = tk->raw_sec; >> - vdso_data->raw_time_nsec = tk->tkr_raw.xtime_nsec; >> - vdso_data->xtime_clock_sec = tk->xtime_sec; >> - vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec; >> - vdso_data->cs_mono_mult = tk->tkr_mono.mult; >> - vdso_data->cs_raw_mult = tk->tkr_raw.mult; >> - /* tkr_mono.shift == tkr_raw.shift */ >> - vdso_data->cs_shift = tk->tkr_mono.shift; >> - } >> - >> - smp_wmb(); >> - ++vdso_data->tb_seq_count; >> -} >> - >> -void update_vsyscall_tz(void) >> -{ >> - vdso_data->tz_minuteswest = sys_tz.tz_minuteswest; >> - vdso_data->tz_dsttime = sys_tz.tz_dsttime; >> -} >> diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile >> index fa230ff09aa1..3acfc813e966 100644 >> --- a/arch/arm64/kernel/vdso/Makefile >> +++ b/arch/arm64/kernel/vdso/Makefile >> @@ -6,7 +6,12 @@ >> # Heavily based on the vDSO Makefiles for other archs. >> # >> >> -obj-vdso := gettimeofday.o note.o sigreturn.o >> +# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before >> +# the inclusion of generic Makefile. 
>> +ARCH_REL_TYPE_ABS := R_AARCH64_JUMP_SLOT|R_AARCH64_GLOB_DAT|R_AARCH64_ABS64 >> +include $(srctree)/lib/vdso/Makefile >> + >> +obj-vdso := vgettimeofday.o note.o sigreturn.o >> >> # Build rules >> targets := $(obj-vdso) vdso.so vdso.so.dbg >> @@ -15,6 +20,24 @@ obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) >> ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \ >> --build-id -n -T >> >> +ccflags-y := -fno-common -fno-builtin -fno-stack-protector >> +ccflags-y += -DDISABLE_BRANCH_PROFILING >> + >> +VDSO_LDFLAGS := -Bsymbolic >> + >> +CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os >> +KBUILD_CFLAGS += $(DISABLE_LTO) >> +KASAN_SANITIZE := n >> +UBSAN_SANITIZE := n >> +OBJECT_FILES_NON_STANDARD := y >> +KCOV_INSTRUMENT := n >> + >> +ifeq ($(c-gettimeofday-y),) >> +CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny >> +else >> +CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -include $(c-gettimeofday-y) >> +endif >> + >> # Disable gcov profiling for VDSO code >> GCOV_PROFILE := n >> >> @@ -28,6 +51,7 @@ $(obj)/vdso.o : $(obj)/vdso.so >> # Link rule for the .so file, .lds has to be first >> $(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE >> $(call if_changed,ld) >> + $(call if_changed,vdso_check) >> >> # Strip rule for the .so file >> $(obj)/%.so: OBJCOPYFLAGS := -S >> @@ -42,13 +66,9 @@ quiet_cmd_vdsosym = VDSOSYM $@ >> include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE >> $(call if_changed,vdsosym) >> >> -# Assembly rules for the .S files >> -$(obj-vdso): %.o: %.S FORCE >> - $(call if_changed_dep,vdsoas) >> - >> # Actual build commands >> -quiet_cmd_vdsoas = VDSOA $@ >> - cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $< >> +quiet_cmd_vdsocc = VDSOCC $@ >> + cmd_vdsocc = $(CC) $(a_flags) $(c_flags) -c -o $@ $< >> >> # Install commands for the unstripped file >> quiet_cmd_vdso_install = INSTALL $@ >> diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S >> deleted file mode 100644 >> index 856fee6d3512..000000000000 >> --- a/arch/arm64/kernel/vdso/gettimeofday.S >> +++ /dev/null >> @@ -1,334 +0,0 @@ >> -/* >> - * Userspace implementations of gettimeofday() and friends. >> - * >> - * Copyright (C) 2012 ARM Limited >> - * >> - * This program is free software; you can redistribute it and/or modify >> - * it under the terms of the GNU General Public License version 2 as >> - * published by the Free Software Foundation. >> - * >> - * This program is distributed in the hope that it will be useful, >> - * but WITHOUT ANY WARRANTY; without even the implied warranty of >> - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >> - * GNU General Public License for more details. >> - * >> - * You should have received a copy of the GNU General Public License >> - * along with this program. If not, see <http://www.gnu.org/licenses/>. >> - * >> - * Author: Will Deacon <will.deacon@arm.com> >> - */ >> - >> -#include <linux/linkage.h> >> -#include <asm/asm-offsets.h> >> -#include <asm/unistd.h> >> - >> -#define NSEC_PER_SEC_LO16 0xca00 >> -#define NSEC_PER_SEC_HI16 0x3b9a >> - >> -vdso_data .req x6 >> -seqcnt .req w7 >> -w_tmp .req w8 >> -x_tmp .req x8 >> - >> -/* >> - * Conventions for macro arguments: >> - * - An argument is write-only if its name starts with "res". >> - * - All other arguments are read-only, unless otherwise specified. 
>> - */ >> - >> - .macro seqcnt_acquire >> -9999: ldr seqcnt, [vdso_data, #VDSO_TB_SEQ_COUNT] >> - tbnz seqcnt, #0, 9999b >> - dmb ishld >> - .endm >> - >> - .macro seqcnt_check fail >> - dmb ishld >> - ldr w_tmp, [vdso_data, #VDSO_TB_SEQ_COUNT] >> - cmp w_tmp, seqcnt >> - b.ne \fail >> - .endm >> - >> - .macro syscall_check fail >> - ldr w_tmp, [vdso_data, #VDSO_USE_SYSCALL] >> - cbnz w_tmp, \fail >> - .endm >> - >> - .macro get_nsec_per_sec res >> - mov \res, #NSEC_PER_SEC_LO16 >> - movk \res, #NSEC_PER_SEC_HI16, lsl #16 >> - .endm >> - >> - /* >> - * Returns the clock delta, in nanoseconds left-shifted by the clock >> - * shift. >> - */ >> - .macro get_clock_shifted_nsec res, cycle_last, mult >> - /* Read the virtual counter. */ >> - isb >> - mrs x_tmp, cntvct_el0 >> - /* Calculate cycle delta and convert to ns. */ >> - sub \res, x_tmp, \cycle_last >> - /* We can only guarantee 56 bits of precision. */ >> - movn x_tmp, #0xff00, lsl #48 >> - and \res, x_tmp, \res >> - mul \res, \res, \mult >> - /* >> - * Fake address dependency from the value computed from the counter >> - * register to subsequent data page accesses so that the sequence >> - * locking also orders the read of the counter. >> - */ >> - and x_tmp, \res, xzr >> - add vdso_data, vdso_data, x_tmp >> - .endm >> - >> - /* >> - * Returns in res_{sec,nsec} the REALTIME timespec, based on the >> - * "wall time" (xtime) and the clock_mono delta. >> - */ >> - .macro get_ts_realtime res_sec, res_nsec, \ >> - clock_nsec, xtime_sec, xtime_nsec, nsec_to_sec >> - add \res_nsec, \clock_nsec, \xtime_nsec >> - udiv x_tmp, \res_nsec, \nsec_to_sec >> - add \res_sec, \xtime_sec, x_tmp >> - msub \res_nsec, x_tmp, \nsec_to_sec, \res_nsec >> - .endm >> - >> - /* >> - * Returns in res_{sec,nsec} the timespec based on the clock_raw delta, >> - * used for CLOCK_MONOTONIC_RAW. >> - */ >> - .macro get_ts_clock_raw res_sec, res_nsec, clock_nsec, nsec_to_sec >> - udiv \res_sec, \clock_nsec, \nsec_to_sec >> - msub \res_nsec, \res_sec, \nsec_to_sec, \clock_nsec >> - .endm >> - >> - /* sec and nsec are modified in place. */ >> - .macro add_ts sec, nsec, ts_sec, ts_nsec, nsec_to_sec >> - /* Add timespec. */ >> - add \sec, \sec, \ts_sec >> - add \nsec, \nsec, \ts_nsec >> - >> - /* Normalise the new timespec. */ >> - cmp \nsec, \nsec_to_sec >> - b.lt 9999f >> - sub \nsec, \nsec, \nsec_to_sec >> - add \sec, \sec, #1 >> -9999: >> - cmp \nsec, #0 >> - b.ge 9998f >> - add \nsec, \nsec, \nsec_to_sec >> - sub \sec, \sec, #1 >> -9998: >> - .endm >> - >> - .macro clock_gettime_return, shift=0 >> - .if \shift == 1 >> - lsr x11, x11, x12 >> - .endif >> - stp x10, x11, [x1, #TSPEC_TV_SEC] >> - mov x0, xzr >> - ret >> - .endm >> - >> - .macro jump_slot jumptable, index, label >> - .if (. - \jumptable) != 4 * (\index) >> - .error "Jump slot index mismatch" >> - .endif >> - b \label >> - .endm >> - >> - .text >> - >> -/* int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); */ >> -ENTRY(__kernel_gettimeofday) >> - .cfi_startproc >> - adr vdso_data, _vdso_data >> - /* If tv is NULL, skip to the timezone code. */ >> - cbz x0, 2f >> - >> - /* Compute the time of day. 
*/ >> -1: seqcnt_acquire >> - syscall_check fail=4f >> - ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] >> - /* w11 = cs_mono_mult, w12 = cs_shift */ >> - ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] >> - ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] >> - >> - get_nsec_per_sec res=x9 >> - lsl x9, x9, x12 >> - >> - get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 >> - seqcnt_check fail=1b >> - get_ts_realtime res_sec=x10, res_nsec=x11, \ >> - clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 >> - >> - /* Convert ns to us. */ >> - mov x13, #1000 >> - lsl x13, x13, x12 >> - udiv x11, x11, x13 >> - stp x10, x11, [x0, #TVAL_TV_SEC] >> -2: >> - /* If tz is NULL, return 0. */ >> - cbz x1, 3f >> - ldp w4, w5, [vdso_data, #VDSO_TZ_MINWEST] >> - stp w4, w5, [x1, #TZ_MINWEST] >> -3: >> - mov x0, xzr >> - ret >> -4: >> - /* Syscall fallback. */ >> - mov x8, #__NR_gettimeofday >> - svc #0 >> - ret >> - .cfi_endproc >> -ENDPROC(__kernel_gettimeofday) >> - >> -#define JUMPSLOT_MAX CLOCK_MONOTONIC_COARSE >> - >> -/* int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp); */ >> -ENTRY(__kernel_clock_gettime) >> - .cfi_startproc >> - cmp w0, #JUMPSLOT_MAX >> - b.hi syscall >> - adr vdso_data, _vdso_data >> - adr x_tmp, jumptable >> - add x_tmp, x_tmp, w0, uxtw #2 >> - br x_tmp >> - >> - ALIGN >> -jumptable: >> - jump_slot jumptable, CLOCK_REALTIME, realtime >> - jump_slot jumptable, CLOCK_MONOTONIC, monotonic >> - b syscall >> - b syscall >> - jump_slot jumptable, CLOCK_MONOTONIC_RAW, monotonic_raw >> - jump_slot jumptable, CLOCK_REALTIME_COARSE, realtime_coarse >> - jump_slot jumptable, CLOCK_MONOTONIC_COARSE, monotonic_coarse >> - >> - .if (. - jumptable) != 4 * (JUMPSLOT_MAX + 1) >> - .error "Wrong jumptable size" >> - .endif >> - >> - ALIGN >> -realtime: >> - seqcnt_acquire >> - syscall_check fail=syscall >> - ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] >> - /* w11 = cs_mono_mult, w12 = cs_shift */ >> - ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] >> - ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] >> - >> - /* All computations are done with left-shifted nsecs. */ >> - get_nsec_per_sec res=x9 >> - lsl x9, x9, x12 >> - >> - get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 >> - seqcnt_check fail=realtime >> - get_ts_realtime res_sec=x10, res_nsec=x11, \ >> - clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 >> - clock_gettime_return, shift=1 >> - >> - ALIGN >> -monotonic: >> - seqcnt_acquire >> - syscall_check fail=syscall >> - ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] >> - /* w11 = cs_mono_mult, w12 = cs_shift */ >> - ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] >> - ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] >> - ldp x3, x4, [vdso_data, #VDSO_WTM_CLK_SEC] >> - >> - /* All computations are done with left-shifted nsecs. */ >> - lsl x4, x4, x12 >> - get_nsec_per_sec res=x9 >> - lsl x9, x9, x12 >> - >> - get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 >> - seqcnt_check fail=monotonic >> - get_ts_realtime res_sec=x10, res_nsec=x11, \ >> - clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 >> - >> - add_ts sec=x10, nsec=x11, ts_sec=x3, ts_nsec=x4, nsec_to_sec=x9 >> - clock_gettime_return, shift=1 >> - >> - ALIGN >> -monotonic_raw: >> - seqcnt_acquire >> - syscall_check fail=syscall >> - ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] >> - /* w11 = cs_raw_mult, w12 = cs_shift */ >> - ldp w12, w11, [vdso_data, #VDSO_CS_SHIFT] >> - ldp x13, x14, [vdso_data, #VDSO_RAW_TIME_SEC] >> - >> - /* All computations are done with left-shifted nsecs. 
*/ >> - get_nsec_per_sec res=x9 >> - lsl x9, x9, x12 >> - >> - get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 >> - seqcnt_check fail=monotonic_raw >> - get_ts_clock_raw res_sec=x10, res_nsec=x11, \ >> - clock_nsec=x15, nsec_to_sec=x9 >> - >> - add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9 >> - clock_gettime_return, shift=1 >> - >> - ALIGN >> -realtime_coarse: >> - seqcnt_acquire >> - ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC] >> - seqcnt_check fail=realtime_coarse >> - clock_gettime_return >> - >> - ALIGN >> -monotonic_coarse: >> - seqcnt_acquire >> - ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC] >> - ldp x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC] >> - seqcnt_check fail=monotonic_coarse >> - >> - /* Computations are done in (non-shifted) nsecs. */ >> - get_nsec_per_sec res=x9 >> - add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9 >> - clock_gettime_return >> - >> - ALIGN >> -syscall: /* Syscall fallback. */ >> - mov x8, #__NR_clock_gettime >> - svc #0 >> - ret >> - .cfi_endproc >> -ENDPROC(__kernel_clock_gettime) >> - >> -/* int __kernel_clock_getres(clockid_t clock_id, struct timespec *res); */ >> -ENTRY(__kernel_clock_getres) >> - .cfi_startproc >> - cmp w0, #CLOCK_REALTIME >> - ccmp w0, #CLOCK_MONOTONIC, #0x4, ne >> - ccmp w0, #CLOCK_MONOTONIC_RAW, #0x4, ne >> - b.ne 1f >> - >> - adr vdso_data, _vdso_data >> - ldr w2, [vdso_data, #CLOCK_REALTIME_RES] >> - b 2f >> -1: >> - cmp w0, #CLOCK_REALTIME_COARSE >> - ccmp w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne >> - b.ne 4f >> - ldr x2, 5f >> -2: >> - cbz x1, 3f >> - stp xzr, x2, [x1] >> - >> -3: /* res == NULL. */ >> - mov w0, wzr >> - ret >> - >> -4: /* Syscall fallback. */ >> - mov x8, #__NR_clock_getres >> - svc #0 >> - ret >> -5: >> - .quad CLOCK_COARSE_RES >> - .cfi_endproc >> -ENDPROC(__kernel_clock_getres) >> diff --git a/arch/arm64/kernel/vdso/vgettimeofday.c b/arch/arm64/kernel/vdso/vgettimeofday.c >> new file mode 100644 >> index 000000000000..3c58f19dbdf4 >> --- /dev/null >> +++ b/arch/arm64/kernel/vdso/vgettimeofday.c >> @@ -0,0 +1,28 @@ >> +// SPDX-License-Identifier: GPL-2.0 >> +/* >> + * ARM64 userspace implementations of gettimeofday() and similar. >> + * >> + * Copyright (C) 2018 ARM Limited >> + * >> + */ >> +#include <linux/time.h> >> +#include <linux/types.h> >> + >> +int __kernel_clock_gettime(clockid_t clock, >> + struct __kernel_timespec *ts) >> +{ >> + return __cvdso_clock_gettime(clock, ts); >> +} >> + >> +int __kernel_gettimeofday(struct __kernel_old_timeval *tv, >> + struct timezone *tz) >> +{ >> + return __cvdso_gettimeofday(tv, tz); >> +} >> + >> +int __kernel_clock_getres(clockid_t clock_id, >> + struct __kernel_timespec *res) >> +{ >> + return __cvdso_clock_getres(clock_id, res); >> +} >> + > > Best regards >
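As a userspace cross-check of the behaviour reported above (an illustrative sketch, not part of the thread; it only assumes a Linux system with glibc): the C library normally routes clock_gettime() through the vDSO, so reading the clock once through libc and once through a raw syscall separates the vDSO fast path from the kernel path. With the regression present one would expect the first value to sit near the epoch while the second shows the real time.

#define _GNU_SOURCE
#include <stdio.h>
#include <time.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(void)
{
	struct timespec fast, slow;

	/* Normally served by the vDSO __kernel_clock_gettime(). */
	clock_gettime(CLOCK_REALTIME, &fast);
	/* Forces a kernel entry, bypassing the vDSO. */
	syscall(SYS_clock_gettime, CLOCK_REALTIME, &slow);

	printf("libc (vDSO): %lld.%09ld\n", (long long)fast.tv_sec, fast.tv_nsec);
	printf("raw syscall: %lld.%09ld\n", (long long)slow.tv_sec, slow.tv_nsec);
	return 0;
}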
Hi Vincenzo, On 6/28/19 16:32, Vincenzo Frascino wrote: > On 6/28/19 2:09 PM, Marek Szyprowski wrote: >> On 2019-06-21 11:52, Vincenzo Frascino wrote: >>> To take advantage of the commonly defined vdso interface for >>> gettimeofday the architectural code requires an adaptation. >>> >>> Re-implement the gettimeofday vdso in C in order to use lib/vdso. >>> >>> With the new implementation arm64 gains support for CLOCK_BOOTTIME >>> and CLOCK_TAI. >>> >>> Cc: Catalin Marinas <catalin.marinas@arm.com> >>> Cc: Will Deacon <will.deacon@arm.com> >>> Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com> >>> Tested-by: Shijith Thotton <sthotton@marvell.com> >>> Tested-by: Andre Przywara <andre.przywara@arm.com> >>> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com> >> >> This patch causes serious regression on Samsung Exynos5433 SoC based >> TM2(e) boards. The time in userspace is always set to begin of the epoch: >> >> # date 062813152019 >> Fri Jun 28 13:15:00 UTC 2019 >> # date >> Thu Jan 1 00:00:00 UTC 1970 >> # date >> Thu Jan 1 00:00:00 UTC 1970 >> >> I've noticed that since the patch landed in Linux next-20190625 and >> bisect indeed pointed to this patch. >> > Thank you for reporting this, seems that the next that you posted is missing > some fixes for arm64. > > Could you please try the tree below? > > git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/vdso > > Let us know if the functionality is restored. Otherwise the issue will require > further investigation. Marek is already out for holidays, I gave your tree a try but kernel from that branch was failing to boot on TM2(e). Then I have cherry-picked 5 patches from the branch that seemed to be missing in next-20190628: 28028f3174cf1 (HEAD) MAINTAINERS: Fix Andy's surname and the directory entries of VDSO ec8f8e4bf2206 arm64: vdso: Fix compilation with clang older than 8 721882ebb5729 arm64: compat: Fix __arch_get_hw_counter() implementation 7027fea977a3d arm64: Fix __arch_get_hw_counter() implementation 10b305853fe22 lib/vdso: Make delta calculation work correctly 48568d8c7f479 (tag: next-20190628, linux-next/master) Add linux-next specific files for 20190628 With those 5 additional patches on top of next-20190628 the problem is not observed any more. date, ping, etc. seems to be working well. # date Fri Jun 28 16:39:22 UTC 2019 # # systemctl stop systemd-timesyncd # # date 062818392019 Fri Jun 28 18:39:00 UTC 2019 # date Fri Jun 28 18:39:01 UTC 2019 # # date 062818432019; date Fri Jun 28 18:43:00 UTC 2019 Fri Jun 28 18:43:00 UTC 2019 # date Fri Jun 28 18:43:04 UTC 2019 -- Regards, Sylwester
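For context on two of the fixes listed above ("arm64: Fix __arch_get_hw_counter() implementation" and "lib/vdso: Make delta calculation work correctly"), the sketch below shows a plausible shape of the counter read and the delta-to-nanoseconds step. It is inferred only from the commit titles and from the removed gettimeofday.S (isb before reading cntvct_el0, 56 bits of guaranteed precision, VDSO_PRECISION_MASK in the patch); it is not the content of those commits.

#include <stdint.h>

#define VDSO_PRECISION_MASK	(~(0xFF00ULL << 48))	/* 56 usable bits, as in the patch */

static inline uint64_t counter_read(void)
{
	uint64_t res;

	/* arm64 only: order the counter read as the removed assembly did. */
	asm volatile("isb\n\tmrs %0, cntvct_el0" : "=r" (res) :: "memory");
	return res;
}

static inline uint64_t delta_to_shifted_ns(uint64_t cycles, uint64_t last,
					   uint64_t mask, uint32_t mult)
{
	/* Masked delta between the current and last snapshot, scaled by mult. */
	return ((cycles - last) & mask) * mult;
}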
Hi Sylwester, thank you for the quick turn around to my email. On 6/28/19 5:50 PM, Sylwester Nawrocki wrote: > Hi Vincenzo, > > On 6/28/19 16:32, Vincenzo Frascino wrote: >> On 6/28/19 2:09 PM, Marek Szyprowski wrote: >>> On 2019-06-21 11:52, Vincenzo Frascino wrote: >>>> To take advantage of the commonly defined vdso interface for >>>> gettimeofday the architectural code requires an adaptation. >>>> >>>> Re-implement the gettimeofday vdso in C in order to use lib/vdso. >>>> >>>> With the new implementation arm64 gains support for CLOCK_BOOTTIME >>>> and CLOCK_TAI. >>>> >>>> Cc: Catalin Marinas <catalin.marinas@arm.com> >>>> Cc: Will Deacon <will.deacon@arm.com> >>>> Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com> >>>> Tested-by: Shijith Thotton <sthotton@marvell.com> >>>> Tested-by: Andre Przywara <andre.przywara@arm.com> >>>> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com> >>> >>> This patch causes serious regression on Samsung Exynos5433 SoC based >>> TM2(e) boards. The time in userspace is always set to begin of the epoch: >>> >>> # date 062813152019 >>> Fri Jun 28 13:15:00 UTC 2019 >>> # date >>> Thu Jan 1 00:00:00 UTC 1970 >>> # date >>> Thu Jan 1 00:00:00 UTC 1970 >>> >>> I've noticed that since the patch landed in Linux next-20190625 and >>> bisect indeed pointed to this patch. >>> >> Thank you for reporting this, seems that the next that you posted is missing >> some fixes for arm64. >> >> Could you please try the tree below? >> >> git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git timers/vdso >> >> Let us know if the functionality is restored. Otherwise the issue will require >> further investigation. > > Marek is already out for holidays, I gave your tree a try but kernel from > that branch was failing to boot on TM2(e). > > Then I have cherry-picked 5 patches from the branch that seemed to > be missing in next-20190628: > > 28028f3174cf1 (HEAD) MAINTAINERS: Fix Andy's surname and the directory entries of VDSO > ec8f8e4bf2206 arm64: vdso: Fix compilation with clang older than 8 > 721882ebb5729 arm64: compat: Fix __arch_get_hw_counter() implementation > 7027fea977a3d arm64: Fix __arch_get_hw_counter() implementation > 10b305853fe22 lib/vdso: Make delta calculation work correctly > 48568d8c7f479 (tag: next-20190628, linux-next/master) Add linux-next specific files for 20190628 > > With those 5 additional patches on top of next-20190628 the problem > is not observed any more. date, ping, etc. seems to be working well. > > # date > Fri Jun 28 16:39:22 UTC 2019 > # > # systemctl stop systemd-timesyncd > # > # date 062818392019 > Fri Jun 28 18:39:00 UTC 2019 > # date > Fri Jun 28 18:39:01 UTC 2019 > # > # date 062818432019; date > Fri Jun 28 18:43:00 UTC 2019 > Fri Jun 28 18:43:00 UTC 2019 > # date > Fri Jun 28 18:43:04 UTC 2019 > This seems ok, thanks for spending some time to test our patches against your board. If I may, I would like to ask to you one favor, could you please keep an eye on next and once those patches are merged repeat the test? I want just to make sure that the regression does not reappear. Have a nice weekend. > -- > Regards, > Sylwester >
Hi Vincenzo, On 6/29/19 08:58, Vincenzo Frascino wrote: > If I may, I would like to ask to you one favor, could you please keep an eye on > next and once those patches are merged repeat the test? > > I want just to make sure that the regression does not reappear. My apologies, I forgot about this for a moment. I repeated the test with next-20190705 tag and couldn't see any regressions.
Hi Sylwester, On 08/07/2019 13:57, Sylwester Nawrocki wrote: > Hi Vincenzo, > > On 6/29/19 08:58, Vincenzo Frascino wrote: >> If I may, I would like to ask to you one favor, could you please keep an eye on >> next and once those patches are merged repeat the test? >> >> I want just to make sure that the regression does not reappear. > > My apologies, I forgot about this for a moment. I repeated the test with > next-20190705 tag and couldn't see any regressions. > No problem and thank you for the confirmation.
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 697ea0510729..952c9f8cf3b8 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -107,6 +107,7 @@ config ARM64 select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select GENERIC_TIME_VSYSCALL + select GENERIC_GETTIMEOFDAY select HANDLE_DOMAIN_IRQ select HARDIRQS_SW_RESEND select HAVE_PCI @@ -160,6 +161,7 @@ config ARM64 select HAVE_SYSCALL_TRACEPOINTS select HAVE_KPROBES select HAVE_KRETPROBES + select HAVE_GENERIC_VDSO select IOMMU_DMA if IOMMU_SUPPORT select IRQ_DOMAIN select IRQ_FORCED_THREADING diff --git a/arch/arm64/include/asm/vdso/gettimeofday.h b/arch/arm64/include/asm/vdso/gettimeofday.h new file mode 100644 index 000000000000..bc3cb6738051 --- /dev/null +++ b/arch/arm64/include/asm/vdso/gettimeofday.h @@ -0,0 +1,86 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2018 ARM Limited + */ +#ifndef __ASM_VDSO_GETTIMEOFDAY_H +#define __ASM_VDSO_GETTIMEOFDAY_H + +#ifndef __ASSEMBLY__ + +#include <asm/unistd.h> +#include <uapi/linux/time.h> + +#define VDSO_HAS_CLOCK_GETRES 1 + +static __always_inline int gettimeofday_fallback( + struct __kernel_old_timeval *_tv, + struct timezone *_tz) +{ + register struct timezone *tz asm("x1") = _tz; + register struct __kernel_old_timeval *tv asm("x0") = _tv; + register long ret asm ("x0"); + register long nr asm("x8") = __NR_gettimeofday; + + asm volatile( + " svc #0\n" + : "=r" (ret) + : "r" (tv), "r" (tz), "r" (nr) + : "memory"); + + return ret; +} + +static __always_inline long clock_gettime_fallback( + clockid_t _clkid, + struct __kernel_timespec *_ts) +{ + register struct __kernel_timespec *ts asm("x1") = _ts; + register clockid_t clkid asm("x0") = _clkid; + register long ret asm ("x0"); + register long nr asm("x8") = __NR_clock_gettime; + + asm volatile( + " svc #0\n" + : "=r" (ret) + : "r" (clkid), "r" (ts), "r" (nr) + : "memory"); + + return ret; +} + +static __always_inline int clock_getres_fallback( + clockid_t _clkid, + struct __kernel_timespec *_ts) +{ + register struct __kernel_timespec *ts asm("x1") = _ts; + register clockid_t clkid asm("x0") = _clkid; + register long ret asm ("x0"); + register long nr asm("x8") = __NR_clock_getres; + + asm volatile( + " svc #0\n" + : "=r" (ret) + : "r" (clkid), "r" (ts), "r" (nr) + : "memory"); + + return ret; +} + +static __always_inline u64 __arch_get_hw_counter(s32 clock_mode) +{ + u64 res; + + asm volatile("mrs %0, cntvct_el0" : "=r" (res) :: "memory"); + + return res; +} + +static __always_inline +const struct vdso_data *__arch_get_vdso_data(void) +{ + return _vdso_data; +} + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_GETTIMEOFDAY_H */ diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h new file mode 100644 index 000000000000..0c731bfc7c8c --- /dev/null +++ b/arch/arm64/include/asm/vdso/vsyscall.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_VDSO_VSYSCALL_H +#define __ASM_VDSO_VSYSCALL_H + +#ifndef __ASSEMBLY__ + +#include <linux/timekeeper_internal.h> +#include <vdso/datapage.h> + +#define VDSO_PRECISION_MASK ~(0xFF00ULL<<48) + +extern struct vdso_data *vdso_data; + +/* + * Update the vDSO data page to keep in sync with kernel timekeeping. 
+ */ +static __always_inline +struct vdso_data *__arm64_get_k_vdso_data(void) +{ + return vdso_data; +} +#define __arch_get_k_vdso_data __arm64_get_k_vdso_data + +static __always_inline +int __arm64_get_clock_mode(struct timekeeper *tk) +{ + u32 use_syscall = !tk->tkr_mono.clock->archdata.vdso_direct; + + return use_syscall; +} +#define __arch_get_clock_mode __arm64_get_clock_mode + +static __always_inline +int __arm64_use_vsyscall(struct vdso_data *vdata) +{ + return !vdata[CS_HRES_COARSE].clock_mode; +} +#define __arch_use_vsyscall __arm64_use_vsyscall + +static __always_inline +void __arm64_update_vsyscall(struct vdso_data *vdata, struct timekeeper *tk) +{ + vdata[CS_HRES_COARSE].mask = VDSO_PRECISION_MASK; + vdata[CS_RAW].mask = VDSO_PRECISION_MASK; +} +#define __arch_update_vsyscall __arm64_update_vsyscall + +/* The asm-generic header needs to be included after the definitions above */ +#include <asm-generic/vdso/vsyscall.h> + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_VSYSCALL_H */ diff --git a/arch/arm64/include/asm/vdso_datapage.h b/arch/arm64/include/asm/vdso_datapage.h deleted file mode 100644 index f89263c8e11a..000000000000 --- a/arch/arm64/include/asm/vdso_datapage.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (C) 2012 ARM Limited - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
- */ -#ifndef __ASM_VDSO_DATAPAGE_H -#define __ASM_VDSO_DATAPAGE_H - -#ifdef __KERNEL__ - -#ifndef __ASSEMBLY__ - -struct vdso_data { - __u64 cs_cycle_last; /* Timebase at clocksource init */ - __u64 raw_time_sec; /* Raw time */ - __u64 raw_time_nsec; - __u64 xtime_clock_sec; /* Kernel time */ - __u64 xtime_clock_nsec; - __u64 xtime_coarse_sec; /* Coarse time */ - __u64 xtime_coarse_nsec; - __u64 wtm_clock_sec; /* Wall to monotonic time */ - __u64 wtm_clock_nsec; - __u32 tb_seq_count; /* Timebase sequence counter */ - /* cs_* members must be adjacent and in this order (ldp accesses) */ - __u32 cs_mono_mult; /* NTP-adjusted clocksource multiplier */ - __u32 cs_shift; /* Clocksource shift (mono = raw) */ - __u32 cs_raw_mult; /* Raw clocksource multiplier */ - __u32 tz_minuteswest; /* Whacky timezone stuff */ - __u32 tz_dsttime; - __u32 use_syscall; - __u32 hrtimer_res; -}; - -#endif /* !__ASSEMBLY__ */ - -#endif /* __KERNEL__ */ - -#endif /* __ASM_VDSO_DATAPAGE_H */ diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 947e39896e28..9e4b7ccbab2f 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -25,13 +25,13 @@ #include <linux/kvm_host.h> #include <linux/preempt.h> #include <linux/suspend.h> +#include <vdso/datapage.h> #include <asm/cpufeature.h> #include <asm/fixmap.h> #include <asm/thread_info.h> #include <asm/memory.h> #include <asm/smp_plat.h> #include <asm/suspend.h> -#include <asm/vdso_datapage.h> #include <linux/kbuild.h> #include <linux/arm-smccc.h> @@ -100,17 +100,28 @@ int main(void) DEFINE(CLOCK_COARSE_RES, LOW_RES_NSEC); DEFINE(NSEC_PER_SEC, NSEC_PER_SEC); BLANK(); - DEFINE(VDSO_CS_CYCLE_LAST, offsetof(struct vdso_data, cs_cycle_last)); - DEFINE(VDSO_RAW_TIME_SEC, offsetof(struct vdso_data, raw_time_sec)); - DEFINE(VDSO_XTIME_CLK_SEC, offsetof(struct vdso_data, xtime_clock_sec)); - DEFINE(VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec)); - DEFINE(VDSO_XTIME_CRS_NSEC, offsetof(struct vdso_data, xtime_coarse_nsec)); - DEFINE(VDSO_WTM_CLK_SEC, offsetof(struct vdso_data, wtm_clock_sec)); - DEFINE(VDSO_TB_SEQ_COUNT, offsetof(struct vdso_data, tb_seq_count)); - DEFINE(VDSO_CS_MONO_MULT, offsetof(struct vdso_data, cs_mono_mult)); - DEFINE(VDSO_CS_SHIFT, offsetof(struct vdso_data, cs_shift)); + DEFINE(VDSO_SEQ, offsetof(struct vdso_data, seq)); + DEFINE(VDSO_CLK_MODE, offsetof(struct vdso_data, clock_mode)); + DEFINE(VDSO_CYCLE_LAST, offsetof(struct vdso_data, cycle_last)); + DEFINE(VDSO_MASK, offsetof(struct vdso_data, mask)); + DEFINE(VDSO_MULT, offsetof(struct vdso_data, mult)); + DEFINE(VDSO_SHIFT, offsetof(struct vdso_data, shift)); + DEFINE(VDSO_REALTIME_SEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME].sec)); + DEFINE(VDSO_REALTIME_NSEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME].nsec)); + DEFINE(VDSO_MONO_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC].sec)); + DEFINE(VDSO_MONO_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC].nsec)); + DEFINE(VDSO_MONO_RAW_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_RAW].sec)); + DEFINE(VDSO_MONO_RAW_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_RAW].nsec)); + DEFINE(VDSO_BOOTTIME_SEC, offsetof(struct vdso_data, basetime[CLOCK_BOOTTIME].sec)); + DEFINE(VDSO_BOOTTIME_NSEC, offsetof(struct vdso_data, basetime[CLOCK_BOOTTIME].nsec)); + DEFINE(VDSO_TAI_SEC, offsetof(struct vdso_data, basetime[CLOCK_TAI].sec)); + DEFINE(VDSO_TAI_NSEC, offsetof(struct vdso_data, basetime[CLOCK_TAI].nsec)); + 
DEFINE(VDSO_RT_COARSE_SEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME_COARSE].sec)); + DEFINE(VDSO_RT_COARSE_NSEC, offsetof(struct vdso_data, basetime[CLOCK_REALTIME_COARSE].nsec)); + DEFINE(VDSO_MONO_COARSE_SEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_COARSE].sec)); + DEFINE(VDSO_MONO_COARSE_NSEC, offsetof(struct vdso_data, basetime[CLOCK_MONOTONIC_COARSE].nsec)); DEFINE(VDSO_TZ_MINWEST, offsetof(struct vdso_data, tz_minuteswest)); - DEFINE(VDSO_USE_SYSCALL, offsetof(struct vdso_data, use_syscall)); + DEFINE(VDSO_TZ_DSTTIME, offsetof(struct vdso_data, tz_dsttime)); BLANK(); DEFINE(TVAL_TV_SEC, offsetof(struct timeval, tv_sec)); DEFINE(TSPEC_TV_SEC, offsetof(struct timespec, tv_sec)); diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 8074cbd3a3a8..23c38303a52a 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -31,11 +31,13 @@ #include <linux/slab.h> #include <linux/timekeeper_internal.h> #include <linux/vmalloc.h> +#include <vdso/datapage.h> +#include <vdso/helpers.h> +#include <vdso/vsyscall.h> #include <asm/cacheflush.h> #include <asm/signal32.h> #include <asm/vdso.h> -#include <asm/vdso_datapage.h> extern char vdso_start[], vdso_end[]; static unsigned long vdso_pages __ro_after_init; @@ -44,10 +46,10 @@ static unsigned long vdso_pages __ro_after_init; * The vDSO data page. */ static union { - struct vdso_data data; + struct vdso_data data[CS_BASES]; u8 page[PAGE_SIZE]; } vdso_data_store __page_aligned_data; -struct vdso_data *vdso_data = &vdso_data_store.data; +struct vdso_data *vdso_data = vdso_data_store.data; #ifdef CONFIG_COMPAT /* @@ -280,46 +282,3 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, up_write(&mm->mmap_sem); return PTR_ERR(ret); } - -/* - * Update the vDSO data page to keep in sync with kernel timekeeping. - */ -void update_vsyscall(struct timekeeper *tk) -{ - u32 use_syscall = !tk->tkr_mono.clock->archdata.vdso_direct; - - ++vdso_data->tb_seq_count; - smp_wmb(); - - vdso_data->use_syscall = use_syscall; - vdso_data->xtime_coarse_sec = tk->xtime_sec; - vdso_data->xtime_coarse_nsec = tk->tkr_mono.xtime_nsec >> - tk->tkr_mono.shift; - vdso_data->wtm_clock_sec = tk->wall_to_monotonic.tv_sec; - vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec; - - /* Read without the seqlock held by clock_getres() */ - WRITE_ONCE(vdso_data->hrtimer_res, hrtimer_resolution); - - if (!use_syscall) { - /* tkr_mono.cycle_last == tkr_raw.cycle_last */ - vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last; - vdso_data->raw_time_sec = tk->raw_sec; - vdso_data->raw_time_nsec = tk->tkr_raw.xtime_nsec; - vdso_data->xtime_clock_sec = tk->xtime_sec; - vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec; - vdso_data->cs_mono_mult = tk->tkr_mono.mult; - vdso_data->cs_raw_mult = tk->tkr_raw.mult; - /* tkr_mono.shift == tkr_raw.shift */ - vdso_data->cs_shift = tk->tkr_mono.shift; - } - - smp_wmb(); - ++vdso_data->tb_seq_count; -} - -void update_vsyscall_tz(void) -{ - vdso_data->tz_minuteswest = sys_tz.tz_minuteswest; - vdso_data->tz_dsttime = sys_tz.tz_dsttime; -} diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index fa230ff09aa1..3acfc813e966 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -6,7 +6,12 @@ # Heavily based on the vDSO Makefiles for other archs. # -obj-vdso := gettimeofday.o note.o sigreturn.o +# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before +# the inclusion of generic Makefile. 
+ARCH_REL_TYPE_ABS := R_AARCH64_JUMP_SLOT|R_AARCH64_GLOB_DAT|R_AARCH64_ABS64 +include $(srctree)/lib/vdso/Makefile + +obj-vdso := vgettimeofday.o note.o sigreturn.o # Build rules targets := $(obj-vdso) vdso.so vdso.so.dbg @@ -15,6 +20,24 @@ obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \ --build-id -n -T +ccflags-y := -fno-common -fno-builtin -fno-stack-protector +ccflags-y += -DDISABLE_BRANCH_PROFILING + +VDSO_LDFLAGS := -Bsymbolic + +CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os +KBUILD_CFLAGS += $(DISABLE_LTO) +KASAN_SANITIZE := n +UBSAN_SANITIZE := n +OBJECT_FILES_NON_STANDARD := y +KCOV_INSTRUMENT := n + +ifeq ($(c-gettimeofday-y),) +CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny +else +CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -include $(c-gettimeofday-y) +endif + # Disable gcov profiling for VDSO code GCOV_PROFILE := n @@ -28,6 +51,7 @@ $(obj)/vdso.o : $(obj)/vdso.so # Link rule for the .so file, .lds has to be first $(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE $(call if_changed,ld) + $(call if_changed,vdso_check) # Strip rule for the .so file $(obj)/%.so: OBJCOPYFLAGS := -S @@ -42,13 +66,9 @@ quiet_cmd_vdsosym = VDSOSYM $@ include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE $(call if_changed,vdsosym) -# Assembly rules for the .S files -$(obj-vdso): %.o: %.S FORCE - $(call if_changed_dep,vdsoas) - # Actual build commands -quiet_cmd_vdsoas = VDSOA $@ - cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $< +quiet_cmd_vdsocc = VDSOCC $@ + cmd_vdsocc = $(CC) $(a_flags) $(c_flags) -c -o $@ $< # Install commands for the unstripped file quiet_cmd_vdso_install = INSTALL $@ diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S deleted file mode 100644 index 856fee6d3512..000000000000 --- a/arch/arm64/kernel/vdso/gettimeofday.S +++ /dev/null @@ -1,334 +0,0 @@ -/* - * Userspace implementations of gettimeofday() and friends. - * - * Copyright (C) 2012 ARM Limited - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - * Author: Will Deacon <will.deacon@arm.com> - */ - -#include <linux/linkage.h> -#include <asm/asm-offsets.h> -#include <asm/unistd.h> - -#define NSEC_PER_SEC_LO16 0xca00 -#define NSEC_PER_SEC_HI16 0x3b9a - -vdso_data .req x6 -seqcnt .req w7 -w_tmp .req w8 -x_tmp .req x8 - -/* - * Conventions for macro arguments: - * - An argument is write-only if its name starts with "res". - * - All other arguments are read-only, unless otherwise specified. 
- */ - - .macro seqcnt_acquire -9999: ldr seqcnt, [vdso_data, #VDSO_TB_SEQ_COUNT] - tbnz seqcnt, #0, 9999b - dmb ishld - .endm - - .macro seqcnt_check fail - dmb ishld - ldr w_tmp, [vdso_data, #VDSO_TB_SEQ_COUNT] - cmp w_tmp, seqcnt - b.ne \fail - .endm - - .macro syscall_check fail - ldr w_tmp, [vdso_data, #VDSO_USE_SYSCALL] - cbnz w_tmp, \fail - .endm - - .macro get_nsec_per_sec res - mov \res, #NSEC_PER_SEC_LO16 - movk \res, #NSEC_PER_SEC_HI16, lsl #16 - .endm - - /* - * Returns the clock delta, in nanoseconds left-shifted by the clock - * shift. - */ - .macro get_clock_shifted_nsec res, cycle_last, mult - /* Read the virtual counter. */ - isb - mrs x_tmp, cntvct_el0 - /* Calculate cycle delta and convert to ns. */ - sub \res, x_tmp, \cycle_last - /* We can only guarantee 56 bits of precision. */ - movn x_tmp, #0xff00, lsl #48 - and \res, x_tmp, \res - mul \res, \res, \mult - /* - * Fake address dependency from the value computed from the counter - * register to subsequent data page accesses so that the sequence - * locking also orders the read of the counter. - */ - and x_tmp, \res, xzr - add vdso_data, vdso_data, x_tmp - .endm - - /* - * Returns in res_{sec,nsec} the REALTIME timespec, based on the - * "wall time" (xtime) and the clock_mono delta. - */ - .macro get_ts_realtime res_sec, res_nsec, \ - clock_nsec, xtime_sec, xtime_nsec, nsec_to_sec - add \res_nsec, \clock_nsec, \xtime_nsec - udiv x_tmp, \res_nsec, \nsec_to_sec - add \res_sec, \xtime_sec, x_tmp - msub \res_nsec, x_tmp, \nsec_to_sec, \res_nsec - .endm - - /* - * Returns in res_{sec,nsec} the timespec based on the clock_raw delta, - * used for CLOCK_MONOTONIC_RAW. - */ - .macro get_ts_clock_raw res_sec, res_nsec, clock_nsec, nsec_to_sec - udiv \res_sec, \clock_nsec, \nsec_to_sec - msub \res_nsec, \res_sec, \nsec_to_sec, \clock_nsec - .endm - - /* sec and nsec are modified in place. */ - .macro add_ts sec, nsec, ts_sec, ts_nsec, nsec_to_sec - /* Add timespec. */ - add \sec, \sec, \ts_sec - add \nsec, \nsec, \ts_nsec - - /* Normalise the new timespec. */ - cmp \nsec, \nsec_to_sec - b.lt 9999f - sub \nsec, \nsec, \nsec_to_sec - add \sec, \sec, #1 -9999: - cmp \nsec, #0 - b.ge 9998f - add \nsec, \nsec, \nsec_to_sec - sub \sec, \sec, #1 -9998: - .endm - - .macro clock_gettime_return, shift=0 - .if \shift == 1 - lsr x11, x11, x12 - .endif - stp x10, x11, [x1, #TSPEC_TV_SEC] - mov x0, xzr - ret - .endm - - .macro jump_slot jumptable, index, label - .if (. - \jumptable) != 4 * (\index) - .error "Jump slot index mismatch" - .endif - b \label - .endm - - .text - -/* int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); */ -ENTRY(__kernel_gettimeofday) - .cfi_startproc - adr vdso_data, _vdso_data - /* If tv is NULL, skip to the timezone code. */ - cbz x0, 2f - - /* Compute the time of day. */ -1: seqcnt_acquire - syscall_check fail=4f - ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] - /* w11 = cs_mono_mult, w12 = cs_shift */ - ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] - ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] - - get_nsec_per_sec res=x9 - lsl x9, x9, x12 - - get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 - seqcnt_check fail=1b - get_ts_realtime res_sec=x10, res_nsec=x11, \ - clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 - - /* Convert ns to us. */ - mov x13, #1000 - lsl x13, x13, x12 - udiv x11, x11, x13 - stp x10, x11, [x0, #TVAL_TV_SEC] -2: - /* If tz is NULL, return 0. 
*/ - cbz x1, 3f - ldp w4, w5, [vdso_data, #VDSO_TZ_MINWEST] - stp w4, w5, [x1, #TZ_MINWEST] -3: - mov x0, xzr - ret -4: - /* Syscall fallback. */ - mov x8, #__NR_gettimeofday - svc #0 - ret - .cfi_endproc -ENDPROC(__kernel_gettimeofday) - -#define JUMPSLOT_MAX CLOCK_MONOTONIC_COARSE - -/* int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp); */ -ENTRY(__kernel_clock_gettime) - .cfi_startproc - cmp w0, #JUMPSLOT_MAX - b.hi syscall - adr vdso_data, _vdso_data - adr x_tmp, jumptable - add x_tmp, x_tmp, w0, uxtw #2 - br x_tmp - - ALIGN -jumptable: - jump_slot jumptable, CLOCK_REALTIME, realtime - jump_slot jumptable, CLOCK_MONOTONIC, monotonic - b syscall - b syscall - jump_slot jumptable, CLOCK_MONOTONIC_RAW, monotonic_raw - jump_slot jumptable, CLOCK_REALTIME_COARSE, realtime_coarse - jump_slot jumptable, CLOCK_MONOTONIC_COARSE, monotonic_coarse - - .if (. - jumptable) != 4 * (JUMPSLOT_MAX + 1) - .error "Wrong jumptable size" - .endif - - ALIGN -realtime: - seqcnt_acquire - syscall_check fail=syscall - ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] - /* w11 = cs_mono_mult, w12 = cs_shift */ - ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] - ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] - - /* All computations are done with left-shifted nsecs. */ - get_nsec_per_sec res=x9 - lsl x9, x9, x12 - - get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 - seqcnt_check fail=realtime - get_ts_realtime res_sec=x10, res_nsec=x11, \ - clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 - clock_gettime_return, shift=1 - - ALIGN -monotonic: - seqcnt_acquire - syscall_check fail=syscall - ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] - /* w11 = cs_mono_mult, w12 = cs_shift */ - ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT] - ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC] - ldp x3, x4, [vdso_data, #VDSO_WTM_CLK_SEC] - - /* All computations are done with left-shifted nsecs. */ - lsl x4, x4, x12 - get_nsec_per_sec res=x9 - lsl x9, x9, x12 - - get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 - seqcnt_check fail=monotonic - get_ts_realtime res_sec=x10, res_nsec=x11, \ - clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9 - - add_ts sec=x10, nsec=x11, ts_sec=x3, ts_nsec=x4, nsec_to_sec=x9 - clock_gettime_return, shift=1 - - ALIGN -monotonic_raw: - seqcnt_acquire - syscall_check fail=syscall - ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST] - /* w11 = cs_raw_mult, w12 = cs_shift */ - ldp w12, w11, [vdso_data, #VDSO_CS_SHIFT] - ldp x13, x14, [vdso_data, #VDSO_RAW_TIME_SEC] - - /* All computations are done with left-shifted nsecs. */ - get_nsec_per_sec res=x9 - lsl x9, x9, x12 - - get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11 - seqcnt_check fail=monotonic_raw - get_ts_clock_raw res_sec=x10, res_nsec=x11, \ - clock_nsec=x15, nsec_to_sec=x9 - - add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9 - clock_gettime_return, shift=1 - - ALIGN -realtime_coarse: - seqcnt_acquire - ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC] - seqcnt_check fail=realtime_coarse - clock_gettime_return - - ALIGN -monotonic_coarse: - seqcnt_acquire - ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC] - ldp x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC] - seqcnt_check fail=monotonic_coarse - - /* Computations are done in (non-shifted) nsecs. */ - get_nsec_per_sec res=x9 - add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9 - clock_gettime_return - - ALIGN -syscall: /* Syscall fallback. 
*/ - mov x8, #__NR_clock_gettime - svc #0 - ret - .cfi_endproc -ENDPROC(__kernel_clock_gettime) - -/* int __kernel_clock_getres(clockid_t clock_id, struct timespec *res); */ -ENTRY(__kernel_clock_getres) - .cfi_startproc - cmp w0, #CLOCK_REALTIME - ccmp w0, #CLOCK_MONOTONIC, #0x4, ne - ccmp w0, #CLOCK_MONOTONIC_RAW, #0x4, ne - b.ne 1f - - adr vdso_data, _vdso_data - ldr w2, [vdso_data, #CLOCK_REALTIME_RES] - b 2f -1: - cmp w0, #CLOCK_REALTIME_COARSE - ccmp w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne - b.ne 4f - ldr x2, 5f -2: - cbz x1, 3f - stp xzr, x2, [x1] - -3: /* res == NULL. */ - mov w0, wzr - ret - -4: /* Syscall fallback. */ - mov x8, #__NR_clock_getres - svc #0 - ret -5: - .quad CLOCK_COARSE_RES - .cfi_endproc -ENDPROC(__kernel_clock_getres) diff --git a/arch/arm64/kernel/vdso/vgettimeofday.c b/arch/arm64/kernel/vdso/vgettimeofday.c new file mode 100644 index 000000000000..3c58f19dbdf4 --- /dev/null +++ b/arch/arm64/kernel/vdso/vgettimeofday.c @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ARM64 userspace implementations of gettimeofday() and similar. + * + * Copyright (C) 2018 ARM Limited + * + */ +#include <linux/time.h> +#include <linux/types.h> + +int __kernel_clock_gettime(clockid_t clock, + struct __kernel_timespec *ts) +{ + return __cvdso_clock_gettime(clock, ts); +} + +int __kernel_gettimeofday(struct __kernel_old_timeval *tv, + struct timezone *tz) +{ + return __cvdso_gettimeofday(tv, tz); +} + +int __kernel_clock_getres(clockid_t clock_id, + struct __kernel_timespec *res) +{ + return __cvdso_clock_getres(clock_id, res); +} +
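The three __kernel_* entry points above are thin wrappers around the generic __cvdso_* helpers from lib/vdso. For readers unfamiliar with that library, the following self-contained sketch shows the shape of the high-resolution read loop those helpers implement, using the vdso_data field names visible in the asm-offsets hunk (seq, cycle_last, mask, mult, shift, basetime[]). The type and helper names are local to the sketch and are not the lib/vdso API.

#include <stdint.h>

#define NSEC_PER_SEC 1000000000ULL

struct ts { int64_t sec; uint64_t nsec; };

struct data {
	uint32_t seq;		/* even: stable; odd: kernel update in progress */
	uint64_t cycle_last;
	uint64_t mask;
	uint32_t mult;
	uint32_t shift;
	struct ts basetime[12];	/* indexed by clock id in this sketch */
};

static uint64_t read_counter(void)
{
#ifdef __aarch64__
	uint64_t c;

	/* Corresponds to __arch_get_hw_counter() in the patch above. */
	asm volatile("mrs %0, cntvct_el0" : "=r" (c) :: "memory");
	return c;
#else
	return 0;	/* placeholder off arm64; only the loop structure matters */
#endif
}

static int read_hres(const struct data *vd, int clk, struct ts *out)
{
	uint64_t cycles, ns;
	uint32_t seq;

	do {
		/* Spin while an update is in progress, then snapshot the data. */
		do {
			seq = __atomic_load_n(&vd->seq, __ATOMIC_ACQUIRE);
		} while (seq & 1);

		cycles = read_counter();
		ns = vd->basetime[clk].nsec;	/* kept left-shifted by 'shift' */
		ns += ((cycles - vd->cycle_last) & vd->mask) * vd->mult;
		ns >>= vd->shift;
		out->sec = vd->basetime[clk].sec;

		/* Re-check the sequence count; retry if the kernel moved on. */
		__atomic_thread_fence(__ATOMIC_ACQUIRE);
	} while (__atomic_load_n(&vd->seq, __ATOMIC_RELAXED) != seq);

	out->sec += (int64_t)(ns / NSEC_PER_SEC);
	out->nsec = ns % NSEC_PER_SEC;
	return 0;
}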