@@ -136,6 +136,7 @@ config X86
select GENERIC_TIME_VSYSCALL
select GENERIC_GETTIMEOFDAY
select GENERIC_VDSO_TIME_NS
+ select GENERIC_VDSO_PREFETCH
select GUP_GET_PTE_LOW_HIGH if X86_PAE
select HARDIRQS_SW_RESEND
select HARDLOCKUP_CHECK_TIMESTAMP if X86_64
@@ -28,6 +28,7 @@ vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o
vobjs32-y := vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o
vobjs32-y += vdso32/vclock_gettime.o
vobjs-$(CONFIG_X86_SGX) += vsgx.o
+vobjs-$(CONFIG_GENERIC_VDSO_PREFETCH) += vprefetch.o
# files to link into kernel
obj-y += vma.o extable.o
@@ -4,36 +4,67 @@
#include <asm/current.h>
#include <asm/traps.h>
#include <asm/vdso.h>
+#include "extable.h"
struct vdso_exception_table_entry {
int insn, fixup;
unsigned int mask, flags;
};
-bool fixup_vdso_exception(struct pt_regs *regs, int trapnr,
- unsigned long error_code, unsigned long fault_addr)
+static unsigned long
+get_vdso_exception_table_entry(const struct pt_regs *regs, int trapnr,
+ unsigned int *flags)
{
const struct vdso_image *image = current->mm->context.vdso_image;
const struct vdso_exception_table_entry *extable;
unsigned int nr_entries, i;
unsigned long base;
+ unsigned long ip = regs->ip;
+ unsigned long vdso_base = (unsigned long)current->mm->context.vdso;
- if (!current->mm->context.vdso)
- return false;
-
- base = (unsigned long)current->mm->context.vdso + image->extable_base;
+ base = vdso_base + image->extable_base;
nr_entries = image->extable_len / (sizeof(*extable));
extable = image->extable;
for (i = 0; i < nr_entries; i++, base += sizeof(*extable)) {
- if (regs->ip == base + extable[i].insn) {
- regs->ip = base + extable[i].fixup;
- regs->di = trapnr;
- regs->si = error_code;
- regs->dx = fault_addr;
- return true;
- }
+ if (ip != base + extable[i].insn)
+ continue;
+
+ if (!((1u << trapnr) & extable[i].mask))
+ continue;
+
+ /* found */
+ if (flags)
+ *flags = extable[i].flags;
+ return base + extable[i].fixup;
}
- return false;
+ return 0;
+}
+
+bool __fixup_vdso_exception(struct pt_regs *regs, int trapnr,
+ unsigned long error_code, unsigned long fault_addr)
+{
+ unsigned long new_ip;
+
+ new_ip = get_vdso_exception_table_entry(regs, trapnr, NULL);
+ if (!new_ip)
+ return false;
+
+ instruction_pointer_set(regs, new_ip);
+ regs->di = trapnr;
+ regs->si = error_code;
+ regs->dx = fault_addr;
+ return true;
+}
+
+bool __is_async_vdso_exception(struct pt_regs *regs,
+			       int trapnr)
+{
+ unsigned long new_ip;
+ unsigned int flags;
+
+ new_ip = get_vdso_exception_table_entry(regs, trapnr, &flags);
+
+ return new_ip && (flags & ASM_VDSO_ASYNC_FLAGS);
}
@@ -28,6 +28,7 @@ VERSION {
clock_getres;
__vdso_clock_getres;
__vdso_sgx_enter_enclave;
+ __vdso_prefetch_page;
local: *;
};
}
new file mode 100644
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/linkage.h>
+#include <asm/export.h>
+#include <asm/errno.h>
+#include <asm/enclu.h>
+
+#include "extable.h"
+
+.code64
+.section .text, "ax"
+
+SYM_FUNC_START(__vdso_prefetch_page)
+ /* Prolog */
+ .cfi_startproc
+ push %rbp
+ .cfi_adjust_cfa_offset 8
+ .cfi_rel_offset %rbp, 0
+ mov %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+
+ xor %rax, %rax
+.Laccess_page:
+ movb (%rdi), %dil
+.Lout:
+
+ /* Epilog */
+ pop %rbp
+ .cfi_def_cfa %rsp, 8
+ ret
+
+.Lhandle_exception:
+ mov $-1ll, %rax
+ jmp .Lout
+ .cfi_endproc
+ASM_VDSO_EXTABLE_HANDLE .Laccess_page, .Lhandle_exception, \
+ (1<<X86_TRAP_PF), ASM_VDSO_ASYNC_FLAGS
+
+SYM_FUNC_END(__vdso_prefetch_page)
@@ -9,6 +9,7 @@
#ifndef __ASSEMBLER__
#include <linux/mm_types.h>
+#include <linux/sched.h>
struct vdso_image {
void *data;
@@ -49,9 +50,40 @@ extern void __init init_vdso_image(const struct vdso_image *image);
extern int map_vdso_once(const struct vdso_image *image, unsigned long addr);
-extern bool fixup_vdso_exception(struct pt_regs *regs, int trapnr,
- unsigned long error_code,
- unsigned long fault_addr);
+extern bool __fixup_vdso_exception(struct pt_regs *regs, int trapnr,
+ unsigned long error_code,
+ unsigned long fault_addr);
+
+extern bool __is_async_vdso_exception(struct pt_regs *regs,
+				      int trapnr);
+
+static inline bool is_exception_in_vdso(struct pt_regs *regs)
+{
+	const struct vdso_image *image = current->mm->context.vdso_image;
+	unsigned long vdso_base = (unsigned long)current->mm->context.vdso;
+
+	return vdso_base != 0 && image &&
+	       regs->ip >= vdso_base && regs->ip < vdso_base + image->size;
+}
+
+static inline bool is_async_vdso_exception(struct pt_regs *regs, int trapnr)
+{
+ if (!is_exception_in_vdso(regs))
+ return false;
+
+ return __is_async_vdso_exception(regs, trapnr);
+}
+
+static inline bool fixup_vdso_exception(struct pt_regs *regs, int trapnr,
+ unsigned long error_code,
+ unsigned long fault_addr)
+{
+ if (is_exception_in_vdso(regs))
+ return __fixup_vdso_exception(regs, trapnr, error_code,
+ fault_addr);
+ return false;
+}
+
#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_VDSO_H */
@@ -1289,6 +1289,10 @@ void do_user_addr_fault(struct pt_regs *regs,
if (user_mode(regs)) {
local_irq_enable();
flags |= FAULT_FLAG_USER;
+ if (IS_ENABLED(CONFIG_GENERIC_VDSO_PREFETCH) &&
+ is_async_vdso_exception(regs, X86_TRAP_PF))
+ flags |= FAULT_FLAG_ALLOW_RETRY |
+ FAULT_FLAG_RETRY_NOWAIT;
} else {
if (regs->flags & X86_EFLAGS_IF)
local_irq_enable();
@@ -1407,8 +1411,11 @@ void do_user_addr_fault(struct pt_regs *regs,
*/
if (unlikely((fault & VM_FAULT_RETRY) &&
(flags & FAULT_FLAG_ALLOW_RETRY))) {
- flags |= FAULT_FLAG_TRIED;
- goto retry;
+ if (!(flags & FAULT_FLAG_RETRY_NOWAIT)) {
+ flags |= FAULT_FLAG_TRIED;
+ goto retry;
+ }
+ fixup_vdso_exception(regs, X86_TRAP_PF, hw_error_code, address);
}
mmap_read_unlock(mm);
@@ -30,4 +30,9 @@ config GENERIC_VDSO_TIME_NS
Selected by architectures which support time namespaces in the
VDSO
+config GENERIC_VDSO_PREFETCH
+ bool
+ help
+	  Selected by architectures which support page prefetch via the vDSO
+
endif