@@ -29,6 +29,8 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr,
copy_page(to, from);
}
+void clear_user_highpage_nocache(struct page *page, unsigned long vaddr);
+
#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
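
The declaration above is meant to be consumed by generic huge page
clearing code. A minimal sketch of a caller, assuming a
clear_huge_page()-style loop (the function name and loop below are
illustrative, not part of this patch):

	/* Hypothetical caller: zero each subpage of a huge page
	 * with cache-cold stores. */
	static void clear_huge_page_sketch(struct page *page,
					   unsigned long addr,
					   unsigned int nr_pages)
	{
		unsigned int i;

		for (i = 0; i < nr_pages; i++) {
			cond_resched();
			clear_user_highpage_nocache(page + i,
						    addr + i * PAGE_SIZE);
		}
	}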
@@ -3,6 +3,8 @@
#ifdef __KERNEL__
+#include <linux/linkage.h>
+
/* Let gcc decide whether to inline or use the out of line functions */
#define __HAVE_ARCH_STRCPY
@@ -337,6 +339,9 @@ void *__constant_c_and_count_memset(void *s, unsigned long pattern,
#define __HAVE_ARCH_MEMSCAN
extern void *memscan(void *addr, int c, size_t size);
+#define ARCH_HAS_USER_NOCACHE 1
+asmlinkage void clear_page_nocache(void *page);
+
#endif /* __KERNEL__ */
#endif /* _ASM_X86_STRING_32_H */
@@ -3,6 +3,8 @@
#ifdef __KERNEL__
+#include <linux/linkage.h>
+
/* Written 2002 by Andi Kleen */
/* Only used for special circumstances. Stolen from i386/string.h */
@@ -63,6 +65,9 @@ char *strcpy(char *dest, const char *src);
char *strcat(char *dest, const char *src);
int strcmp(const char *cs, const char *ct);
+#define ARCH_HAS_USER_NOCACHE 1
+asmlinkage void clear_page_nocache(void *page);
+
#endif /* __KERNEL__ */
#endif /* _ASM_X86_STRING_64_H */
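
Both string_32.h and string_64.h now advertise ARCH_HAS_USER_NOCACHE.
A sketch of how generic code could key off that define and fall back
to the cached clear on other architectures (this fallback is an
assumption about the rest of the series, not something added here):

	#ifndef ARCH_HAS_USER_NOCACHE
	/* No cache-bypassing clear on this arch: use the cached one */
	#define clear_user_highpage_nocache(page, vaddr) \
		clear_user_highpage(page, vaddr)
	#endif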
@@ -23,6 +23,7 @@ lib-y += memcpy_$(BITS).o
lib-$(CONFIG_SMP) += rwlock.o
lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o
+lib-y += clear_page_$(BITS).o
obj-y += msr.o msr-reg.o msr-reg-export.o
@@ -40,7 +41,7 @@ endif
else
obj-y += iomap_copy_64.o
lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o
- lib-y += thunk_64.o clear_page_64.o copy_page_64.o
+ lib-y += thunk_64.o copy_page_64.o
lib-y += memmove_64.o memset_64.o
lib-y += copy_user_64.o copy_user_nocache_64.o
lib-y += cmpxchg16b_emu.o
new file mode 100644
@@ -0,0 +1,73 @@
+#include <linux/linkage.h>
+#include <asm/alternative-asm.h>
+#include <asm/cpufeature.h>
+#include <asm/dwarf2.h>
+
+/*
+ * Fallback version if SSE2 is not available.
+ */
+ENTRY(clear_page_nocache)
+ CFI_STARTPROC
+ mov 4(%esp),%edx /* page; asmlinkage passes the argument on the stack */
+ xorl %eax,%eax /* zero pattern */
+ movl $4096/32,%ecx /* 128 passes, 32 bytes each */
+ .p2align 4
+.Lloop:
+ decl %ecx
+#define PUT(x) mov %eax,x*4(%edx)
+ PUT(0)
+ PUT(1)
+ PUT(2)
+ PUT(3)
+ PUT(4)
+ PUT(5)
+ PUT(6)
+ PUT(7)
+#undef PUT
+ lea 32(%edx),%edx
+ jnz .Lloop
+ nop
+ ret
+ CFI_ENDPROC
+ENDPROC(clear_page_nocache)
+
+ .section .altinstr_replacement,"ax"
+1: .byte 0xeb /* jmp <disp8> */
+ .byte (clear_page_nocache_sse2 - clear_page_nocache) - (2f - 1b)
+ /* disp8: clear_page_nocache_sse2, relative to the end of the jmp */
+2:
+ .previous
+ .section .altinstructions,"a"
+ altinstruction_entry clear_page_nocache,1b,X86_FEATURE_XMM2,\
+ 16, 2b-1b
+ .previous
+
+/*
+ * Zero a page avoiding the caches
+ * eax page
+ */
+ENTRY(clear_page_nocache_sse2)
+ CFI_STARTPROC
+ mov 4(%esp),%edx /* page; asmlinkage passes the argument on the stack */
+ xorl %eax,%eax /* zero pattern */
+ movl $4096/32,%ecx /* 128 passes, 32 bytes each */
+ .p2align 4
+.Lloop_sse2:
+ decl %ecx
+#define PUT(x) movnti %eax,x*4(%edx)
+ PUT(0)
+ PUT(1)
+ PUT(2)
+ PUT(3)
+ PUT(4)
+ PUT(5)
+ PUT(6)
+ PUT(7)
+#undef PUT
+ lea 32(%edx),%edx
+ jnz .Lloop_sse2
+ sfence /* order the non-temporal stores against later stores */
+ nop
+ ret
+ CFI_ENDPROC
+ENDPROC(clear_page_nocache_sse2)
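
For reference, the SSE2 loop above (and its 64-bit counterpart below,
which uses 8-byte movnti) is the hand-written form of a non-temporal
store loop. A userspace C sketch of the same technique using compiler
intrinsics (illustration only; the kernel code uses movnti directly):

	#include <emmintrin.h>		/* SSE2: _mm_stream_si32 */

	static void clear_page_nocache_sketch(void *page)
	{
		int *p = page;
		unsigned int i;

		for (i = 0; i < 4096 / sizeof(int); i++)
			_mm_stream_si32(&p[i], 0);	/* movnti */
		_mm_sfence();	/* make the stores visible in order */
	}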
@@ -40,6 +40,7 @@ ENTRY(clear_page)
PUT(5)
PUT(6)
PUT(7)
+#undef PUT
leaq 64(%rdi),%rdi
jnz .Lloop
nop
@@ -71,3 +72,32 @@ ENDPROC(clear_page)
altinstruction_entry clear_page,2b,X86_FEATURE_ERMS, \
.Lclear_page_end-clear_page,3b-2b
.previous
+
+/*
+ * Zero a page avoiding the caches
+ * rdi page
+ */
+ENTRY(clear_page_nocache)
+ CFI_STARTPROC
+ xorl %eax,%eax /* zero pattern */
+ movl $4096/64,%ecx /* 64 passes, 64 bytes each */
+ .p2align 4
+.Lloop_nocache:
+ decl %ecx
+#define PUT(x) movnti %rax,x*8(%rdi)
+ PUT(0)
+ PUT(1)
+ PUT(2)
+ PUT(3)
+ PUT(4)
+ PUT(5)
+ PUT(6)
+ PUT(7)
+#undef PUT
+ leaq 64(%rdi),%rdi
+ jnz .Lloop_nocache
+ sfence /* order the non-temporal stores against later stores */
+ nop
+ ret
+ CFI_ENDPROC
+ENDPROC(clear_page_nocache)
@@ -1209,3 +1209,10 @@ good_area:
up_read(&mm->mmap_sem);
}
+
+void clear_user_highpage_nocache(struct page *page, unsigned long vaddr)
+{
+ void *p = kmap_atomic(page);
+ clear_page_nocache(p);
+ kunmap_atomic(p);
+}
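
clear_user_highpage_nocache() mirrors clear_user_highpage(): the
kmap_atomic()/kunmap_atomic() pair provides a temporary kernel mapping
so the helper also works for highmem pages on 32-bit. A usage sketch
(the allocation context is assumed, not part of this patch):

	struct page *page = alloc_page(GFP_HIGHUSER_MOVABLE);

	if (page)
		/* zero without pulling the whole page through the cache */
		clear_user_highpage_nocache(page, vaddr);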