Message ID: de812a02fd94a0dba07d43606bd893c564aa4528.1620849613.git.pcc@google.com
State: New, archived
Series: arm64: improve efficiency of setting tags for user pages
On Wed, May 12, 2021 at 11:09 PM Peter Collingbourne <pcc@google.com> wrote:
>
> Currently, on an anonymous page fault, the kernel allocates a zeroed
> page and maps it in user space. If the mapping is tagged (PROT_MTE),
> set_pte_at() additionally clears the tags. It is, however, more
> efficient to clear the tags at the same time as zeroing the data on
> allocation. To avoid clearing the tags on any page (which may not be
> mapped as tagged), only do this if the vma flags contain VM_MTE. This
> requires introducing a new GFP flag that is used to determine whether
> to clear the tags.
>
> The DC GZVA instruction with a 0 top byte (and 0 tag) requires
> top-byte-ignore. Set the TCR_EL1.{TBI1,TBID1} bits irrespective of
> whether KASAN_HW is enabled.
>
> Signed-off-by: Peter Collingbourne <pcc@google.com>
> Co-developed-by: Catalin Marinas <catalin.marinas@arm.com>
> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
> Link: https://linux-review.googlesource.com/id/Id46dc94e30fe11474f7e54f5d65e7658dbdddb26
> Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
> ---
> v2:
> - remove want_zero_tags_on_free()
>
>  arch/arm64/include/asm/mte.h  |  4 ++++
>  arch/arm64/include/asm/page.h |  9 +++++++--
>  arch/arm64/lib/mte.S          | 20 ++++++++++++++++++++
>  arch/arm64/mm/fault.c         | 25 +++++++++++++++++++++++++
>  arch/arm64/mm/proc.S          | 10 +++++++---
>  include/linux/gfp.h           |  9 +++++++--
>  include/linux/highmem.h       |  8 ++++++++
>  mm/kasan/hw_tags.c            |  9 ++++++++-
>  mm/page_alloc.c               | 13 ++++++++++---
>  9 files changed, 96 insertions(+), 11 deletions(-)
>
> diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
> index bc88a1ced0d7..67bf259ae768 100644
> --- a/arch/arm64/include/asm/mte.h
> +++ b/arch/arm64/include/asm/mte.h
> @@ -37,6 +37,7 @@ void mte_free_tag_storage(char *storage);
>  /* track which pages have valid allocation tags */
>  #define PG_mte_tagged   PG_arch_2
>
> +void mte_zero_clear_page_tags(void *addr);
>  void mte_sync_tags(pte_t *ptep, pte_t pte);
>  void mte_copy_page_tags(void *kto, const void *kfrom);
>  void mte_thread_init_user(void);
> @@ -53,6 +54,9 @@ int mte_ptrace_copy_tags(struct task_struct *child, long request,
>  /* unused if !CONFIG_ARM64_MTE, silence the compiler */
>  #define PG_mte_tagged   0
>
> +static inline void mte_zero_clear_page_tags(void *addr)
> +{
> +}
>  static inline void mte_sync_tags(pte_t *ptep, pte_t pte)
>  {
>  }
> diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
> index 012cffc574e8..448e14071d13 100644
> --- a/arch/arm64/include/asm/page.h
> +++ b/arch/arm64/include/asm/page.h
> @@ -13,6 +13,7 @@
>  #ifndef __ASSEMBLY__
>
>  #include <linux/personality.h> /* for READ_IMPLIES_EXEC */
> +#include <linux/types.h> /* for gfp_t */
>  #include <asm/pgtable-types.h>
>
>  struct page;
> @@ -28,10 +29,14 @@ void copy_user_highpage(struct page *to, struct page *from,
>  void copy_highpage(struct page *to, struct page *from);
>  #define __HAVE_ARCH_COPY_HIGHPAGE
>
> -#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
> -	alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
> +struct page *__alloc_zeroed_user_highpage(gfp_t movableflags,
> +					  struct vm_area_struct *vma,
> +					  unsigned long vaddr);
>  #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
>
> +void tag_clear_highpage(struct page *to);
> +#define __HAVE_ARCH_TAG_CLEAR_HIGHPAGE
> +
>  #define clear_user_page(page, vaddr, pg)	clear_page(page)
>  #define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
>
> diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S
> index 351537c12f36..e83643b3995f 100644
> --- a/arch/arm64/lib/mte.S
> +++ b/arch/arm64/lib/mte.S
> @@ -36,6 +36,26 @@ SYM_FUNC_START(mte_clear_page_tags)
>  	ret
>  SYM_FUNC_END(mte_clear_page_tags)
>
> +/*
> + * Zero the page and tags at the same time
> + *
> + * Parameters:
> + *	x0 - address to the beginning of the page
> + */
> +SYM_FUNC_START(mte_zero_clear_page_tags)
> +	mrs	x1, dczid_el0
> +	and	w1, w1, #0xf
> +	mov	x2, #4
> +	lsl	x1, x2, x1
> +	and	x0, x0, #(1 << MTE_TAG_SHIFT) - 1	// clear the tag
> +
> +1:	dc	gzva, x0
> +	add	x0, x0, x1
> +	tst	x0, #(PAGE_SIZE - 1)
> +	b.ne	1b
> +	ret
> +SYM_FUNC_END(mte_zero_clear_page_tags)
> +
>  /*
>   * Copy the tags from the source page to the destination one
>   *   x0 - address of the destination page
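[For readers decoding the mrs/and/lsl sequence above: DCZID_EL0.BS (bits
[3:0]) encodes log2 of the DC ZVA/GZVA block size in 4-byte words, so the
loop stride in bytes is 4 << BS. A minimal userspace C sketch of the same
computation, an illustration only and not part of the patch:]

#include <stdint.h>
#include <stdio.h>

/* DCZID_EL0 is readable from EL0 on AArch64. */
static inline uint64_t read_dczid_el0(void)
{
	uint64_t v;

	asm volatile("mrs %0, dczid_el0" : "=r"(v));
	return v;
}

int main(void)
{
	uint64_t bs = read_dczid_el0() & 0xf;	/* DCZID_EL0.BS */

	printf("DC ZVA/GZVA block size: %u bytes\n", 4u << bs);
	return 0;
}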
> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
> index 871c82ab0a30..8127e0c0b8fb 100644
> --- a/arch/arm64/mm/fault.c
> +++ b/arch/arm64/mm/fault.c
> @@ -921,3 +921,28 @@ void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
>  	debug_exception_exit(regs);
>  }
>  NOKPROBE_SYMBOL(do_debug_exception);
> +
> +/*
> + * Used during anonymous page fault handling.
> + */
> +struct page *__alloc_zeroed_user_highpage(gfp_t flags,
> +					  struct vm_area_struct *vma,
> +					  unsigned long vaddr)
> +{
> +	/*
> +	 * If the page is mapped with PROT_MTE, initialise the tags at the
> +	 * point of allocation and page zeroing as this is usually faster than
> +	 * separate DC ZVA and STGM.
> +	 */
> +	if (vma->vm_flags & VM_MTE)
> +		flags |= __GFP_ZEROTAGS;
> +
> +	return alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | flags, vma, vaddr);
> +}
> +
> +void tag_clear_highpage(struct page *page)
> +{
> +	mte_zero_clear_page_tags(page_address(page));
> +	page_kasan_tag_reset(page);
> +	set_bit(PG_mte_tagged, &page->flags);
> +}
> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> index 0a48191534ff..a27c77dbe91c 100644
> --- a/arch/arm64/mm/proc.S
> +++ b/arch/arm64/mm/proc.S
> @@ -46,9 +46,13 @@
>  #endif
>
>  #ifdef CONFIG_KASAN_HW_TAGS
> -#define TCR_KASAN_HW_FLAGS SYS_TCR_EL1_TCMA1 | TCR_TBI1 | TCR_TBID1
> +#define TCR_MTE_FLAGS SYS_TCR_EL1_TCMA1 | TCR_TBI1 | TCR_TBID1
>  #else
> -#define TCR_KASAN_HW_FLAGS 0
> +/*
> + * The mte_zero_clear_page_tags() implementation uses DC GZVA, which relies on
> + * TBI being enabled at EL1.
> + */
> +#define TCR_MTE_FLAGS	TCR_TBI1 | TCR_TBID1
>  #endif
>
>  /*
> @@ -452,7 +456,7 @@ SYM_FUNC_START(__cpu_setup)
>  	msr_s	SYS_TFSRE0_EL1, xzr
>
>  	/* set the TCR_EL1 bits */
> -	mov_q	x10, TCR_KASAN_HW_FLAGS
> +	mov_q	x10, TCR_MTE_FLAGS
>  	orr	tcr, tcr, x10
> 1:
>  #endif
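[For context, the TCR_EL1 fields named above sit at these bit positions.
This is a reference sketch of the architectural values per the Arm ARM,
shown as C defines for clarity, not a quote of the kernel's headers:]

#define TCR_TBI1           (1UL << 38) /* ignore top byte of TTBR1 addresses */
#define TCR_TBID1          (1UL << 52) /* TBI1 applies to data accesses only */
#define SYS_TCR_EL1_TCMA1  (1UL << 58) /* tag 0b1111 accesses via TTBR1 unchecked */

[With TBI1/TBID1 always set, the address passed to DC GZVA has its top byte
ignored even when CONFIG_KASAN_HW_TAGS is disabled, which is why the #else
branch above can no longer be 0 and the TCR_MTE_FLAGS rename makes sense.]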
> diff --git a/include/linux/gfp.h b/include/linux/gfp.h
> index 11da8af06704..68ba237365dc 100644
> --- a/include/linux/gfp.h
> +++ b/include/linux/gfp.h
> @@ -53,8 +53,9 @@ struct vm_area_struct;
>  #define ___GFP_HARDWALL		0x100000u
>  #define ___GFP_THISNODE		0x200000u
>  #define ___GFP_ACCOUNT		0x400000u
> +#define ___GFP_ZEROTAGS		0x800000u
>  #ifdef CONFIG_LOCKDEP
> -#define ___GFP_NOLOCKDEP	0x800000u
> +#define ___GFP_NOLOCKDEP	0x1000000u
>  #else
>  #define ___GFP_NOLOCKDEP	0
>  #endif
> @@ -229,16 +230,20 @@ struct vm_area_struct;
>   * %__GFP_COMP address compound page metadata.
>   *
>   * %__GFP_ZERO returns a zeroed page on success.
> + *
> + * %__GFP_ZEROTAGS returns a page with zeroed memory tags on success, if
> + * __GFP_ZERO is set.
>   */
>  #define __GFP_NOWARN	((__force gfp_t)___GFP_NOWARN)
>  #define __GFP_COMP	((__force gfp_t)___GFP_COMP)
>  #define __GFP_ZERO	((__force gfp_t)___GFP_ZERO)
> +#define __GFP_ZEROTAGS	((__force gfp_t)___GFP_ZEROTAGS)
>
>  /* Disable lockdep for GFP context tracking */
>  #define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP)
>
>  /* Room for N __GFP_FOO bits */
> -#define __GFP_BITS_SHIFT (23 + IS_ENABLED(CONFIG_LOCKDEP))
> +#define __GFP_BITS_SHIFT (24 + IS_ENABLED(CONFIG_LOCKDEP))
>  #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
>
>  /**
> diff --git a/include/linux/highmem.h b/include/linux/highmem.h
> index 832b49b50c7b..caaa62e1dd24 100644
> --- a/include/linux/highmem.h
> +++ b/include/linux/highmem.h
> @@ -204,6 +204,14 @@ static inline void clear_highpage(struct page *page)
>  	kunmap_atomic(kaddr);
>  }
>
> +#ifndef __HAVE_ARCH_TAG_CLEAR_HIGHPAGE
> +
> +static inline void tag_clear_highpage(struct page *page)
> +{
> +}
> +
> +#endif
> +
>  /*
>   * If we pass in a base or tail page, we can zero up to PAGE_SIZE.
>   * If we pass in a head page, we can zero up to the size of the compound page.
> diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c
> index 45e552cb9172..34362c8d0955 100644
> --- a/mm/kasan/hw_tags.c
> +++ b/mm/kasan/hw_tags.c
> @@ -242,7 +242,14 @@ void kasan_alloc_pages(struct page *page, unsigned int order, gfp_t flags)
>  {
>  	bool init = !want_init_on_free() && want_init_on_alloc(flags);
>
> -	kasan_unpoison_pages(page, order, init);
> +	if (flags & __GFP_ZEROTAGS) {
> +		int i;
> +
> +		for (i = 0; i != 1 << order; ++i)
> +			tag_clear_highpage(page + i);
> +	} else {
> +		kasan_unpoison_pages(page, order, init);
> +	}
>  }
>
>  void kasan_free_pages(struct page *page, unsigned int order)
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 6e82a7f6fd6f..24e6f668ef73 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -1219,10 +1219,16 @@ static int free_tail_pages_check(struct page *head_page, struct page *page)
>  	return ret;
>  }
>
> -static void kernel_init_free_pages(struct page *page, int numpages)
> +static void kernel_init_free_pages(struct page *page, int numpages, bool zero_tags)
>  {
>  	int i;
>
> +	if (zero_tags) {
> +		for (i = 0; i < numpages; i++)
> +			tag_clear_highpage(page + i);
> +		return;
> +	}
> +
>  	/* s390's use of memset() could override KASAN redzones. */
>  	kasan_disable_current();
>  	for (i = 0; i < numpages; i++) {
> @@ -1314,7 +1320,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
>  		bool init = want_init_on_free();
>
>  		if (init)
> -			kernel_init_free_pages(page, 1 << order);
> +			kernel_init_free_pages(page, 1 << order, false);
>  		if (!skip_kasan_poison)
>  			kasan_poison_pages(page, order, init);
>  	}
> @@ -2350,7 +2356,8 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
>
>  		kasan_unpoison_pages(page, order, init);
>  		if (init)
> -			kernel_init_free_pages(page, 1 << order);
> +			kernel_init_free_pages(page, 1 << order,
> +					       gfp_flags & __GFP_ZEROTAGS);
>  	}
>
>  	set_page_owner(page, order, gfp_flags);
> --
> 2.31.1.607.g51e8a6a459-goog
>

For KASAN parts:

Reviewed-by: Andrey Konovalov <andreyknvl@gmail.com>
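[A closing illustration, a hedged userspace sketch that is not part of this
series: the path being optimised is the first write fault on a PROT_MTE
anonymous mapping. On an MTE-capable CPU and kernel it can be exercised as
below; PROT_MTE is arm64-specific, and the fallback define is the arm64
value from asm/mman.h in case the libc headers lack it:]

#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

#ifndef PROT_MTE
#define PROT_MTE 0x20
#endif

int main(void)
{
	long psz = sysconf(_SC_PAGESIZE);

	/* Anonymous tagged mapping; pages are only allocated on fault. */
	char *p = mmap(NULL, psz, PROT_READ | PROT_WRITE | PROT_MTE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/*
	 * The first write faults in a page; with this series applied the
	 * kernel zeroes its data and MTE allocation tags in one DC GZVA pass.
	 */
	p[0] = 1;
	return 0;
}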