Message ID | 20230621175002.2832640-6-rananta@google.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM: arm64: Add support for FEAT_TLBIRANGE | expand |
On 6/22/23 03:49, Raghavendra Rao Ananta wrote: > Currently, the core TLB flush functionality of __flush_tlb_range() > hardcodes vae1is (and variants) for the flush operation. In the > upcoming patches, the KVM code reuses this core algorithm with > ipas2e1is for range based TLB invalidations based on the IPA. > Hence, extract the core flush functionality of __flush_tlb_range() > into its own macro that accepts an 'op' argument to pass any > TLBI operation, such that other callers (KVM) can benefit. > > No functional changes intended. > > Signed-off-by: Raghavendra Rao Ananta <rananta@google.com> > Reviewed-by: Catalin Marinas <catalin.marinas@arm.com> > --- > arch/arm64/include/asm/tlbflush.h | 108 +++++++++++++++--------------- > 1 file changed, 55 insertions(+), 53 deletions(-) > With the following nits addressed: Reviewed-by: Gavin Shan <gshan@redhat.com> > diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h > index 412a3b9a3c25d..4775378b6da1b 100644 > --- a/arch/arm64/include/asm/tlbflush.h > +++ b/arch/arm64/include/asm/tlbflush.h > @@ -278,14 +278,61 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, > */ > #define MAX_TLBI_OPS PTRS_PER_PTE > > +/* When the CPU does not support TLB range operations, flush the TLB > + * entries one by one at the granularity of 'stride'. If the TLB > + * range ops are supported, then: > + * > + * 1. If 'pages' is odd, flush the first page through non-range > + * operations; > + * > + * 2. For remaining pages: the minimum range granularity is decided > + * by 'scale', so multiple range TLBI operations may be required. > + * Start from scale = 0, flush the corresponding number of pages > + * ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it > + * until no pages left. > + * > + * Note that certain ranges can be represented by either num = 31 and > + * scale or num = 0 and scale + 1. The loop below favours the latter > + * since num is limited to 30 by the __TLBI_RANGE_NUM() macro. > + */ > +#define __flush_tlb_range_op(op, start, pages, stride, \ > + asid, tlb_level, tlbi_user) do { \ > + int num = 0; \ > + int scale = 0; \ > + unsigned long addr; \ > + \ > + while (pages > 0) { \ > + if (!system_supports_tlb_range() || \ > + pages % 2 == 1) { \ > + addr = __TLBI_VADDR(start, asid); \ > + __tlbi_level(op, addr, tlb_level); \ > + if (tlbi_user) \ > + __tlbi_user_level(op, addr, tlb_level); \ > + start += stride; \ > + pages -= stride >> PAGE_SHIFT; \ > + continue; \ > + } \ > + \ > + num = __TLBI_RANGE_NUM(pages, scale); \ > + if (num >= 0) { \ > + addr = __TLBI_VADDR_RANGE(start, asid, scale, \ > + num, tlb_level); \ > + __tlbi(r##op, addr); \ > + if (tlbi_user) \ > + __tlbi_user(r##op, addr); \ > + start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \ > + pages -= __TLBI_RANGE_PAGES(num, scale); \ > + } \ > + scale++; \ > + } \ > +} while (0) > + There is a warning reported from 'checkpatch.pl'. WARNING: suspect code indent for conditional statements (32, 8) #52: FILE: arch/arm64/include/asm/tlbflush.h:299: + asid, tlb_level, tlbi_user) do { \ [...] + unsigned long addr; \ total: 0 errors, 1 warnings, 125 lines checked You probably need to tweak it as below, to avoid the warning. #define __flush_tlb_range_op(op, start, pages, stride, \ asid, tlb_level, tlbi_user) \ do { \ > static inline void __flush_tlb_range(struct vm_area_struct *vma, > unsigned long start, unsigned long end, > unsigned long stride, bool last_level, > int tlb_level) > { > - int num = 0; > - int scale = 0; > - unsigned long asid, addr, pages; > + unsigned long asid, pages; > > start = round_down(start, stride); > end = round_up(end, stride); > @@ -307,56 +354,11 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, > dsb(ishst); > asid = ASID(vma->vm_mm); > > - /* > - * When the CPU does not support TLB range operations, flush the TLB > - * entries one by one at the granularity of 'stride'. If the TLB > - * range ops are supported, then: > - * > - * 1. If 'pages' is odd, flush the first page through non-range > - * operations; > - * > - * 2. For remaining pages: the minimum range granularity is decided > - * by 'scale', so multiple range TLBI operations may be required. > - * Start from scale = 0, flush the corresponding number of pages > - * ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it > - * until no pages left. > - * > - * Note that certain ranges can be represented by either num = 31 and > - * scale or num = 0 and scale + 1. The loop below favours the latter > - * since num is limited to 30 by the __TLBI_RANGE_NUM() macro. > - */ > - while (pages > 0) { > - if (!system_supports_tlb_range() || > - pages % 2 == 1) { > - addr = __TLBI_VADDR(start, asid); > - if (last_level) { > - __tlbi_level(vale1is, addr, tlb_level); > - __tlbi_user_level(vale1is, addr, tlb_level); > - } else { > - __tlbi_level(vae1is, addr, tlb_level); > - __tlbi_user_level(vae1is, addr, tlb_level); > - } > - start += stride; > - pages -= stride >> PAGE_SHIFT; > - continue; > - } > - > - num = __TLBI_RANGE_NUM(pages, scale); > - if (num >= 0) { > - addr = __TLBI_VADDR_RANGE(start, asid, scale, > - num, tlb_level); > - if (last_level) { > - __tlbi(rvale1is, addr); > - __tlbi_user(rvale1is, addr); > - } else { > - __tlbi(rvae1is, addr); > - __tlbi_user(rvae1is, addr); > - } > - start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; > - pages -= __TLBI_RANGE_PAGES(num, scale); > - } > - scale++; > - } > + if (last_level) > + __flush_tlb_range_op(vale1is, start, pages, stride, asid, tlb_level, true); > + else > + __flush_tlb_range_op(vae1is, start, pages, stride, asid, tlb_level, true); > + > dsb(ish); > } > Thanks, Gavin
On Tue, Jul 4, 2023 at 5:11 PM Gavin Shan <gshan@redhat.com> wrote: > > On 6/22/23 03:49, Raghavendra Rao Ananta wrote: > > Currently, the core TLB flush functionality of __flush_tlb_range() > > hardcodes vae1is (and variants) for the flush operation. In the > > upcoming patches, the KVM code reuses this core algorithm with > > ipas2e1is for range based TLB invalidations based on the IPA. > > Hence, extract the core flush functionality of __flush_tlb_range() > > into its own macro that accepts an 'op' argument to pass any > > TLBI operation, such that other callers (KVM) can benefit. > > > > No functional changes intended. > > > > Signed-off-by: Raghavendra Rao Ananta <rananta@google.com> > > Reviewed-by: Catalin Marinas <catalin.marinas@arm.com> > > --- > > arch/arm64/include/asm/tlbflush.h | 108 +++++++++++++++--------------- > > 1 file changed, 55 insertions(+), 53 deletions(-) > > > > With the following nits addressed: > > Reviewed-by: Gavin Shan <gshan@redhat.com> > > > diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h > > index 412a3b9a3c25d..4775378b6da1b 100644 > > --- a/arch/arm64/include/asm/tlbflush.h > > +++ b/arch/arm64/include/asm/tlbflush.h > > @@ -278,14 +278,61 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, > > */ > > #define MAX_TLBI_OPS PTRS_PER_PTE > > > > +/* When the CPU does not support TLB range operations, flush the TLB > > + * entries one by one at the granularity of 'stride'. If the TLB > > + * range ops are supported, then: > > + * > > + * 1. If 'pages' is odd, flush the first page through non-range > > + * operations; > > + * > > + * 2. For remaining pages: the minimum range granularity is decided > > + * by 'scale', so multiple range TLBI operations may be required. > > + * Start from scale = 0, flush the corresponding number of pages > > + * ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it > > + * until no pages left. > > + * > > + * Note that certain ranges can be represented by either num = 31 and > > + * scale or num = 0 and scale + 1. The loop below favours the latter > > + * since num is limited to 30 by the __TLBI_RANGE_NUM() macro. > > + */ > > +#define __flush_tlb_range_op(op, start, pages, stride, \ > > + asid, tlb_level, tlbi_user) do { \ > > + int num = 0; \ > > + int scale = 0; \ > > + unsigned long addr; \ > > + \ > > + while (pages > 0) { \ > > + if (!system_supports_tlb_range() || \ > > + pages % 2 == 1) { \ > > + addr = __TLBI_VADDR(start, asid); \ > > + __tlbi_level(op, addr, tlb_level); \ > > + if (tlbi_user) \ > > + __tlbi_user_level(op, addr, tlb_level); \ > > + start += stride; \ > > + pages -= stride >> PAGE_SHIFT; \ > > + continue; \ > > + } \ > > + \ > > + num = __TLBI_RANGE_NUM(pages, scale); \ > > + if (num >= 0) { \ > > + addr = __TLBI_VADDR_RANGE(start, asid, scale, \ > > + num, tlb_level); \ > > + __tlbi(r##op, addr); \ > > + if (tlbi_user) \ > > + __tlbi_user(r##op, addr); \ > > + start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \ > > + pages -= __TLBI_RANGE_PAGES(num, scale); \ > > + } \ > > + scale++; \ > > + } \ > > +} while (0) > > + > > There is a warning reported from 'checkpatch.pl'. > > WARNING: suspect code indent for conditional statements (32, 8) > #52: FILE: arch/arm64/include/asm/tlbflush.h:299: > + asid, tlb_level, tlbi_user) do { \ > [...] > + unsigned long addr; \ > > total: 0 errors, 1 warnings, 125 lines checked > > You probably need to tweak it as below, to avoid the warning. > > #define __flush_tlb_range_op(op, start, pages, stride, \ > asid, tlb_level, tlbi_user) \ > do { \ > Thanks for the suggestion, Gavin. I'll fix this. - Raghavendra > > > static inline void __flush_tlb_range(struct vm_area_struct *vma, > > unsigned long start, unsigned long end, > > unsigned long stride, bool last_level, > > int tlb_level) > > { > > - int num = 0; > > - int scale = 0; > > - unsigned long asid, addr, pages; > > + unsigned long asid, pages; > > > > start = round_down(start, stride); > > end = round_up(end, stride); > > @@ -307,56 +354,11 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, > > dsb(ishst); > > asid = ASID(vma->vm_mm); > > > > - /* > > - * When the CPU does not support TLB range operations, flush the TLB > > - * entries one by one at the granularity of 'stride'. If the TLB > > - * range ops are supported, then: > > - * > > - * 1. If 'pages' is odd, flush the first page through non-range > > - * operations; > > - * > > - * 2. For remaining pages: the minimum range granularity is decided > > - * by 'scale', so multiple range TLBI operations may be required. > > - * Start from scale = 0, flush the corresponding number of pages > > - * ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it > > - * until no pages left. > > - * > > - * Note that certain ranges can be represented by either num = 31 and > > - * scale or num = 0 and scale + 1. The loop below favours the latter > > - * since num is limited to 30 by the __TLBI_RANGE_NUM() macro. > > - */ > > - while (pages > 0) { > > - if (!system_supports_tlb_range() || > > - pages % 2 == 1) { > > - addr = __TLBI_VADDR(start, asid); > > - if (last_level) { > > - __tlbi_level(vale1is, addr, tlb_level); > > - __tlbi_user_level(vale1is, addr, tlb_level); > > - } else { > > - __tlbi_level(vae1is, addr, tlb_level); > > - __tlbi_user_level(vae1is, addr, tlb_level); > > - } > > - start += stride; > > - pages -= stride >> PAGE_SHIFT; > > - continue; > > - } > > - > > - num = __TLBI_RANGE_NUM(pages, scale); > > - if (num >= 0) { > > - addr = __TLBI_VADDR_RANGE(start, asid, scale, > > - num, tlb_level); > > - if (last_level) { > > - __tlbi(rvale1is, addr); > > - __tlbi_user(rvale1is, addr); > > - } else { > > - __tlbi(rvae1is, addr); > > - __tlbi_user(rvae1is, addr); > > - } > > - start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; > > - pages -= __TLBI_RANGE_PAGES(num, scale); > > - } > > - scale++; > > - } > > + if (last_level) > > + __flush_tlb_range_op(vale1is, start, pages, stride, asid, tlb_level, true); > > + else > > + __flush_tlb_range_op(vae1is, start, pages, stride, asid, tlb_level, true); > > + > > dsb(ish); > > } > > > > Thanks, > Gavin >
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 412a3b9a3c25d..4775378b6da1b 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -278,14 +278,61 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, */ #define MAX_TLBI_OPS PTRS_PER_PTE +/* When the CPU does not support TLB range operations, flush the TLB + * entries one by one at the granularity of 'stride'. If the TLB + * range ops are supported, then: + * + * 1. If 'pages' is odd, flush the first page through non-range + * operations; + * + * 2. For remaining pages: the minimum range granularity is decided + * by 'scale', so multiple range TLBI operations may be required. + * Start from scale = 0, flush the corresponding number of pages + * ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it + * until no pages left. + * + * Note that certain ranges can be represented by either num = 31 and + * scale or num = 0 and scale + 1. The loop below favours the latter + * since num is limited to 30 by the __TLBI_RANGE_NUM() macro. + */ +#define __flush_tlb_range_op(op, start, pages, stride, \ + asid, tlb_level, tlbi_user) do { \ + int num = 0; \ + int scale = 0; \ + unsigned long addr; \ + \ + while (pages > 0) { \ + if (!system_supports_tlb_range() || \ + pages % 2 == 1) { \ + addr = __TLBI_VADDR(start, asid); \ + __tlbi_level(op, addr, tlb_level); \ + if (tlbi_user) \ + __tlbi_user_level(op, addr, tlb_level); \ + start += stride; \ + pages -= stride >> PAGE_SHIFT; \ + continue; \ + } \ + \ + num = __TLBI_RANGE_NUM(pages, scale); \ + if (num >= 0) { \ + addr = __TLBI_VADDR_RANGE(start, asid, scale, \ + num, tlb_level); \ + __tlbi(r##op, addr); \ + if (tlbi_user) \ + __tlbi_user(r##op, addr); \ + start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \ + pages -= __TLBI_RANGE_PAGES(num, scale); \ + } \ + scale++; \ + } \ +} while (0) + static inline void __flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, unsigned long stride, bool last_level, int tlb_level) { - int num = 0; - int scale = 0; - unsigned long asid, addr, pages; + unsigned long asid, pages; start = round_down(start, stride); end = round_up(end, stride); @@ -307,56 +354,11 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, dsb(ishst); asid = ASID(vma->vm_mm); - /* - * When the CPU does not support TLB range operations, flush the TLB - * entries one by one at the granularity of 'stride'. If the TLB - * range ops are supported, then: - * - * 1. If 'pages' is odd, flush the first page through non-range - * operations; - * - * 2. For remaining pages: the minimum range granularity is decided - * by 'scale', so multiple range TLBI operations may be required. - * Start from scale = 0, flush the corresponding number of pages - * ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it - * until no pages left. - * - * Note that certain ranges can be represented by either num = 31 and - * scale or num = 0 and scale + 1. The loop below favours the latter - * since num is limited to 30 by the __TLBI_RANGE_NUM() macro. - */ - while (pages > 0) { - if (!system_supports_tlb_range() || - pages % 2 == 1) { - addr = __TLBI_VADDR(start, asid); - if (last_level) { - __tlbi_level(vale1is, addr, tlb_level); - __tlbi_user_level(vale1is, addr, tlb_level); - } else { - __tlbi_level(vae1is, addr, tlb_level); - __tlbi_user_level(vae1is, addr, tlb_level); - } - start += stride; - pages -= stride >> PAGE_SHIFT; - continue; - } - - num = __TLBI_RANGE_NUM(pages, scale); - if (num >= 0) { - addr = __TLBI_VADDR_RANGE(start, asid, scale, - num, tlb_level); - if (last_level) { - __tlbi(rvale1is, addr); - __tlbi_user(rvale1is, addr); - } else { - __tlbi(rvae1is, addr); - __tlbi_user(rvae1is, addr); - } - start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; - pages -= __TLBI_RANGE_PAGES(num, scale); - } - scale++; - } + if (last_level) + __flush_tlb_range_op(vale1is, start, pages, stride, asid, tlb_level, true); + else + __flush_tlb_range_op(vae1is, start, pages, stride, asid, tlb_level, true); + dsb(ish); }