diff mbox series

[v11,1/3] riscv: mm: modify pte format for Svnapot

Message ID 20221216062109.865573-2-panqinglin2020@iscas.ac.cn (mailing list archive)
State Changes Requested
Delegated to: Palmer Dabbelt
Headers show
Series riscv, mm: detect svnapot cpu support at runtime | expand

Checks

Context Check Description
conchuod/patch_count success Link
conchuod/cover_letter success Series has a cover letter
conchuod/tree_selection success Guessed tree name to be for-next
conchuod/fixes_present success Fixes tag not required for -next series
conchuod/verify_signedoff success Signed-off-by tag matches author and committer
conchuod/kdoc success Errors and warnings before: 0 this patch: 0
conchuod/module_param success Was 0 now: 0
conchuod/alphanumeric_selects fail Out of order selects before the patch: 57 and now 59
conchuod/build_rv32_defconfig success Build OK
conchuod/build_warn_rv64 success Errors and warnings before: 0 this patch: 0
conchuod/dtb_warn_rv64 success Errors and warnings before: 0 this patch: 0
conchuod/header_inline success No static functions without inline keyword in header files
conchuod/checkpatch success total: 0 errors, 0 warnings, 0 checks, 219 lines checked
conchuod/source_inline success Was 0 now: 0
conchuod/build_rv64_nommu_k210_defconfig success Build OK
conchuod/verify_fixes success No Fixes tag
conchuod/build_rv64_nommu_virt_defconfig success Build OK

Commit Message

Qinglin Pan Dec. 16, 2022, 6:21 a.m. UTC
From: Qinglin Pan <panqinglin2020@iscas.ac.cn>

Add one alternative to enable/disable svnapot support, enable this static
key when "svnapot" is in the "riscv,isa" field of fdt and SVNAPOT compile
option is set. It will influence the behavior of has_svnapot. All code
dependent on svnapot should make sure that has_svnapot return true firstly.

Modify PTE definition for Svnapot, and creates some functions in pgtable.h
to mark a PTE as napot and check if it is a Svnapot PTE. Until now, only
64KB napot size is supported in spec, so some macros has only 64KB version.

Signed-off-by: Qinglin Pan <panqinglin2020@iscas.ac.cn>
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>

Comments

Conor Dooley Dec. 19, 2022, 9:51 p.m. UTC | #1
On Fri, Dec 16, 2022 at 02:21:07PM +0800, panqinglin2020@iscas.ac.cn wrote:
> From: Qinglin Pan <panqinglin2020@iscas.ac.cn>
> 
> Add one alternative to enable/disable svnapot support, enable this static
> key when "svnapot" is in the "riscv,isa" field of fdt and SVNAPOT compile
> option is set. It will influence the behavior of has_svnapot. All code
> dependent on svnapot should make sure that has_svnapot return true firstly.
> 
> Modify PTE definition for Svnapot, and creates some functions in pgtable.h
> to mark a PTE as napot and check if it is a Svnapot PTE. Until now, only
> 64KB napot size is supported in spec, so some macros has only 64KB version.
> 
> Signed-off-by: Qinglin Pan <panqinglin2020@iscas.ac.cn>
> Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
> 
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index e1a9fa47f012..25c230e3bf61 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -397,6 +397,25 @@ config RISCV_ISA_C
>  
>  	  If you don't know what to do here, say Y.
>  
> +config RISCV_ISA_SVNAPOT
> +	bool "SVNAPOT extension support"
> +	depends on 64BIT && MMU
> +	select RISCV_ALTERNATIVE
> +	default y
> +	help
> +	  Allow kernel to detect the SVNAPOT ISA-extension dynamically at boot
> +	  time and enable its usage.
> +
> +	  The SVNAPOT extension is used to mark contiguous PTEs as a range
> +	  of contiguous virtual-to-physical translations for a naturally
> +	  aligned power-of-2 (NAPOT) granularity larger than the base 4KB page
> +	  size. When HUGETLBFS is also selected this option unconditionally
> +	  allocates some memory for each NAPOT page size supported by the kernel.
> +	  When optimizing for low memory consumption and for platforms without

nit: Does this make more sense as "When optimising for low memory
consumption on platforms without the SVNAPOT extension"...?
Or does disabling Svnapot save memory on Svnapot capable systems too?

Thanks,
Conor.

> +	  the SVNAPOT extension, it may be better to say N here.
> +
> +	  If you don't know what to do here, say Y.
> +
>  config RISCV_ISA_SVPBMT
>  	bool "SVPBMT extension support"
>  	depends on 64BIT && MMU
> diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
> index 4180312d2a70..beadb1126ed9 100644
> --- a/arch/riscv/include/asm/errata_list.h
> +++ b/arch/riscv/include/asm/errata_list.h
> @@ -22,9 +22,10 @@
>  #define	ERRATA_THEAD_NUMBER 3
>  #endif
>  
> -#define	CPUFEATURE_SVPBMT 0
> -#define	CPUFEATURE_ZICBOM 1
> -#define	CPUFEATURE_NUMBER 2
> +#define	CPUFEATURE_SVPBMT	0
> +#define	CPUFEATURE_ZICBOM	1
> +#define	CPUFEATURE_SVNAPOT	2
> +#define	CPUFEATURE_NUMBER	3
>  
>  #ifdef __ASSEMBLY__
>  
> @@ -156,6 +157,14 @@ asm volatile(ALTERNATIVE(						\
>  	: "=r" (__ovl) :						\
>  	: "memory")
>  
> +#define ALT_SVNAPOT(_val)						\
> +asm(ALTERNATIVE(							\
> +	"li %0, 0",							\
> +	"li %0, 1",							\
> +		0, CPUFEATURE_SVNAPOT, CONFIG_RISCV_ISA_SVNAPOT)	\
> +	: "=r" (_val) :							\
> +	: "memory")
> +
>  #endif /* __ASSEMBLY__ */
>  
>  #endif
> diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
> index 86328e3acb02..a8f1a390b1a4 100644
> --- a/arch/riscv/include/asm/hwcap.h
> +++ b/arch/riscv/include/asm/hwcap.h
> @@ -54,6 +54,7 @@ extern unsigned long elf_hwcap;
>   */
>  enum riscv_isa_ext_id {
>  	RISCV_ISA_EXT_SSCOFPMF = RISCV_ISA_EXT_BASE,
> +	RISCV_ISA_EXT_SVNAPOT,
>  	RISCV_ISA_EXT_SVPBMT,
>  	RISCV_ISA_EXT_ZICBOM,
>  	RISCV_ISA_EXT_ZIHINTPAUSE,
> @@ -88,7 +89,6 @@ static __always_inline int riscv_isa_ext2key(int num)
>  {
>  	switch (num) {
>  	case RISCV_ISA_EXT_f:
> -		return RISCV_ISA_EXT_KEY_FPU;
>  	case RISCV_ISA_EXT_d:
>  		return RISCV_ISA_EXT_KEY_FPU;
>  	case RISCV_ISA_EXT_ZIHINTPAUSE:
> diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
> index 9f432c1b5289..24a3dd265183 100644
> --- a/arch/riscv/include/asm/page.h
> +++ b/arch/riscv/include/asm/page.h
> @@ -16,11 +16,6 @@
>  #define PAGE_SIZE	(_AC(1, UL) << PAGE_SHIFT)
>  #define PAGE_MASK	(~(PAGE_SIZE - 1))
>  
> -#ifdef CONFIG_64BIT
> -#define HUGE_MAX_HSTATE		2
> -#else
> -#define HUGE_MAX_HSTATE		1
> -#endif
>  #define HPAGE_SHIFT		PMD_SHIFT
>  #define HPAGE_SIZE		(_AC(1, UL) << HPAGE_SHIFT)
>  #define HPAGE_MASK              (~(HPAGE_SIZE - 1))
> diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h
> index 42a042c0e13e..7a5097202e15 100644
> --- a/arch/riscv/include/asm/pgtable-64.h
> +++ b/arch/riscv/include/asm/pgtable-64.h
> @@ -78,6 +78,40 @@ typedef struct {
>   */
>  #define _PAGE_PFN_MASK  GENMASK(53, 10)
>  
> +/*
> + * [63] Svnapot definitions:
> + * 0 Svnapot disabled
> + * 1 Svnapot enabled
> + */
> +#define _PAGE_NAPOT_SHIFT	63
> +#define _PAGE_NAPOT		BIT(_PAGE_NAPOT_SHIFT)
> +/*
> + * Only 64KB (order 4) napot ptes supported.
> + */
> +#define NAPOT_CONT_ORDER_BASE 4
> +enum napot_cont_order {
> +	NAPOT_CONT64KB_ORDER = NAPOT_CONT_ORDER_BASE,
> +	NAPOT_ORDER_MAX,
> +};
> +
> +#define for_each_napot_order(order)						\
> +	for (order = NAPOT_CONT_ORDER_BASE; order < NAPOT_ORDER_MAX; order++)
> +#define for_each_napot_order_rev(order)						\
> +	for (order = NAPOT_ORDER_MAX - 1;					\
> +	     order >= NAPOT_CONT_ORDER_BASE; order--)
> +#define napot_cont_order(val)	(__builtin_ctzl((val.pte >> _PAGE_PFN_SHIFT) << 1))
> +
> +#define napot_cont_shift(order)	((order) + PAGE_SHIFT)
> +#define napot_cont_size(order)	BIT(napot_cont_shift(order))
> +#define napot_cont_mask(order)	(~(napot_cont_size(order) - 1UL))
> +#define napot_pte_num(order)	BIT(order)
> +
> +#ifdef CONFIG_RISCV_ISA_SVNAPOT
> +#define HUGE_MAX_HSTATE		(2 + (NAPOT_ORDER_MAX - NAPOT_CONT_ORDER_BASE))
> +#else
> +#define HUGE_MAX_HSTATE		2
> +#endif
> +
>  /*
>   * [62:61] Svpbmt Memory Type definitions:
>   *
> diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
> index 2359f1f9bda9..07213236b976 100644
> --- a/arch/riscv/include/asm/pgtable.h
> +++ b/arch/riscv/include/asm/pgtable.h
> @@ -6,10 +6,12 @@
>  #ifndef _ASM_RISCV_PGTABLE_H
>  #define _ASM_RISCV_PGTABLE_H
>  
> +#include <linux/jump_label.h>
>  #include <linux/mmzone.h>
>  #include <linux/sizes.h>
>  
>  #include <asm/pgtable-bits.h>
> +#include <asm/hwcap.h>
>  
>  #ifndef CONFIG_MMU
>  #define KERNEL_LINK_ADDR	PAGE_OFFSET
> @@ -264,10 +266,49 @@ static inline pte_t pud_pte(pud_t pud)
>  	return __pte(pud_val(pud));
>  }
>  
> +static __always_inline bool has_svnapot(void)
> +{
> +	unsigned int _val;
> +
> +	ALT_SVNAPOT(_val);
> +
> +	return _val;
> +}
> +
> +#ifdef CONFIG_RISCV_ISA_SVNAPOT
> +
> +static inline unsigned long pte_napot(pte_t pte)
> +{
> +	return pte_val(pte) & _PAGE_NAPOT;
> +}
> +
> +static inline pte_t pte_mknapot(pte_t pte, unsigned int order)
> +{
> +	int pos = order - 1 + _PAGE_PFN_SHIFT;
> +	unsigned long napot_bit = BIT(pos);
> +	unsigned long napot_mask = ~GENMASK(pos, _PAGE_PFN_SHIFT);
> +
> +	return __pte((pte_val(pte) & napot_mask) | napot_bit | _PAGE_NAPOT);
> +}
> +
> +#else
> +
> +static inline unsigned long pte_napot(pte_t pte)
> +{
> +	return 0;
> +}
> +
> +#endif /* CONFIG_RISCV_ISA_SVNAPOT */
> +
>  /* Yields the page frame number (PFN) of a page table entry */
>  static inline unsigned long pte_pfn(pte_t pte)
>  {
> -	return __page_val_to_pfn(pte_val(pte));
> +	unsigned long res  = __page_val_to_pfn(pte_val(pte));
> +
> +	if (has_svnapot() && pte_napot(pte))
> +		res = res & (res - 1UL);
> +
> +	return res;
>  }
>  
>  #define pte_page(x)     pfn_to_page(pte_pfn(x))
> diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
> index bf9dd6764bad..88495f5fcafd 100644
> --- a/arch/riscv/kernel/cpu.c
> +++ b/arch/riscv/kernel/cpu.c
> @@ -165,6 +165,7 @@ static struct riscv_isa_ext_data isa_ext_arr[] = {
>  	__RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF),
>  	__RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC),
>  	__RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL),
> +	__RISCV_ISA_EXT_DATA(svnapot, RISCV_ISA_EXT_SVNAPOT),
>  	__RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT),
>  	__RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM),
>  	__RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),
> diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
> index 93e45560af30..60ebe2f22112 100644
> --- a/arch/riscv/kernel/cpufeature.c
> +++ b/arch/riscv/kernel/cpufeature.c
> @@ -228,6 +228,7 @@ void __init riscv_fill_hwcap(void)
>  				SET_ISA_EXT_MAP("zihintpause", RISCV_ISA_EXT_ZIHINTPAUSE);
>  				SET_ISA_EXT_MAP("sstc", RISCV_ISA_EXT_SSTC);
>  				SET_ISA_EXT_MAP("svinval", RISCV_ISA_EXT_SVINVAL);
> +				SET_ISA_EXT_MAP("svnapot", RISCV_ISA_EXT_SVNAPOT);
>  			}
>  #undef SET_ISA_EXT_MAP
>  		}
> @@ -275,6 +276,17 @@ void __init riscv_fill_hwcap(void)
>  }
>  
>  #ifdef CONFIG_RISCV_ALTERNATIVE
> +static bool __init_or_module cpufeature_probe_svnapot(unsigned int stage)
> +{
> +	if (!IS_ENABLED(CONFIG_RISCV_ISA_SVNAPOT))
> +		return false;
> +
> +	if (stage == RISCV_ALTERNATIVES_EARLY_BOOT)
> +		return false;
> +
> +	return riscv_isa_extension_available(NULL, SVNAPOT);
> +}
> +
>  static bool __init_or_module cpufeature_probe_svpbmt(unsigned int stage)
>  {
>  	if (!IS_ENABLED(CONFIG_RISCV_ISA_SVPBMT))
> @@ -312,6 +324,9 @@ static u32 __init_or_module cpufeature_probe(unsigned int stage)
>  {
>  	u32 cpu_req_feature = 0;
>  
> +	if (cpufeature_probe_svnapot(stage))
> +		cpu_req_feature |= BIT(CPUFEATURE_SVNAPOT);
> +
>  	if (cpufeature_probe_svpbmt(stage))
>  		cpu_req_feature |= BIT(CPUFEATURE_SVPBMT);
>  
> -- 
> 2.37.4
> 
> 
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv
>
Andrew Jones Dec. 26, 2022, 4:01 p.m. UTC | #2
On Mon, Dec 19, 2022 at 09:51:29PM +0000, Conor Dooley wrote:
> On Fri, Dec 16, 2022 at 02:21:07PM +0800, panqinglin2020@iscas.ac.cn wrote:
> > From: Qinglin Pan <panqinglin2020@iscas.ac.cn>
> > 
> > Add one alternative to enable/disable svnapot support, enable this static
> > key when "svnapot" is in the "riscv,isa" field of fdt and SVNAPOT compile
> > option is set. It will influence the behavior of has_svnapot. All code
> > dependent on svnapot should make sure that has_svnapot return true firstly.
> > 
> > Modify PTE definition for Svnapot, and creates some functions in pgtable.h
> > to mark a PTE as napot and check if it is a Svnapot PTE. Until now, only
> > 64KB napot size is supported in spec, so some macros has only 64KB version.
> > 
> > Signed-off-by: Qinglin Pan <panqinglin2020@iscas.ac.cn>
> > Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
> > 
> > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > index e1a9fa47f012..25c230e3bf61 100644
> > --- a/arch/riscv/Kconfig
> > +++ b/arch/riscv/Kconfig
> > @@ -397,6 +397,25 @@ config RISCV_ISA_C
> >  
> >  	  If you don't know what to do here, say Y.
> >  
> > +config RISCV_ISA_SVNAPOT
> > +	bool "SVNAPOT extension support"
> > +	depends on 64BIT && MMU
> > +	select RISCV_ALTERNATIVE
> > +	default y
> > +	help
> > +	  Allow kernel to detect the SVNAPOT ISA-extension dynamically at boot
> > +	  time and enable its usage.
> > +
> > +	  The SVNAPOT extension is used to mark contiguous PTEs as a range
> > +	  of contiguous virtual-to-physical translations for a naturally
> > +	  aligned power-of-2 (NAPOT) granularity larger than the base 4KB page
> > +	  size. When HUGETLBFS is also selected this option unconditionally
> > +	  allocates some memory for each NAPOT page size supported by the kernel.
> > +	  When optimizing for low memory consumption and for platforms without
> 
> nit: Does this make more sense as "When optimising for low memory
> consumption on platforms without the SVNAPOT extension"...?
> Or does disabling Svnapot save memory on Svnapot capable systems too?

Depends on how we define "save". Disabling RISCV_ISA_SVNAPOT on HUGETLBFS
enabled systems will reduce memory consumption whether the svnapot
extension is present or not. When the extension is present, the memory
consumed may serve a purpose, whereas, on systems without svnapot, the
memory consumed cannot serve a purpose.

Thanks,
drew
Conor Dooley Dec. 26, 2022, 6:40 p.m. UTC | #3
On Mon, Dec 26, 2022 at 05:01:29PM +0100, Andrew Jones wrote:
> On Mon, Dec 19, 2022 at 09:51:29PM +0000, Conor Dooley wrote:
> > On Fri, Dec 16, 2022 at 02:21:07PM +0800, panqinglin2020@iscas.ac.cn wrote:
> > > From: Qinglin Pan <panqinglin2020@iscas.ac.cn>
> > > 
> > > Add one alternative to enable/disable svnapot support, enable this static
> > > key when "svnapot" is in the "riscv,isa" field of fdt and SVNAPOT compile
> > > option is set. It will influence the behavior of has_svnapot. All code
> > > dependent on svnapot should make sure that has_svnapot return true firstly.
> > > 
> > > Modify PTE definition for Svnapot, and creates some functions in pgtable.h
> > > to mark a PTE as napot and check if it is a Svnapot PTE. Until now, only
> > > 64KB napot size is supported in spec, so some macros has only 64KB version.
> > > 
> > > Signed-off-by: Qinglin Pan <panqinglin2020@iscas.ac.cn>
> > > Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
> > > 
> > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > > index e1a9fa47f012..25c230e3bf61 100644
> > > --- a/arch/riscv/Kconfig
> > > +++ b/arch/riscv/Kconfig
> > > @@ -397,6 +397,25 @@ config RISCV_ISA_C
> > >  
> > >  	  If you don't know what to do here, say Y.
> > >  
> > > +config RISCV_ISA_SVNAPOT
> > > +	bool "SVNAPOT extension support"
> > > +	depends on 64BIT && MMU
> > > +	select RISCV_ALTERNATIVE
> > > +	default y
> > > +	help
> > > +	  Allow kernel to detect the SVNAPOT ISA-extension dynamically at boot
> > > +	  time and enable its usage.
> > > +
> > > +	  The SVNAPOT extension is used to mark contiguous PTEs as a range
> > > +	  of contiguous virtual-to-physical translations for a naturally
> > > +	  aligned power-of-2 (NAPOT) granularity larger than the base 4KB page
> > > +	  size. When HUGETLBFS is also selected this option unconditionally
> > > +	  allocates some memory for each NAPOT page size supported by the kernel.
> > > +	  When optimizing for low memory consumption and for platforms without
> > 
> > nit: Does this make more sense as "When optimising for low memory
> > consumption on platforms without the SVNAPOT extension"...?
> > Or does disabling Svnapot save memory on Svnapot capable systems too?
> 
> Depends on how we define "save". Disabling RISCV_ISA_SVNAPOT on HUGETLBFS
> enabled systems will reduce memory consumption whether the svnapot
> extension is present or not. When the extension is present, the memory
> consumed may serve a purpose, whereas, on systems without svnapot, the
> memory consumed cannot serve a purpose.

Right, thanks for the explanation. I'm sorry to be a bore here, but should
it be reworded then to "When optimising for low memory consumption on
platforms without SVNAPOT, or if HUGETLBFS is enabled, say N here"?

I don't really care either way to be perfectly honest, so as I said in
my other reply "don't bother resubmitting unless someone else asks for
meaningful changes" ;)

Conor.
Andrew Jones Dec. 26, 2022, 7:19 p.m. UTC | #4
On Mon, Dec 26, 2022 at 06:40:23PM +0000, Conor Dooley wrote:
> On Mon, Dec 26, 2022 at 05:01:29PM +0100, Andrew Jones wrote:
> > On Mon, Dec 19, 2022 at 09:51:29PM +0000, Conor Dooley wrote:
> > > On Fri, Dec 16, 2022 at 02:21:07PM +0800, panqinglin2020@iscas.ac.cn wrote:
> > > > From: Qinglin Pan <panqinglin2020@iscas.ac.cn>
> > > > 
> > > > Add one alternative to enable/disable svnapot support, enable this static
> > > > key when "svnapot" is in the "riscv,isa" field of fdt and SVNAPOT compile
> > > > option is set. It will influence the behavior of has_svnapot. All code
> > > > dependent on svnapot should make sure that has_svnapot return true firstly.
> > > > 
> > > > Modify PTE definition for Svnapot, and creates some functions in pgtable.h
> > > > to mark a PTE as napot and check if it is a Svnapot PTE. Until now, only
> > > > 64KB napot size is supported in spec, so some macros has only 64KB version.
> > > > 
> > > > Signed-off-by: Qinglin Pan <panqinglin2020@iscas.ac.cn>
> > > > Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
> > > > 
> > > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > > > index e1a9fa47f012..25c230e3bf61 100644
> > > > --- a/arch/riscv/Kconfig
> > > > +++ b/arch/riscv/Kconfig
> > > > @@ -397,6 +397,25 @@ config RISCV_ISA_C
> > > >  
> > > >  	  If you don't know what to do here, say Y.
> > > >  
> > > > +config RISCV_ISA_SVNAPOT
> > > > +	bool "SVNAPOT extension support"
> > > > +	depends on 64BIT && MMU
> > > > +	select RISCV_ALTERNATIVE
> > > > +	default y
> > > > +	help
> > > > +	  Allow kernel to detect the SVNAPOT ISA-extension dynamically at boot
> > > > +	  time and enable its usage.
> > > > +
> > > > +	  The SVNAPOT extension is used to mark contiguous PTEs as a range
> > > > +	  of contiguous virtual-to-physical translations for a naturally
> > > > +	  aligned power-of-2 (NAPOT) granularity larger than the base 4KB page
> > > > +	  size. When HUGETLBFS is also selected this option unconditionally
> > > > +	  allocates some memory for each NAPOT page size supported by the kernel.
> > > > +	  When optimizing for low memory consumption and for platforms without
> > > 
> > > nit: Does this make more sense as "When optimising for low memory
> > > consumption on platforms without the SVNAPOT extension"...?
> > > Or does disabling Svnapot save memory on Svnapot capable systems too?
> > 
> > Depends on how we define "save". Disabling RISCV_ISA_SVNAPOT on HUGETLBFS
> > enabled systems will reduce memory consumption whether the svnapot
> > extension is present or not. When the extension is present, the memory
> > consumed may serve a purpose, whereas, on systems without svnapot, the
> > memory consumed cannot serve a purpose.
> 
> Right, thanks for the explanation. I'm sorry to be a bore here, but should
> it be reworded then to "When optimising for low memory consumption on
> platforms without SVNAPOT, or if HUGETLBFS is enabled, say N here"?

I'd prefer we keep the instructions as simple as possible. Basically,
"Turn this off if you don't plan to use it and if you care about memory
consumption", which, IMHO, is how the text more or less already reads.
But...

> 
> I don't really care either way to be perfectly honest, so as I said in
> my other reply "don't bother resubmitting unless someone else asks for
> meaningful changes" ;)

...I'm not too worried about it either, so I won't complain if Qinglin
respins with your suggestion.

Thanks,
drew
Qinglin Pan Jan. 8, 2023, 6:21 a.m. UTC | #5
Hi Andrew and Conor,

On 2022/12/27 03:19, Andrew Jones wrote:
> On Mon, Dec 26, 2022 at 06:40:23PM +0000, Conor Dooley wrote:
>> On Mon, Dec 26, 2022 at 05:01:29PM +0100, Andrew Jones wrote:
>>> On Mon, Dec 19, 2022 at 09:51:29PM +0000, Conor Dooley wrote:
>>>> On Fri, Dec 16, 2022 at 02:21:07PM +0800, panqinglin2020@iscas.ac.cn wrote:
>>>>> From: Qinglin Pan <panqinglin2020@iscas.ac.cn>
>>>>>
>>>>> Add one alternative to enable/disable svnapot support, enable this static
>>>>> key when "svnapot" is in the "riscv,isa" field of fdt and SVNAPOT compile
>>>>> option is set. It will influence the behavior of has_svnapot. All code
>>>>> dependent on svnapot should make sure that has_svnapot return true firstly.
>>>>>
>>>>> Modify PTE definition for Svnapot, and creates some functions in pgtable.h
>>>>> to mark a PTE as napot and check if it is a Svnapot PTE. Until now, only
>>>>> 64KB napot size is supported in spec, so some macros has only 64KB version.
>>>>>
>>>>> Signed-off-by: Qinglin Pan <panqinglin2020@iscas.ac.cn>
>>>>> Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
>>>>>
>>>>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
>>>>> index e1a9fa47f012..25c230e3bf61 100644
>>>>> --- a/arch/riscv/Kconfig
>>>>> +++ b/arch/riscv/Kconfig
>>>>> @@ -397,6 +397,25 @@ config RISCV_ISA_C
>>>>>   
>>>>>   	  If you don't know what to do here, say Y.
>>>>>   
>>>>> +config RISCV_ISA_SVNAPOT
>>>>> +	bool "SVNAPOT extension support"
>>>>> +	depends on 64BIT && MMU
>>>>> +	select RISCV_ALTERNATIVE
>>>>> +	default y
>>>>> +	help
>>>>> +	  Allow kernel to detect the SVNAPOT ISA-extension dynamically at boot
>>>>> +	  time and enable its usage.
>>>>> +
>>>>> +	  The SVNAPOT extension is used to mark contiguous PTEs as a range
>>>>> +	  of contiguous virtual-to-physical translations for a naturally
>>>>> +	  aligned power-of-2 (NAPOT) granularity larger than the base 4KB page
>>>>> +	  size. When HUGETLBFS is also selected this option unconditionally
>>>>> +	  allocates some memory for each NAPOT page size supported by the kernel.
>>>>> +	  When optimizing for low memory consumption and for platforms without
>>>>
>>>> nit: Does this make more sense as "When optimising for low memory
>>>> consumption on platforms without the SVNAPOT extension"...?
>>>> Or does disabling Svnapot save memory on Svnapot capable systems too?
>>>
>>> Depends on how we define "save". Disabling RISCV_ISA_SVNAPOT on HUGETLBFS
>>> enabled systems will reduce memory consumption whether the svnapot
>>> extension is present or not. When the extension is present, the memory
>>> consumed may serve a purpose, whereas, on systems without svnapot, the
>>> memory consumed cannot serve a purpose.
>>
>> Right, thanks for the explanation. I'm sorry to be a bore here, but should
>> it be reworded then to "When optimising for low memory consumption on
>> platforms without SVNAPOT, or if HUGETLBFS is enabled, say N here"?
> 
> I'd prefer we keep the instructions as simple as possible. Basically,

Sorry for slow reply.

I agree with Andrew, we should keep it as simple as possible. :)
So I will keep it unchanged unless any new suggestions about this
patchset is given.

Thanks,
Qinglin

> "Turn this off if you don't plan to use it and if you care about memory
> consumption", which, IMHO, is how the text more or less already reads.
> But...
> 
>>
>> I don't really care either way to be perfectly honest, so as I said in
>> my other reply "don't bother resubmitting unless someone else asks for
>> meaningful changes" ;)
> 
> ...I'm not too worried about it either, so I won't complain if Qinglin
> respins with your suggestion.
> 
> Thanks,
> drew
diff mbox series

Patch

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index e1a9fa47f012..25c230e3bf61 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -397,6 +397,25 @@  config RISCV_ISA_C
 
 	  If you don't know what to do here, say Y.
 
+config RISCV_ISA_SVNAPOT
+	bool "SVNAPOT extension support"
+	depends on 64BIT && MMU
+	select RISCV_ALTERNATIVE
+	default y
+	help
+	  Allow kernel to detect the SVNAPOT ISA-extension dynamically at boot
+	  time and enable its usage.
+
+	  The SVNAPOT extension is used to mark contiguous PTEs as a range
+	  of contiguous virtual-to-physical translations for a naturally
+	  aligned power-of-2 (NAPOT) granularity larger than the base 4KB page
+	  size. When HUGETLBFS is also selected this option unconditionally
+	  allocates some memory for each NAPOT page size supported by the kernel.
+	  When optimizing for low memory consumption and for platforms without
+	  the SVNAPOT extension, it may be better to say N here.
+
+	  If you don't know what to do here, say Y.
+
 config RISCV_ISA_SVPBMT
 	bool "SVPBMT extension support"
 	depends on 64BIT && MMU
diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
index 4180312d2a70..beadb1126ed9 100644
--- a/arch/riscv/include/asm/errata_list.h
+++ b/arch/riscv/include/asm/errata_list.h
@@ -22,9 +22,10 @@ 
 #define	ERRATA_THEAD_NUMBER 3
 #endif
 
-#define	CPUFEATURE_SVPBMT 0
-#define	CPUFEATURE_ZICBOM 1
-#define	CPUFEATURE_NUMBER 2
+#define	CPUFEATURE_SVPBMT	0
+#define	CPUFEATURE_ZICBOM	1
+#define	CPUFEATURE_SVNAPOT	2
+#define	CPUFEATURE_NUMBER	3
 
 #ifdef __ASSEMBLY__
 
@@ -156,6 +157,14 @@  asm volatile(ALTERNATIVE(						\
 	: "=r" (__ovl) :						\
 	: "memory")
 
+#define ALT_SVNAPOT(_val)						\
+asm(ALTERNATIVE(							\
+	"li %0, 0",							\
+	"li %0, 1",							\
+		0, CPUFEATURE_SVNAPOT, CONFIG_RISCV_ISA_SVNAPOT)	\
+	: "=r" (_val) :							\
+	: "memory")
+
 #endif /* __ASSEMBLY__ */
 
 #endif
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index 86328e3acb02..a8f1a390b1a4 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -54,6 +54,7 @@  extern unsigned long elf_hwcap;
  */
 enum riscv_isa_ext_id {
 	RISCV_ISA_EXT_SSCOFPMF = RISCV_ISA_EXT_BASE,
+	RISCV_ISA_EXT_SVNAPOT,
 	RISCV_ISA_EXT_SVPBMT,
 	RISCV_ISA_EXT_ZICBOM,
 	RISCV_ISA_EXT_ZIHINTPAUSE,
@@ -88,7 +89,6 @@  static __always_inline int riscv_isa_ext2key(int num)
 {
 	switch (num) {
 	case RISCV_ISA_EXT_f:
-		return RISCV_ISA_EXT_KEY_FPU;
 	case RISCV_ISA_EXT_d:
 		return RISCV_ISA_EXT_KEY_FPU;
 	case RISCV_ISA_EXT_ZIHINTPAUSE:
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index 9f432c1b5289..24a3dd265183 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -16,11 +16,6 @@ 
 #define PAGE_SIZE	(_AC(1, UL) << PAGE_SHIFT)
 #define PAGE_MASK	(~(PAGE_SIZE - 1))
 
-#ifdef CONFIG_64BIT
-#define HUGE_MAX_HSTATE		2
-#else
-#define HUGE_MAX_HSTATE		1
-#endif
 #define HPAGE_SHIFT		PMD_SHIFT
 #define HPAGE_SIZE		(_AC(1, UL) << HPAGE_SHIFT)
 #define HPAGE_MASK              (~(HPAGE_SIZE - 1))
diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h
index 42a042c0e13e..7a5097202e15 100644
--- a/arch/riscv/include/asm/pgtable-64.h
+++ b/arch/riscv/include/asm/pgtable-64.h
@@ -78,6 +78,40 @@  typedef struct {
  */
 #define _PAGE_PFN_MASK  GENMASK(53, 10)
 
+/*
+ * [63] Svnapot definitions:
+ * 0 Svnapot disabled
+ * 1 Svnapot enabled
+ */
+#define _PAGE_NAPOT_SHIFT	63
+#define _PAGE_NAPOT		BIT(_PAGE_NAPOT_SHIFT)
+/*
+ * Only 64KB (order 4) napot ptes supported.
+ */
+#define NAPOT_CONT_ORDER_BASE 4
+enum napot_cont_order {
+	NAPOT_CONT64KB_ORDER = NAPOT_CONT_ORDER_BASE,
+	NAPOT_ORDER_MAX,
+};
+
+#define for_each_napot_order(order)						\
+	for (order = NAPOT_CONT_ORDER_BASE; order < NAPOT_ORDER_MAX; order++)
+#define for_each_napot_order_rev(order)						\
+	for (order = NAPOT_ORDER_MAX - 1;					\
+	     order >= NAPOT_CONT_ORDER_BASE; order--)
+#define napot_cont_order(val)	(__builtin_ctzl((val.pte >> _PAGE_PFN_SHIFT) << 1))
+
+#define napot_cont_shift(order)	((order) + PAGE_SHIFT)
+#define napot_cont_size(order)	BIT(napot_cont_shift(order))
+#define napot_cont_mask(order)	(~(napot_cont_size(order) - 1UL))
+#define napot_pte_num(order)	BIT(order)
+
+#ifdef CONFIG_RISCV_ISA_SVNAPOT
+#define HUGE_MAX_HSTATE		(2 + (NAPOT_ORDER_MAX - NAPOT_CONT_ORDER_BASE))
+#else
+#define HUGE_MAX_HSTATE		2
+#endif
+
 /*
  * [62:61] Svpbmt Memory Type definitions:
  *
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 2359f1f9bda9..07213236b976 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -6,10 +6,12 @@ 
 #ifndef _ASM_RISCV_PGTABLE_H
 #define _ASM_RISCV_PGTABLE_H
 
+#include <linux/jump_label.h>
 #include <linux/mmzone.h>
 #include <linux/sizes.h>
 
 #include <asm/pgtable-bits.h>
+#include <asm/hwcap.h>
 
 #ifndef CONFIG_MMU
 #define KERNEL_LINK_ADDR	PAGE_OFFSET
@@ -264,10 +266,49 @@  static inline pte_t pud_pte(pud_t pud)
 	return __pte(pud_val(pud));
 }
 
+static __always_inline bool has_svnapot(void)
+{
+	unsigned int _val;
+
+	ALT_SVNAPOT(_val);
+
+	return _val;
+}
+
+#ifdef CONFIG_RISCV_ISA_SVNAPOT
+
+static inline unsigned long pte_napot(pte_t pte)
+{
+	return pte_val(pte) & _PAGE_NAPOT;
+}
+
+static inline pte_t pte_mknapot(pte_t pte, unsigned int order)
+{
+	int pos = order - 1 + _PAGE_PFN_SHIFT;
+	unsigned long napot_bit = BIT(pos);
+	unsigned long napot_mask = ~GENMASK(pos, _PAGE_PFN_SHIFT);
+
+	return __pte((pte_val(pte) & napot_mask) | napot_bit | _PAGE_NAPOT);
+}
+
+#else
+
+static inline unsigned long pte_napot(pte_t pte)
+{
+	return 0;
+}
+
+#endif /* CONFIG_RISCV_ISA_SVNAPOT */
+
 /* Yields the page frame number (PFN) of a page table entry */
 static inline unsigned long pte_pfn(pte_t pte)
 {
-	return __page_val_to_pfn(pte_val(pte));
+	unsigned long res  = __page_val_to_pfn(pte_val(pte));
+
+	if (has_svnapot() && pte_napot(pte))
+		res = res & (res - 1UL);
+
+	return res;
 }
 
 #define pte_page(x)     pfn_to_page(pte_pfn(x))
diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
index bf9dd6764bad..88495f5fcafd 100644
--- a/arch/riscv/kernel/cpu.c
+++ b/arch/riscv/kernel/cpu.c
@@ -165,6 +165,7 @@  static struct riscv_isa_ext_data isa_ext_arr[] = {
 	__RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF),
 	__RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC),
 	__RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL),
+	__RISCV_ISA_EXT_DATA(svnapot, RISCV_ISA_EXT_SVNAPOT),
 	__RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT),
 	__RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM),
 	__RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 93e45560af30..60ebe2f22112 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -228,6 +228,7 @@  void __init riscv_fill_hwcap(void)
 				SET_ISA_EXT_MAP("zihintpause", RISCV_ISA_EXT_ZIHINTPAUSE);
 				SET_ISA_EXT_MAP("sstc", RISCV_ISA_EXT_SSTC);
 				SET_ISA_EXT_MAP("svinval", RISCV_ISA_EXT_SVINVAL);
+				SET_ISA_EXT_MAP("svnapot", RISCV_ISA_EXT_SVNAPOT);
 			}
 #undef SET_ISA_EXT_MAP
 		}
@@ -275,6 +276,17 @@  void __init riscv_fill_hwcap(void)
 }
 
 #ifdef CONFIG_RISCV_ALTERNATIVE
+static bool __init_or_module cpufeature_probe_svnapot(unsigned int stage)
+{
+	if (!IS_ENABLED(CONFIG_RISCV_ISA_SVNAPOT))
+		return false;
+
+	if (stage == RISCV_ALTERNATIVES_EARLY_BOOT)
+		return false;
+
+	return riscv_isa_extension_available(NULL, SVNAPOT);
+}
+
 static bool __init_or_module cpufeature_probe_svpbmt(unsigned int stage)
 {
 	if (!IS_ENABLED(CONFIG_RISCV_ISA_SVPBMT))
@@ -312,6 +324,9 @@  static u32 __init_or_module cpufeature_probe(unsigned int stage)
 {
 	u32 cpu_req_feature = 0;
 
+	if (cpufeature_probe_svnapot(stage))
+		cpu_req_feature |= BIT(CPUFEATURE_SVNAPOT);
+
 	if (cpufeature_probe_svpbmt(stage))
 		cpu_req_feature |= BIT(CPUFEATURE_SVPBMT);