diff mbox series

[2/3] powerpc/mm: read TLB Block Invalidate Characteristics

Message ID 20190830120712.22971-3-ldufour@linux.ibm.com (mailing list archive)
State New, archived
Headers show
Series powerpc/mm: Conditionally call H_BLOCK_REMOVE | expand

Commit Message

Laurent Dufour Aug. 30, 2019, 12:07 p.m. UTC
The PAPR document specifies the TLB Block Invalidate Characteristics, which
tell which base page size / page size pairs are supported by the
H_BLOCK_REMOVE hcall.

A new field is added to the mmu_psize_def structure to record, for each
base page size, which page sizes are supported by H_BLOCK_REMOVE.

A new init service is added to read the characteristics. The size of the
buffer is set to twice the number of known page sizes, plus 10 bytes, to
ensure we have enough room.

Signed-off-by: Laurent Dufour <ldufour@linux.ibm.com>
---
 arch/powerpc/include/asm/book3s/64/mmu.h |   3 +
 arch/powerpc/platforms/pseries/lpar.c    | 107 +++++++++++++++++++++++
 2 files changed, 110 insertions(+)

Comments

Aneesh Kumar K.V Sept. 12, 2019, 2:16 p.m. UTC | #1
On 8/30/19 5:37 PM, Laurent Dufour wrote:
> The PAPR document specifies the TLB Block Invalidate Characteristics which
> is telling which couple base page size / page size is supported by the
> H_BLOCK_REMOVE hcall.
> 
> A new set of feature is added to the mmu_psize_def structure to record per
> base page size which page size is supported by H_BLOCK_REMOVE.
> 
> A new init service is added to read the characteristics. The size of the
> buffer is set to twice the number of known page size, plus 10 bytes to
> ensure we have enough place.
> 


So this is not really the base page size/actual page size combination.
This is related to the H_BLOCK_REMOVE hcall: the block sizes supported by
that hcall, and which page size combinations are supported with each
specific block size.

We should add a description of the TLB Block Invalidate Characteristics
format in this patch.


> Signed-off-by: Laurent Dufour <ldufour@linux.ibm.com>
> ---
>   arch/powerpc/include/asm/book3s/64/mmu.h |   3 +
>   arch/powerpc/platforms/pseries/lpar.c    | 107 +++++++++++++++++++++++
>   2 files changed, 110 insertions(+)
> 
> diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
> index 23b83d3593e2..675895dfe39f 100644
> --- a/arch/powerpc/include/asm/book3s/64/mmu.h
> +++ b/arch/powerpc/include/asm/book3s/64/mmu.h
> @@ -12,11 +12,14 @@
>    *    sllp  : is a bit mask with the value of SLB L || LP to be or'ed
>    *            directly to a slbmte "vsid" value
>    *    penc  : is the HPTE encoding mask for the "LP" field:
> + *    hblk  : H_BLOCK_REMOVE supported block size for this page size in
> + *            segment who's base page size is that page size.
>    *
>    */
>   struct mmu_psize_def {
>   	unsigned int	shift;	/* number of bits */
>   	int		penc[MMU_PAGE_COUNT];	/* HPTE encoding */
> +	int		hblk[MMU_PAGE_COUNT];	/* H_BLOCK_REMOVE support */
>   	unsigned int	tlbiel;	/* tlbiel supported for that page size */
>   	unsigned long	avpnm;	/* bits to mask out in AVPN in the HPTE */
>   	union {
> diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
> index 4f76e5f30c97..375e19b3cf53 100644
> --- a/arch/powerpc/platforms/pseries/lpar.c
> +++ b/arch/powerpc/platforms/pseries/lpar.c
> @@ -1311,6 +1311,113 @@ static void do_block_remove(unsigned long number, struct ppc64_tlb_batch *batch,
>   		(void)call_block_remove(pix, param, true);
>   }
>   
> +static inline void __init set_hblk_bloc_size(int bpsize, int psize,
> +					     unsigned int block_size)
> +{
> +	struct mmu_psize_def *def = &mmu_psize_defs[bpsize];
> +
> +	if (block_size > def->hblk[psize])
> +		def->hblk[psize] = block_size;
> +}
> +
> +static inline void __init check_lp_set_hblk(unsigned int lp,
> +					    unsigned int block_size)
> +{
> +	unsigned int bpsize, psize;
> +
> +
> +	/* First, check the L bit, if not set, this means 4K */
> +	if ((lp & 0x80) == 0) {


What is that 0x80? We should have a #define for most of those.

> +		set_hblk_bloc_size(MMU_PAGE_4K, MMU_PAGE_4K, block_size);
> +		return;
> +	}
> +
> +	/* PAPR says to look at bits 2-7 (0 = MSB) */
> +	lp &= 0x3f;

Also convert that to #define?

> +	for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++) {
> +		struct mmu_psize_def *def =  &mmu_psize_defs[bpsize];
> +
> +		for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
> +			if (def->penc[psize] == lp) {
> +				set_hblk_bloc_size(bpsize, psize, block_size);
> +				return;
> +			}
> +		}
> +	}
> +}
> +
> +#define SPLPAR_TLB_BIC_TOKEN		50
> +#define SPLPAR_TLB_BIC_MAXLENGTH	(MMU_PAGE_COUNT*2 + 10)
> +static int __init read_tlbbi_characteristics(void)
> +{
> +	int call_status;
> +	unsigned char local_buffer[SPLPAR_TLB_BIC_MAXLENGTH];
> +	int len, idx, bpsize;
> +
> +	if (!firmware_has_feature(FW_FEATURE_BLOCK_REMOVE)) {
> +		pr_info("H_BLOCK_REMOVE is not supported");
> +		return 0;
> +	}
> +
> +	memset(local_buffer, 0, SPLPAR_TLB_BIC_MAXLENGTH);
> +
> +	spin_lock(&rtas_data_buf_lock);
> +	memset(rtas_data_buf, 0, RTAS_DATA_BUF_SIZE);
> +	call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
> +				NULL,
> +				SPLPAR_TLB_BIC_TOKEN,
> +				__pa(rtas_data_buf),
> +				RTAS_DATA_BUF_SIZE);
> +	memcpy(local_buffer, rtas_data_buf, SPLPAR_TLB_BIC_MAXLENGTH);
> +	local_buffer[SPLPAR_TLB_BIC_MAXLENGTH - 1] = '\0';
> +	spin_unlock(&rtas_data_buf_lock);
> +
> +	if (call_status != 0) {
> +		pr_warn("%s %s Error calling get-system-parameter (0x%x)\n",
> +			__FILE__, __func__, call_status);
> +		return 0;
> +	}
> +
> +	/*
> +	 * The first two (2) bytes of the data in the buffer are the length of
> +	 * the returned data, not counting these first two (2) bytes.
> +	 */
> +	len = local_buffer[0] * 256 + local_buffer[1] + 2;
> +	if (len >= SPLPAR_TLB_BIC_MAXLENGTH) {
> +		pr_warn("%s too large returned buffer %d", __func__, len);
> +		return 0;
> +	}
> +
> +	idx = 2;
> +	while (idx < len) {
> +		unsigned int block_size = local_buffer[idx++];
> +		unsigned int npsize;
> +
> +		if (!block_size)
> +			break;
> +
> +		block_size = 1 << block_size;
> +		if (block_size != 8)
> +			/* We only support 8 bytes size TLB invalidate buffer */
> +			pr_warn("Unsupported H_BLOCK_REMOVE block size : %d\n",
> +				block_size);
> +
> +		for (npsize = local_buffer[idx++];  npsize > 0; npsize--)
> +			check_lp_set_hblk((unsigned int) local_buffer[idx++],
> +					  block_size);
> +	}
> +
> +	for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++)
> +		for (idx = 0; idx < MMU_PAGE_COUNT; idx++)
> +			if (mmu_psize_defs[bpsize].hblk[idx])
> +				pr_info("H_BLOCK_REMOVE supports base psize:%d psize:%d block size:%d",
> +					bpsize, idx,
> +					mmu_psize_defs[bpsize].hblk[idx]);
> +
> +	return 0;
> +}
> +machine_arch_initcall(pseries, read_tlbbi_characteristics);
> +

Why a machine_arch_initcall()? Can't we do this similarly to how we do the
segment-page-size parsing from the device tree? Also, this should be
specific to the hash translation mode.

>   /*
>    * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
>    * lock.
>
Aneesh Kumar K.V Sept. 12, 2019, 2:44 p.m. UTC | #2
Laurent Dufour <ldufour@linux.ibm.com> writes:

> The PAPR document specifies the TLB Block Invalidate Characteristics which
> is telling which couple base page size / page size is supported by the
> H_BLOCK_REMOVE hcall.
>
> A new set of feature is added to the mmu_psize_def structure to record per
> base page size which page size is supported by H_BLOCK_REMOVE.
>
> A new init service is added to read the characteristics. The size of the
> buffer is set to twice the number of known page size, plus 10 bytes to
> ensure we have enough place.
>
> Signed-off-by: Laurent Dufour <ldufour@linux.ibm.com>
> ---
>  arch/powerpc/include/asm/book3s/64/mmu.h |   3 +
>  arch/powerpc/platforms/pseries/lpar.c    | 107 +++++++++++++++++++++++
>  2 files changed, 110 insertions(+)
>
> diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
> index 23b83d3593e2..675895dfe39f 100644
> --- a/arch/powerpc/include/asm/book3s/64/mmu.h
> +++ b/arch/powerpc/include/asm/book3s/64/mmu.h
> @@ -12,11 +12,14 @@
>   *    sllp  : is a bit mask with the value of SLB L || LP to be or'ed
>   *            directly to a slbmte "vsid" value
>   *    penc  : is the HPTE encoding mask for the "LP" field:
> + *    hblk  : H_BLOCK_REMOVE supported block size for this page size in
> + *            segment who's base page size is that page size.
>   *
>   */
>  struct mmu_psize_def {
>  	unsigned int	shift;	/* number of bits */
>  	int		penc[MMU_PAGE_COUNT];	/* HPTE encoding */
> +	int		hblk[MMU_PAGE_COUNT];	/* H_BLOCK_REMOVE support */
>  	unsigned int	tlbiel;	/* tlbiel supported for that page size */
>  	unsigned long	avpnm;	/* bits to mask out in AVPN in the HPTE */
>  	union {
> diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
> index 4f76e5f30c97..375e19b3cf53 100644
> --- a/arch/powerpc/platforms/pseries/lpar.c
> +++ b/arch/powerpc/platforms/pseries/lpar.c
> @@ -1311,6 +1311,113 @@ static void do_block_remove(unsigned long number, struct ppc64_tlb_batch *batch,
>  		(void)call_block_remove(pix, param, true);
>  }
>  
> +static inline void __init set_hblk_bloc_size(int bpsize, int psize,
> +					     unsigned int block_size)
> +{
> +	struct mmu_psize_def *def = &mmu_psize_defs[bpsize];
> +
> +	if (block_size > def->hblk[psize])
> +		def->hblk[psize] = block_size;
> +}
> +
> +static inline void __init check_lp_set_hblk(unsigned int lp,
> +					    unsigned int block_size)
> +{
> +	unsigned int bpsize, psize;
> +
> +
> +	/* First, check the L bit, if not set, this means 4K */
> +	if ((lp & 0x80) == 0) {
> +		set_hblk_bloc_size(MMU_PAGE_4K, MMU_PAGE_4K, block_size);
> +		return;
> +	}
> +
> +	/* PAPR says to look at bits 2-7 (0 = MSB) */
> +	lp &= 0x3f;
> +	for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++) {
> +		struct mmu_psize_def *def =  &mmu_psize_defs[bpsize];
> +
> +		for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
> +			if (def->penc[psize] == lp) {
> +				set_hblk_bloc_size(bpsize, psize, block_size);
> +				return;
> +			}
> +		}
> +	}
> +}
> +
> +#define SPLPAR_TLB_BIC_TOKEN		50
> +#define SPLPAR_TLB_BIC_MAXLENGTH	(MMU_PAGE_COUNT*2 + 10)
> +static int __init read_tlbbi_characteristics(void)
> +{
> +	int call_status;
> +	unsigned char local_buffer[SPLPAR_TLB_BIC_MAXLENGTH];
> +	int len, idx, bpsize;
> +
> +	if (!firmware_has_feature(FW_FEATURE_BLOCK_REMOVE)) {
> +		pr_info("H_BLOCK_REMOVE is not supported");
> +		return 0;
> +	}
> +
> +	memset(local_buffer, 0, SPLPAR_TLB_BIC_MAXLENGTH);
> +
> +	spin_lock(&rtas_data_buf_lock);
> +	memset(rtas_data_buf, 0, RTAS_DATA_BUF_SIZE);
> +	call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
> +				NULL,
> +				SPLPAR_TLB_BIC_TOKEN,
> +				__pa(rtas_data_buf),
> +				RTAS_DATA_BUF_SIZE);
> +	memcpy(local_buffer, rtas_data_buf, SPLPAR_TLB_BIC_MAXLENGTH);
> +	local_buffer[SPLPAR_TLB_BIC_MAXLENGTH - 1] = '\0';
> +	spin_unlock(&rtas_data_buf_lock);
> +
> +	if (call_status != 0) {
> +		pr_warn("%s %s Error calling get-system-parameter (0x%x)\n",
> +			__FILE__, __func__, call_status);
> +		return 0;
> +	}
> +
> +	/*
> +	 * The first two (2) bytes of the data in the buffer are the length of
> +	 * the returned data, not counting these first two (2) bytes.
> +	 */
> +	len = local_buffer[0] * 256 + local_buffer[1] + 2;
> +	if (len >= SPLPAR_TLB_BIC_MAXLENGTH) {
> +		pr_warn("%s too large returned buffer %d", __func__, len);
> +		return 0;
> +	}
> +
> +	idx = 2;
> +	while (idx < len) {
> +		unsigned int block_size = local_buffer[idx++];
> +		unsigned int npsize;
> +
> +		if (!block_size)
> +			break;
> +
> +		block_size = 1 << block_size;
> +		if (block_size != 8)
> +			/* We only support 8 bytes size TLB invalidate buffer */
> +			pr_warn("Unsupported H_BLOCK_REMOVE block size : %d\n",
> +				block_size);

Should we skip setting the block size if we find block_size != 8? Also, can
we avoid doing that pr_warn in the loop and only warn if we don't find
block_size 8 in the invalidate characteristics array?

> +
> +		for (npsize = local_buffer[idx++];  npsize > 0; npsize--)
> +			check_lp_set_hblk((unsigned int) local_buffer[idx++],
> +					  block_size);
> +	}
> +
> +	for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++)
> +		for (idx = 0; idx < MMU_PAGE_COUNT; idx++)
> +			if (mmu_psize_defs[bpsize].hblk[idx])
> +				pr_info("H_BLOCK_REMOVE supports base psize:%d psize:%d block size:%d",
> +					bpsize, idx,
> +					mmu_psize_defs[bpsize].hblk[idx]);
> +
> +	return 0;
> +}
> +machine_arch_initcall(pseries, read_tlbbi_characteristics);
> +
>  /*
>   * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
>   * lock.
> -- 
> 2.23.0
Laurent Dufour Sept. 12, 2019, 7:26 p.m. UTC | #3
Le 12/09/2019 à 16:44, Aneesh Kumar K.V a écrit :
> Laurent Dufour <ldufour@linux.ibm.com> writes:
> 
>> The PAPR document specifies the TLB Block Invalidate Characteristics which
>> is telling which couple base page size / page size is supported by the
>> H_BLOCK_REMOVE hcall.
>>
>> A new set of feature is added to the mmu_psize_def structure to record per
>> base page size which page size is supported by H_BLOCK_REMOVE.
>>
>> A new init service is added to read the characteristics. The size of the
>> buffer is set to twice the number of known page size, plus 10 bytes to
>> ensure we have enough place.
>>
>> Signed-off-by: Laurent Dufour <ldufour@linux.ibm.com>
>> ---
>>   arch/powerpc/include/asm/book3s/64/mmu.h |   3 +
>>   arch/powerpc/platforms/pseries/lpar.c    | 107 +++++++++++++++++++++++
>>   2 files changed, 110 insertions(+)
>>
>> diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
>> index 23b83d3593e2..675895dfe39f 100644
>> --- a/arch/powerpc/include/asm/book3s/64/mmu.h
>> +++ b/arch/powerpc/include/asm/book3s/64/mmu.h
>> @@ -12,11 +12,14 @@
>>    *    sllp  : is a bit mask with the value of SLB L || LP to be or'ed
>>    *            directly to a slbmte "vsid" value
>>    *    penc  : is the HPTE encoding mask for the "LP" field:
>> + *    hblk  : H_BLOCK_REMOVE supported block size for this page size in
>> + *            segment who's base page size is that page size.
>>    *
>>    */
>>   struct mmu_psize_def {
>>   	unsigned int	shift;	/* number of bits */
>>   	int		penc[MMU_PAGE_COUNT];	/* HPTE encoding */
>> +	int		hblk[MMU_PAGE_COUNT];	/* H_BLOCK_REMOVE support */
>>   	unsigned int	tlbiel;	/* tlbiel supported for that page size */
>>   	unsigned long	avpnm;	/* bits to mask out in AVPN in the HPTE */
>>   	union {
>> diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
>> index 4f76e5f30c97..375e19b3cf53 100644
>> --- a/arch/powerpc/platforms/pseries/lpar.c
>> +++ b/arch/powerpc/platforms/pseries/lpar.c
>> @@ -1311,6 +1311,113 @@ static void do_block_remove(unsigned long number, struct ppc64_tlb_batch *batch,
>>   		(void)call_block_remove(pix, param, true);
>>   }
>>   
>> +static inline void __init set_hblk_bloc_size(int bpsize, int psize,
>> +					     unsigned int block_size)
>> +{
>> +	struct mmu_psize_def *def = &mmu_psize_defs[bpsize];
>> +
>> +	if (block_size > def->hblk[psize])
>> +		def->hblk[psize] = block_size;
>> +}
>> +
>> +static inline void __init check_lp_set_hblk(unsigned int lp,
>> +					    unsigned int block_size)
>> +{
>> +	unsigned int bpsize, psize;
>> +
>> +
>> +	/* First, check the L bit, if not set, this means 4K */
>> +	if ((lp & 0x80) == 0) {
>> +		set_hblk_bloc_size(MMU_PAGE_4K, MMU_PAGE_4K, block_size);
>> +		return;
>> +	}
>> +
>> +	/* PAPR says to look at bits 2-7 (0 = MSB) */
>> +	lp &= 0x3f;
>> +	for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++) {
>> +		struct mmu_psize_def *def =  &mmu_psize_defs[bpsize];
>> +
>> +		for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
>> +			if (def->penc[psize] == lp) {
>> +				set_hblk_bloc_size(bpsize, psize, block_size);
>> +				return;
>> +			}
>> +		}
>> +	}
>> +}
>> +
>> +#define SPLPAR_TLB_BIC_TOKEN		50
>> +#define SPLPAR_TLB_BIC_MAXLENGTH	(MMU_PAGE_COUNT*2 + 10)
>> +static int __init read_tlbbi_characteristics(void)
>> +{
>> +	int call_status;
>> +	unsigned char local_buffer[SPLPAR_TLB_BIC_MAXLENGTH];
>> +	int len, idx, bpsize;
>> +
>> +	if (!firmware_has_feature(FW_FEATURE_BLOCK_REMOVE)) {
>> +		pr_info("H_BLOCK_REMOVE is not supported");
>> +		return 0;
>> +	}
>> +
>> +	memset(local_buffer, 0, SPLPAR_TLB_BIC_MAXLENGTH);
>> +
>> +	spin_lock(&rtas_data_buf_lock);
>> +	memset(rtas_data_buf, 0, RTAS_DATA_BUF_SIZE);
>> +	call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
>> +				NULL,
>> +				SPLPAR_TLB_BIC_TOKEN,
>> +				__pa(rtas_data_buf),
>> +				RTAS_DATA_BUF_SIZE);
>> +	memcpy(local_buffer, rtas_data_buf, SPLPAR_TLB_BIC_MAXLENGTH);
>> +	local_buffer[SPLPAR_TLB_BIC_MAXLENGTH - 1] = '\0';
>> +	spin_unlock(&rtas_data_buf_lock);
>> +
>> +	if (call_status != 0) {
>> +		pr_warn("%s %s Error calling get-system-parameter (0x%x)\n",
>> +			__FILE__, __func__, call_status);
>> +		return 0;
>> +	}
>> +
>> +	/*
>> +	 * The first two (2) bytes of the data in the buffer are the length of
>> +	 * the returned data, not counting these first two (2) bytes.
>> +	 */
>> +	len = local_buffer[0] * 256 + local_buffer[1] + 2;
>> +	if (len >= SPLPAR_TLB_BIC_MAXLENGTH) {
>> +		pr_warn("%s too large returned buffer %d", __func__, len);
>> +		return 0;
>> +	}
>> +
>> +	idx = 2;
>> +	while (idx < len) {
>> +		unsigned int block_size = local_buffer[idx++];
>> +		unsigned int npsize;
>> +
>> +		if (!block_size)
>> +			break;
>> +
>> +		block_size = 1 << block_size;
>> +		if (block_size != 8)
>> +			/* We only support 8 bytes size TLB invalidate buffer */
>> +			pr_warn("Unsupported H_BLOCK_REMOVE block size : %d\n",
>> +				block_size);
> 
> Should we skip setting block size if we find block_size != 8? Also can
> we avoid doing that pr_warn in loop and only warn if we don't find
> block_size 8 in the invalidate characteristics array?

My idea here is to fully read and process the data returned by the
ibm,get-system-parameter RTAS call, and to apply the limitation to a block
size of 8 in the check made before calling H_BLOCK_REMOVE.
The warning is there because I want it to be displayed once at boot.

> 
>> +
>> +		for (npsize = local_buffer[idx++];  npsize > 0; npsize--)
>> +			check_lp_set_hblk((unsigned int) local_buffer[idx++],
>> +					  block_size);
>> +	}
>> +
>> +	for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++)
>> +		for (idx = 0; idx < MMU_PAGE_COUNT; idx++)
>> +			if (mmu_psize_defs[bpsize].hblk[idx])
>> +				pr_info("H_BLOCK_REMOVE supports base psize:%d psize:%d block size:%d",
>> +					bpsize, idx,
>> +					mmu_psize_defs[bpsize].hblk[idx]);
>> +
>> +	return 0;
>> +}
>> +machine_arch_initcall(pseries, read_tlbbi_characteristics);
>> +
>>   /*
>>    * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
>>    * lock.
>> -- 
>> 2.23.0
Aneesh Kumar K.V Sept. 13, 2019, 2 a.m. UTC | #4
On 9/13/19 12:56 AM, Laurent Dufour wrote:
> Le 12/09/2019 à 16:44, Aneesh Kumar K.V a écrit :
>> Laurent Dufour <ldufour@linux.ibm.com> writes:

>>> +
>>> +    idx = 2;
>>> +    while (idx < len) {
>>> +        unsigned int block_size = local_buffer[idx++];
>>> +        unsigned int npsize;
>>> +
>>> +        if (!block_size)
>>> +            break;
>>> +
>>> +        block_size = 1 << block_size;
>>> +        if (block_size != 8)
>>> +            /* We only support 8 bytes size TLB invalidate buffer */
>>> +            pr_warn("Unsupported H_BLOCK_REMOVE block size : %d\n",
>>> +                block_size);
>>
>> Should we skip setting block size if we find block_size != 8? Also can
>> we avoid doing that pr_warn in loop and only warn if we don't find
>> block_size 8 in the invalidate characteristics array?
> 
> My idea here is to fully read and process the data returned by the 
> hcall, and to put the limitation to 8 when checking before calling 
> H_BLOCK_REMOVE.
> The warning is there because I want it to be displayed once at boot.
> 


Can we have two block sizes reported for the same base page size/actual
page size combination? If so, will we overwrite hblk[actual_psize]?

>>
>>> +
>>> +        for (npsize = local_buffer[idx++];  npsize > 0; npsize--)
>>> +            check_lp_set_hblk((unsigned int) local_buffer[idx++],
>>> +                      block_size);
>>> +    }
>>> +
>>> +    for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++)
>>> +        for (idx = 0; idx < MMU_PAGE_COUNT; idx++)
>>> +            if (mmu_psize_defs[bpsize].hblk[idx])
>>> +                pr_info("H_BLOCK_REMOVE supports base psize:%d 
>>> psize:%d block size:%d",
>>> +                    bpsize, idx,
>>> +                    mmu_psize_defs[bpsize].hblk[idx]);
>>> +
>>> +    return 0;
>>> +}
>>> +machine_arch_initcall(pseries, read_tlbbi_characteristics);
>>> +
>>>   /*
>>>    * Take a spinlock around flushes to avoid bouncing the hypervisor 
>>> tlbie
>>>    * lock.

-aneesh
Laurent Dufour Sept. 13, 2019, 9:10 a.m. UTC | #5
Le 13/09/2019 à 04:00, Aneesh Kumar K.V a écrit :
> On 9/13/19 12:56 AM, Laurent Dufour wrote:
>> Le 12/09/2019 à 16:44, Aneesh Kumar K.V a écrit :
>>> Laurent Dufour <ldufour@linux.ibm.com> writes:
> 
>>>> +
>>>> +    idx = 2;
>>>> +    while (idx < len) {
>>>> +        unsigned int block_size = local_buffer[idx++];
>>>> +        unsigned int npsize;
>>>> +
>>>> +        if (!block_size)
>>>> +            break;
>>>> +
>>>> +        block_size = 1 << block_size;
>>>> +        if (block_size != 8)
>>>> +            /* We only support 8 bytes size TLB invalidate buffer */
>>>> +            pr_warn("Unsupported H_BLOCK_REMOVE block size : %d\n",
>>>> +                block_size);
>>>
>>> Should we skip setting block size if we find block_size != 8? Also can
>>> we avoid doing that pr_warn in loop and only warn if we don't find
>>> block_size 8 in the invalidate characteristics array?
>>
>> My idea here is to fully read and process the data returned by the hcall, 
>> and to put the limitation to 8 when checking before calling H_BLOCK_REMOVE.
>> The warning is there because I want it to be displayed once at boot.
>>
> 
> 
> Can we have two block size reported for the same base page size/actual page 
> size combination? If so we will overwrite the hblk[actual_psize] ?

In set_hblk_bloc_size(), called from check_lp_set_hblk(), I'm only keeping the bigger one.

> 
>>>
>>>> +
>>>> +        for (npsize = local_buffer[idx++];  npsize > 0; npsize--)
>>>> +            check_lp_set_hblk((unsigned int) local_buffer[idx++],
>>>> +                      block_size);
>>>> +    }
>>>> +
>>>> +    for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++)
>>>> +        for (idx = 0; idx < MMU_PAGE_COUNT; idx++)
>>>> +            if (mmu_psize_defs[bpsize].hblk[idx])
>>>> +                pr_info("H_BLOCK_REMOVE supports base psize:%d 
>>>> psize:%d block size:%d",
>>>> +                    bpsize, idx,
>>>> +                    mmu_psize_defs[bpsize].hblk[idx]);
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +machine_arch_initcall(pseries, read_tlbbi_characteristics);
>>>> +
>>>>   /*
>>>>    * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
>>>>    * lock.
> 
> -aneesh
Laurent Dufour Sept. 13, 2019, 1:55 p.m. UTC | #6
Le 12/09/2019 à 16:16, Aneesh Kumar K.V a écrit :
> On 8/30/19 5:37 PM, Laurent Dufour wrote:
>> The PAPR document specifies the TLB Block Invalidate Characteristics which
>> is telling which couple base page size / page size is supported by the
>> H_BLOCK_REMOVE hcall.
>>
>> A new set of feature is added to the mmu_psize_def structure to record per
>> base page size which page size is supported by H_BLOCK_REMOVE.
>>
>> A new init service is added to read the characteristics. The size of the
>> buffer is set to twice the number of known page size, plus 10 bytes to
>> ensure we have enough place.
>>
> 
> 
> So this is not really the base page size/actual page size combination. This 
> is related to H_BLOCK_REMOVE hcall, block size supported by that HCALL and 
> what page size combination is supported with that specific block size.

I agree

> 
> We should add that TLB block invalidate characteristics format in this patch.

Sure, will do that in a comment inside the code.

> 
>> Signed-off-by: Laurent Dufour <ldufour@linux.ibm.com>
>> ---
>>   arch/powerpc/include/asm/book3s/64/mmu.h |   3 +
>>   arch/powerpc/platforms/pseries/lpar.c    | 107 +++++++++++++++++++++++
>>   2 files changed, 110 insertions(+)
>>
>> diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h 
>> b/arch/powerpc/include/asm/book3s/64/mmu.h
>> index 23b83d3593e2..675895dfe39f 100644
>> --- a/arch/powerpc/include/asm/book3s/64/mmu.h
>> +++ b/arch/powerpc/include/asm/book3s/64/mmu.h
>> @@ -12,11 +12,14 @@
>>    *    sllp  : is a bit mask with the value of SLB L || LP to be or'ed
>>    *            directly to a slbmte "vsid" value
>>    *    penc  : is the HPTE encoding mask for the "LP" field:
>> + *    hblk  : H_BLOCK_REMOVE supported block size for this page size in
>> + *            segment who's base page size is that page size.
>>    *
>>    */
>>   struct mmu_psize_def {
>>       unsigned int    shift;    /* number of bits */
>>       int        penc[MMU_PAGE_COUNT];    /* HPTE encoding */
>> +    int        hblk[MMU_PAGE_COUNT];    /* H_BLOCK_REMOVE support */
>>       unsigned int    tlbiel;    /* tlbiel supported for that page size */
>>       unsigned long    avpnm;    /* bits to mask out in AVPN in the HPTE */
>>       union {
>> diff --git a/arch/powerpc/platforms/pseries/lpar.c 
>> b/arch/powerpc/platforms/pseries/lpar.c
>> index 4f76e5f30c97..375e19b3cf53 100644
>> --- a/arch/powerpc/platforms/pseries/lpar.c
>> +++ b/arch/powerpc/platforms/pseries/lpar.c
>> @@ -1311,6 +1311,113 @@ static void do_block_remove(unsigned long number, 
>> struct ppc64_tlb_batch *batch,
>>           (void)call_block_remove(pix, param, true);
>>   }
>> +static inline void __init set_hblk_bloc_size(int bpsize, int psize,
>> +                         unsigned int block_size)
>> +{
>> +    struct mmu_psize_def *def = &mmu_psize_defs[bpsize];
>> +
>> +    if (block_size > def->hblk[psize])
>> +        def->hblk[psize] = block_size;
>> +}
>> +
>> +static inline void __init check_lp_set_hblk(unsigned int lp,
>> +                        unsigned int block_size)
>> +{
>> +    unsigned int bpsize, psize;
>> +
>> +
>> +    /* First, check the L bit, if not set, this means 4K */
>> +    if ((lp & 0x80) == 0) {
> 
> 
> What is that 0x80? We should have #define for most of those.

I will make that more explicit through a define

> 
>> +        set_hblk_bloc_size(MMU_PAGE_4K, MMU_PAGE_4K, block_size);
>> +        return;
>> +    }
>> +
>> +    /* PAPR says to look at bits 2-7 (0 = MSB) */
>> +    lp &= 0x3f;
> 
> Also convert that to #define?

Really? The comment above explicitly says that we are looking at bits
2-7. A define would obscure that.

> 
>> +    for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++) {
>> +        struct mmu_psize_def *def =  &mmu_psize_defs[bpsize];
>> +
>> +        for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
>> +            if (def->penc[psize] == lp) {
>> +                set_hblk_bloc_size(bpsize, psize, block_size);
>> +                return;
>> +            }
>> +        }
>> +    }
>> +}
>> +
>> +#define SPLPAR_TLB_BIC_TOKEN        50
>> +#define SPLPAR_TLB_BIC_MAXLENGTH    (MMU_PAGE_COUNT*2 + 10)
>> +static int __init read_tlbbi_characteristics(void)
>> +{
>> +    int call_status;
>> +    unsigned char local_buffer[SPLPAR_TLB_BIC_MAXLENGTH];
>> +    int len, idx, bpsize;
>> +
>> +    if (!firmware_has_feature(FW_FEATURE_BLOCK_REMOVE)) {
>> +        pr_info("H_BLOCK_REMOVE is not supported");
>> +        return 0;
>> +    }
>> +
>> +    memset(local_buffer, 0, SPLPAR_TLB_BIC_MAXLENGTH);
>> +
>> +    spin_lock(&rtas_data_buf_lock);
>> +    memset(rtas_data_buf, 0, RTAS_DATA_BUF_SIZE);
>> +    call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
>> +                NULL,
>> +                SPLPAR_TLB_BIC_TOKEN,
>> +                __pa(rtas_data_buf),
>> +                RTAS_DATA_BUF_SIZE);
>> +    memcpy(local_buffer, rtas_data_buf, SPLPAR_TLB_BIC_MAXLENGTH);
>> +    local_buffer[SPLPAR_TLB_BIC_MAXLENGTH - 1] = '\0';
>> +    spin_unlock(&rtas_data_buf_lock);
>> +
>> +    if (call_status != 0) {
>> +        pr_warn("%s %s Error calling get-system-parameter (0x%x)\n",
>> +            __FILE__, __func__, call_status);
>> +        return 0;
>> +    }
>> +
>> +    /*
>> +     * The first two (2) bytes of the data in the buffer are the length of
>> +     * the returned data, not counting these first two (2) bytes.
>> +     */
>> +    len = local_buffer[0] * 256 + local_buffer[1] + 2;
>> +    if (len >= SPLPAR_TLB_BIC_MAXLENGTH) {
>> +        pr_warn("%s too large returned buffer %d", __func__, len);
>> +        return 0;
>> +    }
>> +
>> +    idx = 2;
>> +    while (idx < len) {
>> +        unsigned int block_size = local_buffer[idx++];
>> +        unsigned int npsize;
>> +
>> +        if (!block_size)
>> +            break;
>> +
>> +        block_size = 1 << block_size;
>> +        if (block_size != 8)
>> +            /* We only support 8 bytes size TLB invalidate buffer */
>> +            pr_warn("Unsupported H_BLOCK_REMOVE block size : %d\n",
>> +                block_size);
>> +
>> +        for (npsize = local_buffer[idx++];  npsize > 0; npsize--)
>> +            check_lp_set_hblk((unsigned int) local_buffer[idx++],
>> +                      block_size);
>> +    }
>> +
>> +    for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++)
>> +        for (idx = 0; idx < MMU_PAGE_COUNT; idx++)
>> +            if (mmu_psize_defs[bpsize].hblk[idx])
>> +                pr_info("H_BLOCK_REMOVE supports base psize:%d psize:%d 
>> block size:%d",
>> +                    bpsize, idx,
>> +                    mmu_psize_defs[bpsize].hblk[idx]);
>> +
>> +    return 0;
>> +}
>> +machine_arch_initcall(pseries, read_tlbbi_characteristics);
>> +
> 
> Why a machine_arch_initcall() ? Can't we do this similar to how we do 
> segment-page-size parsing from device tree? Also this should be hash 
> translation mode specific.

Because that code is specific to the pseries platform, whereas the hash
translation is not pseries-specific.

Indeed, the change to mmu_psize_defs does not belong in such a generic
structure. The hblk characteristics should remain static to the lpar.c file
where they are used.

> 
>>   /*
>>    * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
>>    * lock.
>>
>
diff mbox series

Patch

diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 23b83d3593e2..675895dfe39f 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -12,11 +12,14 @@ 
  *    sllp  : is a bit mask with the value of SLB L || LP to be or'ed
  *            directly to a slbmte "vsid" value
  *    penc  : is the HPTE encoding mask for the "LP" field:
+ *    hblk  : H_BLOCK_REMOVE supported block size for this page size in
+ *            segment whose base page size is that page size.
  *
  */
 struct mmu_psize_def {
 	unsigned int	shift;	/* number of bits */
 	int		penc[MMU_PAGE_COUNT];	/* HPTE encoding */
+	int		hblk[MMU_PAGE_COUNT];	/* H_BLOCK_REMOVE support */
 	unsigned int	tlbiel;	/* tlbiel supported for that page size */
 	unsigned long	avpnm;	/* bits to mask out in AVPN in the HPTE */
 	union {
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 4f76e5f30c97..375e19b3cf53 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -1311,6 +1311,113 @@  static void do_block_remove(unsigned long number, struct ppc64_tlb_batch *batch,
 		(void)call_block_remove(pix, param, true);
 }
 
+static inline void __init set_hblk_bloc_size(int bpsize, int psize,
+					     unsigned int block_size)
+{
+	struct mmu_psize_def *def = &mmu_psize_defs[bpsize];
+
+	if (block_size > def->hblk[psize])
+		def->hblk[psize] = block_size;
+}
+
+static inline void __init check_lp_set_hblk(unsigned int lp,
+					    unsigned int block_size)
+{
+	unsigned int bpsize, psize;
+
+
+	/* First, check the L bit, if not set, this means 4K */
+	if ((lp & 0x80) == 0) {
+		set_hblk_bloc_size(MMU_PAGE_4K, MMU_PAGE_4K, block_size);
+		return;
+	}
+
+	/* PAPR says to look at bits 2-7 (0 = MSB) */
+	lp &= 0x3f;
+	for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++) {
+		struct mmu_psize_def *def =  &mmu_psize_defs[bpsize];
+
+		for (psize = 0; psize < MMU_PAGE_COUNT; psize++) {
+			if (def->penc[psize] == lp) {
+				set_hblk_bloc_size(bpsize, psize, block_size);
+				return;
+			}
+		}
+	}
+}
+
+#define SPLPAR_TLB_BIC_TOKEN		50
+#define SPLPAR_TLB_BIC_MAXLENGTH	(MMU_PAGE_COUNT*2 + 10)
+static int __init read_tlbbi_characteristics(void)
+{
+	int call_status;
+	unsigned char local_buffer[SPLPAR_TLB_BIC_MAXLENGTH];
+	int len, idx, bpsize;
+
+	if (!firmware_has_feature(FW_FEATURE_BLOCK_REMOVE)) {
+		pr_info("H_BLOCK_REMOVE is not supported");
+		return 0;
+	}
+
+	memset(local_buffer, 0, SPLPAR_TLB_BIC_MAXLENGTH);
+
+	spin_lock(&rtas_data_buf_lock);
+	memset(rtas_data_buf, 0, RTAS_DATA_BUF_SIZE);
+	call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
+				NULL,
+				SPLPAR_TLB_BIC_TOKEN,
+				__pa(rtas_data_buf),
+				RTAS_DATA_BUF_SIZE);
+	memcpy(local_buffer, rtas_data_buf, SPLPAR_TLB_BIC_MAXLENGTH);
+	local_buffer[SPLPAR_TLB_BIC_MAXLENGTH - 1] = '\0';
+	spin_unlock(&rtas_data_buf_lock);
+
+	if (call_status != 0) {
+		pr_warn("%s %s Error calling get-system-parameter (0x%x)\n",
+			__FILE__, __func__, call_status);
+		return 0;
+	}
+
+	/*
+	 * The first two (2) bytes of the data in the buffer are the length of
+	 * the returned data, not counting these first two (2) bytes.
+	 */
+	len = local_buffer[0] * 256 + local_buffer[1] + 2;
+	if (len >= SPLPAR_TLB_BIC_MAXLENGTH) {
+		pr_warn("%s too large returned buffer %d", __func__, len);
+		return 0;
+	}
+
+	idx = 2;
+	while (idx < len) {
+		unsigned int block_size = local_buffer[idx++];
+		unsigned int npsize;
+
+		if (!block_size)
+			break;
+
+		block_size = 1 << block_size;
+		if (block_size != 8)
+			/* We only support 8 bytes size TLB invalidate buffer */
+			pr_warn("Unsupported H_BLOCK_REMOVE block size : %d\n",
+				block_size);
+
+		for (npsize = local_buffer[idx++];  npsize > 0; npsize--)
+			check_lp_set_hblk((unsigned int) local_buffer[idx++],
+					  block_size);
+	}
+
+	for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++)
+		for (idx = 0; idx < MMU_PAGE_COUNT; idx++)
+			if (mmu_psize_defs[bpsize].hblk[idx])
+				pr_info("H_BLOCK_REMOVE supports base psize:%d psize:%d block size:%d",
+					bpsize, idx,
+					mmu_psize_defs[bpsize].hblk[idx]);
+
+	return 0;
+}
+machine_arch_initcall(pseries, read_tlbbi_characteristics);
+
 /*
  * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
  * lock.