
[RFT] arm64: kasan: Make KASAN work with 16K pages + 48 bit VA

Message ID 1448543686-31869-1-git-send-email-aryabinin@virtuozzo.com (mailing list archive)
State New, archived

Commit Message

Andrey Ryabinin Nov. 26, 2015, 1:14 p.m. UTC
Currently kasan assumes that shadow memory covers one or more entire PGDs.
That's not true for 16K pages + 48-bit VA space, where PGDIR_SIZE is bigger
than the whole shadow memory.

This patch tries to fix that case.
clear_page_tables() is a new replacement for clear_pgds(). Instead of always
clearing pgds, it clears the top-level page table entries that belong
entirely to shadow memory.
In addition to 'tmp_pg_dir' we now have 'tmp_pud', which is used to store
the puds that may now be cleared by clear_page_tables().

Reported-by: Suzuki K. Poulose <Suzuki.Poulose@arm.com>
Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
---

 *** THIS is not tested with 16k pages ***

 arch/arm64/mm/kasan_init.c | 87 ++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 76 insertions(+), 11 deletions(-)

Comments

Mark Rutland Nov. 26, 2015, 2:48 p.m. UTC | #1
Hi,

On Thu, Nov 26, 2015 at 04:14:46PM +0300, Andrey Ryabinin wrote:
> Currently kasan assumes that shadow memory covers one or more entire PGDs.
> That's not true for 16K pages + 48-bit VA space, where PGDIR_SIZE is bigger
> than the whole shadow memory.
>
> This patch tries to fix that case.
> clear_page_tables() is a new replacement for clear_pgds(). Instead of always
> clearing pgds, it clears the top-level page table entries that belong
> entirely to shadow memory.
> In addition to 'tmp_pg_dir' we now have 'tmp_pud', which is used to store
> the puds that may now be cleared by clear_page_tables().
> 
> Reported-by: Suzuki K. Poulose <Suzuki.Poulose@arm.com>
> Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
> ---
> 
>  *** THIS is not tested with 16k pages ***
> 
>  arch/arm64/mm/kasan_init.c | 87 ++++++++++++++++++++++++++++++++++++++++------
>  1 file changed, 76 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
> index cf038c7..ea9f92a 100644
> --- a/arch/arm64/mm/kasan_init.c
> +++ b/arch/arm64/mm/kasan_init.c
> @@ -22,6 +22,7 @@
>  #include <asm/tlbflush.h>
>  
>  static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE);
> +static pud_t tmp_pud[PAGE_SIZE/sizeof(pud_t)] __initdata __aligned(PAGE_SIZE);
>  
>  static void __init kasan_early_pte_populate(pmd_t *pmd, unsigned long addr,
>  					unsigned long end)
> @@ -92,20 +93,84 @@ asmlinkage void __init kasan_early_init(void)
>  {
>  	BUILD_BUG_ON(KASAN_SHADOW_OFFSET != KASAN_SHADOW_END - (1UL << 61));
>  	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE));
> -	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE));
> +	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PUD_SIZE));

We also assume that even in the shared PUD case, the shadow region falls
within the same PGD entry, or we would need more than a single tmp_pud.

It would be good to test for that.

>  	kasan_map_early_shadow();
>  }
>  
> -static void __init clear_pgds(unsigned long start,
> -			unsigned long end)
> +static void __init clear_pmds(pud_t *pud, unsigned long addr, unsigned long end)
>  {
> +	pmd_t *pmd;
> +	unsigned long next;
> +
> +	pmd = pmd_offset(pud, addr);
> +
> +	do {
> +		next = pmd_addr_end(addr, end);
> +		if (IS_ALIGNED(addr, PMD_SIZE) && end - addr >= PMD_SIZE)
> +			pmd_clear(pmd);
> +
> +	} while (pmd++, addr = next, addr != end);
> +}
> +
> +static void __init clear_puds(pgd_t *pgd, unsigned long addr, unsigned long end)
> +{
> +	pud_t *pud;
> +	unsigned long next;
> +
> +	pud = pud_offset(pgd, addr);
> +
> +	do {
> +		next = pud_addr_end(addr, end);
> +		if (IS_ALIGNED(addr, PUD_SIZE) && end - addr >= PUD_SIZE)
> +			pud_clear(pud);

I think this can just be:

		if (next - addr == PUD_SIZE)
			pud_clear(pud);

Given that next can be at most PUD_SIZE from addr, and if it is, we know
addr is aligned.
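
For reference, the generic pud_addr_end() clamps to the next PUD boundary
or to 'end', whichever comes first (roughly as defined in
include/asm-generic/pgtable.h at the time):

	#define pud_addr_end(addr, end)						\
	({	unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK;	\
		(__boundary - 1 < (end) - 1)? __boundary: (end);		\
	})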

> +
> +		if (!pud_none(*pud))
> +			clear_pmds(pud, addr, next);

I don't understand this. The KASAN shadow region is PUD_SIZE aligned at
either end, so KASAN should never own a partial pud entry like this.

Regardless, were this case to occur, surely we'd be clearing pmd entries
in the active page tables? We didn't copy anything at the pmd level.

That doesn't seem right.

> +	} while (pud++, addr = next, addr != end);
> +}
> +
> +static void __init clear_page_tables(unsigned long addr, unsigned long end)
> +{
> +	pgd_t *pgd;
> +	unsigned long next;
> +
> +	pgd = pgd_offset_k(addr);
> +
> +	do {
> +		next = pgd_addr_end(addr, end);
> +		if (IS_ALIGNED(addr, PGDIR_SIZE) && end - addr >= PGDIR_SIZE)
> +			pgd_clear(pgd);

As with clear_puds, I think this can be:

		if (next - addr == PGDIR_SIZE)
			pgd_clear(pgd);

> +
> +		if (!pgd_none(*pgd))
> +			clear_puds(pgd, addr, next);
> +	} while (pgd++, addr = next, addr != end);
> +}
> +
> +static void copy_pagetables(void)
> +{
> +	pgd_t *pgd = tmp_pg_dir + pgd_index(KASAN_SHADOW_START);
> +
> +	memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
> +
>  	/*
> -	 * Remove references to kasan page tables from
> -	 * swapper_pg_dir. pgd_clear() can't be used
> -	 * here because it's nop on 2,3-level pagetable setups
> +	 * If kasan shadow shares PGD with other mappings,
> +	 * clear_page_tables() will clear puds instead of pgd,
> +	 * so we need temporary pud table to keep early shadow mapped.
>  	 */
> -	for (; start < end; start += PGDIR_SIZE)
> -		set_pgd(pgd_offset_k(start), __pgd(0));
> +	if (PGDIR_SIZE > KASAN_SHADOW_END - KASAN_SHADOW_START) {
> +		pud_t *pud;
> +		pmd_t *pmd;
> +		pte_t *pte;
> +
> +		memcpy(tmp_pud, pgd_page_vaddr(*pgd), sizeof(tmp_pud));
> +
> +		pgd_populate(&init_mm, pgd, tmp_pud);
> +		pud = pud_offset(pgd, KASAN_SHADOW_START);
> +		pmd = pmd_offset(pud, KASAN_SHADOW_START);
> +		pud_populate(&init_mm, pud, pmd);
> +		pte = pte_offset_kernel(pmd, KASAN_SHADOW_START);
> +		pmd_populate_kernel(&init_mm, pmd, pte);

I don't understand why we need to do anything below the pud level here.
We only copy down to the pud level, and we already initialised the
shared ptes and pmds earlier.

Regardless of this patch, we currently initialise the shared tables
repeatedly, which is redundant after the first time we initialise them.
We could improve that.
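
In other words, with 4 levels the copy could plausibly be reduced to
something like this (an untested sketch):

	/* copy the puds backing the shadow region and point the copied
	 * pgd entry at the copy; levels below the pud stay shared */
	memcpy(tmp_pud, pgd_page_vaddr(*pgd), sizeof(tmp_pud));
	pgd_populate(&init_mm, pgd, tmp_pud);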

> +	}
>  }
>  
>  static void __init cpu_set_ttbr1(unsigned long ttbr1)
> @@ -123,16 +188,16 @@ void __init kasan_init(void)
>  
>  	/*
>  	 * We are going to perform proper setup of shadow memory.
> -	 * At first we should unmap early shadow (clear_pgds() call bellow).
> +	 * At first we should unmap early shadow (clear_page_tables()).
>  	 * However, instrumented code couldn't execute without shadow memory.
>  	 * tmp_pg_dir used to keep early shadow mapped until full shadow
>  	 * setup will be finished.
>  	 */
> -	memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
> +	copy_pagetables();
>  	cpu_set_ttbr1(__pa(tmp_pg_dir));
>  	flush_tlb_all();

As a heads-up, I will shortly have patches altering the swap of TTBR1,
as it risks conflicting TLB entries and misses barriers.

Otherwise, we need a dsb(ishst) between the memcpy and writing to the
TTBR to ensure that the tables are visible to the MMU.
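
Concretely, such a barrier would sit between the copy and the TTBR write,
along these lines (an illustrative sketch only):

	memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
	dsb(ishst);	/* make the copied tables visible to the MMU walker */
	cpu_set_ttbr1(__pa(tmp_pg_dir));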

Thanks,
Mark.
Andrey Ryabinin Nov. 26, 2015, 3:47 p.m. UTC | #2
On 11/26/2015 05:48 PM, Mark Rutland wrote:
> Hi,
> 
> On Thu, Nov 26, 2015 at 04:14:46PM +0300, Andrey Ryabinin wrote:
>> Currently kasan assumes that shadow memory covers one or more entire PGDs.
>> That's not true for 16K pages + 48-bit VA space, where PGDIR_SIZE is bigger
>> than the whole shadow memory.
>>
>> This patch tries to fix that case.
>> clear_page_tables() is a new replacement for clear_pgds(). Instead of always
>> clearing pgds, it clears the top-level page table entries that belong
>> entirely to shadow memory.
>> In addition to 'tmp_pg_dir' we now have 'tmp_pud', which is used to store
>> the puds that may now be cleared by clear_page_tables().
>>
>> Reported-by: Suzuki K. Poulose <Suzuki.Poulose@arm.com>
>> Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
>> ---
>>
>>  *** THIS is not tested with 16k pages ***
>>
>>  arch/arm64/mm/kasan_init.c | 87 ++++++++++++++++++++++++++++++++++++++++------
>>  1 file changed, 76 insertions(+), 11 deletions(-)
>>
>> diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
>> index cf038c7..ea9f92a 100644
>> --- a/arch/arm64/mm/kasan_init.c
>> +++ b/arch/arm64/mm/kasan_init.c
>> @@ -22,6 +22,7 @@
>>  #include <asm/tlbflush.h>
>>  
>>  static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE);
>> +static pud_t tmp_pud[PAGE_SIZE/sizeof(pud_t)] __initdata __aligned(PAGE_SIZE);
>>  
>>  static void __init kasan_early_pte_populate(pmd_t *pmd, unsigned long addr,
>>  					unsigned long end)
>> @@ -92,20 +93,84 @@ asmlinkage void __init kasan_early_init(void)
>>  {
>>  	BUILD_BUG_ON(KASAN_SHADOW_OFFSET != KASAN_SHADOW_END - (1UL << 61));
>>  	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE));
>> -	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE));
>> +	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PUD_SIZE));
> 
> We also assume that even in the shared PUD case, the shadow region falls
> within the same PGD entry, or we would need more than a single tmp_pud.
> 
> It would be good to test for that.
> 

Something like this:

	#define KASAN_SHADOW_SIZE (KASAN_SHADOW_END - KASAN_SHADOW_START)

	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE)
			 && !((PGDIR_SIZE > KASAN_SHADOW_SIZE)
				 && IS_ALIGNED(KASAN_SHADOW_END, PUD_SIZE)));



>> +static void __init clear_puds(pgd_t *pgd, unsigned long addr, unsigned long end)
>> +{
>> +	pud_t *pud;
>> +	unsigned long next;
>> +
>> +	pud = pud_offset(pgd, addr);
>> +
>> +	do {
>> +		next = pud_addr_end(addr, end);
>> +		if (IS_ALIGNED(addr, PUD_SIZE) && end - addr >= PUD_SIZE)
>> +			pud_clear(pud);
> 
> I think this can just be:
> 
> 		if (next - addr == PUD_SIZE)
> 			pud_clear(pud);
> 
> Given that next can be at most PUD_SIZE from addr, and if it is, we know
> addr is aligned.
> 

Right.

>> +
>> +		if (!pud_none(*pud))
>> +			clear_pmds(pud, addr, next);
> 
> I don't understand this. The KASAN shadow region is PUD_SIZE aligned at
> either end, so KASAN should never own a partial pud entry like this.
> 
> Regardless, were this case to occur, surely we'd be clearing pmd entries
> in the active page tables? We didn't copy anything at the pmd level.
> 
> That doesn't seem right.
> 

Just take a look at the p?d_clear() macros, under CONFIG_PGTABLE_LEVELS=2 for example.
pgd_clear() and pud_clear() are nops, and pmd_clear() is what actually clears the pgd.

I could replace p?d_clear() with set_p?d(p?d, __p?d(0)).
In that case going down to the pmds is not needed; the set_p?d() macros will do it for us.
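
For illustration, this is roughly what the generic folding headers do
(include/asm-generic/pgtable-nopud.h, and pgtable-nopmd.h one level down):

	static inline void pgd_clear(pgd_t *pgd) { }	/* nop when folded */
	static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address)
	{
		return (pud_t *)pgd;	/* the "pud" is really the pgd entry */
	}

so with 2-level tables, pmd_clear() is what ends up writing the top-level
entry.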


...

>> +static void copy_pagetables(void)
>> +{
>> +	pgd_t *pgd = tmp_pg_dir + pgd_index(KASAN_SHADOW_START);
>> +
>> +	memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
>> +
>>  	/*
>> -	 * Remove references to kasan page tables from
>> -	 * swapper_pg_dir. pgd_clear() can't be used
>> -	 * here because it's nop on 2,3-level pagetable setups
>> +	 * If kasan shadow shares PGD with other mappings,
>> +	 * clear_page_tables() will clear puds instead of pgd,
>> +	 * so we need temporary pud table to keep early shadow mapped.
>>  	 */
>> -	for (; start < end; start += PGDIR_SIZE)
>> -		set_pgd(pgd_offset_k(start), __pgd(0));
>> +	if (PGDIR_SIZE > KASAN_SHADOW_END - KASAN_SHADOW_START) {
>> +		pud_t *pud;
>> +		pmd_t *pmd;
>> +		pte_t *pte;
>> +
>> +		memcpy(tmp_pud, pgd_page_vaddr(*pgd), sizeof(tmp_pud));
>> +
>> +		pgd_populate(&init_mm, pgd, tmp_pud);
>> +		pud = pud_offset(pgd, KASAN_SHADOW_START);
>> +		pmd = pmd_offset(pud, KASAN_SHADOW_START);
>> +		pud_populate(&init_mm, pud, pmd);
>> +		pte = pte_offset_kernel(pmd, KASAN_SHADOW_START);
>> +		pmd_populate_kernel(&init_mm, pmd, pte);
> 
> I don't understand why we need to do anything below the pud level here.
> We only copy down to the pud level, and we already initialised the
> shared ptes and pmds earlier.
> 
> Regardless of this patch, we currently initialise the shared tables
> repeatedly, which is redundant after the first time we initialise them.
> We could improve that.
> 

Sure, just pgd_populate() will work here, because this code is only for
16K+48-bit, which has 4-level page tables.
But it wouldn't work if 16K+48-bit had > 4 levels, because
pgd_populate() is a nop in that case, so we would need to go down and actually set 'tmp_pud'.

I just tried to avoid assumptions about the number of page table levels in that code.




>> +	}
>>  }
>>  
>>  static void __init cpu_set_ttbr1(unsigned long ttbr1)
>> @@ -123,16 +188,16 @@ void __init kasan_init(void)
>>  
>>  	/*
>>  	 * We are going to perform proper setup of shadow memory.
>> -	 * At first we should unmap early shadow (clear_pgds() call bellow).
>> +	 * At first we should unmap early shadow (clear_page_tables()).
>>  	 * However, instrumented code couldn't execute without shadow memory.
>>  	 * tmp_pg_dir used to keep early shadow mapped until full shadow
>>  	 * setup will be finished.
>>  	 */
>> -	memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
>> +	copy_pagetables();
>>  	cpu_set_ttbr1(__pa(tmp_pg_dir));
>>  	flush_tlb_all();
> 
> As a heads-up, I will shortly have patches altering the swap of TTBR1,
> as it risks conflicting TLB entries and misses barriers.
> 
> Otherwise, we need a dsb(ishst) between the memcpy and writing to the
> TTBR to ensure that the tables are visible to the MMU.
> 

Thanks.

> Thanks,
> Mark.
>
Mark Rutland Nov. 26, 2015, 4:21 p.m. UTC | #3
On Thu, Nov 26, 2015 at 06:47:36PM +0300, Andrey Ryabinin wrote:
> 
> 
> On 11/26/2015 05:48 PM, Mark Rutland wrote:
> > Hi,
> > 
> > On Thu, Nov 26, 2015 at 04:14:46PM +0300, Andrey Ryabinin wrote:
> >> Currently kasan assumes that shadow memory covers one or more entire PGDs.
> >> That's not true for 16K pages + 48-bit VA space, where PGDIR_SIZE is bigger
> >> than the whole shadow memory.
> >>
> >> This patch tries to fix that case.
> >> clear_page_tables() is a new replacement for clear_pgds(). Instead of always
> >> clearing pgds, it clears the top-level page table entries that belong
> >> entirely to shadow memory.
> >> In addition to 'tmp_pg_dir' we now have 'tmp_pud', which is used to store
> >> the puds that may now be cleared by clear_page_tables().
> >>
> >> Reported-by: Suzuki K. Poulose <Suzuki.Poulose@arm.com>
> >> Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
> >> ---
> >>
> >>  *** THIS is not tested with 16k pages ***
> >>
> >>  arch/arm64/mm/kasan_init.c | 87 ++++++++++++++++++++++++++++++++++++++++------
> >>  1 file changed, 76 insertions(+), 11 deletions(-)
> >>
> >> diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
> >> index cf038c7..ea9f92a 100644
> >> --- a/arch/arm64/mm/kasan_init.c
> >> +++ b/arch/arm64/mm/kasan_init.c
> >> @@ -22,6 +22,7 @@
> >>  #include <asm/tlbflush.h>
> >>  
> >>  static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE);
> >> +static pud_t tmp_pud[PAGE_SIZE/sizeof(pud_t)] __initdata __aligned(PAGE_SIZE);
> >>  
> >>  static void __init kasan_early_pte_populate(pmd_t *pmd, unsigned long addr,
> >>  					unsigned long end)
> >> @@ -92,20 +93,84 @@ asmlinkage void __init kasan_early_init(void)
> >>  {
> >>  	BUILD_BUG_ON(KASAN_SHADOW_OFFSET != KASAN_SHADOW_END - (1UL << 61));
> >>  	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE));
> >> -	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE));
> >> +	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PUD_SIZE));
> > 
> > We also assume that even in the shared PUD case, the shadow region falls
> > within the same PGD entry, or we would need more than a single tmp_pud.
> > 
> > It would be good to test for that.
> > 
> 
> Something like this:
> 
> 	#define KASAN_SHADOW_SIZE (KASAN_SHADOW_END - KASAN_SHADOW_START)
> 
> 	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE)
> 			 && !((PGDIR_SIZE > KASAN_SHADOW_SIZE)
> 				 && IS_ALIGNED(KASAN_SHADOW_END, PUD_SIZE)));

I was thinking something more like:

	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PUD_SIZE));
	BUILD_BUG_ON(KASAN_SHADOW_START >> PGDIR_SHIFT !=
		     KASAN_SHADOW_END >> PGDIR_SHIFT);

> >> +		if (!pud_none(*pud))
> >> +			clear_pmds(pud, addr, next);
> > 
> > I don't understand this. The KASAN shadow region is PUD_SIZE aligned at
> > either end, so KASAN should never own a partial pud entry like this.
> > 
> > Regardless, were this case to occur, surely we'd be clearing pmd entries
> > in the active page tables? We didn't copy anything at the pmd level.
> > 
> > That doesn't seem right.
> > 
> 
> Just take a look at the p?d_clear() macros, under CONFIG_PGTABLE_LEVELS=2 for example.
> pgd_clear() and pud_clear() are nops, and pmd_clear() is what actually clears the pgd.

I see. Thanks for pointing that out.

I detest the weird folding behaviour we have in the p??_* macros. It
violates least surprise almost every time.

> I could replace p?d_clear() with set_p?d(p?d, __p?d(0)).
> In that case going down to the pmds is not needed; the set_p?d() macros will do it for us.

I think it would be simpler to rely on the fact that we only use puds
with 4 levels of table (and hence the p??_* macros will operate at the
levels their names imply).

We can verify that at build time with:

BUILD_BUG_ON(CONFIG_PGTABLE_LEVELS != 4 &&
	     (!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE) ||
	      !IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE)));

> >> +static void copy_pagetables(void)
> >> +{
> >> +	pgd_t *pgd = tmp_pg_dir + pgd_index(KASAN_SHADOW_START);
> >> +
> >> +	memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
> >> +
> >>  	/*
> >> -	 * Remove references to kasan page tables from
> >> -	 * swapper_pg_dir. pgd_clear() can't be used
> >> -	 * here because it's nop on 2,3-level pagetable setups
> >> +	 * If kasan shadow shares PGD with other mappings,
> >> +	 * clear_page_tables() will clear puds instead of pgd,
> >> +	 * so we need temporary pud table to keep early shadow mapped.
> >>  	 */
> >> -	for (; start < end; start += PGDIR_SIZE)
> >> -		set_pgd(pgd_offset_k(start), __pgd(0));
> >> +	if (PGDIR_SIZE > KASAN_SHADOW_END - KASAN_SHADOW_START) {
> >> +		pud_t *pud;
> >> +		pmd_t *pmd;
> >> +		pte_t *pte;
> >> +
> >> +		memcpy(tmp_pud, pgd_page_vaddr(*pgd), sizeof(tmp_pud));
> >> +
> >> +		pgd_populate(&init_mm, pgd, tmp_pud);
> >> +		pud = pud_offset(pgd, KASAN_SHADOW_START);
> >> +		pmd = pmd_offset(pud, KASAN_SHADOW_START);
> >> +		pud_populate(&init_mm, pud, pmd);
> >> +		pte = pte_offset_kernel(pmd, KASAN_SHADOW_START);
> >> +		pmd_populate_kernel(&init_mm, pmd, pte);
> > 
> > I don't understand why we need to do anything below the pud level here.
> > We only copy down to the pud level, and we already initialised the
> > shared ptes and pmds earlier.
> > 
> > Regardless of this patch, we currently initialise the shared tables
> > repeatedly, which is redundant after the first time we initialise them.
> > We could improve that.
> > 
> 
> Sure, just pgd_populate() will work here, because this code is only for
> 16K+48-bit, which has 4-level page tables.
> But it wouldn't work if 16K+48-bit had > 4 levels, because
> pgd_populate() is a nop in that case, so we would need to go down and actually set 'tmp_pud'.

I don't follow.

16K + 48-bit will always require 4 levels given the page table format.
We never have more than 4 levels.

Thanks,
Mark.
Andrey Ryabinin Nov. 26, 2015, 4:40 p.m. UTC | #4
On 11/26/2015 07:21 PM, Mark Rutland wrote:
> On Thu, Nov 26, 2015 at 06:47:36PM +0300, Andrey Ryabinin wrote:
>>
>>
>> On 11/26/2015 05:48 PM, Mark Rutland wrote:
>>> Hi,
>>>
>>> On Thu, Nov 26, 2015 at 04:14:46PM +0300, Andrey Ryabinin wrote:
>>>> Currently kasan assumes that shadow memory covers one or more entire PGDs.
>>>> That's not true for 16K pages + 48-bit VA space, where PGDIR_SIZE is bigger
>>>> than the whole shadow memory.
>>>>
>>>> This patch tries to fix that case.
>>>> clear_page_tables() is a new replacement for clear_pgds(). Instead of always
>>>> clearing pgds, it clears the top-level page table entries that belong
>>>> entirely to shadow memory.
>>>> In addition to 'tmp_pg_dir' we now have 'tmp_pud', which is used to store
>>>> the puds that may now be cleared by clear_page_tables().
>>>>
>>>> Reported-by: Suzuki K. Poulose <Suzuki.Poulose@arm.com>
>>>> Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
>>>> ---
>>>>
>>>>  *** THIS is not tested with 16k pages ***
>>>>
>>>>  arch/arm64/mm/kasan_init.c | 87 ++++++++++++++++++++++++++++++++++++++++------
>>>>  1 file changed, 76 insertions(+), 11 deletions(-)
>>>>
>>>> diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
>>>> index cf038c7..ea9f92a 100644
>>>> --- a/arch/arm64/mm/kasan_init.c
>>>> +++ b/arch/arm64/mm/kasan_init.c
>>>> @@ -22,6 +22,7 @@
>>>>  #include <asm/tlbflush.h>
>>>>  
>>>>  static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE);
>>>> +static pud_t tmp_pud[PAGE_SIZE/sizeof(pud_t)] __initdata __aligned(PAGE_SIZE);
>>>>  
>>>>  static void __init kasan_early_pte_populate(pmd_t *pmd, unsigned long addr,
>>>>  					unsigned long end)
>>>> @@ -92,20 +93,84 @@ asmlinkage void __init kasan_early_init(void)
>>>>  {
>>>>  	BUILD_BUG_ON(KASAN_SHADOW_OFFSET != KASAN_SHADOW_END - (1UL << 61));
>>>>  	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE));
>>>> -	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE));
>>>> +	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PUD_SIZE));
>>>
>>> We also assume that even in the shared PUD case, the shadow region falls
>>> within the same PGD entry, or we would need more than a single tmp_pud.
>>>
>>> It would be good to test for that.
>>>
>>
>> Something like this:
>>
>> 	#define KASAN_SHADOW_SIZE (KASAN_SHADOW_END - KASAN_SHADOW_START)
>>
>> 	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE)
>> 			 && !((PGDIR_SIZE > KASAN_SHADOW_SIZE)
>> 				 && IS_ALIGNED(KASAN_SHADOW_END, PUD_SIZE)));
> 
> I was thinking something more like:
> 
> 	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PUD_SIZE));
> 	BUILD_BUG_ON(KASAN_SHADOW_START >> PGDIR_SHIFT !=
> 		     KASAN_SHADOW_END >> PGDIR_SHIFT);
> 
>>>> +		if (!pud_none(*pud))
>>>> +			clear_pmds(pud, addr, next);
>>>
>>> I don't understand this. The KASAN shadow region is PUD_SIZE aligned at
>>> either end, so KASAN should never own a partial pud entry like this.
>>>
>>> Regardless, were this case to occur, surely we'd be clearing pmd entries
>>> in the active page tables? We didn't copy anything at the pmd level.
>>>
>>> That doesn't seem right.
>>>
>>
>> Just take a look at the p?d_clear() macros, under CONFIG_PGTABLE_LEVELS=2 for example.
>> pgd_clear() and pud_clear() are nops, and pmd_clear() is what actually clears the pgd.
> 
> I see. Thanks for pointing that out.
> 
> I detest the weird folding behaviour we have in the p??_* macros. It
> violates least surprise almost every time.
> 
>> I could replace p?d_clear() with set_p?d(p?d, __p?d(0)).
>> In that case going down to the pmds is not needed; the set_p?d() macros will do it for us.
> 
> I think it would be simpler to rely on the fact that we only use puds
> with 4 levels of table (and hence the p??_* macros will operate at the
> levels their names imply).
> 

It's not only about puds.
E.g. if we need to clear a pgd with 2-level page tables, then we need to call pmd_clear().

So we should either leave this code as is, or switch to set_pgd/set_pud.


> We can verify that at build time with:
> 
> BUILD_BUG_ON(CONFIG_PGTABLE_LEVELS != 4 &&
> 	     (!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE) ||
> 	      !IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE)));
> 
>>>> +static void copy_pagetables(void)
>>>> +{
>>>> +	pgd_t *pgd = tmp_pg_dir + pgd_index(KASAN_SHADOW_START);
>>>> +
>>>> +	memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
>>>> +
>>>>  	/*
>>>> -	 * Remove references to kasan page tables from
>>>> -	 * swapper_pg_dir. pgd_clear() can't be used
>>>> -	 * here because it's nop on 2,3-level pagetable setups
>>>> +	 * If kasan shadow shares PGD with other mappings,
>>>> +	 * clear_page_tables() will clear puds instead of pgd,
>>>> +	 * so we need temporary pud table to keep early shadow mapped.
>>>>  	 */
>>>> -	for (; start < end; start += PGDIR_SIZE)
>>>> -		set_pgd(pgd_offset_k(start), __pgd(0));
>>>> +	if (PGDIR_SIZE > KASAN_SHADOW_END - KASAN_SHADOW_START) {
>>>> +		pud_t *pud;
>>>> +		pmd_t *pmd;
>>>> +		pte_t *pte;
>>>> +
>>>> +		memcpy(tmp_pud, pgd_page_vaddr(*pgd), sizeof(tmp_pud));
>>>> +
>>>> +		pgd_populate(&init_mm, pgd, tmp_pud);
>>>> +		pud = pud_offset(pgd, KASAN_SHADOW_START);
>>>> +		pmd = pmd_offset(pud, KASAN_SHADOW_START);
>>>> +		pud_populate(&init_mm, pud, pmd);
>>>> +		pte = pte_offset_kernel(pmd, KASAN_SHADOW_START);
>>>> +		pmd_populate_kernel(&init_mm, pmd, pte);
>>>
>>> I don't understand why we need to do anything below the pud level here.
>>> We only copy down to the pud level, and we already initialised the
>>> shared ptes and pmds earlier.
>>>
>>> Regardless of this patch, we currently initialise the shared tables
>>> repeatedly, which is redundant after the first time we initialise them.
>>> We could improve that.
>>>
>>
>> Sure, just pgd_populate() will work here, because this code is only for
>> 16K+48-bit, which has 4-level page tables.
>> But it wouldn't work if 16K+48-bit had > 4 levels, because
>> pgd_populate() is a nop in that case, so we would need to go down and actually set 'tmp_pud'.
> 
> I don't follow.
> 
> 16K + 48-bit will always require 4 levels given the page table format.
> We never have more than 4 levels.
> 

Oh, it should be '< 4' of course.
Yes, 16K + 48-bit is always 4 levels, but I tried not to rely on that here.

But since we can rely on 4 levels here, I'm gonna leave only pgd_populate() and add your BUILD_BUG_ON().


> Thanks,
> Mark.
>
Ard Biesheuvel Nov. 26, 2015, 4:40 p.m. UTC | #5
On 26 November 2015 at 14:14, Andrey Ryabinin <aryabinin@virtuozzo.com> wrote:
> Currently kasan assumes that shadow memory covers one or more entire PGDs.
> That's not true for 16K pages + 48-bit VA space, where PGDIR_SIZE is bigger
> than the whole shadow memory.
>
> This patch tries to fix that case.
> clear_page_tables() is a new replacement for clear_pgds(). Instead of always
> clearing pgds, it clears the top-level page table entries that belong
> entirely to shadow memory.
> In addition to 'tmp_pg_dir' we now have 'tmp_pud', which is used to store
> the puds that may now be cleared by clear_page_tables().
>
> Reported-by: Suzuki K. Poulose <Suzuki.Poulose@arm.com>
> Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>

I would argue that the Kasan code is complicated enough, and we should
avoid complicating it even further for a configuration that is highly
theoretical in nature.

In a 16k configuration, the 4th level only adds a single bit of VA
space (which is, as I understand it, exactly the issue you need to
address here since the top level page table has only 2 entries and
hence does not divide by 8 cleanly), which means you are better off
using 3 levels unless you *really* need more than 128 TB of VA space.
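
For the numbers (a sketch, assuming the 16K granule with 11 bits resolved
per level):

	3 levels: 14 + 3 * 11 = 47-bit VA  ->  128 TB per TTBR
	4 levels: capped at a 48-bit VA    ->  256 TB, but the top-level
	                                       table holds only 2 entries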

So can't we just live with the limitation, and keep the current code?
Mark Rutland Nov. 26, 2015, 5:08 p.m. UTC | #6
> >>>> +		if (!pud_none(*pud))
> >>>> +			clear_pmds(pud, addr, next);
> >>>
> >>> I don't understand this. The KASAN shadow region is PUD_SIZE aligned at
> >>> either end, so KASAN should never own a partial pud entry like this.
> >>>
> >>> Regardless, were this case to occur, surely we'd be clearing pmd entries
> >>> in the active page tables? We didn't copy anything at the pmd level.
> >>>
> >>> That doesn't seem right.
> >>>
> >>
> >> Just take a look at the p?d_clear() macros, under CONFIG_PGTABLE_LEVELS=2 for example.
> >> pgd_clear() and pud_clear() are nops, and pmd_clear() is what actually clears the pgd.
> > 
> > I see. Thanks for pointing that out.
> > 
> > I detest the weird folding behaviour we have in the p??_* macros. It
> > violates least surprise almost every time.
> > 
> >> I could replace p?d_clear() with set_p?d(p?d, __p?d(0)).
> >> In that case going down to the pmds is not needed; the set_p?d() macros will do it for us.
> > 
> > I think it would be simpler to rely on the fact that we only use puds
> > with 4 levels of table (and hence the p??_* macros will operate at the
> > levels their names imply).
> > 
> 
> It's not only about puds.
> E.g. if we need to clear a pgd with 2-level page tables, then we need to call pmd_clear().

Ah. Yes :(

I will reiterate that I hate the folding behaviour.

> So we should either leave this code as is, or switch to set_pgd/set_pud.

I think set_p?d is preferable.
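
A sketch of what the top level might look like with that approach
(illustrative only, combining it with the next - addr == PGDIR_SIZE
simplification above):

	do {
		next = pgd_addr_end(addr, end);
		if (next - addr == PGDIR_SIZE)
			/* writes the real top-level entry even when
			 * the pud/pmd levels are folded */
			set_pgd(pgd, __pgd(0));
		else if (!pgd_none(*pgd))
			clear_puds(pgd, addr, next);
	} while (pgd++, addr = next, addr != end);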

> > We can verify that at build time with:
> > 
> > BUILD_BUG_ON(CONFIG_PGTABLE_LEVELS != 4 &&
> > 	     (!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE) ||
> > 	      !IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE)));
> > 
> >>>> +static void copy_pagetables(void)
> >>>> +{
> >>>> +	pgd_t *pgd = tmp_pg_dir + pgd_index(KASAN_SHADOW_START);
> >>>> +
> >>>> +	memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
> >>>> +
> >>>>  	/*
> >>>> -	 * Remove references to kasan page tables from
> >>>> -	 * swapper_pg_dir. pgd_clear() can't be used
> >>>> -	 * here because it's nop on 2,3-level pagetable setups
> >>>> +	 * If kasan shadow shares PGD with other mappings,
> >>>> +	 * clear_page_tables() will clear puds instead of pgd,
> >>>> +	 * so we need temporary pud table to keep early shadow mapped.
> >>>>  	 */
> >>>> -	for (; start < end; start += PGDIR_SIZE)
> >>>> -		set_pgd(pgd_offset_k(start), __pgd(0));
> >>>> +	if (PGDIR_SIZE > KASAN_SHADOW_END - KASAN_SHADOW_START) {
> >>>> +		pud_t *pud;
> >>>> +		pmd_t *pmd;
> >>>> +		pte_t *pte;
> >>>> +
> >>>> +		memcpy(tmp_pud, pgd_page_vaddr(*pgd), sizeof(tmp_pud));
> >>>> +
> >>>> +		pgd_populate(&init_mm, pgd, tmp_pud);
> >>>> +		pud = pud_offset(pgd, KASAN_SHADOW_START);
> >>>> +		pmd = pmd_offset(pud, KASAN_SHADOW_START);
> >>>> +		pud_populate(&init_mm, pud, pmd);
> >>>> +		pte = pte_offset_kernel(pmd, KASAN_SHADOW_START);
> >>>> +		pmd_populate_kernel(&init_mm, pmd, pte);
> >>>
> >>> I don't understand why we need to do anything below the pud level here.
> >>> We only copy down to the pud level, and we already initialised the
> >>> shared ptes and pmds earlier.
> >>>
> >>> Regardless of this patch, we currently initialise the shared tables
> >>> repeatedly, which is redundant after the first time we initialise them.
> >>> We could improve that.
> >>>
> >>
> >> Sure, just pgd_populate() will work here, because this code is only for
> >> 16K+48-bit, which has 4-level page tables.
> >> But it wouldn't work if 16K+48-bit had > 4 levels, because
> >> pgd_populate() is a nop in that case, so we would need to go down and actually set 'tmp_pud'.
> > 
> > I don't follow.
> > 
> > 16K + 48-bit will always require 4 levels given the page table format.
> > We never have more than 4 levels.
> > 
> 
> Oh, it should be '< 4' of course.
> Yes, 16K + 48-bit is always 4 levels, but I tried not to rely on that here.
> 
> But since we can rely on 4 levels here, I'm gonna leave only pgd_populate() and add your BUILD_BUG_ON().

Ok. That sounds good to me.

Thanks,
Mark.
Andrey Ryabinin Nov. 27, 2015, 8:12 a.m. UTC | #7
On 11/26/2015 07:40 PM, Ard Biesheuvel wrote:
> On 26 November 2015 at 14:14, Andrey Ryabinin <aryabinin@virtuozzo.com> wrote:
>> Currently kasan assumes that shadow memory covers one or more entire PGDs.
>> That's not true for 16K pages + 48-bit VA space, where PGDIR_SIZE is bigger
>> than the whole shadow memory.
>>
>> This patch tries to fix that case.
>> clear_page_tables() is a new replacement for clear_pgds(). Instead of always
>> clearing pgds, it clears the top-level page table entries that belong
>> entirely to shadow memory.
>> In addition to 'tmp_pg_dir' we now have 'tmp_pud', which is used to store
>> the puds that may now be cleared by clear_page_tables().
>>
>> Reported-by: Suzuki K. Poulose <Suzuki.Poulose@arm.com>
>> Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
> 
> I would argue that the Kasan code is complicated enough, and we should
> avoid complicating it even further for a configuration that is highly
> theoretical in nature.
> 
> In a 16k configuration, the 4th level only adds a single bit of VA
> space (which is, as I understand it, exactly the issue you need to
> address here since the top level page table has only 2 entries and
> hence does not divide by 8 cleanly), which means you are better off
> using 3 levels unless you *really* need more than 128 TB of VA space.
> 
> So can't we just live with the limitation, and keep the current code?
 

No objections from my side. Let's keep the current code.
Catalin Marinas Nov. 27, 2015, 9:35 a.m. UTC | #8
On Fri, Nov 27, 2015 at 11:12:28AM +0300, Andrey Ryabinin wrote:
> On 11/26/2015 07:40 PM, Ard Biesheuvel wrote:
> > On 26 November 2015 at 14:14, Andrey Ryabinin <aryabinin@virtuozzo.com> wrote:
> >> Currently kasan assumes that shadow memory covers one or more entire PGDs.
> >> That's not true for 16K pages + 48-bit VA space, where PGDIR_SIZE is bigger
> >> than the whole shadow memory.
> >>
> >> This patch tries to fix that case.
> >> clear_page_tables() is a new replacement for clear_pgds(). Instead of always
> >> clearing pgds, it clears the top-level page table entries that belong
> >> entirely to shadow memory.
> >> In addition to 'tmp_pg_dir' we now have 'tmp_pud', which is used to store
> >> the puds that may now be cleared by clear_page_tables().
> >>
> >> Reported-by: Suzuki K. Poulose <Suzuki.Poulose@arm.com>
> >> Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
> > 
> > I would argue that the Kasan code is complicated enough, and we should
> > avoid complicating it even further for a configuration that is highly
> > theoretical in nature.
> > 
> > In a 16k configuration, the 4th level only adds a single bit of VA
> > space (which is, as I understand it, exactly the issue you need to
> > address here since the top level page table has only 2 entries and
> > hence does not divide by 8 cleanly), which means you are better off
> > using 3 levels unless you *really* need more than 128 TB of VA space.
> > 
> > So can't we just live with the limitation, and keep the current code?
> 
> No objections from my side. Let's keep the current code.

Ard had a good point, so fine by me as well.
Will Deacon Nov. 27, 2015, 10:02 a.m. UTC | #9
On Fri, Nov 27, 2015 at 09:35:29AM +0000, Catalin Marinas wrote:
> On Fri, Nov 27, 2015 at 11:12:28AM +0300, Andrey Ryabinin wrote:
> > On 11/26/2015 07:40 PM, Ard Biesheuvel wrote:
> > > On 26 November 2015 at 14:14, Andrey Ryabinin <aryabinin@virtuozzo.com> wrote:
> > >> Currently kasan assumes that shadow memory covers one or more entire PGDs.
> > >> That's not true for 16K pages + 48-bit VA space, where PGDIR_SIZE is bigger
> > >> than the whole shadow memory.
> > >>
> > >> This patch tries to fix that case.
> > >> clear_page_tables() is a new replacement for clear_pgds(). Instead of always
> > >> clearing pgds, it clears the top-level page table entries that belong
> > >> entirely to shadow memory.
> > >> In addition to 'tmp_pg_dir' we now have 'tmp_pud', which is used to store
> > >> the puds that may now be cleared by clear_page_tables().
> > >>
> > >> Reported-by: Suzuki K. Poulose <Suzuki.Poulose@arm.com>
> > >> Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
> > > 
> > > I would argue that the Kasan code is complicated enough, and we should
> > > avoid complicating it even further for a configuration that is highly
> > > theoretical in nature.
> > > 
> > > In a 16k configuration, the 4th level only adds a single bit of VA
> > > space (which is, as I understand it, exactly the issue you need to
> > > address here since the top level page table has only 2 entries and
> > > hence does not divide by 8 cleanly), which means you are better off
> > > using 3 levels unless you *really* need more than 128 TB of VA space.
> > > 
> > > So can't we just live with the limitation, and keep the current code?
> > 
> > No objections from my side. Let's keep the current code.
> 
> Ard had a good point, so fine by me as well.

Ok, so obvious follow-up question: why do we even support 48-bit + 16k
pages in the kernel? Either it's useful, and we make things work with it,
or it's not and we can drop it (or, at least, hide it behind EXPERT like
we do for 36-bit).

Will
Ard Biesheuvel Nov. 27, 2015, 10:39 a.m. UTC | #10
On 27 November 2015 at 11:02, Will Deacon <will.deacon@arm.com> wrote:
> On Fri, Nov 27, 2015 at 09:35:29AM +0000, Catalin Marinas wrote:
>> On Fri, Nov 27, 2015 at 11:12:28AM +0300, Andrey Ryabinin wrote:
>> > On 11/26/2015 07:40 PM, Ard Biesheuvel wrote:
>> > > On 26 November 2015 at 14:14, Andrey Ryabinin <aryabinin@virtuozzo.com> wrote:
>> > >> Currently kasan assumes that shadow memory covers one or more entire PGDs.
>> > >> That's not true for 16K pages + 48-bit VA space, where PGDIR_SIZE is bigger
>> > >> than the whole shadow memory.
>> > >>
>> > >> This patch tries to fix that case.
>> > >> clear_page_tables() is a new replacement for clear_pgds(). Instead of always
>> > >> clearing pgds, it clears the top-level page table entries that belong
>> > >> entirely to shadow memory.
>> > >> In addition to 'tmp_pg_dir' we now have 'tmp_pud', which is used to store
>> > >> the puds that may now be cleared by clear_page_tables().
>> > >>
>> > >> Reported-by: Suzuki K. Poulose <Suzuki.Poulose@arm.com>
>> > >> Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
>> > >
>> > > I would argue that the Kasan code is complicated enough, and we should
>> > > avoid complicating it even further for a configuration that is highly
>> > > theoretical in nature.
>> > >
>> > > In a 16k configuration, the 4th level only adds a single bit of VA
>> > > space (which is, as I understand it, exactly the issue you need to
>> > > address here since the top level page table has only 2 entries and
>> > > hence does not divide by 8 cleanly), which means you are better off
>> > > using 3 levels unless you *really* need more than 128 TB of VA space.
>> > >
>> > > So can't we just live with the limitation, and keep the current code?
>> >
>> > No objections from my side. Let's keep the current code.
>>
>> Ard had a good point, so fine by me as well.
>
> Ok, so obvious follow-up question: why do we even support 48-bit + 16k
> pages in the kernel? Either it's useful, and we make things work with it,
> or it's not and we can drop it (or, at least, hide it behind EXPERT like
> we do for 36-bit).
>

So there's 10 kinds of features in the world, useful ones and !useful ones? :-)

I think 48-bit/16k is somewhat useful, and I think we should support
it. But I also think we should be pragmatic, and not go out of our way
to support the combinatorial expansion of all niche features enabled
together. I think it is perfectly fine to limit kasan support to
configurations whose top level translation table divides by 8 cleanly
(which only excludes 16k/48-bit anyway).

However, I think it deserves being hidden behind CONFIG_EXPERT more
than 36-bit/16k does.
Catalin Marinas Nov. 27, 2015, 2:11 p.m. UTC | #11
On Fri, Nov 27, 2015 at 10:02:11AM +0000, Will Deacon wrote:
> On Fri, Nov 27, 2015 at 09:35:29AM +0000, Catalin Marinas wrote:
> > On Fri, Nov 27, 2015 at 11:12:28AM +0300, Andrey Ryabinin wrote:
> > > On 11/26/2015 07:40 PM, Ard Biesheuvel wrote:
> > > > On 26 November 2015 at 14:14, Andrey Ryabinin <aryabinin@virtuozzo.com> wrote:
> > > >> Currently kasan assumes that shadow memory covers one or more entire PGDs.
> > > >> That's not true for 16K pages + 48-bit VA space, where PGDIR_SIZE is bigger
> > > >> than the whole shadow memory.
> > > >>
> > > >> This patch tries to fix that case.
> > > >> clear_page_tables() is a new replacement for clear_pgds(). Instead of always
> > > >> clearing pgds, it clears the top-level page table entries that belong
> > > >> entirely to shadow memory.
> > > >> In addition to 'tmp_pg_dir' we now have 'tmp_pud', which is used to store
> > > >> the puds that may now be cleared by clear_page_tables().
> > > >>
> > > >> Reported-by: Suzuki K. Poulose <Suzuki.Poulose@arm.com>
> > > >> Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
> > > > 
> > > > I would argue that the Kasan code is complicated enough, and we should
> > > > avoid complicating it even further for a configuration that is highly
> > > > theoretical in nature.
> > > > 
> > > > In a 16k configuration, the 4th level only adds a single bit of VA
> > > > space (which is, as I understand it, exactly the issue you need to
> > > > address here since the top level page table has only 2 entries and
> > > > hence does not divide by 8 cleanly), which means you are better off
> > > > using 3 levels unless you *really* need more than 128 TB of VA space.
> > > > 
> > > > So can't we just live with the limitation, and keep the current code?
> > > 
> > > No objections from my side. Let's keep the current code.
> > 
> > Ard had a good point, so fine by me as well.
> 
> Ok, so obvious follow-up question: why do we even support 48-bit + 16k
> pages in the kernel? Either it's useful, and we make things work with it,
> or it's not and we can drop it (or, at least, hide it behind EXPERT like
> we do for 36-bit).

One reason is hardware validation (I guess that may be the only reason
for 16KB in general ;)). For each of the page sizes we support two VA
ranges: 48-bit (maximum) and a recommended one for the corresponding
granule. With 16K, the difference is not significant (47 to 48), so we
could follow Ard's suggestion and make it depend on EXPERT (we already
do this for 16KB and 36-bit VA).
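
For reference, the EXPERT gating being discussed might look roughly like
this in arch/arm64/Kconfig (a hypothetical sketch; the 36-bit option is
already gated this way):

	config ARM64_VA_BITS_48
		bool "48-bit" if !ARM64_16K_PAGES || EXPERT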

Patch

diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index cf038c7..ea9f92a 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -22,6 +22,7 @@
 #include <asm/tlbflush.h>
 
 static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE);
+static pud_t tmp_pud[PAGE_SIZE/sizeof(pud_t)] __initdata __aligned(PAGE_SIZE);
 
 static void __init kasan_early_pte_populate(pmd_t *pmd, unsigned long addr,
 					unsigned long end)
@@ -92,20 +93,84 @@ asmlinkage void __init kasan_early_init(void)
 {
 	BUILD_BUG_ON(KASAN_SHADOW_OFFSET != KASAN_SHADOW_END - (1UL << 61));
 	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE));
-	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE));
+	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PUD_SIZE));
 	kasan_map_early_shadow();
 }
 
-static void __init clear_pgds(unsigned long start,
-			unsigned long end)
+static void __init clear_pmds(pud_t *pud, unsigned long addr, unsigned long end)
 {
+	pmd_t *pmd;
+	unsigned long next;
+
+	pmd = pmd_offset(pud, addr);
+
+	do {
+		next = pmd_addr_end(addr, end);
+		if (IS_ALIGNED(addr, PMD_SIZE) && end - addr >= PMD_SIZE)
+			pmd_clear(pmd);
+
+	} while (pmd++, addr = next, addr != end);
+}
+
+static void __init clear_puds(pgd_t *pgd, unsigned long addr, unsigned long end)
+{
+	pud_t *pud;
+	unsigned long next;
+
+	pud = pud_offset(pgd, addr);
+
+	do {
+		next = pud_addr_end(addr, end);
+		if (IS_ALIGNED(addr, PUD_SIZE) && end - addr >= PUD_SIZE)
+			pud_clear(pud);
+
+		if (!pud_none(*pud))
+			clear_pmds(pud, addr, next);
+	} while (pud++, addr = next, addr != end);
+}
+
+static void __init clear_page_tables(unsigned long addr, unsigned long end)
+{
+	pgd_t *pgd;
+	unsigned long next;
+
+	pgd = pgd_offset_k(addr);
+
+	do {
+		next = pgd_addr_end(addr, end);
+		if (IS_ALIGNED(addr, PGDIR_SIZE) && end - addr >= PGDIR_SIZE)
+			pgd_clear(pgd);
+
+		if (!pgd_none(*pgd))
+			clear_puds(pgd, addr, next);
+	} while (pgd++, addr = next, addr != end);
+}
+
+static void copy_pagetables(void)
+{
+	pgd_t *pgd = tmp_pg_dir + pgd_index(KASAN_SHADOW_START);
+
+	memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
+
 	/*
-	 * Remove references to kasan page tables from
-	 * swapper_pg_dir. pgd_clear() can't be used
-	 * here because it's nop on 2,3-level pagetable setups
+	 * If kasan shadow shares PGD with other mappings,
+	 * clear_page_tables() will clear puds instead of pgd,
+	 * so we need temporary pud table to keep early shadow mapped.
 	 */
-	for (; start < end; start += PGDIR_SIZE)
-		set_pgd(pgd_offset_k(start), __pgd(0));
+	if (PGDIR_SIZE > KASAN_SHADOW_END - KASAN_SHADOW_START) {
+		pud_t *pud;
+		pmd_t *pmd;
+		pte_t *pte;
+
+		memcpy(tmp_pud, pgd_page_vaddr(*pgd), sizeof(tmp_pud));
+
+		pgd_populate(&init_mm, pgd, tmp_pud);
+		pud = pud_offset(pgd, KASAN_SHADOW_START);
+		pmd = pmd_offset(pud, KASAN_SHADOW_START);
+		pud_populate(&init_mm, pud, pmd);
+		pte = pte_offset_kernel(pmd, KASAN_SHADOW_START);
+		pmd_populate_kernel(&init_mm, pmd, pte);
+	}
 }
 
 static void __init cpu_set_ttbr1(unsigned long ttbr1)
@@ -123,16 +188,16 @@ void __init kasan_init(void)
 
 	/*
 	 * We are going to perform proper setup of shadow memory.
-	 * At first we should unmap early shadow (clear_pgds() call bellow).
+	 * At first we should unmap early shadow (clear_page_tables()).
 	 * However, instrumented code couldn't execute without shadow memory.
 	 * tmp_pg_dir used to keep early shadow mapped until full shadow
 	 * setup will be finished.
 	 */
-	memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
+	copy_pagetables();
 	cpu_set_ttbr1(__pa(tmp_pg_dir));
 	flush_tlb_all();
 
-	clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
+	clear_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END);
 
 	kasan_populate_zero_shadow((void *)KASAN_SHADOW_START,
 			kasan_mem_to_shadow((void *)MODULES_VADDR));