[v33,05/14] arm64: mm: allow for unmapping part of kernel mapping

Message ID 20170315095941.25119-3-takahiro.akashi@linaro.org (mailing list archive)
State New, archived

Commit Message

AKASHI Takahiro March 15, 2017, 9:59 a.m. UTC
create_pgd_mapping() is enhanced here so that it will accept
PAGE_KERNEL_INVALID protection attribute and unmap a given range of memory.

The feature will be used in a later kdump patch to implement the protection
against possible corruption of crash dump kernel memory which is to be set
aside from ther other memory on primary kernel.

Note that, in this implementation, it assumes that all the range of memory
to be processed is mapped in page-level since the only current user is
kdump where page mappings are also required.

Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
---
 arch/arm64/include/asm/pgtable-prot.h |  1 +
 arch/arm64/mm/mmu.c                   | 17 +++++++++++------
 2 files changed, 12 insertions(+), 6 deletions(-)

Comments

James Morse March 21, 2017, 10:35 a.m. UTC | #1
Hi Akashi,

On 15/03/17 09:59, AKASHI Takahiro wrote:
> create_pgd_mapping() is enhanced here so that it will accept
> PAGE_KERNEL_INVALID protection attribute and unmap a given range of memory.
> 
> The feature will be used in a later kdump patch to implement the protection
> against possible corruption of crash dump kernel memory which is to be set
> aside from ther other memory on primary kernel.

Nit: ther -> the

> Note that, in this implementation, it assumes that all the range of memory
> to be processed is mapped in page-level since the only current user is
> kdump where page mappings are also required.

Using create_pgd_mapping() like this means the mappings will be updated via the
fixmap which is unnecessary as the page tables will be part of mapped memory. In
the worst case this adds an extra tlbi for every 2MB of crash image when we map
or unmap it. I don't think this matters.

This code used to be __init and it is the only user of FIX_PTE, so there won't
be any existing runtime users. The two arch_kexec_unprotect_crashkres() calls in
kexec are both protected by the kexec_mutex, and the call in hibernate happens
after disable_nonboot_cpus(), so these callers can't race with each other.

This looks safe to me.


> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> index d28dbcf596b6..cb359a3927ef 100644
> --- a/arch/arm64/mm/mmu.c
> +++ b/arch/arm64/mm/mmu.c
> @@ -128,7 +128,10 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
>  	do {
>  		pte_t old_pte = *pte;
>  
> -		set_pte(pte, pfn_pte(pfn, prot));
> +		if (pgprot_val(prot))
> +			set_pte(pte, pfn_pte(pfn, prot));
> +		else
> +			pte_clear(null, null, pte);

Lowercase NULLs? This relies on these values never being used... __set_fixmap()
in the same file passes &init_mm and the address, can we do the same to be
consistent?
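
A minimal sketch of what I mean, assuming 'addr' (the loop cursor already in
scope in alloc_init_pte()) is used as the address argument:

	if (pgprot_val(prot))
		set_pte(pte, pfn_pte(pfn, prot));
	else
		/* mirror __set_fixmap(): pass the mm and the virtual address */
		pte_clear(&init_mm, addr, pte);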


>  		pfn++;
>  
>  		/*

Reviewed-by: James Morse <james.morse@arm.com>


Thanks,

James
Ard Biesheuvel March 21, 2017, 11:16 a.m. UTC | #2
On 15 March 2017 at 09:59, AKASHI Takahiro <takahiro.akashi@linaro.org> wrote:
> create_pgd_mapping() is enhanced here so that it will accept
> PAGE_KERNEL_INVALID protection attribute and unmap a given range of memory.
>
> The feature will be used in a later kdump patch to implement the protection
> against possible corruption of crash dump kernel memory which is to be set
> aside from ther other memory on primary kernel.
>
> Note that, in this implementation, it assumes that all the range of memory
> to be processed is mapped in page-level since the only current user is
> kdump where page mappings are also required.
>
> Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>

Couldn't we use unmap_kernel_range() for this?
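
As a hedged sketch (untested): unmap_kernel_range() takes a kernel virtual
address and a size, and flushes the TLB for the range itself, so for the
reserved region described by crashk_res it could be as simple as:

	/* sketch only: assumes the region is page-mapped and crashk_res is set up */
	unmap_kernel_range(__phys_to_virt(crashk_res.start),
			   resource_size(&crashk_res));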

> ---
>  arch/arm64/include/asm/pgtable-prot.h |  1 +
>  arch/arm64/mm/mmu.c                   | 17 +++++++++++------
>  2 files changed, 12 insertions(+), 6 deletions(-)
>
> diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
> index 2142c7726e76..945d84cd5df7 100644
> --- a/arch/arm64/include/asm/pgtable-prot.h
> +++ b/arch/arm64/include/asm/pgtable-prot.h
> @@ -54,6 +54,7 @@
>  #define PAGE_KERNEL_ROX                __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
>  #define PAGE_KERNEL_EXEC       __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
>  #define PAGE_KERNEL_EXEC_CONT  __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT)
> +#define PAGE_KERNEL_INVALID    __pgprot(0)
>
>  #define PAGE_HYP               __pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_HYP_XN)
>  #define PAGE_HYP_EXEC          __pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY)
> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> index d28dbcf596b6..cb359a3927ef 100644
> --- a/arch/arm64/mm/mmu.c
> +++ b/arch/arm64/mm/mmu.c
> @@ -128,7 +128,10 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
>         do {
>                 pte_t old_pte = *pte;
>
> -               set_pte(pte, pfn_pte(pfn, prot));
> +               if (pgprot_val(prot))
> +                       set_pte(pte, pfn_pte(pfn, prot));
> +               else
> +                       pte_clear(null, null, pte);
>                 pfn++;
>
>                 /*
> @@ -309,12 +312,14 @@ static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt,
>         __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, false);
>  }
>
> -void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
> -                              unsigned long virt, phys_addr_t size,
> -                              pgprot_t prot, bool page_mappings_only)
> +/*
> + * Note that PAGE_KERNEL_INVALID should be used with page_mappings_only
> + * true for now.
> + */
> +void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
> +                       unsigned long virt, phys_addr_t size,
> +                       pgprot_t prot, bool page_mappings_only)
>  {
> -       BUG_ON(mm == &init_mm);
> -
>         __create_pgd_mapping(mm->pgd, phys, virt, size, prot,
>                              pgd_pgtable_alloc, page_mappings_only);
>  }
> --
> 2.11.1
AKASHI Takahiro March 23, 2017, 10:56 a.m. UTC | #3
Ard,

On Tue, Mar 21, 2017 at 11:16:34AM +0000, Ard Biesheuvel wrote:
> On 15 March 2017 at 09:59, AKASHI Takahiro <takahiro.akashi@linaro.org> wrote:
> > create_pgd_mapping() is enhanced here so that it will accept
> > PAGE_KERNEL_INVALID protection attribute and unmap a given range of memory.
> >
> > The feature will be used in a later kdump patch to implement the protection
> > against possible corruption of crash dump kernel memory which is to be set
> > aside from ther other memory on primary kernel.
> >
> > Note that, in this implementation, it assumes that all the range of memory
> > to be processed is mapped in page-level since the only current user is
> > kdump where page mappings are also required.
> >
> > Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
> 
> Couldn't we use unmap_kernel_range() for this?

I had almost forgotten about this function, but my understanding is that
it (and its map counterpart) is mainly meant for the "VM area" (< PAGE_OFFSET).
While it seems to work (and actually does) in place of create_pgd_mapping()
for now, I'm not sure whether that is an intended usage (now or in the future).

So I think it would be safer to keep using create_pgd_mapping().
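
For reference, a sketch of how the later kdump patch could then use the
enhanced create_pgd_mapping() with PAGE_KERNEL_INVALID (assuming crashk_res
describes the reserved region; the exact call sites belong to a later patch):

	void arch_kexec_protect_crashkres(void)
	{
		/* unmap the region; page_mappings_only must be true here */
		create_pgd_mapping(&init_mm, crashk_res.start,
				   __phys_to_virt(crashk_res.start),
				   resource_size(&crashk_res),
				   PAGE_KERNEL_INVALID, true);
		flush_tlb_all();
	}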

Thanks,
-Takahiro AKASHI


> > ---
> >  arch/arm64/include/asm/pgtable-prot.h |  1 +
> >  arch/arm64/mm/mmu.c                   | 17 +++++++++++------
> >  2 files changed, 12 insertions(+), 6 deletions(-)
> >
> > diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
> > index 2142c7726e76..945d84cd5df7 100644
> > --- a/arch/arm64/include/asm/pgtable-prot.h
> > +++ b/arch/arm64/include/asm/pgtable-prot.h
> > @@ -54,6 +54,7 @@
> >  #define PAGE_KERNEL_ROX                __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
> >  #define PAGE_KERNEL_EXEC       __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
> >  #define PAGE_KERNEL_EXEC_CONT  __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT)
> > +#define PAGE_KERNEL_INVALID    __pgprot(0)
> >
> >  #define PAGE_HYP               __pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_HYP_XN)
> >  #define PAGE_HYP_EXEC          __pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY)
> > diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> > index d28dbcf596b6..cb359a3927ef 100644
> > --- a/arch/arm64/mm/mmu.c
> > +++ b/arch/arm64/mm/mmu.c
> > @@ -128,7 +128,10 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
> >         do {
> >                 pte_t old_pte = *pte;
> >
> > -               set_pte(pte, pfn_pte(pfn, prot));
> > +               if (pgprot_val(prot))
> > +                       set_pte(pte, pfn_pte(pfn, prot));
> > +               else
> > +                       pte_clear(null, null, pte);
> >                 pfn++;
> >
> >                 /*
> > @@ -309,12 +312,14 @@ static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt,
> >         __create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, false);
> >  }
> >
> > -void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
> > -                              unsigned long virt, phys_addr_t size,
> > -                              pgprot_t prot, bool page_mappings_only)
> > +/*
> > + * Note that PAGE_KERNEL_INVALID should be used with page_mappings_only
> > + * true for now.
> > + */
> > +void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
> > +                       unsigned long virt, phys_addr_t size,
> > +                       pgprot_t prot, bool page_mappings_only)
> >  {
> > -       BUG_ON(mm == &init_mm);
> > -
> >         __create_pgd_mapping(mm->pgd, phys, virt, size, prot,
> >                              pgd_pgtable_alloc, page_mappings_only);
> >  }
> > --
> > 2.11.1
AKASHI Takahiro March 23, 2017, 11:43 a.m. UTC | #4
On Tue, Mar 21, 2017 at 10:35:53AM +0000, James Morse wrote:
> Hi Akashi,
> 
> On 15/03/17 09:59, AKASHI Takahiro wrote:
> > create_pgd_mapping() is enhanced here so that it will accept
> > PAGE_KERNEL_INVALID protection attribute and unmap a given range of memory.
> > 
> > The feature will be used in a later kdump patch to implement the protection
> > against possible corruption of crash dump kernel memory which is to be set
> > aside from ther other memory on primary kernel.
> 
> Nit: ther -> the

Will fix it.

> > Note that, in this implementation, it assumes that all the range of memory
> > to be processed is mapped in page-level since the only current user is
> > kdump where page mappings are also required.
> 
> Using create_pgd_mapping() like this means the mappings will be updated via the
> fixmap which is unnecessary as the page tables will be part of mapped memory. In

This might be a reason to go for (un)map_kernel_range() over
create_pgd_mapping() (though I'm not sure).

> the worst case this adds an extra tlbi for every 2MB of crash image when we map
> or unmap it. I don't think this matters.
> 
> This code used to be __init and it is the only user of FIX_PTE, so there won't
> be any existing runtime users. The two arch_kexec_unprotect_crashkres() calls in
> kexec are both protected by the kexec_mutex, and the call in hibernate happens
> after disable_nonboot_cpus(), so these callers can't race with each other.
> 
> This looks safe to me.
> 
> 
> > diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> > index d28dbcf596b6..cb359a3927ef 100644
> > --- a/arch/arm64/mm/mmu.c
> > +++ b/arch/arm64/mm/mmu.c
> > @@ -128,7 +128,10 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
> >  	do {
> >  		pte_t old_pte = *pte;
> >  
> > -		set_pte(pte, pfn_pte(pfn, prot));
> > +		if (pgprot_val(prot))
> > +			set_pte(pte, pfn_pte(pfn, prot));
> > +		else
> > +			pte_clear(null, null, pte);
> 
> Lowercase NULLs? This relies on these values never being used... __set_fixmap()
> in the same file passes &init_mm and the address, can we do the same to be
> consistent?

OK.

Thanks,
-Takahiro AKASHI

> 
> >  		pfn++;
> >  
> >  		/*
> 
> Reviewed-by: James Morse <james.morse@arm.com>
> 
> 
> Thanks,
> 
> James
> 
>
Ard Biesheuvel March 24, 2017, 10:57 a.m. UTC | #5
On 23 March 2017 at 11:43, AKASHI Takahiro <takahiro.akashi@linaro.org> wrote:
> On Tue, Mar 21, 2017 at 10:35:53AM +0000, James Morse wrote:
>> Hi Akashi,
>>
>> On 15/03/17 09:59, AKASHI Takahiro wrote:
>> > create_pgd_mapping() is enhanced here so that it will accept
>> > PAGE_KERNEL_INVALID protection attribute and unmap a given range of memory.
>> >
>> > The feature will be used in a later kdump patch to implement the protection
>> > against possible corruption of crash dump kernel memory which is to be set
>> > aside from ther other memory on primary kernel.
>>
>> Nit: ther -> the
>
> Will fix it.
>
>> > Note that, in this implementation, it assumes that all the range of memory
>> > to be processed is mapped in page-level since the only current user is
>> > kdump where page mappings are also required.
>>
>> Using create_pgd_mapping() like this means the mappings will be updated via the
>> fixmap which is unnecessary as the page tables will be part of mapped memory. In
>
> This might be a reason to go for (un)map_kernel_range() over
> create_pgd_mapping() (though I'm not sure).
>

Yes, that is why I suggested it. We already use it to unmap the init
segment at the end of boot, but I do take your point about it being
documented as operating on kernel VMAs only.

Looking at the code, it shouldn't matter (it does not touch or reason
about VMAs at all, it only walks the page tables and unmaps them), but
I agree it is better not to rely on that.

But instead of clearing all permissions, which apparently requires
changes to alloc_init_pte(), and introduces the restriction that the
region should be mapped down to pages, could we not simply clear
PTE_VALID on the region, like we do for debug_pagealloc()?
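
A sketch of that alternative, modelled on __kernel_map_pages() in
arch/arm64/mm/pageattr.c; it assumes __change_memory_common() (currently
static in that file) were made accessible, and the helper name is made up:

	static void crashkres_set_valid(int valid)
	{
		unsigned long addr = __phys_to_virt(crashk_res.start);
		unsigned long size = resource_size(&crashk_res);

		if (valid)
			/* set PTE_VALID, clear nothing */
			__change_memory_common(addr, size,
					       __pgprot(PTE_VALID),
					       __pgprot(0));
		else
			/* clear PTE_VALID, set nothing */
			__change_memory_common(addr, size,
					       __pgprot(0),
					       __pgprot(PTE_VALID));
	}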


>> the worst case this adds an extra tlbi for every 2MB of crash image when we map
>> or unmap it. I don't think this matters.
>>
>> This code used to be __init and it is the only user of FIX_PTE, so there won't
>> be any existing runtime users. The two arch_kexec_unprotect_crashkres() calls in
>> kexec are both protected by the kexec_mutex, and the call in hibernate happens
>> after disable_nonboot_cpus(), so these callers can't race with each other.
>>
>> This looks safe to me.
>>
>>
>> > diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
>> > index d28dbcf596b6..cb359a3927ef 100644
>> > --- a/arch/arm64/mm/mmu.c
>> > +++ b/arch/arm64/mm/mmu.c
>> > @@ -128,7 +128,10 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
>> >     do {
>> >             pte_t old_pte = *pte;
>> >
>> > -           set_pte(pte, pfn_pte(pfn, prot));
>> > +           if (pgprot_val(prot))
>> > +                   set_pte(pte, pfn_pte(pfn, prot));
>> > +           else
>> > +                   pte_clear(null, null, pte);
>>
>> Lowercase NULLs? This relies on these values never being used... __set_fixmap()
>> in the same file passes &init_mm and the address, can we do the same to be
>> consistent?
>
> OK.
>
> Thanks,
> -Takahiro AKASHI
>
>>
>> >             pfn++;
>> >
>> >             /*
>>
>> Reviewed-by: James Morse <james.morse@arm.com>
>>
>>
>> Thanks,
>>
>> James
>>
>>
AKASHI Takahiro March 27, 2017, 1:49 p.m. UTC | #6
Ard,

On Fri, Mar 24, 2017 at 10:57:02AM +0000, Ard Biesheuvel wrote:
> On 23 March 2017 at 11:43, AKASHI Takahiro <takahiro.akashi@linaro.org> wrote:
> > On Tue, Mar 21, 2017 at 10:35:53AM +0000, James Morse wrote:
> >> Hi Akashi,
> >>
> >> On 15/03/17 09:59, AKASHI Takahiro wrote:
> >> > create_pgd_mapping() is enhanced here so that it will accept
> >> > PAGE_KERNEL_INVALID protection attribute and unmap a given range of memory.
> >> >
> >> > The feature will be used in a later kdump patch to implement the protection
> >> > against possible corruption of crash dump kernel memory which is to be set
> >> > aside from ther other memory on primary kernel.
> >>
> >> Nit: ther -> the
> >
> > Will fix it.
> >
> >> > Note that, in this implementation, it assumes that all the range of memory
> >> > to be processed is mapped in page-level since the only current user is
> >> > kdump where page mappings are also required.
> >>
> >> Using create_pgd_mapping() like this means the mappings will be updated via the
> >> fixmap which is unnecessary as the page tables will be part of mapped memory. In
> >
> > This might be a reason to go for (un)map_kernel_range() over
> > create_pgd_mapping() (though I'm not sure).
> >
> 
> Yes, that is why I suggested it. We already use it to unmap the init
> segment at the end of boot, but I do take your point about it being
> documented as operating on kernel VMAs only.
> 
> Looking at the code, it shouldn't matter (it does not touch or reason
> about VMAs at all, it only walks the page tables and unmaps them), but
> I agree it is better not to rely on that.

OK

> But instead of clearing all permissions, which apparently requires
> changes to alloc_init_pte(), and introduces the restriction that the
> region should be mapped down to pages, could we not simply clear
> PTE_VALID on the region, like we do for debug_pagealloc()?

Now that we are only using page-level mappings for crash kernel memory,
__change_memory_common() should work, and it even avoids the concerns
that James pointed out.
I will update my patch soon.

Thanks,
-Takahiro AKASHI

> 
> >> the worst case this adds an extra tlbi for every 2MB of crash image when we map
> >> or unmap it. I don't think this matters.
> >>
> >> This code used to be __init and it is the only user of FIX_PTE, so there won't
> >> be any existing runtime users. The two arch_kexec_unprotect_crashkres() calls in
> >> kexec are both protected by the kexec_mutex, and the call in hibernate happens
> >> after disable_nonboot_cpus(), so these callers can't race with each other.
> >>
> >> This looks safe to me.
> >>
> >>
> >> > diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> >> > index d28dbcf596b6..cb359a3927ef 100644
> >> > --- a/arch/arm64/mm/mmu.c
> >> > +++ b/arch/arm64/mm/mmu.c
> >> > @@ -128,7 +128,10 @@ static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
> >> >     do {
> >> >             pte_t old_pte = *pte;
> >> >
> >> > -           set_pte(pte, pfn_pte(pfn, prot));
> >> > +           if (pgprot_val(prot))
> >> > +                   set_pte(pte, pfn_pte(pfn, prot));
> >> > +           else
> >> > +                   pte_clear(null, null, pte);
> >>
> >> Lowercase NULLs? This relies on these values never being used... __set_fixmap()
> >> in the same file passes &init_mm and the address, can we do the same to be
> >> consistent?
> >
> > OK.
> >
> > Thanks,
> > -Takahiro AKASHI
> >
> >>
> >> >             pfn++;
> >> >
> >> >             /*
> >>
> >> Reviewed-by: James Morse <james.morse@arm.com>
> >>
> >>
> >> Thanks,
> >>
> >> James
> >>
> >>

Patch

diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 2142c7726e76..945d84cd5df7 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -54,6 +54,7 @@ 
 #define PAGE_KERNEL_ROX		__pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY)
 #define PAGE_KERNEL_EXEC	__pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE)
 #define PAGE_KERNEL_EXEC_CONT	__pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT)
+#define PAGE_KERNEL_INVALID	__pgprot(0)
 
 #define PAGE_HYP		__pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_HYP_XN)
 #define PAGE_HYP_EXEC		__pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index d28dbcf596b6..cb359a3927ef 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -128,7 +128,10 @@  static void alloc_init_pte(pmd_t *pmd, unsigned long addr,
 	do {
 		pte_t old_pte = *pte;
 
-		set_pte(pte, pfn_pte(pfn, prot));
+		if (pgprot_val(prot))
+			set_pte(pte, pfn_pte(pfn, prot));
+		else
+			pte_clear(null, null, pte);
 		pfn++;
 
 		/*
@@ -309,12 +312,14 @@  static void __init create_mapping_noalloc(phys_addr_t phys, unsigned long virt,
 	__create_pgd_mapping(init_mm.pgd, phys, virt, size, prot, NULL, false);
 }
 
-void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
-			       unsigned long virt, phys_addr_t size,
-			       pgprot_t prot, bool page_mappings_only)
+/*
+ * Note that PAGE_KERNEL_INVALID should be used with page_mappings_only
+ * true for now.
+ */
+void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
+			unsigned long virt, phys_addr_t size,
+			pgprot_t prot, bool page_mappings_only)
 {
-	BUG_ON(mm == &init_mm);
-
 	__create_pgd_mapping(mm->pgd, phys, virt, size, prot,
 			     pgd_pgtable_alloc, page_mappings_only);
 }