diff mbox series

[resend,RFC,3/9] s390/mm: validate VMA in PGSTE manipulation functions

Message ID 20210909162248.14969-4-david@redhat.com (mailing list archive)
State New
Headers show
Series s390: fixes, cleanups and optimizations for page table walkers | expand

Commit Message

David Hildenbrand Sept. 9, 2021, 4:22 p.m. UTC
We should not walk/touch page tables outside of VMA boundaries when
holding only the mmap sem in read mode. Evil user space can modify the
VMA layout just before this function runs and e.g., trigger races with
page table removal code since commit dd2283f2605e ("mm: mmap: zap pages
with read mmap_sem in munmap"). gfn_to_hva() will only translate using
KVM memory regions, but won't validate the VMA.

Further, we should not allocate page tables outside of VMA boundaries: if
evil user space decides to map hugetlbfs to these ranges, bad things will
happen because we suddenly have PTE or PMD page tables where we
shouldn't have them.

Similarly, we have to check if we suddenly find a hugetlbfs VMA, before
calling get_locked_pte().

Fixes: 2d42f9477320 ("s390/kvm: Add PGSTE manipulation functions")
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 arch/s390/mm/pgtable.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

Comments

Claudio Imbrenda Sept. 14, 2021, 4:54 p.m. UTC | #1
On Thu,  9 Sep 2021 18:22:42 +0200
David Hildenbrand <david@redhat.com> wrote:

> We should not walk/touch page tables outside of VMA boundaries when
> holding only the mmap sem in read mode. Evil user space can modify the
> VMA layout just before this function runs and e.g., trigger races with
> page table removal code since commit dd2283f2605e ("mm: mmap: zap pages
> with read mmap_sem in munmap"). gfn_to_hva() will only translate using
> KVM memory regions, but won't validate the VMA.
> 
> Further, we should not allocate page tables outside of VMA boundaries: if
> evil user space decides to map hugetlbfs to these ranges, bad things will
> happen because we suddenly have PTE or PMD page tables where we
> shouldn't have them.
> 
> Similarly, we have to check if we suddenly find a hugetlbfs VMA, before
> calling get_locked_pte().
> 
> Fixes: 2d42f9477320 ("s390/kvm: Add PGSTE manipulation functions")
> Signed-off-by: David Hildenbrand <david@redhat.com>

Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>

> ---
>  arch/s390/mm/pgtable.c | 13 +++++++++++++
>  1 file changed, 13 insertions(+)
> 
> diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
> index eec3a9d7176e..54969e0f3a94 100644
> --- a/arch/s390/mm/pgtable.c
> +++ b/arch/s390/mm/pgtable.c
> @@ -988,6 +988,7 @@ EXPORT_SYMBOL(get_guest_storage_key);
>  int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
>  			unsigned long *oldpte, unsigned long *oldpgste)
>  {
> +	struct vm_area_struct *vma;
>  	unsigned long pgstev;
>  	spinlock_t *ptl;
>  	pgste_t pgste;
> @@ -997,6 +998,10 @@ int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
>  	WARN_ON_ONCE(orc > ESSA_MAX);
>  	if (unlikely(orc > ESSA_MAX))
>  		return -EINVAL;
> +
> +	vma = vma_lookup(mm, hva);
> +	if (!vma || is_vm_hugetlb_page(vma))
> +		return -EFAULT;
>  	ptep = get_locked_pte(mm, hva, &ptl);
>  	if (unlikely(!ptep))
>  		return -EFAULT;
> @@ -1089,10 +1094,14 @@ EXPORT_SYMBOL(pgste_perform_essa);
>  int set_pgste_bits(struct mm_struct *mm, unsigned long hva,
>  			unsigned long bits, unsigned long value)
>  {
> +	struct vm_area_struct *vma;
>  	spinlock_t *ptl;
>  	pgste_t new;
>  	pte_t *ptep;
>  
> +	vma = vma_lookup(mm, hva);
> +	if (!vma || is_vm_hugetlb_page(vma))
> +		return -EFAULT;
>  	ptep = get_locked_pte(mm, hva, &ptl);
>  	if (unlikely(!ptep))
>  		return -EFAULT;
> @@ -1117,9 +1126,13 @@ EXPORT_SYMBOL(set_pgste_bits);
>   */
>  int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep)
>  {
> +	struct vm_area_struct *vma;
>  	spinlock_t *ptl;
>  	pte_t *ptep;
>  
> +	vma = vma_lookup(mm, hva);
> +	if (!vma || is_vm_hugetlb_page(vma))
> +		return -EFAULT;
>  	ptep = get_locked_pte(mm, hva, &ptl);
>  	if (unlikely(!ptep))
>  		return -EFAULT;
diff mbox series

Patch

diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index eec3a9d7176e..54969e0f3a94 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -988,6 +988,7 @@  EXPORT_SYMBOL(get_guest_storage_key);
 int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
 			unsigned long *oldpte, unsigned long *oldpgste)
 {
+	struct vm_area_struct *vma;
 	unsigned long pgstev;
 	spinlock_t *ptl;
 	pgste_t pgste;
@@ -997,6 +998,10 @@  int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc,
 	WARN_ON_ONCE(orc > ESSA_MAX);
 	if (unlikely(orc > ESSA_MAX))
 		return -EINVAL;
+
+	vma = vma_lookup(mm, hva);
+	if (!vma || is_vm_hugetlb_page(vma))
+		return -EFAULT;
 	ptep = get_locked_pte(mm, hva, &ptl);
 	if (unlikely(!ptep))
 		return -EFAULT;
@@ -1089,10 +1094,14 @@  EXPORT_SYMBOL(pgste_perform_essa);
 int set_pgste_bits(struct mm_struct *mm, unsigned long hva,
 			unsigned long bits, unsigned long value)
 {
+	struct vm_area_struct *vma;
 	spinlock_t *ptl;
 	pgste_t new;
 	pte_t *ptep;
 
+	vma = vma_lookup(mm, hva);
+	if (!vma || is_vm_hugetlb_page(vma))
+		return -EFAULT;
 	ptep = get_locked_pte(mm, hva, &ptl);
 	if (unlikely(!ptep))
 		return -EFAULT;
@@ -1117,9 +1126,13 @@  EXPORT_SYMBOL(set_pgste_bits);
  */
 int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep)
 {
+	struct vm_area_struct *vma;
 	spinlock_t *ptl;
 	pte_t *ptep;
 
+	vma = vma_lookup(mm, hva);
+	if (!vma || is_vm_hugetlb_page(vma))
+		return -EFAULT;
 	ptep = get_locked_pte(mm, hva, &ptl);
 	if (unlikely(!ptep))
 		return -EFAULT;