diff mbox series

[v4] mm: hugetlb: fix UAF in hugetlb_handle_userfault

Message ID 20220924034905.2694686-1-liushixin2@huawei.com (mailing list archive)
State New
Headers show
Series [v4] mm: hugetlb: fix UAF in hugetlb_handle_userfault | expand

Commit Message

Liu Shixin Sept. 24, 2022, 3:49 a.m. UTC
The vma_lock and hugetlb_fault_mutex are dropped before handling
userfault and reacquire them again after handle_userfault(), but
reacquire the vma_lock could lead to UAF[1,2] due to the following
race,

hugetlb_fault
  hugetlb_no_page
    /*unlock vma_lock */
    hugetlb_handle_userfault
      handle_userfault
        /* unlock mm->mmap_lock*/
                                           vm_mmap_pgoff
                                             do_mmap
                                               mmap_region
                                                 munmap_vma_range
                                                   /* clean old vma */
        /* lock vma_lock again  <--- UAF */
    /* unlock vma_lock */

Since the vma_lock will unlock immediately after hugetlb_handle_userfault(),
let's drop the unneeded lock and unlock in hugetlb_handle_userfault() to fix
the issue.

[1] https://lore.kernel.org/linux-mm/000000000000d5e00a05e834962e@google.com/
[2] https://lore.kernel.org/linux-mm/20220921014457.1668-1-liuzixian4@huawei.com/
Reported-by: syzbot+193f9cee8638750b23cf@syzkaller.appspotmail.com
Reported-by: Liu Zixian <liuzixian4@huawei.com>
Fixes: 1a1aad8a9b7b ("userfaultfd: hugetlbfs: add userfaultfd hugetlb hook")
CC: stable@vger.kernel.org # 4.14+
Signed-off-by: Liu Shixin <liushixin2@huawei.com>
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
---
v1->v2: add reported-by and link [1].
v2->v3: add comment to explain why unlock in hugetlb_no_page.
v3->v4: Rebase on mainline.

 mm/hugetlb.c | 37 +++++++++++++++++--------------------
 1 file changed, 17 insertions(+), 20 deletions(-)

Comments

Andrew Morton Sept. 25, 2022, 6:28 p.m. UTC | #1
On Sat, 24 Sep 2022 11:49:05 +0800 Liu Shixin <liushixin2@huawei.com> wrote:

> The vma_lock and hugetlb_fault_mutex are dropped before handling
> userfault and reacquire them again after handle_userfault(), but
> reacquire the vma_lock could lead to UAF[1,2] due to the following
> race,
> 
> hugetlb_fault
>   hugetlb_no_page
>     /*unlock vma_lock */
>     hugetlb_handle_userfault
>       handle_userfault
>         /* unlock mm->mmap_lock*/
>                                            vm_mmap_pgoff
>                                              do_mmap
>                                                mmap_region
>                                                  munmap_vma_range
>                                                    /* clean old vma */
>         /* lock vma_lock again  <--- UAF */
>     /* unlock vma_lock */
> 
> Since the vma_lock will unlock immediately after hugetlb_handle_userfault(),
> let's drop the unneeded lock and unlock in hugetlb_handle_userfault() to fix
> the issue.
> 

Thanks.  Turns out that porting all the pending material on top of this
change was not a confidence-inspiring activity.  So I ended up merging
your v3.  Please work with Greg on the backporting when he gets on to
it?  Hopefully that will merely involve sending him this v4.
Liu Shixin Sept. 26, 2022, 6:48 a.m. UTC | #2
On 2022/9/26 2:28, Andrew Morton wrote:
> On Sat, 24 Sep 2022 11:49:05 +0800 Liu Shixin <liushixin2@huawei.com> wrote:
>
>> The vma_lock and hugetlb_fault_mutex are dropped before handling
>> userfault and reacquire them again after handle_userfault(), but
>> reacquire the vma_lock could lead to UAF[1,2] due to the following
>> race,
>>
>> hugetlb_fault
>>   hugetlb_no_page
>>     /*unlock vma_lock */
>>     hugetlb_handle_userfault
>>       handle_userfault
>>         /* unlock mm->mmap_lock*/
>>                                            vm_mmap_pgoff
>>                                              do_mmap
>>                                                mmap_region
>>                                                  munmap_vma_range
>>                                                    /* clean old vma */
>>         /* lock vma_lock again  <--- UAF */
>>     /* unlock vma_lock */
>>
>> Since the vma_lock will unlock immediately after hugetlb_handle_userfault(),
>> let's drop the unneeded lock and unlock in hugetlb_handle_userfault() to fix
>> the issue.
>>
> Thanks.  Turns out that porting all the pending material on top of this
> change was not a confidence-inspiring activity.  So I ended up merging
> your v3.  Please work with Greg on the backporting when he gets on to
> it?  Hopefully that will merely involve sending him this v4.
OK, I'll do it.
> .
>
diff mbox series

Patch

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index e070b8593b37..477e3c2369d2 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5476,7 +5476,6 @@  static inline vm_fault_t hugetlb_handle_userfault(struct vm_area_struct *vma,
 						  unsigned long addr,
 						  unsigned long reason)
 {
-	vm_fault_t ret;
 	u32 hash;
 	struct vm_fault vmf = {
 		.vma = vma,
@@ -5494,18 +5493,14 @@  static inline vm_fault_t hugetlb_handle_userfault(struct vm_area_struct *vma,
 	};
 
 	/*
-	 * hugetlb_fault_mutex and i_mmap_rwsem must be
-	 * dropped before handling userfault.  Reacquire
-	 * after handling fault to make calling code simpler.
+	 * vma_lock and hugetlb_fault_mutex must be dropped before handling
+	 * userfault. Also mmap_lock will be dropped during handling
+	 * userfault, any vma operation should be careful from here.
 	 */
 	hash = hugetlb_fault_mutex_hash(mapping, idx);
 	mutex_unlock(&hugetlb_fault_mutex_table[hash]);
 	i_mmap_unlock_read(mapping);
-	ret = handle_userfault(&vmf, reason);
-	i_mmap_lock_read(mapping);
-	mutex_lock(&hugetlb_fault_mutex_table[hash]);
-
-	return ret;
+	return handle_userfault(&vmf, reason);
 }
 
 static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
@@ -5523,6 +5518,7 @@  static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
 	spinlock_t *ptl;
 	unsigned long haddr = address & huge_page_mask(h);
 	bool new_page, new_pagecache_page = false;
+	u32 hash = hugetlb_fault_mutex_hash(mapping, idx);
 
 	/*
 	 * Currently, we are forced to kill the process in the event the
@@ -5533,7 +5529,7 @@  static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
 	if (is_vma_resv_set(vma, HPAGE_RESV_UNMAPPED)) {
 		pr_warn_ratelimited("PID %d killed due to inadequate hugepage pool\n",
 			   current->pid);
-		return ret;
+		goto out;
 	}
 
 	/*
@@ -5550,12 +5546,10 @@  static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
 	page = find_lock_page(mapping, idx);
 	if (!page) {
 		/* Check for page in userfault range */
-		if (userfaultfd_missing(vma)) {
-			ret = hugetlb_handle_userfault(vma, mapping, idx,
+		if (userfaultfd_missing(vma))
+			return hugetlb_handle_userfault(vma, mapping, idx,
 						       flags, haddr, address,
 						       VM_UFFD_MISSING);
-			goto out;
-		}
 
 		page = alloc_huge_page(vma, haddr, 0);
 		if (IS_ERR(page)) {
@@ -5615,10 +5609,9 @@  static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
 		if (userfaultfd_minor(vma)) {
 			unlock_page(page);
 			put_page(page);
-			ret = hugetlb_handle_userfault(vma, mapping, idx,
+			return hugetlb_handle_userfault(vma, mapping, idx,
 						       flags, haddr, address,
 						       VM_UFFD_MINOR);
-			goto out;
 		}
 	}
 
@@ -5676,6 +5669,8 @@  static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
 
 	unlock_page(page);
 out:
+	mutex_unlock(&hugetlb_fault_mutex_table[hash]);
+	i_mmap_unlock_read(mapping);
 	return ret;
 
 backout:
@@ -5774,11 +5769,13 @@  vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	entry = huge_ptep_get(ptep);
 	/* PTE markers should be handled the same way as none pte */
-	if (huge_pte_none_mostly(entry)) {
-		ret = hugetlb_no_page(mm, vma, mapping, idx, address, ptep,
+	if (huge_pte_none_mostly(entry))
+		/*
+		 * hugetlb_no_page will drop vma lock and hugetlb fault
+		 * mutex internally, which make us return immediately.
+		 */
+		return hugetlb_no_page(mm, vma, mapping, idx, address, ptep,
 				      entry, flags);
-		goto out_mutex;
-	}
 
 	ret = 0;