diff mbox series

[v5,2/3] x86/sgx: Resolve EAUG race where losing thread returns SIGBUS

Message ID 20240821100215.4119457-3-dmitrii.kuvaiskii@intel.com (mailing list archive)
State New, archived
Headers show
Series x86/sgx: Fix two data races in EAUG/EREMOVE flows | expand

Commit Message

Dmitrii Kuvaiskii Aug. 21, 2024, 10:02 a.m. UTC
Imagine an mmap()'d file. Two threads touch the same address at the same
time and fault. Both allocate a physical page and race to install a PTE
for that page. Only one will win the race. The loser frees its page, but
still continues handling the fault as a success and returns
VM_FAULT_NOPAGE from the fault handler.

The same race can happen with SGX. But there's a bug: the loser in the
SGX steers into a failure path. The loser EREMOVE's the winner's EPC
page, then returns SIGBUS, likely killing the app.

Fix the SGX loser's behavior. Check whether another thread already
allocated the page and if yes, return with VM_FAULT_NOPAGE.

The race can be illustrated as follows:

/*                             /*
 * Fault on CPU1                * Fault on CPU2
 * on enclave page X            * on enclave page X
 */                             */
sgx_vma_fault() {              sgx_vma_fault() {

  xa_load(&encl->page_array)     xa_load(&encl->page_array)
      == NULL -->                    == NULL -->

  sgx_encl_eaug_page() {         sgx_encl_eaug_page() {

    ...                            ...

    /*                             /*
     * alloc encl_page              * alloc encl_page
     */                             */
                                   mutex_lock(&encl->lock);
                                   /*
                                    * alloc EPC page
                                    */
                                   epc_page = sgx_alloc_epc_page(...);
                                   /*
                                    * add page to enclave's xarray
                                    */
                                   xa_insert(&encl->page_array, ...);
                                   /*
                                    * add page to enclave via EAUG
                                    * (page is in pending state)
                                    */
                                   /*
                                    * add PTE entry
                                    */
                                   vmf_insert_pfn(...);

                                   mutex_unlock(&encl->lock);
                                   return VM_FAULT_NOPAGE;
                                 }
                               }
                               /*
                                * All good up to here: enclave page
                                * successfully added to enclave,
                                * ready for EACCEPT from user space
                                */
    mutex_lock(&encl->lock);
    /*
     * alloc EPC page
     */
    epc_page = sgx_alloc_epc_page(...);
    /*
     * add page to enclave's xarray,
     * this fails with -EBUSY as this
     * page was already added by CPU2
     */
    xa_insert(&encl->page_array, ...);

  err_out_shrink:
    sgx_encl_free_epc_page(epc_page) {
      /*
       * remove page via EREMOVE
       *
       * *BUG*: page added by CPU2 is
       * yanked from enclave while it
       * remains accessible from OS
       * perspective (PTE installed)
       */
      /*
       * free EPC page
       */
      sgx_free_epc_page(epc_page);
    }

    mutex_unlock(&encl->lock);
    /*
     * *BUG*: SIGBUS is returned
     * for a valid enclave page
     */
    return VM_FAULT_SIGBUS;
  }
}

Fixes: 5a90d2c3f5ef ("x86/sgx: Support adding of pages to an initialized enclave")
Cc: stable@vger.kernel.org
Reported-by: Marcelina Kościelnicka <mwk@invisiblethingslab.com>
Suggested-by: Kai Huang <kai.huang@intel.com>
Signed-off-by: Dmitrii Kuvaiskii <dmitrii.kuvaiskii@intel.com>
---
 arch/x86/kernel/cpu/sgx/encl.c | 36 ++++++++++++++++++++--------------
 1 file changed, 21 insertions(+), 15 deletions(-)

Comments

Huang, Kai Aug. 21, 2024, 11:15 a.m. UTC | #1
On Wed, 2024-08-21 at 03:02 -0700, Dmitrii Kuvaiskii wrote:
> Imagine an mmap()'d file. Two threads touch the same address at the same
> time and fault. Both allocate a physical page and race to install a PTE
> for that page. Only one will win the race. The loser frees its page, but
> still continues handling the fault as a success and returns
> VM_FAULT_NOPAGE from the fault handler.
> 
> The same race can happen with SGX. But there's a bug: the loser in the
> SGX steers into a failure path. The loser EREMOVE's the winner's EPC
> page, then returns SIGBUS, likely killing the app.
> 
> Fix the SGX loser's behavior. Check whether another thread already
> allocated the page and if yes, return with VM_FAULT_NOPAGE.
> 
[...]

> Fixes: 5a90d2c3f5ef ("x86/sgx: Support adding of pages to an initialized enclave")
> Cc: stable@vger.kernel.org
> Reported-by: Marcelina Kościelnicka <mwk@invisiblethingslab.com>
> Suggested-by: Kai Huang <kai.huang@intel.com>
> Signed-off-by: Dmitrii Kuvaiskii <dmitrii.kuvaiskii@intel.com>
> 

Reviewed-by: Kai Huang <kai.huang@intel.com>
Jarkko Sakkinen Aug. 27, 2024, 6:19 p.m. UTC | #2
On Wed Aug 21, 2024 at 1:02 PM EEST, Dmitrii Kuvaiskii wrote:
> Imagine an mmap()'d file. Two threads touch the same address at the same
> time and fault. Both allocate a physical page and race to install a PTE
> for that page. Only one will win the race. The loser frees its page, but
> still continues handling the fault as a success and returns
> VM_FAULT_NOPAGE from the fault handler.
>
> The same race can happen with SGX. But there's a bug: the loser in the
> SGX steers into a failure path. The loser EREMOVE's the winner's EPC
> page, then returns SIGBUS, likely killing the app.
>
> Fix the SGX loser's behavior. Check whether another thread already
> allocated the page and if yes, return with VM_FAULT_NOPAGE.
>
> The race can be illustrated as follows:
>
> /*                             /*
>  * Fault on CPU1                * Fault on CPU2
>  * on enclave page X            * on enclave page X
>  */                             */
> sgx_vma_fault() {              sgx_vma_fault() {
>
>   xa_load(&encl->page_array)     xa_load(&encl->page_array)
>       == NULL -->                    == NULL -->
>
>   sgx_encl_eaug_page() {         sgx_encl_eaug_page() {
>
>     ...                            ...
>
>     /*                             /*
>      * alloc encl_page              * alloc encl_page
>      */                             */
>                                    mutex_lock(&encl->lock);
>                                    /*
>                                     * alloc EPC page
>                                     */
>                                    epc_page = sgx_alloc_epc_page(...);
>                                    /*
>                                     * add page to enclave's xarray
>                                     */
>                                    xa_insert(&encl->page_array, ...);
>                                    /*
>                                     * add page to enclave via EAUG
>                                     * (page is in pending state)
>                                     */
>                                    /*
>                                     * add PTE entry
>                                     */
>                                    vmf_insert_pfn(...);
>
>                                    mutex_unlock(&encl->lock);
>                                    return VM_FAULT_NOPAGE;
>                                  }
>                                }
>                                /*
>                                 * All good up to here: enclave page
>                                 * successfully added to enclave,
>                                 * ready for EACCEPT from user space
>                                 */
>     mutex_lock(&encl->lock);
>     /*
>      * alloc EPC page
>      */
>     epc_page = sgx_alloc_epc_page(...);
>     /*
>      * add page to enclave's xarray,
>      * this fails with -EBUSY as this
>      * page was already added by CPU2
>      */
>     xa_insert(&encl->page_array, ...);
>
>   err_out_shrink:
>     sgx_encl_free_epc_page(epc_page) {
>       /*
>        * remove page via EREMOVE
>        *
>        * *BUG*: page added by CPU2 is
>        * yanked from enclave while it
>        * remains accessible from OS
>        * perspective (PTE installed)
>        */
>       /*
>        * free EPC page
>        */
>       sgx_free_epc_page(epc_page);
>     }
>
>     mutex_unlock(&encl->lock);
>     /*
>      * *BUG*: SIGBUS is returned
>      * for a valid enclave page
>      */
>     return VM_FAULT_SIGBUS;
>   }
> }
>
> Fixes: 5a90d2c3f5ef ("x86/sgx: Support adding of pages to an initialized enclave")
> Cc: stable@vger.kernel.org
> Reported-by: Marcelina Kościelnicka <mwk@invisiblethingslab.com>
> Suggested-by: Kai Huang <kai.huang@intel.com>
> Signed-off-by: Dmitrii Kuvaiskii <dmitrii.kuvaiskii@intel.com>
> ---
>  arch/x86/kernel/cpu/sgx/encl.c | 36 ++++++++++++++++++++--------------
>  1 file changed, 21 insertions(+), 15 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
> index c0a3c00284c8..2aa7ced0e4a0 100644
> --- a/arch/x86/kernel/cpu/sgx/encl.c
> +++ b/arch/x86/kernel/cpu/sgx/encl.c
> @@ -337,6 +337,16 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
>  	if (!test_bit(SGX_ENCL_INITIALIZED, &encl->flags))
>  		return VM_FAULT_SIGBUS;
>  
> +	mutex_lock(&encl->lock);
> +
> +	/*
> +	 * Multiple threads may try to fault on the same page concurrently.
> +	 * Re-check if another thread has already done that.
> +	 */
> +	encl_page = xa_load(&encl->page_array, PFN_DOWN(addr));
> +	if (encl_page)
> +		goto done;
> +
>  	/*
>  	 * Ignore internal permission checking for dynamically added pages.
>  	 * They matter only for data added during the pre-initialization
> @@ -345,23 +355,23 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
>  	 */
>  	secinfo_flags = SGX_SECINFO_R | SGX_SECINFO_W | SGX_SECINFO_X;
>  	encl_page = sgx_encl_page_alloc(encl, addr - encl->base, secinfo_flags);
> -	if (IS_ERR(encl_page))
> -		return VM_FAULT_OOM;
> -
> -	mutex_lock(&encl->lock);
> +	if (IS_ERR(encl_page)) {
> +		vmret = VM_FAULT_OOM;
> +		goto err_out_unlock;
> +	}
>  
>  	epc_page = sgx_encl_load_secs(encl);
>  	if (IS_ERR(epc_page)) {
>  		if (PTR_ERR(epc_page) == -EBUSY)
>  			vmret = VM_FAULT_NOPAGE;
> -		goto err_out_unlock;
> +		goto err_out_encl;
>  	}
>  
>  	epc_page = sgx_alloc_epc_page(encl_page, false);
>  	if (IS_ERR(epc_page)) {
>  		if (PTR_ERR(epc_page) == -EBUSY)
>  			vmret =  VM_FAULT_NOPAGE;
> -		goto err_out_unlock;
> +		goto err_out_encl;
>  	}
>  
>  	va_page = sgx_encl_grow(encl, false);
> @@ -376,10 +386,6 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
>  
>  	ret = xa_insert(&encl->page_array, PFN_DOWN(encl_page->desc),
>  			encl_page, GFP_KERNEL);
> -	/*
> -	 * If ret == -EBUSY then page was created in another flow while
> -	 * running without encl->lock
> -	 */
>  	if (ret)
>  		goto err_out_shrink;
>  
> @@ -389,7 +395,7 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
>  
>  	ret = __eaug(&pginfo, sgx_get_epc_virt_addr(epc_page));
>  	if (ret)
> -		goto err_out;
> +		goto err_out_eaug;
>  
>  	encl_page->encl = encl;
>  	encl_page->epc_page = epc_page;
> @@ -408,20 +414,20 @@ static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
>  		mutex_unlock(&encl->lock);
>  		return VM_FAULT_SIGBUS;
>  	}
> +done:
>  	mutex_unlock(&encl->lock);
>  	return VM_FAULT_NOPAGE;
>  
> -err_out:
> +err_out_eaug:
>  	xa_erase(&encl->page_array, PFN_DOWN(encl_page->desc));
> -
>  err_out_shrink:
>  	sgx_encl_shrink(encl, va_page);
>  err_out_epc:
>  	sgx_encl_free_epc_page(epc_page);
> +err_out_encl:
> +	kfree(encl_page);
>  err_out_unlock:
>  	mutex_unlock(&encl->lock);
> -	kfree(encl_page);
> -
>  	return vmret;
>  }
>  

Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>

BR, Jarkko
diff mbox series

Patch

diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index c0a3c00284c8..2aa7ced0e4a0 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -337,6 +337,16 @@  static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
 	if (!test_bit(SGX_ENCL_INITIALIZED, &encl->flags))
 		return VM_FAULT_SIGBUS;
 
+	mutex_lock(&encl->lock);
+
+	/*
+	 * Multiple threads may try to fault on the same page concurrently.
+	 * Re-check if another thread has already done that.
+	 */
+	encl_page = xa_load(&encl->page_array, PFN_DOWN(addr));
+	if (encl_page)
+		goto done;
+
 	/*
 	 * Ignore internal permission checking for dynamically added pages.
 	 * They matter only for data added during the pre-initialization
@@ -345,23 +355,23 @@  static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
 	 */
 	secinfo_flags = SGX_SECINFO_R | SGX_SECINFO_W | SGX_SECINFO_X;
 	encl_page = sgx_encl_page_alloc(encl, addr - encl->base, secinfo_flags);
-	if (IS_ERR(encl_page))
-		return VM_FAULT_OOM;
-
-	mutex_lock(&encl->lock);
+	if (IS_ERR(encl_page)) {
+		vmret = VM_FAULT_OOM;
+		goto err_out_unlock;
+	}
 
 	epc_page = sgx_encl_load_secs(encl);
 	if (IS_ERR(epc_page)) {
 		if (PTR_ERR(epc_page) == -EBUSY)
 			vmret = VM_FAULT_NOPAGE;
-		goto err_out_unlock;
+		goto err_out_encl;
 	}
 
 	epc_page = sgx_alloc_epc_page(encl_page, false);
 	if (IS_ERR(epc_page)) {
 		if (PTR_ERR(epc_page) == -EBUSY)
 			vmret =  VM_FAULT_NOPAGE;
-		goto err_out_unlock;
+		goto err_out_encl;
 	}
 
 	va_page = sgx_encl_grow(encl, false);
@@ -376,10 +386,6 @@  static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
 
 	ret = xa_insert(&encl->page_array, PFN_DOWN(encl_page->desc),
 			encl_page, GFP_KERNEL);
-	/*
-	 * If ret == -EBUSY then page was created in another flow while
-	 * running without encl->lock
-	 */
 	if (ret)
 		goto err_out_shrink;
 
@@ -389,7 +395,7 @@  static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
 
 	ret = __eaug(&pginfo, sgx_get_epc_virt_addr(epc_page));
 	if (ret)
-		goto err_out;
+		goto err_out_eaug;
 
 	encl_page->encl = encl;
 	encl_page->epc_page = epc_page;
@@ -408,20 +414,20 @@  static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
 		mutex_unlock(&encl->lock);
 		return VM_FAULT_SIGBUS;
 	}
+done:
 	mutex_unlock(&encl->lock);
 	return VM_FAULT_NOPAGE;
 
-err_out:
+err_out_eaug:
 	xa_erase(&encl->page_array, PFN_DOWN(encl_page->desc));
-
 err_out_shrink:
 	sgx_encl_shrink(encl, va_page);
 err_out_epc:
 	sgx_encl_free_epc_page(epc_page);
+err_out_encl:
+	kfree(encl_page);
 err_out_unlock:
 	mutex_unlock(&encl->lock);
-	kfree(encl_page);
-
 	return vmret;
 }