diff mbox series

x86/sgx: Fix double-free when EADD fails

Message ID 20191205100151.18950-1-jarkko.sakkinen@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series x86/sgx: Fix double-free when EADD fails | expand

Commit Message

Jarkko Sakkinen Dec. 5, 2019, 10:01 a.m. UTC
radix_tree_delete() gets called twice for the same page  when EADD
fails. This commit fixes the issue.

Cc: Sean Christopherson <sean.j.christopherson@intel.com>
Reported-by: Huang Haitao <haitao.huang@intel.com>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
---
 arch/x86/kernel/cpu/sgx/ioctl.c | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

Comments

Sean Christopherson Dec. 9, 2019, 8:52 p.m. UTC | #1
On Thu, Dec 05, 2019 at 12:01:51PM +0200, Jarkko Sakkinen wrote:
> radix_tree_delete() gets called twice for the same page  when EADD
> fails. This commit fixes the issue.
> 
> Cc: Sean Christopherson <sean.j.christopherson@intel.com>
> Reported-by: Huang Haitao <haitao.huang@intel.com>
> Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
> ---
>  arch/x86/kernel/cpu/sgx/ioctl.c | 23 ++++++++++-------------
>  1 file changed, 10 insertions(+), 13 deletions(-)
> 
> diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
> index ab9e48cd294b..2ff12038a8a4 100644
> --- a/arch/x86/kernel/cpu/sgx/ioctl.c
> +++ b/arch/x86/kernel/cpu/sgx/ioctl.c
> @@ -413,13 +413,8 @@ static int sgx_encl_add_page(struct sgx_encl *encl, unsigned long src,
>  
>  	ret = __sgx_encl_add_page(encl, encl_page, epc_page, secinfo,
>  				  src);
> -	if (ret) {
> -		/* ENCLS failure. */
> -		if (ret == -EIO)
> -			sgx_encl_destroy(encl);
> -
> +	if (ret)
>  		goto err_out;
> -	}
>  
>  	/*
>  	 * Complete the "add" before doing the "extend" so that the "add"
> @@ -432,17 +427,12 @@ static int sgx_encl_add_page(struct sgx_encl *encl, unsigned long src,
>  
>  	if (flags & SGX_PAGE_MEASURE) {
>  		ret = __sgx_encl_extend(encl, epc_page);
> -
> -		/* ENCLS failure. */
> -		if (ret) {
> -			sgx_encl_destroy(encl);
> -			goto out_unlock;
> -		}
> +		if (ret)
> +			goto err_out;
>  	}
>  
>  	sgx_mark_page_reclaimable(encl_page->epc_page);
>  
> -out_unlock:
>  	mutex_unlock(&encl->lock);
>  	up_read(&current->mm->mmap_sem);
>  	return ret;
> @@ -460,6 +450,13 @@ static int sgx_encl_add_page(struct sgx_encl *encl, unsigned long src,
>  	sgx_free_page(epc_page);
>  	kfree(encl_page);
>  
> +	/*
> +	 * Destroy enclave on ENCLS failure as this means that EPC has been
> +	 * invalidated.

This comment is wrong, EADD can fail due to bad userspace input, and both
EADD and EEXTEND can fail due to hardware/software bugs. 

> +	 */
> +	if (ret == -EIO)

Not a fan of making this dependent on -EIO, IMO invalidating iff EEXTEND
fails is cleaner.  In other words, I still think killing the enclave on
on EADD failure is unnecessary.

Side topic, __sgx_encl_add_page() doesn't correctly get_user_pages()
returning zero, e.g. the code should be something like:

	ret = get_user_pages(src, 1, 0, &src_page, NULL);
	if (!ret)
		return -EBUSY:
	if (ret < 0)
		return ret;

> +		sgx_encl_destroy(encl);
> +
>  	return ret;
>  }
>  
> -- 
> 2.20.1
>
Jarkko Sakkinen Dec. 11, 2019, 11:11 a.m. UTC | #2
On Mon, Dec 09, 2019 at 12:52:55PM -0800, Sean Christopherson wrote:
> On Thu, Dec 05, 2019 at 12:01:51PM +0200, Jarkko Sakkinen wrote:
> > radix_tree_delete() gets called twice for the same page  when EADD
> > fails. This commit fixes the issue.
> > 
> > Cc: Sean Christopherson <sean.j.christopherson@intel.com>
> > Reported-by: Huang Haitao <haitao.huang@intel.com>
> > Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
> > ---
> >  arch/x86/kernel/cpu/sgx/ioctl.c | 23 ++++++++++-------------
> >  1 file changed, 10 insertions(+), 13 deletions(-)
> > 
> > diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
> > index ab9e48cd294b..2ff12038a8a4 100644
> > --- a/arch/x86/kernel/cpu/sgx/ioctl.c
> > +++ b/arch/x86/kernel/cpu/sgx/ioctl.c
> > @@ -413,13 +413,8 @@ static int sgx_encl_add_page(struct sgx_encl *encl, unsigned long src,
> >  
> >  	ret = __sgx_encl_add_page(encl, encl_page, epc_page, secinfo,
> >  				  src);
> > -	if (ret) {
> > -		/* ENCLS failure. */
> > -		if (ret == -EIO)
> > -			sgx_encl_destroy(encl);
> > -
> > +	if (ret)
> >  		goto err_out;
> > -	}
> >  
> >  	/*
> >  	 * Complete the "add" before doing the "extend" so that the "add"
> > @@ -432,17 +427,12 @@ static int sgx_encl_add_page(struct sgx_encl *encl, unsigned long src,
> >  
> >  	if (flags & SGX_PAGE_MEASURE) {
> >  		ret = __sgx_encl_extend(encl, epc_page);
> > -
> > -		/* ENCLS failure. */
> > -		if (ret) {
> > -			sgx_encl_destroy(encl);
> > -			goto out_unlock;
> > -		}
> > +		if (ret)
> > +			goto err_out;
> >  	}
> >  
> >  	sgx_mark_page_reclaimable(encl_page->epc_page);
> >  
> > -out_unlock:
> >  	mutex_unlock(&encl->lock);
> >  	up_read(&current->mm->mmap_sem);
> >  	return ret;
> > @@ -460,6 +450,13 @@ static int sgx_encl_add_page(struct sgx_encl *encl, unsigned long src,
> >  	sgx_free_page(epc_page);
> >  	kfree(encl_page);
> >  
> > +	/*
> > +	 * Destroy enclave on ENCLS failure as this means that EPC has been
> > +	 * invalidated.
> 
> This comment is wrong, EADD can fail due to bad userspace input, and both
> EADD and EEXTEND can fail due to hardware/software bugs. 

Any piece of kernel code can fail because of either hardware or software
bug. Still almost none of the comments explicitly state this.

However, userspace input is a valid remark.

> 
> > +	 */
> > +	if (ret == -EIO)
> 
> Not a fan of making this dependent on -EIO, IMO invalidating iff EEXTEND
> fails is cleaner.  In other words, I still think killing the enclave on
> on EADD failure is unnecessary.

This comes down to whether you consider them as a transaction. I do
and it makes a coherent API.

If user code wants to give on purpose bad input for EADD, I think
it is sane to kill the enclave also in that case. There is no legit
use case where user code would do that.

Potentially that could be malicious behaviour (maybe nothing useful can
be done with that primitive but that is only thing that it could be
theoretically used for since legit use cases do not exist).

> Side topic, __sgx_encl_add_page() doesn't correctly get_user_pages()
> returning zero, e.g. the code should be something like:
> 
> 	ret = get_user_pages(src, 1, 0, &src_page, NULL);
> 	if (!ret)
> 		return -EBUSY:
> 	if (ret < 0)
> 		return ret;

Thanks! I'll update the GIT based on your feedback (it's probably
better that at this point for small scoped changes you just
describe the change and I'll update. With all squashing and
rebasing that is fastest).

/Jarkko
Sean Christopherson Dec. 11, 2019, 4:07 p.m. UTC | #3
On Wed, Dec 11, 2019 at 01:11:52PM +0200, Jarkko Sakkinen wrote:
> On Mon, Dec 09, 2019 at 12:52:55PM -0800, Sean Christopherson wrote:
> > Not a fan of making this dependent on -EIO, IMO invalidating iff EEXTEND
> > fails is cleaner.  In other words, I still think killing the enclave on
> > on EADD failure is unnecessary.
> 
> This comes down to whether you consider them as a transaction. I do
> and it makes a coherent API.

What's your definition of transaction in this context?  My interpretation
of transaction here would be that each ioctl() should either succeed, fail
without modifying persistent (enclave) state, or fail and kill the enclave
(because its state modifications are irreversible).

EEXTEND falls into the last case because EADD can't be unwound.  EADD falls
into the middle case because everything up to EADD can be cleanly undone.
Jarkko Sakkinen Dec. 12, 2019, 11:59 p.m. UTC | #4
On Wed, Dec 11, 2019 at 08:07:55AM -0800, Sean Christopherson wrote:
> On Wed, Dec 11, 2019 at 01:11:52PM +0200, Jarkko Sakkinen wrote:
> > On Mon, Dec 09, 2019 at 12:52:55PM -0800, Sean Christopherson wrote:
> > > Not a fan of making this dependent on -EIO, IMO invalidating iff EEXTEND
> > > fails is cleaner.  In other words, I still think killing the enclave on
> > > on EADD failure is unnecessary.
> > 
> > This comes down to whether you consider them as a transaction. I do
> > and it makes a coherent API.
> 
> What's your definition of transaction in this context?  My interpretation
> of transaction here would be that each ioctl() should either succeed, fail
> without modifying persistent (enclave) state, or fail and kill the enclave
> (because its state modifications are irreversible).
> 
> EEXTEND falls into the last case because EADD can't be unwound.  EADD falls
> into the middle case because everything up to EADD can be cleanly undone.

My definition is that if any of EADD/EEXTEND fails the enclave
should be killed as there is no good reason to support that kind
of use in any possible way. As long as it is documented, it is
fine.

/Jarkko
diff mbox series

Patch

diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
index ab9e48cd294b..2ff12038a8a4 100644
--- a/arch/x86/kernel/cpu/sgx/ioctl.c
+++ b/arch/x86/kernel/cpu/sgx/ioctl.c
@@ -413,13 +413,8 @@  static int sgx_encl_add_page(struct sgx_encl *encl, unsigned long src,
 
 	ret = __sgx_encl_add_page(encl, encl_page, epc_page, secinfo,
 				  src);
-	if (ret) {
-		/* ENCLS failure. */
-		if (ret == -EIO)
-			sgx_encl_destroy(encl);
-
+	if (ret)
 		goto err_out;
-	}
 
 	/*
 	 * Complete the "add" before doing the "extend" so that the "add"
@@ -432,17 +427,12 @@  static int sgx_encl_add_page(struct sgx_encl *encl, unsigned long src,
 
 	if (flags & SGX_PAGE_MEASURE) {
 		ret = __sgx_encl_extend(encl, epc_page);
-
-		/* ENCLS failure. */
-		if (ret) {
-			sgx_encl_destroy(encl);
-			goto out_unlock;
-		}
+		if (ret)
+			goto err_out;
 	}
 
 	sgx_mark_page_reclaimable(encl_page->epc_page);
 
-out_unlock:
 	mutex_unlock(&encl->lock);
 	up_read(&current->mm->mmap_sem);
 	return ret;
@@ -460,6 +450,13 @@  static int sgx_encl_add_page(struct sgx_encl *encl, unsigned long src,
 	sgx_free_page(epc_page);
 	kfree(encl_page);
 
+	/*
+	 * Destroy enclave on ENCLS failure as this means that EPC has been
+	 * invalidated.
+	 */
+	if (ret == -EIO)
+		sgx_encl_destroy(encl);
+
 	return ret;
 }