diff mbox series

[13/25] x86/sgx: Support adding of pages to initialized enclave

Message ID 9ab661a845d242cb10a90ade997f8ebda33cc7c9.1638381245.git.reinette.chatre@intel.com (mailing list archive)
State New, archived
Headers show
Series x86/sgx and selftests/sgx: Support SGX2 | expand

Commit Message

Reinette Chatre Dec. 1, 2021, 7:23 p.m. UTC
With SGX1 an enclave needs to be created with its maximum memory demands
allocated. Pages cannot be added to an enclave after it is initialized.
SGX2 introduces a new function, ENCLS[EAUG], that can be used to add
pages to an initialized enclave. With SGX2 the enclave still needs to
set aside address space for its maximum memory demands during enclave
creation, but all pages need not be added before enclave initialization.
Pages can be added during enclave runtime.

Add support for dynamically adding pages to an initialized enclave,
architecturally limited to RW permission. Add pages via the page fault
handler at the time an enclave address without a backing enclave page
is accessed, potentially directly reclaiming pages if no free pages
are available.

The enclave is still required to run ENCLU[EACCEPT] on the page before
it can be used. A useful flow is for the enclave to run ENCLU[EACCEPT]
on an uninitialized address. This will trigger the page fault handler
that will add the enclave page and return execution to the enclave to
repeat the ENCLU[EACCEPT] instruction, this time successful.

If the enclave accesses an uninitialized address in another way, for
example by expanding the enclave stack to a page that has not yet been
added, then the page fault handler would add the page on the first
write but upon returning to the enclave the instruction that triggered
the page fault would be repeated and since ENCLU[EACCEPT] was not run
yet it would trigger a second page fault, this time with the SGX flag
set in the page fault error code. This can only be recovered by entering
the enclave again and directly running the ENCLU[EACCEPT] instruction on
the now initialized address.

Accessing an uninitialized address from outside the enclave also triggers
this flow but the page will remain in PENDING state until accepted from
within the enclave.

The page is added with the architecturally constrained RW permissions
as runtime as well as maximum allowed permissions. It is understood that
there are some use cases, for example code relocation, that requires RWX
maximum permissions. Supporting these use cases require guidance from user
space policy before such maximum permissions can be allowed. Integration
with user policy is deferred to a follow-up series.

Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
---
 arch/x86/kernel/cpu/sgx/encl.c  | 133 ++++++++++++++++++++++++++++++++
 arch/x86/kernel/cpu/sgx/encl.h  |   2 +
 arch/x86/kernel/cpu/sgx/ioctl.c |   4 +-
 3 files changed, 137 insertions(+), 2 deletions(-)

Comments

Dave Hansen Dec. 3, 2021, 12:38 a.m. UTC | #1
On 12/1/21 11:23 AM, Reinette Chatre wrote:
> +static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
> +				     struct sgx_encl *encl, unsigned long addr)
> +{
> +	struct sgx_pageinfo pginfo = {0};
> +	struct sgx_encl_page *encl_page;
> +	struct sgx_epc_page *epc_page;
> +	struct sgx_va_page *va_page;
> +	unsigned long phys_addr;
> +	unsigned long prot;
> +	vm_fault_t vmret;
> +	int ret;
> +
> +	if (!test_bit(SGX_ENCL_INITIALIZED, &encl->flags))
> +		return VM_FAULT_SIGBUS;
> +
> +	encl_page = kzalloc(sizeof(*encl_page), GFP_KERNEL);
> +	if (!encl_page)
> +		return VM_FAULT_OOM;
> +
> +	encl_page->desc = addr;
> +	encl_page->encl = encl;
> +
> +	/*
> +	 * Adding a regular page that is architecturally allowed to only
> +	 * be created with RW permissions.
> +	 * TBD: Interface with user space policy to support max permissions
> +	 * of RWX.
> +	 */
> +	prot = PROT_READ | PROT_WRITE;
> +	encl_page->vm_run_prot_bits = calc_vm_prot_bits(prot, 0);
> +	encl_page->vm_max_prot_bits = encl_page->vm_run_prot_bits;
> +
> +	epc_page = sgx_alloc_epc_page(encl_page, true);
> +	if (IS_ERR(epc_page)) {
> +		kfree(encl_page);
> +		return VM_FAULT_SIGBUS;
> +	}
> +
> +	va_page = sgx_encl_grow(encl);
> +	if (IS_ERR(va_page)) {
> +		ret = PTR_ERR(va_page);
> +		goto err_out_free;
> +	}
> +
> +	mutex_lock(&encl->lock);
> +
> +	/*
> +	 * Copy comment from sgx_encl_add_page() to maintain guidance in
> +	 * this similar flow:
> +	 * Adding to encl->va_pages must be done under encl->lock.  Ditto for
> +	 * deleting (via sgx_encl_shrink()) in the error path.
> +	 */
> +	if (va_page)
> +		list_add(&va_page->list, &encl->va_pages);
> +
> +	ret = xa_insert(&encl->page_array, PFN_DOWN(encl_page->desc),
> +			encl_page, GFP_KERNEL);
> +	/*
> +	 * If ret == -EBUSY then page was created in another flow while
> +	 * running without encl->lock
> +	 */
> +	if (ret)
> +		goto err_out_unlock;
> +
> +	pginfo.secs = (unsigned long)sgx_get_epc_virt_addr(encl->secs.epc_page);
> +	pginfo.addr = encl_page->desc & PAGE_MASK;
> +	pginfo.metadata = 0;
> +
> +	ret = __eaug(&pginfo, sgx_get_epc_virt_addr(epc_page));
> +	if (ret)
> +		goto err_out;
> +
> +	encl_page->encl = encl;
> +	encl_page->epc_page = epc_page;
> +	encl_page->type = SGX_PAGE_TYPE_REG;
> +	encl->secs_child_cnt++;
> +
> +	sgx_mark_page_reclaimable(encl_page->epc_page);
> +
> +	phys_addr = sgx_get_epc_phys_addr(epc_page);
> +	/*
> +	 * Do not undo everything when creating PTE entry fails - next #PF
> +	 * would find page ready for a PTE.
> +	 * PAGE_SHARED because protection is forced to be RW above and COW
> +	 * is not supported.
> +	 */
> +	vmret = vmf_insert_pfn_prot(vma, addr, PFN_DOWN(phys_addr),
> +				    PAGE_SHARED);
> +	if (vmret != VM_FAULT_NOPAGE) {
> +		mutex_unlock(&encl->lock);
> +		return VM_FAULT_SIGBUS;
> +	}
> +	mutex_unlock(&encl->lock);
> +	return VM_FAULT_NOPAGE;
> +
> +err_out:
> +	xa_erase(&encl->page_array, PFN_DOWN(encl_page->desc));
> +
> +err_out_unlock:
> +	sgx_encl_shrink(encl, va_page);
> +	mutex_unlock(&encl->lock);
> +
> +err_out_free:
> +	sgx_encl_free_epc_page(epc_page);
> +	kfree(encl_page);
> +
> +	return VM_FAULT_SIGBUS;
> +}

There seems to be very little code sharing between this and the existing
page addition.  Are we confident that no refactoring here is in order?
Reinette Chatre Dec. 3, 2021, 6:47 p.m. UTC | #2
Hi Dave,

On 12/2/2021 4:38 PM, Dave Hansen wrote:
> On 12/1/21 11:23 AM, Reinette Chatre wrote:
>> +static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
>> +				     struct sgx_encl *encl, unsigned long addr)
>> +{
>> +	struct sgx_pageinfo pginfo = {0};
>> +	struct sgx_encl_page *encl_page;
>> +	struct sgx_epc_page *epc_page;
>> +	struct sgx_va_page *va_page;
>> +	unsigned long phys_addr;
>> +	unsigned long prot;
>> +	vm_fault_t vmret;
>> +	int ret;
>> +
>> +	if (!test_bit(SGX_ENCL_INITIALIZED, &encl->flags))
>> +		return VM_FAULT_SIGBUS;
>> +
>> +	encl_page = kzalloc(sizeof(*encl_page), GFP_KERNEL);
>> +	if (!encl_page)
>> +		return VM_FAULT_OOM;
>> +
>> +	encl_page->desc = addr;
>> +	encl_page->encl = encl;
>> +
>> +	/*
>> +	 * Adding a regular page that is architecturally allowed to only
>> +	 * be created with RW permissions.
>> +	 * TBD: Interface with user space policy to support max permissions
>> +	 * of RWX.
>> +	 */
>> +	prot = PROT_READ | PROT_WRITE;
>> +	encl_page->vm_run_prot_bits = calc_vm_prot_bits(prot, 0);
>> +	encl_page->vm_max_prot_bits = encl_page->vm_run_prot_bits;
>> +
>> +	epc_page = sgx_alloc_epc_page(encl_page, true);
>> +	if (IS_ERR(epc_page)) {
>> +		kfree(encl_page);
>> +		return VM_FAULT_SIGBUS;
>> +	}
>> +
>> +	va_page = sgx_encl_grow(encl);
>> +	if (IS_ERR(va_page)) {
>> +		ret = PTR_ERR(va_page);
>> +		goto err_out_free;
>> +	}
>> +
>> +	mutex_lock(&encl->lock);
>> +
>> +	/*
>> +	 * Copy comment from sgx_encl_add_page() to maintain guidance in
>> +	 * this similar flow:
>> +	 * Adding to encl->va_pages must be done under encl->lock.  Ditto for
>> +	 * deleting (via sgx_encl_shrink()) in the error path.
>> +	 */
>> +	if (va_page)
>> +		list_add(&va_page->list, &encl->va_pages);
>> +
>> +	ret = xa_insert(&encl->page_array, PFN_DOWN(encl_page->desc),
>> +			encl_page, GFP_KERNEL);
>> +	/*
>> +	 * If ret == -EBUSY then page was created in another flow while
>> +	 * running without encl->lock
>> +	 */
>> +	if (ret)
>> +		goto err_out_unlock;
>> +
>> +	pginfo.secs = (unsigned long)sgx_get_epc_virt_addr(encl->secs.epc_page);
>> +	pginfo.addr = encl_page->desc & PAGE_MASK;
>> +	pginfo.metadata = 0;
>> +
>> +	ret = __eaug(&pginfo, sgx_get_epc_virt_addr(epc_page));
>> +	if (ret)
>> +		goto err_out;
>> +
>> +	encl_page->encl = encl;
>> +	encl_page->epc_page = epc_page;
>> +	encl_page->type = SGX_PAGE_TYPE_REG;
>> +	encl->secs_child_cnt++;
>> +
>> +	sgx_mark_page_reclaimable(encl_page->epc_page);
>> +
>> +	phys_addr = sgx_get_epc_phys_addr(epc_page);
>> +	/*
>> +	 * Do not undo everything when creating PTE entry fails - next #PF
>> +	 * would find page ready for a PTE.
>> +	 * PAGE_SHARED because protection is forced to be RW above and COW
>> +	 * is not supported.
>> +	 */
>> +	vmret = vmf_insert_pfn_prot(vma, addr, PFN_DOWN(phys_addr),
>> +				    PAGE_SHARED);
>> +	if (vmret != VM_FAULT_NOPAGE) {
>> +		mutex_unlock(&encl->lock);
>> +		return VM_FAULT_SIGBUS;
>> +	}
>> +	mutex_unlock(&encl->lock);
>> +	return VM_FAULT_NOPAGE;
>> +
>> +err_out:
>> +	xa_erase(&encl->page_array, PFN_DOWN(encl_page->desc));
>> +
>> +err_out_unlock:
>> +	sgx_encl_shrink(encl, va_page);
>> +	mutex_unlock(&encl->lock);
>> +
>> +err_out_free:
>> +	sgx_encl_free_epc_page(epc_page);
>> +	kfree(encl_page);
>> +
>> +	return VM_FAULT_SIGBUS;
>> +}
> 
> There seems to be very little code sharing between this and the existing
> page addition.  Are we confident that no refactoring here is in order?
> 

I can understand your concern here because this code looks similar to 
the page addition code. Primarily because it uses the same objects (an 
enclave page, an EPC page, and a VA page).

The flow is different though because (1) the enclave page needs to be 
created differently to handle its static (RW) permissions as opposed to 
the permissions from additional meta data, (2) a different instruction 
(ENCLS[EAUG] vs ENCLS[EADD]) is used, and (3) the page table entries are 
installed which does not form part of the original page addition.

A major complication to factoring out code is that there are (slightly 
different) allocations needed before the mutex is obtained (enclave 
page, EPC page, and VA page) and then different actions taken on these 
individual allocations with the mutex held. With the mutex in the middle 
of difference in allocation and different actions it is not clear to me 
how to refactor this.

Please do let me know if you see any ways in which I can improve this code.

Reinette
Jarkko Sakkinen Dec. 4, 2021, 11:13 p.m. UTC | #3
"to initialize" -> "to an initialized"

On Wed, Dec 01, 2021 at 11:23:11AM -0800, Reinette Chatre wrote:
> With SGX1 an enclave needs to be created with its maximum memory demands
> allocated. Pages cannot be added to an enclave after it is initialized.
> SGX2 introduces a new function, ENCLS[EAUG], that can be used to add
> pages to an initialized enclave. With SGX2 the enclave still needs to
> set aside address space for its maximum memory demands during enclave
> creation, but all pages need not be added before enclave initialization.
> Pages can be added during enclave runtime.
> 
> Add support for dynamically adding pages to an initialized enclave,
> architecturally limited to RW permission. Add pages via the page fault
> handler at the time an enclave address without a backing enclave page
> is accessed, potentially directly reclaiming pages if no free pages
> are available.
> 
> The enclave is still required to run ENCLU[EACCEPT] on the page before
> it can be used. A useful flow is for the enclave to run ENCLU[EACCEPT]
> on an uninitialized address. This will trigger the page fault handler
> that will add the enclave page and return execution to the enclave to
> repeat the ENCLU[EACCEPT] instruction, this time successful.
> 
> If the enclave accesses an uninitialized address in another way, for
> example by expanding the enclave stack to a page that has not yet been
> added, then the page fault handler would add the page on the first
> write but upon returning to the enclave the instruction that triggered
> the page fault would be repeated and since ENCLU[EACCEPT] was not run
> yet it would trigger a second page fault, this time with the SGX flag
> set in the page fault error code. This can only be recovered by entering
> the enclave again and directly running the ENCLU[EACCEPT] instruction on
> the now initialized address.
> 
> Accessing an uninitialized address from outside the enclave also triggers
> this flow but the page will remain in PENDING state until accepted from
> within the enclave.

What does it mean being in PENDING state, and more imporantly, what is
PENDING state? What does a memory access within enclave cause when it
touch a page within this state?

I see a lot of text in the commit message but zero mentions about EPCM
expect this one sudden mention about PENDING field without attaching
it to anything concrete.

/Jarkko
Reinette Chatre Dec. 6, 2021, 9:44 p.m. UTC | #4
Hi Jarkko,

On 12/4/2021 3:13 PM, Jarkko Sakkinen wrote:
> "to initialize" -> "to an initialized"

Will do.


> 
> On Wed, Dec 01, 2021 at 11:23:11AM -0800, Reinette Chatre wrote:
>> With SGX1 an enclave needs to be created with its maximum memory demands
>> allocated. Pages cannot be added to an enclave after it is initialized.
>> SGX2 introduces a new function, ENCLS[EAUG], that can be used to add
>> pages to an initialized enclave. With SGX2 the enclave still needs to
>> set aside address space for its maximum memory demands during enclave
>> creation, but all pages need not be added before enclave initialization.
>> Pages can be added during enclave runtime.
>>
>> Add support for dynamically adding pages to an initialized enclave,
>> architecturally limited to RW permission. Add pages via the page fault
>> handler at the time an enclave address without a backing enclave page
>> is accessed, potentially directly reclaiming pages if no free pages
>> are available.
>>
>> The enclave is still required to run ENCLU[EACCEPT] on the page before
>> it can be used. A useful flow is for the enclave to run ENCLU[EACCEPT]
>> on an uninitialized address. This will trigger the page fault handler
>> that will add the enclave page and return execution to the enclave to
>> repeat the ENCLU[EACCEPT] instruction, this time successful.
>>
>> If the enclave accesses an uninitialized address in another way, for
>> example by expanding the enclave stack to a page that has not yet been
>> added, then the page fault handler would add the page on the first
>> write but upon returning to the enclave the instruction that triggered
>> the page fault would be repeated and since ENCLU[EACCEPT] was not run
>> yet it would trigger a second page fault, this time with the SGX flag
>> set in the page fault error code. This can only be recovered by entering
>> the enclave again and directly running the ENCLU[EACCEPT] instruction on
>> the now initialized address.
>>
>> Accessing an uninitialized address from outside the enclave also triggers
>> this flow but the page will remain in PENDING state until accepted from
>> within the enclave.
> 
> What does it mean being in PENDING state, and more imporantly, what is
> PENDING state? What does a memory access within enclave cause when it
> touch a page within this state?

The PENDING state is the enclave page state from the SGX hardware's 
perspective. The OS uses the ENCLS[EAUG] SGX2 function to add a new page 
to the enclave but from the SGX hardware's perspective it would be in a 
PENDING state until the enclave accepts the page. An access to the page 
in PENDING state would result in a page fault.


> I see a lot of text in the commit message but zero mentions about EPCM
> expect this one sudden mention about PENDING field without attaching
> it to anything concrete.

My apologies - I will add this to this changelog. This matches your 
request to describe the __eaug() wrapper introduced in patch 02/25. 
Would you like me to duplicate this information here and in that patch 
(a new patch dedicated to the __eaug() wrapper) or would you be ok if I 
introduce the wrappers all together briefly as in the example you 
provide and then detail the flows where the wrappers are used - like 
this patch?

Reinette
Jarkko Sakkinen Dec. 11, 2021, 8 a.m. UTC | #5
On Mon, 2021-12-06 at 13:44 -0800, Reinette Chatre wrote:
> Hi Jarkko,
> 
> On 12/4/2021 3:13 PM, Jarkko Sakkinen wrote:
> > "to initialize" -> "to an initialized"
> 
> Will do.
> 
> 
> > 
> > On Wed, Dec 01, 2021 at 11:23:11AM -0800, Reinette Chatre wrote:
> > > With SGX1 an enclave needs to be created with its maximum memory demands
> > > allocated. Pages cannot be added to an enclave after it is initialized.
> > > SGX2 introduces a new function, ENCLS[EAUG], that can be used to add
> > > pages to an initialized enclave. With SGX2 the enclave still needs to
> > > set aside address space for its maximum memory demands during enclave
> > > creation, but all pages need not be added before enclave initialization.
> > > Pages can be added during enclave runtime.
> > > 
> > > Add support for dynamically adding pages to an initialized enclave,
> > > architecturally limited to RW permission. Add pages via the page fault
> > > handler at the time an enclave address without a backing enclave page
> > > is accessed, potentially directly reclaiming pages if no free pages
> > > are available.
> > > 
> > > The enclave is still required to run ENCLU[EACCEPT] on the page before
> > > it can be used. A useful flow is for the enclave to run ENCLU[EACCEPT]
> > > on an uninitialized address. This will trigger the page fault handler
> > > that will add the enclave page and return execution to the enclave to
> > > repeat the ENCLU[EACCEPT] instruction, this time successful.
> > > 
> > > If the enclave accesses an uninitialized address in another way, for
> > > example by expanding the enclave stack to a page that has not yet been
> > > added, then the page fault handler would add the page on the first
> > > write but upon returning to the enclave the instruction that triggered
> > > the page fault would be repeated and since ENCLU[EACCEPT] was not run
> > > yet it would trigger a second page fault, this time with the SGX flag
> > > set in the page fault error code. This can only be recovered by entering
> > > the enclave again and directly running the ENCLU[EACCEPT] instruction on
> > > the now initialized address.
> > > 
> > > Accessing an uninitialized address from outside the enclave also triggers
> > > this flow but the page will remain in PENDING state until accepted from
> > > within the enclave.
> > 
> > What does it mean being in PENDING state, and more imporantly, what is
> > PENDING state? What does a memory access within enclave cause when it
> > touch a page within this state?
> 
> The PENDING state is the enclave page state from the SGX hardware's 
> perspective. The OS uses the ENCLS[EAUG] SGX2 function to add a new page 
> to the enclave but from the SGX hardware's perspective it would be in a 
> PENDING state until the enclave accepts the page. An access to the page 
> in PENDING state would result in a page fault.
> 
> 
> > I see a lot of text in the commit message but zero mentions about EPCM
> > expect this one sudden mention about PENDING field without attaching
> > it to anything concrete.
> 
> My apologies - I will add this to this changelog. This matches your 
> request to describe the __eaug() wrapper introduced in patch 02/25. 
> Would you like me to duplicate this information here and in that patch 
> (a new patch dedicated to the __eaug() wrapper) or would you be ok if I 
> introduce the wrappers all together briefly as in the example you 
> provide and then detail the flows where the wrappers are used - like 
> this patch?

I think it would be a good place to describe these details in 02/25,
and skip them in rest of the patches.

/Jarkko
Reinette Chatre Dec. 13, 2021, 10:12 p.m. UTC | #6
Hi Jarkko,

On 12/11/2021 12:00 AM, Jarkko Sakkinen wrote:
> On Mon, 2021-12-06 at 13:44 -0800, Reinette Chatre wrote:
>> On 12/4/2021 3:13 PM, Jarkko Sakkinen wrote:
>>> On Wed, Dec 01, 2021 at 11:23:11AM -0800, Reinette Chatre wrote:

...

>>>> Accessing an uninitialized address from outside the enclave also triggers
>>>> this flow but the page will remain in PENDING state until accepted from
>>>> within the enclave.
>>>
>>> What does it mean being in PENDING state, and more imporantly, what is
>>> PENDING state? What does a memory access within enclave cause when it
>>> touch a page within this state?
>>
>> The PENDING state is the enclave page state from the SGX hardware's
>> perspective. The OS uses the ENCLS[EAUG] SGX2 function to add a new page
>> to the enclave but from the SGX hardware's perspective it would be in a
>> PENDING state until the enclave accepts the page. An access to the page
>> in PENDING state would result in a page fault.
>>
>>
>>> I see a lot of text in the commit message but zero mentions about EPCM
>>> expect this one sudden mention about PENDING field without attaching
>>> it to anything concrete.
>>
>> My apologies - I will add this to this changelog. This matches your
>> request to describe the __eaug() wrapper introduced in patch 02/25.
>> Would you like me to duplicate this information here and in that patch
>> (a new patch dedicated to the __eaug() wrapper) or would you be ok if I
>> introduce the wrappers all together briefly as in the example you
>> provide and then detail the flows where the wrappers are used - like
>> this patch?
> 
> I think it would be a good place to describe these details in 02/25,
> and skip them in rest of the patches.
> 

Will do. I do think describing this amount of detail for the new SGX2 
functions would be too much for a single patch so I currently plan to 
split that (02/25) patch into a new patch per SGX2 instruction. Is that 
ok with you or would you like to keep it in a single patch?

Reinette
Jarkko Sakkinen Dec. 28, 2021, 2:57 p.m. UTC | #7
On Mon, Dec 13, 2021 at 02:12:57PM -0800, Reinette Chatre wrote:
> Hi Jarkko,
> 
> On 12/11/2021 12:00 AM, Jarkko Sakkinen wrote:
> > On Mon, 2021-12-06 at 13:44 -0800, Reinette Chatre wrote:
> > > On 12/4/2021 3:13 PM, Jarkko Sakkinen wrote:
> > > > On Wed, Dec 01, 2021 at 11:23:11AM -0800, Reinette Chatre wrote:
> 
> ...
> 
> > > > > Accessing an uninitialized address from outside the enclave also triggers
> > > > > this flow but the page will remain in PENDING state until accepted from
> > > > > within the enclave.
> > > > 
> > > > What does it mean being in PENDING state, and more imporantly, what is
> > > > PENDING state? What does a memory access within enclave cause when it
> > > > touch a page within this state?
> > > 
> > > The PENDING state is the enclave page state from the SGX hardware's
> > > perspective. The OS uses the ENCLS[EAUG] SGX2 function to add a new page
> > > to the enclave but from the SGX hardware's perspective it would be in a
> > > PENDING state until the enclave accepts the page. An access to the page
> > > in PENDING state would result in a page fault.
> > > 
> > > 
> > > > I see a lot of text in the commit message but zero mentions about EPCM
> > > > expect this one sudden mention about PENDING field without attaching
> > > > it to anything concrete.
> > > 
> > > My apologies - I will add this to this changelog. This matches your
> > > request to describe the __eaug() wrapper introduced in patch 02/25.
> > > Would you like me to duplicate this information here and in that patch
> > > (a new patch dedicated to the __eaug() wrapper) or would you be ok if I
> > > introduce the wrappers all together briefly as in the example you
> > > provide and then detail the flows where the wrappers are used - like
> > > this patch?
> > 
> > I think it would be a good place to describe these details in 02/25,
> > and skip them in rest of the patches.
> > 
> 
> Will do. I do think describing this amount of detail for the new SGX2
> functions would be too much for a single patch so I currently plan to split
> that (02/25) patch into a new patch per SGX2 instruction. Is that ok with
> you or would you like to keep it in a single patch?

It's ok for me.

/Jarkko
Jarkko Sakkinen March 1, 2022, 3:13 p.m. UTC | #8
On Wed, Dec 01, 2021 at 11:23:11AM -0800, Reinette Chatre wrote:
> diff --git a/arch/x86/kernel/cpu/sgx/encl.h b/arch/x86/kernel/cpu/sgx/encl.h
> index 848a28d28d3d..1b6ce1da7c92 100644
> --- a/arch/x86/kernel/cpu/sgx/encl.h
> +++ b/arch/x86/kernel/cpu/sgx/encl.h
> @@ -123,4 +123,6 @@ void sgx_encl_free_epc_page(struct sgx_epc_page *page);
>  struct sgx_encl_page *sgx_encl_load_page(struct sgx_encl *encl,
>  					 unsigned long addr);
>  
> +struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl);
> +void sgx_encl_shrink(struct sgx_encl *encl, struct sgx_va_page *va_page);
>  #endif /* _X86_ENCL_H */
> diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
> index 5dddb3c9f742..de0bf68ee842 100644
> --- a/arch/x86/kernel/cpu/sgx/ioctl.c
> +++ b/arch/x86/kernel/cpu/sgx/ioctl.c
> @@ -17,7 +17,7 @@
>  #include "encl.h"
>  #include "encls.h"
>  
> -static struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl)
> +struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl)
>  {
>  	struct sgx_va_page *va_page = NULL;
>  	void *err;
> @@ -43,7 +43,7 @@ static struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl)
>  	return va_page;
>  }
>  
> -static void sgx_encl_shrink(struct sgx_encl *encl, struct sgx_va_page *va_page)
> +void sgx_encl_shrink(struct sgx_encl *encl, struct sgx_va_page *va_page)
>  {
>  	encl->page_cnt--;

Nit: this should be a separate patch, e.g.

  x86/sgx: Export sgx_encl_{grow,shrink}()

  In order to use sgx_encl_{grow,shrink}() in the page augementation code
  located in encl.c, export these functions.

BR, Jarkko
Reinette Chatre March 1, 2022, 5:08 p.m. UTC | #9
Hi Jarkko,

On 3/1/2022 7:13 AM, Jarkko Sakkinen wrote:
> On Wed, Dec 01, 2021 at 11:23:11AM -0800, Reinette Chatre wrote:
>> diff --git a/arch/x86/kernel/cpu/sgx/encl.h b/arch/x86/kernel/cpu/sgx/encl.h
>> index 848a28d28d3d..1b6ce1da7c92 100644
>> --- a/arch/x86/kernel/cpu/sgx/encl.h
>> +++ b/arch/x86/kernel/cpu/sgx/encl.h
>> @@ -123,4 +123,6 @@ void sgx_encl_free_epc_page(struct sgx_epc_page *page);
>>  struct sgx_encl_page *sgx_encl_load_page(struct sgx_encl *encl,
>>  					 unsigned long addr);
>>  
>> +struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl);
>> +void sgx_encl_shrink(struct sgx_encl *encl, struct sgx_va_page *va_page);
>>  #endif /* _X86_ENCL_H */
>> diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
>> index 5dddb3c9f742..de0bf68ee842 100644
>> --- a/arch/x86/kernel/cpu/sgx/ioctl.c
>> +++ b/arch/x86/kernel/cpu/sgx/ioctl.c
>> @@ -17,7 +17,7 @@
>>  #include "encl.h"
>>  #include "encls.h"
>>  
>> -static struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl)
>> +struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl)
>>  {
>>  	struct sgx_va_page *va_page = NULL;
>>  	void *err;
>> @@ -43,7 +43,7 @@ static struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl)
>>  	return va_page;
>>  }
>>  
>> -static void sgx_encl_shrink(struct sgx_encl *encl, struct sgx_va_page *va_page)
>> +void sgx_encl_shrink(struct sgx_encl *encl, struct sgx_va_page *va_page)
>>  {
>>  	encl->page_cnt--;
> 
> Nit: this should be a separate patch, e.g.
> 
>   x86/sgx: Export sgx_encl_{grow,shrink}()
> 
>   In order to use sgx_encl_{grow,shrink}() in the page augementation code
>   located in encl.c, export these functions.
> 

Sure, will do.

Reinette
diff mbox series

Patch

diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index 03c4d7e00b44..342b97dd4c33 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -124,6 +124,128 @@  struct sgx_encl_page *sgx_encl_load_page(struct sgx_encl *encl,
 	return entry;
 }
 
+/**
+ * sgx_encl_eaug_page - Dynamically add page to initialized enclave
+ * @vma:	VMA obtained from fault info from where page is accessed
+ * @encl:	enclave accessing the page
+ * @addr:	address that triggered the page fault
+ *
+ * When an initialized enclave accesses a page with no backing EPC page
+ * on a SGX2 system then the EPC can be added dynamically via the SGX2
+ * ENCLS[EAUG] instruction.
+ *
+ * Returns: Appropriate vm_fault_t: VM_FAULT_NOPAGE when PTE was installed
+ * successfully, VM_FAULT_SIGBUS or VM_FAULT_OOM as error otherwise.
+ */
+static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
+				     struct sgx_encl *encl, unsigned long addr)
+{
+	struct sgx_pageinfo pginfo = {0};
+	struct sgx_encl_page *encl_page;
+	struct sgx_epc_page *epc_page;
+	struct sgx_va_page *va_page;
+	unsigned long phys_addr;
+	unsigned long prot;
+	vm_fault_t vmret;
+	int ret;
+
+	if (!test_bit(SGX_ENCL_INITIALIZED, &encl->flags))
+		return VM_FAULT_SIGBUS;
+
+	encl_page = kzalloc(sizeof(*encl_page), GFP_KERNEL);
+	if (!encl_page)
+		return VM_FAULT_OOM;
+
+	encl_page->desc = addr;
+	encl_page->encl = encl;
+
+	/*
+	 * Adding a regular page that is architecturally allowed to only
+	 * be created with RW permissions.
+	 * TBD: Interface with user space policy to support max permissions
+	 * of RWX.
+	 */
+	prot = PROT_READ | PROT_WRITE;
+	encl_page->vm_run_prot_bits = calc_vm_prot_bits(prot, 0);
+	encl_page->vm_max_prot_bits = encl_page->vm_run_prot_bits;
+
+	epc_page = sgx_alloc_epc_page(encl_page, true);
+	if (IS_ERR(epc_page)) {
+		kfree(encl_page);
+		return VM_FAULT_SIGBUS;
+	}
+
+	va_page = sgx_encl_grow(encl);
+	if (IS_ERR(va_page)) {
+		ret = PTR_ERR(va_page);
+		goto err_out_free;
+	}
+
+	mutex_lock(&encl->lock);
+
+	/*
+	 * Copy comment from sgx_encl_add_page() to maintain guidance in
+	 * this similar flow:
+	 * Adding to encl->va_pages must be done under encl->lock.  Ditto for
+	 * deleting (via sgx_encl_shrink()) in the error path.
+	 */
+	if (va_page)
+		list_add(&va_page->list, &encl->va_pages);
+
+	ret = xa_insert(&encl->page_array, PFN_DOWN(encl_page->desc),
+			encl_page, GFP_KERNEL);
+	/*
+	 * If ret == -EBUSY then page was created in another flow while
+	 * running without encl->lock
+	 */
+	if (ret)
+		goto err_out_unlock;
+
+	pginfo.secs = (unsigned long)sgx_get_epc_virt_addr(encl->secs.epc_page);
+	pginfo.addr = encl_page->desc & PAGE_MASK;
+	pginfo.metadata = 0;
+
+	ret = __eaug(&pginfo, sgx_get_epc_virt_addr(epc_page));
+	if (ret)
+		goto err_out;
+
+	encl_page->encl = encl;
+	encl_page->epc_page = epc_page;
+	encl_page->type = SGX_PAGE_TYPE_REG;
+	encl->secs_child_cnt++;
+
+	sgx_mark_page_reclaimable(encl_page->epc_page);
+
+	phys_addr = sgx_get_epc_phys_addr(epc_page);
+	/*
+	 * Do not undo everything when creating PTE entry fails - next #PF
+	 * would find page ready for a PTE.
+	 * PAGE_SHARED because protection is forced to be RW above and COW
+	 * is not supported.
+	 */
+	vmret = vmf_insert_pfn_prot(vma, addr, PFN_DOWN(phys_addr),
+				    PAGE_SHARED);
+	if (vmret != VM_FAULT_NOPAGE) {
+		mutex_unlock(&encl->lock);
+		return VM_FAULT_SIGBUS;
+	}
+	mutex_unlock(&encl->lock);
+	return VM_FAULT_NOPAGE;
+
+err_out:
+	xa_erase(&encl->page_array, PFN_DOWN(encl_page->desc));
+
+err_out_unlock:
+	sgx_encl_shrink(encl, va_page);
+	mutex_unlock(&encl->lock);
+
+err_out_free:
+	sgx_encl_free_epc_page(epc_page);
+	kfree(encl_page);
+
+	return VM_FAULT_SIGBUS;
+}
+
 static vm_fault_t sgx_vma_fault(struct vm_fault *vmf)
 {
 	unsigned long addr = (unsigned long)vmf->address;
@@ -145,6 +267,17 @@  static vm_fault_t sgx_vma_fault(struct vm_fault *vmf)
 	if (unlikely(!encl))
 		return VM_FAULT_SIGBUS;
 
+	/*
+	 * The page_array keeps track of all enclave pages, whether they
+	 * are swapped out or not. If there is no entry for this page and
+	 * the system supports SGX2 then it is possible to dynamically add
+	 * a new enclave page. This is only possible for an initialized
+	 * enclave that will be checked for right away.
+	 */
+	if (cpu_feature_enabled(X86_FEATURE_SGX2) &&
+	    (!xa_load(&encl->page_array, PFN_DOWN(addr))))
+		return sgx_encl_eaug_page(vma, encl, addr);
+
 	mutex_lock(&encl->lock);
 
 	entry = sgx_encl_load_page(encl, addr);
diff --git a/arch/x86/kernel/cpu/sgx/encl.h b/arch/x86/kernel/cpu/sgx/encl.h
index 848a28d28d3d..1b6ce1da7c92 100644
--- a/arch/x86/kernel/cpu/sgx/encl.h
+++ b/arch/x86/kernel/cpu/sgx/encl.h
@@ -123,4 +123,6 @@  void sgx_encl_free_epc_page(struct sgx_epc_page *page);
 struct sgx_encl_page *sgx_encl_load_page(struct sgx_encl *encl,
 					 unsigned long addr);
 
+struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl);
+void sgx_encl_shrink(struct sgx_encl *encl, struct sgx_va_page *va_page);
 #endif /* _X86_ENCL_H */
diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
index 5dddb3c9f742..de0bf68ee842 100644
--- a/arch/x86/kernel/cpu/sgx/ioctl.c
+++ b/arch/x86/kernel/cpu/sgx/ioctl.c
@@ -17,7 +17,7 @@ 
 #include "encl.h"
 #include "encls.h"
 
-static struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl)
+struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl)
 {
 	struct sgx_va_page *va_page = NULL;
 	void *err;
@@ -43,7 +43,7 @@  static struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl)
 	return va_page;
 }
 
-static void sgx_encl_shrink(struct sgx_encl *encl, struct sgx_va_page *va_page)
+void sgx_encl_shrink(struct sgx_encl *encl, struct sgx_va_page *va_page)
 {
 	encl->page_cnt--;