
[v35,21/24] x86/vdso: Implement a vDSO for Intel SGX enclave call

Message ID 20200707033747.142828-22-jarkko.sakkinen@linux.intel.com (mailing list archive)
State Under Review
Series Intel SGX foundations

Commit Message

Jarkko Sakkinen July 7, 2020, 3:37 a.m. UTC
From: Sean Christopherson <sean.j.christopherson@intel.com>

An SGX runtime must be aware of exceptions that happen inside an enclave.
Introduce a vDSO call that wraps the EENTER/ERESUME cycle and reports CPU
exceptions back to the caller as soon as they happen.

The kernel fixes up the exception information into RDI, RSI and RDX. The vDSO
handler copies this information into the user-provided buffer or,
alternatively, invokes a user-provided callback at the time of the exception.

The calling convention is custom and does not follow the System V x86-64 ABI.

Suggested-by: Andy Lutomirski <luto@amacapital.net>
Acked-by: Jethro Beekman <jethro@fortanix.com>
Tested-by: Jethro Beekman <jethro@fortanix.com>
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Co-developed-by: Cedric Xing <cedric.xing@intel.com>
Signed-off-by: Cedric Xing <cedric.xing@intel.com>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
---
 arch/x86/entry/vdso/Makefile             |   2 +
 arch/x86/entry/vdso/vdso.lds.S           |   1 +
 arch/x86/entry/vdso/vsgx_enter_enclave.S | 131 +++++++++++++++++++++++
 arch/x86/include/asm/enclu.h             |   8 ++
 arch/x86/include/uapi/asm/sgx.h          |  98 +++++++++++++++++
 5 files changed, 240 insertions(+)
 create mode 100644 arch/x86/entry/vdso/vsgx_enter_enclave.S
 create mode 100644 arch/x86/include/asm/enclu.h
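
For illustration, a minimal sketch of how a runtime might invoke the new call
through the vdso_sgx_enter_enclave_t typedef added by this patch. Resolving the
symbol from the vDSO image (mapped at AT_SYSINFO_EHDR) is assumed to be done
elsewhere; sgx_vdso_symbol() is a hypothetical helper, and the enclave is
assumed to preserve the register state required by the kernel-doc NOTE below,
since the call does not fully follow the System V x86-64 ABI.

#include <errno.h>
#include <stdio.h>
#include <asm/sgx.h>	/* struct sgx_enclave_exception, vdso_sgx_enter_enclave_t */

#define EENTER	0x02	/* ENCLU[EENTER] leaf; mirrors asm/enclu.h, which is not a uapi header */

/* Hypothetical helper: resolve a symbol from the vDSO mapped into the process. */
extern void *sgx_vdso_symbol(const char *name);

static int sgx_call(void *tcs, void *arg)
{
	vdso_sgx_enter_enclave_t enter_enclave;
	struct sgx_enclave_exception e = { 0 };
	int ret;

	enter_enclave = (vdso_sgx_enter_enclave_t)
		sgx_vdso_symbol("__vdso_sgx_enter_enclave");

	/* RDI carries @arg into the enclave; no exit handler is used here. */
	ret = enter_enclave((unsigned long)arg, 0, 0, EENTER, 0, 0, tcs, &e, NULL);
	if (ret == -EFAULT)
		fprintf(stderr, "enclave exception: trap %u, error code %u, addr 0x%llx\n",
			e.trapnr, e.error_code, (unsigned long long)e.address);

	return ret;
}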

Comments

Jethro Beekman July 14, 2020, 7:30 a.m. UTC | #1
On 2020-07-07 05:37, Jarkko Sakkinen wrote:
> From: Sean Christopherson <sean.j.christopherson@intel.com>
> 
> An SGX runtime must be aware of the exceptions, which happen inside an
> enclave. Introduce a vDSO call that wraps EENTER/ERESUME cycle and returns
> the CPU exception back to the caller exactly when it happens.
> 
> Kernel fixups the exception information to RDI, RSI and RDX. The SGX call
> vDSO handler fills this information to the user provided buffer or
> alternatively trigger user provided callback at the time of the exception.
> 
> The calling convention is custom and does not follow System V x86-64 ABI.
> 
> Suggested-by: Andy Lutomirski <luto@amacapital.net>
> Acked-by: Jethro Beekman <jethro@fortanix.com>
> Tested-by: Jethro Beekman <jethro@fortanix.com>
> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> Co-developed-by: Cedric Xing <cedric.xing@intel.com>
> Signed-off-by: Cedric Xing <cedric.xing@intel.com>
> Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
> ---
>  arch/x86/entry/vdso/Makefile             |   2 +
>  arch/x86/entry/vdso/vdso.lds.S           |   1 +
>  arch/x86/entry/vdso/vsgx_enter_enclave.S | 131 +++++++++++++++++++++++
>  arch/x86/include/asm/enclu.h             |   8 ++
>  arch/x86/include/uapi/asm/sgx.h          |  98 +++++++++++++++++
>  5 files changed, 240 insertions(+)
>  create mode 100644 arch/x86/entry/vdso/vsgx_enter_enclave.S
>  create mode 100644 arch/x86/include/asm/enclu.h
> 
> diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
> index ebe82b7aecda..f71ad5ebd0c4 100644
> --- a/arch/x86/entry/vdso/Makefile
> +++ b/arch/x86/entry/vdso/Makefile
> @@ -29,6 +29,7 @@ VDSO32-$(CONFIG_IA32_EMULATION)	:= y
>  vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o
>  vobjs32-y := vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o
>  vobjs32-y += vdso32/vclock_gettime.o
> +vobjs-$(VDSO64-y)		+= vsgx_enter_enclave.o
>  
>  # files to link into kernel
>  obj-y				+= vma.o extable.o
> @@ -100,6 +101,7 @@ $(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS
>  CFLAGS_REMOVE_vclock_gettime.o = -pg
>  CFLAGS_REMOVE_vdso32/vclock_gettime.o = -pg
>  CFLAGS_REMOVE_vgetcpu.o = -pg
> +CFLAGS_REMOVE_vsgx_enter_enclave.o = -pg
>  
>  #
>  # X32 processes use x32 vDSO to access 64bit kernel data.
> diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S
> index 36b644e16272..4bf48462fca7 100644
> --- a/arch/x86/entry/vdso/vdso.lds.S
> +++ b/arch/x86/entry/vdso/vdso.lds.S
> @@ -27,6 +27,7 @@ VERSION {
>  		__vdso_time;
>  		clock_getres;
>  		__vdso_clock_getres;
> +		__vdso_sgx_enter_enclave;
>  	local: *;
>  	};
>  }
> diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> new file mode 100644
> index 000000000000..be7e467e1efb
> --- /dev/null
> +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> @@ -0,0 +1,131 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#include <linux/linkage.h>
> +#include <asm/export.h>
> +#include <asm/errno.h>
> +#include <asm/enclu.h>
> +
> +#include "extable.h"
> +
> +#define EX_LEAF		0*8
> +#define EX_TRAPNR	0*8+4
> +#define EX_ERROR_CODE	0*8+6
> +#define EX_ADDRESS	1*8
> +
> +.code64
> +.section .text, "ax"
> +
> +SYM_FUNC_START(__vdso_sgx_enter_enclave)
> +	/* Prolog */
> +	.cfi_startproc
> +	push	%rbp
> +	.cfi_adjust_cfa_offset	8
> +	.cfi_rel_offset		%rbp, 0
> +	mov	%rsp, %rbp
> +	.cfi_def_cfa_register	%rbp
> +	push	%rbx
> +	.cfi_rel_offset		%rbx, -8
> +
> +	mov	%ecx, %eax
> +.Lenter_enclave:
> +	/* EENTER <= leaf <= ERESUME */
> +	cmp	$EENTER, %eax
> +	jb	.Linvalid_leaf
> +	cmp	$ERESUME, %eax
> +	ja	.Linvalid_leaf
> +
> +	/* Load TCS and AEP */
> +	mov	0x10(%rbp), %rbx
> +	lea	.Lasync_exit_pointer(%rip), %rcx
> +
> +	/* Single ENCLU serving as both EENTER and AEP (ERESUME) */
> +.Lasync_exit_pointer:
> +.Lenclu_eenter_eresume:
> +	enclu

After thinking about this some more, I'd like to come back to this setup. Prior discussion at https://lkml.org/lkml/2018/11/2/597 . I hope I'm not derailing the discussion so much as to delay the patch set :(

I previously mentioned “Userspace may want fine-grained control over enclave scheduling” as a reason userspace may want to specify a different AEP, but gave a bad example. Here's a better example: If I'm running my enclave in an M:N threading model (where M user threads run N TCSs, with N > M), an AEX is a good opportunity to switch contexts. Yes, I could implement this with alarm() or so, but that adds overhead while missing out on a lot of opportunities for context switching.

--
Jethro Beekman | Fortanix
Jarkko Sakkinen July 14, 2020, 9:56 a.m. UTC | #2
On Tue, Jul 14, 2020 at 09:30:03AM +0200, Jethro Beekman wrote:
> On 2020-07-07 05:37, Jarkko Sakkinen wrote:
> > From: Sean Christopherson <sean.j.christopherson@intel.com>
> > 
> > An SGX runtime must be aware of the exceptions, which happen inside an
> > enclave. Introduce a vDSO call that wraps EENTER/ERESUME cycle and returns
> > the CPU exception back to the caller exactly when it happens.
> > 
> > Kernel fixups the exception information to RDI, RSI and RDX. The SGX call
> > vDSO handler fills this information to the user provided buffer or
> > alternatively trigger user provided callback at the time of the exception.
> > 
> > The calling convention is custom and does not follow System V x86-64 ABI.
> > 
> > Suggested-by: Andy Lutomirski <luto@amacapital.net>
> > Acked-by: Jethro Beekman <jethro@fortanix.com>
> > Tested-by: Jethro Beekman <jethro@fortanix.com>
> > Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> > Co-developed-by: Cedric Xing <cedric.xing@intel.com>
> > Signed-off-by: Cedric Xing <cedric.xing@intel.com>
> > Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
> > ---
> >  arch/x86/entry/vdso/Makefile             |   2 +
> >  arch/x86/entry/vdso/vdso.lds.S           |   1 +
> >  arch/x86/entry/vdso/vsgx_enter_enclave.S | 131 +++++++++++++++++++++++
> >  arch/x86/include/asm/enclu.h             |   8 ++
> >  arch/x86/include/uapi/asm/sgx.h          |  98 +++++++++++++++++
> >  5 files changed, 240 insertions(+)
> >  create mode 100644 arch/x86/entry/vdso/vsgx_enter_enclave.S
> >  create mode 100644 arch/x86/include/asm/enclu.h
> > 
> > diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
> > index ebe82b7aecda..f71ad5ebd0c4 100644
> > --- a/arch/x86/entry/vdso/Makefile
> > +++ b/arch/x86/entry/vdso/Makefile
> > @@ -29,6 +29,7 @@ VDSO32-$(CONFIG_IA32_EMULATION)	:= y
> >  vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o
> >  vobjs32-y := vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o
> >  vobjs32-y += vdso32/vclock_gettime.o
> > +vobjs-$(VDSO64-y)		+= vsgx_enter_enclave.o
> >  
> >  # files to link into kernel
> >  obj-y				+= vma.o extable.o
> > @@ -100,6 +101,7 @@ $(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS
> >  CFLAGS_REMOVE_vclock_gettime.o = -pg
> >  CFLAGS_REMOVE_vdso32/vclock_gettime.o = -pg
> >  CFLAGS_REMOVE_vgetcpu.o = -pg
> > +CFLAGS_REMOVE_vsgx_enter_enclave.o = -pg
> >  
> >  #
> >  # X32 processes use x32 vDSO to access 64bit kernel data.
> > diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S
> > index 36b644e16272..4bf48462fca7 100644
> > --- a/arch/x86/entry/vdso/vdso.lds.S
> > +++ b/arch/x86/entry/vdso/vdso.lds.S
> > @@ -27,6 +27,7 @@ VERSION {
> >  		__vdso_time;
> >  		clock_getres;
> >  		__vdso_clock_getres;
> > +		__vdso_sgx_enter_enclave;
> >  	local: *;
> >  	};
> >  }
> > diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> > new file mode 100644
> > index 000000000000..be7e467e1efb
> > --- /dev/null
> > +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> > @@ -0,0 +1,131 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +
> > +#include <linux/linkage.h>
> > +#include <asm/export.h>
> > +#include <asm/errno.h>
> > +#include <asm/enclu.h>
> > +
> > +#include "extable.h"
> > +
> > +#define EX_LEAF		0*8
> > +#define EX_TRAPNR	0*8+4
> > +#define EX_ERROR_CODE	0*8+6
> > +#define EX_ADDRESS	1*8
> > +
> > +.code64
> > +.section .text, "ax"
> > +
> > +SYM_FUNC_START(__vdso_sgx_enter_enclave)
> > +	/* Prolog */
> > +	.cfi_startproc
> > +	push	%rbp
> > +	.cfi_adjust_cfa_offset	8
> > +	.cfi_rel_offset		%rbp, 0
> > +	mov	%rsp, %rbp
> > +	.cfi_def_cfa_register	%rbp
> > +	push	%rbx
> > +	.cfi_rel_offset		%rbx, -8
> > +
> > +	mov	%ecx, %eax
> > +.Lenter_enclave:
> > +	/* EENTER <= leaf <= ERESUME */
> > +	cmp	$EENTER, %eax
> > +	jb	.Linvalid_leaf
> > +	cmp	$ERESUME, %eax
> > +	ja	.Linvalid_leaf
> > +
> > +	/* Load TCS and AEP */
> > +	mov	0x10(%rbp), %rbx
> > +	lea	.Lasync_exit_pointer(%rip), %rcx
> > +
> > +	/* Single ENCLU serving as both EENTER and AEP (ERESUME) */
> > +.Lasync_exit_pointer:
> > +.Lenclu_eenter_eresume:
> > +	enclu
> 
> After thinking about this some more, I'd like to come back to this
> setup. Prior discussion at https://lkml.org/lkml/2018/11/2/597 . I
> hope I'm not derailing the discussion so much as to delay the patch
> set :(
> 
> I previously mentioned “Userspace may want fine-grained control over
> enclave scheduling” as a reason userspace may want to specify a
> different AEP, but gave a bad example. Here's a better example: If I'm
> running my enclave in an M:N threading model (where M user threads run
> N TCSs, with N > M), an AEX is a good oppurtunity to switch contexts.
> Yes, I could implement this with alarm() or so, but that adds overhead
> while missing out on a lot of opportunities for context switching.

The vDSO interface also provides an optional callback. I wonder whether that
works for this, or whether it could be refined to work for this?

/Jarkko
Jethro Beekman July 14, 2020, 10:07 a.m. UTC | #3
On 2020-07-14 11:56, Jarkko Sakkinen wrote:
> On Tue, Jul 14, 2020 at 09:30:03AM +0200, Jethro Beekman wrote:
>> On 2020-07-07 05:37, Jarkko Sakkinen wrote:
>>> From: Sean Christopherson <sean.j.christopherson@intel.com>
>>>
>>> An SGX runtime must be aware of the exceptions, which happen inside an
>>> enclave. Introduce a vDSO call that wraps EENTER/ERESUME cycle and returns
>>> the CPU exception back to the caller exactly when it happens.
>>>
>>> Kernel fixups the exception information to RDI, RSI and RDX. The SGX call
>>> vDSO handler fills this information to the user provided buffer or
>>> alternatively trigger user provided callback at the time of the exception.
>>>
>>> The calling convention is custom and does not follow System V x86-64 ABI.
>>>
>>> Suggested-by: Andy Lutomirski <luto@amacapital.net>
>>> Acked-by: Jethro Beekman <jethro@fortanix.com>
>>> Tested-by: Jethro Beekman <jethro@fortanix.com>
>>> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
>>> Co-developed-by: Cedric Xing <cedric.xing@intel.com>
>>> Signed-off-by: Cedric Xing <cedric.xing@intel.com>
>>> Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
>>> ---
>>>  arch/x86/entry/vdso/Makefile             |   2 +
>>>  arch/x86/entry/vdso/vdso.lds.S           |   1 +
>>>  arch/x86/entry/vdso/vsgx_enter_enclave.S | 131 +++++++++++++++++++++++
>>>  arch/x86/include/asm/enclu.h             |   8 ++
>>>  arch/x86/include/uapi/asm/sgx.h          |  98 +++++++++++++++++
>>>  5 files changed, 240 insertions(+)
>>>  create mode 100644 arch/x86/entry/vdso/vsgx_enter_enclave.S
>>>  create mode 100644 arch/x86/include/asm/enclu.h
>>>
>>> diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
>>> index ebe82b7aecda..f71ad5ebd0c4 100644
>>> --- a/arch/x86/entry/vdso/Makefile
>>> +++ b/arch/x86/entry/vdso/Makefile
>>> @@ -29,6 +29,7 @@ VDSO32-$(CONFIG_IA32_EMULATION)	:= y
>>>  vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o
>>>  vobjs32-y := vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o
>>>  vobjs32-y += vdso32/vclock_gettime.o
>>> +vobjs-$(VDSO64-y)		+= vsgx_enter_enclave.o
>>>  
>>>  # files to link into kernel
>>>  obj-y				+= vma.o extable.o
>>> @@ -100,6 +101,7 @@ $(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS
>>>  CFLAGS_REMOVE_vclock_gettime.o = -pg
>>>  CFLAGS_REMOVE_vdso32/vclock_gettime.o = -pg
>>>  CFLAGS_REMOVE_vgetcpu.o = -pg
>>> +CFLAGS_REMOVE_vsgx_enter_enclave.o = -pg
>>>  
>>>  #
>>>  # X32 processes use x32 vDSO to access 64bit kernel data.
>>> diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S
>>> index 36b644e16272..4bf48462fca7 100644
>>> --- a/arch/x86/entry/vdso/vdso.lds.S
>>> +++ b/arch/x86/entry/vdso/vdso.lds.S
>>> @@ -27,6 +27,7 @@ VERSION {
>>>  		__vdso_time;
>>>  		clock_getres;
>>>  		__vdso_clock_getres;
>>> +		__vdso_sgx_enter_enclave;
>>>  	local: *;
>>>  	};
>>>  }
>>> diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S
>>> new file mode 100644
>>> index 000000000000..be7e467e1efb
>>> --- /dev/null
>>> +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S
>>> @@ -0,0 +1,131 @@
>>> +/* SPDX-License-Identifier: GPL-2.0 */
>>> +
>>> +#include <linux/linkage.h>
>>> +#include <asm/export.h>
>>> +#include <asm/errno.h>
>>> +#include <asm/enclu.h>
>>> +
>>> +#include "extable.h"
>>> +
>>> +#define EX_LEAF		0*8
>>> +#define EX_TRAPNR	0*8+4
>>> +#define EX_ERROR_CODE	0*8+6
>>> +#define EX_ADDRESS	1*8
>>> +
>>> +.code64
>>> +.section .text, "ax"
>>> +
>>> +SYM_FUNC_START(__vdso_sgx_enter_enclave)
>>> +	/* Prolog */
>>> +	.cfi_startproc
>>> +	push	%rbp
>>> +	.cfi_adjust_cfa_offset	8
>>> +	.cfi_rel_offset		%rbp, 0
>>> +	mov	%rsp, %rbp
>>> +	.cfi_def_cfa_register	%rbp
>>> +	push	%rbx
>>> +	.cfi_rel_offset		%rbx, -8
>>> +
>>> +	mov	%ecx, %eax
>>> +.Lenter_enclave:
>>> +	/* EENTER <= leaf <= ERESUME */
>>> +	cmp	$EENTER, %eax
>>> +	jb	.Linvalid_leaf
>>> +	cmp	$ERESUME, %eax
>>> +	ja	.Linvalid_leaf
>>> +
>>> +	/* Load TCS and AEP */
>>> +	mov	0x10(%rbp), %rbx
>>> +	lea	.Lasync_exit_pointer(%rip), %rcx
>>> +
>>> +	/* Single ENCLU serving as both EENTER and AEP (ERESUME) */
>>> +.Lasync_exit_pointer:
>>> +.Lenclu_eenter_eresume:
>>> +	enclu
>>
>> After thinking about this some more, I'd like to come back to this
>> setup. Prior discussion at https://lkml.org/lkml/2018/11/2/597 . I
>> hope I'm not derailing the discussion so much as to delay the patch
>> set :(
>>
>> I previously mentioned “Userspace may want fine-grained control over
>> enclave scheduling” as a reason userspace may want to specify a
>> different AEP, but gave a bad example. Here's a better example: If I'm
>> running my enclave in an M:N threading model (where M user threads run
>> N TCSs, with N > M), an AEX is a good oppurtunity to switch contexts.
>> Yes, I could implement this with alarm() or so, but that adds overhead
>> while missing out on a lot of opportunities for context switching.
> 
> The vDSO interface also provides optional callback. Wonder if that
> works for this or can it be refined to work for this?

Yeah, I think if the callback were called instead of ENCLU, the callback would have the opportunity to return a non-positive value, which would trigger a return from __vdso_sgx_enter_enclave. Moving .Lasync_exit_pointer to .Lhandle_exit might be sufficient. But I imagine not all users would want this behavior (although executing the few extra userspace instructions is likely negligible compared to the actual ERESUME).
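
As a rough sketch of what such a handler could look like under that
hypothetical change (AEX routed to the exit handler rather than straight back
to ENCLU). Note that this is not what the patch currently implements, and
my_scheduler_should_yield() is a made-up user-space scheduler hook:

#include <asm/sgx.h>

#define ERESUME	0x03	/* ENCLU[ERESUME] leaf; mirrors asm/enclu.h */

/* Hypothetical user-space scheduler hook: switch to another TCS now? */
extern int my_scheduler_should_yield(void);

static int aex_handler(long rdi, long rsi, long rdx, long ursp,
		       long r8, long r9, void *tcs, int ret,
		       struct sgx_enclave_exception *e)
{
	if (ret == -EFAULT)
		return ret;	/* real exception: unwind and report it */

	if (my_scheduler_should_yield())
		return 0;	/* unwind to the caller, which can run another TCS */

	return ERESUME;		/* positive: re-execute ENCLU with this leaf */
}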

--
Jethro Beekman | Fortanix
Jarkko Sakkinen July 14, 2020, 11:38 a.m. UTC | #4
On Tue, Jul 14, 2020 at 12:07:54PM +0200, Jethro Beekman wrote:
> On 2020-07-14 11:56, Jarkko Sakkinen wrote:
> > On Tue, Jul 14, 2020 at 09:30:03AM +0200, Jethro Beekman wrote:
> >> On 2020-07-07 05:37, Jarkko Sakkinen wrote:
> >>> From: Sean Christopherson <sean.j.christopherson@intel.com>
> >>>
> >>> An SGX runtime must be aware of the exceptions, which happen inside an
> >>> enclave. Introduce a vDSO call that wraps EENTER/ERESUME cycle and returns
> >>> the CPU exception back to the caller exactly when it happens.
> >>>
> >>> Kernel fixups the exception information to RDI, RSI and RDX. The SGX call
> >>> vDSO handler fills this information to the user provided buffer or
> >>> alternatively trigger user provided callback at the time of the exception.
> >>>
> >>> The calling convention is custom and does not follow System V x86-64 ABI.
> >>>
> >>> Suggested-by: Andy Lutomirski <luto@amacapital.net>
> >>> Acked-by: Jethro Beekman <jethro@fortanix.com>
> >>> Tested-by: Jethro Beekman <jethro@fortanix.com>
> >>> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> >>> Co-developed-by: Cedric Xing <cedric.xing@intel.com>
> >>> Signed-off-by: Cedric Xing <cedric.xing@intel.com>
> >>> Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
> >>> ---
> >>>  arch/x86/entry/vdso/Makefile             |   2 +
> >>>  arch/x86/entry/vdso/vdso.lds.S           |   1 +
> >>>  arch/x86/entry/vdso/vsgx_enter_enclave.S | 131 +++++++++++++++++++++++
> >>>  arch/x86/include/asm/enclu.h             |   8 ++
> >>>  arch/x86/include/uapi/asm/sgx.h          |  98 +++++++++++++++++
> >>>  5 files changed, 240 insertions(+)
> >>>  create mode 100644 arch/x86/entry/vdso/vsgx_enter_enclave.S
> >>>  create mode 100644 arch/x86/include/asm/enclu.h
> >>>
> >>> diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
> >>> index ebe82b7aecda..f71ad5ebd0c4 100644
> >>> --- a/arch/x86/entry/vdso/Makefile
> >>> +++ b/arch/x86/entry/vdso/Makefile
> >>> @@ -29,6 +29,7 @@ VDSO32-$(CONFIG_IA32_EMULATION)	:= y
> >>>  vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o
> >>>  vobjs32-y := vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o
> >>>  vobjs32-y += vdso32/vclock_gettime.o
> >>> +vobjs-$(VDSO64-y)		+= vsgx_enter_enclave.o
> >>>  
> >>>  # files to link into kernel
> >>>  obj-y				+= vma.o extable.o
> >>> @@ -100,6 +101,7 @@ $(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS
> >>>  CFLAGS_REMOVE_vclock_gettime.o = -pg
> >>>  CFLAGS_REMOVE_vdso32/vclock_gettime.o = -pg
> >>>  CFLAGS_REMOVE_vgetcpu.o = -pg
> >>> +CFLAGS_REMOVE_vsgx_enter_enclave.o = -pg
> >>>  
> >>>  #
> >>>  # X32 processes use x32 vDSO to access 64bit kernel data.
> >>> diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S
> >>> index 36b644e16272..4bf48462fca7 100644
> >>> --- a/arch/x86/entry/vdso/vdso.lds.S
> >>> +++ b/arch/x86/entry/vdso/vdso.lds.S
> >>> @@ -27,6 +27,7 @@ VERSION {
> >>>  		__vdso_time;
> >>>  		clock_getres;
> >>>  		__vdso_clock_getres;
> >>> +		__vdso_sgx_enter_enclave;
> >>>  	local: *;
> >>>  	};
> >>>  }
> >>> diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> >>> new file mode 100644
> >>> index 000000000000..be7e467e1efb
> >>> --- /dev/null
> >>> +++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S
> >>> @@ -0,0 +1,131 @@
> >>> +/* SPDX-License-Identifier: GPL-2.0 */
> >>> +
> >>> +#include <linux/linkage.h>
> >>> +#include <asm/export.h>
> >>> +#include <asm/errno.h>
> >>> +#include <asm/enclu.h>
> >>> +
> >>> +#include "extable.h"
> >>> +
> >>> +#define EX_LEAF		0*8
> >>> +#define EX_TRAPNR	0*8+4
> >>> +#define EX_ERROR_CODE	0*8+6
> >>> +#define EX_ADDRESS	1*8
> >>> +
> >>> +.code64
> >>> +.section .text, "ax"
> >>> +
> >>> +SYM_FUNC_START(__vdso_sgx_enter_enclave)
> >>> +	/* Prolog */
> >>> +	.cfi_startproc
> >>> +	push	%rbp
> >>> +	.cfi_adjust_cfa_offset	8
> >>> +	.cfi_rel_offset		%rbp, 0
> >>> +	mov	%rsp, %rbp
> >>> +	.cfi_def_cfa_register	%rbp
> >>> +	push	%rbx
> >>> +	.cfi_rel_offset		%rbx, -8
> >>> +
> >>> +	mov	%ecx, %eax
> >>> +.Lenter_enclave:
> >>> +	/* EENTER <= leaf <= ERESUME */
> >>> +	cmp	$EENTER, %eax
> >>> +	jb	.Linvalid_leaf
> >>> +	cmp	$ERESUME, %eax
> >>> +	ja	.Linvalid_leaf
> >>> +
> >>> +	/* Load TCS and AEP */
> >>> +	mov	0x10(%rbp), %rbx
> >>> +	lea	.Lasync_exit_pointer(%rip), %rcx
> >>> +
> >>> +	/* Single ENCLU serving as both EENTER and AEP (ERESUME) */
> >>> +.Lasync_exit_pointer:
> >>> +.Lenclu_eenter_eresume:
> >>> +	enclu
> >>
> >> After thinking about this some more, I'd like to come back to this
> >> setup. Prior discussion at https://lkml.org/lkml/2018/11/2/597 . I
> >> hope I'm not derailing the discussion so much as to delay the patch
> >> set :(
> >>
> >> I previously mentioned “Userspace may want fine-grained control over
> >> enclave scheduling” as a reason userspace may want to specify a
> >> different AEP, but gave a bad example. Here's a better example: If I'm
> >> running my enclave in an M:N threading model (where M user threads run
> >> N TCSs, with N > M), an AEX is a good oppurtunity to switch contexts.
> >> Yes, I could implement this with alarm() or so, but that adds overhead
> >> while missing out on a lot of opportunities for context switching.
> > 
> > The vDSO interface also provides optional callback. Wonder if that
> > works for this or can it be refined to work for this?
> 
> Yeah I think if the callback was called instead of ENCLU, the callback
> has the opportunity to return non-positive which will trigger a return
> from __vdso_sgx_enter_enclave. Moving .Lasync_exit_pointer to
> .Lhandle_exit might be sufficient. But I imagine not all users would
> want this behavior (although calling the few userspace instructions is
> likely negligible compared to the actual ERESUME).

Have you tried whether the callback interface suits your workload?

/Jarkko

Patch

diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index ebe82b7aecda..f71ad5ebd0c4 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -29,6 +29,7 @@  VDSO32-$(CONFIG_IA32_EMULATION)	:= y
 vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o
 vobjs32-y := vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o
 vobjs32-y += vdso32/vclock_gettime.o
+vobjs-$(VDSO64-y)		+= vsgx_enter_enclave.o
 
 # files to link into kernel
 obj-y				+= vma.o extable.o
@@ -100,6 +101,7 @@  $(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS
 CFLAGS_REMOVE_vclock_gettime.o = -pg
 CFLAGS_REMOVE_vdso32/vclock_gettime.o = -pg
 CFLAGS_REMOVE_vgetcpu.o = -pg
+CFLAGS_REMOVE_vsgx_enter_enclave.o = -pg
 
 #
 # X32 processes use x32 vDSO to access 64bit kernel data.
diff --git a/arch/x86/entry/vdso/vdso.lds.S b/arch/x86/entry/vdso/vdso.lds.S
index 36b644e16272..4bf48462fca7 100644
--- a/arch/x86/entry/vdso/vdso.lds.S
+++ b/arch/x86/entry/vdso/vdso.lds.S
@@ -27,6 +27,7 @@  VERSION {
 		__vdso_time;
 		clock_getres;
 		__vdso_clock_getres;
+		__vdso_sgx_enter_enclave;
 	local: *;
 	};
 }
diff --git a/arch/x86/entry/vdso/vsgx_enter_enclave.S b/arch/x86/entry/vdso/vsgx_enter_enclave.S
new file mode 100644
index 000000000000..be7e467e1efb
--- /dev/null
+++ b/arch/x86/entry/vdso/vsgx_enter_enclave.S
@@ -0,0 +1,131 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/linkage.h>
+#include <asm/export.h>
+#include <asm/errno.h>
+#include <asm/enclu.h>
+
+#include "extable.h"
+
+#define EX_LEAF		0*8
+#define EX_TRAPNR	0*8+4
+#define EX_ERROR_CODE	0*8+6
+#define EX_ADDRESS	1*8
+
+.code64
+.section .text, "ax"
+
+SYM_FUNC_START(__vdso_sgx_enter_enclave)
+	/* Prolog */
+	.cfi_startproc
+	push	%rbp
+	.cfi_adjust_cfa_offset	8
+	.cfi_rel_offset		%rbp, 0
+	mov	%rsp, %rbp
+	.cfi_def_cfa_register	%rbp
+	push	%rbx
+	.cfi_rel_offset		%rbx, -8
+
+	mov	%ecx, %eax
+.Lenter_enclave:
+	/* EENTER <= leaf <= ERESUME */
+	cmp	$EENTER, %eax
+	jb	.Linvalid_leaf
+	cmp	$ERESUME, %eax
+	ja	.Linvalid_leaf
+
+	/* Load TCS and AEP */
+	mov	0x10(%rbp), %rbx
+	lea	.Lasync_exit_pointer(%rip), %rcx
+
+	/* Single ENCLU serving as both EENTER and AEP (ERESUME) */
+.Lasync_exit_pointer:
+.Lenclu_eenter_eresume:
+	enclu
+
+	/* EEXIT jumps here unless the enclave is doing something fancy. */
+	xor	%eax, %eax
+
+	/* Invoke userspace's exit handler if one was provided. */
+.Lhandle_exit:
+	cmp	$0, 0x20(%rbp)
+	jne	.Linvoke_userspace_handler
+
+.Lout:
+	pop	%rbx
+	leave
+	.cfi_def_cfa		%rsp, 8
+	ret
+
+	/* The out-of-line code runs with the pre-leave stack frame. */
+	.cfi_def_cfa		%rbp, 16
+
+.Linvalid_leaf:
+	mov	$(-EINVAL), %eax
+	jmp	.Lout
+
+.Lhandle_exception:
+	mov	0x18(%rbp), %rcx
+	test    %rcx, %rcx
+	je	.Lskip_exception_info
+
+	/* Fill optional exception info. */
+	mov	%eax, EX_LEAF(%rcx)
+	mov	%di,  EX_TRAPNR(%rcx)
+	mov	%si,  EX_ERROR_CODE(%rcx)
+	mov	%rdx, EX_ADDRESS(%rcx)
+.Lskip_exception_info:
+	mov	$(-EFAULT), %eax
+	jmp	.Lhandle_exit
+
+.Linvoke_userspace_handler:
+	/* Pass the untrusted RSP (at exit) to the callback via %rcx. */
+	mov	%rsp, %rcx
+
+	/* Save the untrusted RSP offset in %rbx (non-volatile register). */
+	mov	%rsp, %rbx
+	and	$0xf, %rbx
+
+	/*
+	 * Align stack per x86_64 ABI. Note, %rsp needs to be 16-byte aligned
+	 * _after_ pushing the parameters on the stack, hence the bonus push.
+	 */
+	and	$-0x10, %rsp
+	push	%rax
+
+	/* Push @e, the "return" value and @tcs as params to the callback. */
+	push	0x18(%rbp)
+	push	%rax
+	push	0x10(%rbp)
+
+	/* Clear RFLAGS.DF per x86_64 ABI */
+	cld
+
+	/* Load the callback pointer to %rax and invoke it via retpoline. */
+	mov	0x20(%rbp), %rax
+	call	.Lretpoline
+
+	/* Undo the post-exit %rsp adjustment. */
+	lea	0x20(%rsp, %rbx), %rsp
+
+	/*
+	 * If the return from callback is zero or negative, return immediately,
+	 * else re-execute ENCLU with the positive return value interpreted as
+	 * the requested ENCLU leaf.
+	 */
+	cmp	$0, %eax
+	jle	.Lout
+	jmp	.Lenter_enclave
+
+.Lretpoline:
+	call	2f
+1:	pause
+	lfence
+	jmp	1b
+2:	mov	%rax, (%rsp)
+	ret
+	.cfi_endproc
+
+_ASM_VDSO_EXTABLE_HANDLE(.Lenclu_eenter_eresume, .Lhandle_exception)
+
+SYM_FUNC_END(__vdso_sgx_enter_enclave)
diff --git a/arch/x86/include/asm/enclu.h b/arch/x86/include/asm/enclu.h
new file mode 100644
index 000000000000..06157b3e9ede
--- /dev/null
+++ b/arch/x86/include/asm/enclu.h
@@ -0,0 +1,8 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_ENCLU_H
+#define _ASM_X86_ENCLU_H
+
+#define EENTER	0x02
+#define ERESUME	0x03
+
+#endif /* _ASM_X86_ENCLU_H */
diff --git a/arch/x86/include/uapi/asm/sgx.h b/arch/x86/include/uapi/asm/sgx.h
index 57d0d30c79b3..3760e5d5dc0c 100644
--- a/arch/x86/include/uapi/asm/sgx.h
+++ b/arch/x86/include/uapi/asm/sgx.h
@@ -74,4 +74,102 @@  struct sgx_enclave_set_attribute {
 	__u64 attribute_fd;
 };
 
+/**
+ * struct sgx_enclave_exception - structure to report exceptions encountered in
+ *				  __vdso_sgx_enter_enclave()
+ *
+ * @leaf:	ENCLU leaf from \%eax at time of exception
+ * @trapnr:	exception trap number, a.k.a. fault vector
+ * @error_code:	exception error code
+ * @address:	exception address, e.g. CR2 on a #PF
+ * @reserved:	reserved for future use
+ */
+struct sgx_enclave_exception {
+	__u32 leaf;
+	__u16 trapnr;
+	__u16 error_code;
+	__u64 address;
+	__u64 reserved[2];
+};
+
+/**
+ * typedef sgx_enclave_exit_handler_t - Exit handler function accepted by
+ *					__vdso_sgx_enter_enclave()
+ *
+ * @rdi:	RDI at the time of enclave exit
+ * @rsi:	RSI at the time of enclave exit
+ * @rdx:	RDX at the time of enclave exit
+ * @ursp:	RSP at the time of enclave exit (untrusted stack)
+ * @r8:		R8 at the time of enclave exit
+ * @r9:		R9 at the time of enclave exit
+ * @tcs:	Thread Control Structure used to enter enclave
+ * @ret:	0 on success (EEXIT), -EFAULT on an exception
+ * @e:		Pointer to struct sgx_enclave_exception (as provided by caller)
+ */
+typedef int (*sgx_enclave_exit_handler_t)(long rdi, long rsi, long rdx,
+					  long ursp, long r8, long r9,
+					  void *tcs, int ret,
+					  struct sgx_enclave_exception *e);
+
+/**
+ * __vdso_sgx_enter_enclave() - Enter an SGX enclave
+ * @rdi:	Pass-through value for RDI
+ * @rsi:	Pass-through value for RSI
+ * @rdx:	Pass-through value for RDX
+ * @leaf:	ENCLU leaf, must be EENTER or ERESUME
+ * @r8:		Pass-through value for R8
+ * @r9:		Pass-through value for R9
+ * @tcs:	TCS, must be non-NULL
+ * @e:		Optional struct sgx_enclave_exception instance
+ * @handler:	Optional enclave exit handler
+ *
+ * NOTE: __vdso_sgx_enter_enclave() does not ensure full compliance with the
+ * x86-64 ABI, e.g. doesn't explicitly clear EFLAGS.DF after EEXIT.  Except for
+ * non-volatile general purpose registers, preserving/setting state in
+ * accordance with the x86-64 ABI is the responsibility of the enclave and its
+ * runtime, i.e. __vdso_sgx_enter_enclave() cannot be called from C code
+ * without careful consideration by both the enclave and its runtime.
+ *
+ * All general purpose registers except RAX, RBX and RCX are passed as-is to
+ * the enclave.  RAX, RBX and RCX are consumed by EENTER and ERESUME and are
+ * loaded with @leaf, asynchronous exit pointer, and @tcs respectively.
+ *
+ * RBP and the stack are used to anchor __vdso_sgx_enter_enclave() to the
+ * pre-enclave state, e.g. to retrieve @e and @handler after an enclave exit.
+ * All other registers are available for use by the enclave and its runtime,
+ * e.g. an enclave can push additional data onto the stack (and modify RSP) to
+ * pass information to the optional exit handler (see below).
+ *
+ * Most exceptions reported on ENCLU, including those that occur within the
+ * enclave, are fixed up and reported synchronously instead of being delivered
+ * via a standard signal. Debug Exceptions (#DB) and Breakpoints (#BP) are
+ * never fixed up and are always delivered via standard signals. On synchronously
+ * reported exceptions, -EFAULT is returned and details about the exception are
+ * recorded in @e, the optional sgx_enclave_exception struct.
+ *
+ * If an exit handler is provided, the handler will be invoked on synchronous
+ * exits from the enclave and for all synchronously reported exceptions. In
+ * the latter case, @e is filled prior to invoking the handler.
+ *
+ * The exit handler's return value is interpreted as follows:
+ *  >0:		continue, restart __vdso_sgx_enter_enclave() with @ret as @leaf
+ *   0:		success, return @ret to the caller
+ *  <0:		error, return @ret to the caller
+ *
+ * The exit handler may transfer control, e.g. via longjmp() or C++ exception,
+ * without returning to __vdso_sgx_enter_enclave().
+ *
+ * Return:
+ *  0 on success,
+ *  -EINVAL if ENCLU leaf is not allowed,
+ *  -EFAULT if an exception occurs on ENCLU or within the enclave,
+ *  -errno for all other negative values returned by the userspace exit handler
+ */
+typedef int (*vdso_sgx_enter_enclave_t)(unsigned long rdi, unsigned long rsi,
+					unsigned long rdx, unsigned int leaf,
+					unsigned long r8,  unsigned long r9,
+					void *tcs,
+					struct sgx_enclave_exception *e,
+					sgx_enclave_exit_handler_t handler);
+
 #endif /* _UAPI_ASM_X86_SGX_H */
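
To make the exit-handler contract documented above concrete, here is a minimal
sketch of a handler, assuming a hypothetical runtime protocol in which the
enclave pushes one status word onto the untrusted stack before EEXIT (the
kernel-doc above notes that an enclave may pass data to the handler this way):

#include <errno.h>
#include <asm/sgx.h>

#define EENTER	0x02	/* ENCLU[EENTER] leaf; mirrors asm/enclu.h, not a uapi header */

/*
 * Invoked by __vdso_sgx_enter_enclave() on EEXIT (@ret == 0) and on
 * synchronously reported exceptions (@ret == -EFAULT, @e filled in).
 */
static int exit_handler(long rdi, long rsi, long rdx, long ursp,
			long r8, long r9, void *tcs, int ret,
			struct sgx_enclave_exception *e)
{
	if (ret == -EFAULT)
		return ret;		/* <0: propagate the fault to the caller */

	/*
	 * Hypothetical protocol: the enclave pushed a "more work pending"
	 * flag onto the untrusted stack before EEXIT, so it sits at @ursp.
	 */
	long more_work = *(long *)ursp;

	/* >0 re-enters the enclave with that ENCLU leaf, 0 returns to the caller. */
	return more_work ? EENTER : 0;
}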