diff mbox series

[v40,11/24] x86/sgx: Add SGX misc driver interface

Message ID 20201104145430.300542-12-jarkko.sakkinen@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series None | expand

Commit Message

Jarkko Sakkinen Nov. 4, 2020, 2:54 p.m. UTC
Intel(R) SGX is new hardware functionality that can be used by applications
to set aside private regions of code and data called enclaves. New hardware
protects enclave code and data from outside access and modification.

Add a driver that presents a device file and ioctl API to build and manage
enclaves.  Subsequent patches will extend the ioctl()’s functionality.

Cc: linux-security-module@vger.kernel.org
Cc: linux-mm@kvack.org
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Matthew Wilcox <willy@infradead.org>
Acked-by: Jethro Beekman <jethro@fortanix.com>
Tested-by: Jethro Beekman <jethro@fortanix.com>
Tested-by: Haitao Huang <haitao.huang@linux.intel.com>
Tested-by: Chunyang Hui <sanqian.hcy@antfin.com>
Tested-by: Jordan Hand <jorhand@linux.microsoft.com>
Tested-by: Nathaniel McCallum <npmccallum@redhat.com>
Tested-by: Seth Moore <sethmo@google.com>
Tested-by: Darren Kenny <darren.kenny@oracle.com>
Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
Co-developed-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Co-developed-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
---
Changes from v39:
* Rename /dev/sgx/enclave as /dev/sgx_enclave.
* In the page fault handler, do not check for SGX_ENCL_DEAD. This allows
  doing forensics on the memory of debug enclaves.

 arch/x86/kernel/cpu/sgx/Makefile |   2 +
 arch/x86/kernel/cpu/sgx/driver.c | 112 ++++++++++++++++++
 arch/x86/kernel/cpu/sgx/driver.h |  16 +++
 arch/x86/kernel/cpu/sgx/encl.c   | 188 +++++++++++++++++++++++++++++++
 arch/x86/kernel/cpu/sgx/encl.h   |  46 ++++++++
 arch/x86/kernel/cpu/sgx/main.c   |  12 +-
 6 files changed, 375 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/kernel/cpu/sgx/driver.c
 create mode 100644 arch/x86/kernel/cpu/sgx/driver.h
 create mode 100644 arch/x86/kernel/cpu/sgx/encl.c
 create mode 100644 arch/x86/kernel/cpu/sgx/encl.h

Comments

Jarkko Sakkinen Nov. 5, 2020, 1:10 a.m. UTC | #1
Noticed couple of minor glitches.

On Wed, Nov 04, 2020 at 04:54:17PM +0200, Jarkko Sakkinen wrote:
> +int sgx_encl_may_map(struct sgx_encl *encl, unsigned long start,
> +		     unsigned long end, unsigned long vm_flags)
> +{
> +	unsigned long vm_prot_bits = vm_flags & (VM_READ | VM_WRITE | VM_EXEC);
> +	struct sgx_encl_page *page;
> +	unsigned long count = 0;
> +	int ret = 0;
> +
> +	XA_STATE(xas, &encl->page_array, PFN_DOWN(start));
> +
> +	/*
> +	 * Disallow READ_IMPLIES_EXEC tasks as their VMA permissions might
> +	 * conflict with the enclave page permissions.
> +	 */
> +	if (current->personality & READ_IMPLIES_EXEC)
> +		return -EACCES;
> +
> +	mutex_lock(&encl->lock);
> +	xas_lock(&xas);
> +	xas_for_each(&xas, page, PFN_DOWN(end - 1)) {
> +		if (!page)
> +			break;

A redundant check, can be removed.

> +
> +		if (~page->vm_max_prot_bits & vm_prot_bits) {
> +			ret = -EACCES;
> +			break;
> +		}
> +
> +		/* Reschedule on every XA_CHECK_SCHED iteration. */
> +		if (!(++count % XA_CHECK_SCHED)) {
> +			xas_pause(&xas);
> +			xas_unlock(&xas);
> +			mutex_unlock(&encl->lock);
> +
> +			cond_resched();
> +
> +			mutex_lock(&encl->lock);
> +			xas_lock(&xas);
> +		}
> +	}
> +	xas_unlock(&xas);
> +	mutex_unlock(&encl->lock);
> +
> +	return ret;
> +}
> +
> +static int sgx_vma_mprotect(struct vm_area_struct *vma,
> +			    struct vm_area_struct **pprev, unsigned long start,
> +			    unsigned long end, unsigned long newflags)
> +{
> +	int ret;
> +
> +	ret = sgx_encl_may_map(vma->vm_private_data, start, end, newflags);
> +	if (ret)
> +		return ret;
> +
> +	return mprotect_fixup(vma, pprev, start, end, newflags);
> +}
> +
> +const struct vm_operations_struct sgx_vm_ops = {
> +	.fault = sgx_vma_fault,
> +	.mprotect = sgx_vma_mprotect,
> +};
> +
> +/**
> + * sgx_encl_find - find an enclave
> + * @mm:		mm struct of the current process
> + * @addr:	address in the ELRANGE
> + * @vma:	the resulting VMA
> + *
> + * Find an enclave identified by the given address. Give back a VMA that is
> + * part of the enclave and located in that address. The VMA is given back if it
> + * is a proper enclave VMA even if an &sgx_encl instance does not exist yet
> + * (enclave creation has not been performed).
> + *
> + * Return:
> + *   0 on success,
> + *   -EINVAL if an enclave was not found,
> + *   -ENOENT if the enclave has not been created yet
> + */
> +int sgx_encl_find(struct mm_struct *mm, unsigned long addr,
> +		  struct vm_area_struct **vma)
> +{
> +	struct vm_area_struct *result;
> +	struct sgx_encl *encl;
> +
> +	result = find_vma(mm, addr);
> +	if (!result || result->vm_ops != &sgx_vm_ops || addr < result->vm_start)
> +		return -EINVAL;
> +
> +	encl = result->vm_private_data;
> +	*vma = result;
> +
> +	return encl ? 0 : -ENOENT;
> +}

Since v20 there has been a 1:1 association between enclaves and files.
In other words, this can never return -ENOENT.

With this reduction the function turns into:

int sgx_encl_find(struct mm_struct *mm, unsigned long addr,
		  struct vm_area_struct **vma)
{
	struct vm_area_struct *result;

	result = find_vma(mm, addr);
	if (!result || result->vm_ops != &sgx_vm_ops || addr < result->vm_start)
		return -EINVAL;

	*vma = result;

	return 0;
}

There are only two call sites:

1. sgx_encl_test_and_clear_young()
2. sgx_reclaimer_block()

I.e. would not be a big trouble to tune the signature a bit:

struct vm_area_struct *sgx_encl_find_vma(struct mm_struct *mm, unsigned long addr)
{
	struct vm_area_struct *result;

	result = find_vma(mm, addr);
	if (!result || result->vm_ops != &sgx_vm_ops || addr < result->vm_start)
		return NULL;

	return result;
}

There is a function called sgx_encl_find_mm(), which is *unrelated* to
this function and has only one call site. Its flow is very linear. In
order to avoid confusion, I'd open code that into sgx_encl_mm_add().

/Jarkko
Jarkko Sakkinen Nov. 5, 2020, 1:16 a.m. UTC | #2
On Thu, Nov 05, 2020 at 03:10:54AM +0200, Jarkko Sakkinen wrote:
> Noticed couple of minor glitches.
> 
> On Wed, Nov 04, 2020 at 04:54:17PM +0200, Jarkko Sakkinen wrote:
> > +int sgx_encl_may_map(struct sgx_encl *encl, unsigned long start,
> > +		     unsigned long end, unsigned long vm_flags)
> > +{
> > +	unsigned long vm_prot_bits = vm_flags & (VM_READ | VM_WRITE | VM_EXEC);
> > +	struct sgx_encl_page *page;
> > +	unsigned long count = 0;
> > +	int ret = 0;
> > +
> > +	XA_STATE(xas, &encl->page_array, PFN_DOWN(start));
> > +
> > +	/*
> > +	 * Disallow READ_IMPLIES_EXEC tasks as their VMA permissions might
> > +	 * conflict with the enclave page permissions.
> > +	 */
> > +	if (current->personality & READ_IMPLIES_EXEC)
> > +		return -EACCES;
> > +
> > +	mutex_lock(&encl->lock);
> > +	xas_lock(&xas);
> > +	xas_for_each(&xas, page, PFN_DOWN(end - 1)) {
> > +		if (!page)
> > +			break;
> 
> A redundant check, can be removed.
> 
> > +
> > +		if (~page->vm_max_prot_bits & vm_prot_bits) {
> > +			ret = -EACCES;
> > +			break;
> > +		}
> > +
> > +		/* Reschedule on every XA_CHECK_SCHED iteration. */
> > +		if (!(++count % XA_CHECK_SCHED)) {
> > +			xas_pause(&xas);
> > +			xas_unlock(&xas);
> > +			mutex_unlock(&encl->lock);
> > +
> > +			cond_resched();
> > +
> > +			mutex_lock(&encl->lock);
> > +			xas_lock(&xas);
> > +		}
> > +	}
> > +	xas_unlock(&xas);
> > +	mutex_unlock(&encl->lock);
> > +
> > +	return ret;
> > +}
> > +
> > +static int sgx_vma_mprotect(struct vm_area_struct *vma,
> > +			    struct vm_area_struct **pprev, unsigned long start,
> > +			    unsigned long end, unsigned long newflags)
> > +{
> > +	int ret;
> > +
> > +	ret = sgx_encl_may_map(vma->vm_private_data, start, end, newflags);
> > +	if (ret)
> > +		return ret;
> > +
> > +	return mprotect_fixup(vma, pprev, start, end, newflags);
> > +}
> > +
> > +const struct vm_operations_struct sgx_vm_ops = {
> > +	.fault = sgx_vma_fault,
> > +	.mprotect = sgx_vma_mprotect,
> > +};
> > +
> > +/**
> > + * sgx_encl_find - find an enclave
> > + * @mm:		mm struct of the current process
> > + * @addr:	address in the ELRANGE
> > + * @vma:	the resulting VMA
> > + *
> > + * Find an enclave identified by the given address. Give back a VMA that is
> > + * part of the enclave and located in that address. The VMA is given back if it
> > + * is a proper enclave VMA even if an &sgx_encl instance does not exist yet
> > + * (enclave creation has not been performed).
> > + *
> > + * Return:
> > + *   0 on success,
> > + *   -EINVAL if an enclave was not found,
> > + *   -ENOENT if the enclave has not been created yet
> > + */
> > +int sgx_encl_find(struct mm_struct *mm, unsigned long addr,
> > +		  struct vm_area_struct **vma)
> > +{
> > +	struct vm_area_struct *result;
> > +	struct sgx_encl *encl;
> > +
> > +	result = find_vma(mm, addr);
> > +	if (!result || result->vm_ops != &sgx_vm_ops || addr < result->vm_start)
> > +		return -EINVAL;
> > +
> > +	encl = result->vm_private_data;
> > +	*vma = result;
> > +
> > +	return encl ? 0 : -ENOENT;
> > +}
> 
> Since v20 there has been 1:1 assocition between enclaves and files.
> In other words, this can never return -ENOENT.
> 
> With this reduction the function turns into:
> 
> int sgx_encl_find(struct mm_struct *mm, unsigned long addr,
> 		  struct vm_area_struct **vma)
> {
> 	struct vm_area_struct *result;
> 
> 	result = find_vma(mm, addr);
> 	if (!result || result->vm_ops != &sgx_vm_ops || addr < result->vm_start)
> 		return -EINVAL;
> 
> 	*vma = result;
> 
> 	return 0;
> }
> 
> There are only two call sites:
> 
> 1. sgx_encl_test_and_clear_young()
> 2. sgx_reclaimer_block()
> 
> I.e. would not be a big trouble to tune the signature a bit:
> 
> struct vm_area_struct *sgx_encl_find_vma(struct mm_struct *mm, unsigned long addr)
> {
> 	struct vm_area_struct *result;
> 
> 	result = find_vma(mm, addr);
> 	if (!result || result->vm_ops != &sgx_vm_ops || addr < result->vm_start)
> 		return NULL;
> 
> 	return result;
> }

Further, I'd declare this as an inline function given how trivial it
turns out to be.

> There is a function called sgx_encl_find_mm(), which is *unrelated* to
> this function and has only one call sites. Its flow is very linear. In
> order to avoid confusion, I'd open code that into sgx_encl_mm_add().
> 
> /Jarkko

/Jarkko
Borislav Petkov Nov. 5, 2020, 4:05 p.m. UTC | #3
On Thu, Nov 05, 2020 at 03:16:15AM +0200, Jarkko Sakkinen wrote:
> Further, I'd declare this as an inline function given how trivial it
> turn into.
> 
...

So are you sending a new version of only this patch as a reply to this
subthread?
Jarkko Sakkinen Nov. 5, 2020, 5:57 p.m. UTC | #4
On Thu, Nov 05, 2020 at 05:05:59PM +0100, Borislav Petkov wrote:
> On Thu, Nov 05, 2020 at 03:16:15AM +0200, Jarkko Sakkinen wrote:
> > Further, I'd declare this as an inline function given how trivial it
> > turn into.
> > 
> ...
> 
> So are you sending a new version of only this patch as a reply to this
> subthread?

Just remarked those, so that I will not forget either, e.g. even in the
case the patch was pulled as it is, I would eventually refine these
parts.

I'll rather send a full patch set if required.

> -- 
> Regards/Gruss,
>     Boris.
> 
> https://people.kernel.org/tglx/notes-about-netiquette

/Jarkko
Borislav Petkov Nov. 5, 2020, 6:10 p.m. UTC | #5
On Thu, Nov 05, 2020 at 07:57:45PM +0200, Jarkko Sakkinen wrote:
> I'll rather send a full patch set if required.

Why if the changes all belong to this patch and why should I take a
patch which clearly needs improving?

Just send the fixed version of this and I can take it now.

Thx.
Jarkko Sakkinen Nov. 6, 2020, 4:07 p.m. UTC | #6
On Thu, Nov 05, 2020 at 07:10:47PM +0100, Borislav Petkov wrote:
> On Thu, Nov 05, 2020 at 07:57:45PM +0200, Jarkko Sakkinen wrote:
> > I'll rather send a full patch set if required.
> 
> Why if the changes all belong to this patch and why should I take a
> patch which clearly needs improving?
> 
> Just send the fixed version of this and I can take it now.
> 
> Thx.

Here's an updated patch. I kept the name as sgx_encl_find() and kept the
output argument instead of a return value, so that the change is localized.
I think this is good enough, i.e. the semantically obsolete stuff has been
wiped off.

> -- 
> Regards/Gruss,
>     Boris.
> 
> https://people.kernel.org/tglx/notes-about-netiquette

/Jarkko
Borislav Petkov Nov. 6, 2020, 5:09 p.m. UTC | #7
On Fri, Nov 06, 2020 at 06:07:42PM +0200, Jarkko Sakkinen wrote:
> On Thu, Nov 05, 2020 at 07:10:47PM +0100, Borislav Petkov wrote:
> > On Thu, Nov 05, 2020 at 07:57:45PM +0200, Jarkko Sakkinen wrote:
> > > I'll rather send a full patch set if required.
> > 
> > Why if the changes all belong to this patch and why should I take a
> > patch which clearly needs improving?
> > 
> > Just send the fixed version of this and I can take it now.
> > 
> > Thx.
> 
> Here's an update patch. I kept the name as sgx_encl_find() so and output
> argument instead of return value, so that the change is localized. I
> think this is good enough, i.e. the semantically obsolete stuff has been
> wiped off.

Thanks.

> Tested-by: Jethro Beekman <jethro@fortanix.com>
> Tested-by: Haitao Huang <haitao.huang@linux.intel.com>
> Tested-by: Chunyang Hui <sanqian.hcy@antfin.com>
> Tested-by: Jordan Hand <jorhand@linux.microsoft.com>
> Tested-by: Nathaniel McCallum <npmccallum@redhat.com>
> Tested-by: Seth Moore <sethmo@google.com>
> Tested-by: Darren Kenny <darren.kenny@oracle.com>

Btw, you do know that when you change the patch, those tested-by's don't
hold true anymore, right?

The Reviewed-by's too, actually.

I'll zap them.
Jarkko Sakkinen Nov. 6, 2020, 10:01 p.m. UTC | #8
On Fri, Nov 06, 2020 at 06:09:20PM +0100, Borislav Petkov wrote:
> On Fri, Nov 06, 2020 at 06:07:42PM +0200, Jarkko Sakkinen wrote:
> > On Thu, Nov 05, 2020 at 07:10:47PM +0100, Borislav Petkov wrote:
> > > On Thu, Nov 05, 2020 at 07:57:45PM +0200, Jarkko Sakkinen wrote:
> > > > I'll rather send a full patch set if required.
> > > 
> > > Why if the changes all belong to this patch and why should I take a
> > > patch which clearly needs improving?
> > > 
> > > Just send the fixed version of this and I can take it now.
> > > 
> > > Thx.
> > 
> > Here's an update patch. I kept the name as sgx_encl_find() so and output
> > argument instead of return value, so that the change is localized. I
> > think this is good enough, i.e. the semantically obsolete stuff has been
> > wiped off.
> 
> Thanks.
> 
> > Tested-by: Jethro Beekman <jethro@fortanix.com>
> > Tested-by: Haitao Huang <haitao.huang@linux.intel.com>
> > Tested-by: Chunyang Hui <sanqian.hcy@antfin.com>
> > Tested-by: Jordan Hand <jorhand@linux.microsoft.com>
> > Tested-by: Nathaniel McCallum <npmccallum@redhat.com>
> > Tested-by: Seth Moore <sethmo@google.com>
> > Tested-by: Darren Kenny <darren.kenny@oracle.com>
> 
> Btw, you do know that when you change the patch, those tested-by's don't
> hold true anymore, right?
> 
> The Reviewed-by's too, actually.
> 
> I'll zap them.

Yes, I know that. That was something that I should have done for this
version. I was too busy turning every rock to make everything as clean
as possible, sorry about that. I'll also update my tree accordingly.

> -- 
> Regards/Gruss,
>     Boris.
> 
> https://people.kernel.org/tglx/notes-about-netiquette

/Jarkko
diff mbox series

Patch

diff --git a/arch/x86/kernel/cpu/sgx/Makefile b/arch/x86/kernel/cpu/sgx/Makefile
index 79510ce01b3b..3fc451120735 100644
--- a/arch/x86/kernel/cpu/sgx/Makefile
+++ b/arch/x86/kernel/cpu/sgx/Makefile
@@ -1,2 +1,4 @@ 
 obj-y += \
+	driver.o \
+	encl.o \
 	main.o
diff --git a/arch/x86/kernel/cpu/sgx/driver.c b/arch/x86/kernel/cpu/sgx/driver.c
new file mode 100644
index 000000000000..248213dea78e
--- /dev/null
+++ b/arch/x86/kernel/cpu/sgx/driver.c
@@ -0,0 +1,112 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*  Copyright(c) 2016-20 Intel Corporation. */
+
+#include <linux/acpi.h>
+#include <linux/miscdevice.h>
+#include <linux/mman.h>
+#include <linux/security.h>
+#include <linux/suspend.h>
+#include <asm/traps.h>
+#include "driver.h"
+#include "encl.h"
+
+static int sgx_open(struct inode *inode, struct file *file)
+{
+	struct sgx_encl *encl;
+
+	encl = kzalloc(sizeof(*encl), GFP_KERNEL);
+	if (!encl)
+		return -ENOMEM;
+
+	xa_init(&encl->page_array);
+	mutex_init(&encl->lock);
+
+	file->private_data = encl;
+
+	return 0;
+}
+
+static int sgx_release(struct inode *inode, struct file *file)
+{
+	struct sgx_encl *encl = file->private_data;
+	struct sgx_encl_page *entry;
+	unsigned long index;
+
+	xa_for_each(&encl->page_array, index, entry) {
+		if (entry->epc_page) {
+			sgx_free_epc_page(entry->epc_page);
+			encl->secs_child_cnt--;
+			entry->epc_page = NULL;
+		}
+
+		kfree(entry);
+	}
+
+	xa_destroy(&encl->page_array);
+
+	if (!encl->secs_child_cnt && encl->secs.epc_page) {
+		sgx_free_epc_page(encl->secs.epc_page);
+		encl->secs.epc_page = NULL;
+	}
+
+	/* Detect EPC page leaks. */
+	WARN_ON_ONCE(encl->secs_child_cnt);
+	WARN_ON_ONCE(encl->secs.epc_page);
+
+	kfree(encl);
+	return 0;
+}
+
+static int sgx_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct sgx_encl *encl = file->private_data;
+	int ret;
+
+	ret = sgx_encl_may_map(encl, vma->vm_start, vma->vm_end, vma->vm_flags);
+	if (ret)
+		return ret;
+
+	vma->vm_ops = &sgx_vm_ops;
+	vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_IO;
+	vma->vm_private_data = encl;
+
+	return 0;
+}
+
+static unsigned long sgx_get_unmapped_area(struct file *file,
+					   unsigned long addr,
+					   unsigned long len,
+					   unsigned long pgoff,
+					   unsigned long flags)
+{
+	if ((flags & MAP_TYPE) == MAP_PRIVATE)
+		return -EINVAL;
+
+	if (flags & MAP_FIXED)
+		return addr;
+
+	return current->mm->get_unmapped_area(file, addr, len, pgoff, flags);
+}
+
+static const struct file_operations sgx_encl_fops = {
+	.owner			= THIS_MODULE,
+	.open			= sgx_open,
+	.release		= sgx_release,
+	.mmap			= sgx_mmap,
+	.get_unmapped_area	= sgx_get_unmapped_area,
+};
+
+static struct miscdevice sgx_dev_enclave = {
+	.minor = MISC_DYNAMIC_MINOR,
+	.name = "sgx_enclave",
+	.nodename = "sgx_enclave",
+	.fops = &sgx_encl_fops,
+};
+
+int __init sgx_drv_init(void)
+{
+	if (!cpu_feature_enabled(X86_FEATURE_SGX_LC))
+		return -ENODEV;
+
+	return misc_register(&sgx_dev_enclave);
+}
diff --git a/arch/x86/kernel/cpu/sgx/driver.h b/arch/x86/kernel/cpu/sgx/driver.h
new file mode 100644
index 000000000000..cda9c43b7543
--- /dev/null
+++ b/arch/x86/kernel/cpu/sgx/driver.h
@@ -0,0 +1,16 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ARCH_SGX_DRIVER_H__
+#define __ARCH_SGX_DRIVER_H__
+
+#include <crypto/hash.h>
+#include <linux/kref.h>
+#include <linux/mmu_notifier.h>
+#include <linux/radix-tree.h>
+#include <linux/rwsem.h>
+#include <linux/sched.h>
+#include <linux/workqueue.h>
+#include "sgx.h"
+
+int sgx_drv_init(void);
+
+#endif /* __ARCH_SGX_DRIVER_H__ */
diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
new file mode 100644
index 000000000000..d47caa106350
--- /dev/null
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -0,0 +1,188 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*  Copyright(c) 2016-20 Intel Corporation. */
+
+#include <linux/lockdep.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/shmem_fs.h>
+#include <linux/suspend.h>
+#include <linux/sched/mm.h>
+#include "arch.h"
+#include "encl.h"
+#include "encls.h"
+#include "sgx.h"
+
+static struct sgx_encl_page *sgx_encl_load_page(struct sgx_encl *encl,
+						unsigned long addr,
+						unsigned long vm_flags)
+{
+	unsigned long vm_prot_bits = vm_flags & (VM_READ | VM_WRITE | VM_EXEC);
+	struct sgx_encl_page *entry;
+
+	entry = xa_load(&encl->page_array, PFN_DOWN(addr));
+	if (!entry)
+		return ERR_PTR(-EFAULT);
+
+	/*
+	 * Verify that the faulted page has equal or higher build time
+	 * permissions than the VMA permissions (i.e. the subset of {VM_READ,
+	 * VM_WRITE, VM_EXEC} in vma->vm_flags).
+	 */
+	if ((entry->vm_max_prot_bits & vm_prot_bits) != vm_prot_bits)
+		return ERR_PTR(-EFAULT);
+
+	/* The entry has no EPC page attached to it. */
+	if (!entry->epc_page)
+		return ERR_PTR(-EFAULT);
+
+	/* Entry successfully located. */
+	return entry;
+}
+
+static vm_fault_t sgx_vma_fault(struct vm_fault *vmf)
+{
+	unsigned long addr = (unsigned long)vmf->address;
+	struct vm_area_struct *vma = vmf->vma;
+	struct sgx_encl_page *entry;
+	unsigned long phys_addr;
+	struct sgx_encl *encl;
+	vm_fault_t ret;
+
+	encl = vma->vm_private_data;
+
+	mutex_lock(&encl->lock);
+
+	entry = sgx_encl_load_page(encl, addr, vma->vm_flags);
+	if (IS_ERR(entry)) {
+		mutex_unlock(&encl->lock);
+
+		return VM_FAULT_SIGBUS;
+	}
+
+	phys_addr = sgx_get_epc_phys_addr(entry->epc_page);
+
+	ret = vmf_insert_pfn(vma, addr, PFN_DOWN(phys_addr));
+	if (ret != VM_FAULT_NOPAGE) {
+		mutex_unlock(&encl->lock);
+
+		return VM_FAULT_SIGBUS;
+	}
+
+	mutex_unlock(&encl->lock);
+
+	return VM_FAULT_NOPAGE;
+}
+
+/**
+ * sgx_encl_may_map() - Check if a requested VMA mapping is allowed
+ * @encl:		an enclave pointer
+ * @start:		lower bound of the address range, inclusive
+ * @end:		upper bound of the address range, exclusive
+ * @vm_flags:		VMA flags
+ *
+ * Iterate through the enclave pages contained within [@start, @end) to verify
+ * that the permissions requested by a subset of {VM_READ, VM_WRITE, VM_EXEC}
+ * does not contain any permissions that are not contained in the build time
+ * permissions of any of the enclave pages within the given address range.
+ *
+ * An enclave creator must declare the strongest permissions that will be
+ * needed for each enclave page. This ensures that mappings have identical
+ * or weaker permissions than the earlier declared permissions.
+ *
+ * Return: 0 on success, -EACCES otherwise
+ */
+int sgx_encl_may_map(struct sgx_encl *encl, unsigned long start,
+		     unsigned long end, unsigned long vm_flags)
+{
+	unsigned long vm_prot_bits = vm_flags & (VM_READ | VM_WRITE | VM_EXEC);
+	struct sgx_encl_page *page;
+	unsigned long count = 0;
+	int ret = 0;
+
+	XA_STATE(xas, &encl->page_array, PFN_DOWN(start));
+
+	/*
+	 * Disallow READ_IMPLIES_EXEC tasks as their VMA permissions might
+	 * conflict with the enclave page permissions.
+	 */
+	if (current->personality & READ_IMPLIES_EXEC)
+		return -EACCES;
+
+	mutex_lock(&encl->lock);
+	xas_lock(&xas);
+	xas_for_each(&xas, page, PFN_DOWN(end - 1)) {
+		if (!page)
+			break;
+
+		if (~page->vm_max_prot_bits & vm_prot_bits) {
+			ret = -EACCES;
+			break;
+		}
+
+		/* Reschedule on every XA_CHECK_SCHED iteration. */
+		if (!(++count % XA_CHECK_SCHED)) {
+			xas_pause(&xas);
+			xas_unlock(&xas);
+			mutex_unlock(&encl->lock);
+
+			cond_resched();
+
+			mutex_lock(&encl->lock);
+			xas_lock(&xas);
+		}
+	}
+	xas_unlock(&xas);
+	mutex_unlock(&encl->lock);
+
+	return ret;
+}
+
+static int sgx_vma_mprotect(struct vm_area_struct *vma,
+			    struct vm_area_struct **pprev, unsigned long start,
+			    unsigned long end, unsigned long newflags)
+{
+	int ret;
+
+	ret = sgx_encl_may_map(vma->vm_private_data, start, end, newflags);
+	if (ret)
+		return ret;
+
+	return mprotect_fixup(vma, pprev, start, end, newflags);
+}
+
+const struct vm_operations_struct sgx_vm_ops = {
+	.fault = sgx_vma_fault,
+	.mprotect = sgx_vma_mprotect,
+};
+
+/**
+ * sgx_encl_find - find an enclave
+ * @mm:		mm struct of the current process
+ * @addr:	address in the ELRANGE
+ * @vma:	the resulting VMA
+ *
+ * Find an enclave identified by the given address. Give back a VMA that is
+ * part of the enclave and located in that address. The VMA is given back if it
+ * is a proper enclave VMA even if an &sgx_encl instance does not exist yet
+ * (enclave creation has not been performed).
+ *
+ * Return:
+ *   0 on success,
+ *   -EINVAL if an enclave was not found,
+ *   -ENOENT if the enclave has not been created yet
+ */
+int sgx_encl_find(struct mm_struct *mm, unsigned long addr,
+		  struct vm_area_struct **vma)
+{
+	struct vm_area_struct *result;
+	struct sgx_encl *encl;
+
+	result = find_vma(mm, addr);
+	if (!result || result->vm_ops != &sgx_vm_ops || addr < result->vm_start)
+		return -EINVAL;
+
+	encl = result->vm_private_data;
+	*vma = result;
+
+	return encl ? 0 : -ENOENT;
+}
diff --git a/arch/x86/kernel/cpu/sgx/encl.h b/arch/x86/kernel/cpu/sgx/encl.h
new file mode 100644
index 000000000000..8eb34e95feda
--- /dev/null
+++ b/arch/x86/kernel/cpu/sgx/encl.h
@@ -0,0 +1,46 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/**
+ * Copyright(c) 2016-20 Intel Corporation.
+ *
+ * Contains the software defined data structures for enclaves.
+ */
+#ifndef _X86_ENCL_H
+#define _X86_ENCL_H
+
+#include <linux/cpumask.h>
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/mm_types.h>
+#include <linux/mmu_notifier.h>
+#include <linux/mutex.h>
+#include <linux/notifier.h>
+#include <linux/srcu.h>
+#include <linux/workqueue.h>
+#include <linux/xarray.h>
+#include "sgx.h"
+
+struct sgx_encl_page {
+	unsigned long desc;
+	unsigned long vm_max_prot_bits;
+	struct sgx_epc_page *epc_page;
+	struct sgx_encl *encl;
+};
+
+struct sgx_encl {
+	unsigned long base;
+	unsigned long size;
+	unsigned int page_cnt;
+	unsigned int secs_child_cnt;
+	struct mutex lock;
+	struct xarray page_array;
+	struct sgx_encl_page secs;
+};
+
+extern const struct vm_operations_struct sgx_vm_ops;
+
+int sgx_encl_find(struct mm_struct *mm, unsigned long addr,
+		  struct vm_area_struct **vma);
+int sgx_encl_may_map(struct sgx_encl *encl, unsigned long start,
+		     unsigned long end, unsigned long vm_flags);
+
+#endif /* _X86_ENCL_H */
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index b9ac438a13a4..c2740e0630d1 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -9,6 +9,8 @@ 
 #include <linux/sched/mm.h>
 #include <linux/sched/signal.h>
 #include <linux/slab.h>
+#include "driver.h"
+#include "encl.h"
 #include "encls.h"
 
 struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS];
@@ -229,9 +231,10 @@  static bool __init sgx_page_cache_init(void)
 
 static void __init sgx_init(void)
 {
+	int ret;
 	int i;
 
-	if (!boot_cpu_has(X86_FEATURE_SGX))
+	if (!cpu_feature_enabled(X86_FEATURE_SGX))
 		return;
 
 	if (!sgx_page_cache_init())
@@ -240,8 +243,15 @@  static void __init sgx_init(void)
 	if (!sgx_page_reclaimer_init())
 		goto err_page_cache;
 
+	ret = sgx_drv_init();
+	if (ret)
+		goto err_kthread;
+
 	return;
 
+err_kthread:
+	kthread_stop(ksgxswapd_tsk);
+
 err_page_cache:
 	for (i = 0; i < sgx_nr_epc_sections; i++) {
 		vfree(sgx_epc_sections[i].pages);