diff mbox

[intel-sgx-kernel-dev,RFC,10/12] intel_sgx: enable EPC cgroup controller in SGX core

Message ID 1497461858-20309-11-git-send-email-sean.j.christopherson@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Sean Christopherson June 14, 2017, 5:37 p.m. UTC
Add the appropriate calls to charge and uncharge a cgroup when EPC
pages are allocated and freed.  Rework sgx_isolate_pages() to iterate
over the cgroup hierarchy when grabbing EPC pages for reclaim.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
---
 arch/x86/include/asm/sgx.h                      |  3 +
 drivers/platform/x86/intel_sgx/sgx_ioctl.c      | 15 +++-
 drivers/platform/x86/intel_sgx/sgx_page_cache.c | 96 +++++++++++++++++++++++--
 drivers/platform/x86/intel_sgx/sgx_util.c       | 13 ++++
 4 files changed, 120 insertions(+), 7 deletions(-)
diff mbox

Patch

diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h
index 97119b8..92e02c3 100644
--- a/arch/x86/include/asm/sgx.h
+++ b/arch/x86/include/asm/sgx.h
@@ -346,6 +346,9 @@  struct sgx_epc_page {
 	struct list_head list;
 	struct sgx_encl *encl;
 	struct sgx_encl_page *encl_page;
+#ifdef CONFIG_CGROUP_SGX_EPC
+	struct sgx_epc_cgroup *epc_cg;
+#endif
 };
 
 enum sgx_alloc_flags {
diff --git a/drivers/platform/x86/intel_sgx/sgx_ioctl.c b/drivers/platform/x86/intel_sgx/sgx_ioctl.c
index 374e2a3..27612bc 100644
--- a/drivers/platform/x86/intel_sgx/sgx_ioctl.c
+++ b/drivers/platform/x86/intel_sgx/sgx_ioctl.c
@@ -59,6 +59,8 @@ 
  */
 
 #include "sgx.h"
+#include "sgx_epc_cgroup.h"
+
 #include <asm/mman.h>
 #include <linux/delay.h>
 #include <linux/file.h>
@@ -154,12 +156,19 @@  static bool sgx_process_add_page_req(struct sgx_add_page_req *req)
 	struct sgx_epc_page *epc_page;
 	struct sgx_encl_page *encl_page = req->encl_page;
 	struct sgx_encl *encl = req->encl;
+	struct sgx_epc_cgroup *epc_cg;
 	struct vm_area_struct *vma;
 	int ret;
 
+	ret = sgx_epc_cgroup_try_charge(encl->mm, 0, 1, &epc_cg);
+	if (ret)
+		return false;
+
 	epc_page = sgx_alloc_page(0);
-	if (IS_ERR(epc_page))
+	if (IS_ERR(epc_page)) {
+		sgx_epc_cgroup_uncharge(epc_cg, 1);
 		return false;
+	}
 
 	down_read(&encl->mm->mmap_sem);
 
@@ -204,6 +213,9 @@  static bool sgx_process_add_page_req(struct sgx_add_page_req *req)
 		goto out;
 	}
 
+#ifdef CONFIG_CGROUP_SGX_EPC
+	epc_page->epc_cg = epc_cg;
+#endif
 	encl_page->epc_page = epc_page;
 	sgx_activate_page(epc_page, encl, encl_page);
 
@@ -211,6 +223,7 @@  static bool sgx_process_add_page_req(struct sgx_add_page_req *req)
 	up_read(&encl->mm->mmap_sem);
 	return true;
 out:
+	sgx_epc_cgroup_uncharge(epc_cg, 1);
 	sgx_free_page(epc_page, encl);
 	mutex_unlock(&encl->lock);
 	up_read(&encl->mm->mmap_sem);
diff --git a/drivers/platform/x86/intel_sgx/sgx_page_cache.c b/drivers/platform/x86/intel_sgx/sgx_page_cache.c
index 86685c1..c5e7210 100644
--- a/drivers/platform/x86/intel_sgx/sgx_page_cache.c
+++ b/drivers/platform/x86/intel_sgx/sgx_page_cache.c
@@ -59,6 +59,8 @@ 
  */
 
 #include "sgx.h"
+#include "sgx_epc_cgroup.h"
+
 #include <linux/freezer.h>
 #include <linux/highmem.h>
 #include <linux/kthread.h>
@@ -83,6 +85,15 @@  struct task_struct *ksgxswapd_tsk;
 static DECLARE_WAIT_QUEUE_HEAD(ksgxswapd_waitq);
 
 
+static inline struct sgx_epc_lru *sgx_lru(struct sgx_epc_page *epc_page)
+{
+#ifdef CONFIG_CGROUP_SGX_EPC
+	return epc_page->epc_cg ? &epc_page->epc_cg->lru : &sgx_global_lru;
+#else
+	return &sgx_global_lru;
+#endif
+}
+
 static int sgx_test_and_clear_young_cb(pte_t *ptep, pgtable_t token,
 				       unsigned long addr, void *data)
 {
@@ -122,7 +133,7 @@  void sgx_activate_page(struct sgx_epc_page *epc_page,
 		       struct sgx_encl *encl,
 		       struct sgx_encl_page *encl_page)
 {
-	struct sgx_epc_lru *lru = &sgx_global_lru;
+	struct sgx_epc_lru *lru = sgx_lru(epc_page);
 
 	epc_page->encl = encl;
 	epc_page->encl_page = encl_page;
@@ -136,19 +147,19 @@  void sgx_activate_page(struct sgx_epc_page *epc_page,
 
 void sgx_deactivate_page(struct sgx_epc_page *epc_page)
 {
-	struct sgx_epc_lru *lru = &sgx_global_lru;
+	struct sgx_epc_lru *lru = sgx_lru(epc_page);
 
 	spin_lock(&lru->lock);
 	list_del_init(&epc_page->list);
 	spin_unlock(&lru->lock);
 }
 
-static void sgx_isolate_pages(struct list_head *dst,
-			      unsigned long nr_to_scan)
+static unsigned long __sgx_isolate_pages(struct list_head *dst,
+					 unsigned long nr_to_scan,
+					 struct sgx_epc_lru *lru)
 {
 	unsigned long i;
 	struct sgx_epc_page *entry;
-	struct sgx_epc_lru *lru = &sgx_global_lru;
 
 	spin_lock(&lru->lock);
 
@@ -168,6 +179,51 @@  static void sgx_isolate_pages(struct list_head *dst,
 	}
 
 	spin_unlock(&lru->lock);
+
+	return i;
+}
+
+static void sgx_isolate_pages(struct list_head *dst,
+			      unsigned long nr_to_scan,
+			      struct sgx_epc_cgroup *root)
+{
+#ifdef CONFIG_CGROUP_SGX_EPC
+	struct sgx_epc_cgroup *epc_cg;
+	unsigned long nr_scanned = 0;
+	struct sgx_epc_reclaim reclaim;
+
+	/*
+	 * If we're not targeting a specific cgroup, take from the global
+	 * list first, even when cgroups are enabled.  If we somehow have
+	 * pages on the global LRU then they should get reclaimed asap.
+	 */
+	if (!root) {
+		nr_scanned += __sgx_isolate_pages(dst, nr_to_scan, &sgx_global_lru);
+		if (nr_scanned >= nr_to_scan)
+			return;
+	}
+
+	for (epc_cg = sgx_epc_cgroup_iter(NULL, root, &reclaim);
+	     epc_cg;
+	     epc_cg = sgx_epc_cgroup_iter(epc_cg, root, &reclaim)) {
+		if (sgx_epc_cgroup_is_low(epc_cg, root)) {
+			/*
+			 * Ignore low if all cgroups in the tree we're looking at
+			 * are low, in which case low is effectively "normal".
+			 */
+			if (!sgx_epc_cgroup_all_in_use_are_low(root))
+				continue;
+		}
+
+		nr_scanned += __sgx_isolate_pages(dst, nr_to_scan - nr_scanned, &epc_cg->lru);
+		if (nr_scanned >= nr_to_scan) {
+			sgx_epc_cgroup_iter_break(epc_cg, root);
+			break;
+		}
+	}
+#else
+	__sgx_isolate_pages(dst, nr_to_scan, &sgx_global_lru);
+#endif
 }
 
 static int __sgx_ewb(struct sgx_encl *encl,
@@ -333,10 +389,33 @@  static inline void sgx_del_if_dead(struct sgx_encl *encl,
 static inline void sgx_lru_putback(struct list_head *src)
 {
 	struct sgx_epc_lru *lru = &sgx_global_lru;
+#ifdef CONFIG_CGROUP_SGX_EPC
+	struct sgx_epc_page *entry, *tmp, *master = NULL;
+	LIST_HEAD(cur);
+#endif
 
 	if (list_empty(src))
 		return;
 
+#ifdef CONFIG_CGROUP_SGX_EPC
+	list_for_each_entry_safe(entry, tmp, src, list) {
+		if (!master) {
+			master = entry;
+		} else if (entry->epc_cg != master->epc_cg) {
+			lru = sgx_lru(master);
+
+			spin_lock(&lru->lock);
+			list_splice_tail_init(&cur, &lru->active_lru);
+			spin_unlock(&lru->lock);
+
+			master = entry;
+		}
+
+		list_move_tail(&entry->list, &cur);
+	}
+	lru = sgx_lru(master);
+	src = &cur;
+#endif
 	spin_lock(&lru->lock);
 	list_splice_tail_init(src, &lru->active_lru);
 	spin_unlock(&lru->lock);
@@ -354,7 +433,7 @@  unsigned long sgx_swap_pages(unsigned long nr_to_scan,
 	LIST_HEAD(swap);
 	LIST_HEAD(skip);
 
-	sgx_isolate_pages(&iso, nr_to_scan);
+	sgx_isolate_pages(&iso, nr_to_scan, epc_cg);
 
 	while (!list_empty(&iso)) {
 		encl = list_first_entry(&iso, struct sgx_epc_page, list)->encl;
@@ -562,6 +641,11 @@  int sgx_free_page(struct sgx_epc_page *entry, struct sgx_encl *encl)
 	entry->encl = NULL;
 	entry->encl_page = NULL;
 
+#ifdef CONFIG_CGROUP_SGX_EPC
+	sgx_epc_cgroup_uncharge(entry->epc_cg, 1);
+	entry->epc_cg = NULL;
+#endif
+
 	spin_lock(&sgx_free_list_lock);
 	list_add(&entry->list, &sgx_free_list);
 	sgx_nr_free_pages++;
diff --git a/drivers/platform/x86/intel_sgx/sgx_util.c b/drivers/platform/x86/intel_sgx/sgx_util.c
index 543faa3..1c93b2a 100644
--- a/drivers/platform/x86/intel_sgx/sgx_util.c
+++ b/drivers/platform/x86/intel_sgx/sgx_util.c
@@ -59,6 +59,8 @@ 
  */
 
 #include "sgx.h"
+#include "sgx_epc_cgroup.h"
+
 #include <linux/highmem.h>
 #include <linux/shmem_fs.h>
 #include <linux/sched/mm.h>
@@ -249,6 +251,7 @@  static struct sgx_encl_page *sgx_do_fault(struct vm_area_struct *vma,
 	struct sgx_encl_page *entry;
 	struct sgx_epc_page *epc_page = NULL;
 	struct sgx_epc_page *secs_epc_page = NULL;
+	struct sgx_epc_cgroup *epc_cg;
 	bool reserve = (flags & SGX_FAULT_RESERVE) != 0;
 	int rc = 0;
 
@@ -258,6 +261,10 @@  static struct sgx_encl_page *sgx_do_fault(struct vm_area_struct *vma,
 	if (!encl)
 		return ERR_PTR(-EFAULT);
 
+	rc = sgx_epc_cgroup_try_charge(encl->mm, SGX_ALLOC_ATOMIC, 1, &epc_cg);
+	if (rc)
+		return ERR_PTR(rc);
+
 	mutex_lock(&encl->lock);
 
 	entry = radix_tree_lookup(&encl->page_tree, addr >> PAGE_SHIFT);
@@ -330,6 +337,9 @@  static struct sgx_encl_page *sgx_do_fault(struct vm_area_struct *vma,
 	 */
 	encl->secs_child_cnt++;
 
+#ifdef CONFIG_CGROUP_SGX_EPC
+	epc_page->epc_cg = epc_cg;
+#endif
 	entry->epc_page = epc_page;
 
 	if (reserve)
@@ -337,6 +347,7 @@  static struct sgx_encl_page *sgx_do_fault(struct vm_area_struct *vma,
 
 	/* Do not free */
 	epc_page = NULL;
+	epc_cg = NULL;
 
 	rc = vm_insert_pfn(vma, entry->addr, PFN_DOWN(entry->epc_page->pa));
 	if (rc) {
@@ -351,6 +362,8 @@  static struct sgx_encl_page *sgx_do_fault(struct vm_area_struct *vma,
 	sgx_activate_page(entry->epc_page, encl, entry);
 out:
 	mutex_unlock(&encl->lock);
+	if (epc_cg)
+		sgx_epc_cgroup_uncharge(epc_cg, 1);
 	if (epc_page)
 		sgx_free_page(epc_page, encl);
 	if (secs_epc_page)