Add the appropriate calls to (un)charge a cgroup during EPC page
allocation and free.  Rework sgx_isolate_pages to iterate over the
cgroup hierarchy when grabbing EPC pages for reclaim.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
---
 arch/x86/include/asm/sgx.h                      |  3 +
 drivers/platform/x86/intel_sgx/sgx_ioctl.c      | 15 +++-
 drivers/platform/x86/intel_sgx/sgx_page_cache.c | 96 +++++++++++++++++++++++--
 drivers/platform/x86/intel_sgx/sgx_util.c       | 13 ++++
 4 files changed, 120 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h
--- a/arch/x86/include/asm/sgx.h
+++ b/arch/x86/include/asm/sgx.h
@@ -346,6 +346,9 @@ struct sgx_epc_page {
struct list_head list;
struct sgx_encl *encl;
struct sgx_encl_page *encl_page;
+#ifdef CONFIG_CGROUP_SGX_EPC
+ struct sgx_epc_cgroup *epc_cg;
+#endif
};

enum sgx_alloc_flags {
diff --git a/drivers/platform/x86/intel_sgx/sgx_ioctl.c b/drivers/platform/x86/intel_sgx/sgx_ioctl.c
--- a/drivers/platform/x86/intel_sgx/sgx_ioctl.c
+++ b/drivers/platform/x86/intel_sgx/sgx_ioctl.c
@@ -59,6 +59,8 @@
*/

#include "sgx.h"
+#include "sgx_epc_cgroup.h"
+
#include <asm/mman.h>
#include <linux/delay.h>
#include <linux/file.h>
@@ -154,12 +156,19 @@ static bool sgx_process_add_page_req(struct sgx_add_page_req *req)
struct sgx_epc_page *epc_page;
struct sgx_encl_page *encl_page = req->encl_page;
struct sgx_encl *encl = req->encl;
+ struct sgx_epc_cgroup *epc_cg;
struct vm_area_struct *vma;
int ret;

+ ret = sgx_epc_cgroup_try_charge(encl->mm, 0, 1, &epc_cg);
+ if (ret)
+ return false;
+
epc_page = sgx_alloc_page(0);
- if (IS_ERR(epc_page))
+ if (IS_ERR(epc_page)) {
+ sgx_epc_cgroup_uncharge(epc_cg, 1);
return false;
+ }

down_read(&encl->mm->mmap_sem);
@@ -204,6 +213,9 @@ static bool sgx_process_add_page_req(struct sgx_add_page_req *req)
goto out;
}

+#ifdef CONFIG_CGROUP_SGX_EPC
+ epc_page->epc_cg = epc_cg;
+#endif
encl_page->epc_page = epc_page;

sgx_activate_page(epc_page, encl, encl_page);
@@ -211,6 +223,7 @@ static bool sgx_process_add_page_req(struct sgx_add_page_req *req)
up_read(&encl->mm->mmap_sem);
return true;
out:
+ sgx_epc_cgroup_uncharge(epc_cg, 1);
sgx_free_page(epc_page, encl);
mutex_unlock(&encl->lock);
up_read(&encl->mm->mmap_sem);
diff --git a/drivers/platform/x86/intel_sgx/sgx_page_cache.c b/drivers/platform/x86/intel_sgx/sgx_page_cache.c
--- a/drivers/platform/x86/intel_sgx/sgx_page_cache.c
+++ b/drivers/platform/x86/intel_sgx/sgx_page_cache.c
@@ -59,6 +59,8 @@
*/

#include "sgx.h"
+#include "sgx_epc_cgroup.h"
+
#include <linux/freezer.h>
#include <linux/highmem.h>
#include <linux/kthread.h>
@@ -83,6 +85,15 @@ struct task_struct *ksgxswapd_tsk;
static DECLARE_WAIT_QUEUE_HEAD(ksgxswapd_waitq);

+static inline struct sgx_epc_lru *sgx_lru(struct sgx_epc_page *epc_page)
+{
+#ifdef CONFIG_CGROUP_SGX_EPC
+ return epc_page->epc_cg ? &epc_page->epc_cg->lru : &sgx_global_lru;
+#else
+ return &sgx_global_lru;
+#endif
+}
+
static int sgx_test_and_clear_young_cb(pte_t *ptep, pgtable_t token,
unsigned long addr, void *data)
{
@@ -122,7 +133,7 @@ void sgx_activate_page(struct sgx_epc_page *epc_page,
struct sgx_encl *encl,
struct sgx_encl_page *encl_page)
{
- struct sgx_epc_lru *lru = &sgx_global_lru;
+ struct sgx_epc_lru *lru = sgx_lru(epc_page);

epc_page->encl = encl;
epc_page->encl_page = encl_page;
@@ -136,19 +147,19 @@ void sgx_activate_page(struct sgx_epc_page *epc_page,
void sgx_deactivate_page(struct sgx_epc_page *epc_page)
{
- struct sgx_epc_lru *lru = &sgx_global_lru;
+ struct sgx_epc_lru *lru = sgx_lru(epc_page);

spin_lock(&lru->lock);
list_del_init(&epc_page->list);
spin_unlock(&lru->lock);
}

-static void sgx_isolate_pages(struct list_head *dst,
- unsigned long nr_to_scan)
+static unsigned long __sgx_isolate_pages(struct list_head *dst,
+ unsigned long nr_to_scan,
+ struct sgx_epc_lru *lru)
{
unsigned long i;
struct sgx_epc_page *entry;
- struct sgx_epc_lru *lru = &sgx_global_lru;

spin_lock(&lru->lock);
@@ -168,6 +179,51 @@ static void sgx_isolate_pages(struct list_head *dst,
}

spin_unlock(&lru->lock);
+
+ return i;
+}
+
+static void sgx_isolate_pages(struct list_head *dst,
+ unsigned long nr_to_scan,
+ struct sgx_epc_cgroup *root)
+{
+#ifdef CONFIG_CGROUP_SGX_EPC
+ struct sgx_epc_cgroup *epc_cg;
+ unsigned long nr_scanned = 0;
+ struct sgx_epc_reclaim reclaim;
+
+ /*
+ * If we're not targeting a specific cgroup, take from the global
+ * list first, even when cgroups are enabled; any pages that have
+ * somehow ended up on the global LRU should be reclaimed ASAP.
+ */
+ if (!root) {
+ nr_scanned += __sgx_isolate_pages(dst, nr_to_scan, &sgx_global_lru);
+ if (nr_scanned >= nr_to_scan)
+ return;
+ }
+
+ for (epc_cg = sgx_epc_cgroup_iter(NULL, root, &reclaim);
+ epc_cg;
+ epc_cg = sgx_epc_cgroup_iter(epc_cg, root, &reclaim)) {
+ if (sgx_epc_cgroup_is_low(epc_cg, root)) {
+ /*
+ * Skip this cgroup unless all cgroups in the tree we're
+ * looking at are low, in which case low is effectively "normal".
+ */
+ if (!sgx_epc_cgroup_all_in_use_are_low(root))
+ continue;
+ }
+
+ nr_scanned += __sgx_isolate_pages(dst, nr_to_scan - nr_scanned, &epc_cg->lru);
+ if (nr_scanned >= nr_to_scan) {
+ sgx_epc_cgroup_iter_break(epc_cg, root);
+ break;
+ }
+ }
+#else
+ __sgx_isolate_pages(dst, nr_to_scan, &sgx_global_lru);
+#endif
}

static int __sgx_ewb(struct sgx_encl *encl,
@@ -333,10 +389,33 @@ static inline void sgx_del_if_dead(struct sgx_encl *encl,
static inline void sgx_lru_putback(struct list_head *src)
{
struct sgx_epc_lru *lru = &sgx_global_lru;
+#ifdef CONFIG_CGROUP_SGX_EPC
+ struct sgx_epc_page *entry, *tmp, *master = NULL;
+ LIST_HEAD(cur);
+#endif

if (list_empty(src))
return;

+#ifdef CONFIG_CGROUP_SGX_EPC
+ list_for_each_entry_safe(entry, tmp, src, list) {
+ if (!master) {
+ master = entry;
+ } else if (entry->epc_cg != master->epc_cg) {
+ lru = sgx_lru(master);
+
+ spin_lock(&lru->lock);
+ list_splice_tail_init(&cur, &lru->active_lru);
+ spin_unlock(&lru->lock);
+
+ master = entry;
+ }
+
+ list_move_tail(&entry->list, &cur);
+ }
+ lru = sgx_lru(master);
+ src = &cur;
+#endif
spin_lock(&lru->lock);
list_splice_tail_init(src, &lru->active_lru);
spin_unlock(&lru->lock);
@@ -354,7 +433,7 @@ unsigned long sgx_swap_pages(unsigned long nr_to_scan,
LIST_HEAD(swap);
LIST_HEAD(skip);

- sgx_isolate_pages(&iso, nr_to_scan);
+ sgx_isolate_pages(&iso, nr_to_scan, epc_cg);

while (!list_empty(&iso)) {
encl = list_first_entry(&iso, struct sgx_epc_page, list)->encl;
@@ -562,6 +641,11 @@ int sgx_free_page(struct sgx_epc_page *entry, struct sgx_encl *encl)
entry->encl = NULL;
entry->encl_page = NULL;

+#ifdef CONFIG_CGROUP_SGX_EPC
+ sgx_epc_cgroup_uncharge(entry->epc_cg, 1);
+ entry->epc_cg = NULL;
+#endif
+
spin_lock(&sgx_free_list_lock);
list_add(&entry->list, &sgx_free_list);
sgx_nr_free_pages++;
diff --git a/drivers/platform/x86/intel_sgx/sgx_util.c b/drivers/platform/x86/intel_sgx/sgx_util.c
--- a/drivers/platform/x86/intel_sgx/sgx_util.c
+++ b/drivers/platform/x86/intel_sgx/sgx_util.c
@@ -59,6 +59,8 @@
*/

#include "sgx.h"
+#include "sgx_epc_cgroup.h"
+
#include <linux/highmem.h>
#include <linux/shmem_fs.h>
#include <linux/sched/mm.h>
@@ -249,6 +251,7 @@ static struct sgx_encl_page *sgx_do_fault(struct vm_area_struct *vma,
struct sgx_encl_page *entry;
struct sgx_epc_page *epc_page = NULL;
struct sgx_epc_page *secs_epc_page = NULL;
+ struct sgx_epc_cgroup *epc_cg;
bool reserve = (flags & SGX_FAULT_RESERVE) != 0;
int rc = 0;
@@ -258,6 +261,10 @@ static struct sgx_encl_page *sgx_do_fault(struct vm_area_struct *vma,
if (!encl)
return ERR_PTR(-EFAULT);

+ rc = sgx_epc_cgroup_try_charge(encl->mm, SGX_ALLOC_ATOMIC, 1, &epc_cg);
+ if (rc)
+ return ERR_PTR(rc);
+
mutex_lock(&encl->lock);

entry = radix_tree_lookup(&encl->page_tree, addr >> PAGE_SHIFT);
@@ -330,6 +337,9 @@ static struct sgx_encl_page *sgx_do_fault(struct vm_area_struct *vma,
*/
encl->secs_child_cnt++;

+#ifdef CONFIG_CGROUP_SGX_EPC
+ epc_page->epc_cg = epc_cg;
+#endif
entry->epc_page = epc_page;

if (reserve)
@@ -337,6 +347,7 @@ static struct sgx_encl_page *sgx_do_fault(struct vm_area_struct *vma,
/* Do not free */
epc_page = NULL;
+ epc_cg = NULL;

rc = vm_insert_pfn(vma, entry->addr, PFN_DOWN(entry->epc_page->pa));
if (rc) {
@@ -351,6 +362,8 @@ static struct sgx_encl_page *sgx_do_fault(struct vm_area_struct *vma,
sgx_activate_page(entry->epc_page, encl, entry);
out:
mutex_unlock(&encl->lock);
+ if (epc_cg)
+ sgx_epc_cgroup_uncharge(epc_cg, 1);
if (epc_page)
sgx_free_page(epc_page, encl);
if (secs_epc_page)
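Note: the sgx_epc_cgroup_try_charge()/sgx_epc_cgroup_uncharge() calls added
in sgx_ioctl.c and sgx_util.c are not wrapped in CONFIG_CGROUP_SGX_EPC, so
sgx_epc_cgroup.h presumably provides no-op stubs when the controller is
compiled out.  A minimal sketch of such stubs, with names and signatures
inferred from the call sites above rather than taken from the real header:

#ifndef CONFIG_CGROUP_SGX_EPC
struct mm_struct;
struct sgx_epc_cgroup;

/* Hypothetical stub: charging always succeeds and yields no cgroup. */
static inline int sgx_epc_cgroup_try_charge(struct mm_struct *mm,
					    unsigned int flags,
					    unsigned long nr_pages,
					    struct sgx_epc_cgroup **epc_cg)
{
	*epc_cg = NULL;
	return 0;
}

/*
 * Hypothetical stub: must tolerate a NULL epc_cg, since sgx_free_page()
 * and the error paths above can pass one.
 */
static inline void sgx_epc_cgroup_uncharge(struct sgx_epc_cgroup *epc_cg,
					   unsigned long nr_pages)
{
}
#endif

This would mirror how memcontrol.h exposes static inline stubs for the
mem_cgroup_*() charge APIs when !CONFIG_MEMCG.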