@@ -8,16 +8,128 @@
static struct sgx_cgroup sgx_cg_root;
/**
- * sgx_cgroup_try_charge() - try to charge cgroup for a single EPC page
+ * sgx_cgroup_lru_empty() - check if a cgroup tree has no pages on its LRUs
+ * @root: Root of the tree to check
*
+ * Return: %true if all cgroups under the specified root have empty LRU lists.
+ */
+static bool sgx_cgroup_lru_empty(struct misc_cg *root)
+{
+ struct cgroup_subsys_state *css_root;
+ struct cgroup_subsys_state *pos;
+ struct sgx_cgroup *sgx_cg;
+ bool ret = true;
+
+ /*
+ * Caller must ensure css_root ref acquired
+ */
+ css_root = &root->css;
+
+ rcu_read_lock();
+ css_for_each_descendant_pre(pos, css_root) {
+ if (!css_tryget(pos))
+ break;
+
+ rcu_read_unlock();
+
+ sgx_cg = sgx_cgroup_from_misc_cg(css_misc(pos));
+
+ spin_lock(&sgx_cg->lru.lock);
+ ret = list_empty(&sgx_cg->lru.reclaimable);
+ spin_unlock(&sgx_cg->lru.lock);
+
+ rcu_read_lock();
+ css_put(pos);
+ if (!ret)
+ break;
+ }
+
+ rcu_read_unlock();
+
+ return ret;
+}
+
+/**
+ * sgx_cgroup_reclaim_pages() - reclaim EPC from a cgroup tree
+ * @root: The root of cgroup tree to reclaim from.
+ *
+ * This function performs a pre-order walk in the cgroup tree under the given
+ * root, attempting to reclaim pages at each node until a fixed number of pages
+ * (%SGX_NR_TO_SCAN) are attempted for reclamation. No guarantee of success on
+ * the actual reclamation process. In extreme cases, if all pages in front of
+ * the LRUs are recently accessed, i.e., considered "too young" to reclaim, no
+ * page will actually be reclaimed after walking the whole tree.
+ */
+static void sgx_cgroup_reclaim_pages(struct misc_cg *root)
+{
+ struct cgroup_subsys_state *css_root;
+ struct cgroup_subsys_state *pos;
+ struct sgx_cgroup *sgx_cg;
+ unsigned int cnt = 0;
+
+ /* Caller must ensure css_root ref acquired */
+ css_root = &root->css;
+
+ rcu_read_lock();
+ css_for_each_descendant_pre(pos, css_root) {
+ if (!css_tryget(pos))
+ break;
+ rcu_read_unlock();
+
+ sgx_cg = sgx_cgroup_from_misc_cg(css_misc(pos));
+ cnt += sgx_reclaim_pages(&sgx_cg->lru);
+
+ rcu_read_lock();
+ css_put(pos);
+
+ if (cnt >= SGX_NR_TO_SCAN)
+ break;
+ }
+
+ rcu_read_unlock();
+}
+
+static int __sgx_cgroup_try_charge(struct sgx_cgroup *epc_cg)
+{
+ if (!misc_cg_try_charge(MISC_CG_RES_SGX_EPC, epc_cg->cg, PAGE_SIZE))
+ return 0;
+
+ /* No reclaimable pages left in the cgroup */
+ if (sgx_cgroup_lru_empty(epc_cg->cg))
+ return -ENOMEM;
+
+ if (signal_pending(current))
+ return -ERESTARTSYS;
+
+ return -EBUSY;
+}
+
+/**
+ * sgx_cgroup_try_charge() - try to charge cgroup for a single EPC page
* @sgx_cg: The EPC cgroup to be charged for the page.
+ * @reclaim: Whether or not synchronous EPC reclaim is allowed.
* Return:
* * %0 - If successfully charged.
* * -errno - for failures.
*/
-int sgx_cgroup_try_charge(struct sgx_cgroup *sgx_cg)
+int sgx_cgroup_try_charge(struct sgx_cgroup *sgx_cg, enum sgx_reclaim reclaim)
{
- return misc_cg_try_charge(MISC_CG_RES_SGX_EPC, sgx_cg->cg, PAGE_SIZE);
+ int ret;
+
+ for (;;) {
+ ret = __sgx_cgroup_try_charge(sgx_cg);
+
+ if (ret != -EBUSY)
+ return ret;
+
+ if (reclaim == SGX_NO_RECLAIM)
+ return -ENOMEM;
+
+ sgx_cgroup_reclaim_pages(sgx_cg->cg);
+ cond_resched();
+ }
+
+ return 0;
}
/**
@@ -42,6 +154,7 @@ static void sgx_cgroup_free(struct misc_cg *cg)
static void sgx_cgroup_misc_init(struct misc_cg *cg, struct sgx_cgroup *sgx_cg)
{
+ sgx_lru_init(&sgx_cg->lru);
cg->res[MISC_CG_RES_SGX_EPC].priv = sgx_cg;
sgx_cg->cg = cg;
}
@@ -20,7 +20,7 @@ static inline struct sgx_cgroup *sgx_get_current_cg(void)
static inline void sgx_put_cg(struct sgx_cgroup *sgx_cg) { }
-static inline int sgx_cgroup_try_charge(struct sgx_cgroup *sgx_cg)
+static inline int sgx_cgroup_try_charge(struct sgx_cgroup *sgx_cg, enum sgx_reclaim reclaim)
{
return 0;
}
@@ -33,6 +33,7 @@ static inline void sgx_cgroup_init(void) { }
struct sgx_cgroup {
struct misc_cg *cg;
+ struct sgx_epc_lru_list lru;
};
static inline struct sgx_cgroup *sgx_cgroup_from_misc_cg(struct misc_cg *cg)
@@ -63,7 +64,7 @@ static inline void sgx_put_cg(struct sgx_cgroup *sgx_cg)
put_misc_cg(sgx_cg->cg);
}
-int sgx_cgroup_try_charge(struct sgx_cgroup *sgx_cg);
+int sgx_cgroup_try_charge(struct sgx_cgroup *sgx_cg, enum sgx_reclaim reclaim);
void sgx_cgroup_uncharge(struct sgx_cgroup *sgx_cg);
void sgx_cgroup_init(void);
@@ -285,11 +285,14 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page,
mutex_unlock(&encl->lock);
}
-/*
- * Take a fixed number of pages from the head of the active page pool and
- * reclaim them to the enclave's private shmem files. Skip the pages, which have
- * been accessed since the last scan. Move those pages to the tail of active
- * page pool so that the pages get scanned in LRU like fashion.
+/**
+ * sgx_reclaim_pages() - Attempt to reclaim a fixed number of pages from an LRU
+ * @lru: The LRU from which pages are reclaimed.
+ *
+ * Take a fixed number of pages from the head of a given LRU and reclaim them to
+ * the enclave's private shmem files. Skip the pages, which have been accessed
+ * since the last scan. Move those pages to the tail of the list so that the
+ * pages get scanned in LRU like fashion.
*
* Batch process a chunk of pages (at the moment 16) in order to degrade amount
* of IPI's and ETRACK's potentially required. sgx_encl_ewb() does degrade a bit
@@ -297,8 +300,10 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page,
* + EWB) but not sufficiently. Reclaiming one page at a time would also be
* problematic as it would increase the lock contention too much, which would
* halt forward progress.
+ *
+ * Return: Number of pages attempted for reclamation.
*/
-static void sgx_reclaim_pages(void)
+unsigned int sgx_reclaim_pages(struct sgx_epc_lru_list *lru)
{
struct sgx_epc_page *chunk[SGX_NR_TO_SCAN];
struct sgx_backing backing[SGX_NR_TO_SCAN];
@@ -309,10 +314,9 @@ static void sgx_reclaim_pages(void)
int ret;
int i;
- spin_lock(&sgx_global_lru.lock);
+ spin_lock(&lru->lock);
for (i = 0; i < SGX_NR_TO_SCAN; i++) {
- epc_page = list_first_entry_or_null(&sgx_global_lru.reclaimable,
- struct sgx_epc_page, list);
+ epc_page = list_first_entry_or_null(&lru->reclaimable, struct sgx_epc_page, list);
if (!epc_page)
break;
@@ -327,7 +331,7 @@ static void sgx_reclaim_pages(void)
*/
epc_page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;
}
- spin_unlock(&sgx_global_lru.lock);
+ spin_unlock(&lru->lock);
for (i = 0; i < cnt; i++) {
epc_page = chunk[i];
@@ -350,9 +354,9 @@ static void sgx_reclaim_pages(void)
continue;
skip:
- spin_lock(&sgx_global_lru.lock);
- list_add_tail(&epc_page->list, &sgx_global_lru.reclaimable);
- spin_unlock(&sgx_global_lru.lock);
+ spin_lock(&lru->lock);
+ list_add_tail(&epc_page->list, &lru->reclaimable);
+ spin_unlock(&lru->lock);
kref_put(&encl_page->encl->refcount, sgx_encl_release);
@@ -378,6 +382,8 @@ static void sgx_reclaim_pages(void)
sgx_free_epc_page(epc_page);
}
+
+ return cnt;
}
static bool sgx_should_reclaim(unsigned long watermark)
@@ -386,6 +392,11 @@ static bool sgx_should_reclaim(unsigned long watermark)
!list_empty(&sgx_global_lru.reclaimable);
}
+static void sgx_reclaim_pages_global(void)
+{
+ sgx_reclaim_pages(&sgx_global_lru);
+}
+
/*
* sgx_reclaim_direct() should be called (without enclave's mutex held)
* in locations where SGX memory resources might be low and might be
@@ -394,7 +405,7 @@ static bool sgx_should_reclaim(unsigned long watermark)
void sgx_reclaim_direct(void)
{
if (sgx_should_reclaim(SGX_NR_LOW_PAGES))
- sgx_reclaim_pages();
+ sgx_reclaim_pages_global();
}
static int ksgxd(void *p)
@@ -417,7 +428,7 @@ static int ksgxd(void *p)
sgx_should_reclaim(SGX_NR_HIGH_PAGES));
if (sgx_should_reclaim(SGX_NR_HIGH_PAGES))
- sgx_reclaim_pages();
+ sgx_reclaim_pages_global();
cond_resched();
}
@@ -571,7 +582,7 @@ struct sgx_epc_page *sgx_alloc_epc_page(void *owner, enum sgx_reclaim reclaim)
int ret;
sgx_cg = sgx_get_current_cg();
- ret = sgx_cgroup_try_charge(sgx_cg);
+ ret = sgx_cgroup_try_charge(sgx_cg, reclaim);
if (ret) {
sgx_put_cg(sgx_cg);
return ERR_PTR(ret);
@@ -599,7 +610,7 @@ struct sgx_epc_page *sgx_alloc_epc_page(void *owner, enum sgx_reclaim reclaim)
break;
}
- sgx_reclaim_pages();
+ sgx_reclaim_pages_global();
cond_resched();
}
@@ -135,6 +135,7 @@ void sgx_reclaim_direct(void);
void sgx_mark_page_reclaimable(struct sgx_epc_page *page);
int sgx_unmark_page_reclaimable(struct sgx_epc_page *page);
struct sgx_epc_page *sgx_alloc_epc_page(void *owner, enum sgx_reclaim reclaim);
+unsigned int sgx_reclaim_pages(struct sgx_epc_lru_list *lru);
void sgx_ipi_cb(void *info);