@@ -1930,6 +1930,7 @@ config X86_SGX
depends on CRYPTO_SHA256=y
select SRCU
select MMU_NOTIFIER
+ select NUMA_KEEP_MEMINFO if NUMA
help
Intel(R) Software Guard eXtensions (SGX) is a set of CPU instructions
that can be used by applications to set aside private regions of code
@@ -23,9 +23,21 @@ static DECLARE_WAIT_QUEUE_HEAD(ksgxd_waitq);
* with sgx_reclaimer_lock acquired.
*/
static LIST_HEAD(sgx_active_page_list);
-
static DEFINE_SPINLOCK(sgx_reclaimer_lock);
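+/*
+ * Number of free EPC pages summed over all NUMA nodes. Outside of __init code
+ * it is updated while holding the lock of the node whose free page list changes.
+ * NOTE(review): the per-node locks do not exclude updates made under another
+ * node's lock, so this plain counter can be modified concurrently -- confirm
+ * whether it needs to be atomic.
+ */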
+static unsigned long sgx_nr_free_pages;
+
+/* Nodes with one or more EPC sections. */
+static nodemask_t sgx_numa_mask;
+
+/*
+ * Array with one sgx_numa_node for each possible NUMA node. Each
+ * entry holds the list of free EPC pages that belong to the EPC
+ * sections on that node.
+ */
+static struct sgx_numa_node *sgx_numa_nodes;
+
static LIST_HEAD(sgx_dirty_page_list);
/*
@@ -312,6 +324,7 @@ static void sgx_reclaim_pages(void)
struct sgx_epc_section *section;
struct sgx_encl_page *encl_page;
struct sgx_epc_page *epc_page;
+ struct sgx_numa_node *node;
pgoff_t page_index;
int cnt = 0;
int ret;
@@ -383,28 +396,18 @@ static void sgx_reclaim_pages(void)
epc_page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;
section = &sgx_epc_sections[epc_page->section];
- spin_lock(&section->lock);
- list_add_tail(&epc_page->list, &section->page_list);
- section->free_cnt++;
- spin_unlock(&section->lock);
- }
-}
+ node = section->node;
-static unsigned long sgx_nr_free_pages(void)
-{
- unsigned long cnt = 0;
- int i;
-
- for (i = 0; i < sgx_nr_epc_sections; i++)
- cnt += sgx_epc_sections[i].free_cnt;
-
- return cnt;
+ spin_lock(&node->lock);
+ list_add_tail(&epc_page->list, &node->free_page_list);
+ sgx_nr_free_pages++;
+ spin_unlock(&node->lock);
+ }
}
+/*
+ * Report whether reclaiming is warranted: the number of free EPC pages is
+ * below @watermark and the active page list is not empty.
+ */
static bool sgx_should_reclaim(unsigned long watermark)
{
- return sgx_nr_free_pages() < watermark &&
- !list_empty(&sgx_active_page_list);
+ /* With an empty active page list the free page count does not matter. */
+ if (list_empty(&sgx_active_page_list))
+ return false;
+
+ return sgx_nr_free_pages < watermark;
}
static int ksgxd(void *p)
@@ -451,50 +454,61 @@ static bool __init sgx_page_reclaimer_init(void)
return true;
}
-static struct sgx_epc_page *__sgx_alloc_epc_page_from_section(struct sgx_epc_section *section)
+/*
+ * Take the first page off the free page list of NUMA node @nid and decrement
+ * sgx_nr_free_pages. @nid must be set in sgx_numa_mask: only those entries of
+ * sgx_numa_nodes get their lock and list initialized in sgx_page_cache_init().
+ *
+ * Return: a free EPC page, or NULL when the node has no free pages.
+ */
+static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid)
{
- struct sgx_epc_page *page;
+ struct sgx_numa_node *node = &sgx_numa_nodes[nid];
+ struct sgx_epc_page *page = NULL;
- spin_lock(&section->lock);
+ spin_lock(&node->lock);
- if (list_empty(&section->page_list)) {
- spin_unlock(&section->lock);
+ if (list_empty(&node->free_page_list)) {
+ spin_unlock(&node->lock);
return NULL;
}
- page = list_first_entry(&section->page_list, struct sgx_epc_page, list);
+ page = list_first_entry(&node->free_page_list, struct sgx_epc_page, list);
list_del_init(&page->list);
- section->free_cnt--;
+ sgx_nr_free_pages--;
+
+ spin_unlock(&node->lock);
- spin_unlock(&section->lock);
return page;
}
/**
* __sgx_alloc_epc_page() - Allocate an EPC page
*
- * Iterate through EPC sections and borrow a free EPC page to the caller. When a
- * page is no longer needed it must be released with sgx_free_epc_page().
+ * Iterate through the NUMA nodes that have EPC sections and reserve a free EPC
+ * page for the caller. Start from the NUMA node where the caller is executing
+ * or, if that node has no EPC section, from the next node that has one. Every
+ * node in sgx_numa_mask is tried at most once.
*
* Return:
- * an EPC page,
- * -errno on error
+ * - an EPC page: A free EPC page was available.
+ * - ERR_PTR(-ENOMEM): Out of EPC pages.
*/
struct sgx_epc_page *__sgx_alloc_epc_page(void)
{
- struct sgx_epc_section *section;
struct sgx_epc_page *page;
- int i;
-
- for (i = 0; i < sgx_nr_epc_sections; i++) {
- section = &sgx_epc_sections[i];
+ int nid_of_current = numa_node_id();
+ int nid_start;
+ int nid;
+
+ /*
+ * Begin at a node that is known to be in sgx_numa_mask. The walk below
+ * only visits nodes from the mask, so comparing against the caller's
+ * node would never end the loop when that node has no EPC section.
+ * NOTE(review): assumes at least one node is set in sgx_numa_mask --
+ * confirm that initialization fails when no EPC section is found.
+ */
+ if (node_isset(nid_of_current, sgx_numa_mask))
+ nid_start = nid_of_current;
+ else
+ nid_start = next_node_in(nid_of_current, sgx_numa_mask);
- page = __sgx_alloc_epc_page_from_section(section);
+ nid = nid_start;
+ do {
+ page = __sgx_alloc_epc_page_from_node(nid);
if (page)
return page;
- }
+
+ /* This node is empty: move on to the next node with EPC. */
+ nid = next_node_in(nid, sgx_numa_mask);
+ } while (nid != nid_start);
return ERR_PTR(-ENOMEM);
}
/**
@@ -600,6 +614,7 @@ struct sgx_epc_page *sgx_alloc_epc_page(void *owner, bool reclaim)
void sgx_free_epc_page(struct sgx_epc_page *page)
{
struct sgx_epc_section *section = &sgx_epc_sections[page->section];
+ struct sgx_numa_node *node = section->node;
int ret;
WARN_ON_ONCE(page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED);
@@ -608,10 +623,12 @@ void sgx_free_epc_page(struct sgx_epc_page *page)
if (WARN_ONCE(ret, "EREMOVE returned %d (0x%x)", ret, ret))
return;
- spin_lock(&section->lock);
- list_add_tail(&page->list, &section->page_list);
- section->free_cnt++;
- spin_unlock(&section->lock);
+ spin_lock(&node->lock);
+
+ list_add_tail(&page->list, &node->free_page_list);
+ sgx_nr_free_pages++;
+
+ spin_unlock(&node->lock);
}
static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size,
@@ -632,8 +649,6 @@ static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size,
}
section->phys_addr = phys_addr;
- spin_lock_init(&section->lock);
- INIT_LIST_HEAD(&section->page_list);
for (i = 0; i < nr_pages; i++) {
section->pages[i].section = index;
@@ -642,7 +657,7 @@ static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size,
list_add_tail(&section->pages[i].list, &sgx_dirty_page_list);
}
- section->free_cnt = nr_pages;
+ sgx_nr_free_pages += nr_pages;
return true;
}
@@ -661,8 +676,13 @@ static bool __init sgx_page_cache_init(void)
{
u32 eax, ebx, ecx, edx, type;
u64 pa, size;
+ int nid;
int i;
+ sgx_numa_nodes = kmalloc_array(num_possible_nodes(), sizeof(*sgx_numa_nodes), GFP_KERNEL);
+ if (!sgx_numa_nodes)
+ return false;
+
for (i = 0; i < ARRAY_SIZE(sgx_epc_sections); i++) {
cpuid_count(SGX_CPUID, i + SGX_CPUID_EPC, &eax, &ebx, &ecx, &edx);
@@ -685,6 +705,21 @@ static bool __init sgx_page_cache_init(void)
break;
}
+ nid = numa_map_to_online_node(phys_to_target_node(pa));
+ if (nid == NUMA_NO_NODE) {
+ /* The physical address is already printed above. */
+ pr_warn(FW_BUG "Unable to map EPC section to online node. Fallback to the NUMA node 0.\n");
+ nid = 0;
+ }
+
+ if (!node_isset(nid, sgx_numa_mask)) {
+ spin_lock_init(&sgx_numa_nodes[nid].lock);
+ INIT_LIST_HEAD(&sgx_numa_nodes[nid].free_page_list);
+ node_set(nid, sgx_numa_mask);
+ }
+
+ sgx_epc_sections[i].node = &sgx_numa_nodes[nid];
+
sgx_nr_epc_sections++;
}
@@ -29,22 +29,26 @@ struct sgx_epc_page {
struct list_head list;
};
+/*
+ * Contains the tracking data for NUMA nodes having EPC pages. Most importantly,
+ * the free page list local to the node is stored here.
+ *
+ * 'lock' must be held before accessing 'free_page_list'.
+ */
+struct sgx_numa_node {
+ struct list_head free_page_list; /* free EPC pages of this node, linked through sgx_epc_page.list */
+ spinlock_t lock; /* taken around every addition to and removal from free_page_list */
+};
+
/*
* The firmware can define multiple chunks of EPC to the different areas of the
* physical memory e.g. for memory areas of the each node. This structure is
* used to store EPC pages for one EPC section and virtual memory area where
* the pages have been mapped.
*
- * 'lock' must be held before accessing 'page_list' or 'free_cnt'.
+ * 'node' refers to the sgx_numa_node whose free page list holds the free
+ * pages of this section.
*/
struct sgx_epc_section {
unsigned long phys_addr;
void *virt_addr;
struct sgx_epc_page *pages;
-
- spinlock_t lock;
- struct list_head page_list;
- unsigned long free_cnt;
+ struct sgx_numa_node *node; /* assigned in sgx_page_cache_init() */
};
extern struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS];