
[V2,03/15] x86/sgx: Add an attribute for the amount of SGX memory in a NUMA node

Message ID 6df03c54cc8b533de4389b663ec9e4803ced1beb.1635447301.git.reinette.chatre@intel.com (mailing list archive)
State New, archived
Series selftests/sgx: Oversubscription, page permission, thread entry

Commit Message

Reinette Chatre Oct. 28, 2021, 8:37 p.m. UTC
From: Jarkko Sakkinen <jarkko@kernel.org>

The amount of SGX memory on the system is determined by the BIOS and it
varies wildly between systems.  It can range from dozens of MBs on desktops
or VMs up to many GBs on servers.  Just like for regular memory, it is
sometimes useful to know the amount of usable SGX memory in the system.

Add an attribute for the amount of SGX memory in bytes to each NUMA
node. The path is /sys/devices/system/node/node[0-9]*/sgx/size.
Calculate these values by summing up EPC section sizes for each node
during driver initialization.

Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
[reinette: Placeholder patch - submitted separately upstream
https://lore.kernel.org/lkml/20211018135744.45527-2-jarkko@kernel.org/
]
---
 Documentation/ABI/stable/sysfs-devices-node |  7 ++
 arch/x86/kernel/cpu/sgx/main.c              | 85 +++++++++++++++++++++
 arch/x86/kernel/cpu/sgx/sgx.h               |  2 +
 3 files changed, 94 insertions(+)

Comments

Dave Hansen Oct. 29, 2021, 6:06 p.m. UTC | #1
On 10/28/21 1:37 PM, Reinette Chatre wrote:
> The amount of SGX memory on the system is determined by the BIOS and it
> varies wildly between systems.  It can range from dozens of MBs on desktops
> or VMs up to many GBs on servers.  Just like for regular memory, it is
> sometimes useful to know the amount of usable SGX memory in the system.
> 
> Add an attribute for the amount of SGX memory in bytes to each NUMA
> node. The path is /sys/devices/system/node/node[0-9]*/sgx/size.
> Calculate these values by summing up EPC section sizes for each node
> during driver initialization.

For now, can we just make the selftests read the SGX CPUID section
leaves?  It's not as precise as knowing how much the kernel actually
decided to use, but it's good enough for a selftest.  It also means we
can merge something without having to worry about long-term ABI.

This is also why I once suggested that we first make the selftests
depend on some debugfs file that would be short-lived.  But, if we use
CPUID, we don't even need to mess with debugfs.

You can even just steal the code from sgx_page_cache_init() to do it.

Would that work, or am I missing something?
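
For reference, below is a minimal userspace sketch of the CPUID approach
described above: walk the EPC sub-leaves of CPUID leaf 0x12 (starting at
sub-leaf 2, the same data sgx_page_cache_init() parses) and sum the section
sizes. The helper name epc_total_size() and the use of GCC's <cpuid.h> are
illustrative assumptions, not code from the patch or the selftests, and SGX
support is assumed to have been verified already (CPUID leaf 0x7).

#include <stdio.h>
#include <stdint.h>
#include <cpuid.h>

#define SGX_CPUID		0x12
#define SGX_CPUID_FIRST_EPC	2	/* EPC sections start at sub-leaf 2 */
#define SGX_CPUID_EPC_SECTION	0x1	/* sub-leaf type: valid EPC section */

/* Sum the sizes of all EPC sections enumerated by CPUID leaf 0x12. */
static uint64_t epc_total_size(void)
{
	unsigned int eax, ebx, ecx, edx;
	uint64_t total = 0;
	unsigned int i;

	for (i = SGX_CPUID_FIRST_EPC; ; i++) {
		__cpuid_count(SGX_CPUID, i, eax, ebx, ecx, edx);

		/* A sub-leaf type other than "EPC section" ends the list. */
		if ((eax & 0xf) != SGX_CPUID_EPC_SECTION)
			break;

		/* Section size: bits 31:12 in ECX, bits 51:32 in EDX. */
		total += (ecx & 0xfffff000ULL) |
			 ((uint64_t)(edx & 0xfffff) << 32);
	}

	return total;
}

int main(void)
{
	printf("EPC: %llu bytes\n", (unsigned long long)epc_total_size());
	return 0;
}

As noted above, this reports the BIOS-enumerated EPC rather than what the
kernel actually decided to use, which is the trade-off of the CPUID approach.
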
Reinette Chatre Oct. 29, 2021, 7:03 p.m. UTC | #2
Hi Dave,

On 10/29/2021 11:06 AM, Dave Hansen wrote:
> On 10/28/21 1:37 PM, Reinette Chatre wrote:
>> The amount of SGX memory on the system is determined by the BIOS and it
>> varies wildly between systems.  It can range from dozens of MBs on desktops
>> or VMs up to many GBs on servers.  Just like for regular memory, it is
>> sometimes useful to know the amount of usable SGX memory in the system.
>>
>> Add an attribute for the amount of SGX memory in bytes to each NUMA
>> node. The path is /sys/devices/system/node/node[0-9]*/sgx/size.
>> Calculate these values by summing up EPC section sizes for each node
>> during driver initialization.
> 
> For now, can we just make the selftests read the SGX CPUID section
> leaves?  It's not as precise as knowing how much the kernel actually
> decided to use, but it's good enough for a selftest.  It also means we
> can merge something without having to worry about long-term ABI.

Yes, we can do that.

> 
> This is also why I once suggested that we first make the selftests
> depend on some debugfs file that would be short-lived.  But, if we use
> CPUID, we don't even need to mess with debugfs.

My apologies, this was not intended to sidestep your suggestion. V1 did use 
the debugfs solution you suggested as a placeholder, but when the debugfs 
solution evolved, the tests were adapted to follow those changes instead of 
sticking with the debugfs solution as proposed in
https://lore.kernel.org/lkml/6f3cc681e10877e639b882eaabf1a5e21bd2fc94.camel@kernel.org/

> You can even just steal the code from sgx_page_cache_init() to do it.
> 
> Would that work, or am I missing something?
> 

I do think that will work. The selftests are only interested in the 
total SGX memory (as opposed to memory per NUMA node as exposed by the 
current interface) and that can be obtained via CPUID. I will adapt the 
oversubscription test case to obtain the information it needs via CPUID.

Thank you very much

Reinette

Patch

diff --git a/Documentation/ABI/stable/sysfs-devices-node b/Documentation/ABI/stable/sysfs-devices-node
index 484fc04bcc25..12dc2149e8e0 100644
--- a/Documentation/ABI/stable/sysfs-devices-node
+++ b/Documentation/ABI/stable/sysfs-devices-node
@@ -176,3 +176,10 @@  Contact:	Keith Busch <keith.busch@intel.com>
 Description:
 		The cache write policy: 0 for write-back, 1 for write-through,
 		other or unknown.
+
+What:		/sys/devices/system/node/nodeX/sgx/size
+Date:		October 2021
+Contact:	Jarkko Sakkinen <jarkko@kernel.org>
+Description:
+		Total available physical SGX memory, also known as Enclave Page
+		Cache (EPC), in bytes.
diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
index a6e313f1a82d..dc1d46c51323 100644
--- a/arch/x86/kernel/cpu/sgx/main.c
+++ b/arch/x86/kernel/cpu/sgx/main.c
@@ -714,9 +714,11 @@  static bool __init sgx_page_cache_init(void)
 			spin_lock_init(&sgx_numa_nodes[nid].lock);
 			INIT_LIST_HEAD(&sgx_numa_nodes[nid].free_page_list);
 			node_set(nid, sgx_numa_mask);
+			sgx_numa_nodes[nid].size = 0;
 		}
 
 		sgx_epc_sections[i].node =  &sgx_numa_nodes[nid];
+		sgx_numa_nodes[nid].size += size;
 
 		sgx_nr_epc_sections++;
 	}
@@ -790,6 +792,81 @@  int sgx_set_attribute(unsigned long *allowed_attributes,
 }
 EXPORT_SYMBOL_GPL(sgx_set_attribute);
 
+#ifdef CONFIG_NUMA
+static ssize_t size_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	unsigned long size = 0;
+	int nid;
+
+	for (nid = 0; nid < num_possible_nodes(); nid++) {
+		if (dev == sgx_numa_nodes[nid].dev) {
+			size = sgx_numa_nodes[nid].size;
+			break;
+		}
+	}
+
+	return sysfs_emit(buf, "%lu\n", size);
+}
+DEVICE_ATTR_RO(size);
+
+static struct attribute *sgx_node_attrs[] = {
+	&dev_attr_size.attr,
+	NULL,
+};
+
+static const struct attribute_group sgx_node_attr_group = {
+	.name = "sgx",
+	.attrs = sgx_node_attrs,
+};
+
+static void sgx_numa_exit(void)
+{
+	struct device *dev;
+	int nid;
+
+	for (nid = 0; nid < num_possible_nodes(); nid++) {
+		dev = &node_devices[nid]->dev;
+		if (dev)
+			sysfs_remove_group(&dev->kobj, &sgx_node_attr_group);
+	}
+}
+
+static bool sgx_numa_init(void)
+{
+	struct sgx_numa_node *node;
+	struct device *dev;
+	int nid;
+	int ret;
+
+	for (nid = 0; nid < num_possible_nodes(); nid++) {
+		if (!sgx_numa_nodes[nid].size)
+			continue;
+
+		node = &sgx_numa_nodes[nid];
+		dev = &node_devices[nid]->dev;
+
+		ret = sysfs_create_group(&dev->kobj, &sgx_node_attr_group);
+		if (ret) {
+			sgx_numa_exit();
+			return false;
+		}
+
+		node->dev = dev;
+	}
+
+	return true;
+}
+#else
+static inline void sgx_numa_exit(void)
+{
+}
+
+static inline bool sgx_numa_init(void)
+{
+	return true;
+}
+#endif /* CONFIG_NUMA */
+
 static int __init sgx_init(void)
 {
 	int ret;
@@ -806,6 +883,11 @@  static int __init sgx_init(void)
 		goto err_reclaimer;
 	}
 
+	if (!sgx_numa_init()) {
+		ret = -ENOMEM;
+		goto err_numa_nodes;
+	}
+
 	ret = misc_register(&sgx_dev_provision);
 	if (ret)
 		goto err_provision;
@@ -829,6 +911,9 @@  static int __init sgx_init(void)
 	misc_deregister(&sgx_dev_provision);
 
 err_provision:
+	sgx_numa_exit();
+
+err_numa_nodes:
 	kthread_stop(ksgxd_tsk);
 
 err_reclaimer:
diff --git a/arch/x86/kernel/cpu/sgx/sgx.h b/arch/x86/kernel/cpu/sgx/sgx.h
index 4628acec0009..1de8c627a286 100644
--- a/arch/x86/kernel/cpu/sgx/sgx.h
+++ b/arch/x86/kernel/cpu/sgx/sgx.h
@@ -39,6 +39,8 @@  struct sgx_epc_page {
  */
 struct sgx_numa_node {
 	struct list_head free_page_list;
+	struct device *dev;
+	unsigned long size;
 	spinlock_t lock;
 };
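
For completeness, a small sketch of how userspace could consume the new
attribute, reading each node's sgx/size file and summing the values. The
path layout follows the ABI documentation above; the fixed upper bound on
node IDs and the minimal error handling are illustrative assumptions only.

#include <stdio.h>

int main(void)
{
	unsigned long long node_size, total = 0;
	char path[64];
	FILE *f;
	int nid;

	/* Nodes without SGX memory have no sgx/ directory and are skipped. */
	for (nid = 0; nid < 1024; nid++) {
		snprintf(path, sizeof(path),
			 "/sys/devices/system/node/node%d/sgx/size", nid);
		f = fopen(path, "r");
		if (!f)
			continue;

		if (fscanf(f, "%llu", &node_size) == 1) {
			printf("node%d: %llu bytes\n", nid, node_size);
			total += node_size;
		}
		fclose(f);
	}

	printf("total EPC: %llu bytes\n", total);
	return 0;
}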