diff mbox series

[v6,08/13] mm/demotion: Add support for memory tier creation from userspace

Message ID 20220610135229.182859-9-aneesh.kumar@linux.ibm.com (mailing list archive)
State New
Headers show
Series mm/demotion: Memory tiers and demotion | expand

Commit Message

Aneesh Kumar K.V June 10, 2022, 1:52 p.m. UTC
This patch adds support for creating memory tiers with a specific rank
value from userspace. To avoid races while creating memory tiers, the
/sys/devices/system/memtier/create_tier_from_rank file is provided.
Writing a rank value to this file creates a new memory tier with the
specified rank value.

Memory tiers created from userspace get destroyed when the memory
tier nodelist becomes empty.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 include/linux/memory-tiers.h |  3 +-
 mm/memory-tiers.c            | 74 ++++++++++++++++++++++++++++++++++++
 2 files changed, 76 insertions(+), 1 deletion(-)
diff mbox series

Patch

diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
index e70f0040d845..52896f5970b7 100644
--- a/include/linux/memory-tiers.h
+++ b/include/linux/memory-tiers.h
@@ -15,7 +15,8 @@ 
 #define MEMORY_RANK_PMEM	100
 
 #define DEFAULT_MEMORY_TIER	MEMORY_TIER_DRAM
-#define MAX_MEMORY_TIERS  3
+/* Device ids below this value belong to static tiers, which are never destroyed. */
+#define MAX_STATIC_MEMORY_TIERS  3
+/*
+ * Upper bound on memory tier device ids: the static ids plus 2 ids that
+ * can be handed out to tiers created from userspace.
+ */
+#define MAX_MEMORY_TIERS  (MAX_STATIC_MEMORY_TIERS + 2)
 
 extern bool numa_demotion_enabled;
 int node_create_and_set_memory_tier(int node, int tier);
diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
index 7bfdfac4d43e..de3b7403ae6f 100644
--- a/mm/memory-tiers.c
+++ b/mm/memory-tiers.c
@@ -6,6 +6,7 @@ 
 #include <linux/memory-tiers.h>
 #include <linux/random.h>
 #include <linux/memory.h>
+#include <linux/idr.h>
 
 #include "internal.h"
 
@@ -126,9 +127,12 @@  static const struct attribute_group *memory_tier_dev_groups[] = {
 	NULL
 };
 
+/* Allocator for the device ids of memory tiers created from userspace. */
+static DEFINE_IDA(memtier_dev_id);
+
+/*
+ * Free the memory tier that embeds @dev.
+ *
+ * Ids at or above MAX_STATIC_MEMORY_TIERS come from memtier_dev_id and
+ * are handed back to it before the tier is freed; lower ids are not
+ * allocated from memtier_dev_id and must not be passed to ida_free().
+ */
 static void memory_tier_device_release(struct device *dev)
 {
 	struct memory_tier *tier = to_memory_tier(dev);
+
+	if (tier->dev.id >= MAX_STATIC_MEMORY_TIERS)
+		ida_free(&memtier_dev_id, tier->dev.id);
 
 	kfree(tier);
 }
@@ -195,6 +199,17 @@  static struct memory_tier *register_memory_tier(unsigned int tier,
 	return memtier;
 }
 
+/*
+ * Tear down a memory tier that was created at runtime.
+ *
+ * Static tiers (device id below MAX_STATIC_MEMORY_TIERS) are left
+ * untouched. Any other tier is unlinked from the list it sits on and
+ * its device is unregistered.
+ */
+static void unregister_memory_tier(struct memory_tier *memtier)
+{
+	if (memtier->dev.id >= MAX_STATIC_MEMORY_TIERS) {
+		list_del(&memtier->list);
+		device_unregister(&memtier->dev);
+	}
+}
+
 static struct memory_tier *__node_get_memory_tier(int node)
 {
 	struct memory_tier *memtier;
@@ -267,6 +282,10 @@  int node_create_and_set_memory_tier(int node, int tier)
 		node_set(node, current_tier->nodelist);
 		goto out;
 	}
+
+	if (nodes_empty(current_tier->nodelist))
+		unregister_memory_tier(current_tier);
+
 	establish_migration_targets();
 out:
 	mutex_unlock(&memory_tier_lock);
@@ -350,6 +369,9 @@  int node_reset_memory_tier(int node, int tier)
 		goto out;
 	}
 
+	if (nodes_empty(current_tier->nodelist))
+		unregister_memory_tier(current_tier);
+
 	establish_migration_targets();
 out:
 	mutex_unlock(&memory_tier_lock);
@@ -550,9 +572,61 @@  default_tier_show(struct device *dev, struct device_attribute *attr, char *buf)
 }
 static DEVICE_ATTR_RO(default_tier);
 
+/*
+ * Allocate a device id for a memory tier created from userspace.
+ *
+ * The id is taken from memtier_dev_id within the inclusive range
+ * [MAX_STATIC_MEMORY_TIERS, MAX_MEMORY_TIERS - 1]. Returns the id, or
+ * the negative value reported by ida_alloc_range() on failure.
+ */
+static inline int memtier_alloc_id(void)
+{
+	const unsigned int first_id = MAX_STATIC_MEMORY_TIERS;
+	const unsigned int last_id = MAX_MEMORY_TIERS - 1;
+
+	return ida_alloc_range(&memtier_dev_id, first_id, last_id, GFP_KERNEL);
+}
+
+/*
+ * Store handler for the create_tier_from_rank attribute: create a new
+ * memory tier with the rank written by userspace.
+ *
+ * @buf holds the rank as an unsigned decimal number. The predefined
+ * ranks MEMORY_RANK_HBM_GPU, MEMORY_RANK_DRAM and MEMORY_RANK_PMEM, and
+ * any rank already used by a tier on memory_tiers, are rejected.
+ *
+ * Returns @count on success, or a negative errno:
+ *   - the kstrtouint() error when @buf is not a valid number,
+ *   - -EINVAL for a predefined or duplicate rank,
+ *   - the memtier_alloc_id() error when no device id is available,
+ *   - the register_memory_tier() error when registration fails.
+ */
+static ssize_t create_tier_from_rank_store(struct device *dev,
+					   struct device_attribute *attr,
+					   const char *buf, size_t count)
+{
+	struct memory_tier *memtier;
+	unsigned int rank;
+	ssize_t ret;
+	int tier;
+
+	/* kstrtouint() writes through an unsigned int pointer. */
+	ret = kstrtouint(buf, 10, &rank);
+	if (ret)
+		return ret;
+
+	if (rank == MEMORY_RANK_HBM_GPU ||
+	    rank == MEMORY_RANK_DRAM ||
+	    rank == MEMORY_RANK_PMEM)
+		return -EINVAL;
+
+	/* The duplicate check and the registration happen under one lock hold. */
+	mutex_lock(&memory_tier_lock);
+	/*
+	 * We don't support multiple tiers with the same rank value.
+	 */
+	list_for_each_entry(memtier, &memory_tiers, list) {
+		if (memtier->rank == rank) {
+			ret = -EINVAL;
+			goto out;
+		}
+	}
+
+	tier = memtier_alloc_id();
+	if (tier < 0) {
+		/* Report the allocator's own error instead of a fixed one. */
+		ret = tier;
+		goto out;
+	}
+
+	memtier = register_memory_tier(tier, rank);
+	if (IS_ERR(memtier)) {
+		/*
+		 * NOTE(review): if register_memory_tier() can fail before the
+		 * device release callback owns the id, the id allocated above
+		 * is leaked here - confirm against its error paths.
+		 */
+		ret = PTR_ERR(memtier);
+		goto out;
+	}
+
+	ret = count;
+out:
+	mutex_unlock(&memory_tier_lock);
+	return ret;
+}
+static DEVICE_ATTR_WO(create_tier_from_rank);
+
 static struct attribute *memory_tier_attrs[] = {
 	&dev_attr_max_tier.attr,
 	&dev_attr_default_tier.attr,
+	/* Write-only: writing a rank creates a new memory tier with that rank. */
+	&dev_attr_create_tier_from_rank.attr,
 	NULL
 };