@@ -918,10 +918,10 @@ static int hmat_callback(struct notifier_block *self,
unsigned long action, void *arg)
{
struct memory_target *target;
- struct memory_notify *mnb = arg;
+ struct node_notify *mnb = arg;
int pxm, nid = mnb->status_change_nid;
- if (nid == NUMA_NO_NODE || action != MEM_ONLINE)
+ if (nid == NUMA_NO_NODE || action != NODE_BECAME_MEM_AWARE)
return NOTIFY_OK;
pxm = node_to_pxm(nid);
@@ -1074,7 +1074,7 @@ static __init int hmat_init(void)
hmat_register_targets();
/* Keep the table and structures if the notifier may use them */
- if (hotplug_memory_notifier(hmat_callback, HMAT_CALLBACK_PRI))
+ if (hotplug_node_notifier(hmat_callback, HMAT_CALLBACK_PRI))
goto out_put;
if (!hmat_set_default_dram_perf())
@@ -110,6 +110,25 @@ static const struct attribute_group *node_access_node_groups[] = {
NULL,
};
+static BLOCKING_NOTIFIER_HEAD(node_chain);
+
+int register_node_notifier(struct notifier_block *nb)
+{
+ return blocking_notifier_chain_register(&node_chain, nb);
+}
+EXPORT_SYMBOL(register_node_notifier);
+
+void unregister_node_notifier(struct notifier_block *nb)
+{
+ blocking_notifier_chain_unregister(&node_chain, nb);
+}
+EXPORT_SYMBOL(unregister_node_notifier);
+
+int node_notify(unsigned long val, void *v)
+{
+ return blocking_notifier_call_chain(&node_chain, val, v);
+}
+
static void node_remove_accesses(struct node *node)
{
struct node_access_nodes *c, *cnext;
@@ -2409,12 +2409,12 @@ static int cxl_region_perf_attrs_callback(struct notifier_block *nb,
unsigned long action, void *arg)
{
struct cxl_region *cxlr = container_of(nb, struct cxl_region,
- memory_notifier);
- struct memory_notify *mnb = arg;
+ node_notifier);
+ struct node_notify *mnb = arg;
int nid = mnb->status_change_nid;
int region_nid;
- if (nid == NUMA_NO_NODE || action != MEM_ONLINE)
+ if (nid == NUMA_NO_NODE || action != NODE_BECAME_MEM_AWARE)
return NOTIFY_DONE;
/*
@@ -3388,7 +3388,7 @@ static void shutdown_notifiers(void *_cxlr)
{
struct cxl_region *cxlr = _cxlr;
- unregister_memory_notifier(&cxlr->memory_notifier);
+ unregister_node_notifier(&cxlr->node_notifier);
unregister_mt_adistance_algorithm(&cxlr->adist_notifier);
}
@@ -3427,9 +3427,9 @@ static int cxl_region_probe(struct device *dev)
if (rc)
return rc;
- cxlr->memory_notifier.notifier_call = cxl_region_perf_attrs_callback;
- cxlr->memory_notifier.priority = CXL_CALLBACK_PRI;
- register_memory_notifier(&cxlr->memory_notifier);
+ cxlr->node_notifier.notifier_call = cxl_region_perf_attrs_callback;
+ cxlr->node_notifier.priority = CXL_CALLBACK_PRI;
+ register_node_notifier(&cxlr->node_notifier);
cxlr->adist_notifier.notifier_call = cxl_region_calculate_adistance;
cxlr->adist_notifier.priority = 100;
@@ -532,7 +532,7 @@ struct cxl_region_params {
* @flags: Region state flags
* @params: active + config params for the region
* @coord: QoS access coordinates for the region
- * @memory_notifier: notifier for setting the access coordinates to node
+ * @node_notifier: notifier for setting the access coordinates to node
* @adist_notifier: notifier for calculating the abstract distance of node
*/
struct cxl_region {
@@ -545,7 +545,7 @@ struct cxl_region {
unsigned long flags;
struct cxl_region_params params;
struct access_coordinate coord[ACCESS_COORDINATE_MAX];
- struct notifier_block memory_notifier;
+ struct notifier_block node_notifier;
struct notifier_block adist_notifier;
};
@@ -99,6 +99,14 @@ int set_memory_block_size_order(unsigned int order);
#define MEM_PREPARE_ONLINE (1<<6)
#define MEM_FINISH_OFFLINE (1<<7)
+/* These states are used for NUMA node notifiers */
+#define NODE_BECOMING_MEM_AWARE (1<<0)
+#define NODE_BECAME_MEM_AWARE (1<<1)
+#define NODE_BECOMING_MEMORYLESS (1<<2)
+#define NODE_BECAME_MEMORYLESS (1<<3)
+#define NODE_CANCEL_MEM_AWARE (1<<4)
+#define NODE_CANCEL_MEMORYLESS (1<<5)
+
struct memory_notify {
/*
* The altmap_start_pfn and altmap_nr_pages fields are designated for
@@ -113,6 +121,11 @@ struct memory_notify {
int status_change_nid;
};
+struct node_notify {
+ int status_change_nid_normal;
+ int status_change_nid;
+};
+
struct notifier_block;
struct mem_section;
@@ -149,15 +162,34 @@ static inline int hotplug_memory_notifier(notifier_fn_t fn, int pri)
{
return 0;
}
+
+static inline int register_node_notifier(struct notifier_block *nb)
+{
+ return 0;
+}
+static inline void unregister_node_notifier(struct notifier_block *nb)
+{
+}
+static inline int node_notify(unsigned long val, void *v)
+{
+ return 0;
+}
+static inline int hotplug_node_notifier(notifier_fn_t fn, int pri)
+{
+ return 0;
+}
#else /* CONFIG_MEMORY_HOTPLUG */
extern int register_memory_notifier(struct notifier_block *nb);
+extern int register_node_notifier(struct notifier_block *nb);
extern void unregister_memory_notifier(struct notifier_block *nb);
+extern void unregister_node_notifier(struct notifier_block *nb);
int create_memory_block_devices(unsigned long start, unsigned long size,
struct vmem_altmap *altmap,
struct memory_group *group);
void remove_memory_block_devices(unsigned long start, unsigned long size);
extern void memory_dev_init(void);
extern int memory_notify(unsigned long val, void *v);
+extern int node_notify(unsigned long val, void *v);
extern struct memory_block *find_memory_block(unsigned long section_nr);
typedef int (*walk_memory_blocks_func_t)(struct memory_block *, void *);
extern int walk_memory_blocks(unsigned long start, unsigned long size,
@@ -177,6 +209,12 @@ int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func,
register_memory_notifier(&fn##_mem_nb); \
})
+#define hotplug_node_notifier(fn, pri) ({ \
+ static __meminitdata struct notifier_block fn##_node_nb =\
+ { .notifier_call = fn, .priority = pri };\
+ register_node_notifier(&fn##_node_nb); \
+})
+
#ifdef CONFIG_NUMA
void memory_block_add_nid(struct memory_block *mem, int nid,
enum meminit_context context);
@@ -3939,7 +3939,7 @@ void __init cpuset_init_smp(void)
cpumask_copy(top_cpuset.effective_cpus, cpu_active_mask);
top_cpuset.effective_mems = node_states[N_MEMORY];
- hotplug_memory_notifier(cpuset_track_online_nodes, CPUSET_CALLBACK_PRI);
+ hotplug_node_notifier(cpuset_track_online_nodes, CPUSET_CALLBACK_PRI);
cpuset_migrate_mm_wq = alloc_ordered_workqueue("cpuset_migrate_mm", 0);
BUG_ON(!cpuset_migrate_mm_wq);
@@ -872,7 +872,7 @@ static int __meminit memtier_hotplug_callback(struct notifier_block *self,
unsigned long action, void *_arg)
{
struct memory_tier *memtier;
- struct memory_notify *arg = _arg;
+ struct node_notify *arg = _arg;
/*
* Only update the node migration order when a node is
@@ -882,13 +882,13 @@ static int __meminit memtier_hotplug_callback(struct notifier_block *self,
return notifier_from_errno(0);
switch (action) {
- case MEM_OFFLINE:
+ case NODE_BECAME_MEMORYLESS:
mutex_lock(&memory_tier_lock);
if (clear_node_memory_tier(arg->status_change_nid))
establish_demotion_targets();
mutex_unlock(&memory_tier_lock);
break;
- case MEM_ONLINE:
+ case NODE_BECAME_MEM_AWARE:
mutex_lock(&memory_tier_lock);
memtier = set_node_memory_tier(arg->status_change_nid);
if (!IS_ERR(memtier))
@@ -929,7 +929,7 @@ static int __init memory_tier_init(void)
nodes_and(default_dram_nodes, node_states[N_MEMORY],
node_states[N_CPU]);
- hotplug_memory_notifier(memtier_hotplug_callback, MEMTIER_HOTPLUG_PRI);
+ hotplug_node_notifier(memtier_hotplug_callback, MEMTIER_HOTPLUG_PRI);
return 0;
}
subsys_initcall(memory_tier_init);
@@ -701,7 +701,7 @@ static void online_pages_range(unsigned long start_pfn, unsigned long nr_pages)
/* check which state of node_states will be changed when online memory */
static void node_states_check_changes_online(unsigned long nr_pages,
- struct zone *zone, struct memory_notify *arg)
+ struct zone *zone, struct node_notify *arg)
{
int nid = zone_to_nid(zone);
@@ -714,7 +714,7 @@ static void node_states_check_changes_online(unsigned long nr_pages,
arg->status_change_nid_normal = nid;
}
-static void node_states_set_node(int node, struct memory_notify *arg)
+static void node_states_set_node(int node, struct node_notify *arg)
{
if (arg->status_change_nid_normal >= 0)
node_set_state(node, N_NORMAL_MEMORY);
@@ -1177,7 +1177,9 @@ int online_pages(unsigned long pfn, unsigned long nr_pages,
int need_zonelists_rebuild = 0;
const int nid = zone_to_nid(zone);
int ret;
- struct memory_notify arg;
+ struct memory_notify mem_arg;
+ struct node_notify node_arg;
+ bool cancel_mem_notifier_on_err = false, cancel_node_notifier_on_err = false;
/*
* {on,off}lining is constrained to full memory sections (or more
@@ -1194,11 +1196,23 @@ int online_pages(unsigned long pfn, unsigned long nr_pages,
/* associate pfn range with the zone */
move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_ISOLATE);
- arg.start_pfn = pfn;
- arg.nr_pages = nr_pages;
- node_states_check_changes_online(nr_pages, zone, &arg);
+ mem_arg.start_pfn = pfn;
+ mem_arg.nr_pages = nr_pages;
+ node_states_check_changes_online(nr_pages, zone, &node_arg);
- ret = memory_notify(MEM_GOING_ONLINE, &arg);
+ if (node_arg.status_change_nid >= 0) {
+ /* Node is becoming memory aware. Notify consumers */
+ cancel_node_notifier_on_err = true;
+ ret = node_notify(NODE_BECOMING_MEM_AWARE, &node_arg);
+ ret = notifier_to_errno(ret);
+ if (ret)
+ goto failed_addition;
+ }
+
+ cancel_mem_notifier_on_err = true;
+ mem_arg.status_change_nid = node_arg.status_change_nid;
+ mem_arg.status_change_nid_normal = node_arg.status_change_nid_normal;
+ ret = memory_notify(MEM_GOING_ONLINE, &mem_arg);
ret = notifier_to_errno(ret);
if (ret)
goto failed_addition;
@@ -1224,7 +1238,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages,
online_pages_range(pfn, nr_pages);
adjust_present_page_count(pfn_to_page(pfn), group, nr_pages);
- node_states_set_node(nid, &arg);
+ node_states_set_node(nid, &node_arg);
if (need_zonelists_rebuild)
build_all_zonelists(NULL);
@@ -1245,16 +1259,26 @@ int online_pages(unsigned long pfn, unsigned long nr_pages,
kswapd_run(nid);
kcompactd_run(nid);
+ if (node_arg.status_change_nid >= 0)
+ /*
+ * Node went from memoryless to having memory. Notify interested
+ * consumers.
+ */
+ node_notify(NODE_BECAME_MEM_AWARE, &node_arg);
+
writeback_set_ratelimit();
- memory_notify(MEM_ONLINE, &arg);
+ memory_notify(MEM_ONLINE, &mem_arg);
return 0;
failed_addition:
pr_debug("online_pages [mem %#010llx-%#010llx] failed\n",
(unsigned long long) pfn << PAGE_SHIFT,
(((unsigned long long) pfn + nr_pages) << PAGE_SHIFT) - 1);
- memory_notify(MEM_CANCEL_ONLINE, &arg);
+ if (cancel_node_notifier_on_err)
+ node_notify(NODE_CANCEL_MEM_AWARE, &node_arg);
+ if (cancel_mem_notifier_on_err)
+ memory_notify(MEM_CANCEL_ONLINE, &mem_arg);
remove_pfn_range_from_zone(zone, pfn, nr_pages);
return ret;
}
@@ -1898,7 +1922,7 @@ early_param("movable_node", cmdline_parse_movable_node);
/* check which state of node_states will be changed when offline memory */
static void node_states_check_changes_offline(unsigned long nr_pages,
- struct zone *zone, struct memory_notify *arg)
+ struct zone *zone, struct node_notify *arg)
{
struct pglist_data *pgdat = zone->zone_pgdat;
unsigned long present_pages = 0;
@@ -1935,7 +1959,7 @@ static void node_states_check_changes_offline(unsigned long nr_pages,
arg->status_change_nid = zone_to_nid(zone);
}
-static void node_states_clear_node(int node, struct memory_notify *arg)
+static void node_states_clear_node(int node, struct node_notify *arg)
{
if (arg->status_change_nid_normal >= 0)
node_clear_state(node, N_NORMAL_MEMORY);
@@ -1963,7 +1987,9 @@ int offline_pages(unsigned long start_pfn, unsigned long nr_pages,
unsigned long pfn, managed_pages, system_ram_pages = 0;
const int node = zone_to_nid(zone);
unsigned long flags;
- struct memory_notify arg;
+ struct memory_notify mem_arg;
+ struct node_notify node_arg;
+ bool cancel_mem_notifier_on_err = false, cancel_node_notifier_on_err = false;
char *reason;
int ret;
@@ -2022,11 +2048,22 @@ int offline_pages(unsigned long start_pfn, unsigned long nr_pages,
goto failed_removal_pcplists_disabled;
}
- arg.start_pfn = start_pfn;
- arg.nr_pages = nr_pages;
- node_states_check_changes_offline(nr_pages, zone, &arg);
+ mem_arg.start_pfn = start_pfn;
+ mem_arg.nr_pages = nr_pages;
+ node_states_check_changes_offline(nr_pages, zone, &node_arg);
+
+ if (node_arg.status_change_nid >= 0) {
+ cancel_node_notifier_on_err = true;
+ ret = node_notify(NODE_BECOMING_MEMORYLESS, &node_arg);
+ ret = notifier_to_errno(ret);
+ if (ret)
+ goto failed_removal_isolated;
+ }
- ret = memory_notify(MEM_GOING_OFFLINE, &arg);
+ cancel_mem_notifier_on_err = true;
+ mem_arg.status_change_nid = node_arg.status_change_nid;
+ mem_arg.status_change_nid_normal = node_arg.status_change_nid_normal;
+ ret = memory_notify(MEM_GOING_OFFLINE, &mem_arg);
ret = notifier_to_errno(ret);
if (ret) {
reason = "notifier failure";
@@ -2106,27 +2143,32 @@ int offline_pages(unsigned long start_pfn, unsigned long nr_pages,
* Make sure to mark the node as memory-less before rebuilding the zone
* list. Otherwise this node would still appear in the fallback lists.
*/
- node_states_clear_node(node, &arg);
+ node_states_clear_node(node, &node_arg);
if (!populated_zone(zone)) {
zone_pcp_reset(zone);
build_all_zonelists(NULL);
}
- if (arg.status_change_nid >= 0) {
+ if (node_arg.status_change_nid >= 0) {
kcompactd_stop(node);
kswapd_stop(node);
+ /* Node became memoryless. Notify interested consumers */
+ node_notify(NODE_BECAME_MEMORYLESS, &node_arg);
}
writeback_set_ratelimit();
- memory_notify(MEM_OFFLINE, &arg);
+ memory_notify(MEM_OFFLINE, &mem_arg);
remove_pfn_range_from_zone(zone, start_pfn, nr_pages);
return 0;
failed_removal_isolated:
/* pushback to free area */
undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
- memory_notify(MEM_CANCEL_OFFLINE, &arg);
+ if (cancel_node_notifier_on_err)
+ node_notify(NODE_CANCEL_MEMORYLESS, &node_arg);
+ if (cancel_mem_notifier_on_err)
+ memory_notify(MEM_CANCEL_OFFLINE, &mem_arg);
failed_removal_pcplists_disabled:
lru_cache_enable();
zone_pcp_enable(zone);
@@ -5928,10 +5928,10 @@ static int slab_mem_going_offline_callback(void *arg)
static void slab_mem_offline_callback(void *arg)
{
- struct memory_notify *marg = arg;
+ struct node_notify *narg = arg;
int offline_node;
- offline_node = marg->status_change_nid_normal;
+ offline_node = narg->status_change_nid_normal;
/*
* If the node still has available memory. we need kmem_cache_node
@@ -5954,8 +5954,8 @@ static int slab_mem_going_online_callback(void *arg)
{
struct kmem_cache_node *n;
struct kmem_cache *s;
- struct memory_notify *marg = arg;
- int nid = marg->status_change_nid_normal;
+ struct node_notify *narg = arg;
+ int nid = narg->status_change_nid_normal;
int ret = 0;
/*
@@ -6007,18 +6007,18 @@ static int slab_memory_callback(struct notifier_block *self,
int ret = 0;
switch (action) {
- case MEM_GOING_ONLINE:
+ case NODE_BECOMING_MEM_AWARE:
ret = slab_mem_going_online_callback(arg);
break;
- case MEM_GOING_OFFLINE:
+ case NODE_BECOMING_MEMORYLESS:
ret = slab_mem_going_offline_callback(arg);
break;
- case MEM_OFFLINE:
- case MEM_CANCEL_ONLINE:
+ case NODE_BECAME_MEMORYLESS:
+ case NODE_CANCEL_MEM_AWARE:
slab_mem_offline_callback(arg);
break;
- case MEM_ONLINE:
- case MEM_CANCEL_OFFLINE:
+ case NODE_BECAME_MEM_AWARE:
+ case NODE_CANCEL_MEMORYLESS:
break;
}
if (ret)
@@ -6094,7 +6094,7 @@ void __init kmem_cache_init(void)
sizeof(struct kmem_cache_node),
SLAB_HWCACHE_ALIGN | SLAB_NO_OBJ_EXT, 0, 0);
- hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
+ hotplug_node_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
/* Able to allocate the per node structures */
slab_state = PARTIAL;
There are at least four consumers of hotplug_memory_notifier that are
really only interested in whether a NUMA node changed its state, e.g.
going from being memory-aware to becoming memoryless.

Implement a specific notifier for NUMA node state changes, and have
those consumers that only care about node state changes use it.

Signed-off-by: Oscar Salvador <osalvador@suse.de>
---
 drivers/acpi/numa/hmat.c  |  6 +--
 drivers/base/node.c       | 19 +++++++++
 drivers/cxl/core/region.c | 14 +++----
 drivers/cxl/cxl.h         |  4 +-
 include/linux/memory.h    | 38 ++++++++++++++++++
 kernel/cgroup/cpuset.c    |  2 +-
 mm/memory-tiers.c         |  8 ++--
 mm/memory_hotplug.c       | 84 +++++++++++++++++++++++++++++----------
 mm/slub.c                 | 22 +++++-----
 9 files changed, 148 insertions(+), 49 deletions(-)
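
For reviewers, a minimal sketch of how a consumer could hook into the
new chain. The callback name and messages below are hypothetical,
purely for illustration; the interfaces are the ones introduced by this
patch:

    #include <linux/memory.h>
    #include <linux/notifier.h>
    #include <linux/printk.h>

    /* Hypothetical consumer: only cares about node-level state changes */
    static int example_node_callback(struct notifier_block *nb,
                                     unsigned long action, void *arg)
    {
            struct node_notify *nn = arg;

            switch (action) {
            case NODE_BECAME_MEM_AWARE:
                    pr_info("node %d now has memory\n",
                            nn->status_change_nid);
                    break;
            case NODE_BECAME_MEMORYLESS:
                    pr_info("node %d became memoryless\n",
                            nn->status_change_nid);
                    break;
            }
            return NOTIFY_OK;
    }

    static int __init example_init(void)
    {
            /*
             * Expands to a static notifier_block named
             * example_node_callback_node_nb and registers it via
             * register_node_notifier().
             */
            return hotplug_node_notifier(example_node_callback, 0);
    }

Consumers that also need the affected pfn range keep using the memory
notifier; after this patch the two chains are orthogonal.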