Message ID | 20220720025920.1373558-4-aneesh.kumar@linux.ibm.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | mm/demotion: Memory tiers and demotion | expand |
"Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes: > If the new NUMA node onlined doesn't have a performance level assigned, > the kernel adds the NUMA node to default memory tier. > > Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> > --- > include/linux/memory-tiers.h | 1 + > mm/memory-tiers.c | 75 ++++++++++++++++++++++++++++++++++++ > 2 files changed, 76 insertions(+) > > diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h > index ef380a39db3a..3d5f14d57ae6 100644 > --- a/include/linux/memory-tiers.h > +++ b/include/linux/memory-tiers.h > @@ -14,6 +14,7 @@ > #define MEMTIER_PERF_LEVEL_DRAM (1 << (MEMTIER_CHUNK_BITS + 2)) > /* leave one tier below this slow pmem */ > #define MEMTIER_PERF_LEVEL_PMEM (1 << MEMTIER_CHUNK_BITS) > +#define MEMTIER_HOTPLUG_PRIO 100 > > extern bool numa_demotion_enabled; > > diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c > index 41a21cc5ae55..cc3a47ec18e4 100644 > --- a/mm/memory-tiers.c > +++ b/mm/memory-tiers.c > @@ -5,6 +5,7 @@ > #include <linux/lockdep.h> > #include <linux/moduleparam.h> > #include <linux/node.h> > +#include <linux/memory.h> > #include <linux/memory-tiers.h> > > struct memory_tier { > @@ -64,6 +65,78 @@ static struct memory_tier *find_create_memory_tier(unsigned int perf_level) > return new_memtier; > } > > +static struct memory_tier *__node_get_memory_tier(int node) > +{ > + struct memory_tier *memtier; > + > + list_for_each_entry(memtier, &memory_tiers, list) { > + if (node_isset(node, memtier->nodelist)) > + return memtier; > + } > + return NULL; > +} > + > +static void init_node_memory_tier(int node) set_node_memory_tier()? 
> +{ > + int perf_level; > + struct memory_tier *memtier; > + > + mutex_lock(&memory_tier_lock); > + > + memtier = __node_get_memory_tier(node); > + if (!memtier) { > + perf_level = node_devices[node]->perf_level; > + memtier = find_create_memory_tier(perf_level); > + node_set(node, memtier->nodelist); > + } > + mutex_unlock(&memory_tier_lock); > +} > + > +static void clear_node_memory_tier(int node) > +{ > + struct memory_tier *memtier; > + > + mutex_lock(&memory_tier_lock); > + memtier = __node_get_memory_tier(node); > + if (memtier) > + node_clear(node, memtier->nodelist); When memtier->nodelist becomes empty, do we need to free memtier? > + mutex_unlock(&memory_tier_lock); > +} > + > +/* > + * This runs whether reclaim-based migration is enabled or not, > + * which ensures that the user can turn reclaim-based migration > + * at any time without needing to recalculate migration targets. > + */ The comment doesn't apply here. > +static int __meminit migrate_on_reclaim_callback(struct notifier_block *self, > + unsigned long action, void *_arg) Now we are building memory tiers instead of working on demotion. So I think we should rename the function to memtier_hotplug_callback(). > +{ > + struct memory_notify *arg = _arg; > + > + /* > + * Only update the node migration order when a node is > + * changing status, like online->offline. > + */ > + if (arg->status_change_nid < 0) > + return notifier_from_errno(0); > + > + switch (action) { > + case MEM_OFFLINE: > + clear_node_memory_tier(arg->status_change_nid); > + break; > + case MEM_ONLINE: > + init_node_memory_tier(arg->status_change_nid); > + break; > + } > + > + return notifier_from_errno(0); > +} > + > +static void __init migrate_on_reclaim_init(void) > +{ > + hotplug_memory_notifier(migrate_on_reclaim_callback, MEMTIER_HOTPLUG_PRIO); > +} I suggest calling hotplug_memory_notifier() in memory_tier_init() directly. We are not working on demotion here. 
> + > static int __init memory_tier_init(void) > { > int node; > @@ -96,6 +169,8 @@ static int __init memory_tier_init(void) > node_property->perf_level = default_memtier_perf_level; > } > mutex_unlock(&memory_tier_lock); > + > + migrate_on_reclaim_init(); > return 0; > } > subsys_initcall(memory_tier_init); Best Regards, Huang, Ying
On 7/26/22 9:33 AM, Huang, Ying wrote: > "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes: > >> If the new NUMA node onlined doesn't have a performance level assigned, >> the kernel adds the NUMA node to default memory tier. >> >> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> >> --- >> include/linux/memory-tiers.h | 1 + >> mm/memory-tiers.c | 75 ++++++++++++++++++++++++++++++++++++ >> 2 files changed, 76 insertions(+) >> >> diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h >> index ef380a39db3a..3d5f14d57ae6 100644 >> --- a/include/linux/memory-tiers.h >> +++ b/include/linux/memory-tiers.h >> @@ -14,6 +14,7 @@ >> #define MEMTIER_PERF_LEVEL_DRAM (1 << (MEMTIER_CHUNK_BITS + 2)) >> /* leave one tier below this slow pmem */ >> #define MEMTIER_PERF_LEVEL_PMEM (1 << MEMTIER_CHUNK_BITS) >> +#define MEMTIER_HOTPLUG_PRIO 100 >> >> extern bool numa_demotion_enabled; >> >> diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c >> index 41a21cc5ae55..cc3a47ec18e4 100644 >> --- a/mm/memory-tiers.c >> +++ b/mm/memory-tiers.c >> @@ -5,6 +5,7 @@ >> #include <linux/lockdep.h> >> #include <linux/moduleparam.h> >> #include <linux/node.h> >> +#include <linux/memory.h> >> #include <linux/memory-tiers.h> >> >> struct memory_tier { >> @@ -64,6 +65,78 @@ static struct memory_tier *find_create_memory_tier(unsigned int perf_level) >> return new_memtier; >> } >> >> +static struct memory_tier *__node_get_memory_tier(int node) >> +{ >> + struct memory_tier *memtier; >> + >> + list_for_each_entry(memtier, &memory_tiers, list) { >> + if (node_isset(node, memtier->nodelist)) >> + return memtier; >> + } >> + return NULL; >> +} >> + >> +static void init_node_memory_tier(int node) > > set_node_memory_tier()? 
That was done based on feedback from Alistair https://lore.kernel.org/linux-mm/87h73iapg1.fsf@nvdebian.thelocal > >> +{ >> + int perf_level; >> + struct memory_tier *memtier; >> + >> + mutex_lock(&memory_tier_lock); >> + >> + memtier = __node_get_memory_tier(node); >> + if (!memtier) { >> + perf_level = node_devices[node]->perf_level; >> + memtier = find_create_memory_tier(perf_level); >> + node_set(node, memtier->nodelist); >> + } >> + mutex_unlock(&memory_tier_lock); >> +} >> + >> +static void clear_node_memory_tier(int node) >> +{ >> + struct memory_tier *memtier; >> + >> + mutex_lock(&memory_tier_lock); >> + memtier = __node_get_memory_tier(node); >> + if (memtier) >> + node_clear(node, memtier->nodelist); > > When memtier->nodelist becomes empty, we need to free memtier? > >> + mutex_unlock(&memory_tier_lock); >> +} >> + >> +/* >> + * This runs whether reclaim-based migration is enabled or not, >> + * which ensures that the user can turn reclaim-based migration >> + * at any time without needing to recalculate migration targets. >> + */ > > The comments doesn't apply here. > >> +static int __meminit migrate_on_reclaim_callback(struct notifier_block *self, >> + unsigned long action, void *_arg) > > Now we are building memory tiers instead of working on demotion. So I > think we should rename the function to memtier_hotplug_callback(). > >> +{ >> + struct memory_notify *arg = _arg; >> + >> + /* >> + * Only update the node migration order when a node is >> + * changing status, like online->offline. 
>> + */ >> + if (arg->status_change_nid < 0) >> + return notifier_from_errno(0); >> + >> + switch (action) { >> + case MEM_OFFLINE: >> + clear_node_memory_tier(arg->status_change_nid); >> + break; >> + case MEM_ONLINE: >> + init_node_memory_tier(arg->status_change_nid); >> + break; >> + } >> + >> + return notifier_from_errno(0); >> +} >> + >> +static void __init migrate_on_reclaim_init(void) >> +{ >> + hotplug_memory_notifier(migrate_on_reclaim_callback, MEMTIER_HOTPLUG_PRIO); >> +} > > I suggest to call hotplug_memory_notifier() in memory_tier_init() > directly. We are not working on demotion here. > >> + >> static int __init memory_tier_init(void) >> { >> int node; >> @@ -96,6 +169,8 @@ static int __init memory_tier_init(void) >> node_property->perf_level = default_memtier_perf_level; >> } >> mutex_unlock(&memory_tier_lock); >> + >> + migrate_on_reclaim_init(); >> return 0; >> } >> subsys_initcall(memory_tier_init); > > Best Regards, > Huang, Ying Will update the patch in next iteration to take care of other feedback. Thanks -aneesh
Aneesh Kumar K V <aneesh.kumar@linux.ibm.com> writes: > On 7/26/22 9:33 AM, Huang, Ying wrote: >> "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes: >> >>> If the new NUMA node onlined doesn't have a performance level assigned, >>> the kernel adds the NUMA node to default memory tier. >>> >>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> >>> --- >>> include/linux/memory-tiers.h | 1 + >>> mm/memory-tiers.c | 75 ++++++++++++++++++++++++++++++++++++ >>> 2 files changed, 76 insertions(+) >>> >>> diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h >>> index ef380a39db3a..3d5f14d57ae6 100644 >>> --- a/include/linux/memory-tiers.h >>> +++ b/include/linux/memory-tiers.h >>> @@ -14,6 +14,7 @@ >>> #define MEMTIER_PERF_LEVEL_DRAM (1 << (MEMTIER_CHUNK_BITS + 2)) >>> /* leave one tier below this slow pmem */ >>> #define MEMTIER_PERF_LEVEL_PMEM (1 << MEMTIER_CHUNK_BITS) >>> +#define MEMTIER_HOTPLUG_PRIO 100 >>> >>> extern bool numa_demotion_enabled; >>> >>> diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c >>> index 41a21cc5ae55..cc3a47ec18e4 100644 >>> --- a/mm/memory-tiers.c >>> +++ b/mm/memory-tiers.c >>> @@ -5,6 +5,7 @@ >>> #include <linux/lockdep.h> >>> #include <linux/moduleparam.h> >>> #include <linux/node.h> >>> +#include <linux/memory.h> >>> #include <linux/memory-tiers.h> >>> >>> struct memory_tier { >>> @@ -64,6 +65,78 @@ static struct memory_tier *find_create_memory_tier(unsigned int perf_level) >>> return new_memtier; >>> } >>> >>> +static struct memory_tier *__node_get_memory_tier(int node) >>> +{ >>> + struct memory_tier *memtier; >>> + >>> + list_for_each_entry(memtier, &memory_tiers, list) { >>> + if (node_isset(node, memtier->nodelist)) >>> + return memtier; >>> + } >>> + return NULL; >>> +} >>> + >>> +static void init_node_memory_tier(int node) >> >> set_node_memory_tier()? 
> > That was done based on feedback from Alistair > > https://lore.kernel.org/linux-mm/87h73iapg1.fsf@nvdebian.thelocal > >> >>> +{ >>> + int perf_level; >>> + struct memory_tier *memtier; >>> + >>> + mutex_lock(&memory_tier_lock); >>> + >>> + memtier = __node_get_memory_tier(node); >>> + if (!memtier) { >>> + perf_level = node_devices[node]->perf_level; >>> + memtier = find_create_memory_tier(perf_level); >>> + node_set(node, memtier->nodelist); >>> + } It's related to Alistair's comments too. When will memtier != NULL here? We may need just VM_WARN_ON() here? >>> + mutex_unlock(&memory_tier_lock); >>> +} >>> + >>> +static void clear_node_memory_tier(int node) >>> +{ >>> + struct memory_tier *memtier; >>> + >>> + mutex_lock(&memory_tier_lock); >>> + memtier = __node_get_memory_tier(node); >>> + if (memtier) >>> + node_clear(node, memtier->nodelist); >> >> When memtier->nodelist becomes empty, we need to free memtier? >> >>> + mutex_unlock(&memory_tier_lock); >>> +} >>> + >>> +/* >>> + * This runs whether reclaim-based migration is enabled or not, >>> + * which ensures that the user can turn reclaim-based migration >>> + * at any time without needing to recalculate migration targets. >>> + */ >> >> The comments doesn't apply here. >> >>> +static int __meminit migrate_on_reclaim_callback(struct notifier_block *self, >>> + unsigned long action, void *_arg) >> >> Now we are building memory tiers instead of working on demotion. So I >> think we should rename the function to memtier_hotplug_callback(). >> >>> +{ >>> + struct memory_notify *arg = _arg; >>> + >>> + /* >>> + * Only update the node migration order when a node is >>> + * changing status, like online->offline. 
>>> + */ >>> + if (arg->status_change_nid < 0) >>> + return notifier_from_errno(0); >>> + >>> + switch (action) { >>> + case MEM_OFFLINE: >>> + clear_node_memory_tier(arg->status_change_nid); >>> + break; >>> + case MEM_ONLINE: >>> + init_node_memory_tier(arg->status_change_nid); >>> + break; >>> + } >>> + >>> + return notifier_from_errno(0); >>> +} >>> + >>> +static void __init migrate_on_reclaim_init(void) >>> +{ >>> + hotplug_memory_notifier(migrate_on_reclaim_callback, MEMTIER_HOTPLUG_PRIO); >>> +} >> >> I suggest to call hotplug_memory_notifier() in memory_tier_init() >> directly. We are not working on demotion here. >> >>> + >>> static int __init memory_tier_init(void) >>> { >>> int node; >>> @@ -96,6 +169,8 @@ static int __init memory_tier_init(void) >>> node_property->perf_level = default_memtier_perf_level; >>> } >>> mutex_unlock(&memory_tier_lock); >>> + >>> + migrate_on_reclaim_init(); >>> return 0; >>> } >>> subsys_initcall(memory_tier_init); >> >> Best Regards, >> Huang, Ying > > > Will update the patch in next iteration to take care of other feedback. Thanks! Best Regards, Huang, Ying
"Huang, Ying" <ying.huang@intel.com> writes: > Aneesh Kumar K V <aneesh.kumar@linux.ibm.com> writes: > >> On 7/26/22 9:33 AM, Huang, Ying wrote: >>> "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes: >>> >>>> If the new NUMA node onlined doesn't have a performance level assigned, >>>> the kernel adds the NUMA node to default memory tier. >>>> >>>> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> >>>> --- >>>> include/linux/memory-tiers.h | 1 + >>>> mm/memory-tiers.c | 75 ++++++++++++++++++++++++++++++++++++ >>>> 2 files changed, 76 insertions(+) >>>> >>>> diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h >>>> index ef380a39db3a..3d5f14d57ae6 100644 >>>> --- a/include/linux/memory-tiers.h >>>> +++ b/include/linux/memory-tiers.h >>>> @@ -14,6 +14,7 @@ >>>> #define MEMTIER_PERF_LEVEL_DRAM (1 << (MEMTIER_CHUNK_BITS + 2)) >>>> /* leave one tier below this slow pmem */ >>>> #define MEMTIER_PERF_LEVEL_PMEM (1 << MEMTIER_CHUNK_BITS) >>>> +#define MEMTIER_HOTPLUG_PRIO 100 >>>> >>>> extern bool numa_demotion_enabled; >>>> >>>> diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c >>>> index 41a21cc5ae55..cc3a47ec18e4 100644 >>>> --- a/mm/memory-tiers.c >>>> +++ b/mm/memory-tiers.c >>>> @@ -5,6 +5,7 @@ >>>> #include <linux/lockdep.h> >>>> #include <linux/moduleparam.h> >>>> #include <linux/node.h> >>>> +#include <linux/memory.h> >>>> #include <linux/memory-tiers.h> >>>> >>>> struct memory_tier { >>>> @@ -64,6 +65,78 @@ static struct memory_tier *find_create_memory_tier(unsigned int perf_level) >>>> return new_memtier; >>>> } >>>> >>>> +static struct memory_tier *__node_get_memory_tier(int node) >>>> +{ >>>> + struct memory_tier *memtier; >>>> + >>>> + list_for_each_entry(memtier, &memory_tiers, list) { >>>> + if (node_isset(node, memtier->nodelist)) >>>> + return memtier; >>>> + } >>>> + return NULL; >>>> +} >>>> + >>>> +static void init_node_memory_tier(int node) >>> >>> set_node_memory_tier()? 
>> >> That was done based on feedback from Alistair >> >> https://lore.kernel.org/linux-mm/87h73iapg1.fsf@nvdebian.thelocal >> >>> >>>> +{ >>>> + int perf_level; >>>> + struct memory_tier *memtier; >>>> + >>>> + mutex_lock(&memory_tier_lock); >>>> + >>>> + memtier = __node_get_memory_tier(node); >>>> + if (!memtier) { >>>> + perf_level = node_devices[node]->perf_level; >>>> + memtier = find_create_memory_tier(perf_level); >>>> + node_set(node, memtier->nodelist); >>>> + } > > It's related to Alistair's comments too. When will memtier != NULL > here? We may need just VM_WARN_ON() here? When the platform driver sets memory tier directly. With the old code it can happen when dax/kmem register a node to a memory tier. With memory_type proposal this can happen if the node is part of memory type that is already added to a memory tier. > >>>> + mutex_unlock(&memory_tier_lock); >>>> +} >>>> + >>>> +static void clear_node_memory_tier(int node) >>>> +{ >>>> + struct memory_tier *memtier; >>>> + >>>> + mutex_lock(&memory_tier_lock); >>>> + memtier = __node_get_memory_tier(node); >>>> + if (memtier) >>>> + node_clear(node, memtier->nodelist); >>> >>> When memtier->nodelist becomes empty, we need to free memtier? >>> >>>> + mutex_unlock(&memory_tier_lock); >>>> +} >>>> + >>>> +/* >>>> + * This runs whether reclaim-based migration is enabled or not, >>>> + * which ensures that the user can turn reclaim-based migration >>>> + * at any time without needing to recalculate migration targets. >>>> + */ >>> >>> The comments doesn't apply here. >>> >>>> +static int __meminit migrate_on_reclaim_callback(struct notifier_block *self, >>>> + unsigned long action, void *_arg) >>> >>> Now we are building memory tiers instead of working on demotion. So I >>> think we should rename the function to memtier_hotplug_callback(). 
>>> >>>> +{ >>>> + struct memory_notify *arg = _arg; >>>> + >>>> + /* >>>> + * Only update the node migration order when a node is >>>> + * changing status, like online->offline. >>>> + */ >>>> + if (arg->status_change_nid < 0) >>>> + return notifier_from_errno(0); >>>> + >>>> + switch (action) { >>>> + case MEM_OFFLINE: >>>> + clear_node_memory_tier(arg->status_change_nid); >>>> + break; >>>> + case MEM_ONLINE: >>>> + init_node_memory_tier(arg->status_change_nid); >>>> + break; >>>> + } >>>> + >>>> + return notifier_from_errno(0); >>>> +} >>>> + >>>> +static void __init migrate_on_reclaim_init(void) >>>> +{ >>>> + hotplug_memory_notifier(migrate_on_reclaim_callback, MEMTIER_HOTPLUG_PRIO); >>>> +} >>> >>> I suggest to call hotplug_memory_notifier() in memory_tier_init() >>> directly. We are not working on demotion here. >>> >>>> + >>>> static int __init memory_tier_init(void) >>>> { >>>> int node; >>>> @@ -96,6 +169,8 @@ static int __init memory_tier_init(void) >>>> node_property->perf_level = default_memtier_perf_level; >>>> } >>>> mutex_unlock(&memory_tier_lock); >>>> + >>>> + migrate_on_reclaim_init(); >>>> return 0; >>>> } >>>> subsys_initcall(memory_tier_init); >>> >>> Best Regards, >>> Huang, Ying >> >> >> Will update the patch in next iteration to take care of other feedback. > > Thanks! > > Best Regards, > Huang, Ying
"Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes: > "Huang, Ying" <ying.huang@intel.com> writes: > >> Aneesh Kumar K V <aneesh.kumar@linux.ibm.com> writes: >> >>> On 7/26/22 9:33 AM, Huang, Ying wrote: >>>> "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes: [snip] >>>>> >>>>> +static struct memory_tier *__node_get_memory_tier(int node) >>>>> +{ >>>>> + struct memory_tier *memtier; >>>>> + >>>>> + list_for_each_entry(memtier, &memory_tiers, list) { >>>>> + if (node_isset(node, memtier->nodelist)) >>>>> + return memtier; >>>>> + } >>>>> + return NULL; >>>>> +} >>>>> + >>>>> +static void init_node_memory_tier(int node) >>>> >>>> set_node_memory_tier()? >>> >>> That was done based on feedback from Alistair >>> >>> https://lore.kernel.org/linux-mm/87h73iapg1.fsf@nvdebian.thelocal >>> >>>> >>>>> +{ >>>>> + int perf_level; >>>>> + struct memory_tier *memtier; >>>>> + >>>>> + mutex_lock(&memory_tier_lock); >>>>> + >>>>> + memtier = __node_get_memory_tier(node); >>>>> + if (!memtier) { >>>>> + perf_level = node_devices[node]->perf_level; >>>>> + memtier = find_create_memory_tier(perf_level); >>>>> + node_set(node, memtier->nodelist); >>>>> + } >> >> It's related to Alistair's comments too. When will memtier != NULL >> here? We may need just VM_WARN_ON() here? > > When the platform driver sets memory tier directly. With the old code > it can happen when dax/kmem register a node to a memory tier. With > memory_type proposal this can happen if the node is part of memory > type that is already added to a memory tier. Let's look at what it looks like with memory_type in place. Best Regards, Huang, Ying >> >>>>> + mutex_unlock(&memory_tier_lock); >>>>> +} >>>>> + [snip]
diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h index ef380a39db3a..3d5f14d57ae6 100644 --- a/include/linux/memory-tiers.h +++ b/include/linux/memory-tiers.h @@ -14,6 +14,7 @@ #define MEMTIER_PERF_LEVEL_DRAM (1 << (MEMTIER_CHUNK_BITS + 2)) /* leave one tier below this slow pmem */ #define MEMTIER_PERF_LEVEL_PMEM (1 << MEMTIER_CHUNK_BITS) +#define MEMTIER_HOTPLUG_PRIO 100 extern bool numa_demotion_enabled; diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c index 41a21cc5ae55..cc3a47ec18e4 100644 --- a/mm/memory-tiers.c +++ b/mm/memory-tiers.c @@ -5,6 +5,7 @@ #include <linux/lockdep.h> #include <linux/moduleparam.h> #include <linux/node.h> +#include <linux/memory.h> #include <linux/memory-tiers.h> struct memory_tier { @@ -64,6 +65,78 @@ static struct memory_tier *find_create_memory_tier(unsigned int perf_level) return new_memtier; } +static struct memory_tier *__node_get_memory_tier(int node) +{ + struct memory_tier *memtier; + + list_for_each_entry(memtier, &memory_tiers, list) { + if (node_isset(node, memtier->nodelist)) + return memtier; + } + return NULL; +} + +static void init_node_memory_tier(int node) +{ + int perf_level; + struct memory_tier *memtier; + + mutex_lock(&memory_tier_lock); + + memtier = __node_get_memory_tier(node); + if (!memtier) { + perf_level = node_devices[node]->perf_level; + memtier = find_create_memory_tier(perf_level); + node_set(node, memtier->nodelist); + } + mutex_unlock(&memory_tier_lock); +} + +static void clear_node_memory_tier(int node) +{ + struct memory_tier *memtier; + + mutex_lock(&memory_tier_lock); + memtier = __node_get_memory_tier(node); + if (memtier) + node_clear(node, memtier->nodelist); + mutex_unlock(&memory_tier_lock); +} + +/* + * This runs whether reclaim-based migration is enabled or not, + * which ensures that the user can turn reclaim-based migration + * at any time without needing to recalculate migration targets. 
+ */ +static int __meminit migrate_on_reclaim_callback(struct notifier_block *self, + unsigned long action, void *_arg) +{ + struct memory_notify *arg = _arg; + + /* + * Only update the node migration order when a node is + * changing status, like online->offline. + */ + if (arg->status_change_nid < 0) + return notifier_from_errno(0); + + switch (action) { + case MEM_OFFLINE: + clear_node_memory_tier(arg->status_change_nid); + break; + case MEM_ONLINE: + init_node_memory_tier(arg->status_change_nid); + break; + } + + return notifier_from_errno(0); +} + +static void __init migrate_on_reclaim_init(void) +{ + hotplug_memory_notifier(migrate_on_reclaim_callback, MEMTIER_HOTPLUG_PRIO); +} + static int __init memory_tier_init(void) { int node; @@ -96,6 +169,8 @@ static int __init memory_tier_init(void) node_property->perf_level = default_memtier_perf_level; } mutex_unlock(&memory_tier_lock); + + migrate_on_reclaim_init(); return 0; } subsys_initcall(memory_tier_init);
If a newly onlined NUMA node doesn't have a performance level assigned, the kernel adds the NUMA node to the default memory tier. Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> --- include/linux/memory-tiers.h | 1 + mm/memory-tiers.c | 75 ++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+)