Message ID | 20220728190436.858458-9-aneesh.kumar@linux.ibm.com
---|---
State | New
Series | mm/demotion: Memory tiers and demotion
"Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes: > With memory tiers support we can have memory only NUMA nodes > in the top tier from which we want to avoid promotion tracking NUMA > faults. Update node_is_toptier to work with memory tiers. > All NUMA nodes are by default top tier nodes. With lower memory > tiers added we consider all memory tiers above a memory tier having > CPU NUMA nodes as a top memory tier > > Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> > --- > include/linux/memory-tiers.h | 11 ++++++++++ > include/linux/node.h | 5 ----- > mm/huge_memory.c | 1 + > mm/memory-tiers.c | 42 ++++++++++++++++++++++++++++++++++++ > mm/migrate.c | 1 + > mm/mprotect.c | 1 + > 6 files changed, 56 insertions(+), 5 deletions(-) > > diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h > index f8dbeda617a7..bc9fb9d39b2c 100644 > --- a/include/linux/memory-tiers.h > +++ b/include/linux/memory-tiers.h > @@ -35,6 +35,7 @@ struct memory_dev_type *init_node_memory_type(int node, struct memory_dev_type * > #ifdef CONFIG_MIGRATION > int next_demotion_node(int node); > void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets); > +bool node_is_toptier(int node); > #else > static inline int next_demotion_node(int node) > { > @@ -45,6 +46,11 @@ static inline void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *target > { > *targets = NODE_MASK_NONE; > } > + > +static inline bool node_is_toptier(int node) > +{ > + return true; > +} > #endif > > #else > @@ -64,5 +70,10 @@ static inline void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *target > { > *targets = NODE_MASK_NONE; > } > + > +static inline bool node_is_toptier(int node) > +{ > + return true; > +} > #endif /* CONFIG_NUMA */ > #endif /* _LINUX_MEMORY_TIERS_H */ > diff --git a/include/linux/node.h b/include/linux/node.h > index 40d641a8bfb0..9ec680dd607f 100644 > --- a/include/linux/node.h > +++ b/include/linux/node.h > @@ -185,9 +185,4 @@ static inline void register_hugetlbfs_with_node(node_registration_func_t reg, > > #define to_node(device) container_of(device, struct node, dev) > > -static inline bool node_is_toptier(int node) > -{ > - return node_state(node, N_CPU); > -} > - > #endif /* _LINUX_NODE_H_ */ > diff --git a/mm/huge_memory.c b/mm/huge_memory.c > index 834f288b3769..8405662646e9 100644 > --- a/mm/huge_memory.c > +++ b/mm/huge_memory.c > @@ -35,6 +35,7 @@ > #include <linux/numa.h> > #include <linux/page_owner.h> > #include <linux/sched/sysctl.h> > +#include <linux/memory-tiers.h> > > #include <asm/tlb.h> > #include <asm/pgalloc.h> > diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c > index 84e2be31a853..36d87dc422ab 100644 > --- a/mm/memory-tiers.c > +++ b/mm/memory-tiers.c > @@ -30,6 +30,7 @@ static DEFINE_MUTEX(memory_tier_lock); > static LIST_HEAD(memory_tiers); > struct memory_dev_type *node_memory_types[MAX_NUMNODES]; > #ifdef CONFIG_MIGRATION > +static int top_tier_adistance; > /* > * node_demotion[] examples: > * > @@ -159,6 +160,31 @@ static struct memory_tier *__node_get_memory_tier(int node) > } > > #ifdef CONFIG_MIGRATION > +bool node_is_toptier(int node) > +{ > + bool toptier; > + pg_data_t *pgdat; > + struct memory_tier *memtier; > + > + pgdat = NODE_DATA(node); > + if (!pgdat) > + return false; > + > + rcu_read_lock(); > + memtier = rcu_dereference(pgdat->memtier); > + if (!memtier) { > + toptier = true; > + goto out; > + } > + if (memtier->adistance_start >= top_tier_adistance) > + toptier = true; > + else > + toptier = false; > +out: > + 
rcu_read_unlock(); > + return toptier; > +} > + > void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets) > { > struct memory_tier *memtier; > @@ -315,6 +341,22 @@ static void establish_demotion_targets(void) > } > } while (1); > } > + /* > + * Promotion is allowed from a memory tier to higher > + * memory tier only if the memory tier doesn't include > + * compute. We want to skip promotion from a memory tier, > + * if any node that is part of the memory tier have CPUs. > + * Once we detect such a memory tier, we consider that tier > + * as top tiper from which promotion on is not allowed. > + */ > + list_for_each_entry(memtier, &memory_tiers, list) { > + tier_nodes = get_memtier_nodemask(memtier); > + nodes_and(tier_nodes, node_states[N_CPU], tier_nodes); > + if (!nodes_empty(tier_nodes)) { > + top_tier_adistance = memtier->adistance_start; IMHO, this should be, top_tier_adistance = memtier->adistance_start + MEMTIER_CHUNK_SIZE; Best Regards, Huang, Ying > + break; > + } > + } > /* > * Now build the lower_tier mask for each node collecting node mask from > * all memory tier below it. This allows us to fallback demotion page > diff --git a/mm/migrate.c b/mm/migrate.c > index c758c9c21d7d..1da81136eaaa 100644 > --- a/mm/migrate.c > +++ b/mm/migrate.c > @@ -50,6 +50,7 @@ > #include <linux/memory.h> > #include <linux/random.h> > #include <linux/sched/sysctl.h> > +#include <linux/memory-tiers.h> > > #include <asm/tlbflush.h> > > diff --git a/mm/mprotect.c b/mm/mprotect.c > index ba5592655ee3..92a2fc0fa88b 100644 > --- a/mm/mprotect.c > +++ b/mm/mprotect.c > @@ -31,6 +31,7 @@ > #include <linux/pgtable.h> > #include <linux/sched/sysctl.h> > #include <linux/userfaultfd_k.h> > +#include <linux/memory-tiers.h> > #include <asm/cacheflush.h> > #include <asm/mmu_context.h> > #include <asm/tlbflush.h>
On 7/29/22 12:09 PM, Huang, Ying wrote:
> "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com> writes:
>
> [snip]
>
>> +	list_for_each_entry(memtier, &memory_tiers, list) {
>> +		tier_nodes = get_memtier_nodemask(memtier);
>> +		nodes_and(tier_nodes, node_states[N_CPU], tier_nodes);
>> +		if (!nodes_empty(tier_nodes)) {
>> +			top_tier_adistance = memtier->adistance_start;
>
> IMHO, this should be,
>
> 	top_tier_adistance = memtier->adistance_start + MEMTIER_CHUNK_SIZE;
>

Good catch. Will update. BTW i did send v12 version of the patchset already to the list.

-aneesh
On 7/29/22 12:11 PM, Aneesh Kumar K V wrote:
> On 7/29/22 12:09 PM, Huang, Ying wrote:
>
> [snip]
>
>>> +	list_for_each_entry(memtier, &memory_tiers, list) {
>>> +		tier_nodes = get_memtier_nodemask(memtier);
>>> +		nodes_and(tier_nodes, node_states[N_CPU], tier_nodes);
>>> +		if (!nodes_empty(tier_nodes)) {
>>> +			top_tier_adistance = memtier->adistance_start;
>>
>> IMHO, this should be,
>>
>> 	top_tier_adistance = memtier->adistance_start + MEMTIER_CHUNK_SIZE;
>>
>
> Good catch. Will update. BTW i did send v12 version of the patchset already to the list.
>

Checking this again, we consider a node top tier if the node's memtier abstract distance
satisfy the below.

	if (memtier->adistance_start <= top_tier_adistance)
		toptier = true;

With that we should be good with the current code. But I agree with you that top_tier_distance
should cover the full range of the top memory tier.

-aneesh
Aneesh Kumar K V <aneesh.kumar@linux.ibm.com> writes:

> On 7/29/22 12:11 PM, Aneesh Kumar K V wrote:
>> On 7/29/22 12:09 PM, Huang, Ying wrote:
>
> [snip]
>
>>>> +	list_for_each_entry(memtier, &memory_tiers, list) {
>>>> +		tier_nodes = get_memtier_nodemask(memtier);
>>>> +		nodes_and(tier_nodes, node_states[N_CPU], tier_nodes);
>>>> +		if (!nodes_empty(tier_nodes)) {
>>>> +			top_tier_adistance = memtier->adistance_start;
>>>
>>> IMHO, this should be,
>>>
>>> 	top_tier_adistance = memtier->adistance_start + MEMTIER_CHUNK_SIZE;
>>>
>>
>> Good catch. Will update. BTW i did send v12 version of the patchset already to the list.
>>
>
> Checking this again, we consider a node top tier if the node's memtier abstract distance
> satisfy the below.
>
> 	if (memtier->adistance_start <= top_tier_adistance)
> 		toptier = true;

I admit that this works correctly.  And I think that the following code
is even more correct conceptually.  If so, why not help the code reader
to understand it more easily?

	if (memtier->adistance_start + MEMTIER_CHUNK_SIZE <= top_tier_adistance)
		toptier = true;

Best Regards,
Huang, Ying

> With that we should be good with the current code. But I agree with you that top_tier_distance
> should cover the full range of the top memory tier.
>
> -aneesh
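For readers following the arithmetic in this exchange, below is a small stand-alone sketch of the two check forms discussed above. The MEMTIER_CHUNK_SIZE value and the adistance_start numbers are made up for illustration (they are not the kernel's real constants); a tier is assumed to span [adistance_start, adistance_start + MEMTIER_CHUNK_SIZE), and top_tier_adistance is set to cover the full range of the first tier that contains CPU nodes, as agreed above.

```c
/* Illustrative user-space sketch only; all values below are assumptions. */
#include <stdio.h>
#include <stdbool.h>

#define MEMTIER_CHUNK_SIZE 128			/* assumed value, for illustration */

struct tier {
	const char *name;
	int adistance_start;			/* tier covers [start, start + chunk) */
};

int main(void)
{
	/* Lower abstract distance == faster memory; DRAM is the first tier with CPUs. */
	struct tier tiers[] = {
		{ "HBM, no CPUs",   384 },
		{ "DRAM, has CPUs", 512 },
		{ "CXL, no CPUs",   640 },	/* begins exactly where the DRAM tier ends */
	};
	/* Cover the full range of the CPU tier, per the suggestion above. */
	int top_tier_adistance = 512 + MEMTIER_CHUNK_SIZE;

	for (int i = 0; i < 3; i++) {
		int start = tiers[i].adistance_start;
		bool by_start = start <= top_tier_adistance;
		bool by_end   = start + MEMTIER_CHUNK_SIZE <= top_tier_adistance;

		printf("%-15s  start<=top: %d   start+chunk<=top: %d\n",
		       tiers[i].name, by_start, by_end);
	}
	return 0;
}
```

With these numbers the two forms agree for the HBM and DRAM tiers and diverge only for a tier that begins exactly where the CPU tier's range ends, which is where comparing the end of the node's tier range matches the "cover the full range of the top memory tier" intent more directly. Whether adjacent tiers can actually abut like this depends on how abstract distances are assigned, which is outside this patch.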
diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
index f8dbeda617a7..bc9fb9d39b2c 100644
--- a/include/linux/memory-tiers.h
+++ b/include/linux/memory-tiers.h
@@ -35,6 +35,7 @@ struct memory_dev_type *init_node_memory_type(int node, struct memory_dev_type *
 #ifdef CONFIG_MIGRATION
 int next_demotion_node(int node);
 void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets);
+bool node_is_toptier(int node);
 #else
 static inline int next_demotion_node(int node)
 {
@@ -45,6 +46,11 @@ static inline void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *target
 {
 	*targets = NODE_MASK_NONE;
 }
+
+static inline bool node_is_toptier(int node)
+{
+	return true;
+}
 #endif
 
 #else
@@ -64,5 +70,10 @@ static inline void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *target
 {
 	*targets = NODE_MASK_NONE;
 }
+
+static inline bool node_is_toptier(int node)
+{
+	return true;
+}
 #endif /* CONFIG_NUMA */
 #endif /* _LINUX_MEMORY_TIERS_H */
diff --git a/include/linux/node.h b/include/linux/node.h
index 40d641a8bfb0..9ec680dd607f 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -185,9 +185,4 @@ static inline void register_hugetlbfs_with_node(node_registration_func_t reg,
 
 #define to_node(device) container_of(device, struct node, dev)
 
-static inline bool node_is_toptier(int node)
-{
-	return node_state(node, N_CPU);
-}
-
 #endif /* _LINUX_NODE_H_ */
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 834f288b3769..8405662646e9 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -35,6 +35,7 @@
 #include <linux/numa.h>
 #include <linux/page_owner.h>
 #include <linux/sched/sysctl.h>
+#include <linux/memory-tiers.h>
 
 #include <asm/tlb.h>
 #include <asm/pgalloc.h>
diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
index 84e2be31a853..36d87dc422ab 100644
--- a/mm/memory-tiers.c
+++ b/mm/memory-tiers.c
@@ -30,6 +30,7 @@ static DEFINE_MUTEX(memory_tier_lock);
 static LIST_HEAD(memory_tiers);
 struct memory_dev_type *node_memory_types[MAX_NUMNODES];
 #ifdef CONFIG_MIGRATION
+static int top_tier_adistance;
 /*
  * node_demotion[] examples:
  *
@@ -159,6 +160,31 @@ static struct memory_tier *__node_get_memory_tier(int node)
 }
 
 #ifdef CONFIG_MIGRATION
+bool node_is_toptier(int node)
+{
+	bool toptier;
+	pg_data_t *pgdat;
+	struct memory_tier *memtier;
+
+	pgdat = NODE_DATA(node);
+	if (!pgdat)
+		return false;
+
+	rcu_read_lock();
+	memtier = rcu_dereference(pgdat->memtier);
+	if (!memtier) {
+		toptier = true;
+		goto out;
+	}
+	if (memtier->adistance_start >= top_tier_adistance)
+		toptier = true;
+	else
+		toptier = false;
+out:
+	rcu_read_unlock();
+	return toptier;
+}
+
 void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets)
 {
 	struct memory_tier *memtier;
@@ -315,6 +341,22 @@ static void establish_demotion_targets(void)
 			}
 		} while (1);
 	}
+	/*
+	 * Promotion is allowed from a memory tier to higher
+	 * memory tier only if the memory tier doesn't include
+	 * compute. We want to skip promotion from a memory tier,
+	 * if any node that is part of the memory tier have CPUs.
+	 * Once we detect such a memory tier, we consider that tier
+	 * as top tiper from which promotion on is not allowed.
+	 */
+	list_for_each_entry(memtier, &memory_tiers, list) {
+		tier_nodes = get_memtier_nodemask(memtier);
+		nodes_and(tier_nodes, node_states[N_CPU], tier_nodes);
+		if (!nodes_empty(tier_nodes)) {
+			top_tier_adistance = memtier->adistance_start;
+			break;
+		}
+	}
 	/*
 	 * Now build the lower_tier mask for each node collecting node mask from
 	 * all memory tier below it. This allows us to fallback demotion page
diff --git a/mm/migrate.c b/mm/migrate.c
index c758c9c21d7d..1da81136eaaa 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -50,6 +50,7 @@
 #include <linux/memory.h>
 #include <linux/random.h>
 #include <linux/sched/sysctl.h>
+#include <linux/memory-tiers.h>
 
 #include <asm/tlbflush.h>
 
diff --git a/mm/mprotect.c b/mm/mprotect.c
index ba5592655ee3..92a2fc0fa88b 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -31,6 +31,7 @@
 #include <linux/pgtable.h>
 #include <linux/sched/sysctl.h>
 #include <linux/userfaultfd_k.h>
+#include <linux/memory-tiers.h>
 #include <asm/cacheflush.h>
 #include <asm/mmu_context.h>
 #include <asm/tlbflush.h>
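As a rough illustration of why the new helper is made visible to mm/huge_memory.c, mm/migrate.c and mm/mprotect.c above, the sketch below shows the kind of decision a NUMA-balancing path can make with it. This is a hypothetical helper written for this page, not code from the tree; the name promotion_tracking_wanted() is invented, while page_to_nid() and node_is_toptier() are the interfaces added or used by the patch.

```c
/*
 * Hypothetical helper (not from the kernel tree): decide whether it is
 * worth generating NUMA-hinting faults for a page so that it can later
 * be promoted to a faster memory tier.
 */
static bool promotion_tracking_wanted(struct page *page)
{
	int nid = page_to_nid(page);

	/*
	 * A page that already sits on a top-tier node has no faster tier
	 * to be promoted into, so tracking its accesses for promotion
	 * would only add fault overhead.
	 */
	if (node_is_toptier(nid))
		return false;

	/* Pages on lower tiers are candidates for promotion tracking. */
	return true;
}
```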
With memory tiers support we can have memory only NUMA nodes
in the top tier from which we want to avoid promotion tracking NUMA
faults. Update node_is_toptier to work with memory tiers.
All NUMA nodes are by default top tier nodes. With lower memory
tiers added we consider all memory tiers above a memory tier having
CPU NUMA nodes as a top memory tier

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 include/linux/memory-tiers.h | 11 ++++++++++
 include/linux/node.h         |  5 -----
 mm/huge_memory.c             |  1 +
 mm/memory-tiers.c            | 42 ++++++++++++++++++++++++++++++++++++
 mm/migrate.c                 |  1 +
 mm/mprotect.c                |  1 +
 6 files changed, 56 insertions(+), 5 deletions(-)
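To make the changelog's "top tier" rule concrete, here is a tiny user-space model of "all memory tiers above (and including) the first tier that contains CPU nodes are top tier". The topology, tier names and ordering are invented for illustration only and are not taken from any real system.

```c
/* Illustrative model only; the tier layout below is an assumption. */
#include <stdio.h>
#include <stdbool.h>

#define NR_TIERS 3

/* Tiers ordered from fastest (lowest abstract distance) to slowest. */
struct tier {
	const char *name;
	bool has_cpu_node;
};

int main(void)
{
	struct tier tiers[NR_TIERS] = {
		{ "HBM (memory-only)",   false },
		{ "DRAM (CPU + memory)", true  },
		{ "CXL (memory-only)",   false },
	};
	int first_cpu_tier = NR_TIERS;	/* no CPU tier found: every tier stays top tier */

	/* Find the first (fastest) tier that contains a node with CPUs. */
	for (int i = 0; i < NR_TIERS; i++) {
		if (tiers[i].has_cpu_node) {
			first_cpu_tier = i;
			break;
		}
	}

	/* Tiers at or above that tier count as top tier; slower ones do not. */
	for (int i = 0; i < NR_TIERS; i++)
		printf("%-22s toptier=%d\n", tiers[i].name, i <= first_cpu_tier);

	return 0;
}
```

In this example a node in the memory-only HBM tier still reports itself as top tier, which is exactly the case the changelog calls out: promotion tracking NUMA faults are skipped for it even though the node has no CPUs, while the slower CXL tier remains a promotion source.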