diff mbox series

[RESEND,v2,3/5] mm_zone: add function to check if managed dma zone exists

Message ID 20211207030750.30824-4-bhe@redhat.com (mailing list archive)
State New
Headers show
Series Avoid requesting page from DMA zone when no managed pages | expand

Commit Message

Baoquan He Dec. 7, 2021, 3:07 a.m. UTC
In some places of the current kernel, it is assumed that the DMA zone must
have managed pages if CONFIG_ZONE_DMA is enabled. However, this is not
always true. E.g. in the kdump kernel of x86_64, only the low 1M is present
and locked down at a very early stage of boot, so that there are no managed
pages at all in the DMA zone. This exception will always cause a page
allocation failure if a page is requested from the DMA zone.

Here add function has_managed_dma() and the relevant helper functions to
check if there's DMA zone with managed pages. It will be used in later
patches.

Signed-off-by: Baoquan He <bhe@redhat.com>
---
 include/linux/mmzone.h | 21 +++++++++++++++++++++
 mm/page_alloc.c        | 11 +++++++++++
 2 files changed, 32 insertions(+)

Comments

John Donnelly Dec. 7, 2021, 3:53 a.m. UTC | #1
On 12/6/21 9:07 PM, Baoquan He wrote:
> In some places of the current kernel, it assumes that dma zone must have
> managed pages if CONFIG_ZONE_DMA is enabled. While this is not always true.
> E.g in kdump kernel of x86_64, only low 1M is presented and locked down
> at very early stage of boot, so that there's no managed pages at all in
> DMA zone. This exception will always cause page allocation failure if page
> is requested from DMA zone.
> 
> Here add function has_managed_dma() and the relevant helper functions to
> check if there's DMA zone with managed pages. It will be used in later
> patches.
> 
> Signed-off-by: Baoquan He <bhe@redhat.com>
  Reviewed-by: John Donnelly <john.p.donnelly@oracle.com>
  Tested-by:  John Donnelly <john.p.donnelly@oracle.com>
> ---
>   include/linux/mmzone.h | 21 +++++++++++++++++++++
>   mm/page_alloc.c        | 11 +++++++++++
>   2 files changed, 32 insertions(+)
> 
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index 58e744b78c2c..82d23e13e0e5 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -998,6 +998,18 @@ static inline bool zone_is_zone_device(struct zone *zone)
>   }
>   #endif
>   
> +#ifdef CONFIG_ZONE_DMA
> +static inline bool zone_is_dma(struct zone *zone)
> +{
> +	return zone_idx(zone) == ZONE_DMA;
> +}
> +#else
> +static inline bool zone_is_dma(struct zone *zone)
> +{
> +	return false;
> +}
> +#endif
> +
>   /*
>    * Returns true if a zone has pages managed by the buddy allocator.
>    * All the reclaim decisions have to use this function rather than
> @@ -1046,6 +1058,7 @@ static inline int is_highmem_idx(enum zone_type idx)
>   #endif
>   }
>   
> +bool has_managed_dma(void);
>   /**
>    * is_highmem - helper function to quickly check if a struct zone is a
>    *              highmem zone or not.  This is an attempt to keep references
> @@ -1131,6 +1144,14 @@ extern struct zone *next_zone(struct zone *zone);
>   			; /* do nothing */		\
>   		else
>   
> +#define for_each_managed_zone(zone)		        \
> +	for (zone = (first_online_pgdat())->node_zones; \
> +	     zone;					\
> +	     zone = next_zone(zone))			\
> +		if (!managed_zone(zone))		\
> +			; /* do nothing */		\
> +		else
> +
>   static inline struct zone *zonelist_zone(struct zoneref *zoneref)
>   {
>   	return zoneref->zone;
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index c5952749ad40..ac0ea42a4e5f 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -9459,4 +9459,15 @@ bool take_page_off_buddy(struct page *page)
>   	spin_unlock_irqrestore(&zone->lock, flags);
>   	return ret;
>   }
> +
> +bool has_managed_dma(void)
> +{
> +	struct zone *zone;
> +
> +	for_each_managed_zone(zone) {
> +		if (zone_is_dma(zone))
> +			return true;
> +	}
> +	return false;
> +}
>   #endif
>
David Hildenbrand Dec. 7, 2021, 11:23 a.m. UTC | #2
On 07.12.21 04:07, Baoquan He wrote:
> In some places of the current kernel, it assumes that dma zone must have
> managed pages if CONFIG_ZONE_DMA is enabled. While this is not always true.
> E.g in kdump kernel of x86_64, only low 1M is presented and locked down
> at very early stage of boot, so that there's no managed pages at all in
> DMA zone. This exception will always cause page allocation failure if page
> is requested from DMA zone.
> 
> Here add function has_managed_dma() and the relevant helper functions to
> check if there's DMA zone with managed pages. It will be used in later
> patches.
> 
> Signed-off-by: Baoquan He <bhe@redhat.com>
> ---
>  include/linux/mmzone.h | 21 +++++++++++++++++++++
>  mm/page_alloc.c        | 11 +++++++++++
>  2 files changed, 32 insertions(+)
> 
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index 58e744b78c2c..82d23e13e0e5 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -998,6 +998,18 @@ static inline bool zone_is_zone_device(struct zone *zone)
>  }
>  #endif
>  
> +#ifdef CONFIG_ZONE_DMA
> +static inline bool zone_is_dma(struct zone *zone)
> +{
> +	return zone_idx(zone) == ZONE_DMA;
> +}
> +#else
> +static inline bool zone_is_dma(struct zone *zone)
> +{
> +	return false;
> +}
> +#endif
> +
>  /*
>   * Returns true if a zone has pages managed by the buddy allocator.
>   * All the reclaim decisions have to use this function rather than
> @@ -1046,6 +1058,7 @@ static inline int is_highmem_idx(enum zone_type idx)
>  #endif
>  }
>  
> +bool has_managed_dma(void);
>  /**
>   * is_highmem - helper function to quickly check if a struct zone is a
>   *              highmem zone or not.  This is an attempt to keep references
> @@ -1131,6 +1144,14 @@ extern struct zone *next_zone(struct zone *zone);
>  			; /* do nothing */		\
>  		else
>  
> +#define for_each_managed_zone(zone)		        \
> +	for (zone = (first_online_pgdat())->node_zones; \
> +	     zone;					\
> +	     zone = next_zone(zone))			\
> +		if (!managed_zone(zone))		\
> +			; /* do nothing */		\
> +		else
> +
>  static inline struct zone *zonelist_zone(struct zoneref *zoneref)
>  {
>  	return zoneref->zone;
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index c5952749ad40..ac0ea42a4e5f 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -9459,4 +9459,15 @@ bool take_page_off_buddy(struct page *page)
>  	spin_unlock_irqrestore(&zone->lock, flags);
>  	return ret;
>  }
> +
> +bool has_managed_dma(void)
> +{
> +	struct zone *zone;
> +
> +	for_each_managed_zone(zone) {
> +		if (zone_is_dma(zone))
> +			return true;
> +	}
> +	return false;
> +}

Wouldn't it be "easier/faster" to just iterate online nodes and directly
obtain the ZONE_DMA, checking if there are managed pages?
Baoquan He Dec. 9, 2021, 1:02 p.m. UTC | #3
On 12/07/21 at 12:23pm, David Hildenbrand wrote:
> On 07.12.21 04:07, Baoquan He wrote:
> > In some places of the current kernel, it assumes that dma zone must have
> > managed pages if CONFIG_ZONE_DMA is enabled. While this is not always true.
> > E.g in kdump kernel of x86_64, only low 1M is presented and locked down
> > at very early stage of boot, so that there's no managed pages at all in
> > DMA zone. This exception will always cause page allocation failure if page
> > is requested from DMA zone.
> > 
> > Here add function has_managed_dma() and the relevant helper functions to
> > check if there's DMA zone with managed pages. It will be used in later
> > patches.
> > 
> > Signed-off-by: Baoquan He <bhe@redhat.com>
> > ---
> >  include/linux/mmzone.h | 21 +++++++++++++++++++++
> >  mm/page_alloc.c        | 11 +++++++++++
> >  2 files changed, 32 insertions(+)
> > 
> > diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> > index 58e744b78c2c..82d23e13e0e5 100644
> > --- a/include/linux/mmzone.h
> > +++ b/include/linux/mmzone.h
> > @@ -998,6 +998,18 @@ static inline bool zone_is_zone_device(struct zone *zone)
> >  }
> >  #endif
> >  
> > +#ifdef CONFIG_ZONE_DMA
> > +static inline bool zone_is_dma(struct zone *zone)
> > +{
> > +	return zone_idx(zone) == ZONE_DMA;
> > +}
> > +#else
> > +static inline bool zone_is_dma(struct zone *zone)
> > +{
> > +	return false;
> > +}
> > +#endif
> > +
> >  /*
> >   * Returns true if a zone has pages managed by the buddy allocator.
> >   * All the reclaim decisions have to use this function rather than
> > @@ -1046,6 +1058,7 @@ static inline int is_highmem_idx(enum zone_type idx)
> >  #endif
> >  }
> >  
> > +bool has_managed_dma(void);
> >  /**
> >   * is_highmem - helper function to quickly check if a struct zone is a
> >   *              highmem zone or not.  This is an attempt to keep references
> > @@ -1131,6 +1144,14 @@ extern struct zone *next_zone(struct zone *zone);
> >  			; /* do nothing */		\
> >  		else
> >  
> > +#define for_each_managed_zone(zone)		        \
> > +	for (zone = (first_online_pgdat())->node_zones; \
> > +	     zone;					\
> > +	     zone = next_zone(zone))			\
> > +		if (!managed_zone(zone))		\
> > +			; /* do nothing */		\
> > +		else
> > +
> >  static inline struct zone *zonelist_zone(struct zoneref *zoneref)
> >  {
> >  	return zoneref->zone;
> > diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> > index c5952749ad40..ac0ea42a4e5f 100644
> > --- a/mm/page_alloc.c
> > +++ b/mm/page_alloc.c
> > @@ -9459,4 +9459,15 @@ bool take_page_off_buddy(struct page *page)
> >  	spin_unlock_irqrestore(&zone->lock, flags);
> >  	return ret;
> >  }
> > +
> > +bool has_managed_dma(void)
> > +{
> > +	struct zone *zone;
> > +
> > +	for_each_managed_zone(zone) {
> > +		if (zone_is_dma(zone))
> > +			return true;
> > +	}
> > +	return false;
> > +}
> 
> Wouldn't it be "easier/faster" to just iterate online nodes and directly
> obtain the ZONE_DMA, checking if there are managed pages?

Thanks, Dave.

Please check for_each_managed_zone(); it iterates over the online nodes
and each of their managed zones.

Is below what you are suggesting? The only difference is I introduced
for_each_managed_zone() which can be reused later if needed. Not sure if
I got your suggestion correctly.

bool has_managed_dma(void)
{
        struct pglist_data *pgdat;
        struct zone *zone;
        enum zone_type i, j;

        for_each_online_pgdat(pgdat) {
                for (i = 0; i < MAX_NR_ZONES - 1; i++) {          
                        struct zone *zone = &pgdat->node_zones[i];
                        if (zone_is_dma(zone))                                                                                                    
                                return true;
                }
        }
        return false;

}
David Hildenbrand Dec. 9, 2021, 1:10 p.m. UTC | #4
On 09.12.21 14:02, Baoquan He wrote:
> On 12/07/21 at 12:23pm, David Hildenbrand wrote:
>> On 07.12.21 04:07, Baoquan He wrote:
>>> In some places of the current kernel, it assumes that dma zone must have
>>> managed pages if CONFIG_ZONE_DMA is enabled. While this is not always true.
>>> E.g in kdump kernel of x86_64, only low 1M is presented and locked down
>>> at very early stage of boot, so that there's no managed pages at all in
>>> DMA zone. This exception will always cause page allocation failure if page
>>> is requested from DMA zone.
>>>
>>> Here add function has_managed_dma() and the relevant helper functions to
>>> check if there's DMA zone with managed pages. It will be used in later
>>> patches.
>>>
>>> Signed-off-by: Baoquan He <bhe@redhat.com>
>>> ---
>>>  include/linux/mmzone.h | 21 +++++++++++++++++++++
>>>  mm/page_alloc.c        | 11 +++++++++++
>>>  2 files changed, 32 insertions(+)
>>>
>>> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
>>> index 58e744b78c2c..82d23e13e0e5 100644
>>> --- a/include/linux/mmzone.h
>>> +++ b/include/linux/mmzone.h
>>> @@ -998,6 +998,18 @@ static inline bool zone_is_zone_device(struct zone *zone)
>>>  }
>>>  #endif
>>>  
>>> +#ifdef CONFIG_ZONE_DMA
>>> +static inline bool zone_is_dma(struct zone *zone)
>>> +{
>>> +	return zone_idx(zone) == ZONE_DMA;
>>> +}
>>> +#else
>>> +static inline bool zone_is_dma(struct zone *zone)
>>> +{
>>> +	return false;
>>> +}
>>> +#endif
>>> +
>>>  /*
>>>   * Returns true if a zone has pages managed by the buddy allocator.
>>>   * All the reclaim decisions have to use this function rather than
>>> @@ -1046,6 +1058,7 @@ static inline int is_highmem_idx(enum zone_type idx)
>>>  #endif
>>>  }
>>>  
>>> +bool has_managed_dma(void);
>>>  /**
>>>   * is_highmem - helper function to quickly check if a struct zone is a
>>>   *              highmem zone or not.  This is an attempt to keep references
>>> @@ -1131,6 +1144,14 @@ extern struct zone *next_zone(struct zone *zone);
>>>  			; /* do nothing */		\
>>>  		else
>>>  
>>> +#define for_each_managed_zone(zone)		        \
>>> +	for (zone = (first_online_pgdat())->node_zones; \
>>> +	     zone;					\
>>> +	     zone = next_zone(zone))			\
>>> +		if (!managed_zone(zone))		\
>>> +			; /* do nothing */		\
>>> +		else
>>> +
>>>  static inline struct zone *zonelist_zone(struct zoneref *zoneref)
>>>  {
>>>  	return zoneref->zone;
>>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>>> index c5952749ad40..ac0ea42a4e5f 100644
>>> --- a/mm/page_alloc.c
>>> +++ b/mm/page_alloc.c
>>> @@ -9459,4 +9459,15 @@ bool take_page_off_buddy(struct page *page)
>>>  	spin_unlock_irqrestore(&zone->lock, flags);
>>>  	return ret;
>>>  }
>>> +
>>> +bool has_managed_dma(void)
>>> +{
>>> +	struct zone *zone;
>>> +
>>> +	for_each_managed_zone(zone) {
>>> +		if (zone_is_dma(zone))
>>> +			return true;
>>> +	}
>>> +	return false;
>>> +}
>>
>> Wouldn't it be "easier/faster" to just iterate online nodes and directly
>> obtain the ZONE_DMA, checking if there are managed pages?
> 
> Thanks, Dave.
> 
> Please check for_each_managed_zone(), it is iterating online nodes and
> it's each managed zone. 
> 
> Is below what you are suggesting? The only difference is I introduced
> for_each_managed_zone() which can be reused later if needed. Not sure if
> I got your suggestion correctly.
> 
> bool has_managed_dma(void)
> {
>         struct pglist_data *pgdat;
>         struct zone *zone;
>         enum zone_type i, j;
> 
>         for_each_online_pgdat(pgdat) {
>                 for (i = 0; i < MAX_NR_ZONES - 1; i++) {          
>                         struct zone *zone = &pgdat->node_zones[i];
>                         if (zone_is_dma(zone))                                                                                                    
>                                 return true;
>                 }
>         }
>         return false;
> 
> }


Even simpler, no need to iterate over zones at all, only over nodes:

#ifdef CONFIG_ZONE_DMA
bool has_managed_dma(void)
{
	struct pglist_data *pgdat;

	for_each_online_pgdat(pgdat) {
		struct zone *zone = &pgdat->node_zones[ZONE_DMA];

		if (managed_zone(zone))
			return true;
	}
	return false;
}
#endif /* CONFIG_ZONE_DMA */

Without CONFIG_ZONE_DMA, simply provide a dummy in the header that
returns false.
Baoquan He Dec. 9, 2021, 1:23 p.m. UTC | #5
On 12/09/21 at 02:10pm, David Hildenbrand wrote:
......
> >>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> >>> index c5952749ad40..ac0ea42a4e5f 100644
> >>> --- a/mm/page_alloc.c
> >>> +++ b/mm/page_alloc.c
> >>> @@ -9459,4 +9459,15 @@ bool take_page_off_buddy(struct page *page)
> >>>  	spin_unlock_irqrestore(&zone->lock, flags);
> >>>  	return ret;
> >>>  }
> >>> +
> >>> +bool has_managed_dma(void)
> >>> +{
> >>> +	struct zone *zone;
> >>> +
> >>> +	for_each_managed_zone(zone) {
> >>> +		if (zone_is_dma(zone))
> >>> +			return true;
> >>> +	}
> >>> +	return false;
> >>> +}
> >>
> >> Wouldn't it be "easier/faster" to just iterate online nodes and directly
> >> obtain the ZONE_DMA, checking if there are managed pages?
> > 
> > Thanks, Dave.
> > 
> > Please check for_each_managed_zone(), it is iterating online nodes and
> > it's each managed zone. 
> > 
> > Is below what you are suggesting? The only difference is I introduced
> > for_each_managed_zone() which can be reused later if needed. Not sure if
> > I got your suggestion correctly.
> > 
> > bool has_managed_dma(void)
> > {
> >         struct pglist_data *pgdat;
> >         struct zone *zone;
> >         enum zone_type i, j;
> > 
> >         for_each_online_pgdat(pgdat) {
> >                 for (i = 0; i < MAX_NR_ZONES - 1; i++) {          
> >                         struct zone *zone = &pgdat->node_zones[i];
> >                         if (zone_is_dma(zone))                                                                                                    
> >                                 return true;
> >                 }
> >         }
> >         return false;
> > 
> > }
> 
> 
> Even simpler, no need to iterate over zones at all, only over nodes:
> 
> #ifdef CONFIG_ZONE_DMA
> bool has_managed_dma(void)
> {
> 	struct pglist_data *pgdat;
> 
> 	for_each_online_pgdat(pgdat) {
> 		struct zone *zone = &pgdat->node_zones[ZONE_DMA];
> 
> 		if (managed_zone(zone))
> 			return true;
> 	}
> 	return false;
> }
> #endif /* CONFIG_ZONE_DMA */
> 
> Without CONFIG_ZONE_DMA, simply provide a dummy in the header that
> returns false.

Yeah, it only iterates the number of nodes times. I will take this in
v3. Thanks, David.
diff mbox series

Patch

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 58e744b78c2c..82d23e13e0e5 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -998,6 +998,18 @@  static inline bool zone_is_zone_device(struct zone *zone)
 }
 #endif
 
+#ifdef CONFIG_ZONE_DMA
+static inline bool zone_is_dma(struct zone *zone)
+{
+	return zone_idx(zone) == ZONE_DMA;
+}
+#else
+static inline bool zone_is_dma(struct zone *zone)
+{
+	return false;
+}
+#endif
+
 /*
  * Returns true if a zone has pages managed by the buddy allocator.
  * All the reclaim decisions have to use this function rather than
@@ -1046,6 +1058,7 @@  static inline int is_highmem_idx(enum zone_type idx)
 #endif
 }
 
+bool has_managed_dma(void);
 /**
  * is_highmem - helper function to quickly check if a struct zone is a
  *              highmem zone or not.  This is an attempt to keep references
@@ -1131,6 +1144,14 @@  extern struct zone *next_zone(struct zone *zone);
 			; /* do nothing */		\
 		else
 
+#define for_each_managed_zone(zone)		        \
+	for (zone = (first_online_pgdat())->node_zones; \
+	     zone;					\
+	     zone = next_zone(zone))			\
+		if (!managed_zone(zone))		\
+			; /* do nothing */		\
+		else
+
 static inline struct zone *zonelist_zone(struct zoneref *zoneref)
 {
 	return zoneref->zone;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c5952749ad40..ac0ea42a4e5f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -9459,4 +9459,15 @@  bool take_page_off_buddy(struct page *page)
 	spin_unlock_irqrestore(&zone->lock, flags);
 	return ret;
 }
+
+bool has_managed_dma(void)
+{
+	struct zone *zone;
+
+	for_each_managed_zone(zone) {
+		if (zone_is_dma(zone))
+			return true;
+	}
+	return false;
+}
 #endif