mm,memory_hotplug: Drop unneeded locking

Message ID 20210531093958.15021-1-osalvador@suse.de (mailing list archive)
State New, archived
Series mm,memory_hotplug: Drop unneeded locking

Commit Message

Oscar Salvador May 31, 2021, 9:39 a.m. UTC
Currently, memory-hotplug code takes the zone's span_writelock
and the pgdat's resize_lock when resizing the node/zone's spanned
pages via {move_pfn_range_to_zone(),remove_pfn_range_from_zone()}
and when resizing the node's and zone's present pages via
adjust_present_page_count().

These locks are also taken during the initialization of the system
at boot time, where they protect parallel struct page initialization,
but they should not really be needed in memory-hotplug, where all
operations are a) synchronized on the device level and b) serialized
by the mem_hotplug_lock.

Signed-off-by: Oscar Salvador <osalvador@suse.de>
Acked-by: David Hildenbrand <david@redhat.com>
---
 mm/memory_hotplug.c | 11 -----------
 1 file changed, 11 deletions(-)
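
For context on the serialization argument: every hotplug operation that
resizes a zone or node already runs with mem_hotplug_lock held for write,
taken via mem_hotplug_begin()/mem_hotplug_done(). A minimal sketch of the
pattern, assuming the v5.13-era code (heavily simplified; error handling
and most steps elided, not the exact mm/memory_hotplug.c source):

/*
 * Sketch: every caller that resizes zone/node spans or present pages
 * is already inside a mem_hotplug_begin()/mem_hotplug_done() section,
 * so the span_writelock/resize_lock pairs add no extra protection.
 */
int __ref online_pages(unsigned long pfn, unsigned long nr_pages,
		       struct zone *zone)
{
	int ret = 0;

	mem_hotplug_begin();	/* mem_hotplug_lock taken for write */

	move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_ISOLATE);
	/* ... mark pages online, update watermarks, etc. ... */
	adjust_present_page_count(zone, nr_pages);

	mem_hotplug_done();	/* mem_hotplug_lock released */
	return ret;
}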

Comments

Michal Hocko May 31, 2021, 11:37 a.m. UTC | #1
On Mon 31-05-21 11:39:58, Oscar Salvador wrote:
> Currently, memory-hotplug code takes the zone's span_writelock
> and the pgdat's resize_lock when resizing the node/zone's spanned
> pages via {move_pfn_range_to_zone(),remove_pfn_range_from_zone()}
> and when resizing the node's and zone's present pages via
> adjust_present_page_count().
> 
> These locks are also taken during the initialization of the system
> at boot time, where they protect parallel struct page initialization,
> but they should not really be needed in memory-hotplug, where all
> operations are a) synchronized on the device level and b) serialized
> by the mem_hotplug_lock.
> 
> Signed-off-by: Oscar Salvador <osalvador@suse.de>
> Acked-by: David Hildenbrand <david@redhat.com>

Thanks. I wanted to kill these locks quite some time ago (see the TODO
you are removing ;)) but never got around to that.

Acked-by: Michal Hocko <mhocko@suse.com>

> ---
>  mm/memory_hotplug.c | 11 -----------
>  1 file changed, 11 deletions(-)
> 
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index 075b34803fec..9edbc57055bf 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -329,7 +329,6 @@ static void shrink_zone_span(struct zone *zone, unsigned long start_pfn,
>  	unsigned long pfn;
>  	int nid = zone_to_nid(zone);
>  
> -	zone_span_writelock(zone);
>  	if (zone->zone_start_pfn == start_pfn) {
>  		/*
>  		 * If the section is smallest section in the zone, it need
> @@ -362,7 +361,6 @@ static void shrink_zone_span(struct zone *zone, unsigned long start_pfn,
>  			zone->spanned_pages = 0;
>  		}
>  	}
> -	zone_span_writeunlock(zone);
>  }
>  
>  static void update_pgdat_span(struct pglist_data *pgdat)
> @@ -424,10 +422,8 @@ void __ref remove_pfn_range_from_zone(struct zone *zone,
>  
>  	clear_zone_contiguous(zone);
>  
> -	pgdat_resize_lock(zone->zone_pgdat, &flags);
>  	shrink_zone_span(zone, start_pfn, start_pfn + nr_pages);
>  	update_pgdat_span(pgdat);
> -	pgdat_resize_unlock(zone->zone_pgdat, &flags);
>  
>  	set_zone_contiguous(zone);
>  }
> @@ -638,15 +634,10 @@ void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
>  
>  	clear_zone_contiguous(zone);
>  
> -	/* TODO Huh pgdat is irqsave while zone is not. It used to be like that before */
> -	pgdat_resize_lock(pgdat, &flags);
> -	zone_span_writelock(zone);
>  	if (zone_is_empty(zone))
>  		init_currently_empty_zone(zone, start_pfn, nr_pages);
>  	resize_zone_range(zone, start_pfn, nr_pages);
> -	zone_span_writeunlock(zone);
>  	resize_pgdat_range(pgdat, start_pfn, nr_pages);
> -	pgdat_resize_unlock(pgdat, &flags);
>  
>  	/*
>  	 * Subsection population requires care in pfn_to_online_page().
> @@ -739,9 +730,7 @@ void adjust_present_page_count(struct zone *zone, long nr_pages)
>  	unsigned long flags;
>  
>  	zone->present_pages += nr_pages;
> -	pgdat_resize_lock(zone->zone_pgdat, &flags);
>  	zone->zone_pgdat->node_present_pages += nr_pages;
> -	pgdat_resize_unlock(zone->zone_pgdat, &flags);
>  }
>  
>  int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,
> -- 
> 2.16.3
Anshuman Khandual June 1, 2021, 5:17 a.m. UTC | #2
On 5/31/21 3:09 PM, Oscar Salvador wrote:
> Currently, memory-hotplug code takes the zone's span_writelock
> and the pgdat's resize_lock when resizing the node/zone's spanned
> pages via {move_pfn_range_to_zone(),remove_pfn_range_from_zone()}
> and when resizing the node's and zone's present pages via
> adjust_present_page_count().
> 
> These locks are also taken during the initialization of the system
> at boot time, where they protect parallel struct page initialization,
> but they should not really be needed in memory-hotplug, where all
> operations are a) synchronized on the device level and b) serialized
> by the mem_hotplug_lock.
> 
> Signed-off-by: Oscar Salvador <osalvador@suse.de>
> Acked-by: David Hildenbrand <david@redhat.com>
> ---
>  mm/memory_hotplug.c | 11 -----------
>  1 file changed, 11 deletions(-)
> 
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index 075b34803fec..9edbc57055bf 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -329,7 +329,6 @@ static void shrink_zone_span(struct zone *zone, unsigned long start_pfn,
>  	unsigned long pfn;
>  	int nid = zone_to_nid(zone);
>  
> -	zone_span_writelock(zone);
>  	if (zone->zone_start_pfn == start_pfn) {
>  		/*
>  		 * If the section is smallest section in the zone, it need
> @@ -362,7 +361,6 @@ static void shrink_zone_span(struct zone *zone, unsigned long start_pfn,
>  			zone->spanned_pages = 0;
>  		}
>  	}
> -	zone_span_writeunlock(zone);
>  }
>  
>  static void update_pgdat_span(struct pglist_data *pgdat)
> @@ -424,10 +422,8 @@ void __ref remove_pfn_range_from_zone(struct zone *zone,
>  
>  	clear_zone_contiguous(zone);
>  
> -	pgdat_resize_lock(zone->zone_pgdat, &flags);
>  	shrink_zone_span(zone, start_pfn, start_pfn + nr_pages);
>  	update_pgdat_span(pgdat);
> -	pgdat_resize_unlock(zone->zone_pgdat, &flags);
>  
>  	set_zone_contiguous(zone);
>  }
> @@ -638,15 +634,10 @@ void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
>  
>  	clear_zone_contiguous(zone);
>  
> -	/* TODO Huh pgdat is irqsave while zone is not. It used to be like that before */
> -	pgdat_resize_lock(pgdat, &flags);
> -	zone_span_writelock(zone);
>  	if (zone_is_empty(zone))
>  		init_currently_empty_zone(zone, start_pfn, nr_pages);
>  	resize_zone_range(zone, start_pfn, nr_pages);
> -	zone_span_writeunlock(zone);
>  	resize_pgdat_range(pgdat, start_pfn, nr_pages);
> -	pgdat_resize_unlock(pgdat, &flags);
>  
>  	/*
>  	 * Subsection population requires care in pfn_to_online_page().
> @@ -739,9 +730,7 @@ void adjust_present_page_count(struct zone *zone, long nr_pages)
>  	unsigned long flags;
>  
>  	zone->present_pages += nr_pages;
> -	pgdat_resize_lock(zone->zone_pgdat, &flags);
>  	zone->zone_pgdat->node_present_pages += nr_pages;
> -	pgdat_resize_unlock(zone->zone_pgdat, &flags);
>  }
>  
>  int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,
> 

Should we also just drop the zone_span_write[lock|unlock]() helpers, as there
are no users left?
Oscar Salvador June 1, 2021, 7:47 a.m. UTC | #3
On Tue, Jun 01, 2021 at 10:47:31AM +0530, Anshuman Khandual wrote:
> Should we also just drop the zone_span_write[lock|unlock]() helpers, as there
> are no users left?

Yes, definitely.
Andrew, can you squash this on top? Thanks:

diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index a7fd2c3ccb77..27d8ba1d32cb 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -67,10 +67,6 @@ struct range mhp_get_pluggable_range(bool need_mapping);
 
 /*
  * Zone resizing functions
- *
- * Note: any attempt to resize a zone should has pgdat_resize_lock()
- * zone_span_writelock() both held. This ensure the size of a zone
- * can't be changed while pgdat_resize_lock() held.
  */
 static inline unsigned zone_span_seqbegin(struct zone *zone)
 {
@@ -80,14 +76,6 @@ static inline int zone_span_seqretry(struct zone *zone, unsigned iv)
 {
 	return read_seqretry(&zone->span_seqlock, iv);
 }
-static inline void zone_span_writelock(struct zone *zone)
-{
-	write_seqlock(&zone->span_seqlock);
-}
-static inline void zone_span_writeunlock(struct zone *zone)
-{
-	write_sequnlock(&zone->span_seqlock);
-}
 static inline void zone_seqlock_init(struct zone *zone)
 {
 	seqlock_init(&zone->span_seqlock);
@@ -233,8 +221,6 @@ static inline int zone_span_seqretry(struct zone *zone, unsigned iv)
 {
 	return 0;
 }
-static inline void zone_span_writelock(struct zone *zone) {}
-static inline void zone_span_writeunlock(struct zone *zone) {}
 static inline void zone_seqlock_init(struct zone *zone) {}
 
 static inline int try_online_node(int nid)
David Hildenbrand June 1, 2021, 8:02 a.m. UTC | #4
On 01.06.21 09:47, Oscar Salvador wrote:
> On Tue, Jun 01, 2021 at 10:47:31AM +0530, Anshuman Khandual wrote:
>> Should we also just drop the zone_span_write[lock|unlock]() helpers, as there
>> are no users left?
> 
> Yes, definitely.
> Andrew, can you squash this on top? Thanks:
> 
> diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
> index a7fd2c3ccb77..27d8ba1d32cb 100644
> --- a/include/linux/memory_hotplug.h
> +++ b/include/linux/memory_hotplug.h
> @@ -67,10 +67,6 @@ struct range mhp_get_pluggable_range(bool need_mapping);
>   
>   /*
>    * Zone resizing functions
> - *
> - * Note: any attempt to resize a zone should has pgdat_resize_lock()
> - * zone_span_writelock() both held. This ensure the size of a zone
> - * can't be changed while pgdat_resize_lock() held.
>    */
>   static inline unsigned zone_span_seqbegin(struct zone *zone)
>   {
> @@ -80,14 +76,6 @@ static inline int zone_span_seqretry(struct zone *zone, unsigned iv)
>   {
>   	return read_seqretry(&zone->span_seqlock, iv);
>   }
> -static inline void zone_span_writelock(struct zone *zone)
> -{
> -	write_seqlock(&zone->span_seqlock);
> -}
> -static inline void zone_span_writeunlock(struct zone *zone)
> -{
> -	write_sequnlock(&zone->span_seqlock);
> -}

If there is no writer anymore, why do we have to protect readers?
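
For reference, the read side in question is the seqlock retry loop, roughly
as in page_outside_zone_boundaries() in mm/page_alloc.c of that era
(simplified sketch, not the exact source):

/*
 * Read-side pattern behind zone_span_seqbegin()/zone_span_seqretry().
 * With the last writer gone, the seqcount can no longer change, so the
 * retry loop always exits after its first pass -- dead protection.
 */
static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
{
	int ret = 0;
	unsigned int seq;
	unsigned long pfn = page_to_pfn(page);

	do {
		seq = zone_span_seqbegin(zone);
		if (!zone_spans_pfn(zone, pfn))
			ret = 1;
	} while (zone_span_seqretry(zone, seq));

	return ret;
}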
Oscar Salvador June 1, 2021, 8:12 a.m. UTC | #5
On Tue, Jun 01, 2021 at 10:02:54AM +0200, David Hildenbrand wrote:
> If there is no writer anymore, why do we have to protect readers?

Yeah, you are right. 
Let me prepare a v2 as this is getting too sloppy.
Michal Hocko June 1, 2021, 9:47 a.m. UTC | #6
On Tue 01-06-21 10:12:54, Oscar Salvador wrote:
> On Tue, Jun 01, 2021 at 10:02:54AM +0200, David Hildenbrand wrote:
> > If there is no writer anymore, why do we have to protect readers?
> 
> Yeah, you are right. 
> Let me prepare a v2 as this is getting too sloppy.

While you are touching this and want to drill all the way down, it
would be reasonable to drop the pgdat resize lock as well.
It is only used in the early boot code, and we have one executing thread
context per NUMA node during the deferred initialization. I haven't
checked all the potential side effects the lock might have, but it sounds
like there is quite some cleanup potential over there.
Oscar Salvador June 1, 2021, 10:33 a.m. UTC | #7
On Tue, Jun 01, 2021 at 11:47:13AM +0200, Michal Hocko wrote:
> While you are touching this and want to drill all the way down, it
> would be reasonable to drop the pgdat resize lock as well.
> It is only used in the early boot code, and we have one executing thread
> context per NUMA node during the deferred initialization. I haven't
> checked all the potential side effects the lock might have, but it sounds
> like there is quite some cleanup potential over there.

I am not sure about that. It is true that deferred_init_memmap() gets executed
on a per-node thread, so it is not a problem by itself, but we also have
deferred_grow_zone(). It might be that while deferred_init_memmap() is running,
we also get calls to deferred_grow_zone() for the same node, and that would
cause some trouble wrt. first_deferred_pfn. I need to double-check it, but
IIRC that is the case.
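
For readers following along, a simplified sketch of the interaction being
described, based on deferred_grow_zone() in mm/page_alloc.c of that era
(details elided): both the per-node deferred_init_memmap() thread and
on-demand growth update pgdat->first_deferred_pfn, and the pgdat resize
lock is what keeps them from stepping on each other.

/*
 * Sketch of deferred_grow_zone(); it can run concurrently with the
 * per-node deferred_init_memmap() thread, and both sides move
 * pgdat->first_deferred_pfn under the pgdat resize lock.
 */
static noinline bool __init deferred_grow_zone(struct zone *zone,
					       unsigned int order)
{
	pg_data_t *pgdat = zone->zone_pgdat;
	unsigned long first_deferred_pfn = pgdat->first_deferred_pfn;
	unsigned long flags;

	pgdat_resize_lock(pgdat, &flags);

	/* Someone else grew the zone while we waited for the lock. */
	if (first_deferred_pfn != pgdat->first_deferred_pfn) {
		pgdat_resize_unlock(pgdat, &flags);
		return true;
	}

	/* ... initialize enough struct pages to satisfy the request ... */
	/* ... then advance pgdat->first_deferred_pfn past them ... */

	pgdat_resize_unlock(pgdat, &flags);
	return true;
}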

Patch

diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 075b34803fec..9edbc57055bf 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -329,7 +329,6 @@ static void shrink_zone_span(struct zone *zone, unsigned long start_pfn,
 	unsigned long pfn;
 	int nid = zone_to_nid(zone);
 
-	zone_span_writelock(zone);
 	if (zone->zone_start_pfn == start_pfn) {
 		/*
 		 * If the section is smallest section in the zone, it need
@@ -362,7 +361,6 @@ static void shrink_zone_span(struct zone *zone, unsigned long start_pfn,
 			zone->spanned_pages = 0;
 		}
 	}
-	zone_span_writeunlock(zone);
 }
 
 static void update_pgdat_span(struct pglist_data *pgdat)
@@ -424,10 +422,8 @@ void __ref remove_pfn_range_from_zone(struct zone *zone,
 
 	clear_zone_contiguous(zone);
 
-	pgdat_resize_lock(zone->zone_pgdat, &flags);
 	shrink_zone_span(zone, start_pfn, start_pfn + nr_pages);
 	update_pgdat_span(pgdat);
-	pgdat_resize_unlock(zone->zone_pgdat, &flags);
 
 	set_zone_contiguous(zone);
 }
@@ -638,15 +634,10 @@ void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
 
 	clear_zone_contiguous(zone);
 
-	/* TODO Huh pgdat is irqsave while zone is not. It used to be like that before */
-	pgdat_resize_lock(pgdat, &flags);
-	zone_span_writelock(zone);
 	if (zone_is_empty(zone))
 		init_currently_empty_zone(zone, start_pfn, nr_pages);
 	resize_zone_range(zone, start_pfn, nr_pages);
-	zone_span_writeunlock(zone);
 	resize_pgdat_range(pgdat, start_pfn, nr_pages);
-	pgdat_resize_unlock(pgdat, &flags);
 
 	/*
 	 * Subsection population requires care in pfn_to_online_page().
@@ -739,9 +730,7 @@ void adjust_present_page_count(struct zone *zone, long nr_pages)
 	unsigned long flags;
 
 	zone->present_pages += nr_pages;
-	pgdat_resize_lock(zone->zone_pgdat, &flags);
 	zone->zone_pgdat->node_present_pages += nr_pages;
-	pgdat_resize_unlock(zone->zone_pgdat, &flags);
 }
 
 int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,