diff mbox series

[4/4] mm/gup: remove get_user_pages_locked()

Message ID 20220131051752.447699-5-jhubbard@nvidia.com (mailing list archive)
State New
Headers show
Series mm/gup: some cleanups | expand

Commit Message

John Hubbard Jan. 31, 2022, 5:17 a.m. UTC
Unravel the rat's nest of APIs in mm/gup.c a bit more:
get_user_pages_locked() was not helping at all, so remove it.

Also, lookup_node() has only a single caller, but it is still worth
having a clearer locking policy there. Change it so that the caller
both takes and releases the mmap_lock, thus leaving lookup_node() with
the sole job of translating a virtual address into a NUMA node ID.

Signed-off-by: John Hubbard <jhubbard@nvidia.com>
---
 include/linux/mm.h |  2 --
 mm/gup.c           | 59 ----------------------------------------------
 mm/mempolicy.c     | 22 ++++++++---------
 3 files changed, 10 insertions(+), 73 deletions(-)

Comments

Jan Kara Jan. 31, 2022, 12:05 p.m. UTC | #1
On Sun 30-01-22 21:17:52, John Hubbard wrote:
> Unraveling the rat's nest set of APIs in mm/gup.c a bit more.
> get_user_pages_locked() was not helping at all, so remove it.
> 
> Also, lookup_node() has only a single caller, but it is still worth
> having a clearer locking policy there. Changing it so that the caller
> both takes and releases the mmap_lock, thus leaving lookup_node() with
> the sole job of translating a virtual address into a numa node ID.
> 
> Signed-off-by: John Hubbard <jhubbard@nvidia.com>

Well, the point of the _locked() GUP variants is that we can unlock mmap_sem
when reading a page from the disk during a page fault (hidden behind
VM_FAULT_RETRY). So the _locked() variants are about reducing mmap_sem
latency rather than code readability.  In this particular case, I don't
think using the _locked() variant in lookup_node() is very beneficial
(generally I would not expect to take a fault there), but at least the
justification in the commit message should be different :).

								Honza

> ---
>  include/linux/mm.h |  2 --
>  mm/gup.c           | 59 ----------------------------------------------
>  mm/mempolicy.c     | 22 ++++++++---------
>  3 files changed, 10 insertions(+), 73 deletions(-)
> 
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 80c540c17d83..528ef1cb4f3a 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -1916,8 +1916,6 @@ long get_user_pages(unsigned long start, unsigned long nr_pages,
>  long pin_user_pages(unsigned long start, unsigned long nr_pages,
>  		    unsigned int gup_flags, struct page **pages,
>  		    struct vm_area_struct **vmas);
> -long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
> -		    unsigned int gup_flags, struct page **pages, int *locked);
>  long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
>  		    struct page **pages, unsigned int gup_flags);
>  long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
> diff --git a/mm/gup.c b/mm/gup.c
> index 58d01a96ab30..4a43c79f0972 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -2119,65 +2119,6 @@ long get_user_pages(unsigned long start, unsigned long nr_pages,
>  }
>  EXPORT_SYMBOL(get_user_pages);
>  
> -/**
> - * get_user_pages_locked() - variant of get_user_pages()
> - *
> - * @start:      starting user address
> - * @nr_pages:   number of pages from start to pin
> - * @gup_flags:  flags modifying lookup behaviour
> - * @pages:      array that receives pointers to the pages pinned.
> - *              Should be at least nr_pages long. Or NULL, if caller
> - *              only intends to ensure the pages are faulted in.
> - * @locked:     pointer to lock flag indicating whether lock is held and
> - *              subsequently whether VM_FAULT_RETRY functionality can be
> - *              utilised. Lock must initially be held.
> - *
> - * It is suitable to replace the form:
> - *
> - *      mmap_read_lock(mm);
> - *      do_something()
> - *      get_user_pages(mm, ..., pages, NULL);
> - *      mmap_read_unlock(mm);
> - *
> - *  to:
> - *
> - *      int locked = 1;
> - *      mmap_read_lock(mm);
> - *      do_something()
> - *      get_user_pages_locked(mm, ..., pages, &locked);
> - *      if (locked)
> - *          mmap_read_unlock(mm);
> - *
> - * We can leverage the VM_FAULT_RETRY functionality in the page fault
> - * paths better by using either get_user_pages_locked() or
> - * get_user_pages_unlocked().
> - *
> - */
> -long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
> -			   unsigned int gup_flags, struct page **pages,
> -			   int *locked)
> -{
> -	/*
> -	 * FIXME: Current FOLL_LONGTERM behavior is incompatible with
> -	 * FAULT_FLAG_ALLOW_RETRY because of the FS DAX check requirement on
> -	 * vmas.  As there are no users of this flag in this call we simply
> -	 * disallow this option for now.
> -	 */
> -	if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
> -		return -EINVAL;
> -	/*
> -	 * FOLL_PIN must only be set internally by the pin_user_pages*() APIs,
> -	 * never directly by the caller, so enforce that:
> -	 */
> -	if (WARN_ON_ONCE(gup_flags & FOLL_PIN))
> -		return -EINVAL;
> -
> -	return __get_user_pages_locked(current->mm, start, nr_pages,
> -				       pages, NULL, locked,
> -				       gup_flags | FOLL_TOUCH);
> -}
> -EXPORT_SYMBOL(get_user_pages_locked);
> -
>  /*
>   * get_user_pages_unlocked() is suitable to replace the form:
>   *
> diff --git a/mm/mempolicy.c b/mm/mempolicy.c
> index 028e8dd82b44..040d88354cfa 100644
> --- a/mm/mempolicy.c
> +++ b/mm/mempolicy.c
> @@ -907,17 +907,15 @@ static void get_policy_nodemask(struct mempolicy *p, nodemask_t *nodes)
>  static int lookup_node(struct mm_struct *mm, unsigned long addr)
>  {
>  	struct page *p = NULL;
> -	int err;
> +	int ret;
>  
> -	int locked = 1;
> -	err = get_user_pages_locked(addr & PAGE_MASK, 1, 0, &p, &locked);
> -	if (err > 0) {
> -		err = page_to_nid(p);
> +	mmap_assert_locked(mm);
> +	ret = get_user_pages(addr & PAGE_MASK, 1, 0, &p, NULL);
> +	if (ret > 0) {
> +		ret = page_to_nid(p);
>  		put_page(p);
>  	}
> -	if (locked)
> -		mmap_read_unlock(mm);
> -	return err;
> +	return ret;
>  }
>  
>  /* Retrieve NUMA policy */
> @@ -968,15 +966,15 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
>  	if (flags & MPOL_F_NODE) {
>  		if (flags & MPOL_F_ADDR) {
>  			/*
> -			 * Take a refcount on the mpol, lookup_node()
> -			 * will drop the mmap_lock, so after calling
> -			 * lookup_node() only "pol" remains valid, "vma"
> -			 * is stale.
> +			 * Take a refcount on the mpol, because we are about to
> +			 * drop the mmap_lock, after which only "pol" remains
> +			 * valid, "vma" is stale.
>  			 */
>  			pol_refcount = pol;
>  			vma = NULL;
>  			mpol_get(pol);
>  			err = lookup_node(mm, addr);
> +			mmap_read_unlock(mm);
>  			if (err < 0)
>  				goto out;
>  			*policy = err;
> -- 
> 2.35.0
>
Jason Gunthorpe Jan. 31, 2022, 1:36 p.m. UTC | #2
On Sun, Jan 30, 2022 at 09:17:52PM -0800, John Hubbard wrote:

> diff --git a/mm/mempolicy.c b/mm/mempolicy.c
> index 028e8dd82b44..040d88354cfa 100644
> +++ b/mm/mempolicy.c
> @@ -907,17 +907,15 @@ static void get_policy_nodemask(struct mempolicy *p, nodemask_t *nodes)
>  static int lookup_node(struct mm_struct *mm, unsigned long addr)
>  {
>  	struct page *p = NULL;
> -	int err;
> +	int ret;
>  
> -	int locked = 1;
> -	err = get_user_pages_locked(addr & PAGE_MASK, 1, 0, &p, &locked);
> -	if (err > 0) {
> -		err = page_to_nid(p);
> +	mmap_assert_locked(mm);
> +	ret = get_user_pages(addr & PAGE_MASK, 1, 0, &p, NULL);
> +	if (ret > 0) {
> +		ret = page_to_nid(p);
>  		put_page(p);
>  	}
> -	if (locked)
> -		mmap_read_unlock(mm);
> -	return err;
> +	return ret;
>  }
>  
>  /* Retrieve NUMA policy */
> @@ -968,15 +966,15 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
>  	if (flags & MPOL_F_NODE) {
>  		if (flags & MPOL_F_ADDR) {
>  			/*
> -			 * Take a refcount on the mpol, lookup_node()
> -			 * will drop the mmap_lock, so after calling
> -			 * lookup_node() only "pol" remains valid, "vma"
> -			 * is stale.
> +			 * Take a refcount on the mpol, because we are about to
> +			 * drop the mmap_lock, after which only "pol" remains
> +			 * valid, "vma" is stale.
>  			 */
>  			pol_refcount = pol;
>  			vma = NULL;
>  			mpol_get(pol);
>  			err = lookup_node(mm, addr);
> +			mmap_read_unlock(mm);

How about moving the mmap_read_unlock() up one line and then using
get_user_pages_fast()?

I'm guessing that in most cases here the PTE will be present, so that
should be a net win?

Jason
John Hubbard Jan. 31, 2022, 8:01 p.m. UTC | #3
On 1/31/22 04:05, Jan Kara wrote:
> On Sun 30-01-22 21:17:52, John Hubbard wrote:
>> Unraveling the rat's nest set of APIs in mm/gup.c a bit more.
>> get_user_pages_locked() was not helping at all, so remove it.
>>
>> Also, lookup_node() has only a single caller, but it is still worth
>> having a clearer locking policy there. Changing it so that the caller
>> both takes and releases the mmap_lock, thus leaving lookup_node() with
>> the sole job of translating a virtual address into a numa node ID.
>>
>> Signed-off-by: John Hubbard <jhubbard@nvidia.com>
> 
> Well, the point of _locked() GUP variants is that we can unlock mmap_sem
> when reading a page from the disk during a page fault (hidden behind
> VM_FAULT_RETRY). So as such _locked() variants are about reducing mmap_sem
> latency rather than code readability.  In this particular case, I don't
> think using _locked() variant in lookup_node() is very beneficial
> (generally I would not expect to take a fault there) but at least a
> justification in the commit message should be different :).
> 
> 								Honza

I'll rewrite this commit description to cover this point properly.
Jason also suggested using gup-fast, which I like.
  thanks,
John Hubbard Jan. 31, 2022, 8:01 p.m. UTC | #4
On 1/31/22 05:36, Jason Gunthorpe wrote:
...
>> @@ -968,15 +966,15 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
>>   	if (flags & MPOL_F_NODE) {
>>   		if (flags & MPOL_F_ADDR) {
>>   			/*
>> -			 * Take a refcount on the mpol, lookup_node()
>> -			 * will drop the mmap_lock, so after calling
>> -			 * lookup_node() only "pol" remains valid, "vma"
>> -			 * is stale.
>> +			 * Take a refcount on the mpol, because we are about to
>> +			 * drop the mmap_lock, after which only "pol" remains
>> +			 * valid, "vma" is stale.
>>   			 */
>>   			pol_refcount = pol;
>>   			vma = NULL;
>>   			mpol_get(pol);
>>   			err = lookup_node(mm, addr);
>> +			mmap_read_unlock(mm);
> 
> How about move the mmap_read_unlock up one line and then use
> get_user_pages_fast()
> 
> I'm guessing in most cases here the PTE will be present so that should
> be a net win?

Neat, I'll do that.


thanks,
diff mbox series

Patch

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 80c540c17d83..528ef1cb4f3a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1916,8 +1916,6 @@  long get_user_pages(unsigned long start, unsigned long nr_pages,
 long pin_user_pages(unsigned long start, unsigned long nr_pages,
 		    unsigned int gup_flags, struct page **pages,
 		    struct vm_area_struct **vmas);
-long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
-		    unsigned int gup_flags, struct page **pages, int *locked);
 long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
 		    struct page **pages, unsigned int gup_flags);
 long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
diff --git a/mm/gup.c b/mm/gup.c
index 58d01a96ab30..4a43c79f0972 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -2119,65 +2119,6 @@  long get_user_pages(unsigned long start, unsigned long nr_pages,
 }
 EXPORT_SYMBOL(get_user_pages);
 
-/**
- * get_user_pages_locked() - variant of get_user_pages()
- *
- * @start:      starting user address
- * @nr_pages:   number of pages from start to pin
- * @gup_flags:  flags modifying lookup behaviour
- * @pages:      array that receives pointers to the pages pinned.
- *              Should be at least nr_pages long. Or NULL, if caller
- *              only intends to ensure the pages are faulted in.
- * @locked:     pointer to lock flag indicating whether lock is held and
- *              subsequently whether VM_FAULT_RETRY functionality can be
- *              utilised. Lock must initially be held.
- *
- * It is suitable to replace the form:
- *
- *      mmap_read_lock(mm);
- *      do_something()
- *      get_user_pages(mm, ..., pages, NULL);
- *      mmap_read_unlock(mm);
- *
- *  to:
- *
- *      int locked = 1;
- *      mmap_read_lock(mm);
- *      do_something()
- *      get_user_pages_locked(mm, ..., pages, &locked);
- *      if (locked)
- *          mmap_read_unlock(mm);
- *
- * We can leverage the VM_FAULT_RETRY functionality in the page fault
- * paths better by using either get_user_pages_locked() or
- * get_user_pages_unlocked().
- *
- */
-long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
-			   unsigned int gup_flags, struct page **pages,
-			   int *locked)
-{
-	/*
-	 * FIXME: Current FOLL_LONGTERM behavior is incompatible with
-	 * FAULT_FLAG_ALLOW_RETRY because of the FS DAX check requirement on
-	 * vmas.  As there are no users of this flag in this call we simply
-	 * disallow this option for now.
-	 */
-	if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
-		return -EINVAL;
-	/*
-	 * FOLL_PIN must only be set internally by the pin_user_pages*() APIs,
-	 * never directly by the caller, so enforce that:
-	 */
-	if (WARN_ON_ONCE(gup_flags & FOLL_PIN))
-		return -EINVAL;
-
-	return __get_user_pages_locked(current->mm, start, nr_pages,
-				       pages, NULL, locked,
-				       gup_flags | FOLL_TOUCH);
-}
-EXPORT_SYMBOL(get_user_pages_locked);
-
 /*
  * get_user_pages_unlocked() is suitable to replace the form:
  *
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 028e8dd82b44..040d88354cfa 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -907,17 +907,15 @@  static void get_policy_nodemask(struct mempolicy *p, nodemask_t *nodes)
 static int lookup_node(struct mm_struct *mm, unsigned long addr)
 {
 	struct page *p = NULL;
-	int err;
+	int ret;
 
-	int locked = 1;
-	err = get_user_pages_locked(addr & PAGE_MASK, 1, 0, &p, &locked);
-	if (err > 0) {
-		err = page_to_nid(p);
+	mmap_assert_locked(mm);
+	ret = get_user_pages(addr & PAGE_MASK, 1, 0, &p, NULL);
+	if (ret > 0) {
+		ret = page_to_nid(p);
 		put_page(p);
 	}
-	if (locked)
-		mmap_read_unlock(mm);
-	return err;
+	return ret;
 }
 
 /* Retrieve NUMA policy */
@@ -968,15 +966,15 @@  static long do_get_mempolicy(int *policy, nodemask_t *nmask,
 	if (flags & MPOL_F_NODE) {
 		if (flags & MPOL_F_ADDR) {
 			/*
-			 * Take a refcount on the mpol, lookup_node()
-			 * will drop the mmap_lock, so after calling
-			 * lookup_node() only "pol" remains valid, "vma"
-			 * is stale.
+			 * Take a refcount on the mpol, because we are about to
+			 * drop the mmap_lock, after which only "pol" remains
+			 * valid, "vma" is stale.
 			 */
 			pol_refcount = pol;
 			vma = NULL;
 			mpol_get(pol);
 			err = lookup_node(mm, addr);
+			mmap_read_unlock(mm);
 			if (err < 0)
 				goto out;
 			*policy = err;