
[v1,3/3] mm: merge folio_is_secretmem() into folio_fast_pin_allowed()

Message ID: 20240325134114.257544-4-david@redhat.com
State: New
Series: mm/secretmem: one fix and one refactoring

Commit Message

David Hildenbrand March 25, 2024, 1:41 p.m. UTC
folio_is_secretmem() is currently only used during GUP-fast, and using
it in wrong context where concurrent truncation might happen, could be
problematic.

Nowadays, folio_fast_pin_allowed() performs similar checks during
GUP-fast and contains a lot of careful handling -- READ_ONCE( -- ), sanity
checks -- lockdep_assert_irqs_disabled() --  and helpful comments on how
this handling is safe and correct.

So let's merge folio_is_secretmem() into folio_fast_pin_allowed(), still
avoiding checking the actual mapping only if really required.

Signed-off-by: David Hildenbrand <david@redhat.com>
---
 include/linux/secretmem.h | 21 ++-------------------
 mm/gup.c                  | 33 +++++++++++++++++++++------------
 2 files changed, 23 insertions(+), 31 deletions(-)
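
For readers skimming the hunks below: a minimal userspace model of the decision
flow folio_fast_pin_allowed() ends up with after this patch. The struct, the
pin_allowed() helper and its boolean fields are illustrative stand-ins, not the
kernel API, and the folio->mapping sanity checks that the diff context elides
are reduced to a comment -- the point is only how reject_file_backed and
check_secretmem combine.

/*
 * Toy model of the post-patch folio_fast_pin_allowed() decision flow.
 * Not kernel code: folio state is faked with plain booleans so the
 * combination of reject_file_backed and check_secretmem can be run and
 * inspected in userspace.  Build: cc -Wall -o pin_model pin_model.c
 */
#include <stdbool.h>
#include <stdio.h>

#define FOLL_WRITE	0x01
#define FOLL_PIN	0x02
#define FOLL_LONGTERM	0x04

struct folio_model {
	bool large, lru, slab, hugetlb;	/* folio_test_*() stand-ins */
	bool anon, shmem, secretmem;	/* what the mapping checks would find */
};

static bool pin_allowed(const struct folio_model *f, unsigned int flags,
			bool config_secretmem)
{
	bool reject_file_backed = false;
	bool check_secretmem = false;

	/* Long-term writable pins of file-backed folios are the problem case. */
	if ((flags & (FOLL_PIN | FOLL_LONGTERM | FOLL_WRITE)) ==
	    (FOLL_PIN | FOLL_LONGTERM | FOLL_WRITE))
		reject_file_backed = true;

	/* secretmem folios are only order-0 folios and never LRU folios. */
	if (config_secretmem && !f->large && !f->lru)
		check_secretmem = true;

	/* Neither concern applies: allow without looking at the mapping. */
	if (!reject_file_backed && !check_secretmem)
		return true;

	if (f->slab)
		return false;
	/* hugetlb neither requires dirty-tracking nor can be secretmem. */
	if (f->hugetlb)
		return true;
	/*
	 * The kernel validates folio->mapping with READ_ONCE() here and
	 * returns early for anonymous folios; simplified in this model.
	 */
	if (f->anon)
		return true;
	if (check_secretmem && f->secretmem)
		return false;
	/* The only remaining allowed file system is shmem. */
	return !reject_file_backed || f->shmem;
}

int main(void)
{
	struct folio_model secret = { .secretmem = true };
	struct folio_model shmem = { .lru = true, .shmem = true };

	/* Secretmem is now rejected here instead of in gup_pte_range(). */
	printf("FOLL_PIN on secretmem:            %d\n",
	       pin_allowed(&secret, FOLL_PIN, true));
	/* Long-term writable pin of shmem stays allowed, as before. */
	printf("FOLL_PIN|LONGTERM|WRITE on shmem: %d\n",
	       pin_allowed(&shmem, FOLL_PIN | FOLL_LONGTERM | FOLL_WRITE, true));
	return 0;
}

Compiled with any C99 compiler, the two printf() lines show the secretmem folio
being rejected (0) while the long-term writable shmem pin is still allowed (1).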

Comments

Mike Rapoport March 26, 2024, 6:30 a.m. UTC | #1
On Mon, Mar 25, 2024 at 02:41:14PM +0100, David Hildenbrand wrote:
> folio_is_secretmem() is currently only used during GUP-fast, and using
> it in wrong context where concurrent truncation might happen, could be
> problematic.
> 
> Nowadays, folio_fast_pin_allowed() performs similar checks during
> GUP-fast and contains a lot of careful handling -- READ_ONCE( -- ), sanity
> checks -- lockdep_assert_irqs_disabled() --  and helpful comments on how
> this handling is safe and correct.
> 
> So let's merge folio_is_secretmem() into folio_fast_pin_allowed(), still
> avoiding checking the actual mapping only if really required.
> 
> Signed-off-by: David Hildenbrand <david@redhat.com>

Reviewed-by: Mike Rapoport (IBM) <rppt@kernel.org>

A few comments below, no strong feelings about them.

> ---
>  include/linux/secretmem.h | 21 ++-------------------
>  mm/gup.c                  | 33 +++++++++++++++++++++------------
>  2 files changed, 23 insertions(+), 31 deletions(-)
> 
> diff --git a/include/linux/secretmem.h b/include/linux/secretmem.h
> index 6996f1f53f14..e918f96881f5 100644
> --- a/include/linux/secretmem.h
> +++ b/include/linux/secretmem.h
> @@ -6,25 +6,8 @@
>  
>  extern const struct address_space_operations secretmem_aops;
>  
> -static inline bool folio_is_secretmem(struct folio *folio)
> +static inline bool secretmem_mapping(struct address_space *mapping)
>  {
> -	struct address_space *mapping;
> -
> -	/*
> -	 * Using folio_mapping() is quite slow because of the actual call
> -	 * instruction.
> -	 * We know that secretmem pages are not compound and LRU so we can
> -	 * save a couple of cycles here.
> -	 */
> -	if (folio_test_large(folio) || folio_test_lru(folio))
> -		return false;
> -
> -	mapping = (struct address_space *)
> -		((unsigned long)folio->mapping & ~PAGE_MAPPING_FLAGS);
> -
> -	if (!mapping || mapping != folio->mapping)
> -		return false;
> -
>  	return mapping->a_ops == &secretmem_aops;
>  }
>  
> @@ -38,7 +21,7 @@ static inline bool vma_is_secretmem(struct vm_area_struct *vma)
>  	return false;
>  }
>  
> -static inline bool folio_is_secretmem(struct folio *folio)
> +static inline bool secretmem_mapping(struct address_space *mapping)
>  {
>  	return false;
>  }
> diff --git a/mm/gup.c b/mm/gup.c
> index e7510b6ce765..69d8bc8e4451 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -2472,6 +2472,8 @@ EXPORT_SYMBOL(get_user_pages_unlocked);
>   * This call assumes the caller has pinned the folio, that the lowest page table
>   * level still points to this folio, and that interrupts have been disabled.
>   *
> + * GUP-fast must reject all secretmem folios.
> + *
>   * Writing to pinned file-backed dirty tracked folios is inherently problematic
>   * (see comment describing the writable_file_mapping_allowed() function). We
>   * therefore try to avoid the most egregious case of a long-term mapping doing
> @@ -2484,22 +2486,32 @@ EXPORT_SYMBOL(get_user_pages_unlocked);
>  static bool folio_fast_pin_allowed(struct folio *folio, unsigned int flags)

Now that this function performs checks for GUP in general, maybe it's worth
renaming it to, say, folio_fast_gup_allowed.

>  {
>  	struct address_space *mapping;
> +	bool check_secretmem = false;
> +	bool reject_file_backed = false;
>  	unsigned long mapping_flags;
>  
>  	/*
>  	 * If we aren't pinning then no problematic write can occur. A long term
>  	 * pin is the most egregious case so this is the one we disallow.
>  	 */
> -	if ((flags & (FOLL_PIN | FOLL_LONGTERM | FOLL_WRITE)) !=
> +	if ((flags & (FOLL_PIN | FOLL_LONGTERM | FOLL_WRITE)) ==
>  	    (FOLL_PIN | FOLL_LONGTERM | FOLL_WRITE))
> -		return true;
> +		reject_file_backed = true;
> +
> +	/* We hold a folio reference, so we can safely access folio fields. */
>  
> -	/* The folio is pinned, so we can safely access folio fields. */
> +	/* secretmem folios are only order-0 folios and never LRU folios. */

Nit:                           ^ always

> +	if (IS_ENABLED(CONFIG_SECRETMEM) && !folio_test_large(folio) &&
> +	    !folio_test_lru(folio))
> +		check_secretmem = true;
> +
> +	if (!reject_file_backed && !check_secretmem)
> +		return true;
>  
>  	if (WARN_ON_ONCE(folio_test_slab(folio)))
>  		return false;
>  
> -	/* hugetlb mappings do not require dirty-tracking. */
> +	/* hugetlb neither requires dirty-tracking nor can be secretmem. */
>  	if (folio_test_hugetlb(folio))
>  		return true;
>  
> @@ -2535,10 +2547,12 @@ static bool folio_fast_pin_allowed(struct folio *folio, unsigned int flags)
>  
>  	/*
>  	 * At this point, we know the mapping is non-null and points to an
> -	 * address_space object. The only remaining whitelisted file system is
> -	 * shmem.
> +	 * address_space object.
>  	 */
> -	return shmem_mapping(mapping);
> +	if (check_secretmem && secretmem_mapping(mapping))
> +		return false;
> +	/* The only remaining allowed file system is shmem. */
> +	return !reject_file_backed || shmem_mapping(mapping);
>  }
>  
>  static void __maybe_unused undo_dev_pagemap(int *nr, int nr_start,
> @@ -2624,11 +2638,6 @@ static int gup_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
>  		if (!folio)
>  			goto pte_unmap;
>  
> -		if (unlikely(folio_is_secretmem(folio))) {
> -			gup_put_folio(folio, 1, flags);
> -			goto pte_unmap;
> -		}
> -
>  		if (unlikely(pmd_val(pmd) != pmd_val(*pmdp)) ||
>  		    unlikely(pte_val(pte) != pte_val(ptep_get(ptep)))) {
>  			gup_put_folio(folio, 1, flags);
> -- 
> 2.43.2
>
David Hildenbrand March 26, 2024, 8:40 a.m. UTC | #2
On 26.03.24 07:30, Mike Rapoport wrote:
> On Mon, Mar 25, 2024 at 02:41:14PM +0100, David Hildenbrand wrote:
>> folio_is_secretmem() is currently only used during GUP-fast, and using
>> it in wrong context where concurrent truncation might happen, could be
>> problematic.
>>
>> Nowadays, folio_fast_pin_allowed() performs similar checks during
>> GUP-fast and contains a lot of careful handling -- READ_ONCE( -- ), sanity

Re-reading my stuff ...

s/( -- )/() --/

>> checks -- lockdep_assert_irqs_disabled() --  and helpful comments on how
>> this handling is safe and correct.
>>
>> So let's merge folio_is_secretmem() into folio_fast_pin_allowed(), still
>> avoiding checking the actual mapping only if really required.

s/avoiding//

>>
>> Signed-off-by: David Hildenbrand <david@redhat.com>
> 
> Reviewed-by: Mike Rapoport (IBM) <rppt@kernel.org>
> 

Thanks!

> A few comments below, no strong feelings about them.
> 
>> ---
>>   include/linux/secretmem.h | 21 ++-------------------
>>   mm/gup.c                  | 33 +++++++++++++++++++++------------
>>   2 files changed, 23 insertions(+), 31 deletions(-)
>>
>> diff --git a/include/linux/secretmem.h b/include/linux/secretmem.h
>> index 6996f1f53f14..e918f96881f5 100644
>> --- a/include/linux/secretmem.h
>> +++ b/include/linux/secretmem.h
>> @@ -6,25 +6,8 @@
>>   
>>   extern const struct address_space_operations secretmem_aops;
>>   
>> -static inline bool folio_is_secretmem(struct folio *folio)
>> +static inline bool secretmem_mapping(struct address_space *mapping)
>>   {
>> -	struct address_space *mapping;
>> -
>> -	/*
>> -	 * Using folio_mapping() is quite slow because of the actual call
>> -	 * instruction.
>> -	 * We know that secretmem pages are not compound and LRU so we can
>> -	 * save a couple of cycles here.
>> -	 */
>> -	if (folio_test_large(folio) || folio_test_lru(folio))
>> -		return false;
>> -
>> -	mapping = (struct address_space *)
>> -		((unsigned long)folio->mapping & ~PAGE_MAPPING_FLAGS);
>> -
>> -	if (!mapping || mapping != folio->mapping)
>> -		return false;
>> -
>>   	return mapping->a_ops == &secretmem_aops;
>>   }
>>   
>> @@ -38,7 +21,7 @@ static inline bool vma_is_secretmem(struct vm_area_struct *vma)
>>   	return false;
>>   }
>>   
>> -static inline bool folio_is_secretmem(struct folio *folio)
>> +static inline bool secretmem_mapping(struct address_space *mapping)
>>   {
>>   	return false;
>>   }
>> diff --git a/mm/gup.c b/mm/gup.c
>> index e7510b6ce765..69d8bc8e4451 100644
>> --- a/mm/gup.c
>> +++ b/mm/gup.c
>> @@ -2472,6 +2472,8 @@ EXPORT_SYMBOL(get_user_pages_unlocked);
>>    * This call assumes the caller has pinned the folio, that the lowest page table
>>    * level still points to this folio, and that interrupts have been disabled.
>>    *
>> + * GUP-fast must reject all secretmem folios.
>> + *
>>    * Writing to pinned file-backed dirty tracked folios is inherently problematic
>>    * (see comment describing the writable_file_mapping_allowed() function). We
>>    * therefore try to avoid the most egregious case of a long-term mapping doing
>> @@ -2484,22 +2486,32 @@ EXPORT_SYMBOL(get_user_pages_unlocked);
>>   static bool folio_fast_pin_allowed(struct folio *folio, unsigned int flags)
> 
> Now that this function performs checks for GUP in general, maybe it's worth
> renaming it to, say, folio_fast_gup_allowed.

Had the exact same thought, so I'll do it!

Not sure about "fast gup" vs. "gup fast" vs. "lockless gup"; it's all
inconsistent, and we should likely clean that up.

Likely, we should just prefix all relevant functions with "gup_fast". 
I'll call this "gup_fast_folio_allowed" for now.

The first sentence of the function description then becomes: "Used in the
GUP-fast path to determine whether GUP is permitted to work on a specific folio."

> 
>>   {
>>   	struct address_space *mapping;
>> +	bool check_secretmem = false;
>> +	bool reject_file_backed = false;
>>   	unsigned long mapping_flags;
>>   
>>   	/*
>>   	 * If we aren't pinning then no problematic write can occur. A long term
>>   	 * pin is the most egregious case so this is the one we disallow.
>>   	 */
>> -	if ((flags & (FOLL_PIN | FOLL_LONGTERM | FOLL_WRITE)) !=
>> +	if ((flags & (FOLL_PIN | FOLL_LONGTERM | FOLL_WRITE)) ==
>>   	    (FOLL_PIN | FOLL_LONGTERM | FOLL_WRITE))
>> -		return true;
>> +		reject_file_backed = true;
>> +
>> +	/* We hold a folio reference, so we can safely access folio fields. */
>>   
>> -	/* The folio is pinned, so we can safely access folio fields. */
>> +	/* secretmem folios are only order-0 folios and never LRU folios. */
> 
> Nit:                           ^ always

ack


Thanks for the review!
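
As a sketch of where this is heading (the eventual v2 may word it differently;
only the new function name, the first sentence and the secretmem line come from
this thread, the rest is taken from the existing comment in the hunk above):

/*
 * Used in the GUP-fast path to determine whether GUP is permitted to work
 * on a specific folio.
 *
 * This call assumes the caller has pinned the folio, that the lowest page
 * table level still points to this folio, and that interrupts have been
 * disabled.
 *
 * GUP-fast must reject all secretmem folios.
 * [...]
 */
static bool gup_fast_folio_allowed(struct folio *folio, unsigned int flags)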

Patch

diff --git a/include/linux/secretmem.h b/include/linux/secretmem.h
index 6996f1f53f14..e918f96881f5 100644
--- a/include/linux/secretmem.h
+++ b/include/linux/secretmem.h
@@ -6,25 +6,8 @@ 
 
 extern const struct address_space_operations secretmem_aops;
 
-static inline bool folio_is_secretmem(struct folio *folio)
+static inline bool secretmem_mapping(struct address_space *mapping)
 {
-	struct address_space *mapping;
-
-	/*
-	 * Using folio_mapping() is quite slow because of the actual call
-	 * instruction.
-	 * We know that secretmem pages are not compound and LRU so we can
-	 * save a couple of cycles here.
-	 */
-	if (folio_test_large(folio) || folio_test_lru(folio))
-		return false;
-
-	mapping = (struct address_space *)
-		((unsigned long)folio->mapping & ~PAGE_MAPPING_FLAGS);
-
-	if (!mapping || mapping != folio->mapping)
-		return false;
-
 	return mapping->a_ops == &secretmem_aops;
 }
 
@@ -38,7 +21,7 @@  static inline bool vma_is_secretmem(struct vm_area_struct *vma)
 	return false;
 }
 
-static inline bool folio_is_secretmem(struct folio *folio)
+static inline bool secretmem_mapping(struct address_space *mapping)
 {
 	return false;
 }
diff --git a/mm/gup.c b/mm/gup.c
index e7510b6ce765..69d8bc8e4451 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -2472,6 +2472,8 @@  EXPORT_SYMBOL(get_user_pages_unlocked);
  * This call assumes the caller has pinned the folio, that the lowest page table
  * level still points to this folio, and that interrupts have been disabled.
  *
+ * GUP-fast must reject all secretmem folios.
+ *
  * Writing to pinned file-backed dirty tracked folios is inherently problematic
  * (see comment describing the writable_file_mapping_allowed() function). We
  * therefore try to avoid the most egregious case of a long-term mapping doing
@@ -2484,22 +2486,32 @@  EXPORT_SYMBOL(get_user_pages_unlocked);
 static bool folio_fast_pin_allowed(struct folio *folio, unsigned int flags)
 {
 	struct address_space *mapping;
+	bool check_secretmem = false;
+	bool reject_file_backed = false;
 	unsigned long mapping_flags;
 
 	/*
 	 * If we aren't pinning then no problematic write can occur. A long term
 	 * pin is the most egregious case so this is the one we disallow.
 	 */
-	if ((flags & (FOLL_PIN | FOLL_LONGTERM | FOLL_WRITE)) !=
+	if ((flags & (FOLL_PIN | FOLL_LONGTERM | FOLL_WRITE)) ==
 	    (FOLL_PIN | FOLL_LONGTERM | FOLL_WRITE))
-		return true;
+		reject_file_backed = true;
+
+	/* We hold a folio reference, so we can safely access folio fields. */
 
-	/* The folio is pinned, so we can safely access folio fields. */
+	/* secretmem folios are only order-0 folios and never LRU folios. */
+	if (IS_ENABLED(CONFIG_SECRETMEM) && !folio_test_large(folio) &&
+	    !folio_test_lru(folio))
+		check_secretmem = true;
+
+	if (!reject_file_backed && !check_secretmem)
+		return true;
 
 	if (WARN_ON_ONCE(folio_test_slab(folio)))
 		return false;
 
-	/* hugetlb mappings do not require dirty-tracking. */
+	/* hugetlb neither requires dirty-tracking nor can be secretmem. */
 	if (folio_test_hugetlb(folio))
 		return true;
 
@@ -2535,10 +2547,12 @@  static bool folio_fast_pin_allowed(struct folio *folio, unsigned int flags)
 
 	/*
 	 * At this point, we know the mapping is non-null and points to an
-	 * address_space object. The only remaining whitelisted file system is
-	 * shmem.
+	 * address_space object.
 	 */
-	return shmem_mapping(mapping);
+	if (check_secretmem && secretmem_mapping(mapping))
+		return false;
+	/* The only remaining allowed file system is shmem. */
+	return !reject_file_backed || shmem_mapping(mapping);
 }
 
 static void __maybe_unused undo_dev_pagemap(int *nr, int nr_start,
@@ -2624,11 +2638,6 @@  static int gup_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
 		if (!folio)
 			goto pte_unmap;
 
-		if (unlikely(folio_is_secretmem(folio))) {
-			gup_put_folio(folio, 1, flags);
-			goto pte_unmap;
-		}
-
 		if (unlikely(pmd_val(pmd) != pmd_val(*pmdp)) ||
 		    unlikely(pte_val(pte) != pte_val(ptep_get(ptep)))) {
 			gup_put_folio(folio, 1, flags);