diff mbox series

[STABLE,4.9,1/1] mm, gup: add missing refcount overflow checks on x86 and s390

Message ID 20191129090351.3507-2-vbabka@suse.cz (mailing list archive)
State New, archived
Headers show
Series [STABLE,4.9,1/1] mm, gup: add missing refcount overflow checks on x86 and s390 | expand

Commit Message

Vlastimil Babka Nov. 29, 2019, 9:03 a.m. UTC
The mainline commit 8fde12ca79af ("mm: prevent get_user_pages() from
overflowing page refcount") was backported to 4.9.y stable as commit
2ed768cfd895. The backport, however, missed that 4.9 has several
arch-specific gup.c versions with their own fast gup implementations, and
those do not prevent refcount overflow.

This is partially fixed for x86 in the stable-only commit d73af79742e7 ("x86, mm,
gup: prevent get_page() race with munmap in paravirt guest"). The present
stable-only commit adds the missing parts to the x86 version, as well as to the
s390 version, both taken from the SUSE SLES/openSUSE 4.12-based kernels.

The remaining architectures with their own gup.c are sparc, mips and sh. It's
unlikely that the known overflow scenario based on FUSE, which needs 140GB of
RAM, is a problem for those architectures, and I don't feel confident enough to
patch them.

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
---
 arch/s390/mm/gup.c |  9 ++++++---
 arch/x86/mm/gup.c  | 10 ++++++++--
 2 files changed, 14 insertions(+), 5 deletions(-)

Comments

Ben Hutchings Dec. 3, 2019, 12:22 p.m. UTC | #1
On Fri, 2019-11-29 at 10:03 +0100, Vlastimil Babka wrote:
> The mainline commit 8fde12ca79af ("mm: prevent get_user_pages() from
> overflowing page refcount") was backported to 4.9.y stable as commit
> 2ed768cfd895. The backport however missed that in 4.9, there are several
> arch-specific gup.c versions with fast gup implementations, so these do not
> prevent refcount overflow.
> 
> This is partially fixed for x86 in stable-only commit d73af79742e7 ("x86, mm,
> gup: prevent get_page() race with munmap in paravirt guest"). This stable-only
> commit adds missing parts to x86 version, as well as s390 version, both taken
> from the SUSE SLES/openSUSE 4.12-based kernels.
> 
> The remaining architectures with own gup.c are sparc, mips, sh. It's unlikely
> the known overflow scenario based on FUSE, which needs 140GB of RAM, is a
> problem for those architectures, and I don't feel confident enough to patch
> them.
> 
> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
> ---
>  arch/s390/mm/gup.c |  9 ++++++---
>  arch/x86/mm/gup.c  | 10 ++++++++--
>  2 files changed, 14 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
> index 97fc449a7470..33a940389a6d 100644
> --- a/arch/s390/mm/gup.c
> +++ b/arch/s390/mm/gup.c
> @@ -38,7 +38,8 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
>  		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
>  		page = pte_page(pte);
>  		head = compound_head(page);
> -		if (!page_cache_get_speculative(head))
> +		if (unlikely(WARN_ON_ONCE(page_ref_count(head) < 0)

No need for unlikely(); WARN_ON() includes that.

> +		    || !page_cache_get_speculative(head)))
>  			return 0;
>  		if (unlikely(pte_val(pte) != pte_val(*ptep))) {
>  			put_page(head);
[...]
> --- a/arch/x86/mm/gup.c
> +++ b/arch/x86/mm/gup.c
> @@ -202,10 +202,12 @@ static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr,
>  			undo_dev_pagemap(nr, nr_start, pages);
>  			return 0;
>  		}
> +		if (unlikely(!try_get_page(page))) {
> +			put_dev_pagemap(pgmap);
> +			return 0;
> +		}
>  		SetPageReferenced(page);
>  		pages[*nr] = page;
> -		get_page(page);
> -		put_dev_pagemap(pgmap);

This leaks a pgmap reference on success!

>  		(*nr)++;
>  		pfn++;
>  	} while (addr += PAGE_SIZE, addr != end);
> @@ -230,6 +232,8 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
>  
>  	refs = 0;
>  	head = pmd_page(pmd);
> +	if (WARN_ON_ONCE(page_ref_count(head) <= 0))

Why <= 0, given we use < 0 elsewhere?

> +		return 0;
>  	page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
>  	do {
>  		VM_BUG_ON_PAGE(compound_head(page) != head, page);
> @@ -289,6 +293,8 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr,
>  
>  	refs = 0;
>  	head = pud_page(pud);
> +	if (WARN_ON_ONCE(page_ref_count(head) <= 0))

Same question here.

Ben.

> +		return 0;
>  	page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
>  	do {
>  		VM_BUG_ON_PAGE(compound_head(page) != head, page);
Vlastimil Babka Dec. 3, 2019, 12:46 p.m. UTC | #2
On 12/3/19 1:22 PM, Ben Hutchings wrote:
>> +		    || !page_cache_get_speculative(head)))
>>  			return 0;
>>  		if (unlikely(pte_val(pte) != pte_val(*ptep))) {
>>  			put_page(head);
> [...]
>> --- a/arch/x86/mm/gup.c
>> +++ b/arch/x86/mm/gup.c
>> @@ -202,10 +202,12 @@ static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr,
>>  			undo_dev_pagemap(nr, nr_start, pages);
>>  			return 0;
>>  		}
>> +		if (unlikely(!try_get_page(page))) {
>> +			put_dev_pagemap(pgmap);
>> +			return 0;
>> +		}
>>  		SetPageReferenced(page);
>>  		pages[*nr] = page;
>> -		get_page(page);
>> -		put_dev_pagemap(pgmap);
> 
> This leaks a pgmap reference on success!

Good catch, deleted one line too many!

>>  		(*nr)++;
>>  		pfn++;
>>  	} while (addr += PAGE_SIZE, addr != end);
>> @@ -230,6 +232,8 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
>>  
>>  	refs = 0;
>>  	head = pmd_page(pmd);
>> +	if (WARN_ON_ONCE(page_ref_count(head) <= 0))
> 
> Why <= 0, given we use < 0 elsewhere?

The code uses get_head_page_multiple() which boils down to atomic_add
and not add_unless_zero(), so it assumes a pre-existing pin that must
not go away or it's a bug (one that I've been hunting recently in this
area). The check makes it explicit.

> 
>> +		return 0;
>>  	page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
>>  	do {
>>  		VM_BUG_ON_PAGE(compound_head(page) != head, page);
>> @@ -289,6 +293,8 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr,
>>  
>>  	refs = 0;
>>  	head = pud_page(pud);
>> +	if (WARN_ON_ONCE(page_ref_count(head) <= 0))
> 
> Same question here.

Same as above.

> Ben.
> 
>> +		return 0;
>>  	page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
>>  	do {
>>  		VM_BUG_ON_PAGE(compound_head(page) != head, page);
diff mbox series

Patch

diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index 97fc449a7470..33a940389a6d 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -38,7 +38,8 @@  static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
 		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
 		page = pte_page(pte);
 		head = compound_head(page);
-		if (!page_cache_get_speculative(head))
+		if (unlikely(WARN_ON_ONCE(page_ref_count(head) < 0)
+		    || !page_cache_get_speculative(head)))
 			return 0;
 		if (unlikely(pte_val(pte) != pte_val(*ptep))) {
 			put_page(head);
@@ -76,7 +77,8 @@  static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
 		refs++;
 	} while (addr += PAGE_SIZE, addr != end);
 
-	if (!page_cache_add_speculative(head, refs)) {
+	if (unlikely(WARN_ON_ONCE(page_ref_count(head) < 0)
+	    || !page_cache_add_speculative(head, refs))) {
 		*nr -= refs;
 		return 0;
 	}
@@ -150,7 +152,8 @@  static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr,
 		refs++;
 	} while (addr += PAGE_SIZE, addr != end);
 
-	if (!page_cache_add_speculative(head, refs)) {
+	if (unlikely(WARN_ON_ONCE(page_ref_count(head) < 0)
+	    || !page_cache_add_speculative(head, refs))) {
 		*nr -= refs;
 		return 0;
 	}
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index d7db45bdfb3b..551fc7fea046 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -202,10 +202,12 @@  static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr,
 			undo_dev_pagemap(nr, nr_start, pages);
 			return 0;
 		}
+		if (unlikely(!try_get_page(page))) {
+			put_dev_pagemap(pgmap);
+			return 0;
+		}
 		SetPageReferenced(page);
 		pages[*nr] = page;
-		get_page(page);
-		put_dev_pagemap(pgmap);
 		(*nr)++;
 		pfn++;
 	} while (addr += PAGE_SIZE, addr != end);
@@ -230,6 +232,8 @@  static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr,
 
 	refs = 0;
 	head = pmd_page(pmd);
+	if (WARN_ON_ONCE(page_ref_count(head) <= 0))
+		return 0;
 	page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
 	do {
 		VM_BUG_ON_PAGE(compound_head(page) != head, page);
@@ -289,6 +293,8 @@  static noinline int gup_huge_pud(pud_t pud, unsigned long addr,
 
 	refs = 0;
 	head = pud_page(pud);
+	if (WARN_ON_ONCE(page_ref_count(head) <= 0))
+		return 0;
 	page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
 	do {
 		VM_BUG_ON_PAGE(compound_head(page) != head, page);