
[4/5] mm/swap_state: fix potential faulted in race in swap_ra_info()

Message ID 20210408130820.48233-5-linmiaohe@huawei.com (mailing list archive)
State New, archived
Series: close various race windows for swap

Commit Message

Miaohe Lin April 8, 2021, 1:08 p.m. UTC
While the pte lock is released, somebody else might fault in this pte.
So we should check whether it's still a swap pte first to guard against
such a race, or the swp_type would be unexpected. This also avoids some
unnecessary readahead CPU cycles.

Fixes: ec560175c0b6 ("mm, swap: VMA based swap readahead")
Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
---
 mm/swap_state.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)
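
Schematically, the race being closed (a sketch inferred from the commit
message; the exact interleaving inside do_swap_page() may differ):

	/*
	 * CPU 0 (swap fault)             CPU 1
	 * ------------------             ------
	 * sees a swap pte, releases
	 * the pte lock
	 *                                faults the page back in;
	 *                                *pte becomes a present pte
	 * swap_ra_info()
	 *   pte_to_swp_entry(*pte)       <- misreads a present pte as
	 *                                   a swap entry, so swp_type()
	 *                                   is garbage
	 */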

Comments

Huang, Ying April 9, 2021, 8:50 a.m. UTC | #1
Miaohe Lin <linmiaohe@huawei.com> writes:

> While the pte lock is released, somebody else might fault in this pte.
> So we should check whether it's still a swap pte first to guard against
> such a race, or the swp_type would be unexpected. This also avoids some
> unnecessary readahead CPU cycles.
>
> Fixes: ec560175c0b6 ("mm, swap: VMA based swap readahead")
> Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
> ---
>  mm/swap_state.c | 13 +++++++++----
>  1 file changed, 9 insertions(+), 4 deletions(-)
>
> diff --git a/mm/swap_state.c b/mm/swap_state.c
> index 709c260d644a..3bf0d0c297bc 100644
> --- a/mm/swap_state.c
> +++ b/mm/swap_state.c
> @@ -724,10 +724,10 @@ static void swap_ra_info(struct vm_fault *vmf,
>  {
>  	struct vm_area_struct *vma = vmf->vma;
>  	unsigned long ra_val;
> -	swp_entry_t entry;
> +	swp_entry_t swap_entry;
>  	unsigned long faddr, pfn, fpfn;
>  	unsigned long start, end;
> -	pte_t *pte, *orig_pte;
> +	pte_t *pte, *orig_pte, entry;
>  	unsigned int max_win, hits, prev_win, win, left;
>  #ifndef CONFIG_64BIT
>  	pte_t *tpte;
> @@ -742,8 +742,13 @@ static void swap_ra_info(struct vm_fault *vmf,
>  
>  	faddr = vmf->address;
>  	orig_pte = pte = pte_offset_map(vmf->pmd, faddr);
> -	entry = pte_to_swp_entry(*pte);
> -	if ((unlikely(non_swap_entry(entry)))) {
> +	entry = *pte;
> +	if (unlikely(!is_swap_pte(entry))) {
> +		pte_unmap(orig_pte);
> +		return;
> +	}
> +	swap_entry = pte_to_swp_entry(entry);
> +	if ((unlikely(non_swap_entry(swap_entry)))) {
>  		pte_unmap(orig_pte);
>  		return;
>  	}

This isn't a real issue.  Neither entry nor swap_entry is used in this
function.  And we have enough checking when we really operate on the PTE
entries later.  But I admit it's confusing.  So I suggest just removing
the checking.  We will check it when necessary.
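
For context, the later checking lives in swap_vma_readahead()'s walk
over the copied PTEs, which already skips anything that is not a real
swap entry. Roughly (a sketch of that loop as of this era; details may
vary across releases):

	for (i = 0, pte = ra_info.ptes; i < ra_info.nr_pte; i++, pte++) {
		pentry = *pte;
		if (pte_none(pentry))
			continue;
		if (pte_present(pentry))
			continue;
		entry = pte_to_swp_entry(pentry);
		if (unlikely(non_swap_entry(entry)))
			continue;
		/* ... read ahead the page at this swap offset ... */
	}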

Best Regards,
Huang, Ying
Miaohe Lin April 9, 2021, 9 a.m. UTC | #2
On 2021/4/9 16:50, Huang, Ying wrote:
> Miaohe Lin <linmiaohe@huawei.com> writes:
> 
>> [snip]
> 
> This isn't a real issue.  Neither entry nor swap_entry is used in this

Agree. It seems entry or swap_entry here is just used to check whether
the pte is still a valid swap entry.

> function.  And we have enough checking when we really operate on the PTE
> entries later.  But I admit it's confusing.  So I suggest just removing
> the checking.  We will check it when necessary.

Sounds reasonable. Will do it in v2.
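
A minimal sketch of that direction (not the actual v2): drop the check
entirely and let the later PTE walk validate each entry, e.g.

 	faddr = vmf->address;
 	orig_pte = pte = pte_offset_map(vmf->pmd, faddr);
-	entry = pte_to_swp_entry(*pte);
-	if ((unlikely(non_swap_entry(entry)))) {
-		pte_unmap(orig_pte);
-		return;
-	}

The then-unused swp_entry_t local would go away as well.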

Many thanks for the review and reply!

> 
> Best Regards,
> Huang, Ying
Huang, Ying April 12, 2021, 12:55 a.m. UTC | #3
Miaohe Lin <linmiaohe@huawei.com> writes:

> On 2021/4/9 16:50, Huang, Ying wrote:
>> Miaohe Lin <linmiaohe@huawei.com> writes:
>> 
>>> [snip]
>> 
>> This isn't a real issue.  Neither entry nor swap_entry is used in this
>
> Agree. It seems entry or swap_entry here is just used to check whether
> the pte is still a valid swap entry.

If you check the git history, you will find that the check used to be
necessary, because the function was called earlier in do_swap_page() at
that time.

Best Regards,
Huang, Ying
Miaohe Lin April 12, 2021, 3:17 a.m. UTC | #4
On 2021/4/12 8:55, Huang, Ying wrote:
> Miaohe Lin <linmiaohe@huawei.com> writes:
> 
>> On 2021/4/9 16:50, Huang, Ying wrote:
>>> Miaohe Lin <linmiaohe@huawei.com> writes:
>>>
>>>> [snip]
>>>
>>> This isn't a real issue.  Neither entry nor swap_entry is used in this
>>
>> Agree. It seems entry or swap_entry here is just used to check whether
>> the pte is still a valid swap entry.
> 
> If you check the git history, you will find that the check used to be
> necessary, because the function was called earlier in do_swap_page() at
> that time.
> 

I see. Many thanks for the explanation. :)

> Best Regards,
> Huang, Ying

Patch

diff --git a/mm/swap_state.c b/mm/swap_state.c
index 709c260d644a..3bf0d0c297bc 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -724,10 +724,10 @@ static void swap_ra_info(struct vm_fault *vmf,
 {
 	struct vm_area_struct *vma = vmf->vma;
 	unsigned long ra_val;
-	swp_entry_t entry;
+	swp_entry_t swap_entry;
 	unsigned long faddr, pfn, fpfn;
 	unsigned long start, end;
-	pte_t *pte, *orig_pte;
+	pte_t *pte, *orig_pte, entry;
 	unsigned int max_win, hits, prev_win, win, left;
 #ifndef CONFIG_64BIT
 	pte_t *tpte;
@@ -742,8 +742,13 @@ static void swap_ra_info(struct vm_fault *vmf,
 
 	faddr = vmf->address;
 	orig_pte = pte = pte_offset_map(vmf->pmd, faddr);
-	entry = pte_to_swp_entry(*pte);
-	if ((unlikely(non_swap_entry(entry)))) {
+	entry = *pte;
+	if (unlikely(!is_swap_pte(entry))) {
+		pte_unmap(orig_pte);
+		return;
+	}
+	swap_entry = pte_to_swp_entry(entry);
+	if ((unlikely(non_swap_entry(swap_entry)))) {
 		pte_unmap(orig_pte);
 		return;
 	}
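
For reference, the is_swap_pte() helper used above (defined in
include/linux/swapops.h in kernels of this era) treats a PTE as holding
a swap entry when it is neither none nor present:

	static inline int is_swap_pte(pte_t pte)
	{
		return !pte_none(pte) && !pte_present(pte);
	}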