
[v2,2/4] KVM: x86/xen: Fix potential deadlock in kvm_xen_update_runstate_guest()

Message ID 20230111180651.14394-2-dwmw2@infradead.org (mailing list archive)
State New, archived
Series [v2,1/4] KVM: x86/xen: Fix lockdep warning on "recursive" gpc locking

Commit Message

David Woodhouse Jan. 11, 2023, 6:06 p.m. UTC
From: David Woodhouse <dwmw@amazon.co.uk>

The kvm_xen_update_runstate_guest() function can be called when the vCPU
is being scheduled out, from a preempt notifier. It *opportunistically*
updates the runstate area in the guest memory, if the gfn_to_pfn_cache
which caches the appropriate address is still valid.

If there is *contention* when it attempts to obtain gpc->lock, then
locking inside the priority inheritance checks may cause a deadlock.
Lockdep reports:

[13890.148997] Chain exists of:
                 &gpc->lock --> &p->pi_lock --> &rq->__lock

[13890.149002]  Possible unsafe locking scenario:

[13890.149003]        CPU0                    CPU1
[13890.149004]        ----                    ----
[13890.149005]   lock(&rq->__lock);
[13890.149007]                                lock(&p->pi_lock);
[13890.149009]                                lock(&rq->__lock);
[13890.149011]   lock(&gpc->lock);
[13890.149013]
                *** DEADLOCK ***

In the general case, if there's contention for a read lock on gpc->lock,
that's going to be because something else is either invalidating or
revalidating the cache. Either way, we've raced with seeing it in an
invalid state, in which case we would have aborted the opportunistic
update anyway.

So in the 'atomic' case when called from the preempt notifier, just
switch to using read_trylock() and avoid the PI handling altogether.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 arch/x86/kvm/xen.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)
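
For illustration only, here is a minimal userspace analogy of the locking scheme the
patch introduces (hypothetical names; a pthread rwlock standing in for the kernel's
rwlock_t and gfn_to_pfn_cache, and a plain pointer standing in for the mapped guest
page). The point is the same: a caller that must not block uses a trylock and simply
skips the opportunistic update on contention, while other callers may block as before.
This is not the kernel code itself.

	#include <pthread.h>
	#include <stdbool.h>
	#include <stdio.h>

	struct runstate_cache {
		pthread_rwlock_t lock;	/* protects 'valid' and 'mapped' */
		bool valid;
		long *mapped;		/* stand-in for the mapped guest page */
	};

	/* Opportunistic update: returns false if it could not (or should not) run. */
	static bool update_runstate(struct runstate_cache *c, long state, bool atomic)
	{
		if (atomic) {
			/* Must not block: bail out on any contention. */
			if (pthread_rwlock_tryrdlock(&c->lock) != 0)
				return false;
		} else {
			pthread_rwlock_rdlock(&c->lock);
		}

		if (!c->valid || !c->mapped) {
			/* Raced with invalidation; nothing to do. */
			pthread_rwlock_unlock(&c->lock);
			return false;
		}

		*c->mapped = state;	/* write through the still-valid mapping */
		pthread_rwlock_unlock(&c->lock);
		return true;
	}

	int main(void)
	{
		long page = 0;
		struct runstate_cache c = { .valid = true, .mapped = &page };

		pthread_rwlock_init(&c.lock, NULL);
		printf("update %s, page=%ld\n",
		       update_runstate(&c, 4, true) ? "done" : "skipped", page);
		pthread_rwlock_destroy(&c.lock);
		return 0;
	}

Build with e.g. gcc -pthread sketch.c. In the real code the 'atomic' path additionally
runs with interrupts disabled via local_irq_save(), as in the hunks below.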

Comments

Paul Durrant Jan. 12, 2023, 4:17 p.m. UTC | #1
On 11/01/2023 18:06, David Woodhouse wrote:
> From: David Woodhouse <dwmw@amazon.co.uk>
> 
> The kvm_xen_update_runstate_guest() function can be called when the vCPU
> is being scheduled out, from a preempt notifier. It *opportunistically*
> updates the runstate area in the guest memory, if the gfn_to_pfn_cache
> which caches the appropriate address is still valid.
> 
> If there is *contention* when it attempts to obtain gpc->lock, then
> locking inside the priority inheritance checks may cause a deadlock.
> Lockdep reports:
> 
> [13890.148997] Chain exists of:
>                   &gpc->lock --> &p->pi_lock --> &rq->__lock
> 
> [13890.149002]  Possible unsafe locking scenario:
> 
> [13890.149003]        CPU0                    CPU1
> [13890.149004]        ----                    ----
> [13890.149005]   lock(&rq->__lock);
> [13890.149007]                                lock(&p->pi_lock);
> [13890.149009]                                lock(&rq->__lock);
> [13890.149011]   lock(&gpc->lock);
> [13890.149013]
>                  *** DEADLOCK ***
> 
> In the general case, if there's contention for a read lock on gpc->lock,
> that's going to be because something else is either invalidating or
> revalidating the cache. Either way, we've raced with seeing it in an
> invalid state, in which case we would have aborted the opportunistic
> update anyway.
> 
> So in the 'atomic' case when called from the preempt notifier, just
> switch to using read_trylock() and avoid the PI handling altogether.
> 
> Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
> ---
>   arch/x86/kvm/xen.c | 19 +++++++++++++++++--
>   1 file changed, 17 insertions(+), 2 deletions(-)

Reviewed-by: Paul Durrant <paul@xen.org>

... with one suggestion below.

> 
> diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
> index 07e61cc9881e..809b82bdb9c3 100644
> --- a/arch/x86/kvm/xen.c
> +++ b/arch/x86/kvm/xen.c
> @@ -272,7 +272,15 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
>   	 * Attempt to obtain the GPC lock on *both* (if there are two)
>   	 * gfn_to_pfn caches that cover the region.
>   	 */
> -	read_lock_irqsave(&gpc1->lock, flags);
> +	if (atomic) {
> +		local_irq_save(flags);
> +		if (!read_trylock(&gpc1->lock)) {
> +			local_irq_restore(flags);
> +			return;
> +		}
> +	} else {
> +		read_lock_irqsave(&gpc1->lock, flags);
> +	}
>   	while (!kvm_gpc_check(gpc1, user_len1)) {
>   		read_unlock_irqrestore(&gpc1->lock, flags);
>   
> @@ -309,7 +317,14 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
>   		 * gpc1 lock to make lockdep shut up about it.
>   		 */
>   		lock_set_subclass(&gpc1->lock.dep_map, 1, _THIS_IP_);
> -		read_lock(&gpc2->lock);
> +		if (atomic) {
> +			if (!read_trylock(&gpc2->lock)) {

You could avoid the nesting in this case with:

if (atomic && !read_trylock(&gpc2->lock))

> +				read_unlock_irqrestore(&gpc1->lock, flags);
> +				return;
> +			}
> +		} else {
> +			read_lock(&gpc2->lock);
> +		}
>   
>   		if (!kvm_gpc_check(gpc2, user_len2)) {
>   			read_unlock(&gpc2->lock);
David Woodhouse Jan. 12, 2023, 4:27 p.m. UTC | #2
On Thu, 2023-01-12 at 16:17 +0000, Paul Durrant wrote:
> >    
> > @@ -309,7 +317,14 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
> >                  * gpc1 lock to make lockdep shut up about it.
> >                  */
> >                 lock_set_subclass(&gpc1->lock.dep_map, 1, _THIS_IP_);
> > -               read_lock(&gpc2->lock);
> > +               if (atomic) {
> > +                       if (!read_trylock(&gpc2->lock)) {
> 
> You could avoid the nesting in this case with:
> 
> if (atomic && !read_trylock(&gpc2->lock))
> 
> > +                               read_unlock_irqrestore(&gpc1->lock, flags);
> > +                               return;
> > +                       }
> > +               } else {
> > +                       read_lock(&gpc2->lock);
> > +               }

Hm? Wouldn't it take the lock twice then? It'd still take the 'else' branch.
Paul Durrant Jan. 12, 2023, 4:28 p.m. UTC | #3
On 12/01/2023 16:27, David Woodhouse wrote:
> On Thu, 2023-01-12 at 16:17 +0000, Paul Durrant wrote:
>>>     
>>> @@ -309,7 +317,14 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
>>>                   * gpc1 lock to make lockdep shut up about it.
>>>                   */
>>>                  lock_set_subclass(&gpc1->lock.dep_map, 1, _THIS_IP_);
>>> -               read_lock(&gpc2->lock);
>>> +               if (atomic) {
>>> +                       if (!read_trylock(&gpc2->lock)) {
>>
>> You could avoid the nesting in this case with:
>>
>> if (atomic && !read_trylock(&gpc2->lock))
>>
>>> +                               read_unlock_irqrestore(&gpc1->lock, flags);
>>> +                               return;
>>> +                       }
>>> +               } else {
>>> +                       read_lock(&gpc2->lock);
>>> +               }
> 
> Hm? Wouldn't it take the lock twice then? It'd still take the 'else' branch.

Actually, yes... So much for hoping to make it look prettier.

   Paul
David Woodhouse Jan. 12, 2023, 4:30 p.m. UTC | #4
On Thu, 2023-01-12 at 16:28 +0000, Paul Durrant wrote:
> On 12/01/2023 16:27, David Woodhouse wrote:
> > On Thu, 2023-01-12 at 16:17 +0000, Paul Durrant wrote:
> > > >     
> > > > @@ -309,7 +317,14 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
> > > >                   * gpc1 lock to make lockdep shut up about it.
> > > >                   */
> > > >                  lock_set_subclass(&gpc1->lock.dep_map, 1, _THIS_IP_);
> > > > -               read_lock(&gpc2->lock);
> > > > +               if (atomic) {
> > > > +                       if (!read_trylock(&gpc2->lock)) {
> > > 
> > > You could avoid the nesting in this case with:
> > > 
> > > if (atomic && !read_trylock(&gpc2->lock))
> > > 
> > > > +                               read_unlock_irqrestore(&gpc1->lock, flags);
> > > > +                               return;
> > > > +                       }
> > > > +               } else {
> > > > +                       read_lock(&gpc2->lock);
> > > > +               }
> > 
> > Hm? Wouldn't it take the lock twice then? It'd still take the 'else' branch.
> 
> Actually, yes... So much for hoping to make it look prettier.

I suppose we could avoid the extra indentation by making it


	if (atomic && !read_trylock(&gpc2->lock)) {
		read_unlock_irqrestore(&gpc1->lock, flags);
		return;
	} else if (!atomic) {
		read_lock(&gpc2->lock);
	}

Not sure it's much of an improvement in overall readability?
Paul Durrant Jan. 12, 2023, 4:31 p.m. UTC | #5
On 12/01/2023 16:30, David Woodhouse wrote:
> On Thu, 2023-01-12 at 16:28 +0000, Paul Durrant wrote:
>> On 12/01/2023 16:27, David Woodhouse wrote:
>>> On Thu, 2023-01-12 at 16:17 +0000, Paul Durrant wrote:
>>>>>      
>>>>> @@ -309,7 +317,14 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
>>>>>                    * gpc1 lock to make lockdep shut up about it.
>>>>>                    */
>>>>>                   lock_set_subclass(&gpc1->lock.dep_map, 1, _THIS_IP_);
>>>>> -               read_lock(&gpc2->lock);
>>>>> +               if (atomic) {
>>>>> +                       if (!read_trylock(&gpc2->lock)) {
>>>>
>>>> You could avoid the nesting in this case with:
>>>>
>>>> if (atomic && !read_trylock(&gpc2->lock))
>>>>
>>>>> +                               read_unlock_irqrestore(&gpc1->lock, flags);
>>>>> +                               return;
>>>>> +                       }
>>>>> +               } else {
>>>>> +                       read_lock(&gpc2->lock);
>>>>> +               }
>>>
>>> Hm? Wouldn't it take the lock twice then? It'd still take the 'else' branch.
>>
>> Actually, yes... So much for hoping to make it look prettier.
> 
> I suppose we could avoid the extra indentation by making it
> 
> 
> 	if (atomic && !read_trylock(&gpc2->lock)) {
> 		read_unlock_irqrestore(&gpc1->lock, flags);
> 		return;
> 	} else if (!atomic) {
> 		read_lock(&gpc2->lock);
> 	}
> 
> Not sure it's much of an improvement in overall readability?

No. May as well leave it as-is.

   Paul

Patch

diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 07e61cc9881e..809b82bdb9c3 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -272,7 +272,15 @@  static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
 	 * Attempt to obtain the GPC lock on *both* (if there are two)
 	 * gfn_to_pfn caches that cover the region.
 	 */
-	read_lock_irqsave(&gpc1->lock, flags);
+	if (atomic) {
+		local_irq_save(flags);
+		if (!read_trylock(&gpc1->lock)) {
+			local_irq_restore(flags);
+			return;
+		}
+	} else {
+		read_lock_irqsave(&gpc1->lock, flags);
+	}
 	while (!kvm_gpc_check(gpc1, user_len1)) {
 		read_unlock_irqrestore(&gpc1->lock, flags);
 
@@ -309,7 +317,14 @@  static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
 		 * gpc1 lock to make lockdep shut up about it.
 		 */
 		lock_set_subclass(&gpc1->lock.dep_map, 1, _THIS_IP_);
-		read_lock(&gpc2->lock);
+		if (atomic) {
+			if (!read_trylock(&gpc2->lock)) {
+				read_unlock_irqrestore(&gpc1->lock, flags);
+				return;
+			}
+		} else {
+			read_lock(&gpc2->lock);
+		}
 
 		if (!kvm_gpc_check(gpc2, user_len2)) {
 			read_unlock(&gpc2->lock);