diff mbox series

[4/5] kvm: Replace vcpu->swait with rcuwait

Message ID 20200422040739.18601-5-dave@stgolabs.net (mailing list archive)
State New, archived
Headers show
Series kvm: Use rcuwait for vcpu blocking | expand

Commit Message

Davidlohr Bueso April 22, 2020, 4:07 a.m. UTC
The use of any sort of waitqueue (simple or regular) for
wait/waking vcpus has always been an overkill and semantically
wrong. Because this is per-vcpu (which is blocked) there is
only ever a single waiting vcpu, thus no need for any sort of
queue.

As such, make use of the rcuwait primitive, with the following
considerations:

  - rcuwait already provides the proper barriers that serialize
  concurrent waiter and waker.

  - Task wakeup is done in rcu read critical region, with a
  stable task pointer.

  - Because there is no concurrency among waiters, we need
  not worry about rcuwait_wait_event() calls corrupting
  the wait->task. As a consequence, this saves the locking
  done in swait when modifying the queue. This also applies
  to per-vcore wait for powerpc kvm-hv.

The x86 tscdeadline_latency test mentioned in 8577370fb0cb
("KVM: Use simple waitqueue for vcpu->wq") shows that, on avg,
latency is reduced by around 15-20% with this change.

Cc: Paul Mackerras <paulus@ozlabs.org>
Cc: kvmarm@lists.cs.columbia.edu
Cc: linux-mips@vger.kernel.org
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
---
 arch/mips/kvm/mips.c                  |  6 ++----
 arch/powerpc/include/asm/kvm_book3s.h |  2 +-
 arch/powerpc/include/asm/kvm_host.h   |  2 +-
 arch/powerpc/kvm/book3s_hv.c          | 22 ++++++++--------------
 arch/powerpc/kvm/powerpc.c            |  2 +-
 arch/x86/kvm/lapic.c                  |  2 +-
 include/linux/kvm_host.h              | 10 +++++-----
 virt/kvm/arm/arch_timer.c             |  2 +-
 virt/kvm/arm/arm.c                    |  9 +++++----
 virt/kvm/async_pf.c                   |  3 +--
 virt/kvm/kvm_main.c                   | 19 +++++++++----------
 11 files changed, 35 insertions(+), 44 deletions(-)

Comments

Paolo Bonzini April 22, 2020, 8:32 a.m. UTC | #1
On 22/04/20 06:07, Davidlohr Bueso wrote:
> The use of any sort of waitqueue (simple or regular) for
> wait/waking vcpus has always been an overkill and semantically
> wrong. Because this is per-vcpu (which is blocked) there is
> only ever a single waiting vcpu, thus no need for any sort of
> queue.
> 
> As such, make use of the rcuwait primitive, with the following
> considerations:
> 
>   - rcuwait already provides the proper barriers that serialize
>   concurrent waiter and waker.
> 
>   - Task wakeup is done in rcu read critical region, with a
>   stable task pointer.
> 
>   - Because there is no concurrency among waiters, we need
>   not worry about rcuwait_wait_event() calls corrupting
>   the wait->task. As a consequence, this saves the locking
>   done in swait when modifying the queue. This also applies
>   to per-vcore wait for powerpc kvm-hv.
> 
> The x86 tscdeadline_latency test mentioned in 8577370fb0cb
> ("KVM: Use simple waitqueue for vcpu->wq") shows that, on avg,
> latency is reduced by around 15-20% with this change.
> 
> Cc: Paul Mackerras <paulus@ozlabs.org>
> Cc: kvmarm@lists.cs.columbia.edu
> Cc: linux-mips@vger.kernel.org
> Signed-off-by: Davidlohr Bueso <dbueso@suse.de>

Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>

> ---
>  arch/mips/kvm/mips.c                  |  6 ++----
>  arch/powerpc/include/asm/kvm_book3s.h |  2 +-
>  arch/powerpc/include/asm/kvm_host.h   |  2 +-
>  arch/powerpc/kvm/book3s_hv.c          | 22 ++++++++--------------
>  arch/powerpc/kvm/powerpc.c            |  2 +-
>  arch/x86/kvm/lapic.c                  |  2 +-
>  include/linux/kvm_host.h              | 10 +++++-----
>  virt/kvm/arm/arch_timer.c             |  2 +-
>  virt/kvm/arm/arm.c                    |  9 +++++----
>  virt/kvm/async_pf.c                   |  3 +--
>  virt/kvm/kvm_main.c                   | 19 +++++++++----------
>  11 files changed, 35 insertions(+), 44 deletions(-)
> 
> diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
> index 8f05dd0a0f4e..fad6acce46e4 100644
> --- a/arch/mips/kvm/mips.c
> +++ b/arch/mips/kvm/mips.c
> @@ -284,8 +284,7 @@ static enum hrtimer_restart kvm_mips_comparecount_wakeup(struct hrtimer *timer)
>  	kvm_mips_callbacks->queue_timer_int(vcpu);
>  
>  	vcpu->arch.wait = 0;
> -	if (swq_has_sleeper(&vcpu->wq))
> -		swake_up_one(&vcpu->wq);
> +	rcuwait_wake_up(&vcpu->wait);
>  
>  	return kvm_mips_count_timeout(vcpu);
>  }
> @@ -511,8 +510,7 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
>  
>  	dvcpu->arch.wait = 0;
>  
> -	if (swq_has_sleeper(&dvcpu->wq))
> -		swake_up_one(&dvcpu->wq);
> +	rcuwait_wake_up(&dvcpu->wait);
>  
>  	return 0;
>  }
> diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
> index 506e4df2d730..6e5d85ba588d 100644
> --- a/arch/powerpc/include/asm/kvm_book3s.h
> +++ b/arch/powerpc/include/asm/kvm_book3s.h
> @@ -78,7 +78,7 @@ struct kvmppc_vcore {
>  	struct kvm_vcpu *runnable_threads[MAX_SMT_THREADS];
>  	struct list_head preempt_list;
>  	spinlock_t lock;
> -	struct swait_queue_head wq;
> +	struct rcuwait wait;
>  	spinlock_t stoltb_lock;	/* protects stolen_tb and preempt_tb */
>  	u64 stolen_tb;
>  	u64 preempt_tb;
> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
> index 1dc63101ffe1..337047ba4a56 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -751,7 +751,7 @@ struct kvm_vcpu_arch {
>  	u8 irq_pending; /* Used by XIVE to signal pending guest irqs */
>  	u32 last_inst;
>  
> -	struct swait_queue_head *wqp;
> +	struct rcuwait *waitp;
>  	struct kvmppc_vcore *vcore;
>  	int ret;
>  	int trap;
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 93493f0cbfe8..b8d42f523ca7 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -230,13 +230,11 @@ static bool kvmppc_ipi_thread(int cpu)
>  static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
>  {
>  	int cpu;
> -	struct swait_queue_head *wqp;
> +	struct rcuwait *wait;
>  
> -	wqp = kvm_arch_vcpu_wq(vcpu);
> -	if (swq_has_sleeper(wqp)) {
> -		swake_up_one(wqp);
> +	wait = kvm_arch_vcpu_get_wait(vcpu);
> +	if (rcuwait_wake_up(wait))
>  		++vcpu->stat.halt_wakeup;
> -	}
>  
>  	cpu = READ_ONCE(vcpu->arch.thread_cpu);
>  	if (cpu >= 0 && kvmppc_ipi_thread(cpu))
> @@ -2125,7 +2123,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int id)
>  
>  	spin_lock_init(&vcore->lock);
>  	spin_lock_init(&vcore->stoltb_lock);
> -	init_swait_queue_head(&vcore->wq);
> +	rcuwait_init(&vcore->wait);
>  	vcore->preempt_tb = TB_NIL;
>  	vcore->lpcr = kvm->arch.lpcr;
>  	vcore->first_vcpuid = id;
> @@ -3784,7 +3782,6 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
>  	ktime_t cur, start_poll, start_wait;
>  	int do_sleep = 1;
>  	u64 block_ns;
> -	DECLARE_SWAITQUEUE(wait);
>  
>  	/* Poll for pending exceptions and ceded state */
>  	cur = start_poll = ktime_get();
> @@ -3812,10 +3809,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
>  		}
>  	}
>  
> -	prepare_to_swait_exclusive(&vc->wq, &wait, TASK_INTERRUPTIBLE);
> -
>  	if (kvmppc_vcore_check_block(vc)) {
> -		finish_swait(&vc->wq, &wait);
>  		do_sleep = 0;
>  		/* If we polled, count this as a successful poll */
>  		if (vc->halt_poll_ns)
> @@ -3828,8 +3822,8 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
>  	vc->vcore_state = VCORE_SLEEPING;
>  	trace_kvmppc_vcore_blocked(vc, 0);
>  	spin_unlock(&vc->lock);
> -	schedule();
> -	finish_swait(&vc->wq, &wait);
> +	rcuwait_wait_event(&vc->wait,
> +			   kvmppc_vcore_check_block(vc), TASK_INTERRUPTIBLE);
>  	spin_lock(&vc->lock);
>  	vc->vcore_state = VCORE_INACTIVE;
>  	trace_kvmppc_vcore_blocked(vc, 1);
> @@ -3940,7 +3934,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
>  			kvmppc_start_thread(vcpu, vc);
>  			trace_kvm_guest_enter(vcpu);
>  		} else if (vc->vcore_state == VCORE_SLEEPING) {
> -			swake_up_one(&vc->wq);
> +		        rcuwait_wake_up(&vc->wait);
>  		}
>  
>  	}
> @@ -4279,7 +4273,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
>  	}
>  	user_vrsave = mfspr(SPRN_VRSAVE);
>  
> -	vcpu->arch.wqp = &vcpu->arch.vcore->wq;
> +	vcpu->arch.waitp = &vcpu->arch.vcore->wait;
>  	vcpu->arch.pgdir = kvm->mm->pgd;
>  	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
>  
> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
> index e15166b0a16d..4a074b587520 100644
> --- a/arch/powerpc/kvm/powerpc.c
> +++ b/arch/powerpc/kvm/powerpc.c
> @@ -751,7 +751,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
>  	if (err)
>  		goto out_vcpu_uninit;
>  
> -	vcpu->arch.wqp = &vcpu->wq;
> +	vcpu->arch.waitp = &vcpu->wait;
>  	kvmppc_create_vcpu_debugfs(vcpu, vcpu->vcpu_id);
>  	return 0;
>  
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 9af25c97612a..fb2f56ba171d 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -1833,7 +1833,7 @@ void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
>  	/* If the preempt notifier has already run, it also called apic_timer_expired */
>  	if (!apic->lapic_timer.hv_timer_in_use)
>  		goto out;
> -	WARN_ON(swait_active(&vcpu->wq));
> +	WARN_ON(rcu_dereference(vcpu->wait.task));
>  	cancel_hv_timer(apic);
>  	apic_timer_expired(apic);
>  
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 6d58beb65454..fc34021546bd 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -23,7 +23,7 @@
>  #include <linux/irqflags.h>
>  #include <linux/context_tracking.h>
>  #include <linux/irqbypass.h>
> -#include <linux/swait.h>
> +#include <linux/rcuwait.h>
>  #include <linux/refcount.h>
>  #include <linux/nospec.h>
>  #include <asm/signal.h>
> @@ -277,7 +277,7 @@ struct kvm_vcpu {
>  	struct mutex mutex;
>  	struct kvm_run *run;
>  
> -	struct swait_queue_head wq;
> +	struct rcuwait wait;
>  	struct pid __rcu *pid;
>  	int sigset_active;
>  	sigset_t sigset;
> @@ -956,12 +956,12 @@ static inline bool kvm_arch_has_assigned_device(struct kvm *kvm)
>  }
>  #endif
>  
> -static inline struct swait_queue_head *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
> +static inline struct rcuwait *kvm_arch_vcpu_get_wait(struct kvm_vcpu *vcpu)
>  {
>  #ifdef __KVM_HAVE_ARCH_WQP
> -	return vcpu->arch.wqp;
> +	return vcpu->arch.waitp;
>  #else
> -	return &vcpu->wq;
> +	return &vcpu->wait;
>  #endif
>  }
>  
> diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
> index 93bd59b46848..b2805105bbe5 100644
> --- a/virt/kvm/arm/arch_timer.c
> +++ b/virt/kvm/arm/arch_timer.c
> @@ -593,7 +593,7 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
>  	if (map.emul_ptimer)
>  		soft_timer_cancel(&map.emul_ptimer->hrtimer);
>  
> -	if (swait_active(kvm_arch_vcpu_wq(vcpu)))
> +	if (rcu_dereference(kvm_arch_vpu_get_wait(vcpu)) != NULL)
>  		kvm_timer_blocking(vcpu);
>  
>  	/*
> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
> index 48d0ec44ad77..f94a10bb1251 100644
> --- a/virt/kvm/arm/arm.c
> +++ b/virt/kvm/arm/arm.c
> @@ -579,16 +579,17 @@ void kvm_arm_resume_guest(struct kvm *kvm)
>  
>  	kvm_for_each_vcpu(i, vcpu, kvm) {
>  		vcpu->arch.pause = false;
> -		swake_up_one(kvm_arch_vcpu_wq(vcpu));
> +		rcuwait_wake_up(kvm_arch_vcpu_get_wait(vcpu));
>  	}
>  }
>  
>  static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
>  {
> -	struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
> +	struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);
>  
> -	swait_event_interruptible_exclusive(*wq, ((!vcpu->arch.power_off) &&
> -				       (!vcpu->arch.pause)));
> +	rcuwait_wait_event(*wait,
> +			   (!vcpu->arch.power_off) &&(!vcpu->arch.pause),
> +			   TASK_INTERRUPTIBLE);
>  
>  	if (vcpu->arch.power_off || vcpu->arch.pause) {
>  		/* Awaken to handle a signal, request we sleep again later. */
> diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
> index 15e5b037f92d..10b533f641a6 100644
> --- a/virt/kvm/async_pf.c
> +++ b/virt/kvm/async_pf.c
> @@ -80,8 +80,7 @@ static void async_pf_execute(struct work_struct *work)
>  
>  	trace_kvm_async_pf_completed(addr, cr2_or_gpa);
>  
> -	if (swq_has_sleeper(&vcpu->wq))
> -		swake_up_one(&vcpu->wq);
> +	rcuwait_wake_up(&vcpu->wait);
>  
>  	mmput(mm);
>  	kvm_put_kvm(vcpu->kvm);
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 74bdb7bf3295..835fb109badf 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -341,7 +341,7 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
>  	vcpu->kvm = kvm;
>  	vcpu->vcpu_id = id;
>  	vcpu->pid = NULL;
> -	init_swait_queue_head(&vcpu->wq);
> +	rcuwait_init(&vcpu->wait);
>  	kvm_async_pf_vcpu_init(vcpu);
>  
>  	vcpu->pre_pcpu = -1;
> @@ -2671,7 +2671,6 @@ static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu)
>  void kvm_vcpu_block(struct kvm_vcpu *vcpu)
>  {
>  	ktime_t start, cur;
> -	DECLARE_SWAITQUEUE(wait);
>  	bool waited = false;
>  	u64 block_ns;
>  
> @@ -2697,8 +2696,9 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
>  		} while (single_task_running() && ktime_before(cur, stop));
>  	}
>  
> +	prepare_to_rcuwait(&vcpu->wait);
>  	for (;;) {
> -		prepare_to_swait_exclusive(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
> +		set_current_state(TASK_INTERRUPTIBLE);
>  
>  		if (kvm_vcpu_check_block(vcpu) < 0)
>  			break;
> @@ -2706,8 +2706,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
>  		waited = true;
>  		schedule();
>  	}
> -
> -	finish_swait(&vcpu->wq, &wait);
> +	finish_rcuwait(&vcpu->wait);
>  	cur = ktime_get();
>  out:
>  	kvm_arch_vcpu_unblocking(vcpu);
> @@ -2738,11 +2737,10 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_block);
>  
>  bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu)
>  {
> -	struct swait_queue_head *wqp;
> +	struct rcuwait *wait;
>  
> -	wqp = kvm_arch_vcpu_wq(vcpu);
> -	if (swq_has_sleeper(wqp)) {
> -		swake_up_one(wqp);
> +	wait = kvm_arch_vcpu_get_wait(vcpu);
> +	if (rcuwait_wake_up(wait)) {
>  		WRITE_ONCE(vcpu->ready, true);
>  		++vcpu->stat.halt_wakeup;
>  		return true;
> @@ -2884,7 +2882,8 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
>  				continue;
>  			if (vcpu == me)
>  				continue;
> -			if (swait_active(&vcpu->wq) && !vcpu_dy_runnable(vcpu))
> +			if (rcu_dereference(vcpu->wait.task) &&
> +			    !vcpu_dy_runnable(vcpu))
>  				continue;
>  			if (READ_ONCE(vcpu->preempted) && yield_to_kernel_mode &&
>  				!kvm_arch_vcpu_in_kernel(vcpu))
>
Marc Zyngier April 22, 2020, 10:14 a.m. UTC | #2
On 2020-04-22 05:07, Davidlohr Bueso wrote:
> The use of any sort of waitqueue (simple or regular) for
> wait/waking vcpus has always been an overkill and semantically
> wrong. Because this is per-vcpu (which is blocked) there is
> only ever a single waiting vcpu, thus no need for any sort of
> queue.
> 
> As such, make use of the rcuwait primitive, with the following
> considerations:
> 
>   - rcuwait already provides the proper barriers that serialize
>   concurrent waiter and waker.
> 
>   - Task wakeup is done in rcu read critical region, with a
>   stable task pointer.
> 
>   - Because there is no concurrency among waiters, we need
>   not worry about rcuwait_wait_event() calls corrupting
>   the wait->task. As a consequence, this saves the locking
>   done in swait when modifying the queue. This also applies
>   to per-vcore wait for powerpc kvm-hv.
> 
> The x86 tscdeadline_latency test mentioned in 8577370fb0cb
> ("KVM: Use simple waitqueue for vcpu->wq") shows that, on avg,
> latency is reduced by around 15-20% with this change.
> 
> Cc: Paul Mackerras <paulus@ozlabs.org>
> Cc: kvmarm@lists.cs.columbia.edu
> Cc: linux-mips@vger.kernel.org
> Signed-off-by: Davidlohr Bueso <dbueso@suse.de>

Reviewed-by: Marc Zyngier <maz@kernel.org>

Thanks,

         M.
Marc Zyngier April 23, 2020, 8:41 a.m. UTC | #3
On Tue, 21 Apr 2020 21:07:38 -0700
Davidlohr Bueso <dave@stgolabs.net> wrote:

> The use of any sort of waitqueue (simple or regular) for
> wait/waking vcpus has always been an overkill and semantically
> wrong. Because this is per-vcpu (which is blocked) there is
> only ever a single waiting vcpu, thus no need for any sort of
> queue.
> 
> As such, make use of the rcuwait primitive, with the following
> considerations:
> 
>   - rcuwait already provides the proper barriers that serialize
>   concurrent waiter and waker.
> 
>   - Task wakeup is done in rcu read critical region, with a
>   stable task pointer.
> 
>   - Because there is no concurrency among waiters, we need
>   not worry about rcuwait_wait_event() calls corrupting
>   the wait->task. As a consequence, this saves the locking
>   done in swait when modifying the queue. This also applies
>   to per-vcore wait for powerpc kvm-hv.
> 
> The x86 tscdeadline_latency test mentioned in 8577370fb0cb
> ("KVM: Use simple waitqueue for vcpu->wq") shows that, on avg,
> latency is reduced by around 15-20% with this change.
> 
> Cc: Paul Mackerras <paulus@ozlabs.org>
> Cc: kvmarm@lists.cs.columbia.edu
> Cc: linux-mips@vger.kernel.org
> Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
> ---
>  arch/mips/kvm/mips.c                  |  6 ++----
>  arch/powerpc/include/asm/kvm_book3s.h |  2 +-
>  arch/powerpc/include/asm/kvm_host.h   |  2 +-
>  arch/powerpc/kvm/book3s_hv.c          | 22 ++++++++--------------
>  arch/powerpc/kvm/powerpc.c            |  2 +-
>  arch/x86/kvm/lapic.c                  |  2 +-
>  include/linux/kvm_host.h              | 10 +++++-----
>  virt/kvm/arm/arch_timer.c             |  2 +-
>  virt/kvm/arm/arm.c                    |  9 +++++----
>  virt/kvm/async_pf.c                   |  3 +--
>  virt/kvm/kvm_main.c                   | 19 +++++++++----------
>  11 files changed, 35 insertions(+), 44 deletions(-)

[...]

I should have tested it *before* acking it, really.

> diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
> index 93bd59b46848..b2805105bbe5 100644
> --- a/virt/kvm/arm/arch_timer.c
> +++ b/virt/kvm/arm/arch_timer.c
> @@ -593,7 +593,7 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
>  	if (map.emul_ptimer)
>  		soft_timer_cancel(&map.emul_ptimer->hrtimer);
>  
> -	if (swait_active(kvm_arch_vcpu_wq(vcpu)))
> +	if (rcu_dereference(kvm_arch_vpu_get_wait(vcpu)) != NULL)

This doesn't compile (wrong function name, and rcu_dereference takes a
variable). But whatever it would do if we fixed it looks dodgy. it isn't
the rcuwait structure that you want to dereference, but rcuwait->task
(we are checking whether we are called because we are blocking or being
preempted).

>  		kvm_timer_blocking(vcpu);
>  
>  	/*
> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
> index 48d0ec44ad77..f94a10bb1251 100644
> --- a/virt/kvm/arm/arm.c
> +++ b/virt/kvm/arm/arm.c
> @@ -579,16 +579,17 @@ void kvm_arm_resume_guest(struct kvm *kvm)
>  
>  	kvm_for_each_vcpu(i, vcpu, kvm) {
>  		vcpu->arch.pause = false;
> -		swake_up_one(kvm_arch_vcpu_wq(vcpu));
> +		rcuwait_wake_up(kvm_arch_vcpu_get_wait(vcpu));
>  	}
>  }
>  
>  static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
>  {
> -	struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
> +	struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);
>  
> -	swait_event_interruptible_exclusive(*wq, ((!vcpu->arch.power_off) &&
> -				       (!vcpu->arch.pause)));
> +	rcuwait_wait_event(*wait,
> +			   (!vcpu->arch.power_off) &&(!vcpu->arch.pause),
> +			   TASK_INTERRUPTIBLE);

As noticed by the kbuild robot, this doesn't compile either.

I fixed it as follow, and it survived a very basic test run in a model
(more testing later).

Thanks,

	M.

diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index b2805105bbe56..2dbd14dcae9fb 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -569,6 +569,7 @@ bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
 
 void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
 {
+	struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);
 	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
 	struct timer_map map;
 
@@ -593,7 +594,7 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
 	if (map.emul_ptimer)
 		soft_timer_cancel(&map.emul_ptimer->hrtimer);
 
-	if (rcu_dereference(kvm_arch_vpu_get_wait(vcpu)) != NULL)
+	if (rcu_dereference(wait->task))
 		kvm_timer_blocking(vcpu);
 
 	/*
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index f94a10bb1251b..479f36d02418d 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -587,7 +587,7 @@ static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
 {
 	struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);
 
-	rcuwait_wait_event(*wait,
+	rcuwait_wait_event(wait,
 			   (!vcpu->arch.power_off) &&(!vcpu->arch.pause),
 			   TASK_INTERRUPTIBLE);
Paolo Bonzini April 23, 2020, 8:57 a.m. UTC | #4
On 23/04/20 10:41, Marc Zyngier wrote:
>>  
>> -	if (swait_active(kvm_arch_vcpu_wq(vcpu)))
>> +	if (rcu_dereference(kvm_arch_vpu_get_wait(vcpu)) != NULL)
> This doesn't compile (wrong function name, and rcu_dereference takes a
> variable). But whatever it would do if we fixed it looks dodgy. it isn't
> the rcuwait structure that you want to dereference, but rcuwait->task
> (we are checking whether we are called because we are blocking or being
> preempted).
> 

Yes, I agree.  Replacing swait with rcuwait is all good, but please make
the API look the same first.  Just like you added prepare_to_rcuwait and
finish_rcuwait, let's add rcuwait_active as well.

Actually let's do it like this:

1) Davidlohr, please post only patches 1-3 to "equalize" the swait and
rcuwait APIs.

2) Peter, please prepare a topic branch for those, or provide Acked-by

3) let's get everything else through the KVM tree.

Thanks,

Paolo
Peter Zijlstra April 23, 2020, 9:19 a.m. UTC | #5
On Thu, Apr 23, 2020 at 10:57:57AM +0200, Paolo Bonzini wrote:
> On 23/04/20 10:41, Marc Zyngier wrote:
> >>  
> >> -	if (swait_active(kvm_arch_vcpu_wq(vcpu)))
> >> +	if (rcu_dereference(kvm_arch_vpu_get_wait(vcpu)) != NULL)
> > This doesn't compile (wrong function name, and rcu_dereference takes a
> > variable). But whatever it would do if we fixed it looks dodgy. it isn't
> > the rcuwait structure that you want to dereference, but rcuwait->task
> > (we are checking whether we are called because we are blocking or being
> > preempted).
> > 
> 
> Yes, I agree.  Replacing swait with rcuwait is all good, but please make
> the API look the same first.  Just like you added prepare_to_rcuwait and
> finish_rcuwait, let's add rcuwait_active as well.
> 
> Actually let's do it like this:
> 
> 1) Davidlohr, please post only patches 1-3 to "equalize" the swait and
> rcuwait APIs.
> 
> 2) Peter, please prepare a topic branch for those, or provide Acked-by

I don't think I have anything that conflicts with this, so sure, take
the whole thing through KVM.

For 1-3 (and I'll send a small niggle for 3 right after this):

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>

I'll keep 5 as it is unrelated.
Paolo Bonzini April 23, 2020, 9:26 a.m. UTC | #6
On 23/04/20 11:19, Peter Zijlstra wrote:
>>
>> 1) Davidlohr, please post only patches 1-3 to "equalize" the swait and
>> rcuwait APIs.
>>
>> 2) Peter, please prepare a topic branch for those, or provide Acked-by
> I don't think I have anything that conflicts with this, so sure, take
> the whole thing through KVM.
> 
> For 1-3 (and I'll send a small niggle for 3 right after this):
> 
> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>

Great thanks.  I assume you have no issue with rcuwait_active either.

Paolo

> I'll keep 5 as it is unrelated.
>
diff mbox series

Patch

diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 8f05dd0a0f4e..fad6acce46e4 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -284,8 +284,7 @@  static enum hrtimer_restart kvm_mips_comparecount_wakeup(struct hrtimer *timer)
 	kvm_mips_callbacks->queue_timer_int(vcpu);
 
 	vcpu->arch.wait = 0;
-	if (swq_has_sleeper(&vcpu->wq))
-		swake_up_one(&vcpu->wq);
+	rcuwait_wake_up(&vcpu->wait);
 
 	return kvm_mips_count_timeout(vcpu);
 }
@@ -511,8 +510,7 @@  int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
 
 	dvcpu->arch.wait = 0;
 
-	if (swq_has_sleeper(&dvcpu->wq))
-		swake_up_one(&dvcpu->wq);
+	rcuwait_wake_up(&dvcpu->wait);
 
 	return 0;
 }
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 506e4df2d730..6e5d85ba588d 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -78,7 +78,7 @@  struct kvmppc_vcore {
 	struct kvm_vcpu *runnable_threads[MAX_SMT_THREADS];
 	struct list_head preempt_list;
 	spinlock_t lock;
-	struct swait_queue_head wq;
+	struct rcuwait wait;
 	spinlock_t stoltb_lock;	/* protects stolen_tb and preempt_tb */
 	u64 stolen_tb;
 	u64 preempt_tb;
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 1dc63101ffe1..337047ba4a56 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -751,7 +751,7 @@  struct kvm_vcpu_arch {
 	u8 irq_pending; /* Used by XIVE to signal pending guest irqs */
 	u32 last_inst;
 
-	struct swait_queue_head *wqp;
+	struct rcuwait *waitp;
 	struct kvmppc_vcore *vcore;
 	int ret;
 	int trap;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 93493f0cbfe8..b8d42f523ca7 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -230,13 +230,11 @@  static bool kvmppc_ipi_thread(int cpu)
 static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
 {
 	int cpu;
-	struct swait_queue_head *wqp;
+	struct rcuwait *wait;
 
-	wqp = kvm_arch_vcpu_wq(vcpu);
-	if (swq_has_sleeper(wqp)) {
-		swake_up_one(wqp);
+	wait = kvm_arch_vcpu_get_wait(vcpu);
+	if (rcuwait_wake_up(wait))
 		++vcpu->stat.halt_wakeup;
-	}
 
 	cpu = READ_ONCE(vcpu->arch.thread_cpu);
 	if (cpu >= 0 && kvmppc_ipi_thread(cpu))
@@ -2125,7 +2123,7 @@  static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int id)
 
 	spin_lock_init(&vcore->lock);
 	spin_lock_init(&vcore->stoltb_lock);
-	init_swait_queue_head(&vcore->wq);
+	rcuwait_init(&vcore->wait);
 	vcore->preempt_tb = TB_NIL;
 	vcore->lpcr = kvm->arch.lpcr;
 	vcore->first_vcpuid = id;
@@ -3784,7 +3782,6 @@  static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
 	ktime_t cur, start_poll, start_wait;
 	int do_sleep = 1;
 	u64 block_ns;
-	DECLARE_SWAITQUEUE(wait);
 
 	/* Poll for pending exceptions and ceded state */
 	cur = start_poll = ktime_get();
@@ -3812,10 +3809,7 @@  static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
 		}
 	}
 
-	prepare_to_swait_exclusive(&vc->wq, &wait, TASK_INTERRUPTIBLE);
-
 	if (kvmppc_vcore_check_block(vc)) {
-		finish_swait(&vc->wq, &wait);
 		do_sleep = 0;
 		/* If we polled, count this as a successful poll */
 		if (vc->halt_poll_ns)
@@ -3828,8 +3822,8 @@  static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
 	vc->vcore_state = VCORE_SLEEPING;
 	trace_kvmppc_vcore_blocked(vc, 0);
 	spin_unlock(&vc->lock);
-	schedule();
-	finish_swait(&vc->wq, &wait);
+	rcuwait_wait_event(&vc->wait,
+			   kvmppc_vcore_check_block(vc), TASK_INTERRUPTIBLE);
 	spin_lock(&vc->lock);
 	vc->vcore_state = VCORE_INACTIVE;
 	trace_kvmppc_vcore_blocked(vc, 1);
@@ -3940,7 +3934,7 @@  static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 			kvmppc_start_thread(vcpu, vc);
 			trace_kvm_guest_enter(vcpu);
 		} else if (vc->vcore_state == VCORE_SLEEPING) {
-			swake_up_one(&vc->wq);
+		        rcuwait_wake_up(&vc->wait);
 		}
 
 	}
@@ -4279,7 +4273,7 @@  static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	}
 	user_vrsave = mfspr(SPRN_VRSAVE);
 
-	vcpu->arch.wqp = &vcpu->arch.vcore->wq;
+	vcpu->arch.waitp = &vcpu->arch.vcore->wait;
 	vcpu->arch.pgdir = kvm->mm->pgd;
 	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
 
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index e15166b0a16d..4a074b587520 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -751,7 +751,7 @@  int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 	if (err)
 		goto out_vcpu_uninit;
 
-	vcpu->arch.wqp = &vcpu->wq;
+	vcpu->arch.waitp = &vcpu->wait;
 	kvmppc_create_vcpu_debugfs(vcpu, vcpu->vcpu_id);
 	return 0;
 
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 9af25c97612a..fb2f56ba171d 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1833,7 +1833,7 @@  void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
 	/* If the preempt notifier has already run, it also called apic_timer_expired */
 	if (!apic->lapic_timer.hv_timer_in_use)
 		goto out;
-	WARN_ON(swait_active(&vcpu->wq));
+	WARN_ON(rcu_dereference(vcpu->wait.task));
 	cancel_hv_timer(apic);
 	apic_timer_expired(apic);
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 6d58beb65454..fc34021546bd 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -23,7 +23,7 @@ 
 #include <linux/irqflags.h>
 #include <linux/context_tracking.h>
 #include <linux/irqbypass.h>
-#include <linux/swait.h>
+#include <linux/rcuwait.h>
 #include <linux/refcount.h>
 #include <linux/nospec.h>
 #include <asm/signal.h>
@@ -277,7 +277,7 @@  struct kvm_vcpu {
 	struct mutex mutex;
 	struct kvm_run *run;
 
-	struct swait_queue_head wq;
+	struct rcuwait wait;
 	struct pid __rcu *pid;
 	int sigset_active;
 	sigset_t sigset;
@@ -956,12 +956,12 @@  static inline bool kvm_arch_has_assigned_device(struct kvm *kvm)
 }
 #endif
 
-static inline struct swait_queue_head *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
+static inline struct rcuwait *kvm_arch_vcpu_get_wait(struct kvm_vcpu *vcpu)
 {
 #ifdef __KVM_HAVE_ARCH_WQP
-	return vcpu->arch.wqp;
+	return vcpu->arch.waitp;
 #else
-	return &vcpu->wq;
+	return &vcpu->wait;
 #endif
 }
 
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 93bd59b46848..b2805105bbe5 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -593,7 +593,7 @@  void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
 	if (map.emul_ptimer)
 		soft_timer_cancel(&map.emul_ptimer->hrtimer);
 
-	if (swait_active(kvm_arch_vcpu_wq(vcpu)))
+	if (rcu_dereference(kvm_arch_vpu_get_wait(vcpu)) != NULL)
 		kvm_timer_blocking(vcpu);
 
 	/*
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 48d0ec44ad77..f94a10bb1251 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -579,16 +579,17 @@  void kvm_arm_resume_guest(struct kvm *kvm)
 
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		vcpu->arch.pause = false;
-		swake_up_one(kvm_arch_vcpu_wq(vcpu));
+		rcuwait_wake_up(kvm_arch_vcpu_get_wait(vcpu));
 	}
 }
 
 static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
 {
-	struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
+	struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu);
 
-	swait_event_interruptible_exclusive(*wq, ((!vcpu->arch.power_off) &&
-				       (!vcpu->arch.pause)));
+	rcuwait_wait_event(*wait,
+			   (!vcpu->arch.power_off) &&(!vcpu->arch.pause),
+			   TASK_INTERRUPTIBLE);
 
 	if (vcpu->arch.power_off || vcpu->arch.pause) {
 		/* Awaken to handle a signal, request we sleep again later. */
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 15e5b037f92d..10b533f641a6 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -80,8 +80,7 @@  static void async_pf_execute(struct work_struct *work)
 
 	trace_kvm_async_pf_completed(addr, cr2_or_gpa);
 
-	if (swq_has_sleeper(&vcpu->wq))
-		swake_up_one(&vcpu->wq);
+	rcuwait_wake_up(&vcpu->wait);
 
 	mmput(mm);
 	kvm_put_kvm(vcpu->kvm);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 74bdb7bf3295..835fb109badf 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -341,7 +341,7 @@  static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 	vcpu->kvm = kvm;
 	vcpu->vcpu_id = id;
 	vcpu->pid = NULL;
-	init_swait_queue_head(&vcpu->wq);
+	rcuwait_init(&vcpu->wait);
 	kvm_async_pf_vcpu_init(vcpu);
 
 	vcpu->pre_pcpu = -1;
@@ -2671,7 +2671,6 @@  static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu)
 void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 {
 	ktime_t start, cur;
-	DECLARE_SWAITQUEUE(wait);
 	bool waited = false;
 	u64 block_ns;
 
@@ -2697,8 +2696,9 @@  void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 		} while (single_task_running() && ktime_before(cur, stop));
 	}
 
+	prepare_to_rcuwait(&vcpu->wait);
 	for (;;) {
-		prepare_to_swait_exclusive(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
+		set_current_state(TASK_INTERRUPTIBLE);
 
 		if (kvm_vcpu_check_block(vcpu) < 0)
 			break;
@@ -2706,8 +2706,7 @@  void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 		waited = true;
 		schedule();
 	}
-
-	finish_swait(&vcpu->wq, &wait);
+	finish_rcuwait(&vcpu->wait);
 	cur = ktime_get();
 out:
 	kvm_arch_vcpu_unblocking(vcpu);
@@ -2738,11 +2737,10 @@  EXPORT_SYMBOL_GPL(kvm_vcpu_block);
 
 bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu)
 {
-	struct swait_queue_head *wqp;
+	struct rcuwait *wait;
 
-	wqp = kvm_arch_vcpu_wq(vcpu);
-	if (swq_has_sleeper(wqp)) {
-		swake_up_one(wqp);
+	wait = kvm_arch_vcpu_get_wait(vcpu);
+	if (rcuwait_wake_up(wait)) {
 		WRITE_ONCE(vcpu->ready, true);
 		++vcpu->stat.halt_wakeup;
 		return true;
@@ -2884,7 +2882,8 @@  void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
 				continue;
 			if (vcpu == me)
 				continue;
-			if (swait_active(&vcpu->wq) && !vcpu_dy_runnable(vcpu))
+			if (rcu_dereference(vcpu->wait.task) &&
+			    !vcpu_dy_runnable(vcpu))
 				continue;
 			if (READ_ONCE(vcpu->preempted) && yield_to_kernel_mode &&
 				!kvm_arch_vcpu_in_kernel(vcpu))