
kvm: add capability for halt polling

Message ID: 20200417221446.108733-1-jcargill@google.com
State: New, archived
Series: kvm: add capability for halt polling

Commit Message

Jon Cargille April 17, 2020, 10:14 p.m. UTC
From: David Matlack <dmatlack@google.com>

KVM_CAP_HALT_POLL is a per-VM capability that lets userspace
control the halt-polling time, allowing halt-polling to be tuned or
disabled on particular VMs.

With dynamic halt-polling, a VM's VCPUs can poll anywhere in the range
[0, halt_poll_ns] on each halt. KVM_CAP_HALT_POLL sets the
upper limit on the poll time.

Signed-off-by: David Matlack <dmatlack@google.com>
Signed-off-by: Jon Cargille <jcargill@google.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
---
 Documentation/virt/kvm/api.rst | 17 +++++++++++++++++
 include/linux/kvm_host.h       |  1 +
 include/uapi/linux/kvm.h       |  1 +
 virt/kvm/kvm_main.c            | 19 +++++++++++++++----
 4 files changed, 34 insertions(+), 4 deletions(-)
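
For illustration, here is a minimal userspace sketch (not part of this patch) of how the new capability would be enabled on a VM file descriptor via KVM_ENABLE_CAP. The helper name and the 200 microsecond limit are arbitrary; args[0] carries the maximum poll time in nanoseconds, and 0 disables halt polling for the VM.

/*
 * Illustrative only -- not part of this patch. Enables KVM_CAP_HALT_POLL
 * on a VM fd, capping halt polling for that VM at 200 us (200000 ns).
 */
#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <stdio.h>

static int set_max_halt_poll_ns(int vm_fd, unsigned int max_ns)
{
	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_HALT_POLL,
		.args = { max_ns },	/* args[0]: maximum poll time in ns; 0 disables polling */
	};

	/* Per-VM capability: query and enable it on the VM fd. */
	if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_HALT_POLL) <= 0) {
		fprintf(stderr, "KVM_CAP_HALT_POLL not supported\n");
		return -1;
	}
	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}

/* e.g. set_max_halt_poll_ns(vm_fd, 200000); */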

Comments

Vitaly Kuznetsov April 19, 2020, 8:46 p.m. UTC | #1
Jon Cargille <jcargill@google.com> writes:

> From: David Matlack <dmatlack@google.com>
>
> KVM_CAP_HALT_POLL is a per-VM capability that lets userspace
> control the halt-polling time, allowing halt-polling to be tuned or
> disabled on particular VMs.
>
> With dynamic halt-polling, a VM's VCPUs can poll from anywhere from
> [0, halt_poll_ns] on each halt. KVM_CAP_HALT_POLL sets the
> upper limit on the poll time.

Out of pure curiosity, why is this a per-VM and not a per-VCPU property?

>
> Signed-off-by: David Matlack <dmatlack@google.com>
> Signed-off-by: Jon Cargille <jcargill@google.com>
> Reviewed-by: Jim Mattson <jmattson@google.com>
> ---
>  Documentation/virt/kvm/api.rst | 17 +++++++++++++++++
>  include/linux/kvm_host.h       |  1 +
>  include/uapi/linux/kvm.h       |  1 +
>  virt/kvm/kvm_main.c            | 19 +++++++++++++++----
>  4 files changed, 34 insertions(+), 4 deletions(-)
>
> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> index efbbe570aa9b7b..d871dacb984e98 100644
> --- a/Documentation/virt/kvm/api.rst
> +++ b/Documentation/virt/kvm/api.rst
> @@ -5802,6 +5802,23 @@ If present, this capability can be enabled for a VM, meaning that KVM
>  will allow the transition to secure guest mode.  Otherwise KVM will
>  veto the transition.
>  
> +7.20 KVM_CAP_HALT_POLL
> +----------------------
> +
> +:Architectures: all
> +:Target: VM
> +:Parameters: args[0] is the maximum poll time in nanoseconds
> +:Returns: 0 on success; -1 on error
> +
> +This capability overrides the kvm module parameter halt_poll_ns for the
> +target VM.
> +
> +VCPU polling allows a VCPU to poll for wakeup events instead of immediately
> +scheduling during guest halts. The maximum time a VCPU can spend polling is
> +controlled by the kvm module parameter halt_poll_ns. This capability allows
> +the maximum halt time to specified on a per-VM basis, effectively overriding
> +the module parameter for the target VM.
> +
>  8. Other capabilities.
>  ======================
>  
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 6d58beb65454f7..922b24ce5e7297 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -503,6 +503,7 @@ struct kvm {
>  	struct srcu_struct srcu;
>  	struct srcu_struct irq_srcu;
>  	pid_t userspace_pid;
> +	unsigned int max_halt_poll_ns;
>  };
>  
>  #define kvm_err(fmt, ...) \
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 428c7dde6b4b37..ac9eba0289d1b6 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -1017,6 +1017,7 @@ struct kvm_ppc_resize_hpt {
>  #define KVM_CAP_S390_VCPU_RESETS 179
>  #define KVM_CAP_S390_PROTECTED 180
>  #define KVM_CAP_PPC_SECURE_GUEST 181
> +#define KVM_CAP_HALT_POLL 182
>  
>  #ifdef KVM_CAP_IRQ_ROUTING
>  
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 74bdb7bf32952e..ec038a9e60a275 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -710,6 +710,8 @@ static struct kvm *kvm_create_vm(unsigned long type)
>  			goto out_err_no_arch_destroy_vm;
>  	}
>  
> +	kvm->max_halt_poll_ns = halt_poll_ns;
> +
>  	r = kvm_arch_init_vm(kvm, type);
>  	if (r)
>  		goto out_err_no_arch_destroy_vm;
> @@ -2716,15 +2718,16 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
>  	if (!kvm_arch_no_poll(vcpu)) {
>  		if (!vcpu_valid_wakeup(vcpu)) {
>  			shrink_halt_poll_ns(vcpu);
> -		} else if (halt_poll_ns) {
> +		} else if (vcpu->kvm->max_halt_poll_ns) {
>  			if (block_ns <= vcpu->halt_poll_ns)
>  				;
>  			/* we had a long block, shrink polling */
> -			else if (vcpu->halt_poll_ns && block_ns > halt_poll_ns)
> +			else if (vcpu->halt_poll_ns &&
> +					block_ns > vcpu->kvm->max_halt_poll_ns)
>  				shrink_halt_poll_ns(vcpu);
>  			/* we had a short halt and our poll time is too small */
> -			else if (vcpu->halt_poll_ns < halt_poll_ns &&
> -				block_ns < halt_poll_ns)
> +			else if (vcpu->halt_poll_ns < vcpu->kvm->max_halt_poll_ns &&
> +					block_ns < vcpu->kvm->max_halt_poll_ns)
>  				grow_halt_poll_ns(vcpu);
>  		} else {
>  			vcpu->halt_poll_ns = 0;
> @@ -3516,6 +3519,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
>  	case KVM_CAP_IOEVENTFD_ANY_LENGTH:
>  	case KVM_CAP_CHECK_EXTENSION_VM:
>  	case KVM_CAP_ENABLE_CAP_VM:
> +	case KVM_CAP_HALT_POLL:
>  		return 1;
>  #ifdef CONFIG_KVM_MMIO
>  	case KVM_CAP_COALESCED_MMIO:
> @@ -3566,6 +3570,13 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm,
>  		return 0;
>  	}
>  #endif
> +	case KVM_CAP_HALT_POLL: {
> +		if (cap->flags || cap->args[0] != (unsigned int)cap->args[0])
> +			return -EINVAL;
> +
> +		kvm->max_halt_poll_ns = cap->args[0];

Is it safe to allow any value from userspace here or would it maybe make
sense to only allow [0, global halt_poll_ns]?


> +		return 0;
> +	}
>  	default:
>  		return kvm_vm_ioctl_enable_cap(kvm, cap);
>  	}
Jon Cargille April 20, 2020, 6:47 p.m. UTC | #2
On Sun, Apr 19, 2020 at 1:46 PM Vitaly Kuznetsov <vkuznets@redhat.com> wrote:
>
> Jon Cargille <jcargill@google.com> writes:
>
> > From: David Matlack <dmatlack@google.com>
> >
> > KVM_CAP_HALT_POLL is a per-VM capability that lets userspace
> > control the halt-polling time, allowing halt-polling to be tuned or
> > disabled on particular VMs.
> >
> > With dynamic halt-polling, a VM's VCPUs can poll from anywhere from
> > [0, halt_poll_ns] on each halt. KVM_CAP_HALT_POLL sets the
> > upper limit on the poll time.
>
> Out of pure curiosity, why is this a per-VM and not a per-VCPU property?

Great question, Vitaly.  We actually implemented this as a per-VCPU property
initially; however, our user-space implementation was only using it to apply
the same value to all VCPUs, so we later simplified it on the advice of
Jim Mattson. If there is a consensus for this to go in as per-VCPU rather
than per-VM, I'm happy to submit that way instead. The per-VM version did
end up looking simpler, IMO.

>
> >
> > Signed-off-by: David Matlack <dmatlack@google.com>
> > Signed-off-by: Jon Cargille <jcargill@google.com>
> > Reviewed-by: Jim Mattson <jmattson@google.com>
> > ---
> >  Documentation/virt/kvm/api.rst | 17 +++++++++++++++++
> >  include/linux/kvm_host.h       |  1 +
> >  include/uapi/linux/kvm.h       |  1 +
> >  virt/kvm/kvm_main.c            | 19 +++++++++++++++----
> >  4 files changed, 34 insertions(+), 4 deletions(-)
> >
> > diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> > index efbbe570aa9b7b..d871dacb984e98 100644
> > --- a/Documentation/virt/kvm/api.rst
> > +++ b/Documentation/virt/kvm/api.rst
> > @@ -5802,6 +5802,23 @@ If present, this capability can be enabled for a VM, meaning that KVM
> >  will allow the transition to secure guest mode.  Otherwise KVM will
> >  veto the transition.
> >
> > +7.20 KVM_CAP_HALT_POLL
> > +----------------------
> > +
> > +:Architectures: all
> > +:Target: VM
> > +:Parameters: args[0] is the maximum poll time in nanoseconds
> > +:Returns: 0 on success; -1 on error
> > +
> > +This capability overrides the kvm module parameter halt_poll_ns for the
> > +target VM.
> > +
> > +VCPU polling allows a VCPU to poll for wakeup events instead of immediately
> > +scheduling during guest halts. The maximum time a VCPU can spend polling is
> > +controlled by the kvm module parameter halt_poll_ns. This capability allows
> > +the maximum halt time to specified on a per-VM basis, effectively overriding
> > +the module parameter for the target VM.
> > +
> >  8. Other capabilities.
> >  ======================
> >
> > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > index 6d58beb65454f7..922b24ce5e7297 100644
> > --- a/include/linux/kvm_host.h
> > +++ b/include/linux/kvm_host.h
> > @@ -503,6 +503,7 @@ struct kvm {
> >       struct srcu_struct srcu;
> >       struct srcu_struct irq_srcu;
> >       pid_t userspace_pid;
> > +     unsigned int max_halt_poll_ns;
> >  };
> >
> >  #define kvm_err(fmt, ...) \
> > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> > index 428c7dde6b4b37..ac9eba0289d1b6 100644
> > --- a/include/uapi/linux/kvm.h
> > +++ b/include/uapi/linux/kvm.h
> > @@ -1017,6 +1017,7 @@ struct kvm_ppc_resize_hpt {
> >  #define KVM_CAP_S390_VCPU_RESETS 179
> >  #define KVM_CAP_S390_PROTECTED 180
> >  #define KVM_CAP_PPC_SECURE_GUEST 181
> > +#define KVM_CAP_HALT_POLL 182
> >
> >  #ifdef KVM_CAP_IRQ_ROUTING
> >
> > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > index 74bdb7bf32952e..ec038a9e60a275 100644
> > --- a/virt/kvm/kvm_main.c
> > +++ b/virt/kvm/kvm_main.c
> > @@ -710,6 +710,8 @@ static struct kvm *kvm_create_vm(unsigned long type)
> >                       goto out_err_no_arch_destroy_vm;
> >       }
> >
> > +     kvm->max_halt_poll_ns = halt_poll_ns;
> > +
> >       r = kvm_arch_init_vm(kvm, type);
> >       if (r)
> >               goto out_err_no_arch_destroy_vm;
> > @@ -2716,15 +2718,16 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
> >       if (!kvm_arch_no_poll(vcpu)) {
> >               if (!vcpu_valid_wakeup(vcpu)) {
> >                       shrink_halt_poll_ns(vcpu);
> > -             } else if (halt_poll_ns) {
> > +             } else if (vcpu->kvm->max_halt_poll_ns) {
> >                       if (block_ns <= vcpu->halt_poll_ns)
> >                               ;
> >                       /* we had a long block, shrink polling */
> > -                     else if (vcpu->halt_poll_ns && block_ns > halt_poll_ns)
> > +                     else if (vcpu->halt_poll_ns &&
> > +                                     block_ns > vcpu->kvm->max_halt_poll_ns)
> >                               shrink_halt_poll_ns(vcpu);
> >                       /* we had a short halt and our poll time is too small */
> > -                     else if (vcpu->halt_poll_ns < halt_poll_ns &&
> > -                             block_ns < halt_poll_ns)
> > +                     else if (vcpu->halt_poll_ns < vcpu->kvm->max_halt_poll_ns &&
> > +                                     block_ns < vcpu->kvm->max_halt_poll_ns)
> >                               grow_halt_poll_ns(vcpu);
> >               } else {
> >                       vcpu->halt_poll_ns = 0;
> > @@ -3516,6 +3519,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
> >       case KVM_CAP_IOEVENTFD_ANY_LENGTH:
> >       case KVM_CAP_CHECK_EXTENSION_VM:
> >       case KVM_CAP_ENABLE_CAP_VM:
> > +     case KVM_CAP_HALT_POLL:
> >               return 1;
> >  #ifdef CONFIG_KVM_MMIO
> >       case KVM_CAP_COALESCED_MMIO:
> > @@ -3566,6 +3570,13 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm,
> >               return 0;
> >       }
> >  #endif
> > +     case KVM_CAP_HALT_POLL: {
> > +             if (cap->flags || cap->args[0] != (unsigned int)cap->args[0])
> > +                     return -EINVAL;
> > +
> > +             kvm->max_halt_poll_ns = cap->args[0];
>
> Is it safe to allow any value from userspace here or would it maybe make
> sense to only allow [0, global halt_poll_ns]?

I believe that any value is safe; a very large value effectively disables
halt-polling, which is equivalent to setting a value of zero to explicitly
disable it, which is legal.


>
>
> > +             return 0;
> > +     }
> >       default:
> >               return kvm_vm_ioctl_enable_cap(kvm, cap);
> >       }
>
> --
> Vitaly
>
Paolo Bonzini April 20, 2020, 9:07 p.m. UTC | #3
On 20/04/20 20:47, Jon Cargille wrote:
> Great question, Vitaly.  We actually implemented this as a per-VCPU property
> initially; however, our user-space implementation was only using it to apply
> the same value to all VCPUs, so we later simplified it on the advice of
> Jim Mattson. If there is a consensus for this to go in as per-VCPU rather
> than per-VM, I'm happy to submit that way instead. The per-VM version did
> end up looking simpler, IMO.

Yeah, I am not sure what the use case would be for per-vCPU halt polling.

You could perhaps disable halt polling for vCPUs that are not placed on
isolated physical CPUs (devoting those vCPUs to housekeeping), but it
seems to me that this would be quite hard to get right.  But in that
case you would probably prefer to disable HLT vmexits completely, rather
than use halt polling.

Paolo
Paolo Bonzini April 20, 2020, 9:09 p.m. UTC | #4
On 20/04/20 20:47, Jon Cargille wrote:
>> Is it safe to allow any value from userspace here or would it maybe make
>> sense to only allow [0, global halt_poll_ns]?
> I believe that any value is safe; a very large value effectively disables
> halt-polling, which is equivalent to setting a value of zero to explicitly
> disable it, which is legal.

Doesn't a large value make KVM poll all the time?  But you could do that
just by running "for (;;)" so there's no reason to limit the parameter.

Paolo
Jon Cargille April 20, 2020, 9:20 p.m. UTC | #5
On Mon, Apr 20, 2020 at 2:10 PM Paolo Bonzini <pbonzini@redhat.com> wrote:
>
> On 20/04/20 20:47, Jon Cargille wrote:
> >> Is it safe to allow any value from userspace here or would it maybe make
> >> sense to only allow [0, global halt_poll_ns]?
> > I believe that any value is safe; a very large value effectively disables
> > halt-polling, which is equivalent to setting a value of zero to explicitly
> > disable it, which is legal.
>
> Doesn't a large value make KVM poll all the time?  But you could do that
> just by running "for (;;)" so there's no reason to limit the parameter.

Yes, I misspoke; apologies. A large number will cause KVM to poll for a
long time; as long as the thread can be preempted, we don't see any
problem with that.

Thanks,

Jon

>
> Paolo
>
Jim Mattson April 22, 2020, 9:36 p.m. UTC | #6
On Sun, Apr 19, 2020 at 1:46 PM Vitaly Kuznetsov <vkuznets@redhat.com> wrote:
>
> Jon Cargille <jcargill@google.com> writes:
>
> > From: David Matlack <dmatlack@google.com>
> >
> > KVM_CAP_HALT_POLL is a per-VM capability that lets userspace
> > control the halt-polling time, allowing halt-polling to be tuned or
> > disabled on particular VMs.
> >
> > With dynamic halt-polling, a VM's VCPUs can poll from anywhere from
> > [0, halt_poll_ns] on each halt. KVM_CAP_HALT_POLL sets the
> > upper limit on the poll time.
>
> Out of pure curiosity, why is this a per-VM and not a per-VCPU property?

It didn't seem to be worth doing the plumbing for an
architecture-agnostic per-vCPU property (which doesn't exist today).

> >
> > Signed-off-by: David Matlack <dmatlack@google.com>
> > Signed-off-by: Jon Cargille <jcargill@google.com>
> > Reviewed-by: Jim Mattson <jmattson@google.com>
> > ---
> >  Documentation/virt/kvm/api.rst | 17 +++++++++++++++++
> >  include/linux/kvm_host.h       |  1 +
> >  include/uapi/linux/kvm.h       |  1 +
> >  virt/kvm/kvm_main.c            | 19 +++++++++++++++----
> >  4 files changed, 34 insertions(+), 4 deletions(-)
> >
> > diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> > index efbbe570aa9b7b..d871dacb984e98 100644
> > --- a/Documentation/virt/kvm/api.rst
> > +++ b/Documentation/virt/kvm/api.rst
> > @@ -5802,6 +5802,23 @@ If present, this capability can be enabled for a VM, meaning that KVM
> >  will allow the transition to secure guest mode.  Otherwise KVM will
> >  veto the transition.
> >
> > +7.20 KVM_CAP_HALT_POLL
> > +----------------------
> > +
> > +:Architectures: all
> > +:Target: VM
> > +:Parameters: args[0] is the maximum poll time in nanoseconds
> > +:Returns: 0 on success; -1 on error
> > +
> > +This capability overrides the kvm module parameter halt_poll_ns for the
> > +target VM.
> > +
> > +VCPU polling allows a VCPU to poll for wakeup events instead of immediately
> > +scheduling during guest halts. The maximum time a VCPU can spend polling is
> > +controlled by the kvm module parameter halt_poll_ns. This capability allows
> > +the maximum halt time to specified on a per-VM basis, effectively overriding
> > +the module parameter for the target VM.
> > +
> >  8. Other capabilities.
> >  ======================
> >
> > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > index 6d58beb65454f7..922b24ce5e7297 100644
> > --- a/include/linux/kvm_host.h
> > +++ b/include/linux/kvm_host.h
> > @@ -503,6 +503,7 @@ struct kvm {
> >       struct srcu_struct srcu;
> >       struct srcu_struct irq_srcu;
> >       pid_t userspace_pid;
> > +     unsigned int max_halt_poll_ns;
> >  };
> >
> >  #define kvm_err(fmt, ...) \
> > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> > index 428c7dde6b4b37..ac9eba0289d1b6 100644
> > --- a/include/uapi/linux/kvm.h
> > +++ b/include/uapi/linux/kvm.h
> > @@ -1017,6 +1017,7 @@ struct kvm_ppc_resize_hpt {
> >  #define KVM_CAP_S390_VCPU_RESETS 179
> >  #define KVM_CAP_S390_PROTECTED 180
> >  #define KVM_CAP_PPC_SECURE_GUEST 181
> > +#define KVM_CAP_HALT_POLL 182
> >
> >  #ifdef KVM_CAP_IRQ_ROUTING
> >
> > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > index 74bdb7bf32952e..ec038a9e60a275 100644
> > --- a/virt/kvm/kvm_main.c
> > +++ b/virt/kvm/kvm_main.c
> > @@ -710,6 +710,8 @@ static struct kvm *kvm_create_vm(unsigned long type)
> >                       goto out_err_no_arch_destroy_vm;
> >       }
> >
> > +     kvm->max_halt_poll_ns = halt_poll_ns;
> > +
> >       r = kvm_arch_init_vm(kvm, type);
> >       if (r)
> >               goto out_err_no_arch_destroy_vm;
> > @@ -2716,15 +2718,16 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
> >       if (!kvm_arch_no_poll(vcpu)) {
> >               if (!vcpu_valid_wakeup(vcpu)) {
> >                       shrink_halt_poll_ns(vcpu);
> > -             } else if (halt_poll_ns) {
> > +             } else if (vcpu->kvm->max_halt_poll_ns) {
> >                       if (block_ns <= vcpu->halt_poll_ns)
> >                               ;
> >                       /* we had a long block, shrink polling */
> > -                     else if (vcpu->halt_poll_ns && block_ns > halt_poll_ns)
> > +                     else if (vcpu->halt_poll_ns &&
> > +                                     block_ns > vcpu->kvm->max_halt_poll_ns)
> >                               shrink_halt_poll_ns(vcpu);
> >                       /* we had a short halt and our poll time is too small */
> > -                     else if (vcpu->halt_poll_ns < halt_poll_ns &&
> > -                             block_ns < halt_poll_ns)
> > +                     else if (vcpu->halt_poll_ns < vcpu->kvm->max_halt_poll_ns &&
> > +                                     block_ns < vcpu->kvm->max_halt_poll_ns)
> >                               grow_halt_poll_ns(vcpu);
> >               } else {
> >                       vcpu->halt_poll_ns = 0;
> > @@ -3516,6 +3519,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
> >       case KVM_CAP_IOEVENTFD_ANY_LENGTH:
> >       case KVM_CAP_CHECK_EXTENSION_VM:
> >       case KVM_CAP_ENABLE_CAP_VM:
> > +     case KVM_CAP_HALT_POLL:
> >               return 1;
> >  #ifdef CONFIG_KVM_MMIO
> >       case KVM_CAP_COALESCED_MMIO:
> > @@ -3566,6 +3570,13 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm,
> >               return 0;
> >       }
> >  #endif
> > +     case KVM_CAP_HALT_POLL: {
> > +             if (cap->flags || cap->args[0] != (unsigned int)cap->args[0])
> > +                     return -EINVAL;
> > +
> > +             kvm->max_halt_poll_ns = cap->args[0];
>
> Is it safe to allow any value from userspace here or would it maybe make
> sense to only allow [0, global halt_poll_ns]?

Would that restriction help to get this change accepted?

> > +             return 0;
> > +     }
> >       default:
> >               return kvm_vm_ioctl_enable_cap(kvm, cap);
> >       }
>
> --
> Vitaly
>
Paolo Bonzini April 23, 2020, 1:09 p.m. UTC | #7
On 22/04/20 23:36, Jim Mattson wrote:
>>> +     case KVM_CAP_HALT_POLL: {
>>> +             if (cap->flags || cap->args[0] != (unsigned int)cap->args[0])
>>> +                     return -EINVAL;
>>> +
>>> +             kvm->max_halt_poll_ns = cap->args[0];
>> Is it safe to allow any value from userspace here or would it maybe make
>> sense to only allow [0, global halt_poll_ns]?
> Would that restriction help to get this change accepted?
> 

No, in the sense that I'm applying it already.

Paolo

Patch

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index efbbe570aa9b7b..d871dacb984e98 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -5802,6 +5802,23 @@  If present, this capability can be enabled for a VM, meaning that KVM
 will allow the transition to secure guest mode.  Otherwise KVM will
 veto the transition.
 
+7.20 KVM_CAP_HALT_POLL
+----------------------
+
+:Architectures: all
+:Target: VM
+:Parameters: args[0] is the maximum poll time in nanoseconds
+:Returns: 0 on success; -1 on error
+
+This capability overrides the kvm module parameter halt_poll_ns for the
+target VM.
+
+VCPU polling allows a VCPU to poll for wakeup events instead of immediately
+scheduling during guest halts. The maximum time a VCPU can spend polling is
+controlled by the kvm module parameter halt_poll_ns. This capability allows
+the maximum halt time to be specified on a per-VM basis, effectively overriding
+the module parameter for the target VM.
+
 8. Other capabilities.
 ======================
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 6d58beb65454f7..922b24ce5e7297 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -503,6 +503,7 @@  struct kvm {
 	struct srcu_struct srcu;
 	struct srcu_struct irq_srcu;
 	pid_t userspace_pid;
+	unsigned int max_halt_poll_ns;
 };
 
 #define kvm_err(fmt, ...) \
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 428c7dde6b4b37..ac9eba0289d1b6 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1017,6 +1017,7 @@  struct kvm_ppc_resize_hpt {
 #define KVM_CAP_S390_VCPU_RESETS 179
 #define KVM_CAP_S390_PROTECTED 180
 #define KVM_CAP_PPC_SECURE_GUEST 181
+#define KVM_CAP_HALT_POLL 182
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 74bdb7bf32952e..ec038a9e60a275 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -710,6 +710,8 @@  static struct kvm *kvm_create_vm(unsigned long type)
 			goto out_err_no_arch_destroy_vm;
 	}
 
+	kvm->max_halt_poll_ns = halt_poll_ns;
+
 	r = kvm_arch_init_vm(kvm, type);
 	if (r)
 		goto out_err_no_arch_destroy_vm;
@@ -2716,15 +2718,16 @@  void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 	if (!kvm_arch_no_poll(vcpu)) {
 		if (!vcpu_valid_wakeup(vcpu)) {
 			shrink_halt_poll_ns(vcpu);
-		} else if (halt_poll_ns) {
+		} else if (vcpu->kvm->max_halt_poll_ns) {
 			if (block_ns <= vcpu->halt_poll_ns)
 				;
 			/* we had a long block, shrink polling */
-			else if (vcpu->halt_poll_ns && block_ns > halt_poll_ns)
+			else if (vcpu->halt_poll_ns &&
+					block_ns > vcpu->kvm->max_halt_poll_ns)
 				shrink_halt_poll_ns(vcpu);
 			/* we had a short halt and our poll time is too small */
-			else if (vcpu->halt_poll_ns < halt_poll_ns &&
-				block_ns < halt_poll_ns)
+			else if (vcpu->halt_poll_ns < vcpu->kvm->max_halt_poll_ns &&
+					block_ns < vcpu->kvm->max_halt_poll_ns)
 				grow_halt_poll_ns(vcpu);
 		} else {
 			vcpu->halt_poll_ns = 0;
@@ -3516,6 +3519,7 @@  static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
 	case KVM_CAP_IOEVENTFD_ANY_LENGTH:
 	case KVM_CAP_CHECK_EXTENSION_VM:
 	case KVM_CAP_ENABLE_CAP_VM:
+	case KVM_CAP_HALT_POLL:
 		return 1;
 #ifdef CONFIG_KVM_MMIO
 	case KVM_CAP_COALESCED_MMIO:
@@ -3566,6 +3570,13 @@  static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm,
 		return 0;
 	}
 #endif
+	case KVM_CAP_HALT_POLL: {
+		if (cap->flags || cap->args[0] != (unsigned int)cap->args[0])
+			return -EINVAL;
+
+		kvm->max_halt_poll_ns = cap->args[0];
+		return 0;
+	}
 	default:
 		return kvm_vm_ioctl_enable_cap(kvm, cap);
 	}
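
As an aside, the adjustment logic this patch changes in kvm_vcpu_block() can be summarized in a standalone sketch. Here grow() and shrink() are crude stand-ins for the kernel's grow_halt_poll_ns()/shrink_halt_poll_ns() (the real helpers scale by the halt_poll_ns_grow/halt_poll_ns_shrink module parameters), and the 10000 ns starting window is arbitrary; the point is that the per-VM kvm->max_halt_poll_ns, seeded from the halt_poll_ns module parameter at VM creation, now bounds the per-vCPU dynamic poll window instead of the global parameter.

/*
 * Simplified, standalone sketch of the poll-window adjustment after this
 * patch; block_ns is how long the vCPU actually blocked.
 */
#include <stdio.h>

static void grow(unsigned int *poll_ns)
{
	*poll_ns = *poll_ns ? *poll_ns * 2 : 10000;	/* stand-in for grow_halt_poll_ns() */
}

static void shrink(unsigned int *poll_ns)
{
	*poll_ns /= 2;					/* stand-in for shrink_halt_poll_ns() */
}

static void adjust_poll_window(unsigned int *poll_ns, unsigned long block_ns,
			       unsigned int max_halt_poll_ns)
{
	if (!max_halt_poll_ns)
		*poll_ns = 0;				/* polling disabled for this VM */
	else if (block_ns <= *poll_ns)
		;					/* window already covered the halt */
	else if (*poll_ns && block_ns > max_halt_poll_ns)
		shrink(poll_ns);			/* long block: polling was wasted work */
	else if (*poll_ns < max_halt_poll_ns && block_ns < max_halt_poll_ns)
		grow(poll_ns);				/* short halt just missed: widen the window */
}

int main(void)
{
	unsigned int poll_ns = 10000;

	adjust_poll_window(&poll_ns, 15000, 200000);	/* short miss -> window grows */
	printf("poll window is now %u ns\n", poll_ns);
	return 0;
}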