diff mbox

[RFC,v2,1/6] kvm: add device control API

Message ID 1364856473-25245-2-git-send-email-scottwood@freescale.com (mailing list archive)
State New, archived
Headers show

Commit Message

Scott Wood April 1, 2013, 10:47 p.m. UTC
Currently, devices that are emulated inside KVM are configured in a
hardcoded manner based on an assumption that any given architecture
only has one way to do it.  If there's any need to access device state,
it is done through inflexible one-purpose-only IOCTLs (e.g.
KVM_GET/SET_LAPIC).  Defining new IOCTLs for every little thing is
cumbersome and depletes a limited numberspace.

This API provides a mechanism to instantiate a device of a certain
type, returning an ID that can be used to set/get attributes of the
device.  Attributes may include configuration parameters (e.g.
register base address), device state, operational commands, etc.  It
is similar to the ONE_REG API, except that it acts on devices rather
than vcpus.

Both device types and individual attributes can be tested without having
to create the device or get/set the attribute, without the need for
separately managing enumerated capabilities.

Signed-off-by: Scott Wood <scottwood@freescale.com>
---
 Documentation/virtual/kvm/api.txt        |   70 ++++++++++++++++++++++++++++++
 Documentation/virtual/kvm/devices/README |    1 +
 arch/powerpc/include/asm/kvm_host.h      |    6 +++
 arch/powerpc/include/asm/kvm_ppc.h       |    2 +
 arch/powerpc/kvm/powerpc.c               |    7 +++
 include/uapi/linux/kvm.h                 |   27 ++++++++++++
 virt/kvm/kvm_main.c                      |   31 +++++++++++++
 7 files changed, 144 insertions(+)
 create mode 100644 Documentation/virtual/kvm/devices/README

Comments

Tiejun Chen April 2, 2013, 6:59 a.m. UTC | #1
On 04/02/2013 06:47 AM, Scott Wood wrote:
> Currently, devices that are emulated inside KVM are configured in a
> hardcoded manner based on an assumption that any given architecture
> only has one way to do it.  If there's any need to access device state,
> it is done through inflexible one-purpose-only IOCTLs (e.g.
> KVM_GET/SET_LAPIC).  Defining new IOCTLs for every little thing is
> cumbersome and depletes a limited numberspace.
>
> This API provides a mechanism to instantiate a device of a certain
> type, returning an ID that can be used to set/get attributes of the
> device.  Attributes may include configuration parameters (e.g.
> register base address), device state, operational commands, etc.  It
> is similar to the ONE_REG API, except that it acts on devices rather
> than vcpus.
>
> Both device types and individual attributes can be tested without having
> to create the device or get/set the attribute, without the need for
> separately managing enumerated capabilities.
>
> Signed-off-by: Scott Wood <scottwood@freescale.com>
> ---
>   Documentation/virtual/kvm/api.txt        |   70 ++++++++++++++++++++++++++++++
>   Documentation/virtual/kvm/devices/README |    1 +
>   arch/powerpc/include/asm/kvm_host.h      |    6 +++
>   arch/powerpc/include/asm/kvm_ppc.h       |    2 +
>   arch/powerpc/kvm/powerpc.c               |    7 +++
>   include/uapi/linux/kvm.h                 |   27 ++++++++++++
>   virt/kvm/kvm_main.c                      |   31 +++++++++++++
>   7 files changed, 144 insertions(+)
>   create mode 100644 Documentation/virtual/kvm/devices/README
>
> diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
> index 976eb65..77328aa 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -2173,6 +2173,76 @@ header; first `n_valid' valid entries with contents from the data
>   written, then `n_invalid' invalid entries, invalidating any previously
>   valid entries found.
>
> +4.79 KVM_CREATE_DEVICE
> +
> +Capability: KVM_CAP_DEVICE_CTRL
> +Type: vm ioctl
> +Parameters: struct kvm_create_device (in/out)
> +Returns: 0 on success, -1 on error
> +Errors:
> +  ENODEV: The device type is unknown or unsupported
> +  EEXIST: Device already created, and this type of device may not
> +          be instantiated multiple times
> +  ENOSPC: Too many devices have been created
> +
> +  Other error conditions may be defined by individual device types.
> +
> +Creates an emulated device in the kernel.  The file descriptor returned
> +in fd can be used with KVM_SET/GET/HAS_DEVICE_ATTR.
> +
> +If the KVM_CREATE_DEVICE_TEST flag is set, only test whether the
> +device type is supported (not necessarily whether it can be created
> +in the current vm).
> +
> +Individual devices should not define flags.  Attributes should be used
> +for specifying any behavior that is not implied by the device type
> +number.
> +
> +struct kvm_create_device {
> +	__u32	type;	/* in: KVM_DEV_TYPE_xxx */
> +	__u32	fd;	/* out: device handle */
> +	__u32	flags;	/* in: KVM_CREATE_DEVICE_xxx */
> +};
> +
> +4.80 KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR
> +
> +Capability: KVM_CAP_DEVICE_CTRL
> +Type: device ioctl
> +Parameters: struct kvm_device_attr
> +Returns: 0 on success, -1 on error
> +Errors:
> +  ENXIO:  The group or attribute is unknown/unsupported for this device
> +  EPERM:  The attribute cannot (currently) be accessed this way
> +          (e.g. read-only attribute, or attribute that only makes
> +          sense when the device is in a different state)
> +
> +  Other error conditions may be defined by individual device types.
> +
> +Gets/sets a specified piece of device configuration and/or state.  The
> +semantics are device-specific.  See individual device documentation in
> +the "devices" directory.  As with ONE_REG, the size of the data
> +transferred is defined by the particular attribute.
> +
> +struct kvm_device_attr {
> +	__u32	flags;		/* no flags currently defined */
> +	__u32	group;		/* device-defined */
> +	__u64	attr;		/* group-defined */
> +	__u64	addr;		/* userspace address of attr data */
> +};
> +
> +4.81 KVM_HAS_DEVICE_ATTR
> +
> +Capability: KVM_CAP_DEVICE_CTRL
> +Type: device ioctl
> +Parameters: struct kvm_device_attr
> +Returns: 0 on success, -1 on error
> +Errors:
> +  ENXIO:  The group or attribute is unknown/unsupported for this device
> +
> +Tests whether a device supports a particular attribute.  A successful
> +return indicates the attribute is implemented.  It does not necessarily
> +indicate that the attribute can be read or written in the device's
> +current state.  "addr" is ignored.
>
>   4.77 KVM_ARM_VCPU_INIT
>
> diff --git a/Documentation/virtual/kvm/devices/README b/Documentation/virtual/kvm/devices/README
> new file mode 100644
> index 0000000..34a6983
> --- /dev/null
> +++ b/Documentation/virtual/kvm/devices/README
> @@ -0,0 +1 @@
> +This directory contains specific device bindings for KVM_CAP_DEVICE_CTRL.
> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
> index e34f8fe..e0caae2 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -370,6 +370,9 @@ struct kvmppc_booke_debug_reg {
>   	u64 dac[KVMPPC_BOOKE_MAX_DAC];
>   };
>
> +#define KVMPPC_IRQCHIP_NONE	0
> +#define KVMPPC_IRQCHIP_MPIC	1
> +
>   struct kvm_vcpu_arch {
>   	ulong host_stack;
>   	u32 host_pid;
> @@ -549,6 +552,9 @@ struct kvm_vcpu_arch {
>   	unsigned long magic_page_pa; /* phys addr to map the magic page to */
>   	unsigned long magic_page_ea; /* effect. addr to map the magic page to */
>
> +	int irqchip_type;
> +	void *irqchip_priv;
> +
>   #ifdef CONFIG_KVM_BOOK3S_64_HV
>   	struct kvm_vcpu_arch_shared shregs;
>
> diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
> index f589307..f44932c 100644
> --- a/arch/powerpc/include/asm/kvm_ppc.h
> +++ b/arch/powerpc/include/asm/kvm_ppc.h
> @@ -323,4 +323,6 @@ static inline ulong kvmppc_get_ea_indexed(struct kvm_vcpu *vcpu, int ra, int rb)
>   	return ea;
>   }
>
> +void mpic_put(void *priv);
> +
>   #endif /* __POWERPC_KVM_PPC_H__ */
> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
> index 16b4595..bdfa526 100644
> --- a/arch/powerpc/kvm/powerpc.c
> +++ b/arch/powerpc/kvm/powerpc.c
> @@ -459,6 +459,13 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
>   	tasklet_kill(&vcpu->arch.tasklet);
>
>   	kvmppc_remove_vcpu_debugfs(vcpu);
> +
> +	switch (vcpu->arch.irqchip_type) {
> +	case KVMPPC_IRQCHIP_MPIC:
> +		mpic_put(vcpu->arch.irqchip_priv);
> +		break;
> +	}
> +
>   	kvmppc_core_vcpu_free(vcpu);
>   }
>
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 74d0ff3..20ce2d2 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -668,6 +668,7 @@ struct kvm_ppc_smmu_info {
>   #define KVM_CAP_PPC_EPR 86
>   #define KVM_CAP_ARM_PSCI 87
>   #define KVM_CAP_ARM_SET_DEVICE_ADDR 88
> +#define KVM_CAP_DEVICE_CTRL 89
>
>   #ifdef KVM_CAP_IRQ_ROUTING
>
> @@ -909,6 +910,32 @@ struct kvm_s390_ucas_mapping {
>   #define KVM_ARM_SET_DEVICE_ADDR	  _IOW(KVMIO,  0xab, struct kvm_arm_device_addr)
>
>   /*
> + * Device control API, available with KVM_CAP_DEVICE_CTRL
> + */
> +#define KVM_CREATE_DEVICE_TEST		1
> +
> +struct kvm_create_device {
> +	__u32	type;	/* in: KVM_DEV_TYPE_xxx */
> +	__u32	fd;	/* out: device handle */
> +	__u32	flags;	/* in: KVM_CREATE_DEVICE_xxx */
> +};
> +
> +struct kvm_device_attr {
> +	__u32	flags;		/* no flags currently defined */
> +	__u32	group;		/* device-defined */
> +	__u64	attr;		/* group-defined */
> +	__u64	addr;		/* userspace address of attr data */
> +};
> +
> +/* ioctl for vm fd */
> +#define KVM_CREATE_DEVICE	  _IOWR(KVMIO,  0xe0, struct kvm_create_device)
> +
> +/* ioctls for fds returned by KVM_CREATE_DEVICE */
> +#define KVM_SET_DEVICE_ATTR	  _IOW(KVMIO,  0xe1, struct kvm_device_attr)
> +#define KVM_GET_DEVICE_ATTR	  _IOW(KVMIO,  0xe2, struct kvm_device_attr)
> +#define KVM_HAS_DEVICE_ATTR	  _IOW(KVMIO,  0xe3, struct kvm_device_attr)
> +
> +/*
>    * ioctls for vcpu fds
>    */
>   #define KVM_RUN                   _IO(KVMIO,   0x80)
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index ff71541..ed033c0 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -2158,6 +2158,17 @@ out:
>   }
>   #endif
>
> +static int kvm_ioctl_create_device(struct kvm *kvm,
> +				   struct kvm_create_device *cd)
> +{
> +	bool test = cd->flags & KVM_CREATE_DEVICE_TEST;
> +
> +	switch (cd->type) {
> +	default:
> +		return -ENODEV;
> +	}

Even after apply patch 5, looks here still misses something like:

	if (test)
		WARN_ON_ONCE(!cd->type);

	return -ENODEV;

Tiejun

> +}
> +
>   static long kvm_vm_ioctl(struct file *filp,
>   			   unsigned int ioctl, unsigned long arg)
>   {
> @@ -2272,6 +2283,26 @@ static long kvm_vm_ioctl(struct file *filp,
>   		break;
>   	}
>   #endif
> +	case KVM_CREATE_DEVICE: {
> +		struct kvm_create_device cd;
> +
> +		r = -EFAULT;
> +		if (copy_from_user(&cd, argp, sizeof(cd)))
> +			goto out;
> +
> +		mutex_lock(&kvm->lock);
> +		r = kvm_ioctl_create_device(kvm, &cd);
> +		mutex_unlock(&kvm->lock);
> +		if (r)
> +			goto out;
> +
> +		r = -EFAULT;
> +		if (copy_to_user(argp, &cd, sizeof(cd)))
> +			goto out;
> +
> +		r = 0;
> +		break;
> +	}
>   	default:
>   		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
>   		if (r == -ENOTTY)
>

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paul Mackerras April 3, 2013, 1:02 a.m. UTC | #2
On Mon, Apr 01, 2013 at 05:47:48PM -0500, Scott Wood wrote:
> Currently, devices that are emulated inside KVM are configured in a
> hardcoded manner based on an assumption that any given architecture
> only has one way to do it.  If there's any need to access device state,
> it is done through inflexible one-purpose-only IOCTLs (e.g.
> KVM_GET/SET_LAPIC).  Defining new IOCTLs for every little thing is
> cumbersome and depletes a limited numberspace.
> 
> This API provides a mechanism to instantiate a device of a certain
> type, returning an ID that can be used to set/get attributes of the
> device.  Attributes may include configuration parameters (e.g.
> register base address), device state, operational commands, etc.  It
> is similar to the ONE_REG API, except that it acts on devices rather
> than vcpus.
> 
> Both device types and individual attributes can be tested without having
> to create the device or get/set the attribute, without the need for
> separately managing enumerated capabilities.
> 
> Signed-off-by: Scott Wood <scottwood@freescale.com>

Some comments below...

> diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
> index 976eb65..77328aa 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -2173,6 +2173,76 @@ header; first `n_valid' valid entries with contents from the data
>  written, then `n_invalid' invalid entries, invalidating any previously
>  valid entries found.
>  
> +4.79 KVM_CREATE_DEVICE
> +
> +Capability: KVM_CAP_DEVICE_CTRL

I notice this patch doesn't add this capability; you add it in a later
patch.  Since this patch adds the KVM_CREATE_DEVICE ioctl, it probably
should add the KVM_CAP_DEVICE_CTRL capability too.


> +Type: vm ioctl
> +Parameters: struct kvm_create_device (in/out)
> +Returns: 0 on success, -1 on error
> +Errors:
> +  ENODEV: The device type is unknown or unsupported
> +  EEXIST: Device already created, and this type of device may not
> +          be instantiated multiple times
> +  ENOSPC: Too many devices have been created

Is this still a possible error code?

> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -370,6 +370,9 @@ struct kvmppc_booke_debug_reg {
>  	u64 dac[KVMPPC_BOOKE_MAX_DAC];
>  };
>  
> +#define KVMPPC_IRQCHIP_NONE	0
> +#define KVMPPC_IRQCHIP_MPIC	1

This define should go in the patch that adds the MPIC device.

>  struct kvm_vcpu_arch {
>  	ulong host_stack;
>  	u32 host_pid;
> @@ -549,6 +552,9 @@ struct kvm_vcpu_arch {
>  	unsigned long magic_page_pa; /* phys addr to map the magic page to */
>  	unsigned long magic_page_ea; /* effect. addr to map the magic page to */
>  
> +	int irqchip_type;
> +	void *irqchip_priv;

Since you add this (irqchip_priv) only to remove it in a later patch
and replace it by a device-specific pointer, why bother adding it
here?  And why not give irqchip_type the name it ultimately ends up
with?

> diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
> index 16b4595..bdfa526 100644
> --- a/arch/powerpc/kvm/powerpc.c
> +++ b/arch/powerpc/kvm/powerpc.c
> @@ -459,6 +459,13 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
>  	tasklet_kill(&vcpu->arch.tasklet);
>  
>  	kvmppc_remove_vcpu_debugfs(vcpu);
> +
> +	switch (vcpu->arch.irqchip_type) {
> +	case KVMPPC_IRQCHIP_MPIC:
> +		mpic_put(vcpu->arch.irqchip_priv);
> +		break;
> +	}

This is going to break bisection, since you don't define mpic_put() in
this patch.

> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 74d0ff3..20ce2d2 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -668,6 +668,7 @@ struct kvm_ppc_smmu_info {
>  #define KVM_CAP_PPC_EPR 86
>  #define KVM_CAP_ARM_PSCI 87
>  #define KVM_CAP_ARM_SET_DEVICE_ADDR 88
> +#define KVM_CAP_DEVICE_CTRL 89
>  
>  #ifdef KVM_CAP_IRQ_ROUTING
>  
> @@ -909,6 +910,32 @@ struct kvm_s390_ucas_mapping {
>  #define KVM_ARM_SET_DEVICE_ADDR	  _IOW(KVMIO,  0xab, struct kvm_arm_device_addr)
>  
>  /*
> + * Device control API, available with KVM_CAP_DEVICE_CTRL
> + */
> +#define KVM_CREATE_DEVICE_TEST		1
> +
> +struct kvm_create_device {
> +	__u32	type;	/* in: KVM_DEV_TYPE_xxx */
> +	__u32	fd;	/* out: device handle */
> +	__u32	flags;	/* in: KVM_CREATE_DEVICE_xxx */
> +};
> +
> +struct kvm_device_attr {
> +	__u32	flags;		/* no flags currently defined */
> +	__u32	group;		/* device-defined */
> +	__u64	attr;		/* group-defined */
> +	__u64	addr;		/* userspace address of attr data */
> +};
> +
> +/* ioctl for vm fd */
> +#define KVM_CREATE_DEVICE	  _IOWR(KVMIO,  0xe0, struct kvm_create_device)

This define should go with the other VM ioctls, otherwise the next
person to add a VM ioctl will probably miss it and reuse the 0xe0
code.

Paul.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tiejun Chen April 3, 2013, 1:28 a.m. UTC | #3
On 04/03/2013 01:30 AM, Scott Wood wrote:
> On 04/02/2013 01:59:57 AM, tiejun.chen wrote:
>> On 04/02/2013 06:47 AM, Scott Wood wrote:
>>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>>> index ff71541..ed033c0 100644
>>> --- a/virt/kvm/kvm_main.c
>>> +++ b/virt/kvm/kvm_main.c
>>> @@ -2158,6 +2158,17 @@ out:
>>>   }
>>>   #endif
>>>
>>> +static int kvm_ioctl_create_device(struct kvm *kvm,
>>> +                   struct kvm_create_device *cd)
>>> +{
>>> +    bool test = cd->flags & KVM_CREATE_DEVICE_TEST;
>>> +
>>> +    switch (cd->type) {
>>> +    default:
>>> +        return -ENODEV;
>>> +    }
>>
>> Even after apply patch 5, looks here still misses something like:
>>
>>     if (test)
>>         WARN_ON_ONCE(!cd->type);
>
> Why?  How does userspace passing in a bad type value mean the kernel needs to
> report internal badness, why is a value of zero worse than any other bad value,
> and why only when the test flag is set?

I just mean we need do something here since looks the 'test' variable is defined 
but unused, right? But please correct this as you expect :)

And if the userspace can't guarantee cd->type is never zero, we should return 
-ENODEV as well after that switch().

Tiejun
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tiejun Chen April 3, 2013, 1:42 a.m. UTC | #4
On 04/03/2013 09:34 AM, Scott Wood wrote:
> On 04/02/2013 08:28:01 PM, tiejun.chen wrote:
>> On 04/03/2013 01:30 AM, Scott Wood wrote:
>>> On 04/02/2013 01:59:57 AM, tiejun.chen wrote:
>>>> On 04/02/2013 06:47 AM, Scott Wood wrote:
>>>>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>>>>> index ff71541..ed033c0 100644
>>>>> --- a/virt/kvm/kvm_main.c
>>>>> +++ b/virt/kvm/kvm_main.c
>>>>> @@ -2158,6 +2158,17 @@ out:
>>>>>   }
>>>>>   #endif
>>>>>
>>>>> +static int kvm_ioctl_create_device(struct kvm *kvm,
>>>>> +                   struct kvm_create_device *cd)
>>>>> +{
>>>>> +    bool test = cd->flags & KVM_CREATE_DEVICE_TEST;
>>>>> +
>>>>> +    switch (cd->type) {
>>>>> +    default:
>>>>> +        return -ENODEV;
>>>>> +    }
>>>>
>>>> Even after apply patch 5, looks here still misses something like:
>>>>
>>>>     if (test)
>>>>         WARN_ON_ONCE(!cd->type);
>>>
>>> Why?  How does userspace passing in a bad type value mean the kernel needs to
>>> report internal badness, why is a value of zero worse than any other bad value,
>>> and why only when the test flag is set?
>>
>> I just mean we need do something here since looks the 'test' variable is
>> defined but unused, right? But please correct this as you expect :)
>
> Yes, it's unused in this patch, but is used after patch 5 is applied.  I didn't
> think it was worth adding a temporary unused annotation, since this part of the
> kernel doesn't use -Werror.

Yes, its accepted in !-Werror case if we shouldn't warn something as you said.

Tiejun

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paul Mackerras April 3, 2013, 2:17 a.m. UTC | #5
On Tue, Apr 02, 2013 at 08:19:56PM -0500, Scott Wood wrote:
> On 04/02/2013 08:02:39 PM, Paul Mackerras wrote:
> >On Mon, Apr 01, 2013 at 05:47:48PM -0500, Scott Wood wrote:
> >> +4.79 KVM_CREATE_DEVICE
> >> +
> >> +Capability: KVM_CAP_DEVICE_CTRL
> >
> >I notice this patch doesn't add this capability;
> 
> Yes, it does (see below).
> 
> >you add it in a later patch.
> 
> Maybe you're thinking of KVM_CAP_IRQ_MPIC?

No, I was referring to the addition to kvm_dev_ioctl_check_extension()
of a KVM_CAP_DEVICE_CTRL case.  Since this patch adds the code to handle
KVM_CREATE_DEVICE ioctl it should also add the code to return 1 if
userspace queries the KVM_CAP_DEVICE_CTRL capability.

> >> +/* ioctl for vm fd */
> >> +#define KVM_CREATE_DEVICE	  _IOWR(KVMIO,  0xe0, struct
> >kvm_create_device)
> >
> >This define should go with the other VM ioctls, otherwise the next
> >person to add a VM ioctl will probably miss it and reuse the 0xe0
> >code.
> 
> That's actually why I moved it to a new section, with device control
> ioctls getting their own range, as the legacy "device model" and
> some other things did.  0xe0 is not the next ioctl that would be
> used for either vm or vcpu.  The ioctl numbering is actually already
> a mess, with sometimes care being taken to keep vcpu and vm ioctls
> from overlapping, but on other places overlapping does happen.  I'm
> not sure what exactly I should do here.

Well, even if you are using a new range, I still think that
KVM_CREATE_DEVICE, being a VM ioctl, should go next to the other VM
ioctls.  I guess it's ultimately up to the maintainers.

Paul.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Graf April 3, 2013, 1:22 p.m. UTC | #6
On 03.04.2013, at 04:17, Paul Mackerras wrote:

> On Tue, Apr 02, 2013 at 08:19:56PM -0500, Scott Wood wrote:
>> On 04/02/2013 08:02:39 PM, Paul Mackerras wrote:
>>> On Mon, Apr 01, 2013 at 05:47:48PM -0500, Scott Wood wrote:
>>>> +4.79 KVM_CREATE_DEVICE
>>>> +
>>>> +Capability: KVM_CAP_DEVICE_CTRL
>>> 
>>> I notice this patch doesn't add this capability;
>> 
>> Yes, it does (see below).
>> 
>>> you add it in a later patch.
>> 
>> Maybe you're thinking of KVM_CAP_IRQ_MPIC?
> 
> No, I was referring to the addition to kvm_dev_ioctl_check_extension()
> of a KVM_CAP_DEVICE_CTRL case.  Since this patch adds the code to handle
> KVM_CREATE_DEVICE ioctl it should also add the code to return 1 if
> userspace queries the KVM_CAP_DEVICE_CTRL capability.
> 
>>>> +/* ioctl for vm fd */
>>>> +#define KVM_CREATE_DEVICE	  _IOWR(KVMIO,  0xe0, struct
>>> kvm_create_device)
>>> 
>>> This define should go with the other VM ioctls, otherwise the next
>>> person to add a VM ioctl will probably miss it and reuse the 0xe0
>>> code.
>> 
>> That's actually why I moved it to a new section, with device control
>> ioctls getting their own range, as the legacy "device model" and
>> some other things did.  0xe0 is not the next ioctl that would be
>> used for either vm or vcpu.  The ioctl numbering is actually already
>> a mess, with sometimes care being taken to keep vcpu and vm ioctls
>> from overlapping, but on other places overlapping does happen.  I'm
>> not sure what exactly I should do here.
> 
> Well, even if you are using a new range, I still think that
> KVM_CREATE_DEVICE, being a VM ioctl, should go next to the other VM
> ioctls.  I guess it's ultimately up to the maintainers.

I agree. Things get confusing for VM ioctls otherwise.


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Scott Wood April 3, 2013, 9:03 p.m. UTC | #7
On 04/02/2013 09:17:58 PM, Paul Mackerras wrote:
> On Tue, Apr 02, 2013 at 08:19:56PM -0500, Scott Wood wrote:
> > On 04/02/2013 08:02:39 PM, Paul Mackerras wrote:
> > >On Mon, Apr 01, 2013 at 05:47:48PM -0500, Scott Wood wrote:
> > >> +4.79 KVM_CREATE_DEVICE
> > >> +
> > >> +Capability: KVM_CAP_DEVICE_CTRL
> > >
> > >I notice this patch doesn't add this capability;
> >
> > Yes, it does (see below).
> >
> > >you add it in a later patch.
> >
> > Maybe you're thinking of KVM_CAP_IRQ_MPIC?
> 
> No, I was referring to the addition to kvm_dev_ioctl_check_extension()
> of a KVM_CAP_DEVICE_CTRL case.  Since this patch adds the code to  
> handle
> KVM_CREATE_DEVICE ioctl it should also add the code to return 1 if
> userspace queries the KVM_CAP_DEVICE_CTRL capability.

Ah.  In that case we should probably recognize the capability in  
generic code rather than ppc code.

-Scott
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 976eb65..77328aa 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2173,6 +2173,76 @@  header; first `n_valid' valid entries with contents from the data
 written, then `n_invalid' invalid entries, invalidating any previously
 valid entries found.
 
+4.79 KVM_CREATE_DEVICE
+
+Capability: KVM_CAP_DEVICE_CTRL
+Type: vm ioctl
+Parameters: struct kvm_create_device (in/out)
+Returns: 0 on success, -1 on error
+Errors:
+  ENODEV: The device type is unknown or unsupported
+  EEXIST: Device already created, and this type of device may not
+          be instantiated multiple times
+  ENOSPC: Too many devices have been created
+
+  Other error conditions may be defined by individual device types.
+
+Creates an emulated device in the kernel.  The file descriptor returned
+in fd can be used with KVM_SET/GET/HAS_DEVICE_ATTR.
+
+If the KVM_CREATE_DEVICE_TEST flag is set, only test whether the
+device type is supported (not necessarily whether it can be created
+in the current vm).
+
+Individual devices should not define flags.  Attributes should be used
+for specifying any behavior that is not implied by the device type
+number.
+
+struct kvm_create_device {
+	__u32	type;	/* in: KVM_DEV_TYPE_xxx */
+	__u32	fd;	/* out: device handle */
+	__u32	flags;	/* in: KVM_CREATE_DEVICE_xxx */
+};
+
+4.80 KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR
+
+Capability: KVM_CAP_DEVICE_CTRL
+Type: device ioctl
+Parameters: struct kvm_device_attr
+Returns: 0 on success, -1 on error
+Errors:
+  ENXIO:  The group or attribute is unknown/unsupported for this device
+  EPERM:  The attribute cannot (currently) be accessed this way
+          (e.g. read-only attribute, or attribute that only makes
+          sense when the device is in a different state)
+
+  Other error conditions may be defined by individual device types.
+
+Gets/sets a specified piece of device configuration and/or state.  The
+semantics are device-specific.  See individual device documentation in
+the "devices" directory.  As with ONE_REG, the size of the data
+transferred is defined by the particular attribute.
+
+struct kvm_device_attr {
+	__u32	flags;		/* no flags currently defined */
+	__u32	group;		/* device-defined */
+	__u64	attr;		/* group-defined */
+	__u64	addr;		/* userspace address of attr data */
+};
+
+4.81 KVM_HAS_DEVICE_ATTR
+
+Capability: KVM_CAP_DEVICE_CTRL
+Type: device ioctl
+Parameters: struct kvm_device_attr
+Returns: 0 on success, -1 on error
+Errors:
+  ENXIO:  The group or attribute is unknown/unsupported for this device
+
+Tests whether a device supports a particular attribute.  A successful
+return indicates the attribute is implemented.  It does not necessarily
+indicate that the attribute can be read or written in the device's
+current state.  "addr" is ignored.
 
 4.77 KVM_ARM_VCPU_INIT
 
diff --git a/Documentation/virtual/kvm/devices/README b/Documentation/virtual/kvm/devices/README
new file mode 100644
index 0000000..34a6983
--- /dev/null
+++ b/Documentation/virtual/kvm/devices/README
@@ -0,0 +1 @@ 
+This directory contains specific device bindings for KVM_CAP_DEVICE_CTRL.
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index e34f8fe..e0caae2 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -370,6 +370,9 @@  struct kvmppc_booke_debug_reg {
 	u64 dac[KVMPPC_BOOKE_MAX_DAC];
 };
 
+#define KVMPPC_IRQCHIP_NONE	0
+#define KVMPPC_IRQCHIP_MPIC	1
+
 struct kvm_vcpu_arch {
 	ulong host_stack;
 	u32 host_pid;
@@ -549,6 +552,9 @@  struct kvm_vcpu_arch {
 	unsigned long magic_page_pa; /* phys addr to map the magic page to */
 	unsigned long magic_page_ea; /* effect. addr to map the magic page to */
 
+	int irqchip_type;
+	void *irqchip_priv;
+
 #ifdef CONFIG_KVM_BOOK3S_64_HV
 	struct kvm_vcpu_arch_shared shregs;
 
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index f589307..f44932c 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -323,4 +323,6 @@  static inline ulong kvmppc_get_ea_indexed(struct kvm_vcpu *vcpu, int ra, int rb)
 	return ea;
 }
 
+void mpic_put(void *priv);
+
 #endif /* __POWERPC_KVM_PPC_H__ */
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 16b4595..bdfa526 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -459,6 +459,13 @@  void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 	tasklet_kill(&vcpu->arch.tasklet);
 
 	kvmppc_remove_vcpu_debugfs(vcpu);
+
+	switch (vcpu->arch.irqchip_type) {
+	case KVMPPC_IRQCHIP_MPIC:
+		mpic_put(vcpu->arch.irqchip_priv);
+		break;
+	}
+
 	kvmppc_core_vcpu_free(vcpu);
 }
 
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 74d0ff3..20ce2d2 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -668,6 +668,7 @@  struct kvm_ppc_smmu_info {
 #define KVM_CAP_PPC_EPR 86
 #define KVM_CAP_ARM_PSCI 87
 #define KVM_CAP_ARM_SET_DEVICE_ADDR 88
+#define KVM_CAP_DEVICE_CTRL 89
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -909,6 +910,32 @@  struct kvm_s390_ucas_mapping {
 #define KVM_ARM_SET_DEVICE_ADDR	  _IOW(KVMIO,  0xab, struct kvm_arm_device_addr)
 
 /*
+ * Device control API, available with KVM_CAP_DEVICE_CTRL
+ */
+#define KVM_CREATE_DEVICE_TEST		1
+
+struct kvm_create_device {
+	__u32	type;	/* in: KVM_DEV_TYPE_xxx */
+	__u32	fd;	/* out: device handle */
+	__u32	flags;	/* in: KVM_CREATE_DEVICE_xxx */
+};
+
+struct kvm_device_attr {
+	__u32	flags;		/* no flags currently defined */
+	__u32	group;		/* device-defined */
+	__u64	attr;		/* group-defined */
+	__u64	addr;		/* userspace address of attr data */
+};
+
+/* ioctl for vm fd */
+#define KVM_CREATE_DEVICE	  _IOWR(KVMIO,  0xe0, struct kvm_create_device)
+
+/* ioctls for fds returned by KVM_CREATE_DEVICE */
+#define KVM_SET_DEVICE_ATTR	  _IOW(KVMIO,  0xe1, struct kvm_device_attr)
+#define KVM_GET_DEVICE_ATTR	  _IOW(KVMIO,  0xe2, struct kvm_device_attr)
+#define KVM_HAS_DEVICE_ATTR	  _IOW(KVMIO,  0xe3, struct kvm_device_attr)
+
+/*
  * ioctls for vcpu fds
  */
 #define KVM_RUN                   _IO(KVMIO,   0x80)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index ff71541..ed033c0 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2158,6 +2158,17 @@  out:
 }
 #endif
 
+static int kvm_ioctl_create_device(struct kvm *kvm,
+				   struct kvm_create_device *cd)
+{
+	bool test = cd->flags & KVM_CREATE_DEVICE_TEST;
+
+	switch (cd->type) {
+	default:
+		return -ENODEV;
+	}
+}
+
 static long kvm_vm_ioctl(struct file *filp,
 			   unsigned int ioctl, unsigned long arg)
 {
@@ -2272,6 +2283,26 @@  static long kvm_vm_ioctl(struct file *filp,
 		break;
 	}
 #endif
+	case KVM_CREATE_DEVICE: {
+		struct kvm_create_device cd;
+
+		r = -EFAULT;
+		if (copy_from_user(&cd, argp, sizeof(cd)))
+			goto out;
+
+		mutex_lock(&kvm->lock);
+		r = kvm_ioctl_create_device(kvm, &cd);
+		mutex_unlock(&kvm->lock);
+		if (r)
+			goto out;
+
+		r = -EFAULT;
+		if (copy_to_user(argp, &cd, sizeof(cd)))
+			goto out;
+
+		r = 0;
+		break;
+	}
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
 		if (r == -ENOTTY)