diff mbox

[6/6] KVM: booke/bookehv: Add debug stub support

Message ID 1345557120-16197-7-git-send-email-Bharat.Bhushan@freescale.com (mailing list archive)
State New, archived
Headers show

Commit Message

Bharat Bhushan Aug. 21, 2012, 1:52 p.m. UTC
This patch adds the debug stub support on booke/bookehv.
Now QEMU debug stub can use hw breakpoint, watchpoint and
software breakpoint to debug guest.

Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
---
 arch/powerpc/include/asm/kvm.h        |   29 ++++++-
 arch/powerpc/include/asm/kvm_host.h   |    5 +
 arch/powerpc/kernel/asm-offsets.c     |   26 ++++++
 arch/powerpc/kvm/booke.c              |  144 +++++++++++++++++++++++++++++----
 arch/powerpc/kvm/booke_interrupts.S   |  110 +++++++++++++++++++++++++
 arch/powerpc/kvm/bookehv_interrupts.S |  141 +++++++++++++++++++++++++++++++-
 arch/powerpc/kvm/e500mc.c             |    3 +-
 7 files changed, 435 insertions(+), 23 deletions(-)

Comments

Scott Wood Sept. 5, 2012, 11:23 p.m. UTC | #1
On 08/21/2012 08:52 AM, Bharat Bhushan wrote:
> This patch adds the debug stub support on booke/bookehv.
> Now QEMU debug stub can use hw breakpoint, watchpoint and
> software breakpoint to debug guest.
> 
> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
> ---
>  arch/powerpc/include/asm/kvm.h        |   29 ++++++-
>  arch/powerpc/include/asm/kvm_host.h   |    5 +
>  arch/powerpc/kernel/asm-offsets.c     |   26 ++++++
>  arch/powerpc/kvm/booke.c              |  144 +++++++++++++++++++++++++++++----
>  arch/powerpc/kvm/booke_interrupts.S   |  110 +++++++++++++++++++++++++
>  arch/powerpc/kvm/bookehv_interrupts.S |  141 +++++++++++++++++++++++++++++++-
>  arch/powerpc/kvm/e500mc.c             |    3 +-
>  7 files changed, 435 insertions(+), 23 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
> index 61b197e..53479ea 100644
> --- a/arch/powerpc/include/asm/kvm.h
> +++ b/arch/powerpc/include/asm/kvm.h
> @@ -25,6 +25,7 @@
>  /* Select powerpc specific features in <linux/kvm.h> */
>  #define __KVM_HAVE_SPAPR_TCE
>  #define __KVM_HAVE_PPC_SMT
> +#define __KVM_HAVE_GUEST_DEBUG
>  
>  struct kvm_regs {
>  	__u64 pc;
> @@ -264,7 +265,31 @@ struct kvm_fpu {
>  	__u64 fpr[32];
>  };
>  
> +
> +/*
> + * Defines for h/w breakpoint, watchpoint (read, write or both) and
> + * software breakpoint.
> + * These are used as "type" in KVM_SET_GUEST_DEBUG ioctl and "status"
> + * for KVM_DEBUG_EXIT.
> + */
> +#define KVMPPC_DEBUG_NONE		0x0
> +#define KVMPPC_DEBUG_BREAKPOINT		(1UL << 1)
> +#define KVMPPC_DEBUG_WATCH_WRITE	(1UL << 2)
> +#define KVMPPC_DEBUG_WATCH_READ		(1UL << 3)
>  struct kvm_debug_exit_arch {

That says "arch", but it's not in an arch-specific file.

> +	__u64 pc;
> +	/*
> +	 * exception -> returns the exception number. If the KVM_DEBUG_EXIT
> +	 * exit is not handled (say not h/w breakpoint or software breakpoint
> +	 * set for this address) by qemu then it is supposed to inject this
> +	 * exception to guest.
> +	 */
> +	__u32 exception;
> +	/*
> +	 * exiting to userspace because of h/w breakpoint, watchpoint
> +	 * (read, write or both) and software breakpoint.
> +	 */
> +	__u32 status;
>  };

What does "exception number" mean in a generic API?

What values can go in "status"?

> +	addi	r7, r4, VCPU_HOST_DBG
> +	mfspr	r9, SPRN_DBCR0
> +	lwz	r8, KVMPPC_DBG_DBCR0(r7)
> +	andis.	r9, r9, DBCR0_AC_BITS@h
> +	beq	skip_load_host_debug
> +	li	r9, 0
> +	mtspr	SPRN_DBCR0, r9		/* disable all debug event */
> +	lwz	r9, KVMPPC_DBG_DBCR1(r7)
> +	mtspr	SPRN_DBCR1, r9
> +	lwz	r9, KVMPPC_DBG_DBCR2(r7)
> +	mtspr	SPRN_DBCR2, r9
> +	lwz	r9, KVMPPC_DBG_IAC1+4(r7)
> +	mtspr	SPRN_IAC1, r9
> +	lwz	r9, KVMPPC_DBG_IAC2+4(r7)
> +	mtspr	SPRN_IAC2, r9
> +#if CONFIG_PPC_ADV_DEBUG_IACS > 2
> +	lwz	r9, KVMPPC_DBG_IAC3+4(r7)
> +	mtspr	SPRN_IAC3, r9
> +	lwz	r9, KVMPPC_DBG_IAC4+4(r7)
> +	mtspr	SPRN_IAC4, r9
> +#endif

What if CONFIG_PPC_ADV_DEBUG_REGS isn't set?

-Scott


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Scott Wood Sept. 5, 2012, 11:27 p.m. UTC | #2
On 09/05/2012 06:23 PM, Scott Wood wrote:
> On 08/21/2012 08:52 AM, Bharat Bhushan wrote:
>> This patch adds the debug stub support on booke/bookehv.
>> Now QEMU debug stub can use hw breakpoint, watchpoint and
>> software breakpoint to debug guest.
>>
>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>> ---
>>  arch/powerpc/include/asm/kvm.h        |   29 ++++++-
>>  arch/powerpc/include/asm/kvm_host.h   |    5 +
>>  arch/powerpc/kernel/asm-offsets.c     |   26 ++++++
>>  arch/powerpc/kvm/booke.c              |  144 +++++++++++++++++++++++++++++----
>>  arch/powerpc/kvm/booke_interrupts.S   |  110 +++++++++++++++++++++++++
>>  arch/powerpc/kvm/bookehv_interrupts.S |  141 +++++++++++++++++++++++++++++++-
>>  arch/powerpc/kvm/e500mc.c             |    3 +-
>>  7 files changed, 435 insertions(+), 23 deletions(-)
>>
>> diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
>> index 61b197e..53479ea 100644
>> --- a/arch/powerpc/include/asm/kvm.h
>> +++ b/arch/powerpc/include/asm/kvm.h
>> @@ -25,6 +25,7 @@
>>  /* Select powerpc specific features in <linux/kvm.h> */
>>  #define __KVM_HAVE_SPAPR_TCE
>>  #define __KVM_HAVE_PPC_SMT
>> +#define __KVM_HAVE_GUEST_DEBUG
>>  
>>  struct kvm_regs {
>>  	__u64 pc;
>> @@ -264,7 +265,31 @@ struct kvm_fpu {
>>  	__u64 fpr[32];
>>  };
>>  
>> +
>> +/*
>> + * Defines for h/w breakpoint, watchpoint (read, write or both) and
>> + * software breakpoint.
>> + * These are used as "type" in KVM_SET_GUEST_DEBUG ioctl and "status"
>> + * for KVM_DEBUG_EXIT.
>> + */
>> +#define KVMPPC_DEBUG_NONE		0x0
>> +#define KVMPPC_DEBUG_BREAKPOINT		(1UL << 1)
>> +#define KVMPPC_DEBUG_WATCH_WRITE	(1UL << 2)
>> +#define KVMPPC_DEBUG_WATCH_READ		(1UL << 3)
>>  struct kvm_debug_exit_arch {
> 
> That says "arch", but it's not in an arch-specific file.

Sigh, I can't read today apparently.

>> +	__u64 pc;
>> +	/*
>> +	 * exception -> returns the exception number. If the KVM_DEBUG_EXIT
>> +	 * exit is not handled (say not h/w breakpoint or software breakpoint
>> +	 * set for this address) by qemu then it is supposed to inject this
>> +	 * exception to guest.
>> +	 */
>> +	__u32 exception;
>> +	/*
>> +	 * exiting to userspace because of h/w breakpoint, watchpoint
>> +	 * (read, write or both) and software breakpoint.
>> +	 */
>> +	__u32 status;
>>  };
> 
> What does "exception number" mean in a generic API?

Still, "exception number" is not a well-defined concept powerpc-wide.

-Scott


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bharat Bhushan Sept. 6, 2012, 2:56 p.m. UTC | #3
> -----Original Message-----

> From: Wood Scott-B07421

> Sent: Thursday, September 06, 2012 4:57 AM

> To: Bhushan Bharat-R65777

> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Bhushan Bharat-

> R65777

> Subject: Re: [PATCH 6/6] KVM: booke/bookehv: Add debug stub support

> 

> On 09/05/2012 06:23 PM, Scott Wood wrote:

> > On 08/21/2012 08:52 AM, Bharat Bhushan wrote:

> >> This patch adds the debug stub support on booke/bookehv.

> >> Now QEMU debug stub can use hw breakpoint, watchpoint and software

> >> breakpoint to debug guest.

> >>

> >> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>

> >> ---

> >>  arch/powerpc/include/asm/kvm.h        |   29 ++++++-

> >>  arch/powerpc/include/asm/kvm_host.h   |    5 +

> >>  arch/powerpc/kernel/asm-offsets.c     |   26 ++++++

> >>  arch/powerpc/kvm/booke.c              |  144 +++++++++++++++++++++++++++++--

> --

> >>  arch/powerpc/kvm/booke_interrupts.S   |  110 +++++++++++++++++++++++++

> >>  arch/powerpc/kvm/bookehv_interrupts.S |  141

> +++++++++++++++++++++++++++++++-

> >>  arch/powerpc/kvm/e500mc.c             |    3 +-

> >>  7 files changed, 435 insertions(+), 23 deletions(-)

> >>

> >> diff --git a/arch/powerpc/include/asm/kvm.h

> >> b/arch/powerpc/include/asm/kvm.h index 61b197e..53479ea 100644

> >> --- a/arch/powerpc/include/asm/kvm.h

> >> +++ b/arch/powerpc/include/asm/kvm.h

> >> @@ -25,6 +25,7 @@

> >>  /* Select powerpc specific features in <linux/kvm.h> */  #define

> >> __KVM_HAVE_SPAPR_TCE  #define __KVM_HAVE_PPC_SMT

> >> +#define __KVM_HAVE_GUEST_DEBUG

> >>

> >>  struct kvm_regs {

> >>  	__u64 pc;

> >> @@ -264,7 +265,31 @@ struct kvm_fpu {

> >>  	__u64 fpr[32];

> >>  };

> >>

> >> +

> >> +/*

> >> + * Defines for h/w breakpoint, watchpoint (read, write or both) and

> >> + * software breakpoint.

> >> + * These are used as "type" in KVM_SET_GUEST_DEBUG ioctl and "status"

> >> + * for KVM_DEBUG_EXIT.

> >> + */

> >> +#define KVMPPC_DEBUG_NONE		0x0

> >> +#define KVMPPC_DEBUG_BREAKPOINT		(1UL << 1)

> >> +#define KVMPPC_DEBUG_WATCH_WRITE	(1UL << 2)

> >> +#define KVMPPC_DEBUG_WATCH_READ		(1UL << 3)

> >>  struct kvm_debug_exit_arch {

> >

> > That says "arch", but it's not in an arch-specific file.

> 

> Sigh, I can't read today apparently.

> 

> >> +	__u64 pc;

> >> +	/*

> >> +	 * exception -> returns the exception number. If the KVM_DEBUG_EXIT

> >> +	 * exit is not handled (say not h/w breakpoint or software breakpoint

> >> +	 * set for this address) by qemu then it is supposed to inject this

> >> +	 * exception to guest.

> >> +	 */

> >> +	__u32 exception;

> >> +	/*

> >> +	 * exiting to userspace because of h/w breakpoint, watchpoint

> >> +	 * (read, write or both) and software breakpoint.

> >> +	 */

> >> +	__u32 status;

> >>  };

> >

> > What does "exception number" mean in a generic API?

> 

> Still, "exception number" is not a well-defined concept powerpc-wide.


Just for background why we added is that, on x86 this exception number is used to inject the exception to guest if QEMU is not able to handle the debug exception.

Should we just through a print with clearing the exception condition? Or something else you would like to suggest?

Thanks
-Bharat
Scott Wood Sept. 6, 2012, 10:56 p.m. UTC | #4
On 09/06/2012 09:56 AM, Bhushan Bharat-R65777 wrote:
> 
> 
>> -----Original Message-----
>> From: Wood Scott-B07421
>> Sent: Thursday, September 06, 2012 4:57 AM
>> To: Bhushan Bharat-R65777
>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Bhushan Bharat-
>> R65777
>> Subject: Re: [PATCH 6/6] KVM: booke/bookehv: Add debug stub support
>>
>> On 09/05/2012 06:23 PM, Scott Wood wrote:
>>> On 08/21/2012 08:52 AM, Bharat Bhushan wrote:
>>>> This patch adds the debug stub support on booke/bookehv.
>>>> Now QEMU debug stub can use hw breakpoint, watchpoint and software
>>>> breakpoint to debug guest.
>>>>
>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>> ---
>>>>  arch/powerpc/include/asm/kvm.h        |   29 ++++++-
>>>>  arch/powerpc/include/asm/kvm_host.h   |    5 +
>>>>  arch/powerpc/kernel/asm-offsets.c     |   26 ++++++
>>>>  arch/powerpc/kvm/booke.c              |  144 +++++++++++++++++++++++++++++--
>> --
>>>>  arch/powerpc/kvm/booke_interrupts.S   |  110 +++++++++++++++++++++++++
>>>>  arch/powerpc/kvm/bookehv_interrupts.S |  141
>> +++++++++++++++++++++++++++++++-
>>>>  arch/powerpc/kvm/e500mc.c             |    3 +-
>>>>  7 files changed, 435 insertions(+), 23 deletions(-)
>>>>
>>>> diff --git a/arch/powerpc/include/asm/kvm.h
>>>> b/arch/powerpc/include/asm/kvm.h index 61b197e..53479ea 100644
>>>> --- a/arch/powerpc/include/asm/kvm.h
>>>> +++ b/arch/powerpc/include/asm/kvm.h
>>>> @@ -25,6 +25,7 @@
>>>>  /* Select powerpc specific features in <linux/kvm.h> */  #define
>>>> __KVM_HAVE_SPAPR_TCE  #define __KVM_HAVE_PPC_SMT
>>>> +#define __KVM_HAVE_GUEST_DEBUG
>>>>
>>>>  struct kvm_regs {
>>>>  	__u64 pc;
>>>> @@ -264,7 +265,31 @@ struct kvm_fpu {
>>>>  	__u64 fpr[32];
>>>>  };
>>>>
>>>> +
>>>> +/*
>>>> + * Defines for h/w breakpoint, watchpoint (read, write or both) and
>>>> + * software breakpoint.
>>>> + * These are used as "type" in KVM_SET_GUEST_DEBUG ioctl and "status"
>>>> + * for KVM_DEBUG_EXIT.
>>>> + */
>>>> +#define KVMPPC_DEBUG_NONE		0x0
>>>> +#define KVMPPC_DEBUG_BREAKPOINT		(1UL << 1)
>>>> +#define KVMPPC_DEBUG_WATCH_WRITE	(1UL << 2)
>>>> +#define KVMPPC_DEBUG_WATCH_READ		(1UL << 3)
>>>>  struct kvm_debug_exit_arch {
>>>
>>> That says "arch", but it's not in an arch-specific file.
>>
>> Sigh, I can't read today apparently.
>>
>>>> +	__u64 pc;
>>>> +	/*
>>>> +	 * exception -> returns the exception number. If the KVM_DEBUG_EXIT
>>>> +	 * exit is not handled (say not h/w breakpoint or software breakpoint
>>>> +	 * set for this address) by qemu then it is supposed to inject this
>>>> +	 * exception to guest.
>>>> +	 */
>>>> +	__u32 exception;
>>>> +	/*
>>>> +	 * exiting to userspace because of h/w breakpoint, watchpoint
>>>> +	 * (read, write or both) and software breakpoint.
>>>> +	 */
>>>> +	__u32 status;
>>>>  };
>>>
>>> What does "exception number" mean in a generic API?
>>
>> Still, "exception number" is not a well-defined concept powerpc-wide.
> 
> Just for background why we added is that, on x86 this exception number is used to inject the exception to guest if QEMU is not able to handle the debug exception.
> 
> Should we just through a print with clearing the exception condition? Or something else you would like to suggest?

We can pass up the exception type; it just needs more documentation
about what exactly you're referring to, and probably some enumeration
that says which exception numberspace it is.

For booke the exception number should probably be related to the fixed
offsets rather than the IVOR number, as IVORs are phased out.

-Scott


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Graf Sept. 24, 2012, 2:46 p.m. UTC | #5
On 07.09.2012, at 00:56, Scott Wood wrote:

> On 09/06/2012 09:56 AM, Bhushan Bharat-R65777 wrote:
>> 
>> 
>>> -----Original Message-----
>>> From: Wood Scott-B07421
>>> Sent: Thursday, September 06, 2012 4:57 AM
>>> To: Bhushan Bharat-R65777
>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Bhushan Bharat-
>>> R65777
>>> Subject: Re: [PATCH 6/6] KVM: booke/bookehv: Add debug stub support
>>> 
>>> On 09/05/2012 06:23 PM, Scott Wood wrote:
>>>> On 08/21/2012 08:52 AM, Bharat Bhushan wrote:
>>>>> This patch adds the debug stub support on booke/bookehv.
>>>>> Now QEMU debug stub can use hw breakpoint, watchpoint and software
>>>>> breakpoint to debug guest.
>>>>> 
>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>> ---
>>>>> arch/powerpc/include/asm/kvm.h        |   29 ++++++-
>>>>> arch/powerpc/include/asm/kvm_host.h   |    5 +
>>>>> arch/powerpc/kernel/asm-offsets.c     |   26 ++++++
>>>>> arch/powerpc/kvm/booke.c              |  144 +++++++++++++++++++++++++++++--
>>> --
>>>>> arch/powerpc/kvm/booke_interrupts.S   |  110 +++++++++++++++++++++++++
>>>>> arch/powerpc/kvm/bookehv_interrupts.S |  141
>>> +++++++++++++++++++++++++++++++-
>>>>> arch/powerpc/kvm/e500mc.c             |    3 +-
>>>>> 7 files changed, 435 insertions(+), 23 deletions(-)
>>>>> 
>>>>> diff --git a/arch/powerpc/include/asm/kvm.h
>>>>> b/arch/powerpc/include/asm/kvm.h index 61b197e..53479ea 100644
>>>>> --- a/arch/powerpc/include/asm/kvm.h
>>>>> +++ b/arch/powerpc/include/asm/kvm.h
>>>>> @@ -25,6 +25,7 @@
>>>>> /* Select powerpc specific features in <linux/kvm.h> */  #define
>>>>> __KVM_HAVE_SPAPR_TCE  #define __KVM_HAVE_PPC_SMT
>>>>> +#define __KVM_HAVE_GUEST_DEBUG
>>>>> 
>>>>> struct kvm_regs {
>>>>> 	__u64 pc;
>>>>> @@ -264,7 +265,31 @@ struct kvm_fpu {
>>>>> 	__u64 fpr[32];
>>>>> };
>>>>> 
>>>>> +
>>>>> +/*
>>>>> + * Defines for h/w breakpoint, watchpoint (read, write or both) and
>>>>> + * software breakpoint.
>>>>> + * These are used as "type" in KVM_SET_GUEST_DEBUG ioctl and "status"
>>>>> + * for KVM_DEBUG_EXIT.
>>>>> + */
>>>>> +#define KVMPPC_DEBUG_NONE		0x0
>>>>> +#define KVMPPC_DEBUG_BREAKPOINT		(1UL << 1)
>>>>> +#define KVMPPC_DEBUG_WATCH_WRITE	(1UL << 2)
>>>>> +#define KVMPPC_DEBUG_WATCH_READ		(1UL << 3)
>>>>> struct kvm_debug_exit_arch {
>>>> 
>>>> That says "arch", but it's not in an arch-specific file.
>>> 
>>> Sigh, I can't read today apparently.
>>> 
>>>>> +	__u64 pc;
>>>>> +	/*
>>>>> +	 * exception -> returns the exception number. If the KVM_DEBUG_EXIT
>>>>> +	 * exit is not handled (say not h/w breakpoint or software breakpoint
>>>>> +	 * set for this address) by qemu then it is supposed to inject this
>>>>> +	 * exception to guest.
>>>>> +	 */
>>>>> +	__u32 exception;
>>>>> +	/*
>>>>> +	 * exiting to userspace because of h/w breakpoint, watchpoint
>>>>> +	 * (read, write or both) and software breakpoint.
>>>>> +	 */
>>>>> +	__u32 status;
>>>>> };
>>>> 
>>>> What does "exception number" mean in a generic API?
>>> 
>>> Still, "exception number" is not a well-defined concept powerpc-wide.
>> 
>> Just for background why we added is that, on x86 this exception number is used to inject the exception to guest if QEMU is not able to handle the debug exception.
>> 
>> Should we just through a print with clearing the exception condition? Or something else you would like to suggest?
> 
> We can pass up the exception type; it just needs more documentation
> about what exactly you're referring to, and probably some enumeration
> that says which exception numberspace it is.
> 
> For booke the exception number should probably be related to the fixed
> offsets rather than the IVOR number, as IVORs are phased out.

Jan, I would like to get your comment on this one.

Since we don't have standardized exception vectors like x86 does, we need to convert things between different semantics in user space if we want to make use of the exception type. Do we actually need to know about it in user space or do we only need to store it in case we get a migration at that point?

If it's the latter, can we maybe keep the reinjection logic internal to KVM and make DEBUG exits non-migratable similar to how we already handle MMIO/PIO exits today?


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Graf Sept. 24, 2012, 4:20 p.m. UTC | #6
On 21.08.2012, at 15:52, Bharat Bhushan wrote:

> This patch adds the debug stub support on booke/bookehv.
> Now QEMU debug stub can use hw breakpoint, watchpoint and
> software breakpoint to debug guest.
> 
> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
> ---
> arch/powerpc/include/asm/kvm.h        |   29 ++++++-
> arch/powerpc/include/asm/kvm_host.h   |    5 +
> arch/powerpc/kernel/asm-offsets.c     |   26 ++++++
> arch/powerpc/kvm/booke.c              |  144 +++++++++++++++++++++++++++++----
> arch/powerpc/kvm/booke_interrupts.S   |  110 +++++++++++++++++++++++++
> arch/powerpc/kvm/bookehv_interrupts.S |  141 +++++++++++++++++++++++++++++++-
> arch/powerpc/kvm/e500mc.c             |    3 +-
> 7 files changed, 435 insertions(+), 23 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
> index 61b197e..53479ea 100644
> --- a/arch/powerpc/include/asm/kvm.h
> +++ b/arch/powerpc/include/asm/kvm.h
> @@ -25,6 +25,7 @@
> /* Select powerpc specific features in <linux/kvm.h> */
> #define __KVM_HAVE_SPAPR_TCE
> #define __KVM_HAVE_PPC_SMT
> +#define __KVM_HAVE_GUEST_DEBUG
> 
> struct kvm_regs {
> 	__u64 pc;
> @@ -264,7 +265,31 @@ struct kvm_fpu {
> 	__u64 fpr[32];
> };
> 
> +
> +/*
> + * Defines for h/w breakpoint, watchpoint (read, write or both) and
> + * software breakpoint.
> + * These are used as "type" in KVM_SET_GUEST_DEBUG ioctl and "status"
> + * for KVM_DEBUG_EXIT.
> + */
> +#define KVMPPC_DEBUG_NONE		0x0
> +#define KVMPPC_DEBUG_BREAKPOINT		(1UL << 1)
> +#define KVMPPC_DEBUG_WATCH_WRITE	(1UL << 2)
> +#define KVMPPC_DEBUG_WATCH_READ		(1UL << 3)
> struct kvm_debug_exit_arch {
> +	__u64 pc;
> +	/*
> +	 * exception -> returns the exception number. If the KVM_DEBUG_EXIT
> +	 * exit is not handled (say not h/w breakpoint or software breakpoint
> +	 * set for this address) by qemu then it is supposed to inject this
> +	 * exception to guest.
> +	 */
> +	__u32 exception;
> +	/*
> +	 * exiting to userspace because of h/w breakpoint, watchpoint
> +	 * (read, write or both) and software breakpoint.
> +	 */
> +	__u32 status;
> };
> 
> /* for KVM_SET_GUEST_DEBUG */
> @@ -276,10 +301,6 @@ struct kvm_guest_debug_arch {
> 		 * Type denotes h/w breakpoint, read watchpoint, write
> 		 * watchpoint or watchpoint (both read and write).
> 		 */
> -#define KVMPPC_DEBUG_NOTYPE		0x0
> -#define KVMPPC_DEBUG_BREAKPOINT		(1UL << 1)
> -#define KVMPPC_DEBUG_WATCH_WRITE	(1UL << 2)
> -#define KVMPPC_DEBUG_WATCH_READ		(1UL << 3)
> 		__u32 type;
> 		__u32 pad1;
> 		__u64 pad2;
> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
> index c7219c1..3ba465a 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -496,7 +496,12 @@ struct kvm_vcpu_arch {
> 	u32 mmucfg;
> 	u32 epr;
> 	u32 crit_save;
> +	/* guest debug registers*/
> 	struct kvmppc_booke_debug_reg dbg_reg;
> +	/* shadow debug registers */
> +	struct kvmppc_booke_debug_reg shadow_dbg_reg;
> +	/* host debug registers*/
> +	struct kvmppc_booke_debug_reg host_dbg_reg;
> #endif
> 	gpa_t paddr_accessed;
> 	gva_t vaddr_accessed;
> diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
> index 555448e..6987821 100644
> --- a/arch/powerpc/kernel/asm-offsets.c
> +++ b/arch/powerpc/kernel/asm-offsets.c
> @@ -564,6 +564,32 @@ int main(void)
> 	DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear));
> 	DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr));
> 	DEFINE(VCPU_CRIT_SAVE, offsetof(struct kvm_vcpu, arch.crit_save));
> +	DEFINE(VCPU_DBSR, offsetof(struct kvm_vcpu, arch.dbsr));
> +	DEFINE(VCPU_SHADOW_DBG, offsetof(struct kvm_vcpu, arch.shadow_dbg_reg));
> +	DEFINE(VCPU_HOST_DBG, offsetof(struct kvm_vcpu, arch.host_dbg_reg));
> +	DEFINE(KVMPPC_DBG_DBCR0, offsetof(struct kvmppc_booke_debug_reg,
> +					  dbcr0));
> +	DEFINE(KVMPPC_DBG_DBCR1, offsetof(struct kvmppc_booke_debug_reg,
> +					  dbcr1));
> +	DEFINE(KVMPPC_DBG_DBCR2, offsetof(struct kvmppc_booke_debug_reg,
> +					  dbcr2));
> +#ifdef CONFIG_KVM_E500MC
> +	DEFINE(KVMPPC_DBG_DBCR4, offsetof(struct kvmppc_booke_debug_reg,
> +					  dbcr4));
> +#endif
> +	DEFINE(KVMPPC_DBG_IAC1, offsetof(struct kvmppc_booke_debug_reg,
> +					 iac[0]));
> +	DEFINE(KVMPPC_DBG_IAC2, offsetof(struct kvmppc_booke_debug_reg,
> +					 iac[1]));
> +	DEFINE(KVMPPC_DBG_IAC3, offsetof(struct kvmppc_booke_debug_reg,
> +					 iac[2]));
> +	DEFINE(KVMPPC_DBG_IAC4, offsetof(struct kvmppc_booke_debug_reg,
> +					 iac[3]));
> +	DEFINE(KVMPPC_DBG_DAC1, offsetof(struct kvmppc_booke_debug_reg,
> +					 dac[0]));
> +	DEFINE(KVMPPC_DBG_DAC2, offsetof(struct kvmppc_booke_debug_reg,
> +					 dac[1]));
> +	DEFINE(VCPU_GUEST_DEBUG, offsetof(struct kvm_vcpu, guest_debug));
> #endif /* CONFIG_PPC_BOOK3S */
> #endif /* CONFIG_KVM */
> 
> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
> index dd0e5b8..4d82e34 100644
> --- a/arch/powerpc/kvm/booke.c
> +++ b/arch/powerpc/kvm/booke.c
> @@ -132,6 +132,9 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
> 
> #ifdef CONFIG_KVM_BOOKE_HV
> 	new_msr |= MSR_GS;
> +
> +	if (vcpu->guest_debug)
> +		new_msr |= MSR_DE;
> #endif
> 
> 	vcpu->arch.shared->msr = new_msr;
> @@ -667,10 +670,21 @@ out:
> 	return ret;
> }
> 
> -static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
> +static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
> +			  int exit_nr)
> {
> 	enum emulation_result er;
> 
> +	if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) &&
> +		     vcpu->arch.last_inst == KVMPPC_INST_GUEST_GDB) {

This belongs into the normal emulation code path, behind the same switch() that everything else goes through.

> +		run->exit_reason = KVM_EXIT_DEBUG;
> +		run->debug.arch.pc = vcpu->arch.pc;
> +		run->debug.arch.exception = exit_nr;
> +		run->debug.arch.status = 0;
> +		kvmppc_account_exit(vcpu, DEBUG_EXITS);
> +		return RESUME_HOST;
> +	}
> +
> 	er = kvmppc_emulate_instruction(run, vcpu);
> 	switch (er) {
> 	case EMULATE_DONE:
> @@ -697,6 +711,44 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
> 	default:
> 		BUG();
> 	}
> +
> +	if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_ENABLE) &&
> +	    (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)) {

I don't understand how this is supposed to work. When we enable singlestep, why would we end up in emulation_exit()?

> +		run->exit_reason = KVM_EXIT_DEBUG;
> +		return RESUME_HOST;
> +	}
> +}
> +
> +static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu *vcpu)
> +{
> +	u32 dbsr;
> +
> +#ifndef CONFIG_KVM_BOOKE_HV
> +	if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC))
> +		vcpu->arch.pc = mfspr(SPRN_DSRR0);
> +	else
> +		vcpu->arch.pc = mfspr(SPRN_CSRR0);
> +#endif

Why doesn't this get handled in the asm code that recovers from the respective exceptions?

> +	dbsr = vcpu->arch.dbsr;
> +
> +	run->debug.arch.pc = vcpu->arch.pc;
> +	run->debug.arch.status = 0;
> +	vcpu->arch.dbsr = 0;
> +
> +	if (dbsr & (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) {
> +		run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT;
> +	} else {
> +		if (dbsr & (DBSR_DAC1W | DBSR_DAC2W))
> +			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_WRITE;
> +		else if (dbsr & (DBSR_DAC1R | DBSR_DAC2R))
> +			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_READ;
> +		if (dbsr & (DBSR_DAC1R | DBSR_DAC1W))
> +			run->debug.arch.pc = vcpu->arch.shadow_dbg_reg.dac[0];
> +		else if (dbsr & (DBSR_DAC2R | DBSR_DAC2W))
> +			run->debug.arch.pc = vcpu->arch.shadow_dbg_reg.dac[1];
> +	}

Mind to explain the guest register sharing mechanism you are envisioning? Which registers are used by the guest? Which are used by the host? Who decides what gets used by whom?

> +
> +	return RESUME_HOST;
> }
> 
> static void kvmppc_fill_pt_regs(struct pt_regs *regs)
> @@ -843,7 +895,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
> 		break;
> 
> 	case BOOKE_INTERRUPT_HV_PRIV:
> -		r = emulation_exit(run, vcpu);
> +		r = emulation_exit(run, vcpu, exit_nr);
> 		break;
> 
> 	case BOOKE_INTERRUPT_PROGRAM:
> @@ -862,7 +914,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
> 			break;
> 		}
> 
> -		r = emulation_exit(run, vcpu);
> +		r = emulation_exit(run, vcpu, exit_nr);
> 		break;
> 
> 	case BOOKE_INTERRUPT_FP_UNAVAIL:
> @@ -1052,18 +1104,12 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
> 	}
> 
> 	case BOOKE_INTERRUPT_DEBUG: {
> -		u32 dbsr;
> -
> -		vcpu->arch.pc = mfspr(SPRN_CSRR0);
> -
> -		/* clear IAC events in DBSR register */
> -		dbsr = mfspr(SPRN_DBSR);
> -		dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4;
> -		mtspr(SPRN_DBSR, dbsr);
> -
> -		run->exit_reason = KVM_EXIT_DEBUG;
> +		r = kvmppc_handle_debug(run, vcpu);
> +		if (r == RESUME_HOST) {
> +			run->debug.arch.exception = exit_nr;
> +			run->exit_reason = KVM_EXIT_DEBUG;
> +		}
> 		kvmppc_account_exit(vcpu, DEBUG_EXITS);
> -		r = RESUME_HOST;
> 		break;
> 	}
> 
> @@ -1403,10 +1449,78 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
> 	return r;
> }
> 
> +#define BP_NUM	KVMPPC_BOOKE_IAC_NUM
> +#define WP_NUM	KVMPPC_BOOKE_DAC_NUM
> +
> int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
> 					 struct kvm_guest_debug *dbg)
> {
> -	return -EINVAL;
> +
> +	if (!(dbg->control & KVM_GUESTDBG_ENABLE)) {
> +		/* Clear All debug events */
> +		vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
> +		vcpu->guest_debug = 0;
> +		return 0;
> +	}
> +
> +	vcpu->guest_debug = dbg->control;
> +	vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
> +
> +	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
> +		vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC;
> +
> +	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
> +		struct kvmppc_booke_debug_reg *gdbgr =
> +				&(vcpu->arch.shadow_dbg_reg);
> +		int n, b = 0, w = 0;
> +		const u32 bp_code[] = {
> +			DBCR0_IAC1 | DBCR0_IDM,
> +			DBCR0_IAC2 | DBCR0_IDM,
> +			DBCR0_IAC3 | DBCR0_IDM,
> +			DBCR0_IAC4 | DBCR0_IDM
> +		};
> +		const u32 wp_code[] = {
> +			DBCR0_DAC1W | DBCR0_IDM,
> +			DBCR0_DAC2W | DBCR0_IDM,
> +			DBCR0_DAC1R | DBCR0_IDM,
> +			DBCR0_DAC2R | DBCR0_IDM
> +		};
> +
> +#ifndef CONFIG_KVM_BOOKE_HV
> +		gdbgr->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US |
> +				DBCR1_IAC3US | DBCR1_IAC4US;
> +		gdbgr->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US;
> +#else
> +		gdbgr->dbcr1 = 0;
> +		gdbgr->dbcr2 = 0;
> +#endif
> +
> +		for (n = 0; n < (BP_NUM + WP_NUM); n++) {
> +			u32 type = dbg->arch.bp[n].type;
> +
> +			if (!type)
> +				break;
> +
> +			if (type & (KVMPPC_DEBUG_WATCH_READ |
> +				    KVMPPC_DEBUG_WATCH_WRITE)) {
> +				if (w < WP_NUM) {
> +					if (type & KVMPPC_DEBUG_WATCH_READ)
> +						gdbgr->dbcr0 |= wp_code[w + 2];
> +					if (type & KVMPPC_DEBUG_WATCH_WRITE)
> +						gdbgr->dbcr0 |= wp_code[w];
> +					gdbgr->dac[w] = dbg->arch.bp[n].addr;
> +					w++;
> +				}
> +			} else if (type & KVMPPC_DEBUG_BREAKPOINT) {
> +				if (b < BP_NUM) {
> +					gdbgr->dbcr0 |= bp_code[b];
> +					gdbgr->iac[b] = dbg->arch.bp[n].addr;
> +					b++;
> +				}
> +			}
> +		}
> +	}
> +	return 0;
> }
> 
> int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
> diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
> index dd9c5d4..2202936 100644
> --- a/arch/powerpc/kvm/booke_interrupts.S
> +++ b/arch/powerpc/kvm/booke_interrupts.S
> @@ -39,6 +39,8 @@
> #define HOST_MIN_STACK_SIZE (HOST_NV_GPR(R31) + 4)
> #define HOST_STACK_SIZE (((HOST_MIN_STACK_SIZE + 15) / 16) * 16) /* Align. */
> #define HOST_STACK_LR   (HOST_STACK_SIZE + 4) /* In caller stack frame. */
> +#define DBCR0_AC_BITS	(DBCR0_IAC1 | DBCR0_IAC2 | DBCR0_IAC3 | DBCR0_IAC4 | \
> +			 DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W)
> 
> #define NEED_INST_MASK ((1<<BOOKE_INTERRUPT_PROGRAM) | \
>                         (1<<BOOKE_INTERRUPT_DTLB_MISS) | \
> @@ -52,6 +54,8 @@
>                        (1<<BOOKE_INTERRUPT_PROGRAM) | \
>                        (1<<BOOKE_INTERRUPT_DTLB_MISS))
> 
> +#define NEED_DEBUG_SAVE (1<<BOOKE_INTERRUPT_DEBUG)
> +
> .macro __KVM_HANDLER ivor_nr scratch srr0
> 	stw	r3, VCPU_GPR(R3)(r4)
> 	stw	r5, VCPU_GPR(R5)(r4)
> @@ -212,6 +216,61 @@ _GLOBAL(kvmppc_resume_host)
> 	stw	r9, VCPU_FAULT_ESR(r4)
> ..skip_esr:
> 
> +	addi	r7, r4, VCPU_HOST_DBG
> +	mfspr	r9, SPRN_DBCR0
> +	lwz	r8, KVMPPC_DBG_DBCR0(r7)
> +	andis.	r9, r9, DBCR0_AC_BITS@h
> +	beq	skip_load_host_debug
> +	li	r9, 0
> +	mtspr	SPRN_DBCR0, r9		/* disable all debug event */
> +	lwz	r9, KVMPPC_DBG_DBCR1(r7)
> +	mtspr	SPRN_DBCR1, r9
> +	lwz	r9, KVMPPC_DBG_DBCR2(r7)
> +	mtspr	SPRN_DBCR2, r9
> +	lwz	r9, KVMPPC_DBG_IAC1+4(r7)
> +	mtspr	SPRN_IAC1, r9
> +	lwz	r9, KVMPPC_DBG_IAC2+4(r7)
> +	mtspr	SPRN_IAC2, r9
> +#if CONFIG_PPC_ADV_DEBUG_IACS > 2
> +	lwz	r9, KVMPPC_DBG_IAC3+4(r7)
> +	mtspr	SPRN_IAC3, r9
> +	lwz	r9, KVMPPC_DBG_IAC4+4(r7)
> +	mtspr	SPRN_IAC4, r9
> +#endif
> +	lwz	r9, KVMPPC_DBG_DAC1+4(r7)
> +	mtspr	SPRN_DAC1, r9
> +	lwz	r9, KVMPPC_DBG_DAC2+4(r7)

Yikes. Please move this into a struct, similar to how the MSR save/restore happens on x86.

Also, these are 64-bit loads and stores, no?

> +	mtspr	SPRN_DAC2, r9
> +skip_load_host_debug:
> +	/* Clear h/w DBSR and save current(guest) DBSR */
> +	mfspr	r9, SPRN_DBSR
> +	mtspr	SPRN_DBSR, r9
> +	isync
> +	andi.	r7, r6, NEED_DEBUG_SAVE
> +	beq	skip_dbsr_save
> +	/*
> +	 * If vcpu->guest_debug flag is set then do not check for
> +	 * shared->msr.DE as this debugging (say by QEMU) does not
> +	 * depends on shared->msr.de. In these scanerios MSR.DE is
> +	 * always set using shared_msr and should be handled always.
> +	 */
> +	lwz	r7, VCPU_GUEST_DEBUG(r4)
> +	cmpwi	r7, 0
> +	bne	skip_save_trap_event
> +	PPC_LL	r3, VCPU_SHARED(r4)
> +#ifndef CONFIG_64BIT
> +	lwz	r3, (VCPU_SHARED_MSR + 4)(r3)
> +#else
> +	ld	r3, (VCPU_SHARED_MSR)(r3)
> +#endif

Didn't we have 64-bit access helpers for these?

Also this shouldn't happen in the entry/exit code. We have shadow_msr for these purposes.



Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jan Kiszka Sept. 25, 2012, 10:38 a.m. UTC | #7
On 2012-09-24 16:46, Alexander Graf wrote:
> 
> On 07.09.2012, at 00:56, Scott Wood wrote:
> 
>> On 09/06/2012 09:56 AM, Bhushan Bharat-R65777 wrote:
>>>
>>>
>>>> -----Original Message-----
>>>> From: Wood Scott-B07421
>>>> Sent: Thursday, September 06, 2012 4:57 AM
>>>> To: Bhushan Bharat-R65777
>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Bhushan Bharat-
>>>> R65777
>>>> Subject: Re: [PATCH 6/6] KVM: booke/bookehv: Add debug stub support
>>>>
>>>> On 09/05/2012 06:23 PM, Scott Wood wrote:
>>>>> On 08/21/2012 08:52 AM, Bharat Bhushan wrote:
>>>>>> This patch adds the debug stub support on booke/bookehv.
>>>>>> Now QEMU debug stub can use hw breakpoint, watchpoint and software
>>>>>> breakpoint to debug guest.
>>>>>>
>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>> ---
>>>>>> arch/powerpc/include/asm/kvm.h        |   29 ++++++-
>>>>>> arch/powerpc/include/asm/kvm_host.h   |    5 +
>>>>>> arch/powerpc/kernel/asm-offsets.c     |   26 ++++++
>>>>>> arch/powerpc/kvm/booke.c              |  144 +++++++++++++++++++++++++++++--
>>>> --
>>>>>> arch/powerpc/kvm/booke_interrupts.S   |  110 +++++++++++++++++++++++++
>>>>>> arch/powerpc/kvm/bookehv_interrupts.S |  141
>>>> +++++++++++++++++++++++++++++++-
>>>>>> arch/powerpc/kvm/e500mc.c             |    3 +-
>>>>>> 7 files changed, 435 insertions(+), 23 deletions(-)
>>>>>>
>>>>>> diff --git a/arch/powerpc/include/asm/kvm.h
>>>>>> b/arch/powerpc/include/asm/kvm.h index 61b197e..53479ea 100644
>>>>>> --- a/arch/powerpc/include/asm/kvm.h
>>>>>> +++ b/arch/powerpc/include/asm/kvm.h
>>>>>> @@ -25,6 +25,7 @@
>>>>>> /* Select powerpc specific features in <linux/kvm.h> */  #define
>>>>>> __KVM_HAVE_SPAPR_TCE  #define __KVM_HAVE_PPC_SMT
>>>>>> +#define __KVM_HAVE_GUEST_DEBUG
>>>>>>
>>>>>> struct kvm_regs {
>>>>>> 	__u64 pc;
>>>>>> @@ -264,7 +265,31 @@ struct kvm_fpu {
>>>>>> 	__u64 fpr[32];
>>>>>> };
>>>>>>
>>>>>> +
>>>>>> +/*
>>>>>> + * Defines for h/w breakpoint, watchpoint (read, write or both) and
>>>>>> + * software breakpoint.
>>>>>> + * These are used as "type" in KVM_SET_GUEST_DEBUG ioctl and "status"
>>>>>> + * for KVM_DEBUG_EXIT.
>>>>>> + */
>>>>>> +#define KVMPPC_DEBUG_NONE		0x0
>>>>>> +#define KVMPPC_DEBUG_BREAKPOINT		(1UL << 1)
>>>>>> +#define KVMPPC_DEBUG_WATCH_WRITE	(1UL << 2)
>>>>>> +#define KVMPPC_DEBUG_WATCH_READ		(1UL << 3)
>>>>>> struct kvm_debug_exit_arch {
>>>>>
>>>>> That says "arch", but it's not in an arch-specific file.
>>>>
>>>> Sigh, I can't read today apparently.
>>>>
>>>>>> +	__u64 pc;
>>>>>> +	/*
>>>>>> +	 * exception -> returns the exception number. If the KVM_DEBUG_EXIT
>>>>>> +	 * exit is not handled (say not h/w breakpoint or software breakpoint
>>>>>> +	 * set for this address) by qemu then it is supposed to inject this
>>>>>> +	 * exception to guest.
>>>>>> +	 */
>>>>>> +	__u32 exception;
>>>>>> +	/*
>>>>>> +	 * exiting to userspace because of h/w breakpoint, watchpoint
>>>>>> +	 * (read, write or both) and software breakpoint.
>>>>>> +	 */
>>>>>> +	__u32 status;
>>>>>> };
>>>>>
>>>>> What does "exception number" mean in a generic API?
>>>>
>>>> Still, "exception number" is not a well-defined concept powerpc-wide.
>>>
>>> Just for background why we added is that, on x86 this exception number is used to inject the exception to guest if QEMU is not able to handle the debug exception.
>>>
>>> Should we just through a print with clearing the exception condition? Or something else you would like to suggest?
>>
>> We can pass up the exception type; it just needs more documentation
>> about what exactly you're referring to, and probably some enumeration
>> that says which exception numberspace it is.
>>
>> For booke the exception number should probably be related to the fixed
>> offsets rather than the IVOR number, as IVORs are phased out.
> 
> Jan, I would like to get your comment on this one.
> 
> Since we don't have standardized exception vectors like x86 does, we need to convert things between different semantics in user space if we want to make use of the exception type. Do we actually need to know about it in user space or do we only need to store it in case we get a migration at that point?
> 
> If it's the latter, can we maybe keep the reinjection logic internal to KVM and make DEBUG exits non-migratable similar to how we already handle MMIO/PIO exits today?

Reinjection depends on userspace. It has to check if the debug event was
related to the host debugging the guest or if it was due to a
guest-internal debugging event. That's because the kernel does not track
individual breakpoints, just controls the trapping. So we need a way to
manage the reinjection, and we do this (on x86) by passing the exception
up and then using it for SET_VCPU_EVENTS to reinject it if necessary.

The general logic (reinjection filter in userspace) should be generic
enough, but all the details can, of course, be defined in an
arch-specific manner.

Jan
Alexander Graf Sept. 25, 2012, 10:47 a.m. UTC | #8
On 25.09.2012, at 12:38, Jan Kiszka wrote:

> On 2012-09-24 16:46, Alexander Graf wrote:
>> 
>> On 07.09.2012, at 00:56, Scott Wood wrote:
>> 
>>> On 09/06/2012 09:56 AM, Bhushan Bharat-R65777 wrote:
>>>> 
>>>> 
>>>>> -----Original Message-----
>>>>> From: Wood Scott-B07421
>>>>> Sent: Thursday, September 06, 2012 4:57 AM
>>>>> To: Bhushan Bharat-R65777
>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Bhushan Bharat-
>>>>> R65777
>>>>> Subject: Re: [PATCH 6/6] KVM: booke/bookehv: Add debug stub support
>>>>> 
>>>>> On 09/05/2012 06:23 PM, Scott Wood wrote:
>>>>>> On 08/21/2012 08:52 AM, Bharat Bhushan wrote:
>>>>>>> This patch adds the debug stub support on booke/bookehv.
>>>>>>> Now QEMU debug stub can use hw breakpoint, watchpoint and software
>>>>>>> breakpoint to debug guest.
>>>>>>> 
>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>> ---
>>>>>>> arch/powerpc/include/asm/kvm.h        |   29 ++++++-
>>>>>>> arch/powerpc/include/asm/kvm_host.h   |    5 +
>>>>>>> arch/powerpc/kernel/asm-offsets.c     |   26 ++++++
>>>>>>> arch/powerpc/kvm/booke.c              |  144 +++++++++++++++++++++++++++++--
>>>>> --
>>>>>>> arch/powerpc/kvm/booke_interrupts.S   |  110 +++++++++++++++++++++++++
>>>>>>> arch/powerpc/kvm/bookehv_interrupts.S |  141
>>>>> +++++++++++++++++++++++++++++++-
>>>>>>> arch/powerpc/kvm/e500mc.c             |    3 +-
>>>>>>> 7 files changed, 435 insertions(+), 23 deletions(-)
>>>>>>> 
>>>>>>> diff --git a/arch/powerpc/include/asm/kvm.h
>>>>>>> b/arch/powerpc/include/asm/kvm.h index 61b197e..53479ea 100644
>>>>>>> --- a/arch/powerpc/include/asm/kvm.h
>>>>>>> +++ b/arch/powerpc/include/asm/kvm.h
>>>>>>> @@ -25,6 +25,7 @@
>>>>>>> /* Select powerpc specific features in <linux/kvm.h> */  #define
>>>>>>> __KVM_HAVE_SPAPR_TCE  #define __KVM_HAVE_PPC_SMT
>>>>>>> +#define __KVM_HAVE_GUEST_DEBUG
>>>>>>> 
>>>>>>> struct kvm_regs {
>>>>>>> 	__u64 pc;
>>>>>>> @@ -264,7 +265,31 @@ struct kvm_fpu {
>>>>>>> 	__u64 fpr[32];
>>>>>>> };
>>>>>>> 
>>>>>>> +
>>>>>>> +/*
>>>>>>> + * Defines for h/w breakpoint, watchpoint (read, write or both) and
>>>>>>> + * software breakpoint.
>>>>>>> + * These are used as "type" in KVM_SET_GUEST_DEBUG ioctl and "status"
>>>>>>> + * for KVM_DEBUG_EXIT.
>>>>>>> + */
>>>>>>> +#define KVMPPC_DEBUG_NONE		0x0
>>>>>>> +#define KVMPPC_DEBUG_BREAKPOINT		(1UL << 1)
>>>>>>> +#define KVMPPC_DEBUG_WATCH_WRITE	(1UL << 2)
>>>>>>> +#define KVMPPC_DEBUG_WATCH_READ		(1UL << 3)
>>>>>>> struct kvm_debug_exit_arch {
>>>>>> 
>>>>>> That says "arch", but it's not in an arch-specific file.
>>>>> 
>>>>> Sigh, I can't read today apparently.
>>>>> 
>>>>>>> +	__u64 pc;
>>>>>>> +	/*
>>>>>>> +	 * exception -> returns the exception number. If the KVM_DEBUG_EXIT
>>>>>>> +	 * exit is not handled (say not h/w breakpoint or software breakpoint
>>>>>>> +	 * set for this address) by qemu then it is supposed to inject this
>>>>>>> +	 * exception to guest.
>>>>>>> +	 */
>>>>>>> +	__u32 exception;
>>>>>>> +	/*
>>>>>>> +	 * exiting to userspace because of h/w breakpoint, watchpoint
>>>>>>> +	 * (read, write or both) and software breakpoint.
>>>>>>> +	 */
>>>>>>> +	__u32 status;
>>>>>>> };
>>>>>> 
>>>>>> What does "exception number" mean in a generic API?
>>>>> 
>>>>> Still, "exception number" is not a well-defined concept powerpc-wide.
>>>> 
>>>> Just for background why we added is that, on x86 this exception number is used to inject the exception to guest if QEMU is not able to handle the debug exception.
>>>> 
>>>> Should we just through a print with clearing the exception condition? Or something else you would like to suggest?
>>> 
>>> We can pass up the exception type; it just needs more documentation
>>> about what exactly you're referring to, and probably some enumeration
>>> that says which exception numberspace it is.
>>> 
>>> For booke the exception number should probably be related to the fixed
>>> offsets rather than the IVOR number, as IVORs are phased out.
>> 
>> Jan, I would like to get your comment on this one.
>> 
>> Since we don't have standardized exception vectors like x86 does, we need to convert things between different semantics in user space if we want to make use of the exception type. Do we actually need to know about it in user space or do we only need to store it in case we get a migration at that point?
>> 
>> If it's the latter, can we maybe keep the reinjection logic internal to KVM and make DEBUG exits non-migratable similar to how we already handle MMIO/PIO exits today?
> 
> Reinjection depends on userspace. It has to check if the debug event was
> related to the host debugging the guest or if it was due to a
> guest-internal debugging event. That's because the kernel does not track
> individual breakpoints, just controls the trapping. So we need a way to
> manage the reinjection, and we do this (on x86) by passing the exception
> up and then using it for SET_VCPU_EVENTS to reinject it if necessary.
> 
> The general logic (reinjection filter in userspace) should be generic
> enough, but all the details can, of course, be defined in an
> arch-specific manner.

Well, we could always just pass a payload to user space that it passes into an ioctl to the kernel to reinject the interrupt. But that would break migratability as this payload wouldn't get stored in env.

But the same thing applies to x86, right? So if just pass a struct kvm_vcpu_events with the debug interrupt as return value, user space can simply use that to reinject it with SET_VCPU_EVENTS.


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jan Kiszka Sept. 25, 2012, 10:56 a.m. UTC | #9
On 2012-09-25 12:47, Alexander Graf wrote:
> 
> On 25.09.2012, at 12:38, Jan Kiszka wrote:
> 
>> On 2012-09-24 16:46, Alexander Graf wrote:
>>>
>>> On 07.09.2012, at 00:56, Scott Wood wrote:
>>>
>>>> On 09/06/2012 09:56 AM, Bhushan Bharat-R65777 wrote:
>>>>>
>>>>>
>>>>>> -----Original Message-----
>>>>>> From: Wood Scott-B07421
>>>>>> Sent: Thursday, September 06, 2012 4:57 AM
>>>>>> To: Bhushan Bharat-R65777
>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Bhushan Bharat-
>>>>>> R65777
>>>>>> Subject: Re: [PATCH 6/6] KVM: booke/bookehv: Add debug stub support
>>>>>>
>>>>>> On 09/05/2012 06:23 PM, Scott Wood wrote:
>>>>>>> On 08/21/2012 08:52 AM, Bharat Bhushan wrote:
>>>>>>>> This patch adds the debug stub support on booke/bookehv.
>>>>>>>> Now QEMU debug stub can use hw breakpoint, watchpoint and software
>>>>>>>> breakpoint to debug guest.
>>>>>>>>
>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>> ---
>>>>>>>> arch/powerpc/include/asm/kvm.h        |   29 ++++++-
>>>>>>>> arch/powerpc/include/asm/kvm_host.h   |    5 +
>>>>>>>> arch/powerpc/kernel/asm-offsets.c     |   26 ++++++
>>>>>>>> arch/powerpc/kvm/booke.c              |  144 +++++++++++++++++++++++++++++--
>>>>>> --
>>>>>>>> arch/powerpc/kvm/booke_interrupts.S   |  110 +++++++++++++++++++++++++
>>>>>>>> arch/powerpc/kvm/bookehv_interrupts.S |  141
>>>>>> +++++++++++++++++++++++++++++++-
>>>>>>>> arch/powerpc/kvm/e500mc.c             |    3 +-
>>>>>>>> 7 files changed, 435 insertions(+), 23 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/arch/powerpc/include/asm/kvm.h
>>>>>>>> b/arch/powerpc/include/asm/kvm.h index 61b197e..53479ea 100644
>>>>>>>> --- a/arch/powerpc/include/asm/kvm.h
>>>>>>>> +++ b/arch/powerpc/include/asm/kvm.h
>>>>>>>> @@ -25,6 +25,7 @@
>>>>>>>> /* Select powerpc specific features in <linux/kvm.h> */  #define
>>>>>>>> __KVM_HAVE_SPAPR_TCE  #define __KVM_HAVE_PPC_SMT
>>>>>>>> +#define __KVM_HAVE_GUEST_DEBUG
>>>>>>>>
>>>>>>>> struct kvm_regs {
>>>>>>>> 	__u64 pc;
>>>>>>>> @@ -264,7 +265,31 @@ struct kvm_fpu {
>>>>>>>> 	__u64 fpr[32];
>>>>>>>> };
>>>>>>>>
>>>>>>>> +
>>>>>>>> +/*
>>>>>>>> + * Defines for h/w breakpoint, watchpoint (read, write or both) and
>>>>>>>> + * software breakpoint.
>>>>>>>> + * These are used as "type" in KVM_SET_GUEST_DEBUG ioctl and "status"
>>>>>>>> + * for KVM_DEBUG_EXIT.
>>>>>>>> + */
>>>>>>>> +#define KVMPPC_DEBUG_NONE		0x0
>>>>>>>> +#define KVMPPC_DEBUG_BREAKPOINT		(1UL << 1)
>>>>>>>> +#define KVMPPC_DEBUG_WATCH_WRITE	(1UL << 2)
>>>>>>>> +#define KVMPPC_DEBUG_WATCH_READ		(1UL << 3)
>>>>>>>> struct kvm_debug_exit_arch {
>>>>>>>
>>>>>>> That says "arch", but it's not in an arch-specific file.
>>>>>>
>>>>>> Sigh, I can't read today apparently.
>>>>>>
>>>>>>>> +	__u64 pc;
>>>>>>>> +	/*
>>>>>>>> +	 * exception -> returns the exception number. If the KVM_DEBUG_EXIT
>>>>>>>> +	 * exit is not handled (say not h/w breakpoint or software breakpoint
>>>>>>>> +	 * set for this address) by qemu then it is supposed to inject this
>>>>>>>> +	 * exception to guest.
>>>>>>>> +	 */
>>>>>>>> +	__u32 exception;
>>>>>>>> +	/*
>>>>>>>> +	 * exiting to userspace because of h/w breakpoint, watchpoint
>>>>>>>> +	 * (read, write or both) and software breakpoint.
>>>>>>>> +	 */
>>>>>>>> +	__u32 status;
>>>>>>>> };
>>>>>>>
>>>>>>> What does "exception number" mean in a generic API?
>>>>>>
>>>>>> Still, "exception number" is not a well-defined concept powerpc-wide.
>>>>>
>>>>> Just for background why we added is that, on x86 this exception number is used to inject the exception to guest if QEMU is not able to handle the debug exception.
>>>>>
>>>>> Should we just through a print with clearing the exception condition? Or something else you would like to suggest?
>>>>
>>>> We can pass up the exception type; it just needs more documentation
>>>> about what exactly you're referring to, and probably some enumeration
>>>> that says which exception numberspace it is.
>>>>
>>>> For booke the exception number should probably be related to the fixed
>>>> offsets rather than the IVOR number, as IVORs are phased out.
>>>
>>> Jan, I would like to get your comment on this one.
>>>
>>> Since we don't have standardized exception vectors like x86 does, we need to convert things between different semantics in user space if we want to make use of the exception type. Do we actually need to know about it in user space or do we only need to store it in case we get a migration at that point?
>>>
>>> If it's the latter, can we maybe keep the reinjection logic internal to KVM and make DEBUG exits non-migratable similar to how we already handle MMIO/PIO exits today?
>>
>> Reinjection depends on userspace. It has to check if the debug event was
>> related to the host debugging the guest or if it was due to a
>> guest-internal debugging event. That's because the kernel does not track
>> individual breakpoints, just controls the trapping. So we need a way to
>> manage the reinjection, and we do this (on x86) by passing the exception
>> up and then using it for SET_VCPU_EVENTS to reinject it if necessary.
>>
>> The general logic (reinjection filter in userspace) should be generic
>> enough, but all the details can, of course, be defined in an
>> arch-specific manner.
> 
> Well, we could always just pass a payload to user space that it passes into an ioctl to the kernel to reinject the interrupt. But that would break migratability as this payload wouldn't get stored in env.

Right.

> 
> But the same thing applies to x86, right? So if just pass a struct kvm_vcpu_events with the debug interrupt as return value, user space can simply use that to reinject it with SET_VCPU_EVENTS.

Sorry, I'm slow today: That's a proposal for booke now?

Jan
Alexander Graf Sept. 25, 2012, 10:58 a.m. UTC | #10
On 25.09.2012, at 12:56, Jan Kiszka wrote:

> On 2012-09-25 12:47, Alexander Graf wrote:
>> 
>> On 25.09.2012, at 12:38, Jan Kiszka wrote:
>> 
>>> On 2012-09-24 16:46, Alexander Graf wrote:
>>>> 
>>>> On 07.09.2012, at 00:56, Scott Wood wrote:
>>>> 
>>>>> On 09/06/2012 09:56 AM, Bhushan Bharat-R65777 wrote:
>>>>>> 
>>>>>> 
>>>>>>> -----Original Message-----
>>>>>>> From: Wood Scott-B07421
>>>>>>> Sent: Thursday, September 06, 2012 4:57 AM
>>>>>>> To: Bhushan Bharat-R65777
>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Bhushan Bharat-
>>>>>>> R65777
>>>>>>> Subject: Re: [PATCH 6/6] KVM: booke/bookehv: Add debug stub support
>>>>>>> 
>>>>>>> On 09/05/2012 06:23 PM, Scott Wood wrote:
>>>>>>>> On 08/21/2012 08:52 AM, Bharat Bhushan wrote:
>>>>>>>>> This patch adds the debug stub support on booke/bookehv.
>>>>>>>>> Now QEMU debug stub can use hw breakpoint, watchpoint and software
>>>>>>>>> breakpoint to debug guest.
>>>>>>>>> 
>>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>>> ---
>>>>>>>>> arch/powerpc/include/asm/kvm.h        |   29 ++++++-
>>>>>>>>> arch/powerpc/include/asm/kvm_host.h   |    5 +
>>>>>>>>> arch/powerpc/kernel/asm-offsets.c     |   26 ++++++
>>>>>>>>> arch/powerpc/kvm/booke.c              |  144 +++++++++++++++++++++++++++++--
>>>>>>> --
>>>>>>>>> arch/powerpc/kvm/booke_interrupts.S   |  110 +++++++++++++++++++++++++
>>>>>>>>> arch/powerpc/kvm/bookehv_interrupts.S |  141
>>>>>>> +++++++++++++++++++++++++++++++-
>>>>>>>>> arch/powerpc/kvm/e500mc.c             |    3 +-
>>>>>>>>> 7 files changed, 435 insertions(+), 23 deletions(-)
>>>>>>>>> 
>>>>>>>>> diff --git a/arch/powerpc/include/asm/kvm.h
>>>>>>>>> b/arch/powerpc/include/asm/kvm.h index 61b197e..53479ea 100644
>>>>>>>>> --- a/arch/powerpc/include/asm/kvm.h
>>>>>>>>> +++ b/arch/powerpc/include/asm/kvm.h
>>>>>>>>> @@ -25,6 +25,7 @@
>>>>>>>>> /* Select powerpc specific features in <linux/kvm.h> */  #define
>>>>>>>>> __KVM_HAVE_SPAPR_TCE  #define __KVM_HAVE_PPC_SMT
>>>>>>>>> +#define __KVM_HAVE_GUEST_DEBUG
>>>>>>>>> 
>>>>>>>>> struct kvm_regs {
>>>>>>>>> 	__u64 pc;
>>>>>>>>> @@ -264,7 +265,31 @@ struct kvm_fpu {
>>>>>>>>> 	__u64 fpr[32];
>>>>>>>>> };
>>>>>>>>> 
>>>>>>>>> +
>>>>>>>>> +/*
>>>>>>>>> + * Defines for h/w breakpoint, watchpoint (read, write or both) and
>>>>>>>>> + * software breakpoint.
>>>>>>>>> + * These are used as "type" in KVM_SET_GUEST_DEBUG ioctl and "status"
>>>>>>>>> + * for KVM_DEBUG_EXIT.
>>>>>>>>> + */
>>>>>>>>> +#define KVMPPC_DEBUG_NONE		0x0
>>>>>>>>> +#define KVMPPC_DEBUG_BREAKPOINT		(1UL << 1)
>>>>>>>>> +#define KVMPPC_DEBUG_WATCH_WRITE	(1UL << 2)
>>>>>>>>> +#define KVMPPC_DEBUG_WATCH_READ		(1UL << 3)
>>>>>>>>> struct kvm_debug_exit_arch {
>>>>>>>> 
>>>>>>>> That says "arch", but it's not in an arch-specific file.
>>>>>>> 
>>>>>>> Sigh, I can't read today apparently.
>>>>>>> 
>>>>>>>>> +	__u64 pc;
>>>>>>>>> +	/*
>>>>>>>>> +	 * exception -> returns the exception number. If the KVM_DEBUG_EXIT
>>>>>>>>> +	 * exit is not handled (say not h/w breakpoint or software breakpoint
>>>>>>>>> +	 * set for this address) by qemu then it is supposed to inject this
>>>>>>>>> +	 * exception to guest.
>>>>>>>>> +	 */
>>>>>>>>> +	__u32 exception;
>>>>>>>>> +	/*
>>>>>>>>> +	 * exiting to userspace because of h/w breakpoint, watchpoint
>>>>>>>>> +	 * (read, write or both) and software breakpoint.
>>>>>>>>> +	 */
>>>>>>>>> +	__u32 status;
>>>>>>>>> };
>>>>>>>> 
>>>>>>>> What does "exception number" mean in a generic API?
>>>>>>> 
>>>>>>> Still, "exception number" is not a well-defined concept powerpc-wide.
>>>>>> 
>>>>>> Just for background why we added is that, on x86 this exception number is used to inject the exception to guest if QEMU is not able to handle the debug exception.
>>>>>> 
>>>>>> Should we just through a print with clearing the exception condition? Or something else you would like to suggest?
>>>>> 
>>>>> We can pass up the exception type; it just needs more documentation
>>>>> about what exactly you're referring to, and probably some enumeration
>>>>> that says which exception numberspace it is.
>>>>> 
>>>>> For booke the exception number should probably be related to the fixed
>>>>> offsets rather than the IVOR number, as IVORs are phased out.
>>>> 
>>>> Jan, I would like to get your comment on this one.
>>>> 
>>>> Since we don't have standardized exception vectors like x86 does, we need to convert things between different semantics in user space if we want to make use of the exception type. Do we actually need to know about it in user space or do we only need to store it in case we get a migration at that point?
>>>> 
>>>> If it's the latter, can we maybe keep the reinjection logic internal to KVM and make DEBUG exits non-migratable similar to how we already handle MMIO/PIO exits today?
>>> 
>>> Reinjection depends on userspace. It has to check if the debug event was
>>> related to the host debugging the guest or if it was due to a
>>> guest-internal debugging event. That's because the kernel does not track
>>> individual breakpoints, just controls the trapping. So we need a way to
>>> manage the reinjection, and we do this (on x86) by passing the exception
>>> up and then using it for SET_VCPU_EVENTS to reinject it if necessary.
>>> 
>>> The general logic (reinjection filter in userspace) should be generic
>>> enough, but all the details can, of course, be defined in an
>>> arch-specific manner.
>> 
>> Well, we could always just pass a payload to user space that it passes into an ioctl to the kernel to reinject the interrupt. But that would break migratability as this payload wouldn't get stored in env.
> 
> Right.
> 
>> 
>> But the same thing applies to x86, right? So if just pass a struct kvm_vcpu_events with the debug interrupt as return value, user space can simply use that to reinject it with SET_VCPU_EVENTS.
> 
> Sorry, I'm slow today: That's a proposal for booke now?

Yes, that's a proposal for PPC in general, because we need to be extensible enough to transmit all data that Book3S one day might also want to have.

The thing I was saying is that if we make the payload magical, we need to make debug exits atomic. Otherwise QEMU could try to live migrate away from us and we lose state. I suppose x86 doesn't have this problem because it shoehorns the exception vectors into env?


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jan Kiszka Sept. 25, 2012, 11:06 a.m. UTC | #11
On 2012-09-25 12:58, Alexander Graf wrote:
> 
> On 25.09.2012, at 12:56, Jan Kiszka wrote:
> 
>> On 2012-09-25 12:47, Alexander Graf wrote:
>>>
>>> On 25.09.2012, at 12:38, Jan Kiszka wrote:
>>>
>>>> On 2012-09-24 16:46, Alexander Graf wrote:
>>>>>
>>>>> On 07.09.2012, at 00:56, Scott Wood wrote:
>>>>>
>>>>>> On 09/06/2012 09:56 AM, Bhushan Bharat-R65777 wrote:
>>>>>>>
>>>>>>>
>>>>>>>> -----Original Message-----
>>>>>>>> From: Wood Scott-B07421
>>>>>>>> Sent: Thursday, September 06, 2012 4:57 AM
>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Bhushan Bharat-
>>>>>>>> R65777
>>>>>>>> Subject: Re: [PATCH 6/6] KVM: booke/bookehv: Add debug stub support
>>>>>>>>
>>>>>>>> On 09/05/2012 06:23 PM, Scott Wood wrote:
>>>>>>>>> On 08/21/2012 08:52 AM, Bharat Bhushan wrote:
>>>>>>>>>> This patch adds the debug stub support on booke/bookehv.
>>>>>>>>>> Now QEMU debug stub can use hw breakpoint, watchpoint and software
>>>>>>>>>> breakpoint to debug guest.
>>>>>>>>>>
>>>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>>>> ---
>>>>>>>>>> arch/powerpc/include/asm/kvm.h        |   29 ++++++-
>>>>>>>>>> arch/powerpc/include/asm/kvm_host.h   |    5 +
>>>>>>>>>> arch/powerpc/kernel/asm-offsets.c     |   26 ++++++
>>>>>>>>>> arch/powerpc/kvm/booke.c              |  144 +++++++++++++++++++++++++++++--
>>>>>>>> --
>>>>>>>>>> arch/powerpc/kvm/booke_interrupts.S   |  110 +++++++++++++++++++++++++
>>>>>>>>>> arch/powerpc/kvm/bookehv_interrupts.S |  141
>>>>>>>> +++++++++++++++++++++++++++++++-
>>>>>>>>>> arch/powerpc/kvm/e500mc.c             |    3 +-
>>>>>>>>>> 7 files changed, 435 insertions(+), 23 deletions(-)
>>>>>>>>>>
>>>>>>>>>> diff --git a/arch/powerpc/include/asm/kvm.h
>>>>>>>>>> b/arch/powerpc/include/asm/kvm.h index 61b197e..53479ea 100644
>>>>>>>>>> --- a/arch/powerpc/include/asm/kvm.h
>>>>>>>>>> +++ b/arch/powerpc/include/asm/kvm.h
>>>>>>>>>> @@ -25,6 +25,7 @@
>>>>>>>>>> /* Select powerpc specific features in <linux/kvm.h> */  #define
>>>>>>>>>> __KVM_HAVE_SPAPR_TCE  #define __KVM_HAVE_PPC_SMT
>>>>>>>>>> +#define __KVM_HAVE_GUEST_DEBUG
>>>>>>>>>>
>>>>>>>>>> struct kvm_regs {
>>>>>>>>>> 	__u64 pc;
>>>>>>>>>> @@ -264,7 +265,31 @@ struct kvm_fpu {
>>>>>>>>>> 	__u64 fpr[32];
>>>>>>>>>> };
>>>>>>>>>>
>>>>>>>>>> +
>>>>>>>>>> +/*
>>>>>>>>>> + * Defines for h/w breakpoint, watchpoint (read, write or both) and
>>>>>>>>>> + * software breakpoint.
>>>>>>>>>> + * These are used as "type" in KVM_SET_GUEST_DEBUG ioctl and "status"
>>>>>>>>>> + * for KVM_DEBUG_EXIT.
>>>>>>>>>> + */
>>>>>>>>>> +#define KVMPPC_DEBUG_NONE		0x0
>>>>>>>>>> +#define KVMPPC_DEBUG_BREAKPOINT		(1UL << 1)
>>>>>>>>>> +#define KVMPPC_DEBUG_WATCH_WRITE	(1UL << 2)
>>>>>>>>>> +#define KVMPPC_DEBUG_WATCH_READ		(1UL << 3)
>>>>>>>>>> struct kvm_debug_exit_arch {
>>>>>>>>>
>>>>>>>>> That says "arch", but it's not in an arch-specific file.
>>>>>>>>
>>>>>>>> Sigh, I can't read today apparently.
>>>>>>>>
>>>>>>>>>> +	__u64 pc;
>>>>>>>>>> +	/*
>>>>>>>>>> +	 * exception -> returns the exception number. If the KVM_DEBUG_EXIT
>>>>>>>>>> +	 * exit is not handled (say not h/w breakpoint or software breakpoint
>>>>>>>>>> +	 * set for this address) by qemu then it is supposed to inject this
>>>>>>>>>> +	 * exception to guest.
>>>>>>>>>> +	 */
>>>>>>>>>> +	__u32 exception;
>>>>>>>>>> +	/*
>>>>>>>>>> +	 * exiting to userspace because of h/w breakpoint, watchpoint
>>>>>>>>>> +	 * (read, write or both) and software breakpoint.
>>>>>>>>>> +	 */
>>>>>>>>>> +	__u32 status;
>>>>>>>>>> };
>>>>>>>>>
>>>>>>>>> What does "exception number" mean in a generic API?
>>>>>>>>
>>>>>>>> Still, "exception number" is not a well-defined concept powerpc-wide.
>>>>>>>
>>>>>>> Just for background why we added is that, on x86 this exception number is used to inject the exception to guest if QEMU is not able to handle the debug exception.
>>>>>>>
>>>>>>> Should we just through a print with clearing the exception condition? Or something else you would like to suggest?
>>>>>>
>>>>>> We can pass up the exception type; it just needs more documentation
>>>>>> about what exactly you're referring to, and probably some enumeration
>>>>>> that says which exception numberspace it is.
>>>>>>
>>>>>> For booke the exception number should probably be related to the fixed
>>>>>> offsets rather than the IVOR number, as IVORs are phased out.
>>>>>
>>>>> Jan, I would like to get your comment on this one.
>>>>>
>>>>> Since we don't have standardized exception vectors like x86 does, we need to convert things between different semantics in user space if we want to make use of the exception type. Do we actually need to know about it in user space or do we only need to store it in case we get a migration at that point?
>>>>>
>>>>> If it's the latter, can we maybe keep the reinjection logic internal to KVM and make DEBUG exits non-migratable similar to how we already handle MMIO/PIO exits today?
>>>>
>>>> Reinjection depends on userspace. It has to check if the debug event was
>>>> related to the host debugging the guest or if it was due to a
>>>> guest-internal debugging event. That's because the kernel does not track
>>>> individual breakpoints, just controls the trapping. So we need a way to
>>>> manage the reinjection, and we do this (on x86) by passing the exception
>>>> up and then using it for SET_VCPU_EVENTS to reinject it if necessary.
>>>>
>>>> The general logic (reinjection filter in userspace) should be generic
>>>> enough, but all the details can, of course, be defined in an
>>>> arch-specific manner.
>>>
>>> Well, we could always just pass a payload to user space that it passes into an ioctl to the kernel to reinject the interrupt. But that would break migratability as this payload wouldn't get stored in env.
>>
>> Right.
>>
>>>
>>> But the same thing applies to x86, right? So if just pass a struct kvm_vcpu_events with the debug interrupt as return value, user space can simply use that to reinject it with SET_VCPU_EVENTS.
>>
>> Sorry, I'm slow today: That's a proposal for booke now?
> 
> Yes, that's a proposal for PPC in general, because we need to be extensible enough to transmit all data that Book3S one day might also want to have.
> 
> The thing I was saying is that if we make the payload magical, we need to make debug exits atomic. Otherwise QEMU could try to live migrate away from us and we lose state. I suppose x86 doesn't have this problem because it shoehorns the exception vectors into env?

It's no problem on x86 as our VCPU state can encode pending (debug)
exceptions, so we can store them also for migration. If PPC doesn't
allow to read/write such kind of state from/to the kernel, you can't
easily do the same. If debugging would be the only reason to do this, it
might be ok to declare that state atomic.

Jan
Alexander Graf Sept. 25, 2012, 11:10 a.m. UTC | #12
On 25.09.2012, at 13:06, Jan Kiszka wrote:

> On 2012-09-25 12:58, Alexander Graf wrote:
>> 
>> On 25.09.2012, at 12:56, Jan Kiszka wrote:
>> 
>>> On 2012-09-25 12:47, Alexander Graf wrote:
>>>> 
>>>> On 25.09.2012, at 12:38, Jan Kiszka wrote:
>>>> 
>>>>> On 2012-09-24 16:46, Alexander Graf wrote:
>>>>>> 
>>>>>> On 07.09.2012, at 00:56, Scott Wood wrote:
>>>>>> 
>>>>>>> On 09/06/2012 09:56 AM, Bhushan Bharat-R65777 wrote:
>>>>>>>> 
>>>>>>>> 
>>>>>>>>> -----Original Message-----
>>>>>>>>> From: Wood Scott-B07421
>>>>>>>>> Sent: Thursday, September 06, 2012 4:57 AM
>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; agraf@suse.de; Bhushan Bharat-
>>>>>>>>> R65777
>>>>>>>>> Subject: Re: [PATCH 6/6] KVM: booke/bookehv: Add debug stub support
>>>>>>>>> 
>>>>>>>>> On 09/05/2012 06:23 PM, Scott Wood wrote:
>>>>>>>>>> On 08/21/2012 08:52 AM, Bharat Bhushan wrote:
>>>>>>>>>>> This patch adds the debug stub support on booke/bookehv.
>>>>>>>>>>> Now QEMU debug stub can use hw breakpoint, watchpoint and software
>>>>>>>>>>> breakpoint to debug guest.
>>>>>>>>>>> 
>>>>>>>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>>>>>>>> ---
>>>>>>>>>>> arch/powerpc/include/asm/kvm.h        |   29 ++++++-
>>>>>>>>>>> arch/powerpc/include/asm/kvm_host.h   |    5 +
>>>>>>>>>>> arch/powerpc/kernel/asm-offsets.c     |   26 ++++++
>>>>>>>>>>> arch/powerpc/kvm/booke.c              |  144 +++++++++++++++++++++++++++++--
>>>>>>>>> --
>>>>>>>>>>> arch/powerpc/kvm/booke_interrupts.S   |  110 +++++++++++++++++++++++++
>>>>>>>>>>> arch/powerpc/kvm/bookehv_interrupts.S |  141
>>>>>>>>> +++++++++++++++++++++++++++++++-
>>>>>>>>>>> arch/powerpc/kvm/e500mc.c             |    3 +-
>>>>>>>>>>> 7 files changed, 435 insertions(+), 23 deletions(-)
>>>>>>>>>>> 
>>>>>>>>>>> diff --git a/arch/powerpc/include/asm/kvm.h
>>>>>>>>>>> b/arch/powerpc/include/asm/kvm.h index 61b197e..53479ea 100644
>>>>>>>>>>> --- a/arch/powerpc/include/asm/kvm.h
>>>>>>>>>>> +++ b/arch/powerpc/include/asm/kvm.h
>>>>>>>>>>> @@ -25,6 +25,7 @@
>>>>>>>>>>> /* Select powerpc specific features in <linux/kvm.h> */  #define
>>>>>>>>>>> __KVM_HAVE_SPAPR_TCE  #define __KVM_HAVE_PPC_SMT
>>>>>>>>>>> +#define __KVM_HAVE_GUEST_DEBUG
>>>>>>>>>>> 
>>>>>>>>>>> struct kvm_regs {
>>>>>>>>>>> 	__u64 pc;
>>>>>>>>>>> @@ -264,7 +265,31 @@ struct kvm_fpu {
>>>>>>>>>>> 	__u64 fpr[32];
>>>>>>>>>>> };
>>>>>>>>>>> 
>>>>>>>>>>> +
>>>>>>>>>>> +/*
>>>>>>>>>>> + * Defines for h/w breakpoint, watchpoint (read, write or both) and
>>>>>>>>>>> + * software breakpoint.
>>>>>>>>>>> + * These are used as "type" in KVM_SET_GUEST_DEBUG ioctl and "status"
>>>>>>>>>>> + * for KVM_DEBUG_EXIT.
>>>>>>>>>>> + */
>>>>>>>>>>> +#define KVMPPC_DEBUG_NONE		0x0
>>>>>>>>>>> +#define KVMPPC_DEBUG_BREAKPOINT		(1UL << 1)
>>>>>>>>>>> +#define KVMPPC_DEBUG_WATCH_WRITE	(1UL << 2)
>>>>>>>>>>> +#define KVMPPC_DEBUG_WATCH_READ		(1UL << 3)
>>>>>>>>>>> struct kvm_debug_exit_arch {
>>>>>>>>>> 
>>>>>>>>>> That says "arch", but it's not in an arch-specific file.
>>>>>>>>> 
>>>>>>>>> Sigh, I can't read today apparently.
>>>>>>>>> 
>>>>>>>>>>> +	__u64 pc;
>>>>>>>>>>> +	/*
>>>>>>>>>>> +	 * exception -> returns the exception number. If the KVM_DEBUG_EXIT
>>>>>>>>>>> +	 * exit is not handled (say not h/w breakpoint or software breakpoint
>>>>>>>>>>> +	 * set for this address) by qemu then it is supposed to inject this
>>>>>>>>>>> +	 * exception to guest.
>>>>>>>>>>> +	 */
>>>>>>>>>>> +	__u32 exception;
>>>>>>>>>>> +	/*
>>>>>>>>>>> +	 * exiting to userspace because of h/w breakpoint, watchpoint
>>>>>>>>>>> +	 * (read, write or both) and software breakpoint.
>>>>>>>>>>> +	 */
>>>>>>>>>>> +	__u32 status;
>>>>>>>>>>> };
>>>>>>>>>> 
>>>>>>>>>> What does "exception number" mean in a generic API?
>>>>>>>>> 
>>>>>>>>> Still, "exception number" is not a well-defined concept powerpc-wide.
>>>>>>>> 
>>>>>>>> Just for background why we added is that, on x86 this exception number is used to inject the exception to guest if QEMU is not able to handle the debug exception.
>>>>>>>> 
>>>>>>>> Should we just through a print with clearing the exception condition? Or something else you would like to suggest?
>>>>>>> 
>>>>>>> We can pass up the exception type; it just needs more documentation
>>>>>>> about what exactly you're referring to, and probably some enumeration
>>>>>>> that says which exception numberspace it is.
>>>>>>> 
>>>>>>> For booke the exception number should probably be related to the fixed
>>>>>>> offsets rather than the IVOR number, as IVORs are phased out.
>>>>>> 
>>>>>> Jan, I would like to get your comment on this one.
>>>>>> 
>>>>>> Since we don't have standardized exception vectors like x86 does, we need to convert things between different semantics in user space if we want to make use of the exception type. Do we actually need to know about it in user space or do we only need to store it in case we get a migration at that point?
>>>>>> 
>>>>>> If it's the latter, can we maybe keep the reinjection logic internal to KVM and make DEBUG exits non-migratable similar to how we already handle MMIO/PIO exits today?
>>>>> 
>>>>> Reinjection depends on userspace. It has to check if the debug event was
>>>>> related to the host debugging the guest or if it was due to a
>>>>> guest-internal debugging event. That's because the kernel does not track
>>>>> individual breakpoints, just controls the trapping. So we need a way to
>>>>> manage the reinjection, and we do this (on x86) by passing the exception
>>>>> up and then using it for SET_VCPU_EVENTS to reinject it if necessary.
>>>>> 
>>>>> The general logic (reinjection filter in userspace) should be generic
>>>>> enough, but all the details can, of course, be defined in an
>>>>> arch-specific manner.
>>>> 
>>>> Well, we could always just pass a payload to user space that it passes into an ioctl to the kernel to reinject the interrupt. But that would break migratability as this payload wouldn't get stored in env.
>>> 
>>> Right.
>>> 
>>>> 
>>>> But the same thing applies to x86, right? So if just pass a struct kvm_vcpu_events with the debug interrupt as return value, user space can simply use that to reinject it with SET_VCPU_EVENTS.
>>> 
>>> Sorry, I'm slow today: That's a proposal for booke now?
>> 
>> Yes, that's a proposal for PPC in general, because we need to be extensible enough to transmit all data that Book3S one day might also want to have.
>> 
>> The thing I was saying is that if we make the payload magical, we need to make debug exits atomic. Otherwise QEMU could try to live migrate away from us and we lose state. I suppose x86 doesn't have this problem because it shoehorns the exception vectors into env?
> 
> It's no problem on x86 as our VCPU state can encode pending (debug)
> exceptions, so we can store them also for migration. If PPC doesn't
> allow to read/write such kind of state from/to the kernel, you can't
> easily do the same. If debugging would be the only reason to do this, it
> might be ok to declare that state atomic.

Well, we do have such state as well. But it's quite different depending on the board, so it'd be a bunch of code that I'd rather not have around if it doesn't buy us too much.

Though I guess it's the cleanest way to do it. Sigh.

So we pass a struct kvm_vcpu_events up with the debug interrupt. That way a different user space can simply reinject it if it wants to. For QEMU, we would interpret that thing, shove all the fields into their respective env counterparts with the QEMU internal exception vector definitions and do the reverse in kvm_put_vcpu_events.


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bharat Bhushan Oct. 4, 2012, 11:06 a.m. UTC | #13
> -----Original Message-----
> From: Alexander Graf [mailto:agraf@suse.de]
> Sent: Monday, September 24, 2012 9:50 PM
> To: Bhushan Bharat-R65777
> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Bhushan Bharat-R65777
> Subject: Re: [PATCH 6/6] KVM: booke/bookehv: Add debug stub support
> 
> 
> On 21.08.2012, at 15:52, Bharat Bhushan wrote:
> 
> > This patch adds the debug stub support on booke/bookehv.
> > Now QEMU debug stub can use hw breakpoint, watchpoint and
> > software breakpoint to debug guest.
> >
> > Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
> > ---
> > arch/powerpc/include/asm/kvm.h        |   29 ++++++-
> > arch/powerpc/include/asm/kvm_host.h   |    5 +
> > arch/powerpc/kernel/asm-offsets.c     |   26 ++++++
> > arch/powerpc/kvm/booke.c              |  144 +++++++++++++++++++++++++++++----
> > arch/powerpc/kvm/booke_interrupts.S   |  110 +++++++++++++++++++++++++
> > arch/powerpc/kvm/bookehv_interrupts.S |  141 +++++++++++++++++++++++++++++++-
> > arch/powerpc/kvm/e500mc.c             |    3 +-
> > 7 files changed, 435 insertions(+), 23 deletions(-)
> >
> > diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
> > index 61b197e..53479ea 100644
> > --- a/arch/powerpc/include/asm/kvm.h
> > +++ b/arch/powerpc/include/asm/kvm.h
> > @@ -25,6 +25,7 @@
> > /* Select powerpc specific features in <linux/kvm.h> */
> > #define __KVM_HAVE_SPAPR_TCE
> > #define __KVM_HAVE_PPC_SMT
> > +#define __KVM_HAVE_GUEST_DEBUG
> >
> > struct kvm_regs {
> > 	__u64 pc;
> > @@ -264,7 +265,31 @@ struct kvm_fpu {
> > 	__u64 fpr[32];
> > };
> >
> > +
> > +/*
> > + * Defines for h/w breakpoint, watchpoint (read, write or both) and
> > + * software breakpoint.
> > + * These are used as "type" in KVM_SET_GUEST_DEBUG ioctl and "status"
> > + * for KVM_DEBUG_EXIT.
> > + */
> > +#define KVMPPC_DEBUG_NONE		0x0
> > +#define KVMPPC_DEBUG_BREAKPOINT		(1UL << 1)
> > +#define KVMPPC_DEBUG_WATCH_WRITE	(1UL << 2)
> > +#define KVMPPC_DEBUG_WATCH_READ		(1UL << 3)
> > struct kvm_debug_exit_arch {
> > +	__u64 pc;
> > +	/*
> > +	 * exception -> returns the exception number. If the KVM_DEBUG_EXIT
> > +	 * exit is not handled (say not h/w breakpoint or software breakpoint
> > +	 * set for this address) by qemu then it is supposed to inject this
> > +	 * exception to guest.
> > +	 */
> > +	__u32 exception;
> > +	/*
> > +	 * exiting to userspace because of h/w breakpoint, watchpoint
> > +	 * (read, write or both) and software breakpoint.
> > +	 */
> > +	__u32 status;
> > };
> >
> > /* for KVM_SET_GUEST_DEBUG */
> > @@ -276,10 +301,6 @@ struct kvm_guest_debug_arch {
> > 		 * Type denotes h/w breakpoint, read watchpoint, write
> > 		 * watchpoint or watchpoint (both read and write).
> > 		 */
> > -#define KVMPPC_DEBUG_NOTYPE		0x0
> > -#define KVMPPC_DEBUG_BREAKPOINT		(1UL << 1)
> > -#define KVMPPC_DEBUG_WATCH_WRITE	(1UL << 2)
> > -#define KVMPPC_DEBUG_WATCH_READ		(1UL << 3)
> > 		__u32 type;
> > 		__u32 pad1;
> > 		__u64 pad2;
> > diff --git a/arch/powerpc/include/asm/kvm_host.h
> b/arch/powerpc/include/asm/kvm_host.h
> > index c7219c1..3ba465a 100644
> > --- a/arch/powerpc/include/asm/kvm_host.h
> > +++ b/arch/powerpc/include/asm/kvm_host.h
> > @@ -496,7 +496,12 @@ struct kvm_vcpu_arch {
> > 	u32 mmucfg;
> > 	u32 epr;
> > 	u32 crit_save;
> > +	/* guest debug registers*/
> > 	struct kvmppc_booke_debug_reg dbg_reg;
> > +	/* shadow debug registers */
> > +	struct kvmppc_booke_debug_reg shadow_dbg_reg;
> > +	/* host debug registers*/
> > +	struct kvmppc_booke_debug_reg host_dbg_reg;
> > #endif
> > 	gpa_t paddr_accessed;
> > 	gva_t vaddr_accessed;
> > diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-
> offsets.c
> > index 555448e..6987821 100644
> > --- a/arch/powerpc/kernel/asm-offsets.c
> > +++ b/arch/powerpc/kernel/asm-offsets.c
> > @@ -564,6 +564,32 @@ int main(void)
> > 	DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear));
> > 	DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr));
> > 	DEFINE(VCPU_CRIT_SAVE, offsetof(struct kvm_vcpu, arch.crit_save));
> > +	DEFINE(VCPU_DBSR, offsetof(struct kvm_vcpu, arch.dbsr));
> > +	DEFINE(VCPU_SHADOW_DBG, offsetof(struct kvm_vcpu, arch.shadow_dbg_reg));
> > +	DEFINE(VCPU_HOST_DBG, offsetof(struct kvm_vcpu, arch.host_dbg_reg));
> > +	DEFINE(KVMPPC_DBG_DBCR0, offsetof(struct kvmppc_booke_debug_reg,
> > +					  dbcr0));
> > +	DEFINE(KVMPPC_DBG_DBCR1, offsetof(struct kvmppc_booke_debug_reg,
> > +					  dbcr1));
> > +	DEFINE(KVMPPC_DBG_DBCR2, offsetof(struct kvmppc_booke_debug_reg,
> > +					  dbcr2));
> > +#ifdef CONFIG_KVM_E500MC
> > +	DEFINE(KVMPPC_DBG_DBCR4, offsetof(struct kvmppc_booke_debug_reg,
> > +					  dbcr4));
> > +#endif
> > +	DEFINE(KVMPPC_DBG_IAC1, offsetof(struct kvmppc_booke_debug_reg,
> > +					 iac[0]));
> > +	DEFINE(KVMPPC_DBG_IAC2, offsetof(struct kvmppc_booke_debug_reg,
> > +					 iac[1]));
> > +	DEFINE(KVMPPC_DBG_IAC3, offsetof(struct kvmppc_booke_debug_reg,
> > +					 iac[2]));
> > +	DEFINE(KVMPPC_DBG_IAC4, offsetof(struct kvmppc_booke_debug_reg,
> > +					 iac[3]));
> > +	DEFINE(KVMPPC_DBG_DAC1, offsetof(struct kvmppc_booke_debug_reg,
> > +					 dac[0]));
> > +	DEFINE(KVMPPC_DBG_DAC2, offsetof(struct kvmppc_booke_debug_reg,
> > +					 dac[1]));
> > +	DEFINE(VCPU_GUEST_DEBUG, offsetof(struct kvm_vcpu, guest_debug));
> > #endif /* CONFIG_PPC_BOOK3S */
> > #endif /* CONFIG_KVM */
> >
> > diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
> > index dd0e5b8..4d82e34 100644
> > --- a/arch/powerpc/kvm/booke.c
> > +++ b/arch/powerpc/kvm/booke.c
> > @@ -132,6 +132,9 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
> >
> > #ifdef CONFIG_KVM_BOOKE_HV
> > 	new_msr |= MSR_GS;
> > +
> > +	if (vcpu->guest_debug)
> > +		new_msr |= MSR_DE;
> > #endif
> >
> > 	vcpu->arch.shared->msr = new_msr;
> > @@ -667,10 +670,21 @@ out:
> > 	return ret;
> > }
> >
> > -static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
> > +static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
> > +			  int exit_nr)
> > {
> > 	enum emulation_result er;
> >
> > +	if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) &&
> > +		     vcpu->arch.last_inst == KVMPPC_INST_GUEST_GDB) {
> 
> This belongs into the normal emulation code path, behind the same switch() that
> everything else goes through.

I am not sure I understood correctly. Below is the reason why I placed this code here.
Instruction where software breakpoint is to be set is replaced by "ehpriv" instruction. On e500v2, this is not a valid instruction can causes program interrupt. On e500mc, "ehpriv" is a valid instruction. Both the exit path calls emulation_exit(), so we have placed the code in this function.
Do you want this code to be moved in program interrupt exit path for e500v2 and BOOKE_INTERRUPT_HV_PRIV for e500mc?


> 
> > +		run->exit_reason = KVM_EXIT_DEBUG;
> > +		run->debug.arch.pc = vcpu->arch.pc;
> > +		run->debug.arch.exception = exit_nr;
> > +		run->debug.arch.status = 0;
> > +		kvmppc_account_exit(vcpu, DEBUG_EXITS);
> > +		return RESUME_HOST;
> > +	}
> > +
> > 	er = kvmppc_emulate_instruction(run, vcpu);
> > 	switch (er) {
> > 	case EMULATE_DONE:
> > @@ -697,6 +711,44 @@ static int emulation_exit(struct kvm_run *run, struct
> kvm_vcpu *vcpu)
> > 	default:
> > 		BUG();
> > 	}
> > +
> > +	if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_ENABLE) &&
> > +	    (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)) {
> 
> I don't understand how this is supposed to work. When we enable singlestep, why
> would we end up in emulation_exit()?

When singlestep is enabled then we set DBCR0[ICMP] and the debug handler should be able to handle this. I think you are right.

> 
> > +		run->exit_reason = KVM_EXIT_DEBUG;
> > +		return RESUME_HOST;
> > +	}
> > +}
> > +
> > +static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu *vcpu)
> > +{
> > +	u32 dbsr;
> > +
> > +#ifndef CONFIG_KVM_BOOKE_HV
> > +	if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC))
> > +		vcpu->arch.pc = mfspr(SPRN_DSRR0);
> > +	else
> > +		vcpu->arch.pc = mfspr(SPRN_CSRR0);
> > +#endif
> 
> Why doesn't this get handled in the asm code that recovers from the respective
> exceptions?

Yes. I will remove this.

> 
> > +	dbsr = vcpu->arch.dbsr;
> > +
> > +	run->debug.arch.pc = vcpu->arch.pc;
> > +	run->debug.arch.status = 0;
> > +	vcpu->arch.dbsr = 0;
> > +
> > +	if (dbsr & (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) {
> > +		run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT;
> > +	} else {
> > +		if (dbsr & (DBSR_DAC1W | DBSR_DAC2W))
> > +			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_WRITE;
> > +		else if (dbsr & (DBSR_DAC1R | DBSR_DAC2R))
> > +			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_READ;
> > +		if (dbsr & (DBSR_DAC1R | DBSR_DAC1W))
> > +			run->debug.arch.pc = vcpu->arch.shadow_dbg_reg.dac[0];
> > +		else if (dbsr & (DBSR_DAC2R | DBSR_DAC2W))
> > +			run->debug.arch.pc = vcpu->arch.shadow_dbg_reg.dac[1];
> > +	}
> 
> Mind to explain the guest register sharing mechanism you are envisioning? Which
> registers are used by the guest? Which are used by the host? Who decides what
> gets used by whom?

struct kvm_vcpu_arch {
	.
	.

       /* guest debug registers*/
        struct kvmppc_booke_debug_reg dbg_reg;
       /* shadow debug registers */
       struct kvmppc_booke_debug_reg shadow_dbg_reg;
       /* host debug registers*/
       struct kvmppc_booke_debug_reg host_dbg_reg;

	.
	.
}

dbg_reg: contains the values written by guest.
shadow_dbg_reg: This contains the actual values to be written on behalf of guest/qemu.
host_dbg_reg: the debug register value of host (needed for store/retore purpose).

According to current design both, the qemu debug stub and guest, cannot share the debug resources. QEMU debug stub is given priority.
But host and guest can share all debug registers and host cannot debug guest if it is using the debug resource (which probably does not look like a good case).

> 
> > +
> > +	return RESUME_HOST;
> > }
> >
> > static void kvmppc_fill_pt_regs(struct pt_regs *regs)
> > @@ -843,7 +895,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct
> kvm_vcpu *vcpu,
> > 		break;
> >
> > 	case BOOKE_INTERRUPT_HV_PRIV:
> > -		r = emulation_exit(run, vcpu);
> > +		r = emulation_exit(run, vcpu, exit_nr);
> > 		break;
> >
> > 	case BOOKE_INTERRUPT_PROGRAM:
> > @@ -862,7 +914,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct
> kvm_vcpu *vcpu,
> > 			break;
> > 		}
> >
> > -		r = emulation_exit(run, vcpu);
> > +		r = emulation_exit(run, vcpu, exit_nr);
> > 		break;
> >
> > 	case BOOKE_INTERRUPT_FP_UNAVAIL:
> > @@ -1052,18 +1104,12 @@ int kvmppc_handle_exit(struct kvm_run *run, struct
> kvm_vcpu *vcpu,
> > 	}
> >
> > 	case BOOKE_INTERRUPT_DEBUG: {
> > -		u32 dbsr;
> > -
> > -		vcpu->arch.pc = mfspr(SPRN_CSRR0);
> > -
> > -		/* clear IAC events in DBSR register */
> > -		dbsr = mfspr(SPRN_DBSR);
> > -		dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4;
> > -		mtspr(SPRN_DBSR, dbsr);
> > -
> > -		run->exit_reason = KVM_EXIT_DEBUG;
> > +		r = kvmppc_handle_debug(run, vcpu);
> > +		if (r == RESUME_HOST) {
> > +			run->debug.arch.exception = exit_nr;
> > +			run->exit_reason = KVM_EXIT_DEBUG;
> > +		}
> > 		kvmppc_account_exit(vcpu, DEBUG_EXITS);
> > -		r = RESUME_HOST;
> > 		break;
> > 	}
> >
> > @@ -1403,10 +1449,78 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
> struct kvm_one_reg *reg)
> > 	return r;
> > }
> >
> > +#define BP_NUM	KVMPPC_BOOKE_IAC_NUM
> > +#define WP_NUM	KVMPPC_BOOKE_DAC_NUM
> > +
> > int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
> > 					 struct kvm_guest_debug *dbg)
> > {
> > -	return -EINVAL;
> > +
> > +	if (!(dbg->control & KVM_GUESTDBG_ENABLE)) {
> > +		/* Clear All debug events */
> > +		vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
> > +		vcpu->guest_debug = 0;
> > +		return 0;
> > +	}
> > +
> > +	vcpu->guest_debug = dbg->control;
> > +	vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
> > +
> > +	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
> > +		vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC;
> > +
> > +	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
> > +		struct kvmppc_booke_debug_reg *gdbgr =
> > +				&(vcpu->arch.shadow_dbg_reg);
> > +		int n, b = 0, w = 0;
> > +		const u32 bp_code[] = {
> > +			DBCR0_IAC1 | DBCR0_IDM,
> > +			DBCR0_IAC2 | DBCR0_IDM,
> > +			DBCR0_IAC3 | DBCR0_IDM,
> > +			DBCR0_IAC4 | DBCR0_IDM
> > +		};
> > +		const u32 wp_code[] = {
> > +			DBCR0_DAC1W | DBCR0_IDM,
> > +			DBCR0_DAC2W | DBCR0_IDM,
> > +			DBCR0_DAC1R | DBCR0_IDM,
> > +			DBCR0_DAC2R | DBCR0_IDM
> > +		};
> > +
> > +#ifndef CONFIG_KVM_BOOKE_HV
> > +		gdbgr->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US |
> > +				DBCR1_IAC3US | DBCR1_IAC4US;
> > +		gdbgr->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US;
> > +#else
> > +		gdbgr->dbcr1 = 0;
> > +		gdbgr->dbcr2 = 0;
> > +#endif
> > +
> > +		for (n = 0; n < (BP_NUM + WP_NUM); n++) {
> > +			u32 type = dbg->arch.bp[n].type;
> > +
> > +			if (!type)
> > +				break;
> > +
> > +			if (type & (KVMPPC_DEBUG_WATCH_READ |
> > +				    KVMPPC_DEBUG_WATCH_WRITE)) {
> > +				if (w < WP_NUM) {
> > +					if (type & KVMPPC_DEBUG_WATCH_READ)
> > +						gdbgr->dbcr0 |= wp_code[w + 2];
> > +					if (type & KVMPPC_DEBUG_WATCH_WRITE)
> > +						gdbgr->dbcr0 |= wp_code[w];
> > +					gdbgr->dac[w] = dbg->arch.bp[n].addr;
> > +					w++;
> > +				}
> > +			} else if (type & KVMPPC_DEBUG_BREAKPOINT) {
> > +				if (b < BP_NUM) {
> > +					gdbgr->dbcr0 |= bp_code[b];
> > +					gdbgr->iac[b] = dbg->arch.bp[n].addr;
> > +					b++;
> > +				}
> > +			}
> > +		}
> > +	}
> > +	return 0;
> > }
> >
> > int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
> > diff --git a/arch/powerpc/kvm/booke_interrupts.S
> b/arch/powerpc/kvm/booke_interrupts.S
> > index dd9c5d4..2202936 100644
> > --- a/arch/powerpc/kvm/booke_interrupts.S
> > +++ b/arch/powerpc/kvm/booke_interrupts.S
> > @@ -39,6 +39,8 @@
> > #define HOST_MIN_STACK_SIZE (HOST_NV_GPR(R31) + 4)
> > #define HOST_STACK_SIZE (((HOST_MIN_STACK_SIZE + 15) / 16) * 16) /* Align. */
> > #define HOST_STACK_LR   (HOST_STACK_SIZE + 4) /* In caller stack frame. */
> > +#define DBCR0_AC_BITS	(DBCR0_IAC1 | DBCR0_IAC2 | DBCR0_IAC3 | DBCR0_IAC4 | \
> > +			 DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W)
> >
> > #define NEED_INST_MASK ((1<<BOOKE_INTERRUPT_PROGRAM) | \
> >                         (1<<BOOKE_INTERRUPT_DTLB_MISS) | \
> > @@ -52,6 +54,8 @@
> >                        (1<<BOOKE_INTERRUPT_PROGRAM) | \
> >                        (1<<BOOKE_INTERRUPT_DTLB_MISS))
> >
> > +#define NEED_DEBUG_SAVE (1<<BOOKE_INTERRUPT_DEBUG)
> > +
> > .macro __KVM_HANDLER ivor_nr scratch srr0
> > 	stw	r3, VCPU_GPR(R3)(r4)
> > 	stw	r5, VCPU_GPR(R5)(r4)
> > @@ -212,6 +216,61 @@ _GLOBAL(kvmppc_resume_host)
> > 	stw	r9, VCPU_FAULT_ESR(r4)
> > ..skip_esr:
> >
> > +	addi	r7, r4, VCPU_HOST_DBG
> > +	mfspr	r9, SPRN_DBCR0
> > +	lwz	r8, KVMPPC_DBG_DBCR0(r7)
> > +	andis.	r9, r9, DBCR0_AC_BITS@h
> > +	beq	skip_load_host_debug
> > +	li	r9, 0
> > +	mtspr	SPRN_DBCR0, r9		/* disable all debug event */
> > +	lwz	r9, KVMPPC_DBG_DBCR1(r7)
> > +	mtspr	SPRN_DBCR1, r9
> > +	lwz	r9, KVMPPC_DBG_DBCR2(r7)
> > +	mtspr	SPRN_DBCR2, r9
> > +	lwz	r9, KVMPPC_DBG_IAC1+4(r7)
> > +	mtspr	SPRN_IAC1, r9
> > +	lwz	r9, KVMPPC_DBG_IAC2+4(r7)
> > +	mtspr	SPRN_IAC2, r9
> > +#if CONFIG_PPC_ADV_DEBUG_IACS > 2
> > +	lwz	r9, KVMPPC_DBG_IAC3+4(r7)
> > +	mtspr	SPRN_IAC3, r9
> > +	lwz	r9, KVMPPC_DBG_IAC4+4(r7)
> > +	mtspr	SPRN_IAC4, r9
> > +#endif
> > +	lwz	r9, KVMPPC_DBG_DAC1+4(r7)
> > +	mtspr	SPRN_DAC1, r9
> > +	lwz	r9, KVMPPC_DBG_DAC2+4(r7)
> 
> Yikes. Please move this into a struct, similar to how the MSR save/restore
> happens on x86.
> 
> Also, these are 64-bit loads and stores, no?

Ok, you mean PPC_LD/STD?

> 
> > +	mtspr	SPRN_DAC2, r9
> > +skip_load_host_debug:
> > +	/* Clear h/w DBSR and save current(guest) DBSR */
> > +	mfspr	r9, SPRN_DBSR
> > +	mtspr	SPRN_DBSR, r9
> > +	isync
> > +	andi.	r7, r6, NEED_DEBUG_SAVE
> > +	beq	skip_dbsr_save
> > +	/*
> > +	 * If vcpu->guest_debug flag is set then do not check for
> > +	 * shared->msr.DE as this debugging (say by QEMU) does not
> > +	 * depends on shared->msr.de. In these scanerios MSR.DE is
> > +	 * always set using shared_msr and should be handled always.
> > +	 */
> > +	lwz	r7, VCPU_GUEST_DEBUG(r4)
> > +	cmpwi	r7, 0
> > +	bne	skip_save_trap_event
> > +	PPC_LL	r3, VCPU_SHARED(r4)
> > +#ifndef CONFIG_64BIT
> > +	lwz	r3, (VCPU_SHARED_MSR + 4)(r3)
> > +#else
> > +	ld	r3, (VCPU_SHARED_MSR)(r3)
> > +#endif
> 
> Didn't we have 64-bit access helpers for these?

I will use PPC_LD.

> 
> Also this shouldn't happen in the entry/exit code. We have shadow_msr for these
> purposes.

Based on shared_msr.MSR_DE, we set make DBSR (set DBSR_TIE).
Thanks
-Bharat

> 
> 
> 
> Alex
> 


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Graf Oct. 4, 2012, 11:25 a.m. UTC | #14
On 04.10.2012, at 13:06, Bhushan Bharat-R65777 wrote:

> 
> 
>> -----Original Message-----
>> From: Alexander Graf [mailto:agraf@suse.de]
>> Sent: Monday, September 24, 2012 9:50 PM
>> To: Bhushan Bharat-R65777
>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Bhushan Bharat-R65777
>> Subject: Re: [PATCH 6/6] KVM: booke/bookehv: Add debug stub support
>> 
>> 
>> On 21.08.2012, at 15:52, Bharat Bhushan wrote:
>> 
>>> This patch adds the debug stub support on booke/bookehv.
>>> Now QEMU debug stub can use hw breakpoint, watchpoint and
>>> software breakpoint to debug guest.
>>> 
>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>> ---
>>> arch/powerpc/include/asm/kvm.h        |   29 ++++++-
>>> arch/powerpc/include/asm/kvm_host.h   |    5 +
>>> arch/powerpc/kernel/asm-offsets.c     |   26 ++++++
>>> arch/powerpc/kvm/booke.c              |  144 +++++++++++++++++++++++++++++----
>>> arch/powerpc/kvm/booke_interrupts.S   |  110 +++++++++++++++++++++++++
>>> arch/powerpc/kvm/bookehv_interrupts.S |  141 +++++++++++++++++++++++++++++++-
>>> arch/powerpc/kvm/e500mc.c             |    3 +-
>>> 7 files changed, 435 insertions(+), 23 deletions(-)
>>> 
>>> diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
>>> index 61b197e..53479ea 100644
>>> --- a/arch/powerpc/include/asm/kvm.h
>>> +++ b/arch/powerpc/include/asm/kvm.h
>>> @@ -25,6 +25,7 @@
>>> /* Select powerpc specific features in <linux/kvm.h> */
>>> #define __KVM_HAVE_SPAPR_TCE
>>> #define __KVM_HAVE_PPC_SMT
>>> +#define __KVM_HAVE_GUEST_DEBUG
>>> 
>>> struct kvm_regs {
>>> 	__u64 pc;
>>> @@ -264,7 +265,31 @@ struct kvm_fpu {
>>> 	__u64 fpr[32];
>>> };
>>> 
>>> +
>>> +/*
>>> + * Defines for h/w breakpoint, watchpoint (read, write or both) and
>>> + * software breakpoint.
>>> + * These are used as "type" in KVM_SET_GUEST_DEBUG ioctl and "status"
>>> + * for KVM_DEBUG_EXIT.
>>> + */
>>> +#define KVMPPC_DEBUG_NONE		0x0
>>> +#define KVMPPC_DEBUG_BREAKPOINT		(1UL << 1)
>>> +#define KVMPPC_DEBUG_WATCH_WRITE	(1UL << 2)
>>> +#define KVMPPC_DEBUG_WATCH_READ		(1UL << 3)
>>> struct kvm_debug_exit_arch {
>>> +	__u64 pc;
>>> +	/*
>>> +	 * exception -> returns the exception number. If the KVM_DEBUG_EXIT
>>> +	 * exit is not handled (say not h/w breakpoint or software breakpoint
>>> +	 * set for this address) by qemu then it is supposed to inject this
>>> +	 * exception to guest.
>>> +	 */
>>> +	__u32 exception;
>>> +	/*
>>> +	 * exiting to userspace because of h/w breakpoint, watchpoint
>>> +	 * (read, write or both) and software breakpoint.
>>> +	 */
>>> +	__u32 status;
>>> };
>>> 
>>> /* for KVM_SET_GUEST_DEBUG */
>>> @@ -276,10 +301,6 @@ struct kvm_guest_debug_arch {
>>> 		 * Type denotes h/w breakpoint, read watchpoint, write
>>> 		 * watchpoint or watchpoint (both read and write).
>>> 		 */
>>> -#define KVMPPC_DEBUG_NOTYPE		0x0
>>> -#define KVMPPC_DEBUG_BREAKPOINT		(1UL << 1)
>>> -#define KVMPPC_DEBUG_WATCH_WRITE	(1UL << 2)
>>> -#define KVMPPC_DEBUG_WATCH_READ		(1UL << 3)
>>> 		__u32 type;
>>> 		__u32 pad1;
>>> 		__u64 pad2;
>>> diff --git a/arch/powerpc/include/asm/kvm_host.h
>> b/arch/powerpc/include/asm/kvm_host.h
>>> index c7219c1..3ba465a 100644
>>> --- a/arch/powerpc/include/asm/kvm_host.h
>>> +++ b/arch/powerpc/include/asm/kvm_host.h
>>> @@ -496,7 +496,12 @@ struct kvm_vcpu_arch {
>>> 	u32 mmucfg;
>>> 	u32 epr;
>>> 	u32 crit_save;
>>> +	/* guest debug registers*/
>>> 	struct kvmppc_booke_debug_reg dbg_reg;
>>> +	/* shadow debug registers */
>>> +	struct kvmppc_booke_debug_reg shadow_dbg_reg;
>>> +	/* host debug registers*/
>>> +	struct kvmppc_booke_debug_reg host_dbg_reg;
>>> #endif
>>> 	gpa_t paddr_accessed;
>>> 	gva_t vaddr_accessed;
>>> diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-
>> offsets.c
>>> index 555448e..6987821 100644
>>> --- a/arch/powerpc/kernel/asm-offsets.c
>>> +++ b/arch/powerpc/kernel/asm-offsets.c
>>> @@ -564,6 +564,32 @@ int main(void)
>>> 	DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear));
>>> 	DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr));
>>> 	DEFINE(VCPU_CRIT_SAVE, offsetof(struct kvm_vcpu, arch.crit_save));
>>> +	DEFINE(VCPU_DBSR, offsetof(struct kvm_vcpu, arch.dbsr));
>>> +	DEFINE(VCPU_SHADOW_DBG, offsetof(struct kvm_vcpu, arch.shadow_dbg_reg));
>>> +	DEFINE(VCPU_HOST_DBG, offsetof(struct kvm_vcpu, arch.host_dbg_reg));
>>> +	DEFINE(KVMPPC_DBG_DBCR0, offsetof(struct kvmppc_booke_debug_reg,
>>> +					  dbcr0));
>>> +	DEFINE(KVMPPC_DBG_DBCR1, offsetof(struct kvmppc_booke_debug_reg,
>>> +					  dbcr1));
>>> +	DEFINE(KVMPPC_DBG_DBCR2, offsetof(struct kvmppc_booke_debug_reg,
>>> +					  dbcr2));
>>> +#ifdef CONFIG_KVM_E500MC
>>> +	DEFINE(KVMPPC_DBG_DBCR4, offsetof(struct kvmppc_booke_debug_reg,
>>> +					  dbcr4));
>>> +#endif
>>> +	DEFINE(KVMPPC_DBG_IAC1, offsetof(struct kvmppc_booke_debug_reg,
>>> +					 iac[0]));
>>> +	DEFINE(KVMPPC_DBG_IAC2, offsetof(struct kvmppc_booke_debug_reg,
>>> +					 iac[1]));
>>> +	DEFINE(KVMPPC_DBG_IAC3, offsetof(struct kvmppc_booke_debug_reg,
>>> +					 iac[2]));
>>> +	DEFINE(KVMPPC_DBG_IAC4, offsetof(struct kvmppc_booke_debug_reg,
>>> +					 iac[3]));
>>> +	DEFINE(KVMPPC_DBG_DAC1, offsetof(struct kvmppc_booke_debug_reg,
>>> +					 dac[0]));
>>> +	DEFINE(KVMPPC_DBG_DAC2, offsetof(struct kvmppc_booke_debug_reg,
>>> +					 dac[1]));
>>> +	DEFINE(VCPU_GUEST_DEBUG, offsetof(struct kvm_vcpu, guest_debug));
>>> #endif /* CONFIG_PPC_BOOK3S */
>>> #endif /* CONFIG_KVM */
>>> 
>>> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
>>> index dd0e5b8..4d82e34 100644
>>> --- a/arch/powerpc/kvm/booke.c
>>> +++ b/arch/powerpc/kvm/booke.c
>>> @@ -132,6 +132,9 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
>>> 
>>> #ifdef CONFIG_KVM_BOOKE_HV
>>> 	new_msr |= MSR_GS;
>>> +
>>> +	if (vcpu->guest_debug)
>>> +		new_msr |= MSR_DE;
>>> #endif
>>> 
>>> 	vcpu->arch.shared->msr = new_msr;
>>> @@ -667,10 +670,21 @@ out:
>>> 	return ret;
>>> }
>>> 
>>> -static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
>>> +static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
>>> +			  int exit_nr)
>>> {
>>> 	enum emulation_result er;
>>> 
>>> +	if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) &&
>>> +		     vcpu->arch.last_inst == KVMPPC_INST_GUEST_GDB) {
>> 
>> This belongs into the normal emulation code path, behind the same switch() that
>> everything else goes through.
> 
> I am not sure I understood correctly. Below is the reason why I placed this code here.
> Instruction where software breakpoint is to be set is replaced by "ehpriv" instruction. On e500v2, this is not a valid instruction can causes program interrupt. On e500mc, "ehpriv" is a valid instruction. Both the exit path calls emulation_exit(), so we have placed the code in this function.
> Do you want this code to be moved in program interrupt exit path for e500v2 and BOOKE_INTERRUPT_HV_PRIV for e500mc?

Ok, in this patch you do (basically):

int emulation_exit()
{
    if (inst == DEBUG_INST) {
        debug_stuff();
        return;
    }

    switch (inst) {
        case INST_A:
            foo();
        ....
    }
}

what I want is:

int emulation_exit()
{
    switch (inst) {
        case INST_A:
            foo(); break;
        case DEBUG_INST:
            debug_stuff(); break;
        ....
    }
}

> 
> 
>> 
>>> +		run->exit_reason = KVM_EXIT_DEBUG;
>>> +		run->debug.arch.pc = vcpu->arch.pc;
>>> +		run->debug.arch.exception = exit_nr;
>>> +		run->debug.arch.status = 0;
>>> +		kvmppc_account_exit(vcpu, DEBUG_EXITS);
>>> +		return RESUME_HOST;
>>> +	}
>>> +
>>> 	er = kvmppc_emulate_instruction(run, vcpu);
>>> 	switch (er) {
>>> 	case EMULATE_DONE:
>>> @@ -697,6 +711,44 @@ static int emulation_exit(struct kvm_run *run, struct
>> kvm_vcpu *vcpu)
>>> 	default:
>>> 		BUG();
>>> 	}
>>> +
>>> +	if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_ENABLE) &&
>>> +	    (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)) {
>> 
>> I don't understand how this is supposed to work. When we enable singlestep, why
>> would we end up in emulation_exit()?
> 
> When singlestep is enabled then we set DBCR0[ICMP] and the debug handler should be able to handle this. I think you are right.
> 
>> 
>>> +		run->exit_reason = KVM_EXIT_DEBUG;
>>> +		return RESUME_HOST;
>>> +	}
>>> +}
>>> +
>>> +static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu *vcpu)
>>> +{
>>> +	u32 dbsr;
>>> +
>>> +#ifndef CONFIG_KVM_BOOKE_HV
>>> +	if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC))
>>> +		vcpu->arch.pc = mfspr(SPRN_DSRR0);
>>> +	else
>>> +		vcpu->arch.pc = mfspr(SPRN_CSRR0);
>>> +#endif
>> 
>> Why doesn't this get handled in the asm code that recovers from the respective
>> exceptions?
> 
> Yes. I will remove this.
> 
>> 
>>> +	dbsr = vcpu->arch.dbsr;
>>> +
>>> +	run->debug.arch.pc = vcpu->arch.pc;
>>> +	run->debug.arch.status = 0;
>>> +	vcpu->arch.dbsr = 0;
>>> +
>>> +	if (dbsr & (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) {
>>> +		run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT;
>>> +	} else {
>>> +		if (dbsr & (DBSR_DAC1W | DBSR_DAC2W))
>>> +			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_WRITE;
>>> +		else if (dbsr & (DBSR_DAC1R | DBSR_DAC2R))
>>> +			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_READ;
>>> +		if (dbsr & (DBSR_DAC1R | DBSR_DAC1W))
>>> +			run->debug.arch.pc = vcpu->arch.shadow_dbg_reg.dac[0];
>>> +		else if (dbsr & (DBSR_DAC2R | DBSR_DAC2W))
>>> +			run->debug.arch.pc = vcpu->arch.shadow_dbg_reg.dac[1];
>>> +	}
>> 
>> Mind to explain the guest register sharing mechanism you are envisioning? Which
>> registers are used by the guest? Which are used by the host? Who decides what
>> gets used by whom?
> 
> struct kvm_vcpu_arch {
> 	.
> 	.
> 
>       /* guest debug registers*/
>        struct kvmppc_booke_debug_reg dbg_reg;
>       /* shadow debug registers */
>       struct kvmppc_booke_debug_reg shadow_dbg_reg;
>       /* host debug registers*/
>       struct kvmppc_booke_debug_reg host_dbg_reg;
> 
> 	.
> 	.
> }
> 
> dbg_reg: contains the values written by guest.
> shadow_dbg_reg: This contains the actual values to be written on behalf of guest/qemu.
> host_dbg_reg: the debug register value of host (needed for store/retore purpose).
> 
> According to current design both, the qemu debug stub and guest, cannot share the debug resources. QEMU debug stub is given priority.
> But host and guest can share all debug registers and host cannot debug guest if it is using the debug resource (which probably does not look like a good case).

Ok. Assume that in guest context, we only ever want guest debugging enabled. If QEMU wants to inject a QEMU breakpoint, it will fiddle with the guest debug registers itself to inject its breakpoint into them.

With that scheme, would we still need shadow debug registers? Or could we remove that one level of indirection?

> 
>> 
>>> +
>>> +	return RESUME_HOST;
>>> }
>>> 
>>> static void kvmppc_fill_pt_regs(struct pt_regs *regs)
>>> @@ -843,7 +895,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct
>> kvm_vcpu *vcpu,
>>> 		break;
>>> 
>>> 	case BOOKE_INTERRUPT_HV_PRIV:
>>> -		r = emulation_exit(run, vcpu);
>>> +		r = emulation_exit(run, vcpu, exit_nr);
>>> 		break;
>>> 
>>> 	case BOOKE_INTERRUPT_PROGRAM:
>>> @@ -862,7 +914,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct
>> kvm_vcpu *vcpu,
>>> 			break;
>>> 		}
>>> 
>>> -		r = emulation_exit(run, vcpu);
>>> +		r = emulation_exit(run, vcpu, exit_nr);
>>> 		break;
>>> 
>>> 	case BOOKE_INTERRUPT_FP_UNAVAIL:
>>> @@ -1052,18 +1104,12 @@ int kvmppc_handle_exit(struct kvm_run *run, struct
>> kvm_vcpu *vcpu,
>>> 	}
>>> 
>>> 	case BOOKE_INTERRUPT_DEBUG: {
>>> -		u32 dbsr;
>>> -
>>> -		vcpu->arch.pc = mfspr(SPRN_CSRR0);
>>> -
>>> -		/* clear IAC events in DBSR register */
>>> -		dbsr = mfspr(SPRN_DBSR);
>>> -		dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4;
>>> -		mtspr(SPRN_DBSR, dbsr);
>>> -
>>> -		run->exit_reason = KVM_EXIT_DEBUG;
>>> +		r = kvmppc_handle_debug(run, vcpu);
>>> +		if (r == RESUME_HOST) {
>>> +			run->debug.arch.exception = exit_nr;
>>> +			run->exit_reason = KVM_EXIT_DEBUG;
>>> +		}
>>> 		kvmppc_account_exit(vcpu, DEBUG_EXITS);
>>> -		r = RESUME_HOST;
>>> 		break;
>>> 	}
>>> 
>>> @@ -1403,10 +1449,78 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
>> struct kvm_one_reg *reg)
>>> 	return r;
>>> }
>>> 
>>> +#define BP_NUM	KVMPPC_BOOKE_IAC_NUM
>>> +#define WP_NUM	KVMPPC_BOOKE_DAC_NUM
>>> +
>>> int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
>>> 					 struct kvm_guest_debug *dbg)
>>> {
>>> -	return -EINVAL;
>>> +
>>> +	if (!(dbg->control & KVM_GUESTDBG_ENABLE)) {
>>> +		/* Clear All debug events */
>>> +		vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
>>> +		vcpu->guest_debug = 0;
>>> +		return 0;
>>> +	}
>>> +
>>> +	vcpu->guest_debug = dbg->control;
>>> +	vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
>>> +
>>> +	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
>>> +		vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC;
>>> +
>>> +	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
>>> +		struct kvmppc_booke_debug_reg *gdbgr =
>>> +				&(vcpu->arch.shadow_dbg_reg);
>>> +		int n, b = 0, w = 0;
>>> +		const u32 bp_code[] = {
>>> +			DBCR0_IAC1 | DBCR0_IDM,
>>> +			DBCR0_IAC2 | DBCR0_IDM,
>>> +			DBCR0_IAC3 | DBCR0_IDM,
>>> +			DBCR0_IAC4 | DBCR0_IDM
>>> +		};
>>> +		const u32 wp_code[] = {
>>> +			DBCR0_DAC1W | DBCR0_IDM,
>>> +			DBCR0_DAC2W | DBCR0_IDM,
>>> +			DBCR0_DAC1R | DBCR0_IDM,
>>> +			DBCR0_DAC2R | DBCR0_IDM
>>> +		};
>>> +
>>> +#ifndef CONFIG_KVM_BOOKE_HV
>>> +		gdbgr->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US |
>>> +				DBCR1_IAC3US | DBCR1_IAC4US;
>>> +		gdbgr->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US;
>>> +#else
>>> +		gdbgr->dbcr1 = 0;
>>> +		gdbgr->dbcr2 = 0;
>>> +#endif
>>> +
>>> +		for (n = 0; n < (BP_NUM + WP_NUM); n++) {
>>> +			u32 type = dbg->arch.bp[n].type;
>>> +
>>> +			if (!type)
>>> +				break;
>>> +
>>> +			if (type & (KVMPPC_DEBUG_WATCH_READ |
>>> +				    KVMPPC_DEBUG_WATCH_WRITE)) {
>>> +				if (w < WP_NUM) {
>>> +					if (type & KVMPPC_DEBUG_WATCH_READ)
>>> +						gdbgr->dbcr0 |= wp_code[w + 2];
>>> +					if (type & KVMPPC_DEBUG_WATCH_WRITE)
>>> +						gdbgr->dbcr0 |= wp_code[w];
>>> +					gdbgr->dac[w] = dbg->arch.bp[n].addr;
>>> +					w++;
>>> +				}
>>> +			} else if (type & KVMPPC_DEBUG_BREAKPOINT) {
>>> +				if (b < BP_NUM) {
>>> +					gdbgr->dbcr0 |= bp_code[b];
>>> +					gdbgr->iac[b] = dbg->arch.bp[n].addr;
>>> +					b++;
>>> +				}
>>> +			}
>>> +		}
>>> +	}
>>> +	return 0;
>>> }
>>> 
>>> int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
>>> diff --git a/arch/powerpc/kvm/booke_interrupts.S
>> b/arch/powerpc/kvm/booke_interrupts.S
>>> index dd9c5d4..2202936 100644
>>> --- a/arch/powerpc/kvm/booke_interrupts.S
>>> +++ b/arch/powerpc/kvm/booke_interrupts.S
>>> @@ -39,6 +39,8 @@
>>> #define HOST_MIN_STACK_SIZE (HOST_NV_GPR(R31) + 4)
>>> #define HOST_STACK_SIZE (((HOST_MIN_STACK_SIZE + 15) / 16) * 16) /* Align. */
>>> #define HOST_STACK_LR   (HOST_STACK_SIZE + 4) /* In caller stack frame. */
>>> +#define DBCR0_AC_BITS	(DBCR0_IAC1 | DBCR0_IAC2 | DBCR0_IAC3 | DBCR0_IAC4 | \
>>> +			 DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W)
>>> 
>>> #define NEED_INST_MASK ((1<<BOOKE_INTERRUPT_PROGRAM) | \
>>>                        (1<<BOOKE_INTERRUPT_DTLB_MISS) | \
>>> @@ -52,6 +54,8 @@
>>>                       (1<<BOOKE_INTERRUPT_PROGRAM) | \
>>>                       (1<<BOOKE_INTERRUPT_DTLB_MISS))
>>> 
>>> +#define NEED_DEBUG_SAVE (1<<BOOKE_INTERRUPT_DEBUG)
>>> +
>>> .macro __KVM_HANDLER ivor_nr scratch srr0
>>> 	stw	r3, VCPU_GPR(R3)(r4)
>>> 	stw	r5, VCPU_GPR(R5)(r4)
>>> @@ -212,6 +216,61 @@ _GLOBAL(kvmppc_resume_host)
>>> 	stw	r9, VCPU_FAULT_ESR(r4)
>>> ..skip_esr:
>>> 
>>> +	addi	r7, r4, VCPU_HOST_DBG
>>> +	mfspr	r9, SPRN_DBCR0
>>> +	lwz	r8, KVMPPC_DBG_DBCR0(r7)
>>> +	andis.	r9, r9, DBCR0_AC_BITS@h
>>> +	beq	skip_load_host_debug
>>> +	li	r9, 0
>>> +	mtspr	SPRN_DBCR0, r9		/* disable all debug event */
>>> +	lwz	r9, KVMPPC_DBG_DBCR1(r7)
>>> +	mtspr	SPRN_DBCR1, r9
>>> +	lwz	r9, KVMPPC_DBG_DBCR2(r7)
>>> +	mtspr	SPRN_DBCR2, r9
>>> +	lwz	r9, KVMPPC_DBG_IAC1+4(r7)
>>> +	mtspr	SPRN_IAC1, r9
>>> +	lwz	r9, KVMPPC_DBG_IAC2+4(r7)
>>> +	mtspr	SPRN_IAC2, r9
>>> +#if CONFIG_PPC_ADV_DEBUG_IACS > 2
>>> +	lwz	r9, KVMPPC_DBG_IAC3+4(r7)
>>> +	mtspr	SPRN_IAC3, r9
>>> +	lwz	r9, KVMPPC_DBG_IAC4+4(r7)
>>> +	mtspr	SPRN_IAC4, r9
>>> +#endif
>>> +	lwz	r9, KVMPPC_DBG_DAC1+4(r7)
>>> +	mtspr	SPRN_DAC1, r9
>>> +	lwz	r9, KVMPPC_DBG_DAC2+4(r7)
>> 
>> Yikes. Please move this into a struct, similar to how the MSR save/restore
>> happens on x86.
>> 
>> Also, these are 64-bit loads and stores, no?
> 
> Ok, you mean PPC_LD/STD?

Yeah :).

> 
>> 
>>> +	mtspr	SPRN_DAC2, r9
>>> +skip_load_host_debug:
>>> +	/* Clear h/w DBSR and save current(guest) DBSR */
>>> +	mfspr	r9, SPRN_DBSR
>>> +	mtspr	SPRN_DBSR, r9
>>> +	isync
>>> +	andi.	r7, r6, NEED_DEBUG_SAVE
>>> +	beq	skip_dbsr_save
>>> +	/*
>>> +	 * If vcpu->guest_debug flag is set then do not check for
>>> +	 * shared->msr.DE as this debugging (say by QEMU) does not
>>> +	 * depends on shared->msr.de. In these scanerios MSR.DE is
>>> +	 * always set using shared_msr and should be handled always.
>>> +	 */
>>> +	lwz	r7, VCPU_GUEST_DEBUG(r4)
>>> +	cmpwi	r7, 0
>>> +	bne	skip_save_trap_event
>>> +	PPC_LL	r3, VCPU_SHARED(r4)
>>> +#ifndef CONFIG_64BIT
>>> +	lwz	r3, (VCPU_SHARED_MSR + 4)(r3)
>>> +#else
>>> +	ld	r3, (VCPU_SHARED_MSR)(r3)
>>> +#endif
>> 
>> Didn't we have 64-bit access helpers for these?
> 
> I will use PPC_LD.
> 
>> 
>> Also this shouldn't happen in the entry/exit code. We have shadow_msr for these
>> purposes.
> 
> Based on shared_msr.MSR_DE, we set make DBSR (set DBSR_TIE).

Hrm. Yeah, must've misread something here.

It would be awesome if we could make the debug register save/restore a bit more relaxed though. For example during preemption.


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bharat Bhushan Oct. 4, 2012, 2:22 p.m. UTC | #15
> -----Original Message-----
> From: Alexander Graf [mailto:agraf@suse.de]
> Sent: Thursday, October 04, 2012 4:56 PM
> To: Bhushan Bharat-R65777
> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org
> Subject: Re: [PATCH 6/6] KVM: booke/bookehv: Add debug stub support
> 
> 
> On 04.10.2012, at 13:06, Bhushan Bharat-R65777 wrote:
> 
> >
> >
> >> -----Original Message-----
> >> From: Alexander Graf [mailto:agraf@suse.de]
> >> Sent: Monday, September 24, 2012 9:50 PM
> >> To: Bhushan Bharat-R65777
> >> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Bhushan
> >> Bharat-R65777
> >> Subject: Re: [PATCH 6/6] KVM: booke/bookehv: Add debug stub support
> >>
> >>
> >> On 21.08.2012, at 15:52, Bharat Bhushan wrote:
> >>
> >>> This patch adds the debug stub support on booke/bookehv.
> >>> Now QEMU debug stub can use hw breakpoint, watchpoint and software
> >>> breakpoint to debug guest.
> >>>
> >>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
> >>> ---
> >>> arch/powerpc/include/asm/kvm.h        |   29 ++++++-
> >>> arch/powerpc/include/asm/kvm_host.h   |    5 +
> >>> arch/powerpc/kernel/asm-offsets.c     |   26 ++++++
> >>> arch/powerpc/kvm/booke.c              |  144 +++++++++++++++++++++++++++++--
> --
> >>> arch/powerpc/kvm/booke_interrupts.S   |  110 +++++++++++++++++++++++++
> >>> arch/powerpc/kvm/bookehv_interrupts.S |  141
> +++++++++++++++++++++++++++++++-
> >>> arch/powerpc/kvm/e500mc.c             |    3 +-
> >>> 7 files changed, 435 insertions(+), 23 deletions(-)
> >>>
> >>> diff --git a/arch/powerpc/include/asm/kvm.h
> >>> b/arch/powerpc/include/asm/kvm.h index 61b197e..53479ea 100644
> >>> --- a/arch/powerpc/include/asm/kvm.h
> >>> +++ b/arch/powerpc/include/asm/kvm.h
> >>> @@ -25,6 +25,7 @@
> >>> /* Select powerpc specific features in <linux/kvm.h> */ #define
> >>> __KVM_HAVE_SPAPR_TCE #define __KVM_HAVE_PPC_SMT
> >>> +#define __KVM_HAVE_GUEST_DEBUG
> >>>
> >>> struct kvm_regs {
> >>> 	__u64 pc;
> >>> @@ -264,7 +265,31 @@ struct kvm_fpu {
> >>> 	__u64 fpr[32];
> >>> };
> >>>
> >>> +
> >>> +/*
> >>> + * Defines for h/w breakpoint, watchpoint (read, write or both) and
> >>> + * software breakpoint.
> >>> + * These are used as "type" in KVM_SET_GUEST_DEBUG ioctl and "status"
> >>> + * for KVM_DEBUG_EXIT.
> >>> + */
> >>> +#define KVMPPC_DEBUG_NONE		0x0
> >>> +#define KVMPPC_DEBUG_BREAKPOINT		(1UL << 1)
> >>> +#define KVMPPC_DEBUG_WATCH_WRITE	(1UL << 2)
> >>> +#define KVMPPC_DEBUG_WATCH_READ		(1UL << 3)
> >>> struct kvm_debug_exit_arch {
> >>> +	__u64 pc;
> >>> +	/*
> >>> +	 * exception -> returns the exception number. If the KVM_DEBUG_EXIT
> >>> +	 * exit is not handled (say not h/w breakpoint or software breakpoint
> >>> +	 * set for this address) by qemu then it is supposed to inject this
> >>> +	 * exception to guest.
> >>> +	 */
> >>> +	__u32 exception;
> >>> +	/*
> >>> +	 * exiting to userspace because of h/w breakpoint, watchpoint
> >>> +	 * (read, write or both) and software breakpoint.
> >>> +	 */
> >>> +	__u32 status;
> >>> };
> >>>
> >>> /* for KVM_SET_GUEST_DEBUG */
> >>> @@ -276,10 +301,6 @@ struct kvm_guest_debug_arch {
> >>> 		 * Type denotes h/w breakpoint, read watchpoint, write
> >>> 		 * watchpoint or watchpoint (both read and write).
> >>> 		 */
> >>> -#define KVMPPC_DEBUG_NOTYPE		0x0
> >>> -#define KVMPPC_DEBUG_BREAKPOINT		(1UL << 1)
> >>> -#define KVMPPC_DEBUG_WATCH_WRITE	(1UL << 2)
> >>> -#define KVMPPC_DEBUG_WATCH_READ		(1UL << 3)
> >>> 		__u32 type;
> >>> 		__u32 pad1;
> >>> 		__u64 pad2;
> >>> diff --git a/arch/powerpc/include/asm/kvm_host.h
> >> b/arch/powerpc/include/asm/kvm_host.h
> >>> index c7219c1..3ba465a 100644
> >>> --- a/arch/powerpc/include/asm/kvm_host.h
> >>> +++ b/arch/powerpc/include/asm/kvm_host.h
> >>> @@ -496,7 +496,12 @@ struct kvm_vcpu_arch {
> >>> 	u32 mmucfg;
> >>> 	u32 epr;
> >>> 	u32 crit_save;
> >>> +	/* guest debug registers*/
> >>> 	struct kvmppc_booke_debug_reg dbg_reg;
> >>> +	/* shadow debug registers */
> >>> +	struct kvmppc_booke_debug_reg shadow_dbg_reg;
> >>> +	/* host debug registers*/
> >>> +	struct kvmppc_booke_debug_reg host_dbg_reg;
> >>> #endif
> >>> 	gpa_t paddr_accessed;
> >>> 	gva_t vaddr_accessed;
> >>> diff --git a/arch/powerpc/kernel/asm-offsets.c
> >>> b/arch/powerpc/kernel/asm-
> >> offsets.c
> >>> index 555448e..6987821 100644
> >>> --- a/arch/powerpc/kernel/asm-offsets.c
> >>> +++ b/arch/powerpc/kernel/asm-offsets.c
> >>> @@ -564,6 +564,32 @@ int main(void)
> >>> 	DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear));
> >>> 	DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr));
> >>> 	DEFINE(VCPU_CRIT_SAVE, offsetof(struct kvm_vcpu, arch.crit_save));
> >>> +	DEFINE(VCPU_DBSR, offsetof(struct kvm_vcpu, arch.dbsr));
> >>> +	DEFINE(VCPU_SHADOW_DBG, offsetof(struct kvm_vcpu, arch.shadow_dbg_reg));
> >>> +	DEFINE(VCPU_HOST_DBG, offsetof(struct kvm_vcpu, arch.host_dbg_reg));
> >>> +	DEFINE(KVMPPC_DBG_DBCR0, offsetof(struct kvmppc_booke_debug_reg,
> >>> +					  dbcr0));
> >>> +	DEFINE(KVMPPC_DBG_DBCR1, offsetof(struct kvmppc_booke_debug_reg,
> >>> +					  dbcr1));
> >>> +	DEFINE(KVMPPC_DBG_DBCR2, offsetof(struct kvmppc_booke_debug_reg,
> >>> +					  dbcr2));
> >>> +#ifdef CONFIG_KVM_E500MC
> >>> +	DEFINE(KVMPPC_DBG_DBCR4, offsetof(struct kvmppc_booke_debug_reg,
> >>> +					  dbcr4));
> >>> +#endif
> >>> +	DEFINE(KVMPPC_DBG_IAC1, offsetof(struct kvmppc_booke_debug_reg,
> >>> +					 iac[0]));
> >>> +	DEFINE(KVMPPC_DBG_IAC2, offsetof(struct kvmppc_booke_debug_reg,
> >>> +					 iac[1]));
> >>> +	DEFINE(KVMPPC_DBG_IAC3, offsetof(struct kvmppc_booke_debug_reg,
> >>> +					 iac[2]));
> >>> +	DEFINE(KVMPPC_DBG_IAC4, offsetof(struct kvmppc_booke_debug_reg,
> >>> +					 iac[3]));
> >>> +	DEFINE(KVMPPC_DBG_DAC1, offsetof(struct kvmppc_booke_debug_reg,
> >>> +					 dac[0]));
> >>> +	DEFINE(KVMPPC_DBG_DAC2, offsetof(struct kvmppc_booke_debug_reg,
> >>> +					 dac[1]));
> >>> +	DEFINE(VCPU_GUEST_DEBUG, offsetof(struct kvm_vcpu, guest_debug));
> >>> #endif /* CONFIG_PPC_BOOK3S */
> >>> #endif /* CONFIG_KVM */
> >>>
> >>> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
> >>> index dd0e5b8..4d82e34 100644
> >>> --- a/arch/powerpc/kvm/booke.c
> >>> +++ b/arch/powerpc/kvm/booke.c
> >>> @@ -132,6 +132,9 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32
> >>> new_msr)
> >>>
> >>> #ifdef CONFIG_KVM_BOOKE_HV
> >>> 	new_msr |= MSR_GS;
> >>> +
> >>> +	if (vcpu->guest_debug)
> >>> +		new_msr |= MSR_DE;
> >>> #endif
> >>>
> >>> 	vcpu->arch.shared->msr = new_msr;
> >>> @@ -667,10 +670,21 @@ out:
> >>> 	return ret;
> >>> }
> >>>
> >>> -static int emulation_exit(struct kvm_run *run, struct kvm_vcpu
> >>> *vcpu)
> >>> +static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
> >>> +			  int exit_nr)
> >>> {
> >>> 	enum emulation_result er;
> >>>
> >>> +	if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) &&
> >>> +		     vcpu->arch.last_inst == KVMPPC_INST_GUEST_GDB) {
> >>
> >> This belongs into the normal emulation code path, behind the same
> >> switch() that everything else goes through.
> >
> > I am not sure I understood correctly. Below is the reason why I placed this
> code here.
> > Instruction where software breakpoint is to be set is replaced by "ehpriv"
> instruction. On e500v2, this is not a valid instruction can causes program
> interrupt. On e500mc, "ehpriv" is a valid instruction. Both the exit path calls
> emulation_exit(), so we have placed the code in this function.
> > Do you want this code to be moved in program interrupt exit path for e500v2
> and BOOKE_INTERRUPT_HV_PRIV for e500mc?
> 
> Ok, in this patch you do (basically):
> 
> int emulation_exit()
> {
>     if (inst == DEBUG_INST) {
>         debug_stuff();
>         return;
>     }
> 
>     switch (inst) {
>         case INST_A:
>             foo();
>         ....
>     }
> }

Are not we doing something like this:
 int emulation_exit()
 {
     if (inst == DEBUG_INST) {
         debug_stuff();
         return;
     }
 
     status = kvmppc_emulate_instruction()
     switch (status) {
         case FAIL:
             foo();
         case DONE:
		foo1();
         ....
     }
 }

Do you want something like this:

int emulation_exit()
 {

     status = kvmppc_emulate_instruction()
     switch (status) {
         case FAIL:
     		if (inst == DEBUG_INST) {
         		debug_stuff();
    		      return;
     		}
             foo();

         case DONE:
		foo1();
         ....
     }
 }
> 
> what I want is:
> 
> int emulation_exit()
> {
>     switch (inst) {
>         case INST_A:
>             foo(); break;
>         case DEBUG_INST:
>             debug_stuff(); break;
>         ....
>     }
> }
> 
> >
> >
> >>
> >>> +		run->exit_reason = KVM_EXIT_DEBUG;
> >>> +		run->debug.arch.pc = vcpu->arch.pc;
> >>> +		run->debug.arch.exception = exit_nr;
> >>> +		run->debug.arch.status = 0;
> >>> +		kvmppc_account_exit(vcpu, DEBUG_EXITS);
> >>> +		return RESUME_HOST;
> >>> +	}
> >>> +
> >>> 	er = kvmppc_emulate_instruction(run, vcpu);
> >>> 	switch (er) {
> >>> 	case EMULATE_DONE:
> >>> @@ -697,6 +711,44 @@ static int emulation_exit(struct kvm_run *run,
> >>> struct
> >> kvm_vcpu *vcpu)
> >>> 	default:
> >>> 		BUG();
> >>> 	}
> >>> +
> >>> +	if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_ENABLE) &&
> >>> +	    (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)) {
> >>
> >> I don't understand how this is supposed to work. When we enable
> >> singlestep, why would we end up in emulation_exit()?
> >
> > When singlestep is enabled then we set DBCR0[ICMP] and the debug handler
> should be able to handle this. I think you are right.
> >
> >>
> >>> +		run->exit_reason = KVM_EXIT_DEBUG;
> >>> +		return RESUME_HOST;
> >>> +	}
> >>> +}
> >>> +
> >>> +static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu
> >>> +*vcpu) {
> >>> +	u32 dbsr;
> >>> +
> >>> +#ifndef CONFIG_KVM_BOOKE_HV
> >>> +	if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC))
> >>> +		vcpu->arch.pc = mfspr(SPRN_DSRR0);
> >>> +	else
> >>> +		vcpu->arch.pc = mfspr(SPRN_CSRR0); #endif
> >>
> >> Why doesn't this get handled in the asm code that recovers from the
> >> respective exceptions?
> >
> > Yes. I will remove this.
> >
> >>
> >>> +	dbsr = vcpu->arch.dbsr;
> >>> +
> >>> +	run->debug.arch.pc = vcpu->arch.pc;
> >>> +	run->debug.arch.status = 0;
> >>> +	vcpu->arch.dbsr = 0;
> >>> +
> >>> +	if (dbsr & (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) {
> >>> +		run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT;
> >>> +	} else {
> >>> +		if (dbsr & (DBSR_DAC1W | DBSR_DAC2W))
> >>> +			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_WRITE;
> >>> +		else if (dbsr & (DBSR_DAC1R | DBSR_DAC2R))
> >>> +			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_READ;
> >>> +		if (dbsr & (DBSR_DAC1R | DBSR_DAC1W))
> >>> +			run->debug.arch.pc = vcpu->arch.shadow_dbg_reg.dac[0];
> >>> +		else if (dbsr & (DBSR_DAC2R | DBSR_DAC2W))
> >>> +			run->debug.arch.pc = vcpu->arch.shadow_dbg_reg.dac[1];
> >>> +	}
> >>
> >> Mind to explain the guest register sharing mechanism you are
> >> envisioning? Which registers are used by the guest? Which are used by
> >> the host? Who decides what gets used by whom?
> >
> > struct kvm_vcpu_arch {
> > 	.
> > 	.
> >
> >       /* guest debug registers*/
> >        struct kvmppc_booke_debug_reg dbg_reg;
> >       /* shadow debug registers */
> >       struct kvmppc_booke_debug_reg shadow_dbg_reg;
> >       /* host debug registers*/
> >       struct kvmppc_booke_debug_reg host_dbg_reg;
> >
> > 	.
> > 	.
> > }
> >
> > dbg_reg: contains the values written by guest.
> > shadow_dbg_reg: This contains the actual values to be written on behalf of
> guest/qemu.
> > host_dbg_reg: the debug register value of host (needed for store/retore
> purpose).
> >
> > According to current design both, the qemu debug stub and guest, cannot share
> the debug resources. QEMU debug stub is given priority.
> > But host and guest can share all debug registers and host cannot debug guest
> if it is using the debug resource (which probably does not look like a good
> case).
> 
> Ok. Assume that in guest context, we only ever want guest debugging enabled. If
> QEMU wants to inject a QEMU breakpoint, it will fiddle with the guest debug
> registers itself to inject its breakpoint into them.

I would like to understand the context you are describing.
QEMU would like to inject debug interrupt to guest only if a debug interrupt happened in guest context and QEMU is not able to handle interrupt (because qemu debug have not set). Right?
In that case the only thing that needed to be updated is guest->DBSR?

> 
> With that scheme, would we still need shadow debug registers? Or could we remove
> that one level of indirection?
> 
> >
> >>
> >>> +
> >>> +	return RESUME_HOST;
> >>> }
> >>>
> >>> static void kvmppc_fill_pt_regs(struct pt_regs *regs) @@ -843,7
> >>> +895,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct
> >> kvm_vcpu *vcpu,
> >>> 		break;
> >>>
> >>> 	case BOOKE_INTERRUPT_HV_PRIV:
> >>> -		r = emulation_exit(run, vcpu);
> >>> +		r = emulation_exit(run, vcpu, exit_nr);
> >>> 		break;
> >>>
> >>> 	case BOOKE_INTERRUPT_PROGRAM:
> >>> @@ -862,7 +914,7 @@ int kvmppc_handle_exit(struct kvm_run *run,
> >>> struct
> >> kvm_vcpu *vcpu,
> >>> 			break;
> >>> 		}
> >>>
> >>> -		r = emulation_exit(run, vcpu);
> >>> +		r = emulation_exit(run, vcpu, exit_nr);
> >>> 		break;
> >>>
> >>> 	case BOOKE_INTERRUPT_FP_UNAVAIL:
> >>> @@ -1052,18 +1104,12 @@ int kvmppc_handle_exit(struct kvm_run *run,
> >>> struct
> >> kvm_vcpu *vcpu,
> >>> 	}
> >>>
> >>> 	case BOOKE_INTERRUPT_DEBUG: {
> >>> -		u32 dbsr;
> >>> -
> >>> -		vcpu->arch.pc = mfspr(SPRN_CSRR0);
> >>> -
> >>> -		/* clear IAC events in DBSR register */
> >>> -		dbsr = mfspr(SPRN_DBSR);
> >>> -		dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4;
> >>> -		mtspr(SPRN_DBSR, dbsr);
> >>> -
> >>> -		run->exit_reason = KVM_EXIT_DEBUG;
> >>> +		r = kvmppc_handle_debug(run, vcpu);
> >>> +		if (r == RESUME_HOST) {
> >>> +			run->debug.arch.exception = exit_nr;
> >>> +			run->exit_reason = KVM_EXIT_DEBUG;
> >>> +		}
> >>> 		kvmppc_account_exit(vcpu, DEBUG_EXITS);
> >>> -		r = RESUME_HOST;
> >>> 		break;
> >>> 	}
> >>>
> >>> @@ -1403,10 +1449,78 @@ int kvm_vcpu_ioctl_set_one_reg(struct
> >>> kvm_vcpu *vcpu,
> >> struct kvm_one_reg *reg)
> >>> 	return r;
> >>> }
> >>>
> >>> +#define BP_NUM	KVMPPC_BOOKE_IAC_NUM
> >>> +#define WP_NUM	KVMPPC_BOOKE_DAC_NUM
> >>> +
> >>> int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
> >>> 					 struct kvm_guest_debug *dbg)
> >>> {
> >>> -	return -EINVAL;
> >>> +
> >>> +	if (!(dbg->control & KVM_GUESTDBG_ENABLE)) {
> >>> +		/* Clear All debug events */
> >>> +		vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
> >>> +		vcpu->guest_debug = 0;
> >>> +		return 0;
> >>> +	}
> >>> +
> >>> +	vcpu->guest_debug = dbg->control;
> >>> +	vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
> >>> +
> >>> +	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
> >>> +		vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC;
> >>> +
> >>> +	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
> >>> +		struct kvmppc_booke_debug_reg *gdbgr =
> >>> +				&(vcpu->arch.shadow_dbg_reg);
> >>> +		int n, b = 0, w = 0;
> >>> +		const u32 bp_code[] = {
> >>> +			DBCR0_IAC1 | DBCR0_IDM,
> >>> +			DBCR0_IAC2 | DBCR0_IDM,
> >>> +			DBCR0_IAC3 | DBCR0_IDM,
> >>> +			DBCR0_IAC4 | DBCR0_IDM
> >>> +		};
> >>> +		const u32 wp_code[] = {
> >>> +			DBCR0_DAC1W | DBCR0_IDM,
> >>> +			DBCR0_DAC2W | DBCR0_IDM,
> >>> +			DBCR0_DAC1R | DBCR0_IDM,
> >>> +			DBCR0_DAC2R | DBCR0_IDM
> >>> +		};
> >>> +
> >>> +#ifndef CONFIG_KVM_BOOKE_HV
> >>> +		gdbgr->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US |
> >>> +				DBCR1_IAC3US | DBCR1_IAC4US;
> >>> +		gdbgr->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US; #else
> >>> +		gdbgr->dbcr1 = 0;
> >>> +		gdbgr->dbcr2 = 0;
> >>> +#endif
> >>> +
> >>> +		for (n = 0; n < (BP_NUM + WP_NUM); n++) {
> >>> +			u32 type = dbg->arch.bp[n].type;
> >>> +
> >>> +			if (!type)
> >>> +				break;
> >>> +
> >>> +			if (type & (KVMPPC_DEBUG_WATCH_READ |
> >>> +				    KVMPPC_DEBUG_WATCH_WRITE)) {
> >>> +				if (w < WP_NUM) {
> >>> +					if (type & KVMPPC_DEBUG_WATCH_READ)
> >>> +						gdbgr->dbcr0 |= wp_code[w + 2];
> >>> +					if (type & KVMPPC_DEBUG_WATCH_WRITE)
> >>> +						gdbgr->dbcr0 |= wp_code[w];
> >>> +					gdbgr->dac[w] = dbg->arch.bp[n].addr;
> >>> +					w++;
> >>> +				}
> >>> +			} else if (type & KVMPPC_DEBUG_BREAKPOINT) {
> >>> +				if (b < BP_NUM) {
> >>> +					gdbgr->dbcr0 |= bp_code[b];
> >>> +					gdbgr->iac[b] = dbg->arch.bp[n].addr;
> >>> +					b++;
> >>> +				}
> >>> +			}
> >>> +		}
> >>> +	}
> >>> +	return 0;
> >>> }
> >>>
> >>> int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct
> >>> kvm_fpu *fpu) diff --git a/arch/powerpc/kvm/booke_interrupts.S
> >> b/arch/powerpc/kvm/booke_interrupts.S
> >>> index dd9c5d4..2202936 100644
> >>> --- a/arch/powerpc/kvm/booke_interrupts.S
> >>> +++ b/arch/powerpc/kvm/booke_interrupts.S
> >>> @@ -39,6 +39,8 @@
> >>> #define HOST_MIN_STACK_SIZE (HOST_NV_GPR(R31) + 4) #define
> >>> HOST_STACK_SIZE (((HOST_MIN_STACK_SIZE + 15) / 16) * 16) /* Align. */
> >>> #define HOST_STACK_LR   (HOST_STACK_SIZE + 4) /* In caller stack frame. */
> >>> +#define DBCR0_AC_BITS	(DBCR0_IAC1 | DBCR0_IAC2 | DBCR0_IAC3 | DBCR0_IAC4 | \
> >>> +			 DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W)
> >>>
> >>> #define NEED_INST_MASK ((1<<BOOKE_INTERRUPT_PROGRAM) | \
> >>>                        (1<<BOOKE_INTERRUPT_DTLB_MISS) | \ @@ -52,6
> >>> +54,8 @@
> >>>                       (1<<BOOKE_INTERRUPT_PROGRAM) | \
> >>>                       (1<<BOOKE_INTERRUPT_DTLB_MISS))
> >>>
> >>> +#define NEED_DEBUG_SAVE (1<<BOOKE_INTERRUPT_DEBUG)
> >>> +
> >>> .macro __KVM_HANDLER ivor_nr scratch srr0
> >>> 	stw	r3, VCPU_GPR(R3)(r4)
> >>> 	stw	r5, VCPU_GPR(R5)(r4)
> >>> @@ -212,6 +216,61 @@ _GLOBAL(kvmppc_resume_host)
> >>> 	stw	r9, VCPU_FAULT_ESR(r4)
> >>> ..skip_esr:
> >>>
> >>> +	addi	r7, r4, VCPU_HOST_DBG
> >>> +	mfspr	r9, SPRN_DBCR0
> >>> +	lwz	r8, KVMPPC_DBG_DBCR0(r7)
> >>> +	andis.	r9, r9, DBCR0_AC_BITS@h
> >>> +	beq	skip_load_host_debug
> >>> +	li	r9, 0
> >>> +	mtspr	SPRN_DBCR0, r9		/* disable all debug event */
> >>> +	lwz	r9, KVMPPC_DBG_DBCR1(r7)
> >>> +	mtspr	SPRN_DBCR1, r9
> >>> +	lwz	r9, KVMPPC_DBG_DBCR2(r7)
> >>> +	mtspr	SPRN_DBCR2, r9
> >>> +	lwz	r9, KVMPPC_DBG_IAC1+4(r7)
> >>> +	mtspr	SPRN_IAC1, r9
> >>> +	lwz	r9, KVMPPC_DBG_IAC2+4(r7)
> >>> +	mtspr	SPRN_IAC2, r9
> >>> +#if CONFIG_PPC_ADV_DEBUG_IACS > 2
> >>> +	lwz	r9, KVMPPC_DBG_IAC3+4(r7)
> >>> +	mtspr	SPRN_IAC3, r9
> >>> +	lwz	r9, KVMPPC_DBG_IAC4+4(r7)
> >>> +	mtspr	SPRN_IAC4, r9
> >>> +#endif
> >>> +	lwz	r9, KVMPPC_DBG_DAC1+4(r7)
> >>> +	mtspr	SPRN_DAC1, r9
> >>> +	lwz	r9, KVMPPC_DBG_DAC2+4(r7)
> >>
> >> Yikes. Please move this into a struct, similar to how the MSR
> >> save/restore happens on x86.
> >>
> >> Also, these are 64-bit loads and stores, no?
> >
> > Ok, you mean PPC_LD/STD?
> 
> Yeah :).
> 
> >
> >>
> >>> +	mtspr	SPRN_DAC2, r9
> >>> +skip_load_host_debug:
> >>> +	/* Clear h/w DBSR and save current(guest) DBSR */
> >>> +	mfspr	r9, SPRN_DBSR
> >>> +	mtspr	SPRN_DBSR, r9
> >>> +	isync
> >>> +	andi.	r7, r6, NEED_DEBUG_SAVE
> >>> +	beq	skip_dbsr_save
> >>> +	/*
> >>> +	 * If vcpu->guest_debug flag is set then do not check for
> >>> +	 * shared->msr.DE as this debugging (say by QEMU) does not
> >>> +	 * depends on shared->msr.de. In these scanerios MSR.DE is
> >>> +	 * always set using shared_msr and should be handled always.
> >>> +	 */
> >>> +	lwz	r7, VCPU_GUEST_DEBUG(r4)
> >>> +	cmpwi	r7, 0
> >>> +	bne	skip_save_trap_event
> >>> +	PPC_LL	r3, VCPU_SHARED(r4)
> >>> +#ifndef CONFIG_64BIT
> >>> +	lwz	r3, (VCPU_SHARED_MSR + 4)(r3)
> >>> +#else
> >>> +	ld	r3, (VCPU_SHARED_MSR)(r3)
> >>> +#endif
> >>
> >> Didn't we have 64-bit access helpers for these?
> >
> > I will use PPC_LD.
> >
> >>
> >> Also this shouldn't happen in the entry/exit code. We have shadow_msr
> >> for these purposes.
> >
> > Based on shared_msr.MSR_DE, we set make DBSR (set DBSR_TIE).
> 
> Hrm. Yeah, must've misread something here.
> 
> It would be awesome if we could make the debug register save/restore a bit more
> relaxed though. For example during preemption.

If we delay this save/restore then host cannot do debugging of KVM code. Right?

Thanks
-Bharat

> 
> 
> Alex
> 


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Graf Oct. 4, 2012, 2:41 p.m. UTC | #16
On 04.10.2012, at 16:22, Bhushan Bharat-R65777 wrote:

> 
> 
>> -----Original Message-----
>> From: Alexander Graf [mailto:agraf@suse.de]
>> Sent: Thursday, October 04, 2012 4:56 PM
>> To: Bhushan Bharat-R65777
>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org
>> Subject: Re: [PATCH 6/6] KVM: booke/bookehv: Add debug stub support
>> 
>> 
>> On 04.10.2012, at 13:06, Bhushan Bharat-R65777 wrote:
>> 
>>> 
>>> 
>>>> -----Original Message-----
>>>> From: Alexander Graf [mailto:agraf@suse.de]
>>>> Sent: Monday, September 24, 2012 9:50 PM
>>>> To: Bhushan Bharat-R65777
>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Bhushan
>>>> Bharat-R65777
>>>> Subject: Re: [PATCH 6/6] KVM: booke/bookehv: Add debug stub support
>>>> 
>>>> 
>>>> On 21.08.2012, at 15:52, Bharat Bhushan wrote:
>>>> 
>>>>> This patch adds the debug stub support on booke/bookehv.
>>>>> Now QEMU debug stub can use hw breakpoint, watchpoint and software
>>>>> breakpoint to debug guest.
>>>>> 
>>>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>>>> ---
>>>>> arch/powerpc/include/asm/kvm.h        |   29 ++++++-
>>>>> arch/powerpc/include/asm/kvm_host.h   |    5 +
>>>>> arch/powerpc/kernel/asm-offsets.c     |   26 ++++++
>>>>> arch/powerpc/kvm/booke.c              |  144 +++++++++++++++++++++++++++++--
>> --
>>>>> arch/powerpc/kvm/booke_interrupts.S   |  110 +++++++++++++++++++++++++
>>>>> arch/powerpc/kvm/bookehv_interrupts.S |  141
>> +++++++++++++++++++++++++++++++-
>>>>> arch/powerpc/kvm/e500mc.c             |    3 +-
>>>>> 7 files changed, 435 insertions(+), 23 deletions(-)
>>>>> 
>>>>> diff --git a/arch/powerpc/include/asm/kvm.h
>>>>> b/arch/powerpc/include/asm/kvm.h index 61b197e..53479ea 100644
>>>>> --- a/arch/powerpc/include/asm/kvm.h
>>>>> +++ b/arch/powerpc/include/asm/kvm.h
>>>>> @@ -25,6 +25,7 @@
>>>>> /* Select powerpc specific features in <linux/kvm.h> */ #define
>>>>> __KVM_HAVE_SPAPR_TCE #define __KVM_HAVE_PPC_SMT
>>>>> +#define __KVM_HAVE_GUEST_DEBUG
>>>>> 
>>>>> struct kvm_regs {
>>>>> 	__u64 pc;
>>>>> @@ -264,7 +265,31 @@ struct kvm_fpu {
>>>>> 	__u64 fpr[32];
>>>>> };
>>>>> 
>>>>> +
>>>>> +/*
>>>>> + * Defines for h/w breakpoint, watchpoint (read, write or both) and
>>>>> + * software breakpoint.
>>>>> + * These are used as "type" in KVM_SET_GUEST_DEBUG ioctl and "status"
>>>>> + * for KVM_DEBUG_EXIT.
>>>>> + */
>>>>> +#define KVMPPC_DEBUG_NONE		0x0
>>>>> +#define KVMPPC_DEBUG_BREAKPOINT		(1UL << 1)
>>>>> +#define KVMPPC_DEBUG_WATCH_WRITE	(1UL << 2)
>>>>> +#define KVMPPC_DEBUG_WATCH_READ		(1UL << 3)
>>>>> struct kvm_debug_exit_arch {
>>>>> +	__u64 pc;
>>>>> +	/*
>>>>> +	 * exception -> returns the exception number. If the KVM_DEBUG_EXIT
>>>>> +	 * exit is not handled (say not h/w breakpoint or software breakpoint
>>>>> +	 * set for this address) by qemu then it is supposed to inject this
>>>>> +	 * exception to guest.
>>>>> +	 */
>>>>> +	__u32 exception;
>>>>> +	/*
>>>>> +	 * exiting to userspace because of h/w breakpoint, watchpoint
>>>>> +	 * (read, write or both) and software breakpoint.
>>>>> +	 */
>>>>> +	__u32 status;
>>>>> };
>>>>> 
>>>>> /* for KVM_SET_GUEST_DEBUG */
>>>>> @@ -276,10 +301,6 @@ struct kvm_guest_debug_arch {
>>>>> 		 * Type denotes h/w breakpoint, read watchpoint, write
>>>>> 		 * watchpoint or watchpoint (both read and write).
>>>>> 		 */
>>>>> -#define KVMPPC_DEBUG_NOTYPE		0x0
>>>>> -#define KVMPPC_DEBUG_BREAKPOINT		(1UL << 1)
>>>>> -#define KVMPPC_DEBUG_WATCH_WRITE	(1UL << 2)
>>>>> -#define KVMPPC_DEBUG_WATCH_READ		(1UL << 3)
>>>>> 		__u32 type;
>>>>> 		__u32 pad1;
>>>>> 		__u64 pad2;
>>>>> diff --git a/arch/powerpc/include/asm/kvm_host.h
>>>> b/arch/powerpc/include/asm/kvm_host.h
>>>>> index c7219c1..3ba465a 100644
>>>>> --- a/arch/powerpc/include/asm/kvm_host.h
>>>>> +++ b/arch/powerpc/include/asm/kvm_host.h
>>>>> @@ -496,7 +496,12 @@ struct kvm_vcpu_arch {
>>>>> 	u32 mmucfg;
>>>>> 	u32 epr;
>>>>> 	u32 crit_save;
>>>>> +	/* guest debug registers*/
>>>>> 	struct kvmppc_booke_debug_reg dbg_reg;
>>>>> +	/* shadow debug registers */
>>>>> +	struct kvmppc_booke_debug_reg shadow_dbg_reg;
>>>>> +	/* host debug registers*/
>>>>> +	struct kvmppc_booke_debug_reg host_dbg_reg;
>>>>> #endif
>>>>> 	gpa_t paddr_accessed;
>>>>> 	gva_t vaddr_accessed;
>>>>> diff --git a/arch/powerpc/kernel/asm-offsets.c
>>>>> b/arch/powerpc/kernel/asm-
>>>> offsets.c
>>>>> index 555448e..6987821 100644
>>>>> --- a/arch/powerpc/kernel/asm-offsets.c
>>>>> +++ b/arch/powerpc/kernel/asm-offsets.c
>>>>> @@ -564,6 +564,32 @@ int main(void)
>>>>> 	DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear));
>>>>> 	DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr));
>>>>> 	DEFINE(VCPU_CRIT_SAVE, offsetof(struct kvm_vcpu, arch.crit_save));
>>>>> +	DEFINE(VCPU_DBSR, offsetof(struct kvm_vcpu, arch.dbsr));
>>>>> +	DEFINE(VCPU_SHADOW_DBG, offsetof(struct kvm_vcpu, arch.shadow_dbg_reg));
>>>>> +	DEFINE(VCPU_HOST_DBG, offsetof(struct kvm_vcpu, arch.host_dbg_reg));
>>>>> +	DEFINE(KVMPPC_DBG_DBCR0, offsetof(struct kvmppc_booke_debug_reg,
>>>>> +					  dbcr0));
>>>>> +	DEFINE(KVMPPC_DBG_DBCR1, offsetof(struct kvmppc_booke_debug_reg,
>>>>> +					  dbcr1));
>>>>> +	DEFINE(KVMPPC_DBG_DBCR2, offsetof(struct kvmppc_booke_debug_reg,
>>>>> +					  dbcr2));
>>>>> +#ifdef CONFIG_KVM_E500MC
>>>>> +	DEFINE(KVMPPC_DBG_DBCR4, offsetof(struct kvmppc_booke_debug_reg,
>>>>> +					  dbcr4));
>>>>> +#endif
>>>>> +	DEFINE(KVMPPC_DBG_IAC1, offsetof(struct kvmppc_booke_debug_reg,
>>>>> +					 iac[0]));
>>>>> +	DEFINE(KVMPPC_DBG_IAC2, offsetof(struct kvmppc_booke_debug_reg,
>>>>> +					 iac[1]));
>>>>> +	DEFINE(KVMPPC_DBG_IAC3, offsetof(struct kvmppc_booke_debug_reg,
>>>>> +					 iac[2]));
>>>>> +	DEFINE(KVMPPC_DBG_IAC4, offsetof(struct kvmppc_booke_debug_reg,
>>>>> +					 iac[3]));
>>>>> +	DEFINE(KVMPPC_DBG_DAC1, offsetof(struct kvmppc_booke_debug_reg,
>>>>> +					 dac[0]));
>>>>> +	DEFINE(KVMPPC_DBG_DAC2, offsetof(struct kvmppc_booke_debug_reg,
>>>>> +					 dac[1]));
>>>>> +	DEFINE(VCPU_GUEST_DEBUG, offsetof(struct kvm_vcpu, guest_debug));
>>>>> #endif /* CONFIG_PPC_BOOK3S */
>>>>> #endif /* CONFIG_KVM */
>>>>> 
>>>>> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
>>>>> index dd0e5b8..4d82e34 100644
>>>>> --- a/arch/powerpc/kvm/booke.c
>>>>> +++ b/arch/powerpc/kvm/booke.c
>>>>> @@ -132,6 +132,9 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32
>>>>> new_msr)
>>>>> 
>>>>> #ifdef CONFIG_KVM_BOOKE_HV
>>>>> 	new_msr |= MSR_GS;
>>>>> +
>>>>> +	if (vcpu->guest_debug)
>>>>> +		new_msr |= MSR_DE;
>>>>> #endif
>>>>> 
>>>>> 	vcpu->arch.shared->msr = new_msr;
>>>>> @@ -667,10 +670,21 @@ out:
>>>>> 	return ret;
>>>>> }
>>>>> 
>>>>> -static int emulation_exit(struct kvm_run *run, struct kvm_vcpu
>>>>> *vcpu)
>>>>> +static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
>>>>> +			  int exit_nr)
>>>>> {
>>>>> 	enum emulation_result er;
>>>>> 
>>>>> +	if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) &&
>>>>> +		     vcpu->arch.last_inst == KVMPPC_INST_GUEST_GDB) {
>>>> 
>>>> This belongs into the normal emulation code path, behind the same
>>>> switch() that everything else goes through.
>>> 
>>> I am not sure I understood correctly. Below is the reason why I placed this
>> code here.
>>> Instruction where software breakpoint is to be set is replaced by "ehpriv"
>> instruction. On e500v2, this is not a valid instruction can causes program
>> interrupt. On e500mc, "ehpriv" is a valid instruction. Both the exit path calls
>> emulation_exit(), so we have placed the code in this function.
>>> Do you want this code to be moved in program interrupt exit path for e500v2
>> and BOOKE_INTERRUPT_HV_PRIV for e500mc?
>> 
>> Ok, in this patch you do (basically):
>> 
>> int emulation_exit()
>> {
>>    if (inst == DEBUG_INST) {
>>        debug_stuff();
>>        return;
>>    }
>> 
>>    switch (inst) {
>>        case INST_A:
>>            foo();
>>        ....
>>    }
>> }
> 
> Are not we doing something like this:
> int emulation_exit()
> {
>     if (inst == DEBUG_INST) {
>         debug_stuff();
>         return;
>     }
> 
>     status = kvmppc_emulate_instruction()
>     switch (status) {
>         case FAIL:
>             foo();
>         case DONE:
> 		foo1();
>         ....
>     }
> }
> 
> Do you want something like this:
> 
> int emulation_exit()
> {
> 
>     status = kvmppc_emulate_instruction()
>     switch (status) {
>         case FAIL:
>     		if (inst == DEBUG_INST) {
>         		debug_stuff();
>    		      return;
>     		}
>             foo();
> 
>         case DONE:
> 		foo1();
>         ....
>     }
> }

No, I want the DEBUG_INST be handled the same as any other instruction we emulate.

>> 
>> what I want is:
>> 
>> int emulation_exit()
>> {
>>    switch (inst) {
>>        case INST_A:
>>            foo(); break;
>>        case DEBUG_INST:
>>            debug_stuff(); break;
>>        ....
>>    }
>> }
>> 
>>> 
>>> 
>>>> 
>>>>> +		run->exit_reason = KVM_EXIT_DEBUG;
>>>>> +		run->debug.arch.pc = vcpu->arch.pc;
>>>>> +		run->debug.arch.exception = exit_nr;
>>>>> +		run->debug.arch.status = 0;
>>>>> +		kvmppc_account_exit(vcpu, DEBUG_EXITS);
>>>>> +		return RESUME_HOST;
>>>>> +	}
>>>>> +
>>>>> 	er = kvmppc_emulate_instruction(run, vcpu);
>>>>> 	switch (er) {
>>>>> 	case EMULATE_DONE:
>>>>> @@ -697,6 +711,44 @@ static int emulation_exit(struct kvm_run *run,
>>>>> struct
>>>> kvm_vcpu *vcpu)
>>>>> 	default:
>>>>> 		BUG();
>>>>> 	}
>>>>> +
>>>>> +	if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_ENABLE) &&
>>>>> +	    (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)) {
>>>> 
>>>> I don't understand how this is supposed to work. When we enable
>>>> singlestep, why would we end up in emulation_exit()?
>>> 
>>> When singlestep is enabled then we set DBCR0[ICMP] and the debug handler
>> should be able to handle this. I think you are right.
>>> 
>>>> 
>>>>> +		run->exit_reason = KVM_EXIT_DEBUG;
>>>>> +		return RESUME_HOST;
>>>>> +	}
>>>>> +}
>>>>> +
>>>>> +static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu
>>>>> +*vcpu) {
>>>>> +	u32 dbsr;
>>>>> +
>>>>> +#ifndef CONFIG_KVM_BOOKE_HV
>>>>> +	if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC))
>>>>> +		vcpu->arch.pc = mfspr(SPRN_DSRR0);
>>>>> +	else
>>>>> +		vcpu->arch.pc = mfspr(SPRN_CSRR0); #endif
>>>> 
>>>> Why doesn't this get handled in the asm code that recovers from the
>>>> respective exceptions?
>>> 
>>> Yes. I will remove this.
>>> 
>>>> 
>>>>> +	dbsr = vcpu->arch.dbsr;
>>>>> +
>>>>> +	run->debug.arch.pc = vcpu->arch.pc;
>>>>> +	run->debug.arch.status = 0;
>>>>> +	vcpu->arch.dbsr = 0;
>>>>> +
>>>>> +	if (dbsr & (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) {
>>>>> +		run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT;
>>>>> +	} else {
>>>>> +		if (dbsr & (DBSR_DAC1W | DBSR_DAC2W))
>>>>> +			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_WRITE;
>>>>> +		else if (dbsr & (DBSR_DAC1R | DBSR_DAC2R))
>>>>> +			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_READ;
>>>>> +		if (dbsr & (DBSR_DAC1R | DBSR_DAC1W))
>>>>> +			run->debug.arch.pc = vcpu->arch.shadow_dbg_reg.dac[0];
>>>>> +		else if (dbsr & (DBSR_DAC2R | DBSR_DAC2W))
>>>>> +			run->debug.arch.pc = vcpu->arch.shadow_dbg_reg.dac[1];
>>>>> +	}
>>>> 
>>>> Mind to explain the guest register sharing mechanism you are
>>>> envisioning? Which registers are used by the guest? Which are used by
>>>> the host? Who decides what gets used by whom?
>>> 
>>> struct kvm_vcpu_arch {
>>> 	.
>>> 	.
>>> 
>>>      /* guest debug registers*/
>>>       struct kvmppc_booke_debug_reg dbg_reg;
>>>      /* shadow debug registers */
>>>      struct kvmppc_booke_debug_reg shadow_dbg_reg;
>>>      /* host debug registers*/
>>>      struct kvmppc_booke_debug_reg host_dbg_reg;
>>> 
>>> 	.
>>> 	.
>>> }
>>> 
>>> dbg_reg: contains the values written by guest.
>>> shadow_dbg_reg: This contains the actual values to be written on behalf of
>> guest/qemu.
>>> host_dbg_reg: the debug register value of host (needed for store/retore
>> purpose).
>>> 
>>> According to current design both, the qemu debug stub and guest, cannot share
>> the debug resources. QEMU debug stub is given priority.
>>> But host and guest can share all debug registers and host cannot debug guest
>> if it is using the debug resource (which probably does not look like a good
>> case).
>> 
>> Ok. Assume that in guest context, we only ever want guest debugging enabled. If
>> QEMU wants to inject a QEMU breakpoint, it will fiddle with the guest debug
>> registers itself to inject its breakpoint into them.
> 
> I would like to understand the context you are describing.
> QEMU would like to inject debug interrupt to guest only if a debug interrupt happened in guest context and QEMU is not able to handle interrupt (because qemu debug have not set). Right?

Yes

> In that case the only thing that needed to be updated is guest->DBSR?

I think you lost me here :)

> 
>> 
>> With that scheme, would we still need shadow debug registers? Or could we remove
>> that one level of indirection?
>> 
>>> 
>>>> 
>>>>> +
>>>>> +	return RESUME_HOST;
>>>>> }
>>>>> 
>>>>> static void kvmppc_fill_pt_regs(struct pt_regs *regs) @@ -843,7
>>>>> +895,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct
>>>> kvm_vcpu *vcpu,
>>>>> 		break;
>>>>> 
>>>>> 	case BOOKE_INTERRUPT_HV_PRIV:
>>>>> -		r = emulation_exit(run, vcpu);
>>>>> +		r = emulation_exit(run, vcpu, exit_nr);
>>>>> 		break;
>>>>> 
>>>>> 	case BOOKE_INTERRUPT_PROGRAM:
>>>>> @@ -862,7 +914,7 @@ int kvmppc_handle_exit(struct kvm_run *run,
>>>>> struct
>>>> kvm_vcpu *vcpu,
>>>>> 			break;
>>>>> 		}
>>>>> 
>>>>> -		r = emulation_exit(run, vcpu);
>>>>> +		r = emulation_exit(run, vcpu, exit_nr);
>>>>> 		break;
>>>>> 
>>>>> 	case BOOKE_INTERRUPT_FP_UNAVAIL:
>>>>> @@ -1052,18 +1104,12 @@ int kvmppc_handle_exit(struct kvm_run *run,
>>>>> struct
>>>> kvm_vcpu *vcpu,
>>>>> 	}
>>>>> 
>>>>> 	case BOOKE_INTERRUPT_DEBUG: {
>>>>> -		u32 dbsr;
>>>>> -
>>>>> -		vcpu->arch.pc = mfspr(SPRN_CSRR0);
>>>>> -
>>>>> -		/* clear IAC events in DBSR register */
>>>>> -		dbsr = mfspr(SPRN_DBSR);
>>>>> -		dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4;
>>>>> -		mtspr(SPRN_DBSR, dbsr);
>>>>> -
>>>>> -		run->exit_reason = KVM_EXIT_DEBUG;
>>>>> +		r = kvmppc_handle_debug(run, vcpu);
>>>>> +		if (r == RESUME_HOST) {
>>>>> +			run->debug.arch.exception = exit_nr;
>>>>> +			run->exit_reason = KVM_EXIT_DEBUG;
>>>>> +		}
>>>>> 		kvmppc_account_exit(vcpu, DEBUG_EXITS);
>>>>> -		r = RESUME_HOST;
>>>>> 		break;
>>>>> 	}
>>>>> 
>>>>> @@ -1403,10 +1449,78 @@ int kvm_vcpu_ioctl_set_one_reg(struct
>>>>> kvm_vcpu *vcpu,
>>>> struct kvm_one_reg *reg)
>>>>> 	return r;
>>>>> }
>>>>> 
>>>>> +#define BP_NUM	KVMPPC_BOOKE_IAC_NUM
>>>>> +#define WP_NUM	KVMPPC_BOOKE_DAC_NUM
>>>>> +
>>>>> int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
>>>>> 					 struct kvm_guest_debug *dbg)
>>>>> {
>>>>> -	return -EINVAL;
>>>>> +
>>>>> +	if (!(dbg->control & KVM_GUESTDBG_ENABLE)) {
>>>>> +		/* Clear All debug events */
>>>>> +		vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
>>>>> +		vcpu->guest_debug = 0;
>>>>> +		return 0;
>>>>> +	}
>>>>> +
>>>>> +	vcpu->guest_debug = dbg->control;
>>>>> +	vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
>>>>> +
>>>>> +	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
>>>>> +		vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC;
>>>>> +
>>>>> +	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
>>>>> +		struct kvmppc_booke_debug_reg *gdbgr =
>>>>> +				&(vcpu->arch.shadow_dbg_reg);
>>>>> +		int n, b = 0, w = 0;
>>>>> +		const u32 bp_code[] = {
>>>>> +			DBCR0_IAC1 | DBCR0_IDM,
>>>>> +			DBCR0_IAC2 | DBCR0_IDM,
>>>>> +			DBCR0_IAC3 | DBCR0_IDM,
>>>>> +			DBCR0_IAC4 | DBCR0_IDM
>>>>> +		};
>>>>> +		const u32 wp_code[] = {
>>>>> +			DBCR0_DAC1W | DBCR0_IDM,
>>>>> +			DBCR0_DAC2W | DBCR0_IDM,
>>>>> +			DBCR0_DAC1R | DBCR0_IDM,
>>>>> +			DBCR0_DAC2R | DBCR0_IDM
>>>>> +		};
>>>>> +
>>>>> +#ifndef CONFIG_KVM_BOOKE_HV
>>>>> +		gdbgr->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US |
>>>>> +				DBCR1_IAC3US | DBCR1_IAC4US;
>>>>> +		gdbgr->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US; #else
>>>>> +		gdbgr->dbcr1 = 0;
>>>>> +		gdbgr->dbcr2 = 0;
>>>>> +#endif
>>>>> +
>>>>> +		for (n = 0; n < (BP_NUM + WP_NUM); n++) {
>>>>> +			u32 type = dbg->arch.bp[n].type;
>>>>> +
>>>>> +			if (!type)
>>>>> +				break;
>>>>> +
>>>>> +			if (type & (KVMPPC_DEBUG_WATCH_READ |
>>>>> +				    KVMPPC_DEBUG_WATCH_WRITE)) {
>>>>> +				if (w < WP_NUM) {
>>>>> +					if (type & KVMPPC_DEBUG_WATCH_READ)
>>>>> +						gdbgr->dbcr0 |= wp_code[w + 2];
>>>>> +					if (type & KVMPPC_DEBUG_WATCH_WRITE)
>>>>> +						gdbgr->dbcr0 |= wp_code[w];
>>>>> +					gdbgr->dac[w] = dbg->arch.bp[n].addr;
>>>>> +					w++;
>>>>> +				}
>>>>> +			} else if (type & KVMPPC_DEBUG_BREAKPOINT) {
>>>>> +				if (b < BP_NUM) {
>>>>> +					gdbgr->dbcr0 |= bp_code[b];
>>>>> +					gdbgr->iac[b] = dbg->arch.bp[n].addr;
>>>>> +					b++;
>>>>> +				}
>>>>> +			}
>>>>> +		}
>>>>> +	}
>>>>> +	return 0;
>>>>> }
>>>>> 
>>>>> int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct
>>>>> kvm_fpu *fpu) diff --git a/arch/powerpc/kvm/booke_interrupts.S
>>>> b/arch/powerpc/kvm/booke_interrupts.S
>>>>> index dd9c5d4..2202936 100644
>>>>> --- a/arch/powerpc/kvm/booke_interrupts.S
>>>>> +++ b/arch/powerpc/kvm/booke_interrupts.S
>>>>> @@ -39,6 +39,8 @@
>>>>> #define HOST_MIN_STACK_SIZE (HOST_NV_GPR(R31) + 4) #define
>>>>> HOST_STACK_SIZE (((HOST_MIN_STACK_SIZE + 15) / 16) * 16) /* Align. */
>>>>> #define HOST_STACK_LR   (HOST_STACK_SIZE + 4) /* In caller stack frame. */
>>>>> +#define DBCR0_AC_BITS	(DBCR0_IAC1 | DBCR0_IAC2 | DBCR0_IAC3 | DBCR0_IAC4 | \
>>>>> +			 DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W)
>>>>> 
>>>>> #define NEED_INST_MASK ((1<<BOOKE_INTERRUPT_PROGRAM) | \
>>>>>                       (1<<BOOKE_INTERRUPT_DTLB_MISS) | \ @@ -52,6
>>>>> +54,8 @@
>>>>>                      (1<<BOOKE_INTERRUPT_PROGRAM) | \
>>>>>                      (1<<BOOKE_INTERRUPT_DTLB_MISS))
>>>>> 
>>>>> +#define NEED_DEBUG_SAVE (1<<BOOKE_INTERRUPT_DEBUG)
>>>>> +
>>>>> .macro __KVM_HANDLER ivor_nr scratch srr0
>>>>> 	stw	r3, VCPU_GPR(R3)(r4)
>>>>> 	stw	r5, VCPU_GPR(R5)(r4)
>>>>> @@ -212,6 +216,61 @@ _GLOBAL(kvmppc_resume_host)
>>>>> 	stw	r9, VCPU_FAULT_ESR(r4)
>>>>> ..skip_esr:
>>>>> 
>>>>> +	addi	r7, r4, VCPU_HOST_DBG
>>>>> +	mfspr	r9, SPRN_DBCR0
>>>>> +	lwz	r8, KVMPPC_DBG_DBCR0(r7)
>>>>> +	andis.	r9, r9, DBCR0_AC_BITS@h
>>>>> +	beq	skip_load_host_debug
>>>>> +	li	r9, 0
>>>>> +	mtspr	SPRN_DBCR0, r9		/* disable all debug event */
>>>>> +	lwz	r9, KVMPPC_DBG_DBCR1(r7)
>>>>> +	mtspr	SPRN_DBCR1, r9
>>>>> +	lwz	r9, KVMPPC_DBG_DBCR2(r7)
>>>>> +	mtspr	SPRN_DBCR2, r9
>>>>> +	lwz	r9, KVMPPC_DBG_IAC1+4(r7)
>>>>> +	mtspr	SPRN_IAC1, r9
>>>>> +	lwz	r9, KVMPPC_DBG_IAC2+4(r7)
>>>>> +	mtspr	SPRN_IAC2, r9
>>>>> +#if CONFIG_PPC_ADV_DEBUG_IACS > 2
>>>>> +	lwz	r9, KVMPPC_DBG_IAC3+4(r7)
>>>>> +	mtspr	SPRN_IAC3, r9
>>>>> +	lwz	r9, KVMPPC_DBG_IAC4+4(r7)
>>>>> +	mtspr	SPRN_IAC4, r9
>>>>> +#endif
>>>>> +	lwz	r9, KVMPPC_DBG_DAC1+4(r7)
>>>>> +	mtspr	SPRN_DAC1, r9
>>>>> +	lwz	r9, KVMPPC_DBG_DAC2+4(r7)
>>>> 
>>>> Yikes. Please move this into a struct, similar to how the MSR
>>>> save/restore happens on x86.
>>>> 
>>>> Also, these are 64-bit loads and stores, no?
>>> 
>>> Ok, you mean PPC_LD/STD?
>> 
>> Yeah :).
>> 
>>> 
>>>> 
>>>>> +	mtspr	SPRN_DAC2, r9
>>>>> +skip_load_host_debug:
>>>>> +	/* Clear h/w DBSR and save current(guest) DBSR */
>>>>> +	mfspr	r9, SPRN_DBSR
>>>>> +	mtspr	SPRN_DBSR, r9
>>>>> +	isync
>>>>> +	andi.	r7, r6, NEED_DEBUG_SAVE
>>>>> +	beq	skip_dbsr_save
>>>>> +	/*
>>>>> +	 * If vcpu->guest_debug flag is set then do not check for
>>>>> +	 * shared->msr.DE as this debugging (say by QEMU) does not
>>>>> +	 * depends on shared->msr.de. In these scanerios MSR.DE is
>>>>> +	 * always set using shared_msr and should be handled always.
>>>>> +	 */
>>>>> +	lwz	r7, VCPU_GUEST_DEBUG(r4)
>>>>> +	cmpwi	r7, 0
>>>>> +	bne	skip_save_trap_event
>>>>> +	PPC_LL	r3, VCPU_SHARED(r4)
>>>>> +#ifndef CONFIG_64BIT
>>>>> +	lwz	r3, (VCPU_SHARED_MSR + 4)(r3)
>>>>> +#else
>>>>> +	ld	r3, (VCPU_SHARED_MSR)(r3)
>>>>> +#endif
>>>> 
>>>> Didn't we have 64-bit access helpers for these?
>>> 
>>> I will use PPC_LD.
>>> 
>>>> 
>>>> Also this shouldn't happen in the entry/exit code. We have shadow_msr
>>>> for these purposes.
>>> 
>>> Based on shared_msr.MSR_DE, we set make DBSR (set DBSR_TIE).
>> 
>> Hrm. Yeah, must've misread something here.
>> 
>> It would be awesome if we could make the debug register save/restore a bit more
>> relaxed though. For example during preemption.
> 
> If we delay this save/restore then host cannot do debugging of KVM code. Right?

You mean using kdb? Does anyone actually do that?

But let's leave this for later as an optimization.


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bharat Bhushan Oct. 4, 2012, 3:19 p.m. UTC | #17
> >>>>> -static int emulation_exit(struct kvm_run *run, struct kvm_vcpu
> >>>>> *vcpu)
> >>>>> +static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
> >>>>> +			  int exit_nr)
> >>>>> {
> >>>>> 	enum emulation_result er;
> >>>>>
> >>>>> +	if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) &&
> >>>>> +		     vcpu->arch.last_inst == KVMPPC_INST_GUEST_GDB) {
> >>>>
> >>>> This belongs into the normal emulation code path, behind the same
> >>>> switch() that everything else goes through.
> >>>
> >>> I am not sure I understood correctly. Below is the reason why I
> >>> placed this
> >> code here.
> >>> Instruction where software breakpoint is to be set is replaced by "ehpriv"
> >> instruction. On e500v2, this is not a valid instruction can causes
> >> program interrupt. On e500mc, "ehpriv" is a valid instruction. Both
> >> the exit path calls emulation_exit(), so we have placed the code in this
> function.
> >>> Do you want this code to be moved in program interrupt exit path for
> >>> e500v2
> >> and BOOKE_INTERRUPT_HV_PRIV for e500mc?
> >>
> >> Ok, in this patch you do (basically):
> >>
> >> int emulation_exit()
> >> {
> >>    if (inst == DEBUG_INST) {
> >>        debug_stuff();
> >>        return;
> >>    }
> >>
> >>    switch (inst) {
> >>        case INST_A:
> >>            foo();
> >>        ....
> >>    }
> >> }
> >
> > Are not we doing something like this:
> > int emulation_exit()
> > {
> >     if (inst == DEBUG_INST) {
> >         debug_stuff();
> >         return;
> >     }
> >
> >     status = kvmppc_emulate_instruction()
> >     switch (status) {
> >         case FAIL:
> >             foo();
> >         case DONE:
> > 		foo1();
> >         ....
> >     }
> > }
> >
> > Do you want something like this:
> >
> > int emulation_exit()
> > {
> >
> >     status = kvmppc_emulate_instruction()
> >     switch (status) {
> >         case FAIL:
> >     		if (inst == DEBUG_INST) {
> >         		debug_stuff();
> >    		      return;
> >     		}
> >             foo();
> >
> >         case DONE:
> > 		foo1();
> >         ....
> >     }
> > }
> 
> No, I want the DEBUG_INST be handled the same as any other instruction we
> emulate.

I would like to understand what you are thinking:
What I derived is , add the instruction in kvmppc_emulate_instruction() (or its child function) which, 
1) fill the relevant information in run-> , kvmppc_account_exit(vcpu, DEBUG_EXITS); and returns EMULATION_DONE
 And in emulation_exit()
     status = kvmppc_emulate_instruction()
     switch (status) {
	case EMULATION_DONE:
		if (inst == DEBUG)
			return RESUME_HOST;
     }
 Or
2) kvmppc_account_exit(vcpu, DEBUG_EXITS); returns EMULATION_DONE;
And in emulation_exit()
     status = kvmppc_emulate_instruction()
     switch (status) {
	case EMULATION_DONE:
		if (inst == DEBUG) {
			fill run-> 
			return RESUME_HOST;
		}
     }

Or
3) kvmppc_account_exit(vcpu, DEBUG_EXITS); returns a new status type (EMULATION_DEBUG_INST)
And in emulation_exit()
     status = kvmppc_emulate_instruction()
     switch (status) {
	case EMULATION_DEBUG_INST:
			fill run-> 
			return RESUME_HOST;
     }

> 
> >>
> >> what I want is:
> >>
> >> int emulation_exit()
> >> {
> >>    switch (inst) {
> >>        case INST_A:
> >>            foo(); break;
> >>        case DEBUG_INST:
> >>            debug_stuff(); break;
> >>        ....
> >>    }
> >> }
> >>
> >>>
> >>>
> >>>>
> >>>>> +		run->exit_reason = KVM_EXIT_DEBUG;
> >>>>> +		run->debug.arch.pc = vcpu->arch.pc;
> >>>>> +		run->debug.arch.exception = exit_nr;
> >>>>> +		run->debug.arch.status = 0;
> >>>>> +		kvmppc_account_exit(vcpu, DEBUG_EXITS);
> >>>>> +		return RESUME_HOST;
> >>>>> +	}
> >>>>> +
> >>>>> 	er = kvmppc_emulate_instruction(run, vcpu);
> >>>>> 	switch (er) {
> >>>>> 	case EMULATE_DONE:
> >>>>> @@ -697,6 +711,44 @@ static int emulation_exit(struct kvm_run
> >>>>> *run, struct
> >>>> kvm_vcpu *vcpu)
> >>>>> 	default:
> >>>>> 		BUG();
> >>>>> 	}
> >>>>> +
> >>>>> +	if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_ENABLE) &&
> >>>>> +	    (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)) {
> >>>>
> >>>> I don't understand how this is supposed to work. When we enable
> >>>> singlestep, why would we end up in emulation_exit()?
> >>>
> >>> When singlestep is enabled then we set DBCR0[ICMP] and the debug
> >>> handler
> >> should be able to handle this. I think you are right.
> >>>
> >>>>
> >>>>> +		run->exit_reason = KVM_EXIT_DEBUG;
> >>>>> +		return RESUME_HOST;
> >>>>> +	}
> >>>>> +}
> >>>>> +
> >>>>> +static int kvmppc_handle_debug(struct kvm_run *run, struct
> >>>>> +kvm_vcpu
> >>>>> +*vcpu) {
> >>>>> +	u32 dbsr;
> >>>>> +
> >>>>> +#ifndef CONFIG_KVM_BOOKE_HV
> >>>>> +	if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC))
> >>>>> +		vcpu->arch.pc = mfspr(SPRN_DSRR0);
> >>>>> +	else
> >>>>> +		vcpu->arch.pc = mfspr(SPRN_CSRR0); #endif
> >>>>
> >>>> Why doesn't this get handled in the asm code that recovers from the
> >>>> respective exceptions?
> >>>
> >>> Yes. I will remove this.
> >>>
> >>>>
> >>>>> +	dbsr = vcpu->arch.dbsr;
> >>>>> +
> >>>>> +	run->debug.arch.pc = vcpu->arch.pc;
> >>>>> +	run->debug.arch.status = 0;
> >>>>> +	vcpu->arch.dbsr = 0;
> >>>>> +
> >>>>> +	if (dbsr & (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) {
> >>>>> +		run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT;
> >>>>> +	} else {
> >>>>> +		if (dbsr & (DBSR_DAC1W | DBSR_DAC2W))
> >>>>> +			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_WRITE;
> >>>>> +		else if (dbsr & (DBSR_DAC1R | DBSR_DAC2R))
> >>>>> +			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_READ;
> >>>>> +		if (dbsr & (DBSR_DAC1R | DBSR_DAC1W))
> >>>>> +			run->debug.arch.pc = vcpu->arch.shadow_dbg_reg.dac[0];
> >>>>> +		else if (dbsr & (DBSR_DAC2R | DBSR_DAC2W))
> >>>>> +			run->debug.arch.pc = vcpu->arch.shadow_dbg_reg.dac[1];
> >>>>> +	}
> >>>>
> >>>> Mind to explain the guest register sharing mechanism you are
> >>>> envisioning? Which registers are used by the guest? Which are used
> >>>> by the host? Who decides what gets used by whom?
> >>>
> >>> struct kvm_vcpu_arch {
> >>> 	.
> >>> 	.
> >>>
> >>>      /* guest debug registers*/
> >>>       struct kvmppc_booke_debug_reg dbg_reg;
> >>>      /* shadow debug registers */
> >>>      struct kvmppc_booke_debug_reg shadow_dbg_reg;
> >>>      /* host debug registers*/
> >>>      struct kvmppc_booke_debug_reg host_dbg_reg;
> >>>
> >>> 	.
> >>> 	.
> >>> }
> >>>
> >>> dbg_reg: contains the values written by guest.
> >>> shadow_dbg_reg: This contains the actual values to be written on
> >>> behalf of
> >> guest/qemu.
> >>> host_dbg_reg: the debug register value of host (needed for
> >>> store/retore
> >> purpose).
> >>>
> >>> According to current design both, the qemu debug stub and guest,
> >>> cannot share
> >> the debug resources. QEMU debug stub is given priority.
> >>> But host and guest can share all debug registers and host cannot
> >>> debug guest
> >> if it is using the debug resource (which probably does not look like
> >> a good case).
> >>
> >> Ok. Assume that in guest context, we only ever want guest debugging
> >> enabled. If QEMU wants to inject a QEMU breakpoint, it will fiddle
> >> with the guest debug registers itself to inject its breakpoint into them.
> >
> > I would like to understand the context you are describing.
> > QEMU would like to inject debug interrupt to guest only if a debug interrupt
> happened in guest context and QEMU is not able to handle interrupt (because qemu
> debug have not set). Right?
> 
> Yes
> 
> > In that case the only thing that needed to be updated is guest->DBSR?
> 
> I think you lost me here :)

Really :) . May be I wrote something wrong. 

> 
> >
> >>
> >> With that scheme, would we still need shadow debug registers? Or
> >> could we remove that one level of indirection?
> >>
> >>>
> >>>>
> >>>>> +
> >>>>> +	return RESUME_HOST;
> >>>>> }
> >>>>>
> >>>>> static void kvmppc_fill_pt_regs(struct pt_regs *regs) @@ -843,7
> >>>>> +895,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct
> >>>> kvm_vcpu *vcpu,
> >>>>> 		break;
> >>>>>
> >>>>> 	case BOOKE_INTERRUPT_HV_PRIV:
> >>>>> -		r = emulation_exit(run, vcpu);
> >>>>> +		r = emulation_exit(run, vcpu, exit_nr);
> >>>>> 		break;
> >>>>>
> >>>>> 	case BOOKE_INTERRUPT_PROGRAM:
> >>>>> @@ -862,7 +914,7 @@ int kvmppc_handle_exit(struct kvm_run *run,
> >>>>> struct
> >>>> kvm_vcpu *vcpu,
> >>>>> 			break;
> >>>>> 		}
> >>>>>
> >>>>> -		r = emulation_exit(run, vcpu);
> >>>>> +		r = emulation_exit(run, vcpu, exit_nr);
> >>>>> 		break;
> >>>>>
> >>>>> 	case BOOKE_INTERRUPT_FP_UNAVAIL:
> >>>>> @@ -1052,18 +1104,12 @@ int kvmppc_handle_exit(struct kvm_run
> >>>>> *run, struct
> >>>> kvm_vcpu *vcpu,
> >>>>> 	}
> >>>>>
> >>>>> 	case BOOKE_INTERRUPT_DEBUG: {
> >>>>> -		u32 dbsr;
> >>>>> -
> >>>>> -		vcpu->arch.pc = mfspr(SPRN_CSRR0);
> >>>>> -
> >>>>> -		/* clear IAC events in DBSR register */
> >>>>> -		dbsr = mfspr(SPRN_DBSR);
> >>>>> -		dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4;
> >>>>> -		mtspr(SPRN_DBSR, dbsr);
> >>>>> -
> >>>>> -		run->exit_reason = KVM_EXIT_DEBUG;
> >>>>> +		r = kvmppc_handle_debug(run, vcpu);
> >>>>> +		if (r == RESUME_HOST) {
> >>>>> +			run->debug.arch.exception = exit_nr;
> >>>>> +			run->exit_reason = KVM_EXIT_DEBUG;
> >>>>> +		}
> >>>>> 		kvmppc_account_exit(vcpu, DEBUG_EXITS);
> >>>>> -		r = RESUME_HOST;
> >>>>> 		break;
> >>>>> 	}
> >>>>>
> >>>>> @@ -1403,10 +1449,78 @@ int kvm_vcpu_ioctl_set_one_reg(struct
> >>>>> kvm_vcpu *vcpu,
> >>>> struct kvm_one_reg *reg)
> >>>>> 	return r;
> >>>>> }
> >>>>>
> >>>>> +#define BP_NUM	KVMPPC_BOOKE_IAC_NUM
> >>>>> +#define WP_NUM	KVMPPC_BOOKE_DAC_NUM
> >>>>> +
> >>>>> int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
> >>>>> 					 struct kvm_guest_debug *dbg) {
> >>>>> -	return -EINVAL;
> >>>>> +
> >>>>> +	if (!(dbg->control & KVM_GUESTDBG_ENABLE)) {
> >>>>> +		/* Clear All debug events */
> >>>>> +		vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
> >>>>> +		vcpu->guest_debug = 0;
> >>>>> +		return 0;
> >>>>> +	}
> >>>>> +
> >>>>> +	vcpu->guest_debug = dbg->control;
> >>>>> +	vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
> >>>>> +
> >>>>> +	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
> >>>>> +		vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC;
> >>>>> +
> >>>>> +	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
> >>>>> +		struct kvmppc_booke_debug_reg *gdbgr =
> >>>>> +				&(vcpu->arch.shadow_dbg_reg);
> >>>>> +		int n, b = 0, w = 0;
> >>>>> +		const u32 bp_code[] = {
> >>>>> +			DBCR0_IAC1 | DBCR0_IDM,
> >>>>> +			DBCR0_IAC2 | DBCR0_IDM,
> >>>>> +			DBCR0_IAC3 | DBCR0_IDM,
> >>>>> +			DBCR0_IAC4 | DBCR0_IDM
> >>>>> +		};
> >>>>> +		const u32 wp_code[] = {
> >>>>> +			DBCR0_DAC1W | DBCR0_IDM,
> >>>>> +			DBCR0_DAC2W | DBCR0_IDM,
> >>>>> +			DBCR0_DAC1R | DBCR0_IDM,
> >>>>> +			DBCR0_DAC2R | DBCR0_IDM
> >>>>> +		};
> >>>>> +
> >>>>> +#ifndef CONFIG_KVM_BOOKE_HV
> >>>>> +		gdbgr->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US |
> >>>>> +				DBCR1_IAC3US | DBCR1_IAC4US;
> >>>>> +		gdbgr->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US; #else
> >>>>> +		gdbgr->dbcr1 = 0;
> >>>>> +		gdbgr->dbcr2 = 0;
> >>>>> +#endif
> >>>>> +
> >>>>> +		for (n = 0; n < (BP_NUM + WP_NUM); n++) {
> >>>>> +			u32 type = dbg->arch.bp[n].type;
> >>>>> +
> >>>>> +			if (!type)
> >>>>> +				break;
> >>>>> +
> >>>>> +			if (type & (KVMPPC_DEBUG_WATCH_READ |
> >>>>> +				    KVMPPC_DEBUG_WATCH_WRITE)) {
> >>>>> +				if (w < WP_NUM) {
> >>>>> +					if (type & KVMPPC_DEBUG_WATCH_READ)
> >>>>> +						gdbgr->dbcr0 |= wp_code[w + 2];
> >>>>> +					if (type & KVMPPC_DEBUG_WATCH_WRITE)
> >>>>> +						gdbgr->dbcr0 |= wp_code[w];
> >>>>> +					gdbgr->dac[w] = dbg->arch.bp[n].addr;
> >>>>> +					w++;
> >>>>> +				}
> >>>>> +			} else if (type & KVMPPC_DEBUG_BREAKPOINT) {
> >>>>> +				if (b < BP_NUM) {
> >>>>> +					gdbgr->dbcr0 |= bp_code[b];
> >>>>> +					gdbgr->iac[b] = dbg->arch.bp[n].addr;
> >>>>> +					b++;
> >>>>> +				}
> >>>>> +			}
> >>>>> +		}
> >>>>> +	}
> >>>>> +	return 0;
> >>>>> }
> >>>>>
> >>>>> int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct
> >>>>> kvm_fpu *fpu) diff --git a/arch/powerpc/kvm/booke_interrupts.S
> >>>> b/arch/powerpc/kvm/booke_interrupts.S
> >>>>> index dd9c5d4..2202936 100644
> >>>>> --- a/arch/powerpc/kvm/booke_interrupts.S
> >>>>> +++ b/arch/powerpc/kvm/booke_interrupts.S
> >>>>> @@ -39,6 +39,8 @@
> >>>>> #define HOST_MIN_STACK_SIZE (HOST_NV_GPR(R31) + 4) #define
> >>>>> HOST_STACK_SIZE (((HOST_MIN_STACK_SIZE + 15) / 16) * 16) /* Align. */
> >>>>> #define HOST_STACK_LR   (HOST_STACK_SIZE + 4) /* In caller stack frame. */
> >>>>> +#define DBCR0_AC_BITS	(DBCR0_IAC1 | DBCR0_IAC2 | DBCR0_IAC3 |
> DBCR0_IAC4 | \
> >>>>> +			 DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R |
> DBCR0_DAC2W)
> >>>>>
> >>>>> #define NEED_INST_MASK ((1<<BOOKE_INTERRUPT_PROGRAM) | \
> >>>>>                       (1<<BOOKE_INTERRUPT_DTLB_MISS) | \ @@ -52,6
> >>>>> +54,8 @@
> >>>>>                      (1<<BOOKE_INTERRUPT_PROGRAM) | \
> >>>>>                      (1<<BOOKE_INTERRUPT_DTLB_MISS))
> >>>>>
> >>>>> +#define NEED_DEBUG_SAVE (1<<BOOKE_INTERRUPT_DEBUG)
> >>>>> +
> >>>>> .macro __KVM_HANDLER ivor_nr scratch srr0
> >>>>> 	stw	r3, VCPU_GPR(R3)(r4)
> >>>>> 	stw	r5, VCPU_GPR(R5)(r4)
> >>>>> @@ -212,6 +216,61 @@ _GLOBAL(kvmppc_resume_host)
> >>>>> 	stw	r9, VCPU_FAULT_ESR(r4)
> >>>>> ..skip_esr:
> >>>>>
> >>>>> +	addi	r7, r4, VCPU_HOST_DBG
> >>>>> +	mfspr	r9, SPRN_DBCR0
> >>>>> +	lwz	r8, KVMPPC_DBG_DBCR0(r7)
> >>>>> +	andis.	r9, r9, DBCR0_AC_BITS@h
> >>>>> +	beq	skip_load_host_debug
> >>>>> +	li	r9, 0
> >>>>> +	mtspr	SPRN_DBCR0, r9		/* disable all debug event */
> >>>>> +	lwz	r9, KVMPPC_DBG_DBCR1(r7)
> >>>>> +	mtspr	SPRN_DBCR1, r9
> >>>>> +	lwz	r9, KVMPPC_DBG_DBCR2(r7)
> >>>>> +	mtspr	SPRN_DBCR2, r9
> >>>>> +	lwz	r9, KVMPPC_DBG_IAC1+4(r7)
> >>>>> +	mtspr	SPRN_IAC1, r9
> >>>>> +	lwz	r9, KVMPPC_DBG_IAC2+4(r7)
> >>>>> +	mtspr	SPRN_IAC2, r9
> >>>>> +#if CONFIG_PPC_ADV_DEBUG_IACS > 2
> >>>>> +	lwz	r9, KVMPPC_DBG_IAC3+4(r7)
> >>>>> +	mtspr	SPRN_IAC3, r9
> >>>>> +	lwz	r9, KVMPPC_DBG_IAC4+4(r7)
> >>>>> +	mtspr	SPRN_IAC4, r9
> >>>>> +#endif
> >>>>> +	lwz	r9, KVMPPC_DBG_DAC1+4(r7)
> >>>>> +	mtspr	SPRN_DAC1, r9
> >>>>> +	lwz	r9, KVMPPC_DBG_DAC2+4(r7)
> >>>>
> >>>> Yikes. Please move this into a struct, similar to how the MSR
> >>>> save/restore happens on x86.
> >>>>
> >>>> Also, these are 64-bit loads and stores, no?
> >>>
> >>> Ok, you mean PPC_LD/STD?
> >>
> >> Yeah :).
> >>
> >>>
> >>>>
> >>>>> +	mtspr	SPRN_DAC2, r9
> >>>>> +skip_load_host_debug:
> >>>>> +	/* Clear h/w DBSR and save current(guest) DBSR */
> >>>>> +	mfspr	r9, SPRN_DBSR
> >>>>> +	mtspr	SPRN_DBSR, r9
> >>>>> +	isync
> >>>>> +	andi.	r7, r6, NEED_DEBUG_SAVE
> >>>>> +	beq	skip_dbsr_save
> >>>>> +	/*
> >>>>> +	 * If vcpu->guest_debug flag is set then do not check for
> >>>>> +	 * shared->msr.DE as this debugging (say by QEMU) does not
> >>>>> +	 * depends on shared->msr.de. In these scanerios MSR.DE is
> >>>>> +	 * always set using shared_msr and should be handled always.
> >>>>> +	 */
> >>>>> +	lwz	r7, VCPU_GUEST_DEBUG(r4)
> >>>>> +	cmpwi	r7, 0
> >>>>> +	bne	skip_save_trap_event
> >>>>> +	PPC_LL	r3, VCPU_SHARED(r4)
> >>>>> +#ifndef CONFIG_64BIT
> >>>>> +	lwz	r3, (VCPU_SHARED_MSR + 4)(r3)
> >>>>> +#else
> >>>>> +	ld	r3, (VCPU_SHARED_MSR)(r3)
> >>>>> +#endif
> >>>>
> >>>> Didn't we have 64-bit access helpers for these?
> >>>
> >>> I will use PPC_LD.
> >>>
> >>>>
> >>>> Also this shouldn't happen in the entry/exit code. We have
> >>>> shadow_msr for these purposes.
> >>>
> >>> Based on shared_msr.MSR_DE, we set make DBSR (set DBSR_TIE).
> >>
> >> Hrm. Yeah, must've misread something here.
> >>
> >> It would be awesome if we could make the debug register save/restore
> >> a bit more relaxed though. For example during preemption.
> >
> > If we delay this save/restore then host cannot do debugging of KVM code.
> Right?
> 
> You mean using kdb? Does anyone actually do that?
> 
> But let's leave this for later as an optimization.

Do you mean that debug registers should be saved/restored in preemption? any specific reason? I think the current save/restore does not add much overhead and we can better it now only, is not it? 

Thanks
-Bharat
> 
> 
> Alex
> 


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Graf Oct. 4, 2012, 3:40 p.m. UTC | #18
On 04.10.2012, at 17:19, Bhushan Bharat-R65777 wrote:

> 
> 
>>>>>>> -static int emulation_exit(struct kvm_run *run, struct kvm_vcpu
>>>>>>> *vcpu)
>>>>>>> +static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
>>>>>>> +			  int exit_nr)
>>>>>>> {
>>>>>>> 	enum emulation_result er;
>>>>>>> 
>>>>>>> +	if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) &&
>>>>>>> +		     vcpu->arch.last_inst == KVMPPC_INST_GUEST_GDB) {
>>>>>> 
>>>>>> This belongs into the normal emulation code path, behind the same
>>>>>> switch() that everything else goes through.
>>>>> 
>>>>> I am not sure I understood correctly. Below is the reason why I
>>>>> placed this
>>>> code here.
>>>>> Instruction where software breakpoint is to be set is replaced by "ehpriv"
>>>> instruction. On e500v2, this is not a valid instruction can causes
>>>> program interrupt. On e500mc, "ehpriv" is a valid instruction. Both
>>>> the exit path calls emulation_exit(), so we have placed the code in this
>> function.
>>>>> Do you want this code to be moved in program interrupt exit path for
>>>>> e500v2
>>>> and BOOKE_INTERRUPT_HV_PRIV for e500mc?
>>>> 
>>>> Ok, in this patch you do (basically):
>>>> 
>>>> int emulation_exit()
>>>> {
>>>>   if (inst == DEBUG_INST) {
>>>>       debug_stuff();
>>>>       return;
>>>>   }
>>>> 
>>>>   switch (inst) {
>>>>       case INST_A:
>>>>           foo();
>>>>       ....
>>>>   }
>>>> }
>>> 
>>> Are not we doing something like this:
>>> int emulation_exit()
>>> {
>>>    if (inst == DEBUG_INST) {
>>>        debug_stuff();
>>>        return;
>>>    }
>>> 
>>>    status = kvmppc_emulate_instruction()
>>>    switch (status) {
>>>        case FAIL:
>>>            foo();
>>>        case DONE:
>>> 		foo1();
>>>        ....
>>>    }
>>> }
>>> 
>>> Do you want something like this:
>>> 
>>> int emulation_exit()
>>> {
>>> 
>>>    status = kvmppc_emulate_instruction()
>>>    switch (status) {
>>>        case FAIL:
>>>    		if (inst == DEBUG_INST) {
>>>        		debug_stuff();
>>>   		      return;
>>>    		}
>>>            foo();
>>> 
>>>        case DONE:
>>> 		foo1();
>>>        ....
>>>    }
>>> }
>> 
>> No, I want the DEBUG_INST be handled the same as any other instruction we
>> emulate.
> 
> I would like to understand what you are thinking:
> What I derived is , add the instruction in kvmppc_emulate_instruction() (or its child function) which, 
> 1) fill the relevant information in run-> , kvmppc_account_exit(vcpu, DEBUG_EXITS); and returns EMULATION_DONE
> And in emulation_exit()
>     status = kvmppc_emulate_instruction()
>     switch (status) {
> 	case EMULATION_DONE:
> 		if (inst == DEBUG)
> 			return RESUME_HOST;
>     }
> Or
> 2) kvmppc_account_exit(vcpu, DEBUG_EXITS); returns EMULATION_DONE;
> And in emulation_exit()
>     status = kvmppc_emulate_instruction()
>     switch (status) {
> 	case EMULATION_DONE:
> 		if (inst == DEBUG) {
> 			fill run-> 
> 			return RESUME_HOST;
> 		}
>     }
> 
> Or
> 3) kvmppc_account_exit(vcpu, DEBUG_EXITS); returns a new status type (EMULATION_DEBUG_INST)
> And in emulation_exit()
>     status = kvmppc_emulate_instruction()
>     switch (status) {
> 	case EMULATION_DEBUG_INST:
> 			fill run-> 
> 			return RESUME_HOST;
>     }

This one :).

> 
>> 
>>>> 
>>>> what I want is:
>>>> 
>>>> int emulation_exit()
>>>> {
>>>>   switch (inst) {
>>>>       case INST_A:
>>>>           foo(); break;
>>>>       case DEBUG_INST:
>>>>           debug_stuff(); break;
>>>>       ....
>>>>   }
>>>> }
>>>> 
>>>>> 
>>>>> 
>>>>>> 
>>>>>>> +		run->exit_reason = KVM_EXIT_DEBUG;
>>>>>>> +		run->debug.arch.pc = vcpu->arch.pc;
>>>>>>> +		run->debug.arch.exception = exit_nr;
>>>>>>> +		run->debug.arch.status = 0;
>>>>>>> +		kvmppc_account_exit(vcpu, DEBUG_EXITS);
>>>>>>> +		return RESUME_HOST;
>>>>>>> +	}
>>>>>>> +
>>>>>>> 	er = kvmppc_emulate_instruction(run, vcpu);
>>>>>>> 	switch (er) {
>>>>>>> 	case EMULATE_DONE:
>>>>>>> @@ -697,6 +711,44 @@ static int emulation_exit(struct kvm_run
>>>>>>> *run, struct
>>>>>> kvm_vcpu *vcpu)
>>>>>>> 	default:
>>>>>>> 		BUG();
>>>>>>> 	}
>>>>>>> +
>>>>>>> +	if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_ENABLE) &&
>>>>>>> +	    (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)) {
>>>>>> 
>>>>>> I don't understand how this is supposed to work. When we enable
>>>>>> singlestep, why would we end up in emulation_exit()?
>>>>> 
>>>>> When singlestep is enabled then we set DBCR0[ICMP] and the debug
>>>>> handler
>>>> should be able to handle this. I think you are right.
>>>>> 
>>>>>> 
>>>>>>> +		run->exit_reason = KVM_EXIT_DEBUG;
>>>>>>> +		return RESUME_HOST;
>>>>>>> +	}
>>>>>>> +}
>>>>>>> +
>>>>>>> +static int kvmppc_handle_debug(struct kvm_run *run, struct
>>>>>>> +kvm_vcpu
>>>>>>> +*vcpu) {
>>>>>>> +	u32 dbsr;
>>>>>>> +
>>>>>>> +#ifndef CONFIG_KVM_BOOKE_HV
>>>>>>> +	if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC))
>>>>>>> +		vcpu->arch.pc = mfspr(SPRN_DSRR0);
>>>>>>> +	else
>>>>>>> +		vcpu->arch.pc = mfspr(SPRN_CSRR0); #endif
>>>>>> 
>>>>>> Why doesn't this get handled in the asm code that recovers from the
>>>>>> respective exceptions?
>>>>> 
>>>>> Yes. I will remove this.
>>>>> 
>>>>>> 
>>>>>>> +	dbsr = vcpu->arch.dbsr;
>>>>>>> +
>>>>>>> +	run->debug.arch.pc = vcpu->arch.pc;
>>>>>>> +	run->debug.arch.status = 0;
>>>>>>> +	vcpu->arch.dbsr = 0;
>>>>>>> +
>>>>>>> +	if (dbsr & (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) {
>>>>>>> +		run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT;
>>>>>>> +	} else {
>>>>>>> +		if (dbsr & (DBSR_DAC1W | DBSR_DAC2W))
>>>>>>> +			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_WRITE;
>>>>>>> +		else if (dbsr & (DBSR_DAC1R | DBSR_DAC2R))
>>>>>>> +			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_READ;
>>>>>>> +		if (dbsr & (DBSR_DAC1R | DBSR_DAC1W))
>>>>>>> +			run->debug.arch.pc = vcpu->arch.shadow_dbg_reg.dac[0];
>>>>>>> +		else if (dbsr & (DBSR_DAC2R | DBSR_DAC2W))
>>>>>>> +			run->debug.arch.pc = vcpu->arch.shadow_dbg_reg.dac[1];
>>>>>>> +	}
>>>>>> 
>>>>>> Mind to explain the guest register sharing mechanism you are
>>>>>> envisioning? Which registers are used by the guest? Which are used
>>>>>> by the host? Who decides what gets used by whom?
>>>>> 
>>>>> struct kvm_vcpu_arch {
>>>>> 	.
>>>>> 	.
>>>>> 
>>>>>     /* guest debug registers*/
>>>>>      struct kvmppc_booke_debug_reg dbg_reg;
>>>>>     /* shadow debug registers */
>>>>>     struct kvmppc_booke_debug_reg shadow_dbg_reg;
>>>>>     /* host debug registers*/
>>>>>     struct kvmppc_booke_debug_reg host_dbg_reg;
>>>>> 
>>>>> 	.
>>>>> 	.
>>>>> }
>>>>> 
>>>>> dbg_reg: contains the values written by guest.
>>>>> shadow_dbg_reg: This contains the actual values to be written on
>>>>> behalf of
>>>> guest/qemu.
>>>>> host_dbg_reg: the debug register value of host (needed for
>>>>> store/retore
>>>> purpose).
>>>>> 
>>>>> According to current design both, the qemu debug stub and guest,
>>>>> cannot share
>>>> the debug resources. QEMU debug stub is given priority.
>>>>> But host and guest can share all debug registers and host cannot
>>>>> debug guest
>>>> if it is using the debug resource (which probably does not look like
>>>> a good case).
>>>> 
>>>> Ok. Assume that in guest context, we only ever want guest debugging
>>>> enabled. If QEMU wants to inject a QEMU breakpoint, it will fiddle
>>>> with the guest debug registers itself to inject its breakpoint into them.
>>> 
>>> I would like to understand the context you are describing.
>>> QEMU would like to inject debug interrupt to guest only if a debug interrupt
>> happened in guest context and QEMU is not able to handle interrupt (because qemu
>> debug have not set). Right?
>> 
>> Yes
>> 
>>> In that case the only thing that needed to be updated is guest->DBSR?
>> 
>> I think you lost me here :)
> 
> Really :) . May be I wrote something wrong. 
> 
>> 
>>> 
>>>> 
>>>> With that scheme, would we still need shadow debug registers? Or
>>>> could we remove that one level of indirection?
>>>> 
>>>>> 
>>>>>> 
>>>>>>> +
>>>>>>> +	return RESUME_HOST;
>>>>>>> }
>>>>>>> 
>>>>>>> static void kvmppc_fill_pt_regs(struct pt_regs *regs) @@ -843,7
>>>>>>> +895,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct
>>>>>> kvm_vcpu *vcpu,
>>>>>>> 		break;
>>>>>>> 
>>>>>>> 	case BOOKE_INTERRUPT_HV_PRIV:
>>>>>>> -		r = emulation_exit(run, vcpu);
>>>>>>> +		r = emulation_exit(run, vcpu, exit_nr);
>>>>>>> 		break;
>>>>>>> 
>>>>>>> 	case BOOKE_INTERRUPT_PROGRAM:
>>>>>>> @@ -862,7 +914,7 @@ int kvmppc_handle_exit(struct kvm_run *run,
>>>>>>> struct
>>>>>> kvm_vcpu *vcpu,
>>>>>>> 			break;
>>>>>>> 		}
>>>>>>> 
>>>>>>> -		r = emulation_exit(run, vcpu);
>>>>>>> +		r = emulation_exit(run, vcpu, exit_nr);
>>>>>>> 		break;
>>>>>>> 
>>>>>>> 	case BOOKE_INTERRUPT_FP_UNAVAIL:
>>>>>>> @@ -1052,18 +1104,12 @@ int kvmppc_handle_exit(struct kvm_run
>>>>>>> *run, struct
>>>>>> kvm_vcpu *vcpu,
>>>>>>> 	}
>>>>>>> 
>>>>>>> 	case BOOKE_INTERRUPT_DEBUG: {
>>>>>>> -		u32 dbsr;
>>>>>>> -
>>>>>>> -		vcpu->arch.pc = mfspr(SPRN_CSRR0);
>>>>>>> -
>>>>>>> -		/* clear IAC events in DBSR register */
>>>>>>> -		dbsr = mfspr(SPRN_DBSR);
>>>>>>> -		dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4;
>>>>>>> -		mtspr(SPRN_DBSR, dbsr);
>>>>>>> -
>>>>>>> -		run->exit_reason = KVM_EXIT_DEBUG;
>>>>>>> +		r = kvmppc_handle_debug(run, vcpu);
>>>>>>> +		if (r == RESUME_HOST) {
>>>>>>> +			run->debug.arch.exception = exit_nr;
>>>>>>> +			run->exit_reason = KVM_EXIT_DEBUG;
>>>>>>> +		}
>>>>>>> 		kvmppc_account_exit(vcpu, DEBUG_EXITS);
>>>>>>> -		r = RESUME_HOST;
>>>>>>> 		break;
>>>>>>> 	}
>>>>>>> 
>>>>>>> @@ -1403,10 +1449,78 @@ int kvm_vcpu_ioctl_set_one_reg(struct
>>>>>>> kvm_vcpu *vcpu,
>>>>>> struct kvm_one_reg *reg)
>>>>>>> 	return r;
>>>>>>> }
>>>>>>> 
>>>>>>> +#define BP_NUM	KVMPPC_BOOKE_IAC_NUM
>>>>>>> +#define WP_NUM	KVMPPC_BOOKE_DAC_NUM
>>>>>>> +
>>>>>>> int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
>>>>>>> 					 struct kvm_guest_debug *dbg) {
>>>>>>> -	return -EINVAL;
>>>>>>> +
>>>>>>> +	if (!(dbg->control & KVM_GUESTDBG_ENABLE)) {
>>>>>>> +		/* Clear All debug events */
>>>>>>> +		vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
>>>>>>> +		vcpu->guest_debug = 0;
>>>>>>> +		return 0;
>>>>>>> +	}
>>>>>>> +
>>>>>>> +	vcpu->guest_debug = dbg->control;
>>>>>>> +	vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
>>>>>>> +
>>>>>>> +	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
>>>>>>> +		vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC;
>>>>>>> +
>>>>>>> +	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
>>>>>>> +		struct kvmppc_booke_debug_reg *gdbgr =
>>>>>>> +				&(vcpu->arch.shadow_dbg_reg);
>>>>>>> +		int n, b = 0, w = 0;
>>>>>>> +		const u32 bp_code[] = {
>>>>>>> +			DBCR0_IAC1 | DBCR0_IDM,
>>>>>>> +			DBCR0_IAC2 | DBCR0_IDM,
>>>>>>> +			DBCR0_IAC3 | DBCR0_IDM,
>>>>>>> +			DBCR0_IAC4 | DBCR0_IDM
>>>>>>> +		};
>>>>>>> +		const u32 wp_code[] = {
>>>>>>> +			DBCR0_DAC1W | DBCR0_IDM,
>>>>>>> +			DBCR0_DAC2W | DBCR0_IDM,
>>>>>>> +			DBCR0_DAC1R | DBCR0_IDM,
>>>>>>> +			DBCR0_DAC2R | DBCR0_IDM
>>>>>>> +		};
>>>>>>> +
>>>>>>> +#ifndef CONFIG_KVM_BOOKE_HV
>>>>>>> +		gdbgr->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US |
>>>>>>> +				DBCR1_IAC3US | DBCR1_IAC4US;
>>>>>>> +		gdbgr->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US; #else
>>>>>>> +		gdbgr->dbcr1 = 0;
>>>>>>> +		gdbgr->dbcr2 = 0;
>>>>>>> +#endif
>>>>>>> +
>>>>>>> +		for (n = 0; n < (BP_NUM + WP_NUM); n++) {
>>>>>>> +			u32 type = dbg->arch.bp[n].type;
>>>>>>> +
>>>>>>> +			if (!type)
>>>>>>> +				break;
>>>>>>> +
>>>>>>> +			if (type & (KVMPPC_DEBUG_WATCH_READ |
>>>>>>> +				    KVMPPC_DEBUG_WATCH_WRITE)) {
>>>>>>> +				if (w < WP_NUM) {
>>>>>>> +					if (type & KVMPPC_DEBUG_WATCH_READ)
>>>>>>> +						gdbgr->dbcr0 |= wp_code[w + 2];
>>>>>>> +					if (type & KVMPPC_DEBUG_WATCH_WRITE)
>>>>>>> +						gdbgr->dbcr0 |= wp_code[w];
>>>>>>> +					gdbgr->dac[w] = dbg->arch.bp[n].addr;
>>>>>>> +					w++;
>>>>>>> +				}
>>>>>>> +			} else if (type & KVMPPC_DEBUG_BREAKPOINT) {
>>>>>>> +				if (b < BP_NUM) {
>>>>>>> +					gdbgr->dbcr0 |= bp_code[b];
>>>>>>> +					gdbgr->iac[b] = dbg->arch.bp[n].addr;
>>>>>>> +					b++;
>>>>>>> +				}
>>>>>>> +			}
>>>>>>> +		}
>>>>>>> +	}
>>>>>>> +	return 0;
>>>>>>> }
>>>>>>> 
>>>>>>> int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct
>>>>>>> kvm_fpu *fpu) diff --git a/arch/powerpc/kvm/booke_interrupts.S
>>>>>> b/arch/powerpc/kvm/booke_interrupts.S
>>>>>>> index dd9c5d4..2202936 100644
>>>>>>> --- a/arch/powerpc/kvm/booke_interrupts.S
>>>>>>> +++ b/arch/powerpc/kvm/booke_interrupts.S
>>>>>>> @@ -39,6 +39,8 @@
>>>>>>> #define HOST_MIN_STACK_SIZE (HOST_NV_GPR(R31) + 4) #define
>>>>>>> HOST_STACK_SIZE (((HOST_MIN_STACK_SIZE + 15) / 16) * 16) /* Align. */
>>>>>>> #define HOST_STACK_LR   (HOST_STACK_SIZE + 4) /* In caller stack frame. */
>>>>>>> +#define DBCR0_AC_BITS	(DBCR0_IAC1 | DBCR0_IAC2 | DBCR0_IAC3 |
>> DBCR0_IAC4 | \
>>>>>>> +			 DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R |
>> DBCR0_DAC2W)
>>>>>>> 
>>>>>>> #define NEED_INST_MASK ((1<<BOOKE_INTERRUPT_PROGRAM) | \
>>>>>>>                      (1<<BOOKE_INTERRUPT_DTLB_MISS) | \ @@ -52,6
>>>>>>> +54,8 @@
>>>>>>>                     (1<<BOOKE_INTERRUPT_PROGRAM) | \
>>>>>>>                     (1<<BOOKE_INTERRUPT_DTLB_MISS))
>>>>>>> 
>>>>>>> +#define NEED_DEBUG_SAVE (1<<BOOKE_INTERRUPT_DEBUG)
>>>>>>> +
>>>>>>> .macro __KVM_HANDLER ivor_nr scratch srr0
>>>>>>> 	stw	r3, VCPU_GPR(R3)(r4)
>>>>>>> 	stw	r5, VCPU_GPR(R5)(r4)
>>>>>>> @@ -212,6 +216,61 @@ _GLOBAL(kvmppc_resume_host)
>>>>>>> 	stw	r9, VCPU_FAULT_ESR(r4)
>>>>>>> ..skip_esr:
>>>>>>> 
>>>>>>> +	addi	r7, r4, VCPU_HOST_DBG
>>>>>>> +	mfspr	r9, SPRN_DBCR0
>>>>>>> +	lwz	r8, KVMPPC_DBG_DBCR0(r7)
>>>>>>> +	andis.	r9, r9, DBCR0_AC_BITS@h
>>>>>>> +	beq	skip_load_host_debug
>>>>>>> +	li	r9, 0
>>>>>>> +	mtspr	SPRN_DBCR0, r9		/* disable all debug event */
>>>>>>> +	lwz	r9, KVMPPC_DBG_DBCR1(r7)
>>>>>>> +	mtspr	SPRN_DBCR1, r9
>>>>>>> +	lwz	r9, KVMPPC_DBG_DBCR2(r7)
>>>>>>> +	mtspr	SPRN_DBCR2, r9
>>>>>>> +	lwz	r9, KVMPPC_DBG_IAC1+4(r7)
>>>>>>> +	mtspr	SPRN_IAC1, r9
>>>>>>> +	lwz	r9, KVMPPC_DBG_IAC2+4(r7)
>>>>>>> +	mtspr	SPRN_IAC2, r9
>>>>>>> +#if CONFIG_PPC_ADV_DEBUG_IACS > 2
>>>>>>> +	lwz	r9, KVMPPC_DBG_IAC3+4(r7)
>>>>>>> +	mtspr	SPRN_IAC3, r9
>>>>>>> +	lwz	r9, KVMPPC_DBG_IAC4+4(r7)
>>>>>>> +	mtspr	SPRN_IAC4, r9
>>>>>>> +#endif
>>>>>>> +	lwz	r9, KVMPPC_DBG_DAC1+4(r7)
>>>>>>> +	mtspr	SPRN_DAC1, r9
>>>>>>> +	lwz	r9, KVMPPC_DBG_DAC2+4(r7)
>>>>>> 
>>>>>> Yikes. Please move this into a struct, similar to how the MSR
>>>>>> save/restore happens on x86.
>>>>>> 
>>>>>> Also, these are 64-bit loads and stores, no?
>>>>> 
>>>>> Ok, you mean PPC_LD/STD?
>>>> 
>>>> Yeah :).
>>>> 
>>>>> 
>>>>>> 
>>>>>>> +	mtspr	SPRN_DAC2, r9
>>>>>>> +skip_load_host_debug:
>>>>>>> +	/* Clear h/w DBSR and save current(guest) DBSR */
>>>>>>> +	mfspr	r9, SPRN_DBSR
>>>>>>> +	mtspr	SPRN_DBSR, r9
>>>>>>> +	isync
>>>>>>> +	andi.	r7, r6, NEED_DEBUG_SAVE
>>>>>>> +	beq	skip_dbsr_save
>>>>>>> +	/*
>>>>>>> +	 * If vcpu->guest_debug flag is set then do not check for
>>>>>>> +	 * shared->msr.DE as this debugging (say by QEMU) does not
>>>>>>> +	 * depends on shared->msr.de. In these scanerios MSR.DE is
>>>>>>> +	 * always set using shared_msr and should be handled always.
>>>>>>> +	 */
>>>>>>> +	lwz	r7, VCPU_GUEST_DEBUG(r4)
>>>>>>> +	cmpwi	r7, 0
>>>>>>> +	bne	skip_save_trap_event
>>>>>>> +	PPC_LL	r3, VCPU_SHARED(r4)
>>>>>>> +#ifndef CONFIG_64BIT
>>>>>>> +	lwz	r3, (VCPU_SHARED_MSR + 4)(r3)
>>>>>>> +#else
>>>>>>> +	ld	r3, (VCPU_SHARED_MSR)(r3)
>>>>>>> +#endif
>>>>>> 
>>>>>> Didn't we have 64-bit access helpers for these?
>>>>> 
>>>>> I will use PPC_LD.
>>>>> 
>>>>>> 
>>>>>> Also this shouldn't happen in the entry/exit code. We have
>>>>>> shadow_msr for these purposes.
>>>>> 
>>>>> Based on shared_msr.MSR_DE, we set make DBSR (set DBSR_TIE).
>>>> 
>>>> Hrm. Yeah, must've misread something here.
>>>> 
>>>> It would be awesome if we could make the debug register save/restore
>>>> a bit more relaxed though. For example during preemption.
>>> 
>>> If we delay this save/restore then host cannot do debugging of KVM code.
>> Right?
>> 
>> You mean using kdb? Does anyone actually do that?
>> 
>> But let's leave this for later as an optimization.
> 
> Do you mean that debug registers should be saved/restored in preemption? any specific reason? I think the current save/restore does not add much overhead and we can better it now only, is not it? 

It definitely adds quite a bit of overhead in a very performance critical path (instruction emulation for v2).


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
index 61b197e..53479ea 100644
--- a/arch/powerpc/include/asm/kvm.h
+++ b/arch/powerpc/include/asm/kvm.h
@@ -25,6 +25,7 @@ 
 /* Select powerpc specific features in <linux/kvm.h> */
 #define __KVM_HAVE_SPAPR_TCE
 #define __KVM_HAVE_PPC_SMT
+#define __KVM_HAVE_GUEST_DEBUG
 
 struct kvm_regs {
 	__u64 pc;
@@ -264,7 +265,31 @@  struct kvm_fpu {
 	__u64 fpr[32];
 };
 
+
+/*
+ * Defines for h/w breakpoint, watchpoint (read, write or both) and
+ * software breakpoint.
+ * These are used as "type" in KVM_SET_GUEST_DEBUG ioctl and "status"
+ * for KVM_DEBUG_EXIT.
+ */
+#define KVMPPC_DEBUG_NONE		0x0
+#define KVMPPC_DEBUG_BREAKPOINT		(1UL << 1)
+#define KVMPPC_DEBUG_WATCH_WRITE	(1UL << 2)
+#define KVMPPC_DEBUG_WATCH_READ		(1UL << 3)
 struct kvm_debug_exit_arch {
+	__u64 pc;
+	/*
+	 * exception -> returns the exception number. If the KVM_DEBUG_EXIT
+	 * exit is not handled (say not h/w breakpoint or software breakpoint
+	 * set for this address) by qemu then it is supposed to inject this
+	 * exception to guest.
+	 */
+	__u32 exception;
+	/*
+	 * exiting to userspace because of h/w breakpoint, watchpoint
+	 * (read, write or both) and software breakpoint.
+	 */
+	__u32 status;
 };
 
 /* for KVM_SET_GUEST_DEBUG */
@@ -276,10 +301,6 @@  struct kvm_guest_debug_arch {
 		 * Type denotes h/w breakpoint, read watchpoint, write
 		 * watchpoint or watchpoint (both read and write).
 		 */
-#define KVMPPC_DEBUG_NOTYPE		0x0
-#define KVMPPC_DEBUG_BREAKPOINT		(1UL << 1)
-#define KVMPPC_DEBUG_WATCH_WRITE	(1UL << 2)
-#define KVMPPC_DEBUG_WATCH_READ		(1UL << 3)
 		__u32 type;
 		__u32 pad1;
 		__u64 pad2;
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index c7219c1..3ba465a 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -496,7 +496,12 @@  struct kvm_vcpu_arch {
 	u32 mmucfg;
 	u32 epr;
 	u32 crit_save;
+	/* guest debug registers*/
 	struct kvmppc_booke_debug_reg dbg_reg;
+	/* shadow debug registers */
+	struct kvmppc_booke_debug_reg shadow_dbg_reg;
+	/* host debug registers*/
+	struct kvmppc_booke_debug_reg host_dbg_reg;
 #endif
 	gpa_t paddr_accessed;
 	gva_t vaddr_accessed;
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 555448e..6987821 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -564,6 +564,32 @@  int main(void)
 	DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear));
 	DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr));
 	DEFINE(VCPU_CRIT_SAVE, offsetof(struct kvm_vcpu, arch.crit_save));
+	DEFINE(VCPU_DBSR, offsetof(struct kvm_vcpu, arch.dbsr));
+	DEFINE(VCPU_SHADOW_DBG, offsetof(struct kvm_vcpu, arch.shadow_dbg_reg));
+	DEFINE(VCPU_HOST_DBG, offsetof(struct kvm_vcpu, arch.host_dbg_reg));
+	DEFINE(KVMPPC_DBG_DBCR0, offsetof(struct kvmppc_booke_debug_reg,
+					  dbcr0));
+	DEFINE(KVMPPC_DBG_DBCR1, offsetof(struct kvmppc_booke_debug_reg,
+					  dbcr1));
+	DEFINE(KVMPPC_DBG_DBCR2, offsetof(struct kvmppc_booke_debug_reg,
+					  dbcr2));
+#ifdef CONFIG_KVM_E500MC
+	DEFINE(KVMPPC_DBG_DBCR4, offsetof(struct kvmppc_booke_debug_reg,
+					  dbcr4));
+#endif
+	DEFINE(KVMPPC_DBG_IAC1, offsetof(struct kvmppc_booke_debug_reg,
+					 iac[0]));
+	DEFINE(KVMPPC_DBG_IAC2, offsetof(struct kvmppc_booke_debug_reg,
+					 iac[1]));
+	DEFINE(KVMPPC_DBG_IAC3, offsetof(struct kvmppc_booke_debug_reg,
+					 iac[2]));
+	DEFINE(KVMPPC_DBG_IAC4, offsetof(struct kvmppc_booke_debug_reg,
+					 iac[3]));
+	DEFINE(KVMPPC_DBG_DAC1, offsetof(struct kvmppc_booke_debug_reg,
+					 dac[0]));
+	DEFINE(KVMPPC_DBG_DAC2, offsetof(struct kvmppc_booke_debug_reg,
+					 dac[1]));
+	DEFINE(VCPU_GUEST_DEBUG, offsetof(struct kvm_vcpu, guest_debug));
 #endif /* CONFIG_PPC_BOOK3S */
 #endif /* CONFIG_KVM */
 
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index dd0e5b8..4d82e34 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -132,6 +132,9 @@  void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
 
 #ifdef CONFIG_KVM_BOOKE_HV
 	new_msr |= MSR_GS;
+
+	if (vcpu->guest_debug)
+		new_msr |= MSR_DE;
 #endif
 
 	vcpu->arch.shared->msr = new_msr;
@@ -667,10 +670,21 @@  out:
 	return ret;
 }
 
-static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
+static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
+			  int exit_nr)
 {
 	enum emulation_result er;
 
+	if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) &&
+		     vcpu->arch.last_inst == KVMPPC_INST_GUEST_GDB) {
+		run->exit_reason = KVM_EXIT_DEBUG;
+		run->debug.arch.pc = vcpu->arch.pc;
+		run->debug.arch.exception = exit_nr;
+		run->debug.arch.status = 0;
+		kvmppc_account_exit(vcpu, DEBUG_EXITS);
+		return RESUME_HOST;
+	}
+
 	er = kvmppc_emulate_instruction(run, vcpu);
 	switch (er) {
 	case EMULATE_DONE:
@@ -697,6 +711,44 @@  static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	default:
 		BUG();
 	}
+
+	if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_ENABLE) &&
+	    (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)) {
+		run->exit_reason = KVM_EXIT_DEBUG;
+		return RESUME_HOST;
+	}
+}
+
+static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu *vcpu)
+{
+	u32 dbsr;
+
+#ifndef CONFIG_KVM_BOOKE_HV
+	if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC))
+		vcpu->arch.pc = mfspr(SPRN_DSRR0);
+	else
+		vcpu->arch.pc = mfspr(SPRN_CSRR0);
+#endif
+	dbsr = vcpu->arch.dbsr;
+
+	run->debug.arch.pc = vcpu->arch.pc;
+	run->debug.arch.status = 0;
+	vcpu->arch.dbsr = 0;
+
+	if (dbsr & (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) {
+		run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT;
+	} else {
+		if (dbsr & (DBSR_DAC1W | DBSR_DAC2W))
+			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_WRITE;
+		else if (dbsr & (DBSR_DAC1R | DBSR_DAC2R))
+			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_READ;
+		if (dbsr & (DBSR_DAC1R | DBSR_DAC1W))
+			run->debug.arch.pc = vcpu->arch.shadow_dbg_reg.dac[0];
+		else if (dbsr & (DBSR_DAC2R | DBSR_DAC2W))
+			run->debug.arch.pc = vcpu->arch.shadow_dbg_reg.dac[1];
+	}
+
+	return RESUME_HOST;
 }
 
 static void kvmppc_fill_pt_regs(struct pt_regs *regs)
@@ -843,7 +895,7 @@  int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		break;
 
 	case BOOKE_INTERRUPT_HV_PRIV:
-		r = emulation_exit(run, vcpu);
+		r = emulation_exit(run, vcpu, exit_nr);
 		break;
 
 	case BOOKE_INTERRUPT_PROGRAM:
@@ -862,7 +914,7 @@  int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			break;
 		}
 
-		r = emulation_exit(run, vcpu);
+		r = emulation_exit(run, vcpu, exit_nr);
 		break;
 
 	case BOOKE_INTERRUPT_FP_UNAVAIL:
@@ -1052,18 +1104,12 @@  int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	}
 
 	case BOOKE_INTERRUPT_DEBUG: {
-		u32 dbsr;
-
-		vcpu->arch.pc = mfspr(SPRN_CSRR0);
-
-		/* clear IAC events in DBSR register */
-		dbsr = mfspr(SPRN_DBSR);
-		dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4;
-		mtspr(SPRN_DBSR, dbsr);
-
-		run->exit_reason = KVM_EXIT_DEBUG;
+		r = kvmppc_handle_debug(run, vcpu);
+		if (r == RESUME_HOST) {
+			run->debug.arch.exception = exit_nr;
+			run->exit_reason = KVM_EXIT_DEBUG;
+		}
 		kvmppc_account_exit(vcpu, DEBUG_EXITS);
-		r = RESUME_HOST;
 		break;
 	}
 
@@ -1403,10 +1449,78 @@  int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 	return r;
 }
 
+#define BP_NUM	KVMPPC_BOOKE_IAC_NUM
+#define WP_NUM	KVMPPC_BOOKE_DAC_NUM
+
 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 					 struct kvm_guest_debug *dbg)
 {
-	return -EINVAL;
+
+	if (!(dbg->control & KVM_GUESTDBG_ENABLE)) {
+		/* Clear All debug events */
+		vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
+		vcpu->guest_debug = 0;
+		return 0;
+	}
+
+	vcpu->guest_debug = dbg->control;
+	vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
+
+	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+		vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC;
+
+	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
+		struct kvmppc_booke_debug_reg *gdbgr =
+				&(vcpu->arch.shadow_dbg_reg);
+		int n, b = 0, w = 0;
+		const u32 bp_code[] = {
+			DBCR0_IAC1 | DBCR0_IDM,
+			DBCR0_IAC2 | DBCR0_IDM,
+			DBCR0_IAC3 | DBCR0_IDM,
+			DBCR0_IAC4 | DBCR0_IDM
+		};
+		const u32 wp_code[] = {
+			DBCR0_DAC1W | DBCR0_IDM,
+			DBCR0_DAC2W | DBCR0_IDM,
+			DBCR0_DAC1R | DBCR0_IDM,
+			DBCR0_DAC2R | DBCR0_IDM
+		};
+
+#ifndef CONFIG_KVM_BOOKE_HV
+		gdbgr->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US |
+				DBCR1_IAC3US | DBCR1_IAC4US;
+		gdbgr->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US;
+#else
+		gdbgr->dbcr1 = 0;
+		gdbgr->dbcr2 = 0;
+#endif
+
+		for (n = 0; n < (BP_NUM + WP_NUM); n++) {
+			u32 type = dbg->arch.bp[n].type;
+
+			if (!type)
+				break;
+
+			if (type & (KVMPPC_DEBUG_WATCH_READ |
+				    KVMPPC_DEBUG_WATCH_WRITE)) {
+				if (w < WP_NUM) {
+					if (type & KVMPPC_DEBUG_WATCH_READ)
+						gdbgr->dbcr0 |= wp_code[w + 2];
+					if (type & KVMPPC_DEBUG_WATCH_WRITE)
+						gdbgr->dbcr0 |= wp_code[w];
+					gdbgr->dac[w] = dbg->arch.bp[n].addr;
+					w++;
+				}
+			} else if (type & KVMPPC_DEBUG_BREAKPOINT) {
+				if (b < BP_NUM) {
+					gdbgr->dbcr0 |= bp_code[b];
+					gdbgr->iac[b] = dbg->arch.bp[n].addr;
+					b++;
+				}
+			}
+		}
+	}
+	return 0;
 }
 
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
index dd9c5d4..2202936 100644
--- a/arch/powerpc/kvm/booke_interrupts.S
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -39,6 +39,8 @@ 
 #define HOST_MIN_STACK_SIZE (HOST_NV_GPR(R31) + 4)
 #define HOST_STACK_SIZE (((HOST_MIN_STACK_SIZE + 15) / 16) * 16) /* Align. */
 #define HOST_STACK_LR   (HOST_STACK_SIZE + 4) /* In caller stack frame. */
+#define DBCR0_AC_BITS	(DBCR0_IAC1 | DBCR0_IAC2 | DBCR0_IAC3 | DBCR0_IAC4 | \
+			 DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W)
 
 #define NEED_INST_MASK ((1<<BOOKE_INTERRUPT_PROGRAM) | \
                         (1<<BOOKE_INTERRUPT_DTLB_MISS) | \
@@ -52,6 +54,8 @@ 
                        (1<<BOOKE_INTERRUPT_PROGRAM) | \
                        (1<<BOOKE_INTERRUPT_DTLB_MISS))
 
+#define NEED_DEBUG_SAVE (1<<BOOKE_INTERRUPT_DEBUG)
+
 .macro __KVM_HANDLER ivor_nr scratch srr0
 	stw	r3, VCPU_GPR(R3)(r4)
 	stw	r5, VCPU_GPR(R5)(r4)
@@ -212,6 +216,61 @@  _GLOBAL(kvmppc_resume_host)
 	stw	r9, VCPU_FAULT_ESR(r4)
 ..skip_esr:
 
+	addi	r7, r4, VCPU_HOST_DBG
+	mfspr	r9, SPRN_DBCR0
+	lwz	r8, KVMPPC_DBG_DBCR0(r7)
+	andis.	r9, r9, DBCR0_AC_BITS@h
+	beq	skip_load_host_debug
+	li	r9, 0
+	mtspr	SPRN_DBCR0, r9		/* disable all debug event */
+	lwz	r9, KVMPPC_DBG_DBCR1(r7)
+	mtspr	SPRN_DBCR1, r9
+	lwz	r9, KVMPPC_DBG_DBCR2(r7)
+	mtspr	SPRN_DBCR2, r9
+	lwz	r9, KVMPPC_DBG_IAC1+4(r7)
+	mtspr	SPRN_IAC1, r9
+	lwz	r9, KVMPPC_DBG_IAC2+4(r7)
+	mtspr	SPRN_IAC2, r9
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+	lwz	r9, KVMPPC_DBG_IAC3+4(r7)
+	mtspr	SPRN_IAC3, r9
+	lwz	r9, KVMPPC_DBG_IAC4+4(r7)
+	mtspr	SPRN_IAC4, r9
+#endif
+	lwz	r9, KVMPPC_DBG_DAC1+4(r7)
+	mtspr	SPRN_DAC1, r9
+	lwz	r9, KVMPPC_DBG_DAC2+4(r7)
+	mtspr	SPRN_DAC2, r9
+skip_load_host_debug:
+	/* Clear h/w DBSR and save current(guest) DBSR */
+	mfspr	r9, SPRN_DBSR
+	mtspr	SPRN_DBSR, r9
+	isync
+	andi.	r7, r6, NEED_DEBUG_SAVE
+	beq	skip_dbsr_save
+	/*
+	 * If vcpu->guest_debug flag is set then do not check for
+	 * shared->msr.DE as this debugging (say by QEMU) does not
+	 * depends on shared->msr.de. In these scanerios MSR.DE is
+	 * always set using shared_msr and should be handled always.
+	 */
+	lwz	r7, VCPU_GUEST_DEBUG(r4)
+	cmpwi	r7, 0
+	bne	skip_save_trap_event
+	PPC_LL	r3, VCPU_SHARED(r4)
+#ifndef CONFIG_64BIT
+	lwz	r3, (VCPU_SHARED_MSR + 4)(r3)
+#else
+	ld	r3, (VCPU_SHARED_MSR)(r3)
+#endif
+	andi.	r3, r3, MSR_DE
+	bne	skip_save_trap_event
+	andis.	r9, r9, DBSR_TIE@h
+skip_save_trap_event:
+	stw	r9, VCPU_DBSR(r4)
+skip_dbsr_save:
+	mtspr	SPRN_DBCR0, r8
+
 	/* Save remaining volatile guest register state to vcpu. */
 	stw	r0, VCPU_GPR(R0)(r4)
 	stw	r1, VCPU_GPR(R1)(r4)
@@ -465,6 +524,57 @@  lightweight_exit:
 	PPC_LD(r3, VCPU_SHARED_SPRG7, r5)
 	mtspr	SPRN_SPRG7W, r3
 
+	mfmsr	r7
+	rlwinm	r7, r7, 0, ~MSR_DE
+	mtmsr	r7
+	addi	r5, r4, VCPU_SHADOW_DBG
+	addi	r7, r4, VCPU_HOST_DBG
+	lwz	r6, 0(r5)
+	mfspr	r8, SPRN_DBCR0
+	andis.	r3, r6, DBCR0_AC_BITS@h
+	stw	r8, KVMPPC_DBG_DBCR0(r7)
+	beq	skip_load_guest_debug
+	mfspr	r8, SPRN_DBCR1
+	stw	r8, KVMPPC_DBG_DBCR1(r7)
+	mfspr	r8, SPRN_DBCR2
+	stw	r8, KVMPPC_DBG_DBCR2(r7)
+	mfspr	r8, SPRN_IAC1
+	stw	r8, KVMPPC_DBG_IAC1+4(r7)
+	mfspr	r8, SPRN_IAC2
+	stw	r8, KVMPPC_DBG_IAC2+4(r7)
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+	mfspr	r8, SPRN_IAC3
+	stw	r8, KVMPPC_DBG_IAC3+4(r7)
+	mfspr	r8, SPRN_IAC4
+	stw	r8, KVMPPC_DBG_IAC4+4(r7)
+#endif
+	mfspr	r8, SPRN_DAC1
+	stw	r8, KVMPPC_DBG_DAC1+4(r7)
+	mfspr	r8, SPRN_DAC2
+	stw	r8, KVMPPC_DBG_DAC2+4(r7)
+	li	r8, 0
+	mtspr	SPRN_DBCR0, r8		/* disable all debug event */
+	lwz	r8, KVMPPC_DBG_DBCR1(r5)
+	mtspr	SPRN_DBCR1, r8
+	lwz	r8, KVMPPC_DBG_DBCR2(r5)
+	mtspr	SPRN_DBCR2, r8
+	lwz	r8, KVMPPC_DBG_IAC1+4(r5)
+	mtspr	SPRN_IAC1, r8
+	lwz	r8, KVMPPC_DBG_IAC2+4(r5)
+	mtspr	SPRN_IAC2, r8
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+	lwz	r8, KVMPPC_DBG_IAC3+4(r5)
+	mtspr	SPRN_IAC3, r8
+	lwz	r8, KVMPPC_DBG_IAC4+4(r5)
+	mtspr	SPRN_IAC4, r8
+#endif
+	lwz	r8, KVMPPC_DBG_DAC1+4(r5)
+	mtspr	SPRN_DAC1, r8
+	lwz	r8, KVMPPC_DBG_DAC2+4(r5)
+	mtspr	SPRN_DAC2, r8
+skip_load_guest_debug:
+	mtspr	SPRN_DBCR0, r6
+
 #ifdef CONFIG_KVM_EXIT_TIMING
 	/* save enter time */
 1:
diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S
index 099fe82..d021735 100644
--- a/arch/powerpc/kvm/bookehv_interrupts.S
+++ b/arch/powerpc/kvm/bookehv_interrupts.S
@@ -59,6 +59,10 @@ 
 #define NEED_EMU		0x00000001 /* emulation -- save nv regs */
 #define NEED_DEAR		0x00000002 /* save faulting DEAR */
 #define NEED_ESR		0x00000004 /* save faulting ESR */
+#define NEED_DBSR		0x00000008 /* save DBSR */
+
+#define DBCR0_AC_BITS	(DBCR0_IAC1 | DBCR0_IAC2 | DBCR0_IAC3 | DBCR0_IAC4 | \
+			 DBCR0_DAC1R | DBCR0_DAC1W | DBCR0_DAC2R | DBCR0_DAC2W)
 
 /*
  * On entry:
@@ -198,6 +202,11 @@ 
 	PPC_STL	r9, VCPU_FAULT_DEAR(r4)
 	.endif
 
+	.if	\flags & NEED_DBSR
+	mfspr	r9, SPRN_DBSR
+	stw	r9, VCPU_DBSR(r4)
+	.endif
+
 	b	kvmppc_resume_host
 .endm
 
@@ -292,9 +301,9 @@  kvm_handler BOOKE_INTERRUPT_GUEST_DBELL, SPRN_GSRR0, SPRN_GSRR1, 0
 kvm_lvl_handler BOOKE_INTERRUPT_GUEST_DBELL_CRIT, \
 	SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0
 kvm_lvl_handler BOOKE_INTERRUPT_DEBUG, \
-	SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0
+	SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, NEED_DBSR
 kvm_lvl_handler BOOKE_INTERRUPT_DEBUG, \
-	SPRN_SPRG_RSCRATCH_DBG, SPRN_DSRR0, SPRN_DSRR1, 0
+	SPRN_SPRG_RSCRATCH_DBG, SPRN_DSRR0, SPRN_DSRR1, NEED_DBSR
 
 
 /* Registers:
@@ -304,6 +313,56 @@  kvm_lvl_handler BOOKE_INTERRUPT_DEBUG, \
  *  r14: KVM exit number
  */
 _GLOBAL(kvmppc_resume_host)
+	/*
+	 * If guest not used debug facility then hw debug registers
+	 * already have proper host values. If guest used debug
+	 * facility then restore host debug registers.
+	 * No Need to save guest debug registers as they are already intact
+	 * in guest/shadow registers.
+	 */
+	lwz	r9, VCPU_SHADOW_DBG(r4)
+	rlwinm.	r8, r9, 0, ~DBCR0_IDM
+	beq	skip_load_host_debug
+	lwz	r3, VCPU_HOST_DBG(r4)
+	andis.	r9, r9, DBCR0_AC_BITS@h
+	li	r9, 0
+	mtspr	SPRN_DBCR0, r9		/* disable all debug event */
+	beq	skip_load_hw_bkpts
+	lwz	r7, VCPU_HOST_DBG+KVMPPC_DBG_DBCR1(r4)
+	lwz	r8, VCPU_HOST_DBG+KVMPPC_DBG_DBCR2(r4)
+	lwz	r9, VCPU_HOST_DBG+KVMPPC_DBG_DBCR4(r4)
+	mtspr	SPRN_DBCR1, r7
+	PPC_LD(r6, VCPU_HOST_DBG+KVMPPC_DBG_IAC1, r4)
+	PPC_LD(r7, VCPU_HOST_DBG+KVMPPC_DBG_IAC2, r4)
+	mtspr	SPRN_DBCR2, r8
+	mtspr	SPRN_DBCR4, r9
+	mtspr	SPRN_IAC1, r6
+	mtspr	SPRN_IAC2, r7
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+	PPC_LD(r7, VCPU_HOST_DBG+KVMPPC_DBG_IAC3, r4)
+	PPC_LD(r8, VCPU_HOST_DBG+KVMPPC_DBG_IAC4, r4)
+	mtspr	SPRN_IAC3, r7
+	mtspr	SPRN_IAC4, r8
+#endif
+	PPC_LD(r8, VCPU_HOST_DBG+KVMPPC_DBG_DAC1, r4)
+	PPC_LD(r9, VCPU_HOST_DBG+KVMPPC_DBG_DAC2, r4)
+	mtspr	SPRN_DAC1, r8
+	mtspr	SPRN_DAC2, r9
+skip_load_hw_bkpts:
+	isync
+	/* Clear h/w DBSR and save current(guest) DBSR */
+	mfspr	r8, SPRN_DBSR
+	mtspr	SPRN_DBSR, r8
+	isync
+	/* Clear EPCR.DUVD and set host DBCR0 */
+	mfspr	r8, SPRN_EPCR
+	rlwinm	r8, r8, 0, ~SPRN_EPCR_DUVD
+	mtspr	SPRN_EPCR, r8
+	isync
+	mtspr	SPRN_DBCR0, r3
+	isync
+skip_load_host_debug:
+
 	/* Save remaining volatile guest register state to vcpu. */
 	mfspr	r3, SPRN_VRSAVE
 	PPC_STL	r0, VCPU_GPR(R0)(r4)
@@ -543,6 +602,84 @@  lightweight_exit:
 	mtspr	SPRN_SPRG6W, r7
 	mtspr	SPRN_SPRG7W, r8
 
+	mfmsr	r7
+	rlwinm	r7, r7, 0, ~MSR_DE
+	mtmsr	r7
+	/*
+	 * Load hw debug registers with guest(shadow) debug registers
+	 * if guest is using the debug facility and also set EPCR.DUVD
+	 * to not allow debug events in HV mode. Do not change the
+	 * debug registers if guest is not using the debug facility.
+	 */
+	lwz	r6, VCPU_SHADOW_DBG(r4)
+	rlwinm.	r7, r6, 0, ~DBCR0_IDM
+	beq	skip_load_guest_debug
+	/* Save host DBCR0 */
+	mfspr	r8, SPRN_DBCR0
+	stw	r8, VCPU_HOST_DBG(r4)
+	/*
+	 * Save host DBCR1/2, IACx and DACx and load guest DBCR1/2,
+	 * IACx and DACx if guest using hw breakpoint/watchpoints.
+	 */
+	andis.	r3, r6, DBCR0_AC_BITS@h
+	beq	skip_hw_bkpts
+	mfspr	r7, SPRN_DBCR1
+	stw	r7, VCPU_HOST_DBG+KVMPPC_DBG_DBCR1(r4)
+	mfspr	r8, SPRN_DBCR2
+	stw	r8, VCPU_HOST_DBG+KVMPPC_DBG_DBCR2(r4)
+	mfspr	r7, SPRN_DBCR4
+	stw	r7, VCPU_HOST_DBG+KVMPPC_DBG_DBCR4(r4)
+	mfspr	r8, SPRN_IAC1
+	PPC_STD(r8, VCPU_HOST_DBG+KVMPPC_DBG_IAC1, r4)
+	mfspr	r7, SPRN_IAC2
+	PPC_STD(r7, VCPU_HOST_DBG+KVMPPC_DBG_IAC2, r4)
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+	mfspr	r8, SPRN_IAC3
+	PPC_STD(r8, VCPU_HOST_DBG+KVMPPC_DBG_IAC3, r4)
+	mfspr	r7, SPRN_IAC4
+	PPC_STD(r7, VCPU_HOST_DBG+KVMPPC_DBG_IAC4, r4)
+#endif
+	mfspr	r8, SPRN_DAC1
+	PPC_STD(r8, VCPU_HOST_DBG+KVMPPC_DBG_DAC1, r4)
+	mfspr	r7, SPRN_DAC2
+	PPC_STD(r7, VCPU_HOST_DBG+KVMPPC_DBG_DAC2, r4)
+	li	r8, 0
+	mtspr	SPRN_DBCR0, r8		/* disable all debug event */
+	lwz	r7, VCPU_SHADOW_DBG+KVMPPC_DBG_DBCR1(r4)
+	lwz	r8, VCPU_SHADOW_DBG+KVMPPC_DBG_DBCR2(r4)
+	lwz	r9, VCPU_SHADOW_DBG+KVMPPC_DBG_DBCR4(r4)
+	mtspr	SPRN_DBCR1, r7
+	PPC_LD(r7, VCPU_SHADOW_DBG+KVMPPC_DBG_IAC1, r4)
+	PPC_LD(r3, VCPU_SHADOW_DBG+KVMPPC_DBG_IAC2, r4)
+	mtspr	SPRN_DBCR2, r8
+	mtspr	SPRN_DBCR4, r9
+	mtspr	SPRN_IAC1, r7
+	mtspr	SPRN_IAC2, r3
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+	PPC_LD(r7, VCPU_SHADOW_DBG+KVMPPC_DBG_IAC3, r4)
+	PPC_LD(r8, VCPU_SHADOW_DBG+KVMPPC_DBG_IAC4, r4)
+	mtspr	SPRN_IAC3, r7
+	mtspr	SPRN_IAC4, r8
+#endif
+	PPC_LD(r7, VCPU_SHADOW_DBG+KVMPPC_DBG_DAC1, r4)
+	PPC_LD(r8, VCPU_SHADOW_DBG+KVMPPC_DBG_DAC2, r4)
+	mtspr	SPRN_DAC1, r7
+	mtspr	SPRN_DAC2, r8
+skip_hw_bkpts:
+	/* Set EPCR.DUVD and guest DBCR0 */
+	mfspr	r7, SPRN_EPCR
+	oris	r7, r7, SPRN_EPCR_DUVD@h
+	mtspr	SPRN_EPCR, r7
+	isync
+	/* Clear if any deferred debug event */
+	mfspr	r8, SPRN_DBSR
+	mtspr	SPRN_DBSR, r8
+	isync
+	/* Restore guest DBCR */
+	mtspr	SPRN_DBCR0, r6
+	isync
+skip_load_guest_debug:
+
 	/* Load some guest volatiles. */
 	PPC_LL	r3, VCPU_LR(r4)
 	PPC_LL	r5, VCPU_XER(r4)
diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c
index 1f89d26..f5fc6f5 100644
--- a/arch/powerpc/kvm/e500mc.c
+++ b/arch/powerpc/kvm/e500mc.c
@@ -182,8 +182,7 @@  int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
 
-	vcpu->arch.shadow_epcr = SPRN_EPCR_DSIGS | SPRN_EPCR_DGTMI | \
-				 SPRN_EPCR_DUVD;
+	vcpu->arch.shadow_epcr = SPRN_EPCR_DSIGS | SPRN_EPCR_DGTMI;
 #ifdef CONFIG_64BIT
 	vcpu->arch.shadow_epcr |= SPRN_EPCR_ICM;
 #endif