diff mbox

[4/4,v2] KVM: PPC: Add userspace debug stub support

Message ID 1363847101-26503-5-git-send-email-Bharat.Bhushan@freescale.com (mailing list archive)
State New, archived
Headers show

Commit Message

Bharat Bhushan March 21, 2013, 6:25 a.m. UTC
From: Bharat Bhushan <bharat.bhushan@freescale.com>

This patch adds the debug stub support on booke/bookehv.
Now QEMU debug stub can use hw breakpoint, watchpoint and
software breakpoint to debug guest.

Debug registers are saved/restored on vcpu_put()/vcpu_get().
Also the debug registers are saved restored only if guest is using
debug resources.

Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
---
v2:
 - save/restore in vcpu_get()/vcpu_put()
 - some more minor cleanup based on review comments.

 arch/powerpc/include/asm/kvm_host.h |   10 ++
 arch/powerpc/include/uapi/asm/kvm.h |   22 +++-
 arch/powerpc/kvm/booke.c            |  252 ++++++++++++++++++++++++++++++++---
 arch/powerpc/kvm/e500_emulate.c     |   10 ++
 4 files changed, 272 insertions(+), 22 deletions(-)

Comments

Alexander Graf March 28, 2013, 4:36 p.m. UTC | #1
On 21.03.2013, at 07:25, Bharat Bhushan wrote:

> From: Bharat Bhushan <bharat.bhushan@freescale.com>
> 
> This patch adds the debug stub support on booke/bookehv.
> Now QEMU debug stub can use hw breakpoint, watchpoint and
> software breakpoint to debug guest.
> 
> Debug registers are saved/restored on vcpu_put()/vcpu_get().
> Also the debug registers are saved restored only if guest is using
> debug resources.
> 
> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
> ---
> v2:
> - save/restore in vcpu_get()/vcpu_put()
> - some more minor cleanup based on review comments.
> 
> arch/powerpc/include/asm/kvm_host.h |   10 ++
> arch/powerpc/include/uapi/asm/kvm.h |   22 +++-
> arch/powerpc/kvm/booke.c            |  252 ++++++++++++++++++++++++++++++++---
> arch/powerpc/kvm/e500_emulate.c     |   10 ++
> 4 files changed, 272 insertions(+), 22 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
> index f4ba881..8571952 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -504,7 +504,17 @@ struct kvm_vcpu_arch {
> 	u32 mmucfg;
> 	u32 epr;
> 	u32 crit_save;
> +	/* guest debug registers*/
> 	struct kvmppc_booke_debug_reg dbg_reg;
> +	/* shadow debug registers */
> +	struct kvmppc_booke_debug_reg shadow_dbg_reg;
> +	/* host debug registers*/
> +	struct kvmppc_booke_debug_reg host_dbg_reg;
> +	/*
> +	 * Flag indicating that debug registers are used by guest
> +	 * and requires save restore.
> +	*/
> +	bool debug_save_restore;
> #endif
> 	gpa_t paddr_accessed;
> 	gva_t vaddr_accessed;
> diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
> index 15f9a00..d7ce449 100644
> --- a/arch/powerpc/include/uapi/asm/kvm.h
> +++ b/arch/powerpc/include/uapi/asm/kvm.h
> @@ -25,6 +25,7 @@
> /* Select powerpc specific features in <linux/kvm.h> */
> #define __KVM_HAVE_SPAPR_TCE
> #define __KVM_HAVE_PPC_SMT
> +#define __KVM_HAVE_GUEST_DEBUG
> 
> struct kvm_regs {
> 	__u64 pc;
> @@ -267,7 +268,24 @@ struct kvm_fpu {
> 	__u64 fpr[32];
> };
> 
> +/*
> + * Defines for h/w breakpoint, watchpoint (read, write or both) and
> + * software breakpoint.
> + * These are used as "type" in KVM_SET_GUEST_DEBUG ioctl and "status"
> + * for KVM_DEBUG_EXIT.
> + */
> +#define KVMPPC_DEBUG_NONE		0x0
> +#define KVMPPC_DEBUG_BREAKPOINT		(1UL << 1)
> +#define KVMPPC_DEBUG_WATCH_WRITE	(1UL << 2)
> +#define KVMPPC_DEBUG_WATCH_READ		(1UL << 3)
> struct kvm_debug_exit_arch {
> +	__u64 address;
> +	/*
> +	 * exiting to userspace because of h/w breakpoint, watchpoint
> +	 * (read, write or both) and software breakpoint.
> +	 */
> +	__u32 status;
> +	__u32 reserved;
> };
> 
> /* for KVM_SET_GUEST_DEBUG */
> @@ -279,10 +297,6 @@ struct kvm_guest_debug_arch {
> 		 * Type denotes h/w breakpoint, read watchpoint, write
> 		 * watchpoint or watchpoint (both read and write).
> 		 */
> -#define KVMPPC_DEBUG_NOTYPE		0x0
> -#define KVMPPC_DEBUG_BREAKPOINT		(1UL << 1)
> -#define KVMPPC_DEBUG_WATCH_WRITE	(1UL << 2)
> -#define KVMPPC_DEBUG_WATCH_READ		(1UL << 3)
> 		__u32 type;
> 		__u32 reserved;
> 	} bp[16];
> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
> index 1de93a8..bf20056 100644
> --- a/arch/powerpc/kvm/booke.c
> +++ b/arch/powerpc/kvm/booke.c
> @@ -133,6 +133,30 @@ static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu)
> #endif
> }
> 
> +static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu)
> +{
> +	/* Synchronize guest's desire to get debug interrupts into shadow MSR */
> +#ifndef CONFIG_KVM_BOOKE_HV
> +	vcpu->arch.shadow_msr &= ~MSR_DE;
> +	vcpu->arch.shadow_msr |= vcpu->arch.shared->msr & MSR_DE;
> +#endif
> +
> +	/* Force enable debug interrupts when user space wants to debug */
> +	if (vcpu->guest_debug) {
> +#ifdef CONFIG_KVM_BOOKE_HV
> +		/*
> +		 * Since there is no shadow MSR, sync MSR_DE into the guest
> +		 * visible MSR. Do not allow guest to change MSR[DE].
> +		 */
> +		vcpu->arch.shared->msr |= MSR_DE;
> +		mtspr(SPRN_MSRP, mfspr(SPRN_MSRP) | MSRP_DEP);

This mtspr should really just be a bit or in shadow_mspr when guest_debug gets enabled. It should automatically get synchronized as soon as the next vpcu_load() happens.

Also, what happens when user space disables guest_debug?

> +#else
> +		vcpu->arch.shadow_msr |= MSR_DE;
> +		vcpu->arch.shared->msr &= ~MSR_DE;
> +#endif
> +	}
> +}
> +
> /*
>  * Helper function for "full" MSR writes.  No need to call this if only
>  * EE/CE/ME/DE/RI are changing.
> @@ -150,6 +174,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
> 	kvmppc_mmu_msr_notify(vcpu, old_msr);
> 	kvmppc_vcpu_sync_spe(vcpu);
> 	kvmppc_vcpu_sync_fpu(vcpu);
> +	kvmppc_vcpu_sync_debug(vcpu);
> }
> 
> static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
> @@ -736,6 +761,9 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
> 		run->exit_reason = KVM_EXIT_DCR;
> 		return RESUME_HOST;
> 
> +	case EMULATE_EXIT_USER:
> +		return RESUME_HOST;

This should get folded into the previous patch or be a separate one.

> +
> 	case EMULATE_FAIL:
> 		printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
> 		       __func__, vcpu->arch.pc, vcpu->arch.last_inst);
> @@ -751,6 +779,30 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
> 	}
> }
> 
> +static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu *vcpu)
> +{
> +	u32 dbsr = mfspr(SPRN_DBSR);
> +	mtspr(SPRN_DBSR, dbsr);

This definitely deserves a comment :). Also, are we non-preemptible here?

> +
> +	run->debug.arch.status = 0;
> +	run->debug.arch.address = vcpu->arch.pc;
> +
> +	if (dbsr & (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) {
> +		run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT;
> +	} else {
> +		if (dbsr & (DBSR_DAC1W | DBSR_DAC2W))
> +			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_WRITE;
> +		else if (dbsr & (DBSR_DAC1R | DBSR_DAC2R))
> +			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_READ;
> +		if (dbsr & (DBSR_DAC1R | DBSR_DAC1W))
> +			run->debug.arch.address = vcpu->arch.shadow_dbg_reg.dac[0];
> +		else if (dbsr & (DBSR_DAC2R | DBSR_DAC2W))
> +			run->debug.arch.address = vcpu->arch.shadow_dbg_reg.dac[1];
> +	}
> +
> +	return RESUME_HOST;

Shouldn't this check for guest_debug and only go to user space if it asked for debugging? Can't you just leave it at the old code for !guest_debug?

> +}
> +
> static void kvmppc_fill_pt_regs(struct pt_regs *regs)
> {
> 	ulong r1, ip, msr, lr;
> @@ -1110,18 +1162,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
> 	}
> 
> 	case BOOKE_INTERRUPT_DEBUG: {
> -		u32 dbsr;
> -
> -		vcpu->arch.pc = mfspr(SPRN_CSRR0);
> -
> -		/* clear IAC events in DBSR register */
> -		dbsr = mfspr(SPRN_DBSR);
> -		dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4;
> -		mtspr(SPRN_DBSR, dbsr);
> -
> -		run->exit_reason = KVM_EXIT_DEBUG;
> +		r = kvmppc_handle_debug(run, vcpu);
> +		if (r == RESUME_HOST)
> +			run->exit_reason = KVM_EXIT_DEBUG;
> 		kvmppc_account_exit(vcpu, DEBUG_EXITS);
> -		r = RESUME_HOST;
> 		break;
> 	}
> 
> @@ -1172,7 +1216,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
> 	kvmppc_set_msr(vcpu, 0);
> 
> #ifndef CONFIG_KVM_BOOKE_HV
> -	vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS;
> +	vcpu->arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS;
> 	vcpu->arch.shadow_pid = 1;
> 	vcpu->arch.shared->msr = 0;
> #endif
> @@ -1527,12 +1571,6 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
> 	return r;
> }
> 
> -int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
> -					 struct kvm_guest_debug *dbg)
> -{
> -	return -EINVAL;
> -}
> -
> int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
> {
> 	return -ENOTSUPP;
> @@ -1638,16 +1676,194 @@ void kvmppc_decrementer_func(unsigned long data)
> 	kvmppc_set_tsr_bits(vcpu, TSR_DIS);
> }
> 
> +static void kvmppc_booke_vcpu_put_debug_regs(struct kvm_vcpu *vcpu)
> +{
> +	if (!vcpu->arch.debug_save_restore)
> +		return;
> +
> +	/* Restore Host Context. Disable all debug events First */
> +	mtspr(SPRN_DBCR0, 0x0);
> +	/* Disable pending debug event by Clearing DBSR */
> +	mtspr(SPRN_DBSR, mfspr(SPRN_DBSR));
> +
> +	mtspr(SPRN_DBCR1, vcpu->arch.host_dbg_reg.dbcr1);
> +	mtspr(SPRN_DBCR2, vcpu->arch.host_dbg_reg.dbcr2);
> +#ifdef CONFIG_KVM_E500MC
> +	mtspr(SPRN_DBCR4, vcpu->arch.host_dbg_reg.dbcr4);
> +#endif
> +	mtspr(SPRN_IAC1, vcpu->arch.host_dbg_reg.iac[0]);
> +	mtspr(SPRN_IAC2, vcpu->arch.host_dbg_reg.iac[1]);
> +#if CONFIG_PPC_ADV_DEBUG_IACS > 2
> +	mtspr(SPRN_IAC3, vcpu->arch.host_dbg_reg.iac[2]);
> +	mtspr(SPRN_IAC4, vcpu->arch.host_dbg_reg.iac[3]);
> +#endif
> +	mtspr(SPRN_DAC1, vcpu->arch.host_dbg_reg.dac[0]);
> +	mtspr(SPRN_DAC2, vcpu->arch.host_dbg_reg.dac[1]);
> +
> +	/* Enable debug events after all other debug registers restored */
> +	mtspr(SPRN_DBCR0, vcpu->arch.host_dbg_reg.dbcr0);

How does the normal debug register switching code work in Linux? Can't we just reuse that? Or rely on it to restore working state when another process gets scheduled in?

> +
> +	/* Host debug register are restored */
> +	vcpu->arch.debug_save_restore = false;
> +}
> +
> +static void kvmppc_booke_vcpu_load_debug_regs(struct kvm_vcpu *vcpu)
> +{
> +	/*
> +	 * Check whether guest still need debug resource, if not then there
> +	 * is no need to resotre guest context.

restore

> +	 */
> +	if (!vcpu->arch.shadow_dbg_reg.dbcr0)
> +		return;

Are we guaranteed that debugging is disabled here? We don't want to get debug exceptions that were meant for some other process.

> +
> +	/*
> +	 * Save Host debug register only after restore. If host debug
> +	 * registers are saved and not restored then do not save again.
> +	 */
> +	if (!vcpu->arch.debug_save_restore) {
> +		/* Save Host context */
> +		vcpu->arch.host_dbg_reg.dbcr0 = mfspr(SPRN_DBCR0);
> +		vcpu->arch.host_dbg_reg.dbcr1 = mfspr(SPRN_DBCR1);
> +		vcpu->arch.host_dbg_reg.dbcr2 = mfspr(SPRN_DBCR2);
> +#ifdef CONFIG_KVM_E500MC
> +		vcpu->arch.host_dbg_reg.dbcr4 = mfspr(SPRN_DBCR4);
> +#endif
> +		vcpu->arch.host_dbg_reg.iac[0] = mfspr(SPRN_IAC1);
> +		vcpu->arch.host_dbg_reg.iac[1] = mfspr(SPRN_IAC2);
> +#if CONFIG_PPC_ADV_DEBUG_IACS > 2
> +		vcpu->arch.host_dbg_reg.iac[2] = mfspr(SPRN_IAC3);
> +		vcpu->arch.host_dbg_reg.iac[3] = mfspr(SPRN_IAC4);
> +#endif
> +		vcpu->arch.host_dbg_reg.dac[0] = mfspr(SPRN_DAC1);
> +		vcpu->arch.host_dbg_reg.dac[1] = mfspr(SPRN_DAC2);
> +		vcpu->arch.debug_save_restore = true;
> +	}
> +
> +	/* Restore Guest Context. Disable all debug events First */
> +	mtspr(SPRN_DBCR0, 0x0);
> +	/* Clear h/w DBSR */
> +	mtspr(SPRN_DBSR, mfspr(SPRN_DBSR));
> +
> +	mtspr(SPRN_DBCR1, vcpu->arch.shadow_dbg_reg.dbcr1);
> +	mtspr(SPRN_DBCR2, vcpu->arch.shadow_dbg_reg.dbcr2);
> +#ifdef CONFIG_KVM_E500MC
> +	mtspr(SPRN_DBCR4, vcpu->arch.shadow_dbg_reg.dbcr4);
> +#endif
> +	mtspr(SPRN_IAC1, vcpu->arch.shadow_dbg_reg.iac[0]);
> +	mtspr(SPRN_IAC2, vcpu->arch.shadow_dbg_reg.iac[1]);
> +#if CONFIG_PPC_ADV_DEBUG_IACS > 2
> +	mtspr(SPRN_IAC3, vcpu->arch.shadow_dbg_reg.iac[2]);
> +	mtspr(SPRN_IAC4, vcpu->arch.shadow_dbg_reg.iac[3]);
> +#endif
> +	mtspr(SPRN_DAC1, vcpu->arch.shadow_dbg_reg.dac[0]);
> +	mtspr(SPRN_DAC2, vcpu->arch.shadow_dbg_reg.dac[1]);
> +
> +	/* Enable debug events after other debug registers restored */
> +	mtspr(SPRN_DBCR0, vcpu->arch.shadow_dbg_reg.dbcr0);
> +}
> +
> +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
> +					 struct kvm_guest_debug *dbg)
> +{
> +	struct kvmppc_booke_debug_reg *dbg_reg;
> +	int n, b = 0, w = 0;
> +	const u32 bp_code[] = {
> +		DBCR0_IAC1 | DBCR0_IDM,
> +		DBCR0_IAC2 | DBCR0_IDM,
> +		DBCR0_IAC3 | DBCR0_IDM,
> +		DBCR0_IAC4 | DBCR0_IDM
> +	};
> +	const u32 wp_code[] = {
> +		DBCR0_DAC1W | DBCR0_IDM,
> +		DBCR0_DAC2W | DBCR0_IDM,
> +		DBCR0_DAC1R | DBCR0_IDM,
> +		DBCR0_DAC2R | DBCR0_IDM
> +	};
> +
> +	if (!(dbg->control & KVM_GUESTDBG_ENABLE)) {
> +		/* Clear All debug events */
> +		vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
> +		vcpu->guest_debug = 0;

Ah, this is where we disable guest_debug. This needs to enable guest_debug for the guest again, so you need to remove the DE bit from shadow_msrp here.

> +		return 0;
> +	}
> +
> +	vcpu->guest_debug = dbg->control;
> +	vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
> +	/* Set DBCR0_EDM in guest visible DBCR0 register. */
> +	vcpu->arch.dbg_reg.dbcr0 = DBCR0_EDM;
> +
> +	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
> +		vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC;
> +
> +	if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
> +		/* Code below handles only HW breakpoints */
> +		kvmppc_booke_vcpu_load_debug_regs(vcpu);
> +		return 0;
> +	}
> +
> +	dbg_reg = &(vcpu->arch.shadow_dbg_reg);
> +
> +	/*
> +	 * On BOOKE (e500v2); Set DBCR1 and DBCR2 to allow debug events
> +	 * to occur when MSR.PR is set.
> +	 * On BOOKE-HV (e500mc+); MSR.PR = 0 when guest is running. So we
> +	 * should clear DBCR1 and DBCR2.
> +	 */
> +#ifdef CONFIG_KVM_BOOKE_HV
> +	dbg_reg->dbcr1 = 0;
> +	dbg_reg->dbcr2 = 0;

Does that mean we can't debug guest user space?

> +#else
> +	dbg_reg->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US | DBCR1_IAC3US |
> +			  DBCR1_IAC4US;
> +	dbg_reg->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US;
> +#endif
> +
> +	for (n = 0; n < (KVMPPC_BOOKE_IAC_NUM + KVMPPC_BOOKE_DAC_NUM); n++) {
> +		u32 type = dbg->arch.bp[n].type;
> +
> +		if (!type)
> +			break;

Not continue?

> +
> +		if (type & (KVMPPC_DEBUG_WATCH_READ |
> +			    KVMPPC_DEBUG_WATCH_WRITE)) {
> +			if (w >= KVMPPC_BOOKE_DAC_NUM)
> +				continue;

This should result in an error, no?

> +
> +			if (type & KVMPPC_DEBUG_WATCH_READ)
> +				dbg_reg->dbcr0 |= wp_code[w + 2];
> +			if (type & KVMPPC_DEBUG_WATCH_WRITE)
> +				dbg_reg->dbcr0 |= wp_code[w];
> +
> +			dbg_reg->dac[w] = dbg->arch.bp[n].addr;
> +			w++;
> +		} else if (type & KVMPPC_DEBUG_BREAKPOINT) {
> +			if (b >= KVMPPC_BOOKE_IAC_NUM)
> +				continue;

Same here.

> +
> +			dbg_reg->dbcr0 |= bp_code[b];
> +			dbg_reg->iac[b] = dbg->arch.bp[n].addr;
> +			b++;
> +		}
> +	}
> +
> +	kvmppc_booke_vcpu_load_debug_regs(vcpu);
> +	return 0;
> +}
> +
> void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
> {
> 	vcpu->cpu = smp_processor_id();
> 	current->thread.kvm_vcpu = vcpu;
> +
> +	kvmppc_booke_vcpu_load_debug_regs(vcpu);
> }
> 
> void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu)
> {
> 	current->thread.kvm_vcpu = NULL;
> 	vcpu->cpu = -1;
> +
> +	kvmppc_booke_vcpu_put_debug_regs(vcpu);
> }
> 
> int __init kvmppc_booke_init(void)
> diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
> index e78f353..cefdd38 100644
> --- a/arch/powerpc/kvm/e500_emulate.c
> +++ b/arch/powerpc/kvm/e500_emulate.c
> @@ -26,6 +26,7 @@
> #define XOP_TLBRE   946
> #define XOP_TLBWE   978
> #define XOP_TLBILX  18
> +#define XOP_EHPRIV  270
> 
> #ifdef CONFIG_KVM_E500MC
> static int dbell2prio(ulong param)
> @@ -130,6 +131,15 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
> 			emulated = kvmppc_e500_emul_tlbivax(vcpu, ea);
> 			break;
> 
> +		case XOP_EHPRIV:
> +			run->exit_reason = KVM_EXIT_DEBUG;
> +			run->debug.arch.address = vcpu->arch.pc;
> +			run->debug.arch.status = 0;
> +			kvmppc_account_exit(vcpu, DEBUG_EXITS);
> +			emulated = EMULATE_EXIT_USER;
> +			*advance = 0;
> +			break;
> +

Looks good.

I would appreciate if you could split this patch up a bit more.


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bharat Bhushan March 29, 2013, 6:04 a.m. UTC | #2
> -----Original Message-----
> From: Alexander Graf [mailto:agraf@suse.de]
> Sent: Thursday, March 28, 2013 10:06 PM
> To: Bhushan Bharat-R65777
> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Wood Scott-B07421; Bhushan
> Bharat-R65777
> Subject: Re: [PATCH 4/4 v2] KVM: PPC: Add userspace debug stub support
> 
> 
> On 21.03.2013, at 07:25, Bharat Bhushan wrote:
> 
> > From: Bharat Bhushan <bharat.bhushan@freescale.com>
> >
> > This patch adds the debug stub support on booke/bookehv.
> > Now QEMU debug stub can use hw breakpoint, watchpoint and software
> > breakpoint to debug guest.
> >
> > Debug registers are saved/restored on vcpu_put()/vcpu_get().
> > Also the debug registers are saved restored only if guest is using
> > debug resources.
> >
> > Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
> > ---
> > v2:
> > - save/restore in vcpu_get()/vcpu_put()
> > - some more minor cleanup based on review comments.
> >
> > arch/powerpc/include/asm/kvm_host.h |   10 ++
> > arch/powerpc/include/uapi/asm/kvm.h |   22 +++-
> > arch/powerpc/kvm/booke.c            |  252 ++++++++++++++++++++++++++++++++---
> > arch/powerpc/kvm/e500_emulate.c     |   10 ++
> > 4 files changed, 272 insertions(+), 22 deletions(-)
> >
> > diff --git a/arch/powerpc/include/asm/kvm_host.h
> > b/arch/powerpc/include/asm/kvm_host.h
> > index f4ba881..8571952 100644
> > --- a/arch/powerpc/include/asm/kvm_host.h
> > +++ b/arch/powerpc/include/asm/kvm_host.h
> > @@ -504,7 +504,17 @@ struct kvm_vcpu_arch {
> > 	u32 mmucfg;
> > 	u32 epr;
> > 	u32 crit_save;
> > +	/* guest debug registers*/
> > 	struct kvmppc_booke_debug_reg dbg_reg;
> > +	/* shadow debug registers */
> > +	struct kvmppc_booke_debug_reg shadow_dbg_reg;
> > +	/* host debug registers*/
> > +	struct kvmppc_booke_debug_reg host_dbg_reg;
> > +	/*
> > +	 * Flag indicating that debug registers are used by guest
> > +	 * and requires save restore.
> > +	*/
> > +	bool debug_save_restore;
> > #endif
> > 	gpa_t paddr_accessed;
> > 	gva_t vaddr_accessed;
> > diff --git a/arch/powerpc/include/uapi/asm/kvm.h
> > b/arch/powerpc/include/uapi/asm/kvm.h
> > index 15f9a00..d7ce449 100644
> > --- a/arch/powerpc/include/uapi/asm/kvm.h
> > +++ b/arch/powerpc/include/uapi/asm/kvm.h
> > @@ -25,6 +25,7 @@
> > /* Select powerpc specific features in <linux/kvm.h> */ #define
> > __KVM_HAVE_SPAPR_TCE #define __KVM_HAVE_PPC_SMT
> > +#define __KVM_HAVE_GUEST_DEBUG
> >
> > struct kvm_regs {
> > 	__u64 pc;
> > @@ -267,7 +268,24 @@ struct kvm_fpu {
> > 	__u64 fpr[32];
> > };
> >
> > +/*
> > + * Defines for h/w breakpoint, watchpoint (read, write or both) and
> > + * software breakpoint.
> > + * These are used as "type" in KVM_SET_GUEST_DEBUG ioctl and "status"
> > + * for KVM_DEBUG_EXIT.
> > + */
> > +#define KVMPPC_DEBUG_NONE		0x0
> > +#define KVMPPC_DEBUG_BREAKPOINT		(1UL << 1)
> > +#define KVMPPC_DEBUG_WATCH_WRITE	(1UL << 2)
> > +#define KVMPPC_DEBUG_WATCH_READ		(1UL << 3)
> > struct kvm_debug_exit_arch {
> > +	__u64 address;
> > +	/*
> > +	 * exiting to userspace because of h/w breakpoint, watchpoint
> > +	 * (read, write or both) and software breakpoint.
> > +	 */
> > +	__u32 status;
> > +	__u32 reserved;
> > };
> >
> > /* for KVM_SET_GUEST_DEBUG */
> > @@ -279,10 +297,6 @@ struct kvm_guest_debug_arch {
> > 		 * Type denotes h/w breakpoint, read watchpoint, write
> > 		 * watchpoint or watchpoint (both read and write).
> > 		 */
> > -#define KVMPPC_DEBUG_NOTYPE		0x0
> > -#define KVMPPC_DEBUG_BREAKPOINT		(1UL << 1)
> > -#define KVMPPC_DEBUG_WATCH_WRITE	(1UL << 2)
> > -#define KVMPPC_DEBUG_WATCH_READ		(1UL << 3)
> > 		__u32 type;
> > 		__u32 reserved;
> > 	} bp[16];
> > diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index
> > 1de93a8..bf20056 100644
> > --- a/arch/powerpc/kvm/booke.c
> > +++ b/arch/powerpc/kvm/booke.c
> > @@ -133,6 +133,30 @@ static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu
> > *vcpu) #endif }
> >
> > +static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu) {
> > +	/* Synchronize guest's desire to get debug interrupts into shadow
> > +MSR */ #ifndef CONFIG_KVM_BOOKE_HV
> > +	vcpu->arch.shadow_msr &= ~MSR_DE;
> > +	vcpu->arch.shadow_msr |= vcpu->arch.shared->msr & MSR_DE; #endif
> > +
> > +	/* Force enable debug interrupts when user space wants to debug */
> > +	if (vcpu->guest_debug) {
> > +#ifdef CONFIG_KVM_BOOKE_HV
> > +		/*
> > +		 * Since there is no shadow MSR, sync MSR_DE into the guest
> > +		 * visible MSR. Do not allow guest to change MSR[DE].
> > +		 */
> > +		vcpu->arch.shared->msr |= MSR_DE;
> > +		mtspr(SPRN_MSRP, mfspr(SPRN_MSRP) | MSRP_DEP);
> 
> This mtspr should really just be a bit or in shadow_mspr when guest_debug gets
> enabled. It should automatically get synchronized as soon as the next
> vpcu_load() happens.

I think this is not required here as shadow_dbsr already have MSRP_DEP set.

Will setup shadow_msrp when setting guest_debug and clear shadow_msrp when guest_debug is cleared.
But that will also not be sufficient as it not sure when vcpu_load() will be called after the shadow_msrp is changed. So 

When guest_debug is set in debug_ioctl()
 /* Set MSRP_DEP in shadow_msrp so that guest cannot change MSR.DE. As shadow_msrp is loaded on vcpu_load(), and it is not guaranteed that vcpu_load() will be called just after debug_ioctl(), so also set MSRP_DEP in SPRN_MSRP. */
 - shadow_msrp | MSRP_DEP;
 - change SPRN_MSRP.MSRP_DEP as you do not know when vcpu_load() will be called?	


When guest_debug is cleared in debug_ioctl()
 /* Clear MSRP_DEP in shadow_msrp so that guest can change MSR.DE. As shadow_msrp is loaded on vcpu_load(), and it is not guaranteed that vcpu_load() will be called just after debug_ioctl(), so also clear MSRP_DEP in SPRN_MSRP. */
 - shadow_msrp | MSRP_DEP;
 - change SPRN_MSRP.MSRP_DEP as you do not know when vcpu_load() will be called?	

> 
> Also, what happens when user space disables guest_debug?
> 
> > +#else
> > +		vcpu->arch.shadow_msr |= MSR_DE;
> > +		vcpu->arch.shared->msr &= ~MSR_DE;
> > +#endif
> > +	}
> > +}
> > +
> > /*
> >  * Helper function for "full" MSR writes.  No need to call this if
> > only
> >  * EE/CE/ME/DE/RI are changing.
> > @@ -150,6 +174,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
> > 	kvmppc_mmu_msr_notify(vcpu, old_msr);
> > 	kvmppc_vcpu_sync_spe(vcpu);
> > 	kvmppc_vcpu_sync_fpu(vcpu);
> > +	kvmppc_vcpu_sync_debug(vcpu);
> > }
> >
> > static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu, @@
> > -736,6 +761,9 @@ static int emulation_exit(struct kvm_run *run, struct
> kvm_vcpu *vcpu)
> > 		run->exit_reason = KVM_EXIT_DCR;
> > 		return RESUME_HOST;
> >
> > +	case EMULATE_EXIT_USER:
> > +		return RESUME_HOST;
> 
> This should get folded into the previous patch or be a separate one.

ok

> 
> > +
> > 	case EMULATE_FAIL:
> > 		printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
> > 		       __func__, vcpu->arch.pc, vcpu->arch.last_inst); @@ -751,6
> > +779,30 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu
> *vcpu)
> > 	}
> > }
> >
> > +static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu
> > +*vcpu) {
> > +	u32 dbsr = mfspr(SPRN_DBSR);
> > +	mtspr(SPRN_DBSR, dbsr);
> 
> This definitely deserves a comment :).

Ok. 

> Also, are we non-preemptible here?

This is called from kvmppc_handle_exit and interrupts are enabled, so I think this can be preempted. 
But I do not think there is any issue as we clear DBSR in vpu_put().

> 
> > +
> > +	run->debug.arch.status = 0;
> > +	run->debug.arch.address = vcpu->arch.pc;
> > +
> > +	if (dbsr & (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) {
> > +		run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT;
> > +	} else {
> > +		if (dbsr & (DBSR_DAC1W | DBSR_DAC2W))
> > +			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_WRITE;
> > +		else if (dbsr & (DBSR_DAC1R | DBSR_DAC2R))
> > +			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_READ;
> > +		if (dbsr & (DBSR_DAC1R | DBSR_DAC1W))
> > +			run->debug.arch.address = vcpu->arch.shadow_dbg_reg.dac[0];
> > +		else if (dbsr & (DBSR_DAC2R | DBSR_DAC2W))
> > +			run->debug.arch.address = vcpu->arch.shadow_dbg_reg.dac[1];
> > +	}
> > +
> > +	return RESUME_HOST;
> 
> Shouldn't this check for guest_debug and only go to user space if it asked for
> debugging? Can't you just leave it at the old code for !guest_debug?

Will add, I thought that it is redundant as of now as debug interrupt in guest can come only when user space is debugging guest.

clear DBSR
if (!guest_debug)
	return RESUME_GUEST;

> 
> > +}
> > +
> > static void kvmppc_fill_pt_regs(struct pt_regs *regs) {
> > 	ulong r1, ip, msr, lr;
> > @@ -1110,18 +1162,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct
> kvm_vcpu *vcpu,
> > 	}
> >
> > 	case BOOKE_INTERRUPT_DEBUG: {
> > -		u32 dbsr;
> > -
> > -		vcpu->arch.pc = mfspr(SPRN_CSRR0);
> > -
> > -		/* clear IAC events in DBSR register */
> > -		dbsr = mfspr(SPRN_DBSR);
> > -		dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4;
> > -		mtspr(SPRN_DBSR, dbsr);
> > -
> > -		run->exit_reason = KVM_EXIT_DEBUG;
> > +		r = kvmppc_handle_debug(run, vcpu);
> > +		if (r == RESUME_HOST)
> > +			run->exit_reason = KVM_EXIT_DEBUG;
> > 		kvmppc_account_exit(vcpu, DEBUG_EXITS);
> > -		r = RESUME_HOST;
> > 		break;
> > 	}
> >
> > @@ -1172,7 +1216,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
> > 	kvmppc_set_msr(vcpu, 0);
> >
> > #ifndef CONFIG_KVM_BOOKE_HV
> > -	vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS;
> > +	vcpu->arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS;
> > 	vcpu->arch.shadow_pid = 1;
> > 	vcpu->arch.shared->msr = 0;
> > #endif
> > @@ -1527,12 +1571,6 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
> struct kvm_one_reg *reg)
> > 	return r;
> > }
> >
> > -int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
> > -					 struct kvm_guest_debug *dbg)
> > -{
> > -	return -EINVAL;
> > -}
> > -
> > int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu
> > *fpu) {
> > 	return -ENOTSUPP;
> > @@ -1638,16 +1676,194 @@ void kvmppc_decrementer_func(unsigned long data)
> > 	kvmppc_set_tsr_bits(vcpu, TSR_DIS);
> > }
> >
> > +static void kvmppc_booke_vcpu_put_debug_regs(struct kvm_vcpu *vcpu) {
> > +	if (!vcpu->arch.debug_save_restore)
> > +		return;
> > +
> > +	/* Restore Host Context. Disable all debug events First */
> > +	mtspr(SPRN_DBCR0, 0x0);
> > +	/* Disable pending debug event by Clearing DBSR */
> > +	mtspr(SPRN_DBSR, mfspr(SPRN_DBSR));
> > +
> > +	mtspr(SPRN_DBCR1, vcpu->arch.host_dbg_reg.dbcr1);
> > +	mtspr(SPRN_DBCR2, vcpu->arch.host_dbg_reg.dbcr2); #ifdef
> > +CONFIG_KVM_E500MC
> > +	mtspr(SPRN_DBCR4, vcpu->arch.host_dbg_reg.dbcr4); #endif
> > +	mtspr(SPRN_IAC1, vcpu->arch.host_dbg_reg.iac[0]);
> > +	mtspr(SPRN_IAC2, vcpu->arch.host_dbg_reg.iac[1]); #if
> > +CONFIG_PPC_ADV_DEBUG_IACS > 2
> > +	mtspr(SPRN_IAC3, vcpu->arch.host_dbg_reg.iac[2]);
> > +	mtspr(SPRN_IAC4, vcpu->arch.host_dbg_reg.iac[3]); #endif
> > +	mtspr(SPRN_DAC1, vcpu->arch.host_dbg_reg.dac[0]);
> > +	mtspr(SPRN_DAC2, vcpu->arch.host_dbg_reg.dac[1]);
> > +
> > +	/* Enable debug events after all other debug registers restored */
> > +	mtspr(SPRN_DBCR0, vcpu->arch.host_dbg_reg.dbcr0);
> 
> How does the normal debug register switching code work in Linux? Can't we just
> reuse that? Or rely on it to restore working state when another process gets
> scheduled in?

Good point, I can see debug registers loading in function __switch_to()->switch_booke_debug_regs() in file arch/powerpc/kernel/process.c.
So as long as assume that host will not use debug resources we can rely on this restore. But I am not sure that this is a fare assumption. As Scott earlier mentioned someone can use debug resource for kernel debugging also.

> 
> > +
> > +	/* Host debug register are restored */
> > +	vcpu->arch.debug_save_restore = false; }
> > +
> > +static void kvmppc_booke_vcpu_load_debug_regs(struct kvm_vcpu *vcpu)
> > +{
> > +	/*
> > +	 * Check whether guest still need debug resource, if not then there
> > +	 * is no need to resotre guest context.
> 
> restore
> 
> > +	 */
> > +	if (!vcpu->arch.shadow_dbg_reg.dbcr0)
> > +		return;
> 
> Are we guaranteed that debugging is disabled here?

Ah, I should be clearing the hw DBCR0 and DBSR.

> We don't want to get debug
> exceptions that were meant for some other process.
> 
> > +
> > +	/*
> > +	 * Save Host debug register only after restore. If host debug
> > +	 * registers are saved and not restored then do not save again.
> > +	 */
> > +	if (!vcpu->arch.debug_save_restore) {
> > +		/* Save Host context */
> > +		vcpu->arch.host_dbg_reg.dbcr0 = mfspr(SPRN_DBCR0);
> > +		vcpu->arch.host_dbg_reg.dbcr1 = mfspr(SPRN_DBCR1);
> > +		vcpu->arch.host_dbg_reg.dbcr2 = mfspr(SPRN_DBCR2); #ifdef
> > +CONFIG_KVM_E500MC
> > +		vcpu->arch.host_dbg_reg.dbcr4 = mfspr(SPRN_DBCR4); #endif
> > +		vcpu->arch.host_dbg_reg.iac[0] = mfspr(SPRN_IAC1);
> > +		vcpu->arch.host_dbg_reg.iac[1] = mfspr(SPRN_IAC2); #if
> > +CONFIG_PPC_ADV_DEBUG_IACS > 2
> > +		vcpu->arch.host_dbg_reg.iac[2] = mfspr(SPRN_IAC3);
> > +		vcpu->arch.host_dbg_reg.iac[3] = mfspr(SPRN_IAC4); #endif
> > +		vcpu->arch.host_dbg_reg.dac[0] = mfspr(SPRN_DAC1);
> > +		vcpu->arch.host_dbg_reg.dac[1] = mfspr(SPRN_DAC2);
> > +		vcpu->arch.debug_save_restore = true;
> > +	}
> > +
> > +	/* Restore Guest Context. Disable all debug events First */
> > +	mtspr(SPRN_DBCR0, 0x0);
> > +	/* Clear h/w DBSR */
> > +	mtspr(SPRN_DBSR, mfspr(SPRN_DBSR));
> > +
> > +	mtspr(SPRN_DBCR1, vcpu->arch.shadow_dbg_reg.dbcr1);
> > +	mtspr(SPRN_DBCR2, vcpu->arch.shadow_dbg_reg.dbcr2); #ifdef
> > +CONFIG_KVM_E500MC
> > +	mtspr(SPRN_DBCR4, vcpu->arch.shadow_dbg_reg.dbcr4); #endif
> > +	mtspr(SPRN_IAC1, vcpu->arch.shadow_dbg_reg.iac[0]);
> > +	mtspr(SPRN_IAC2, vcpu->arch.shadow_dbg_reg.iac[1]);
> > +#if CONFIG_PPC_ADV_DEBUG_IACS > 2
> > +	mtspr(SPRN_IAC3, vcpu->arch.shadow_dbg_reg.iac[2]);
> > +	mtspr(SPRN_IAC4, vcpu->arch.shadow_dbg_reg.iac[3]);
> > +#endif
> > +	mtspr(SPRN_DAC1, vcpu->arch.shadow_dbg_reg.dac[0]);
> > +	mtspr(SPRN_DAC2, vcpu->arch.shadow_dbg_reg.dac[1]);
> > +
> > +	/* Enable debug events after other debug registers restored */
> > +	mtspr(SPRN_DBCR0, vcpu->arch.shadow_dbg_reg.dbcr0); }
> > +
> > +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
> > +					 struct kvm_guest_debug *dbg)
> > +{
> > +	struct kvmppc_booke_debug_reg *dbg_reg;
> > +	int n, b = 0, w = 0;
> > +	const u32 bp_code[] = {
> > +		DBCR0_IAC1 | DBCR0_IDM,
> > +		DBCR0_IAC2 | DBCR0_IDM,
> > +		DBCR0_IAC3 | DBCR0_IDM,
> > +		DBCR0_IAC4 | DBCR0_IDM
> > +	};
> > +	const u32 wp_code[] = {
> > +		DBCR0_DAC1W | DBCR0_IDM,
> > +		DBCR0_DAC2W | DBCR0_IDM,
> > +		DBCR0_DAC1R | DBCR0_IDM,
> > +		DBCR0_DAC2R | DBCR0_IDM
> > +	};
> > +
> > +	if (!(dbg->control & KVM_GUESTDBG_ENABLE)) {
> > +		/* Clear All debug events */
> > +		vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
> > +		vcpu->guest_debug = 0;
> 
> Ah, this is where we disable guest_debug. This needs to enable guest_debug for
> the guest again, so you need to remove the DE bit from shadow_msrp here.
> 
> > +		return 0;
> > +	}
> > +
> > +	vcpu->guest_debug = dbg->control;
> > +	vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
> > +	/* Set DBCR0_EDM in guest visible DBCR0 register. */
> > +	vcpu->arch.dbg_reg.dbcr0 = DBCR0_EDM;
> > +
> > +	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
> > +		vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC;
> > +
> > +	if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
> > +		/* Code below handles only HW breakpoints */
> > +		kvmppc_booke_vcpu_load_debug_regs(vcpu);
> > +		return 0;
> > +	}
> > +
> > +	dbg_reg = &(vcpu->arch.shadow_dbg_reg);
> > +
> > +	/*
> > +	 * On BOOKE (e500v2); Set DBCR1 and DBCR2 to allow debug events
> > +	 * to occur when MSR.PR is set.
> > +	 * On BOOKE-HV (e500mc+); MSR.PR = 0 when guest is running. So we
> > +	 * should clear DBCR1 and DBCR2.
> > +	 */
> > +#ifdef CONFIG_KVM_BOOKE_HV
> > +	dbg_reg->dbcr1 = 0;
> > +	dbg_reg->dbcr2 = 0;
> 
> Does that mean we can't debug guest user space?

Yes

> 
> > +#else
> > +	dbg_reg->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US | DBCR1_IAC3US |
> > +			  DBCR1_IAC4US;
> > +	dbg_reg->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US; #endif
> > +
> > +	for (n = 0; n < (KVMPPC_BOOKE_IAC_NUM + KVMPPC_BOOKE_DAC_NUM); n++) {
> > +		u32 type = dbg->arch.bp[n].type;
> > +
> > +		if (!type)
> > +			break;
> 
> Not continue?

I think wrong type should also mean error (INVALID)

> 
> > +
> > +		if (type & (KVMPPC_DEBUG_WATCH_READ |
> > +			    KVMPPC_DEBUG_WATCH_WRITE)) {
> > +			if (w >= KVMPPC_BOOKE_DAC_NUM)
> > +				continue;
> 
> This should result in an error, no?

I do not think we need error, as gdb itself will through warning when trying to set more than required hardware breakpoint and watchpoint.

> 
> > +
> > +			if (type & KVMPPC_DEBUG_WATCH_READ)
> > +				dbg_reg->dbcr0 |= wp_code[w + 2];
> > +			if (type & KVMPPC_DEBUG_WATCH_WRITE)
> > +				dbg_reg->dbcr0 |= wp_code[w];
> > +
> > +			dbg_reg->dac[w] = dbg->arch.bp[n].addr;
> > +			w++;
> > +		} else if (type & KVMPPC_DEBUG_BREAKPOINT) {
> > +			if (b >= KVMPPC_BOOKE_IAC_NUM)
> > +				continue;
> 
> Same here.

Same as above .

Thanks
-Bharat
> 
> > +
> > +			dbg_reg->dbcr0 |= bp_code[b];
> > +			dbg_reg->iac[b] = dbg->arch.bp[n].addr;
> > +			b++;
> > +		}
> > +	}
> > +
> > +	kvmppc_booke_vcpu_load_debug_regs(vcpu);
> > +	return 0;
> > +}
> > +
> > void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu) {
> > 	vcpu->cpu = smp_processor_id();
> > 	current->thread.kvm_vcpu = vcpu;
> > +
> > +	kvmppc_booke_vcpu_load_debug_regs(vcpu);
> > }
> >
> > void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu) {
> > 	current->thread.kvm_vcpu = NULL;
> > 	vcpu->cpu = -1;
> > +
> > +	kvmppc_booke_vcpu_put_debug_regs(vcpu);
> > }
> >
> > int __init kvmppc_booke_init(void)
> > diff --git a/arch/powerpc/kvm/e500_emulate.c
> > b/arch/powerpc/kvm/e500_emulate.c index e78f353..cefdd38 100644
> > --- a/arch/powerpc/kvm/e500_emulate.c
> > +++ b/arch/powerpc/kvm/e500_emulate.c
> > @@ -26,6 +26,7 @@
> > #define XOP_TLBRE   946
> > #define XOP_TLBWE   978
> > #define XOP_TLBILX  18
> > +#define XOP_EHPRIV  270
> >
> > #ifdef CONFIG_KVM_E500MC
> > static int dbell2prio(ulong param)
> > @@ -130,6 +131,15 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct
> kvm_vcpu *vcpu,
> > 			emulated = kvmppc_e500_emul_tlbivax(vcpu, ea);
> > 			break;
> >
> > +		case XOP_EHPRIV:
> > +			run->exit_reason = KVM_EXIT_DEBUG;
> > +			run->debug.arch.address = vcpu->arch.pc;
> > +			run->debug.arch.status = 0;
> > +			kvmppc_account_exit(vcpu, DEBUG_EXITS);
> > +			emulated = EMULATE_EXIT_USER;
> > +			*advance = 0;
> > +			break;
> > +
> 
> Looks good.
> 
> I would appreciate if you could split this patch up a bit more
> 
> 
> Alex
> 


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Graf April 2, 2013, 8:27 a.m. UTC | #3
On 29.03.2013, at 07:04, Bhushan Bharat-R65777 wrote:

> 
> 
>> -----Original Message-----
>> From: Alexander Graf [mailto:agraf@suse.de]
>> Sent: Thursday, March 28, 2013 10:06 PM
>> To: Bhushan Bharat-R65777
>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Wood Scott-B07421; Bhushan
>> Bharat-R65777
>> Subject: Re: [PATCH 4/4 v2] KVM: PPC: Add userspace debug stub support
>> 
>> 
>> On 21.03.2013, at 07:25, Bharat Bhushan wrote:
>> 
>>> From: Bharat Bhushan <bharat.bhushan@freescale.com>
>>> 
>>> This patch adds the debug stub support on booke/bookehv.
>>> Now QEMU debug stub can use hw breakpoint, watchpoint and software
>>> breakpoint to debug guest.
>>> 
>>> Debug registers are saved/restored on vcpu_put()/vcpu_get().
>>> Also the debug registers are saved restored only if guest is using
>>> debug resources.
>>> 
>>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
>>> ---
>>> v2:
>>> - save/restore in vcpu_get()/vcpu_put()
>>> - some more minor cleanup based on review comments.
>>> 
>>> arch/powerpc/include/asm/kvm_host.h |   10 ++
>>> arch/powerpc/include/uapi/asm/kvm.h |   22 +++-
>>> arch/powerpc/kvm/booke.c            |  252 ++++++++++++++++++++++++++++++++---
>>> arch/powerpc/kvm/e500_emulate.c     |   10 ++
>>> 4 files changed, 272 insertions(+), 22 deletions(-)
>>> 
>>> diff --git a/arch/powerpc/include/asm/kvm_host.h
>>> b/arch/powerpc/include/asm/kvm_host.h
>>> index f4ba881..8571952 100644
>>> --- a/arch/powerpc/include/asm/kvm_host.h
>>> +++ b/arch/powerpc/include/asm/kvm_host.h
>>> @@ -504,7 +504,17 @@ struct kvm_vcpu_arch {
>>> 	u32 mmucfg;
>>> 	u32 epr;
>>> 	u32 crit_save;
>>> +	/* guest debug registers*/
>>> 	struct kvmppc_booke_debug_reg dbg_reg;
>>> +	/* shadow debug registers */
>>> +	struct kvmppc_booke_debug_reg shadow_dbg_reg;
>>> +	/* host debug registers*/
>>> +	struct kvmppc_booke_debug_reg host_dbg_reg;
>>> +	/*
>>> +	 * Flag indicating that debug registers are used by guest
>>> +	 * and requires save restore.
>>> +	*/
>>> +	bool debug_save_restore;
>>> #endif
>>> 	gpa_t paddr_accessed;
>>> 	gva_t vaddr_accessed;
>>> diff --git a/arch/powerpc/include/uapi/asm/kvm.h
>>> b/arch/powerpc/include/uapi/asm/kvm.h
>>> index 15f9a00..d7ce449 100644
>>> --- a/arch/powerpc/include/uapi/asm/kvm.h
>>> +++ b/arch/powerpc/include/uapi/asm/kvm.h
>>> @@ -25,6 +25,7 @@
>>> /* Select powerpc specific features in <linux/kvm.h> */ #define
>>> __KVM_HAVE_SPAPR_TCE #define __KVM_HAVE_PPC_SMT
>>> +#define __KVM_HAVE_GUEST_DEBUG
>>> 
>>> struct kvm_regs {
>>> 	__u64 pc;
>>> @@ -267,7 +268,24 @@ struct kvm_fpu {
>>> 	__u64 fpr[32];
>>> };
>>> 
>>> +/*
>>> + * Defines for h/w breakpoint, watchpoint (read, write or both) and
>>> + * software breakpoint.
>>> + * These are used as "type" in KVM_SET_GUEST_DEBUG ioctl and "status"
>>> + * for KVM_DEBUG_EXIT.
>>> + */
>>> +#define KVMPPC_DEBUG_NONE		0x0
>>> +#define KVMPPC_DEBUG_BREAKPOINT		(1UL << 1)
>>> +#define KVMPPC_DEBUG_WATCH_WRITE	(1UL << 2)
>>> +#define KVMPPC_DEBUG_WATCH_READ		(1UL << 3)
>>> struct kvm_debug_exit_arch {
>>> +	__u64 address;
>>> +	/*
>>> +	 * exiting to userspace because of h/w breakpoint, watchpoint
>>> +	 * (read, write or both) and software breakpoint.
>>> +	 */
>>> +	__u32 status;
>>> +	__u32 reserved;
>>> };
>>> 
>>> /* for KVM_SET_GUEST_DEBUG */
>>> @@ -279,10 +297,6 @@ struct kvm_guest_debug_arch {
>>> 		 * Type denotes h/w breakpoint, read watchpoint, write
>>> 		 * watchpoint or watchpoint (both read and write).
>>> 		 */
>>> -#define KVMPPC_DEBUG_NOTYPE		0x0
>>> -#define KVMPPC_DEBUG_BREAKPOINT		(1UL << 1)
>>> -#define KVMPPC_DEBUG_WATCH_WRITE	(1UL << 2)
>>> -#define KVMPPC_DEBUG_WATCH_READ		(1UL << 3)
>>> 		__u32 type;
>>> 		__u32 reserved;
>>> 	} bp[16];
>>> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index
>>> 1de93a8..bf20056 100644
>>> --- a/arch/powerpc/kvm/booke.c
>>> +++ b/arch/powerpc/kvm/booke.c
>>> @@ -133,6 +133,30 @@ static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu
>>> *vcpu) #endif }
>>> 
>>> +static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu) {
>>> +	/* Synchronize guest's desire to get debug interrupts into shadow
>>> +MSR */ #ifndef CONFIG_KVM_BOOKE_HV
>>> +	vcpu->arch.shadow_msr &= ~MSR_DE;
>>> +	vcpu->arch.shadow_msr |= vcpu->arch.shared->msr & MSR_DE; #endif
>>> +
>>> +	/* Force enable debug interrupts when user space wants to debug */
>>> +	if (vcpu->guest_debug) {
>>> +#ifdef CONFIG_KVM_BOOKE_HV
>>> +		/*
>>> +		 * Since there is no shadow MSR, sync MSR_DE into the guest
>>> +		 * visible MSR. Do not allow guest to change MSR[DE].
>>> +		 */
>>> +		vcpu->arch.shared->msr |= MSR_DE;
>>> +		mtspr(SPRN_MSRP, mfspr(SPRN_MSRP) | MSRP_DEP);
>> 
>> This mtspr should really just be a bit or in shadow_mspr when guest_debug gets
>> enabled. It should automatically get synchronized as soon as the next
>> vpcu_load() happens.
> 
> I think this is not required here as shadow_dbsr already have MSRP_DEP set.
> 
> Will setup shadow_msrp when setting guest_debug and clear shadow_msrp when guest_debug is cleared.
> But that will also not be sufficient as it not sure when vcpu_load() will be called after the shadow_msrp is changed. So 
> 
> When guest_debug is set in debug_ioctl()
> /* Set MSRP_DEP in shadow_msrp so that guest cannot change MSR.DE. As shadow_msrp is loaded on vcpu_load(), and it is not guaranteed that vcpu_load() will be called just after debug_ioctl(), so also set MSRP_DEP in SPRN_MSRP. */

debug_ioctl is an ioctl. So while that ioctl is on, the vcpu is not running. After that ioctl user space calls another VCPU_RUN ioctl which invokes vcpu_load.

> - shadow_msrp | MSRP_DEP;
> - change SPRN_MSRP.MSRP_DEP as you do not know when vcpu_load() will be called?	
> 
> 
> When guest_debug is cleared in debug_ioctl()
> /* Clear MSRP_DEP in shadow_msrp so that guest can change MSR.DE. As shadow_msrp is loaded on vcpu_load(), and it is not guaranteed that vcpu_load() will be called just after debug_ioctl(), so also clear MSRP_DEP in SPRN_MSRP. */

Same thing here again.

> - shadow_msrp | MSRP_DEP;
> - change SPRN_MSRP.MSRP_DEP as you do not know when vcpu_load() will be called?	
> 
>> 
>> Also, what happens when user space disables guest_debug?
>> 
>>> +#else
>>> +		vcpu->arch.shadow_msr |= MSR_DE;
>>> +		vcpu->arch.shared->msr &= ~MSR_DE;
>>> +#endif
>>> +	}
>>> +}
>>> +
>>> /*
>>> * Helper function for "full" MSR writes.  No need to call this if
>>> only
>>> * EE/CE/ME/DE/RI are changing.
>>> @@ -150,6 +174,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
>>> 	kvmppc_mmu_msr_notify(vcpu, old_msr);
>>> 	kvmppc_vcpu_sync_spe(vcpu);
>>> 	kvmppc_vcpu_sync_fpu(vcpu);
>>> +	kvmppc_vcpu_sync_debug(vcpu);
>>> }
>>> 
>>> static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu, @@
>>> -736,6 +761,9 @@ static int emulation_exit(struct kvm_run *run, struct
>> kvm_vcpu *vcpu)
>>> 		run->exit_reason = KVM_EXIT_DCR;
>>> 		return RESUME_HOST;
>>> 
>>> +	case EMULATE_EXIT_USER:
>>> +		return RESUME_HOST;
>> 
>> This should get folded into the previous patch or be a separate one.
> 
> ok
> 
>> 
>>> +
>>> 	case EMULATE_FAIL:
>>> 		printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
>>> 		       __func__, vcpu->arch.pc, vcpu->arch.last_inst); @@ -751,6
>>> +779,30 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu
>> *vcpu)
>>> 	}
>>> }
>>> 
>>> +static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu
>>> +*vcpu) {
>>> +	u32 dbsr = mfspr(SPRN_DBSR);
>>> +	mtspr(SPRN_DBSR, dbsr);
>> 
>> This definitely deserves a comment :).
> 
> Ok. 
> 
>> Also, are we non-preemptible here?
> 
> This is called from kvmppc_handle_exit and interrupts are enabled, so I think this can be preempted. 
> But I do not think there is any issue as we clear DBSR in vpu_put().

Ok, make clear_dbsr a function then that you call from both places. That way it's more obvious.

> 
>> 
>>> +
>>> +	run->debug.arch.status = 0;
>>> +	run->debug.arch.address = vcpu->arch.pc;
>>> +
>>> +	if (dbsr & (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) {
>>> +		run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT;
>>> +	} else {
>>> +		if (dbsr & (DBSR_DAC1W | DBSR_DAC2W))
>>> +			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_WRITE;
>>> +		else if (dbsr & (DBSR_DAC1R | DBSR_DAC2R))
>>> +			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_READ;
>>> +		if (dbsr & (DBSR_DAC1R | DBSR_DAC1W))
>>> +			run->debug.arch.address = vcpu->arch.shadow_dbg_reg.dac[0];
>>> +		else if (dbsr & (DBSR_DAC2R | DBSR_DAC2W))
>>> +			run->debug.arch.address = vcpu->arch.shadow_dbg_reg.dac[1];
>>> +	}
>>> +
>>> +	return RESUME_HOST;
>> 
>> Shouldn't this check for guest_debug and only go to user space if it asked for
>> debugging? Can't you just leave it at the old code for !guest_debug?
> 
> Will add, I thought that it is redundant as of now as debug interrupt in guest can come only when user space is debugging guest.

The problem is that user space might not expect a guest debug exit.

> clear DBSR
> if (!guest_debug)
> 	return RESUME_GUEST;

Can't you just do exactly what was done before when !guest_debug? We usually try to change semantics one step at a time.

> 
>> 
>>> +}
>>> +
>>> static void kvmppc_fill_pt_regs(struct pt_regs *regs) {
>>> 	ulong r1, ip, msr, lr;
>>> @@ -1110,18 +1162,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct
>> kvm_vcpu *vcpu,
>>> 	}
>>> 
>>> 	case BOOKE_INTERRUPT_DEBUG: {
>>> -		u32 dbsr;
>>> -
>>> -		vcpu->arch.pc = mfspr(SPRN_CSRR0);
>>> -
>>> -		/* clear IAC events in DBSR register */
>>> -		dbsr = mfspr(SPRN_DBSR);
>>> -		dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4;
>>> -		mtspr(SPRN_DBSR, dbsr);
>>> -
>>> -		run->exit_reason = KVM_EXIT_DEBUG;
>>> +		r = kvmppc_handle_debug(run, vcpu);
>>> +		if (r == RESUME_HOST)
>>> +			run->exit_reason = KVM_EXIT_DEBUG;
>>> 		kvmppc_account_exit(vcpu, DEBUG_EXITS);
>>> -		r = RESUME_HOST;
>>> 		break;
>>> 	}
>>> 
>>> @@ -1172,7 +1216,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
>>> 	kvmppc_set_msr(vcpu, 0);
>>> 
>>> #ifndef CONFIG_KVM_BOOKE_HV
>>> -	vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS;
>>> +	vcpu->arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS;
>>> 	vcpu->arch.shadow_pid = 1;
>>> 	vcpu->arch.shared->msr = 0;
>>> #endif
>>> @@ -1527,12 +1571,6 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
>> struct kvm_one_reg *reg)
>>> 	return r;
>>> }
>>> 
>>> -int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
>>> -					 struct kvm_guest_debug *dbg)
>>> -{
>>> -	return -EINVAL;
>>> -}
>>> -
>>> int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu
>>> *fpu) {
>>> 	return -ENOTSUPP;
>>> @@ -1638,16 +1676,194 @@ void kvmppc_decrementer_func(unsigned long data)
>>> 	kvmppc_set_tsr_bits(vcpu, TSR_DIS);
>>> }
>>> 
>>> +static void kvmppc_booke_vcpu_put_debug_regs(struct kvm_vcpu *vcpu) {
>>> +	if (!vcpu->arch.debug_save_restore)
>>> +		return;
>>> +
>>> +	/* Restore Host Context. Disable all debug events First */
>>> +	mtspr(SPRN_DBCR0, 0x0);
>>> +	/* Disable pending debug event by Clearing DBSR */
>>> +	mtspr(SPRN_DBSR, mfspr(SPRN_DBSR));
>>> +
>>> +	mtspr(SPRN_DBCR1, vcpu->arch.host_dbg_reg.dbcr1);
>>> +	mtspr(SPRN_DBCR2, vcpu->arch.host_dbg_reg.dbcr2); #ifdef
>>> +CONFIG_KVM_E500MC
>>> +	mtspr(SPRN_DBCR4, vcpu->arch.host_dbg_reg.dbcr4); #endif
>>> +	mtspr(SPRN_IAC1, vcpu->arch.host_dbg_reg.iac[0]);
>>> +	mtspr(SPRN_IAC2, vcpu->arch.host_dbg_reg.iac[1]); #if
>>> +CONFIG_PPC_ADV_DEBUG_IACS > 2
>>> +	mtspr(SPRN_IAC3, vcpu->arch.host_dbg_reg.iac[2]);
>>> +	mtspr(SPRN_IAC4, vcpu->arch.host_dbg_reg.iac[3]); #endif
>>> +	mtspr(SPRN_DAC1, vcpu->arch.host_dbg_reg.dac[0]);
>>> +	mtspr(SPRN_DAC2, vcpu->arch.host_dbg_reg.dac[1]);
>>> +
>>> +	/* Enable debug events after all other debug registers restored */
>>> +	mtspr(SPRN_DBCR0, vcpu->arch.host_dbg_reg.dbcr0);
>> 
>> How does the normal debug register switching code work in Linux? Can't we just
>> reuse that? Or rely on it to restore working state when another process gets
>> scheduled in?
> 
> Good point, I can see debug registers loading in function __switch_to()->switch_booke_debug_regs() in file arch/powerpc/kernel/process.c.
> So as long as assume that host will not use debug resources we can rely on this restore. But I am not sure that this is a fare assumption. As Scott earlier mentioned someone can use debug resource for kernel debugging also.

Someone in the kernel can also use floating point registers. But then it's his responsibility to clean up the mess he leaves behind.

> 
>> 
>>> +
>>> +	/* Host debug register are restored */
>>> +	vcpu->arch.debug_save_restore = false; }
>>> +
>>> +static void kvmppc_booke_vcpu_load_debug_regs(struct kvm_vcpu *vcpu)
>>> +{
>>> +	/*
>>> +	 * Check whether guest still need debug resource, if not then there
>>> +	 * is no need to resotre guest context.
>> 
>> restore
>> 
>>> +	 */
>>> +	if (!vcpu->arch.shadow_dbg_reg.dbcr0)
>>> +		return;
>> 
>> Are we guaranteed that debugging is disabled here?
> 
> Ah, I should be clearing the hw DBCR0 and DBSR.
> 
>> We don't want to get debug
>> exceptions that were meant for some other process.
>> 
>>> +
>>> +	/*
>>> +	 * Save Host debug register only after restore. If host debug
>>> +	 * registers are saved and not restored then do not save again.
>>> +	 */
>>> +	if (!vcpu->arch.debug_save_restore) {
>>> +		/* Save Host context */
>>> +		vcpu->arch.host_dbg_reg.dbcr0 = mfspr(SPRN_DBCR0);
>>> +		vcpu->arch.host_dbg_reg.dbcr1 = mfspr(SPRN_DBCR1);
>>> +		vcpu->arch.host_dbg_reg.dbcr2 = mfspr(SPRN_DBCR2); #ifdef
>>> +CONFIG_KVM_E500MC
>>> +		vcpu->arch.host_dbg_reg.dbcr4 = mfspr(SPRN_DBCR4); #endif
>>> +		vcpu->arch.host_dbg_reg.iac[0] = mfspr(SPRN_IAC1);
>>> +		vcpu->arch.host_dbg_reg.iac[1] = mfspr(SPRN_IAC2); #if
>>> +CONFIG_PPC_ADV_DEBUG_IACS > 2
>>> +		vcpu->arch.host_dbg_reg.iac[2] = mfspr(SPRN_IAC3);
>>> +		vcpu->arch.host_dbg_reg.iac[3] = mfspr(SPRN_IAC4); #endif
>>> +		vcpu->arch.host_dbg_reg.dac[0] = mfspr(SPRN_DAC1);
>>> +		vcpu->arch.host_dbg_reg.dac[1] = mfspr(SPRN_DAC2);
>>> +		vcpu->arch.debug_save_restore = true;
>>> +	}
>>> +
>>> +	/* Restore Guest Context. Disable all debug events First */
>>> +	mtspr(SPRN_DBCR0, 0x0);
>>> +	/* Clear h/w DBSR */
>>> +	mtspr(SPRN_DBSR, mfspr(SPRN_DBSR));
>>> +
>>> +	mtspr(SPRN_DBCR1, vcpu->arch.shadow_dbg_reg.dbcr1);
>>> +	mtspr(SPRN_DBCR2, vcpu->arch.shadow_dbg_reg.dbcr2); #ifdef
>>> +CONFIG_KVM_E500MC
>>> +	mtspr(SPRN_DBCR4, vcpu->arch.shadow_dbg_reg.dbcr4); #endif
>>> +	mtspr(SPRN_IAC1, vcpu->arch.shadow_dbg_reg.iac[0]);
>>> +	mtspr(SPRN_IAC2, vcpu->arch.shadow_dbg_reg.iac[1]);
>>> +#if CONFIG_PPC_ADV_DEBUG_IACS > 2
>>> +	mtspr(SPRN_IAC3, vcpu->arch.shadow_dbg_reg.iac[2]);
>>> +	mtspr(SPRN_IAC4, vcpu->arch.shadow_dbg_reg.iac[3]);
>>> +#endif
>>> +	mtspr(SPRN_DAC1, vcpu->arch.shadow_dbg_reg.dac[0]);
>>> +	mtspr(SPRN_DAC2, vcpu->arch.shadow_dbg_reg.dac[1]);
>>> +
>>> +	/* Enable debug events after other debug registers restored */
>>> +	mtspr(SPRN_DBCR0, vcpu->arch.shadow_dbg_reg.dbcr0); }
>>> +
>>> +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
>>> +					 struct kvm_guest_debug *dbg)
>>> +{
>>> +	struct kvmppc_booke_debug_reg *dbg_reg;
>>> +	int n, b = 0, w = 0;
>>> +	const u32 bp_code[] = {
>>> +		DBCR0_IAC1 | DBCR0_IDM,
>>> +		DBCR0_IAC2 | DBCR0_IDM,
>>> +		DBCR0_IAC3 | DBCR0_IDM,
>>> +		DBCR0_IAC4 | DBCR0_IDM
>>> +	};
>>> +	const u32 wp_code[] = {
>>> +		DBCR0_DAC1W | DBCR0_IDM,
>>> +		DBCR0_DAC2W | DBCR0_IDM,
>>> +		DBCR0_DAC1R | DBCR0_IDM,
>>> +		DBCR0_DAC2R | DBCR0_IDM
>>> +	};
>>> +
>>> +	if (!(dbg->control & KVM_GUESTDBG_ENABLE)) {
>>> +		/* Clear All debug events */
>>> +		vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
>>> +		vcpu->guest_debug = 0;
>> 
>> Ah, this is where we disable guest_debug. This needs to enable guest_debug for
>> the guest again, so you need to remove the DE bit from shadow_msrp here.
>> 
>>> +		return 0;
>>> +	}
>>> +
>>> +	vcpu->guest_debug = dbg->control;
>>> +	vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
>>> +	/* Set DBCR0_EDM in guest visible DBCR0 register. */
>>> +	vcpu->arch.dbg_reg.dbcr0 = DBCR0_EDM;
>>> +
>>> +	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
>>> +		vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC;
>>> +
>>> +	if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
>>> +		/* Code below handles only HW breakpoints */
>>> +		kvmppc_booke_vcpu_load_debug_regs(vcpu);
>>> +		return 0;
>>> +	}
>>> +
>>> +	dbg_reg = &(vcpu->arch.shadow_dbg_reg);
>>> +
>>> +	/*
>>> +	 * On BOOKE (e500v2); Set DBCR1 and DBCR2 to allow debug events
>>> +	 * to occur when MSR.PR is set.
>>> +	 * On BOOKE-HV (e500mc+); MSR.PR = 0 when guest is running. So we
>>> +	 * should clear DBCR1 and DBCR2.
>>> +	 */
>>> +#ifdef CONFIG_KVM_BOOKE_HV
>>> +	dbg_reg->dbcr1 = 0;
>>> +	dbg_reg->dbcr2 = 0;
>> 
>> Does that mean we can't debug guest user space?
> 
> Yes

This is wrong.

> 
>> 
>>> +#else
>>> +	dbg_reg->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US | DBCR1_IAC3US |
>>> +			  DBCR1_IAC4US;
>>> +	dbg_reg->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US; #endif
>>> +
>>> +	for (n = 0; n < (KVMPPC_BOOKE_IAC_NUM + KVMPPC_BOOKE_DAC_NUM); n++) {
>>> +		u32 type = dbg->arch.bp[n].type;
>>> +
>>> +		if (!type)
>>> +			break;
>> 
>> Not continue?
> 
> I think wrong type should also mean error (INVALID)

Then make it an error :).

> 
>> 
>>> +
>>> +		if (type & (KVMPPC_DEBUG_WATCH_READ |
>>> +			    KVMPPC_DEBUG_WATCH_WRITE)) {
>>> +			if (w >= KVMPPC_BOOKE_DAC_NUM)
>>> +				continue;
>> 
>> This should result in an error, no?
> 
> I do not think we need error, as gdb itself will through warning when trying to set more than required hardware breakpoint and watchpoint.

It's gdb's decision to print a warning. We should at least give gdb the chance to do so and continue.


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bharat Bhushan April 2, 2013, 2:09 p.m. UTC | #4
> -----Original Message-----
> From: Alexander Graf [mailto:agraf@suse.de]
> Sent: Tuesday, April 02, 2013 1:57 PM
> To: Bhushan Bharat-R65777
> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Wood Scott-B07421
> Subject: Re: [PATCH 4/4 v2] KVM: PPC: Add userspace debug stub support
> 
> 
> On 29.03.2013, at 07:04, Bhushan Bharat-R65777 wrote:
> 
> >
> >
> >> -----Original Message-----
> >> From: Alexander Graf [mailto:agraf@suse.de]
> >> Sent: Thursday, March 28, 2013 10:06 PM
> >> To: Bhushan Bharat-R65777
> >> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Wood Scott-B07421;
> >> Bhushan
> >> Bharat-R65777
> >> Subject: Re: [PATCH 4/4 v2] KVM: PPC: Add userspace debug stub
> >> support
> >>
> >>
> >> On 21.03.2013, at 07:25, Bharat Bhushan wrote:
> >>
> >>> From: Bharat Bhushan <bharat.bhushan@freescale.com>
> >>>
> >>> This patch adds the debug stub support on booke/bookehv.
> >>> Now QEMU debug stub can use hw breakpoint, watchpoint and software
> >>> breakpoint to debug guest.
> >>>
> >>> Debug registers are saved/restored on vcpu_put()/vcpu_get().
> >>> Also the debug registers are saved restored only if guest is using
> >>> debug resources.
> >>>
> >>> Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
> >>> ---
> >>> v2:
> >>> - save/restore in vcpu_get()/vcpu_put()
> >>> - some more minor cleanup based on review comments.
> >>>
> >>> arch/powerpc/include/asm/kvm_host.h |   10 ++
> >>> arch/powerpc/include/uapi/asm/kvm.h |   22 +++-
> >>> arch/powerpc/kvm/booke.c            |  252 ++++++++++++++++++++++++++++++++-
> --
> >>> arch/powerpc/kvm/e500_emulate.c     |   10 ++
> >>> 4 files changed, 272 insertions(+), 22 deletions(-)
> >>>
> >>> diff --git a/arch/powerpc/include/asm/kvm_host.h
> >>> b/arch/powerpc/include/asm/kvm_host.h
> >>> index f4ba881..8571952 100644
> >>> --- a/arch/powerpc/include/asm/kvm_host.h
> >>> +++ b/arch/powerpc/include/asm/kvm_host.h
> >>> @@ -504,7 +504,17 @@ struct kvm_vcpu_arch {
> >>> 	u32 mmucfg;
> >>> 	u32 epr;
> >>> 	u32 crit_save;
> >>> +	/* guest debug registers*/
> >>> 	struct kvmppc_booke_debug_reg dbg_reg;
> >>> +	/* shadow debug registers */
> >>> +	struct kvmppc_booke_debug_reg shadow_dbg_reg;
> >>> +	/* host debug registers*/
> >>> +	struct kvmppc_booke_debug_reg host_dbg_reg;
> >>> +	/*
> >>> +	 * Flag indicating that debug registers are used by guest
> >>> +	 * and requires save restore.
> >>> +	*/
> >>> +	bool debug_save_restore;
> >>> #endif
> >>> 	gpa_t paddr_accessed;
> >>> 	gva_t vaddr_accessed;
> >>> diff --git a/arch/powerpc/include/uapi/asm/kvm.h
> >>> b/arch/powerpc/include/uapi/asm/kvm.h
> >>> index 15f9a00..d7ce449 100644
> >>> --- a/arch/powerpc/include/uapi/asm/kvm.h
> >>> +++ b/arch/powerpc/include/uapi/asm/kvm.h
> >>> @@ -25,6 +25,7 @@
> >>> /* Select powerpc specific features in <linux/kvm.h> */ #define
> >>> __KVM_HAVE_SPAPR_TCE #define __KVM_HAVE_PPC_SMT
> >>> +#define __KVM_HAVE_GUEST_DEBUG
> >>>
> >>> struct kvm_regs {
> >>> 	__u64 pc;
> >>> @@ -267,7 +268,24 @@ struct kvm_fpu {
> >>> 	__u64 fpr[32];
> >>> };
> >>>
> >>> +/*
> >>> + * Defines for h/w breakpoint, watchpoint (read, write or both) and
> >>> + * software breakpoint.
> >>> + * These are used as "type" in KVM_SET_GUEST_DEBUG ioctl and "status"
> >>> + * for KVM_DEBUG_EXIT.
> >>> + */
> >>> +#define KVMPPC_DEBUG_NONE		0x0
> >>> +#define KVMPPC_DEBUG_BREAKPOINT		(1UL << 1)
> >>> +#define KVMPPC_DEBUG_WATCH_WRITE	(1UL << 2)
> >>> +#define KVMPPC_DEBUG_WATCH_READ		(1UL << 3)
> >>> struct kvm_debug_exit_arch {
> >>> +	__u64 address;
> >>> +	/*
> >>> +	 * exiting to userspace because of h/w breakpoint, watchpoint
> >>> +	 * (read, write or both) and software breakpoint.
> >>> +	 */
> >>> +	__u32 status;
> >>> +	__u32 reserved;
> >>> };
> >>>
> >>> /* for KVM_SET_GUEST_DEBUG */
> >>> @@ -279,10 +297,6 @@ struct kvm_guest_debug_arch {
> >>> 		 * Type denotes h/w breakpoint, read watchpoint, write
> >>> 		 * watchpoint or watchpoint (both read and write).
> >>> 		 */
> >>> -#define KVMPPC_DEBUG_NOTYPE		0x0
> >>> -#define KVMPPC_DEBUG_BREAKPOINT		(1UL << 1)
> >>> -#define KVMPPC_DEBUG_WATCH_WRITE	(1UL << 2)
> >>> -#define KVMPPC_DEBUG_WATCH_READ		(1UL << 3)
> >>> 		__u32 type;
> >>> 		__u32 reserved;
> >>> 	} bp[16];
> >>> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
> >>> index
> >>> 1de93a8..bf20056 100644
> >>> --- a/arch/powerpc/kvm/booke.c
> >>> +++ b/arch/powerpc/kvm/booke.c
> >>> @@ -133,6 +133,30 @@ static void kvmppc_vcpu_sync_fpu(struct
> >>> kvm_vcpu
> >>> *vcpu) #endif }
> >>>
> >>> +static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu) {
> >>> +	/* Synchronize guest's desire to get debug interrupts into shadow
> >>> +MSR */ #ifndef CONFIG_KVM_BOOKE_HV
> >>> +	vcpu->arch.shadow_msr &= ~MSR_DE;
> >>> +	vcpu->arch.shadow_msr |= vcpu->arch.shared->msr & MSR_DE; #endif
> >>> +
> >>> +	/* Force enable debug interrupts when user space wants to debug */
> >>> +	if (vcpu->guest_debug) {
> >>> +#ifdef CONFIG_KVM_BOOKE_HV
> >>> +		/*
> >>> +		 * Since there is no shadow MSR, sync MSR_DE into the guest
> >>> +		 * visible MSR. Do not allow guest to change MSR[DE].
> >>> +		 */
> >>> +		vcpu->arch.shared->msr |= MSR_DE;
> >>> +		mtspr(SPRN_MSRP, mfspr(SPRN_MSRP) | MSRP_DEP);
> >>
> >> This mtspr should really just be a bit or in shadow_mspr when
> >> guest_debug gets enabled. It should automatically get synchronized as
> >> soon as the next
> >> vpcu_load() happens.
> >
> > I think this is not required here as shadow_dbsr already have MSRP_DEP set.
> >
> > Will setup shadow_msrp when setting guest_debug and clear shadow_msrp when
> guest_debug is cleared.
> > But that will also not be sufficient as it not sure when vcpu_load()
> > will be called after the shadow_msrp is changed. So
> >
> > When guest_debug is set in debug_ioctl()
> > /* Set MSRP_DEP in shadow_msrp so that guest cannot change MSR.DE. As
> > shadow_msrp is loaded on vcpu_load(), and it is not guaranteed that
> > vcpu_load() will be called just after debug_ioctl(), so also set
> > MSRP_DEP in SPRN_MSRP. */
> 
> debug_ioctl is an ioctl. So while that ioctl is on, the vcpu is not running.
> After that ioctl user space calls another VCPU_RUN ioctl which invokes
> vcpu_load.

That's good. I was not aware of vcpu_load() is being called before ioctl handling in KVM :)

> 
> > - shadow_msrp | MSRP_DEP;
> > - change SPRN_MSRP.MSRP_DEP as you do not know when vcpu_load() will be
> called?
> >
> >
> > When guest_debug is cleared in debug_ioctl()
> > /* Clear MSRP_DEP in shadow_msrp so that guest can change MSR.DE. As
> > shadow_msrp is loaded on vcpu_load(), and it is not guaranteed that
> > vcpu_load() will be called just after debug_ioctl(), so also clear
> > MSRP_DEP in SPRN_MSRP. */
> 
> Same thing here again.
> 
> > - shadow_msrp | MSRP_DEP;
> > - change SPRN_MSRP.MSRP_DEP as you do not know when vcpu_load() will be
> called?
> >
> >>
> >> Also, what happens when user space disables guest_debug?
> >>
> >>> +#else
> >>> +		vcpu->arch.shadow_msr |= MSR_DE;
> >>> +		vcpu->arch.shared->msr &= ~MSR_DE; #endif
> >>> +	}
> >>> +}
> >>> +
> >>> /*
> >>> * Helper function for "full" MSR writes.  No need to call this if
> >>> only
> >>> * EE/CE/ME/DE/RI are changing.
> >>> @@ -150,6 +174,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
> >>> 	kvmppc_mmu_msr_notify(vcpu, old_msr);
> >>> 	kvmppc_vcpu_sync_spe(vcpu);
> >>> 	kvmppc_vcpu_sync_fpu(vcpu);
> >>> +	kvmppc_vcpu_sync_debug(vcpu);
> >>> }
> >>>
> >>> static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu, @@
> >>> -736,6 +761,9 @@ static int emulation_exit(struct kvm_run *run,
> >>> struct
> >> kvm_vcpu *vcpu)
> >>> 		run->exit_reason = KVM_EXIT_DCR;
> >>> 		return RESUME_HOST;
> >>>
> >>> +	case EMULATE_EXIT_USER:
> >>> +		return RESUME_HOST;
> >>
> >> This should get folded into the previous patch or be a separate one.
> >
> > ok
> >
> >>
> >>> +
> >>> 	case EMULATE_FAIL:
> >>> 		printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
> >>> 		       __func__, vcpu->arch.pc, vcpu->arch.last_inst); @@ -751,6
> >>> +779,30 @@ static int emulation_exit(struct kvm_run *run, struct
> >>> +kvm_vcpu
> >> *vcpu)
> >>> 	}
> >>> }
> >>>
> >>> +static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu
> >>> +*vcpu) {
> >>> +	u32 dbsr = mfspr(SPRN_DBSR);
> >>> +	mtspr(SPRN_DBSR, dbsr);
> >>
> >> This definitely deserves a comment :).
> >
> > Ok.
> >
> >> Also, are we non-preemptible here?
> >
> > This is called from kvmppc_handle_exit and interrupts are enabled, so I think
> this can be preempted.
> > But I do not think there is any issue as we clear DBSR in vpu_put().
> 
> Ok, make clear_dbsr a function then that you call from both places. That way
> it's more obvious.

Ok 

> 
> >
> >>
> >>> +
> >>> +	run->debug.arch.status = 0;
> >>> +	run->debug.arch.address = vcpu->arch.pc;
> >>> +
> >>> +	if (dbsr & (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) {
> >>> +		run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT;
> >>> +	} else {
> >>> +		if (dbsr & (DBSR_DAC1W | DBSR_DAC2W))
> >>> +			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_WRITE;
> >>> +		else if (dbsr & (DBSR_DAC1R | DBSR_DAC2R))
> >>> +			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_READ;
> >>> +		if (dbsr & (DBSR_DAC1R | DBSR_DAC1W))
> >>> +			run->debug.arch.address = vcpu->arch.shadow_dbg_reg.dac[0];
> >>> +		else if (dbsr & (DBSR_DAC2R | DBSR_DAC2W))
> >>> +			run->debug.arch.address = vcpu->arch.shadow_dbg_reg.dac[1];
> >>> +	}
> >>> +
> >>> +	return RESUME_HOST;
> >>
> >> Shouldn't this check for guest_debug and only go to user space if it
> >> asked for debugging? Can't you just leave it at the old code for
> !guest_debug?
> >
> > Will add, I thought that it is redundant as of now as debug interrupt in guest
> can come only when user space is debugging guest.
> 
> The problem is that user space might not expect a guest debug exit.

Ok

> 
> > clear DBSR
> > if (!guest_debug)
> > 	return RESUME_GUEST;
> 
> Can't you just do exactly what was done before when !guest_debug? We usually try
> to change semantics one step at a time.

What you mean by "done before" ? do you mean the removed code in "case BOOKE_INTERRUPT_DEBUG" code in kvmppc_handle_exit() ? If yes then that code was meaningless and not correct. Right thing should be clear the pending exception and resume the guest. when no one is in state to handle the exception. May be we can have WARN_ON_ONCE() 

> 
> >
> >>
> >>> +}
> >>> +
> >>> static void kvmppc_fill_pt_regs(struct pt_regs *regs) {
> >>> 	ulong r1, ip, msr, lr;
> >>> @@ -1110,18 +1162,10 @@ int kvmppc_handle_exit(struct kvm_run *run,
> >>> struct
> >> kvm_vcpu *vcpu,
> >>> 	}
> >>>
> >>> 	case BOOKE_INTERRUPT_DEBUG: {
> >>> -		u32 dbsr;
> >>> -
> >>> -		vcpu->arch.pc = mfspr(SPRN_CSRR0);
> >>> -
> >>> -		/* clear IAC events in DBSR register */
> >>> -		dbsr = mfspr(SPRN_DBSR);
> >>> -		dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4;
> >>> -		mtspr(SPRN_DBSR, dbsr);
> >>> -
> >>> -		run->exit_reason = KVM_EXIT_DEBUG;
> >>> +		r = kvmppc_handle_debug(run, vcpu);
> >>> +		if (r == RESUME_HOST)
> >>> +			run->exit_reason = KVM_EXIT_DEBUG;
> >>> 		kvmppc_account_exit(vcpu, DEBUG_EXITS);
> >>> -		r = RESUME_HOST;
> >>> 		break;
> >>> 	}
> >>>
> >>> @@ -1172,7 +1216,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
> >>> 	kvmppc_set_msr(vcpu, 0);
> >>>
> >>> #ifndef CONFIG_KVM_BOOKE_HV
> >>> -	vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS;
> >>> +	vcpu->arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS;
> >>> 	vcpu->arch.shadow_pid = 1;
> >>> 	vcpu->arch.shared->msr = 0;
> >>> #endif
> >>> @@ -1527,12 +1571,6 @@ int kvm_vcpu_ioctl_set_one_reg(struct
> >>> kvm_vcpu *vcpu,
> >> struct kvm_one_reg *reg)
> >>> 	return r;
> >>> }
> >>>
> >>> -int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
> >>> -					 struct kvm_guest_debug *dbg)
> >>> -{
> >>> -	return -EINVAL;
> >>> -}
> >>> -
> >>> int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct
> >>> kvm_fpu
> >>> *fpu) {
> >>> 	return -ENOTSUPP;
> >>> @@ -1638,16 +1676,194 @@ void kvmppc_decrementer_func(unsigned long data)
> >>> 	kvmppc_set_tsr_bits(vcpu, TSR_DIS); }
> >>>
> >>> +static void kvmppc_booke_vcpu_put_debug_regs(struct kvm_vcpu *vcpu) {
> >>> +	if (!vcpu->arch.debug_save_restore)
> >>> +		return;
> >>> +
> >>> +	/* Restore Host Context. Disable all debug events First */
> >>> +	mtspr(SPRN_DBCR0, 0x0);
> >>> +	/* Disable pending debug event by Clearing DBSR */
> >>> +	mtspr(SPRN_DBSR, mfspr(SPRN_DBSR));
> >>> +
> >>> +	mtspr(SPRN_DBCR1, vcpu->arch.host_dbg_reg.dbcr1);
> >>> +	mtspr(SPRN_DBCR2, vcpu->arch.host_dbg_reg.dbcr2); #ifdef
> >>> +CONFIG_KVM_E500MC
> >>> +	mtspr(SPRN_DBCR4, vcpu->arch.host_dbg_reg.dbcr4); #endif
> >>> +	mtspr(SPRN_IAC1, vcpu->arch.host_dbg_reg.iac[0]);
> >>> +	mtspr(SPRN_IAC2, vcpu->arch.host_dbg_reg.iac[1]); #if
> >>> +CONFIG_PPC_ADV_DEBUG_IACS > 2
> >>> +	mtspr(SPRN_IAC3, vcpu->arch.host_dbg_reg.iac[2]);
> >>> +	mtspr(SPRN_IAC4, vcpu->arch.host_dbg_reg.iac[3]); #endif
> >>> +	mtspr(SPRN_DAC1, vcpu->arch.host_dbg_reg.dac[0]);
> >>> +	mtspr(SPRN_DAC2, vcpu->arch.host_dbg_reg.dac[1]);
> >>> +
> >>> +	/* Enable debug events after all other debug registers restored */
> >>> +	mtspr(SPRN_DBCR0, vcpu->arch.host_dbg_reg.dbcr0);
> >>
> >> How does the normal debug register switching code work in Linux?
> >> Can't we just reuse that? Or rely on it to restore working state when
> >> another process gets scheduled in?
> >
> > Good point, I can see debug registers loading in function __switch_to()-
> >switch_booke_debug_regs() in file arch/powerpc/kernel/process.c.
> > So as long as assume that host will not use debug resources we can rely on
> this restore. But I am not sure that this is a fare assumption. As Scott earlier
> mentioned someone can use debug resource for kernel debugging also.
> 
> Someone in the kernel can also use floating point registers. But then it's his
> responsibility to clean up the mess he leaves behind.

I am neither convinced by what you said and nor even have much reason to oppose :)

Scott, 
	I remember you mentioned that host can use debug resources, you comment on this ?

> 
> >
> >>
> >>> +
> >>> +	/* Host debug register are restored */
> >>> +	vcpu->arch.debug_save_restore = false; }
> >>> +
> >>> +static void kvmppc_booke_vcpu_load_debug_regs(struct kvm_vcpu
> >>> +*vcpu) {
> >>> +	/*
> >>> +	 * Check whether guest still need debug resource, if not then there
> >>> +	 * is no need to resotre guest context.
> >>
> >> restore
> >>
> >>> +	 */
> >>> +	if (!vcpu->arch.shadow_dbg_reg.dbcr0)
> >>> +		return;
> >>
> >> Are we guaranteed that debugging is disabled here?
> >
> > Ah, I should be clearing the hw DBCR0 and DBSR.
> >
> >> We don't want to get debug
> >> exceptions that were meant for some other process.
> >>
> >>> +
> >>> +	/*
> >>> +	 * Save Host debug register only after restore. If host debug
> >>> +	 * registers are saved and not restored then do not save again.
> >>> +	 */
> >>> +	if (!vcpu->arch.debug_save_restore) {
> >>> +		/* Save Host context */
> >>> +		vcpu->arch.host_dbg_reg.dbcr0 = mfspr(SPRN_DBCR0);
> >>> +		vcpu->arch.host_dbg_reg.dbcr1 = mfspr(SPRN_DBCR1);
> >>> +		vcpu->arch.host_dbg_reg.dbcr2 = mfspr(SPRN_DBCR2); #ifdef
> >>> +CONFIG_KVM_E500MC
> >>> +		vcpu->arch.host_dbg_reg.dbcr4 = mfspr(SPRN_DBCR4); #endif
> >>> +		vcpu->arch.host_dbg_reg.iac[0] = mfspr(SPRN_IAC1);
> >>> +		vcpu->arch.host_dbg_reg.iac[1] = mfspr(SPRN_IAC2); #if
> >>> +CONFIG_PPC_ADV_DEBUG_IACS > 2
> >>> +		vcpu->arch.host_dbg_reg.iac[2] = mfspr(SPRN_IAC3);
> >>> +		vcpu->arch.host_dbg_reg.iac[3] = mfspr(SPRN_IAC4); #endif
> >>> +		vcpu->arch.host_dbg_reg.dac[0] = mfspr(SPRN_DAC1);
> >>> +		vcpu->arch.host_dbg_reg.dac[1] = mfspr(SPRN_DAC2);
> >>> +		vcpu->arch.debug_save_restore = true;
> >>> +	}
> >>> +
> >>> +	/* Restore Guest Context. Disable all debug events First */
> >>> +	mtspr(SPRN_DBCR0, 0x0);
> >>> +	/* Clear h/w DBSR */
> >>> +	mtspr(SPRN_DBSR, mfspr(SPRN_DBSR));
> >>> +
> >>> +	mtspr(SPRN_DBCR1, vcpu->arch.shadow_dbg_reg.dbcr1);
> >>> +	mtspr(SPRN_DBCR2, vcpu->arch.shadow_dbg_reg.dbcr2); #ifdef
> >>> +CONFIG_KVM_E500MC
> >>> +	mtspr(SPRN_DBCR4, vcpu->arch.shadow_dbg_reg.dbcr4); #endif
> >>> +	mtspr(SPRN_IAC1, vcpu->arch.shadow_dbg_reg.iac[0]);
> >>> +	mtspr(SPRN_IAC2, vcpu->arch.shadow_dbg_reg.iac[1]);
> >>> +#if CONFIG_PPC_ADV_DEBUG_IACS > 2
> >>> +	mtspr(SPRN_IAC3, vcpu->arch.shadow_dbg_reg.iac[2]);
> >>> +	mtspr(SPRN_IAC4, vcpu->arch.shadow_dbg_reg.iac[3]);
> >>> +#endif
> >>> +	mtspr(SPRN_DAC1, vcpu->arch.shadow_dbg_reg.dac[0]);
> >>> +	mtspr(SPRN_DAC2, vcpu->arch.shadow_dbg_reg.dac[1]);
> >>> +
> >>> +	/* Enable debug events after other debug registers restored */
> >>> +	mtspr(SPRN_DBCR0, vcpu->arch.shadow_dbg_reg.dbcr0); }
> >>> +
> >>> +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
> >>> +					 struct kvm_guest_debug *dbg)
> >>> +{
> >>> +	struct kvmppc_booke_debug_reg *dbg_reg;
> >>> +	int n, b = 0, w = 0;
> >>> +	const u32 bp_code[] = {
> >>> +		DBCR0_IAC1 | DBCR0_IDM,
> >>> +		DBCR0_IAC2 | DBCR0_IDM,
> >>> +		DBCR0_IAC3 | DBCR0_IDM,
> >>> +		DBCR0_IAC4 | DBCR0_IDM
> >>> +	};
> >>> +	const u32 wp_code[] = {
> >>> +		DBCR0_DAC1W | DBCR0_IDM,
> >>> +		DBCR0_DAC2W | DBCR0_IDM,
> >>> +		DBCR0_DAC1R | DBCR0_IDM,
> >>> +		DBCR0_DAC2R | DBCR0_IDM
> >>> +	};
> >>> +
> >>> +	if (!(dbg->control & KVM_GUESTDBG_ENABLE)) {
> >>> +		/* Clear All debug events */
> >>> +		vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
> >>> +		vcpu->guest_debug = 0;
> >>
> >> Ah, this is where we disable guest_debug. This needs to enable
> >> guest_debug for the guest again, so you need to remove the DE bit from
> shadow_msrp here.
> >>
> >>> +		return 0;
> >>> +	}
> >>> +
> >>> +	vcpu->guest_debug = dbg->control;
> >>> +	vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
> >>> +	/* Set DBCR0_EDM in guest visible DBCR0 register. */
> >>> +	vcpu->arch.dbg_reg.dbcr0 = DBCR0_EDM;
> >>> +
> >>> +	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
> >>> +		vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC;
> >>> +
> >>> +	if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
> >>> +		/* Code below handles only HW breakpoints */
> >>> +		kvmppc_booke_vcpu_load_debug_regs(vcpu);
> >>> +		return 0;
> >>> +	}
> >>> +
> >>> +	dbg_reg = &(vcpu->arch.shadow_dbg_reg);
> >>> +
> >>> +	/*
> >>> +	 * On BOOKE (e500v2); Set DBCR1 and DBCR2 to allow debug events
> >>> +	 * to occur when MSR.PR is set.
> >>> +	 * On BOOKE-HV (e500mc+); MSR.PR = 0 when guest is running. So we
> >>> +	 * should clear DBCR1 and DBCR2.
> >>> +	 */
> >>> +#ifdef CONFIG_KVM_BOOKE_HV
> >>> +	dbg_reg->dbcr1 = 0;
> >>> +	dbg_reg->dbcr2 = 0;
> >>
> >> Does that mean we can't debug guest user space?
> >
> > Yes
> 
> This is wrong.

Really, So far I am assuming qemu debug stub is not mean for debugging guest application.

> 
> >
> >>
> >>> +#else
> >>> +	dbg_reg->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US | DBCR1_IAC3US |
> >>> +			  DBCR1_IAC4US;
> >>> +	dbg_reg->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US; #endif
> >>> +
> >>> +	for (n = 0; n < (KVMPPC_BOOKE_IAC_NUM + KVMPPC_BOOKE_DAC_NUM); n++) {
> >>> +		u32 type = dbg->arch.bp[n].type;
> >>> +
> >>> +		if (!type)
> >>> +			break;
> >>
> >> Not continue?
> >
> > I think wrong type should also mean error (INVALID)
> 
> Then make it an error :).
> 
> >
> >>
> >>> +
> >>> +		if (type & (KVMPPC_DEBUG_WATCH_READ |
> >>> +			    KVMPPC_DEBUG_WATCH_WRITE)) {
> >>> +			if (w >= KVMPPC_BOOKE_DAC_NUM)
> >>> +				continue;
> >>
> >> This should result in an error, no?
> >
> > I do not think we need error, as gdb itself will through warning when trying
> to set more than required hardware breakpoint and watchpoint.
> 
> It's gdb's decision to print a warning. We should at least give gdb the chance
> to do so and continue.

Ok.

Thanks
-Bharat

> 
> 
> Alex
> 


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Graf April 2, 2013, 3:41 p.m. UTC | #5
On 04/02/2013 04:09 PM, Bhushan Bharat-R65777 wrote:
>
>> -----Original Message-----
>> From: Alexander Graf [mailto:agraf@suse.de]
>> Sent: Tuesday, April 02, 2013 1:57 PM
>> To: Bhushan Bharat-R65777
>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Wood Scott-B07421
>> Subject: Re: [PATCH 4/4 v2] KVM: PPC: Add userspace debug stub support
>>
>>
>> On 29.03.2013, at 07:04, Bhushan Bharat-R65777 wrote:
>>
>>>
>>>> -----Original Message-----
>>>> From: Alexander Graf [mailto:agraf@suse.de]
>>>> Sent: Thursday, March 28, 2013 10:06 PM
>>>> To: Bhushan Bharat-R65777
>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Wood Scott-B07421;
>>>> Bhushan
>>>> Bharat-R65777
>>>> Subject: Re: [PATCH 4/4 v2] KVM: PPC: Add userspace debug stub
>>>> support
>>>>
>>>>
>>>> On 21.03.2013, at 07:25, Bharat Bhushan wrote:
>>>>
>>>>> From: Bharat Bhushan<bharat.bhushan@freescale.com>
>>>>>
>>>>> This patch adds the debug stub support on booke/bookehv.
>>>>> Now QEMU debug stub can use hw breakpoint, watchpoint and software
>>>>> breakpoint to debug guest.
>>>>>
>>>>> Debug registers are saved/restored on vcpu_put()/vcpu_get().
>>>>> Also the debug registers are saved restored only if guest is using
>>>>> debug resources.
>>>>>
>>>>> Signed-off-by: Bharat Bhushan<bharat.bhushan@freescale.com>
>>>>> ---
>>>>> v2:
>>>>> - save/restore in vcpu_get()/vcpu_put()
>>>>> - some more minor cleanup based on review comments.
>>>>>
>>>>> arch/powerpc/include/asm/kvm_host.h |   10 ++
>>>>> arch/powerpc/include/uapi/asm/kvm.h |   22 +++-
>>>>> arch/powerpc/kvm/booke.c            |  252 ++++++++++++++++++++++++++++++++-
>> --
>>>>> arch/powerpc/kvm/e500_emulate.c     |   10 ++
>>>>> 4 files changed, 272 insertions(+), 22 deletions(-)
>>>>>
>>>>> diff --git a/arch/powerpc/include/asm/kvm_host.h
>>>>> b/arch/powerpc/include/asm/kvm_host.h
>>>>> index f4ba881..8571952 100644
>>>>> --- a/arch/powerpc/include/asm/kvm_host.h
>>>>> +++ b/arch/powerpc/include/asm/kvm_host.h
>>>>> @@ -504,7 +504,17 @@ struct kvm_vcpu_arch {
>>>>> 	u32 mmucfg;
>>>>> 	u32 epr;
>>>>> 	u32 crit_save;
>>>>> +	/* guest debug registers*/
>>>>> 	struct kvmppc_booke_debug_reg dbg_reg;
>>>>> +	/* shadow debug registers */
>>>>> +	struct kvmppc_booke_debug_reg shadow_dbg_reg;
>>>>> +	/* host debug registers*/
>>>>> +	struct kvmppc_booke_debug_reg host_dbg_reg;
>>>>> +	/*
>>>>> +	 * Flag indicating that debug registers are used by guest
>>>>> +	 * and requires save restore.
>>>>> +	*/
>>>>> +	bool debug_save_restore;
>>>>> #endif
>>>>> 	gpa_t paddr_accessed;
>>>>> 	gva_t vaddr_accessed;
>>>>> diff --git a/arch/powerpc/include/uapi/asm/kvm.h
>>>>> b/arch/powerpc/include/uapi/asm/kvm.h
>>>>> index 15f9a00..d7ce449 100644
>>>>> --- a/arch/powerpc/include/uapi/asm/kvm.h
>>>>> +++ b/arch/powerpc/include/uapi/asm/kvm.h
>>>>> @@ -25,6 +25,7 @@
>>>>> /* Select powerpc specific features in<linux/kvm.h>  */ #define
>>>>> __KVM_HAVE_SPAPR_TCE #define __KVM_HAVE_PPC_SMT
>>>>> +#define __KVM_HAVE_GUEST_DEBUG
>>>>>
>>>>> struct kvm_regs {
>>>>> 	__u64 pc;
>>>>> @@ -267,7 +268,24 @@ struct kvm_fpu {
>>>>> 	__u64 fpr[32];
>>>>> };
>>>>>
>>>>> +/*
>>>>> + * Defines for h/w breakpoint, watchpoint (read, write or both) and
>>>>> + * software breakpoint.
>>>>> + * These are used as "type" in KVM_SET_GUEST_DEBUG ioctl and "status"
>>>>> + * for KVM_DEBUG_EXIT.
>>>>> + */
>>>>> +#define KVMPPC_DEBUG_NONE		0x0
>>>>> +#define KVMPPC_DEBUG_BREAKPOINT		(1UL<<  1)
>>>>> +#define KVMPPC_DEBUG_WATCH_WRITE	(1UL<<  2)
>>>>> +#define KVMPPC_DEBUG_WATCH_READ		(1UL<<  3)
>>>>> struct kvm_debug_exit_arch {
>>>>> +	__u64 address;
>>>>> +	/*
>>>>> +	 * exiting to userspace because of h/w breakpoint, watchpoint
>>>>> +	 * (read, write or both) and software breakpoint.
>>>>> +	 */
>>>>> +	__u32 status;
>>>>> +	__u32 reserved;
>>>>> };
>>>>>
>>>>> /* for KVM_SET_GUEST_DEBUG */
>>>>> @@ -279,10 +297,6 @@ struct kvm_guest_debug_arch {
>>>>> 		 * Type denotes h/w breakpoint, read watchpoint, write
>>>>> 		 * watchpoint or watchpoint (both read and write).
>>>>> 		 */
>>>>> -#define KVMPPC_DEBUG_NOTYPE		0x0
>>>>> -#define KVMPPC_DEBUG_BREAKPOINT		(1UL<<  1)
>>>>> -#define KVMPPC_DEBUG_WATCH_WRITE	(1UL<<  2)
>>>>> -#define KVMPPC_DEBUG_WATCH_READ		(1UL<<  3)
>>>>> 		__u32 type;
>>>>> 		__u32 reserved;
>>>>> 	} bp[16];
>>>>> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
>>>>> index
>>>>> 1de93a8..bf20056 100644
>>>>> --- a/arch/powerpc/kvm/booke.c
>>>>> +++ b/arch/powerpc/kvm/booke.c
>>>>> @@ -133,6 +133,30 @@ static void kvmppc_vcpu_sync_fpu(struct
>>>>> kvm_vcpu
>>>>> *vcpu) #endif }
>>>>>
>>>>> +static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu) {
>>>>> +	/* Synchronize guest's desire to get debug interrupts into shadow
>>>>> +MSR */ #ifndef CONFIG_KVM_BOOKE_HV
>>>>> +	vcpu->arch.shadow_msr&= ~MSR_DE;
>>>>> +	vcpu->arch.shadow_msr |= vcpu->arch.shared->msr&  MSR_DE; #endif
>>>>> +
>>>>> +	/* Force enable debug interrupts when user space wants to debug */
>>>>> +	if (vcpu->guest_debug) {
>>>>> +#ifdef CONFIG_KVM_BOOKE_HV
>>>>> +		/*
>>>>> +		 * Since there is no shadow MSR, sync MSR_DE into the guest
>>>>> +		 * visible MSR. Do not allow guest to change MSR[DE].
>>>>> +		 */
>>>>> +		vcpu->arch.shared->msr |= MSR_DE;
>>>>> +		mtspr(SPRN_MSRP, mfspr(SPRN_MSRP) | MSRP_DEP);
>>>> This mtspr should really just be a bit or in shadow_mspr when
>>>> guest_debug gets enabled. It should automatically get synchronized as
>>>> soon as the next
>>>> vpcu_load() happens.
>>> I think this is not required here as shadow_dbsr already have MSRP_DEP set.
>>>
>>> Will setup shadow_msrp when setting guest_debug and clear shadow_msrp when
>> guest_debug is cleared.
>>> But that will also not be sufficient as it not sure when vcpu_load()
>>> will be called after the shadow_msrp is changed. So
>>>
>>> When guest_debug is set in debug_ioctl()
>>> /* Set MSRP_DEP in shadow_msrp so that guest cannot change MSR.DE. As
>>> shadow_msrp is loaded on vcpu_load(), and it is not guaranteed that
>>> vcpu_load() will be called just after debug_ioctl(), so also set
>>> MSRP_DEP in SPRN_MSRP. */
>> debug_ioctl is an ioctl. So while that ioctl is on, the vcpu is not running.
>> After that ioctl user space calls another VCPU_RUN ioctl which invokes
>> vcpu_load.
> That's good. I was not aware of vcpu_load() is being called before ioctl handling in KVM :)
>
>>> - shadow_msrp | MSRP_DEP;
>>> - change SPRN_MSRP.MSRP_DEP as you do not know when vcpu_load() will be
>> called?
>>>
>>> When guest_debug is cleared in debug_ioctl()
>>> /* Clear MSRP_DEP in shadow_msrp so that guest can change MSR.DE. As
>>> shadow_msrp is loaded on vcpu_load(), and it is not guaranteed that
>>> vcpu_load() will be called just after debug_ioctl(), so also clear
>>> MSRP_DEP in SPRN_MSRP. */
>> Same thing here again.
>>
>>> - shadow_msrp | MSRP_DEP;
>>> - change SPRN_MSRP.MSRP_DEP as you do not know when vcpu_load() will be
>> called?
>>>> Also, what happens when user space disables guest_debug?
>>>>
>>>>> +#else
>>>>> +		vcpu->arch.shadow_msr |= MSR_DE;
>>>>> +		vcpu->arch.shared->msr&= ~MSR_DE; #endif
>>>>> +	}
>>>>> +}
>>>>> +
>>>>> /*
>>>>> * Helper function for "full" MSR writes.  No need to call this if
>>>>> only
>>>>> * EE/CE/ME/DE/RI are changing.
>>>>> @@ -150,6 +174,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
>>>>> 	kvmppc_mmu_msr_notify(vcpu, old_msr);
>>>>> 	kvmppc_vcpu_sync_spe(vcpu);
>>>>> 	kvmppc_vcpu_sync_fpu(vcpu);
>>>>> +	kvmppc_vcpu_sync_debug(vcpu);
>>>>> }
>>>>>
>>>>> static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu, @@
>>>>> -736,6 +761,9 @@ static int emulation_exit(struct kvm_run *run,
>>>>> struct
>>>> kvm_vcpu *vcpu)
>>>>> 		run->exit_reason = KVM_EXIT_DCR;
>>>>> 		return RESUME_HOST;
>>>>>
>>>>> +	case EMULATE_EXIT_USER:
>>>>> +		return RESUME_HOST;
>>>> This should get folded into the previous patch or be a separate one.
>>> ok
>>>
>>>>> +
>>>>> 	case EMULATE_FAIL:
>>>>> 		printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
>>>>> 		       __func__, vcpu->arch.pc, vcpu->arch.last_inst); @@ -751,6
>>>>> +779,30 @@ static int emulation_exit(struct kvm_run *run, struct
>>>>> +kvm_vcpu
>>>> *vcpu)
>>>>> 	}
>>>>> }
>>>>>
>>>>> +static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu
>>>>> +*vcpu) {
>>>>> +	u32 dbsr = mfspr(SPRN_DBSR);
>>>>> +	mtspr(SPRN_DBSR, dbsr);
>>>> This definitely deserves a comment :).
>>> Ok.
>>>
>>>> Also, are we non-preemptible here?
>>> This is called from kvmppc_handle_exit and interrupts are enabled, so I think
>> this can be preempted.
>>> But I do not think there is any issue as we clear DBSR in vpu_put().
>> Ok, make clear_dbsr a function then that you call from both places. That way
>> it's more obvious.
> Ok
>
>>>>> +
>>>>> +	run->debug.arch.status = 0;
>>>>> +	run->debug.arch.address = vcpu->arch.pc;
>>>>> +
>>>>> +	if (dbsr&  (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) {
>>>>> +		run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT;
>>>>> +	} else {
>>>>> +		if (dbsr&  (DBSR_DAC1W | DBSR_DAC2W))
>>>>> +			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_WRITE;
>>>>> +		else if (dbsr&  (DBSR_DAC1R | DBSR_DAC2R))
>>>>> +			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_READ;
>>>>> +		if (dbsr&  (DBSR_DAC1R | DBSR_DAC1W))
>>>>> +			run->debug.arch.address = vcpu->arch.shadow_dbg_reg.dac[0];
>>>>> +		else if (dbsr&  (DBSR_DAC2R | DBSR_DAC2W))
>>>>> +			run->debug.arch.address = vcpu->arch.shadow_dbg_reg.dac[1];
>>>>> +	}
>>>>> +
>>>>> +	return RESUME_HOST;
>>>> Shouldn't this check for guest_debug and only go to user space if it
>>>> asked for debugging? Can't you just leave it at the old code for
>> !guest_debug?
>>> Will add, I thought that it is redundant as of now as debug interrupt in guest
>> can come only when user space is debugging guest.
>>
>> The problem is that user space might not expect a guest debug exit.
> Ok
>
>>> clear DBSR
>>> if (!guest_debug)
>>> 	return RESUME_GUEST;
>> Can't you just do exactly what was done before when !guest_debug? We usually try
>> to change semantics one step at a time.
> What you mean by "done before" ? do you mean the removed code in "case BOOKE_INTERRUPT_DEBUG" code in kvmppc_handle_exit() ? If yes then that code was meaningless and not correct.

Then change it in a separate patch. One patch, one change. Otherwise 
bisectability suffers.

> Right thing should be clear the pending exception and resume the guest. when no one is in state to handle the exception. May be we can have WARN_ON_ONCE()
>
>>>>> +}
>>>>> +
>>>>> static void kvmppc_fill_pt_regs(struct pt_regs *regs) {
>>>>> 	ulong r1, ip, msr, lr;
>>>>> @@ -1110,18 +1162,10 @@ int kvmppc_handle_exit(struct kvm_run *run,
>>>>> struct
>>>> kvm_vcpu *vcpu,
>>>>> 	}
>>>>>
>>>>> 	case BOOKE_INTERRUPT_DEBUG: {
>>>>> -		u32 dbsr;
>>>>> -
>>>>> -		vcpu->arch.pc = mfspr(SPRN_CSRR0);
>>>>> -
>>>>> -		/* clear IAC events in DBSR register */
>>>>> -		dbsr = mfspr(SPRN_DBSR);
>>>>> -		dbsr&= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4;
>>>>> -		mtspr(SPRN_DBSR, dbsr);
>>>>> -
>>>>> -		run->exit_reason = KVM_EXIT_DEBUG;
>>>>> +		r = kvmppc_handle_debug(run, vcpu);
>>>>> +		if (r == RESUME_HOST)
>>>>> +			run->exit_reason = KVM_EXIT_DEBUG;
>>>>> 		kvmppc_account_exit(vcpu, DEBUG_EXITS);
>>>>> -		r = RESUME_HOST;
>>>>> 		break;
>>>>> 	}
>>>>>
>>>>> @@ -1172,7 +1216,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
>>>>> 	kvmppc_set_msr(vcpu, 0);
>>>>>
>>>>> #ifndef CONFIG_KVM_BOOKE_HV
>>>>> -	vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS;
>>>>> +	vcpu->arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS;
>>>>> 	vcpu->arch.shadow_pid = 1;
>>>>> 	vcpu->arch.shared->msr = 0;
>>>>> #endif
>>>>> @@ -1527,12 +1571,6 @@ int kvm_vcpu_ioctl_set_one_reg(struct
>>>>> kvm_vcpu *vcpu,
>>>> struct kvm_one_reg *reg)
>>>>> 	return r;
>>>>> }
>>>>>
>>>>> -int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
>>>>> -					 struct kvm_guest_debug *dbg)
>>>>> -{
>>>>> -	return -EINVAL;
>>>>> -}
>>>>> -
>>>>> int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct
>>>>> kvm_fpu
>>>>> *fpu) {
>>>>> 	return -ENOTSUPP;
>>>>> @@ -1638,16 +1676,194 @@ void kvmppc_decrementer_func(unsigned long data)
>>>>> 	kvmppc_set_tsr_bits(vcpu, TSR_DIS); }
>>>>>
>>>>> +static void kvmppc_booke_vcpu_put_debug_regs(struct kvm_vcpu *vcpu) {
>>>>> +	if (!vcpu->arch.debug_save_restore)
>>>>> +		return;
>>>>> +
>>>>> +	/* Restore Host Context. Disable all debug events First */
>>>>> +	mtspr(SPRN_DBCR0, 0x0);
>>>>> +	/* Disable pending debug event by Clearing DBSR */
>>>>> +	mtspr(SPRN_DBSR, mfspr(SPRN_DBSR));
>>>>> +
>>>>> +	mtspr(SPRN_DBCR1, vcpu->arch.host_dbg_reg.dbcr1);
>>>>> +	mtspr(SPRN_DBCR2, vcpu->arch.host_dbg_reg.dbcr2); #ifdef
>>>>> +CONFIG_KVM_E500MC
>>>>> +	mtspr(SPRN_DBCR4, vcpu->arch.host_dbg_reg.dbcr4); #endif
>>>>> +	mtspr(SPRN_IAC1, vcpu->arch.host_dbg_reg.iac[0]);
>>>>> +	mtspr(SPRN_IAC2, vcpu->arch.host_dbg_reg.iac[1]); #if
>>>>> +CONFIG_PPC_ADV_DEBUG_IACS>  2
>>>>> +	mtspr(SPRN_IAC3, vcpu->arch.host_dbg_reg.iac[2]);
>>>>> +	mtspr(SPRN_IAC4, vcpu->arch.host_dbg_reg.iac[3]); #endif
>>>>> +	mtspr(SPRN_DAC1, vcpu->arch.host_dbg_reg.dac[0]);
>>>>> +	mtspr(SPRN_DAC2, vcpu->arch.host_dbg_reg.dac[1]);
>>>>> +
>>>>> +	/* Enable debug events after all other debug registers restored */
>>>>> +	mtspr(SPRN_DBCR0, vcpu->arch.host_dbg_reg.dbcr0);
>>>> How does the normal debug register switching code work in Linux?
>>>> Can't we just reuse that? Or rely on it to restore working state when
>>>> another process gets scheduled in?
>>> Good point, I can see debug registers loading in function __switch_to()-
>>> switch_booke_debug_regs() in file arch/powerpc/kernel/process.c.
>>> So as long as assume that host will not use debug resources we can rely on
>> this restore. But I am not sure that this is a fare assumption. As Scott earlier
>> mentioned someone can use debug resource for kernel debugging also.
>>
>> Someone in the kernel can also use floating point registers. But then it's his
>> responsibility to clean up the mess he leaves behind.
> I am neither convinced by what you said and nor even have much reason to oppose :)
>
> Scott,
> 	I remember you mentioned that host can use debug resources, you comment on this ?
>
>>>>> +
>>>>> +	/* Host debug register are restored */
>>>>> +	vcpu->arch.debug_save_restore = false; }
>>>>> +
>>>>> +static void kvmppc_booke_vcpu_load_debug_regs(struct kvm_vcpu
>>>>> +*vcpu) {
>>>>> +	/*
>>>>> +	 * Check whether guest still need debug resource, if not then there
>>>>> +	 * is no need to resotre guest context.
>>>> restore
>>>>
>>>>> +	 */
>>>>> +	if (!vcpu->arch.shadow_dbg_reg.dbcr0)
>>>>> +		return;
>>>> Are we guaranteed that debugging is disabled here?
>>> Ah, I should be clearing the hw DBCR0 and DBSR.
>>>
>>>> We don't want to get debug
>>>> exceptions that were meant for some other process.
>>>>
>>>>> +
>>>>> +	/*
>>>>> +	 * Save Host debug register only after restore. If host debug
>>>>> +	 * registers are saved and not restored then do not save again.
>>>>> +	 */
>>>>> +	if (!vcpu->arch.debug_save_restore) {
>>>>> +		/* Save Host context */
>>>>> +		vcpu->arch.host_dbg_reg.dbcr0 = mfspr(SPRN_DBCR0);
>>>>> +		vcpu->arch.host_dbg_reg.dbcr1 = mfspr(SPRN_DBCR1);
>>>>> +		vcpu->arch.host_dbg_reg.dbcr2 = mfspr(SPRN_DBCR2); #ifdef
>>>>> +CONFIG_KVM_E500MC
>>>>> +		vcpu->arch.host_dbg_reg.dbcr4 = mfspr(SPRN_DBCR4); #endif
>>>>> +		vcpu->arch.host_dbg_reg.iac[0] = mfspr(SPRN_IAC1);
>>>>> +		vcpu->arch.host_dbg_reg.iac[1] = mfspr(SPRN_IAC2); #if
>>>>> +CONFIG_PPC_ADV_DEBUG_IACS>  2
>>>>> +		vcpu->arch.host_dbg_reg.iac[2] = mfspr(SPRN_IAC3);
>>>>> +		vcpu->arch.host_dbg_reg.iac[3] = mfspr(SPRN_IAC4); #endif
>>>>> +		vcpu->arch.host_dbg_reg.dac[0] = mfspr(SPRN_DAC1);
>>>>> +		vcpu->arch.host_dbg_reg.dac[1] = mfspr(SPRN_DAC2);
>>>>> +		vcpu->arch.debug_save_restore = true;
>>>>> +	}
>>>>> +
>>>>> +	/* Restore Guest Context. Disable all debug events First */
>>>>> +	mtspr(SPRN_DBCR0, 0x0);
>>>>> +	/* Clear h/w DBSR */
>>>>> +	mtspr(SPRN_DBSR, mfspr(SPRN_DBSR));
>>>>> +
>>>>> +	mtspr(SPRN_DBCR1, vcpu->arch.shadow_dbg_reg.dbcr1);
>>>>> +	mtspr(SPRN_DBCR2, vcpu->arch.shadow_dbg_reg.dbcr2); #ifdef
>>>>> +CONFIG_KVM_E500MC
>>>>> +	mtspr(SPRN_DBCR4, vcpu->arch.shadow_dbg_reg.dbcr4); #endif
>>>>> +	mtspr(SPRN_IAC1, vcpu->arch.shadow_dbg_reg.iac[0]);
>>>>> +	mtspr(SPRN_IAC2, vcpu->arch.shadow_dbg_reg.iac[1]);
>>>>> +#if CONFIG_PPC_ADV_DEBUG_IACS>  2
>>>>> +	mtspr(SPRN_IAC3, vcpu->arch.shadow_dbg_reg.iac[2]);
>>>>> +	mtspr(SPRN_IAC4, vcpu->arch.shadow_dbg_reg.iac[3]);
>>>>> +#endif
>>>>> +	mtspr(SPRN_DAC1, vcpu->arch.shadow_dbg_reg.dac[0]);
>>>>> +	mtspr(SPRN_DAC2, vcpu->arch.shadow_dbg_reg.dac[1]);
>>>>> +
>>>>> +	/* Enable debug events after other debug registers restored */
>>>>> +	mtspr(SPRN_DBCR0, vcpu->arch.shadow_dbg_reg.dbcr0); }
>>>>> +
>>>>> +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
>>>>> +					 struct kvm_guest_debug *dbg)
>>>>> +{
>>>>> +	struct kvmppc_booke_debug_reg *dbg_reg;
>>>>> +	int n, b = 0, w = 0;
>>>>> +	const u32 bp_code[] = {
>>>>> +		DBCR0_IAC1 | DBCR0_IDM,
>>>>> +		DBCR0_IAC2 | DBCR0_IDM,
>>>>> +		DBCR0_IAC3 | DBCR0_IDM,
>>>>> +		DBCR0_IAC4 | DBCR0_IDM
>>>>> +	};
>>>>> +	const u32 wp_code[] = {
>>>>> +		DBCR0_DAC1W | DBCR0_IDM,
>>>>> +		DBCR0_DAC2W | DBCR0_IDM,
>>>>> +		DBCR0_DAC1R | DBCR0_IDM,
>>>>> +		DBCR0_DAC2R | DBCR0_IDM
>>>>> +	};
>>>>> +
>>>>> +	if (!(dbg->control&  KVM_GUESTDBG_ENABLE)) {
>>>>> +		/* Clear All debug events */
>>>>> +		vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
>>>>> +		vcpu->guest_debug = 0;
>>>> Ah, this is where we disable guest_debug. This needs to enable
>>>> guest_debug for the guest again, so you need to remove the DE bit from
>> shadow_msrp here.
>>>>> +		return 0;
>>>>> +	}
>>>>> +
>>>>> +	vcpu->guest_debug = dbg->control;
>>>>> +	vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
>>>>> +	/* Set DBCR0_EDM in guest visible DBCR0 register. */
>>>>> +	vcpu->arch.dbg_reg.dbcr0 = DBCR0_EDM;
>>>>> +
>>>>> +	if (vcpu->guest_debug&  KVM_GUESTDBG_SINGLESTEP)
>>>>> +		vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC;
>>>>> +
>>>>> +	if (!(vcpu->guest_debug&  KVM_GUESTDBG_USE_HW_BP)) {
>>>>> +		/* Code below handles only HW breakpoints */
>>>>> +		kvmppc_booke_vcpu_load_debug_regs(vcpu);
>>>>> +		return 0;
>>>>> +	}
>>>>> +
>>>>> +	dbg_reg =&(vcpu->arch.shadow_dbg_reg);
>>>>> +
>>>>> +	/*
>>>>> +	 * On BOOKE (e500v2); Set DBCR1 and DBCR2 to allow debug events
>>>>> +	 * to occur when MSR.PR is set.
>>>>> +	 * On BOOKE-HV (e500mc+); MSR.PR = 0 when guest is running. So we
>>>>> +	 * should clear DBCR1 and DBCR2.
>>>>> +	 */
>>>>> +#ifdef CONFIG_KVM_BOOKE_HV
>>>>> +	dbg_reg->dbcr1 = 0;
>>>>> +	dbg_reg->dbcr2 = 0;
>>>> Does that mean we can't debug guest user space?
>>> Yes
>> This is wrong.
> Really, So far I am assuming qemu debug stub is not mean for debugging guest application.

Ok, let me rephrase: This is confusing. You do trap in PR mode on 
e500v2. IIRC x86 also traps in kernel and user space. I don't see why 
e500 hv should be different.


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Scott Wood April 2, 2013, 6 p.m. UTC | #6
On 04/02/2013 09:09:34 AM, Bhushan Bharat-R65777 wrote:
> 
> 
> > -----Original Message-----
> > From: Alexander Graf [mailto:agraf@suse.de]
> > Sent: Tuesday, April 02, 2013 1:57 PM
> > To: Bhushan Bharat-R65777
> > Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Wood Scott-B07421
> > Subject: Re: [PATCH 4/4 v2] KVM: PPC: Add userspace debug stub  
> support
> >
> >
> > On 29.03.2013, at 07:04, Bhushan Bharat-R65777 wrote:
> >
> > >
> > >
> > >> -----Original Message-----
> > >> From: Alexander Graf [mailto:agraf@suse.de]
> > >> Sent: Thursday, March 28, 2013 10:06 PM
> > >> To: Bhushan Bharat-R65777
> > >> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Wood  
> Scott-B07421;
> > >> Bhushan
> > >> Bharat-R65777
> > >> Subject: Re: [PATCH 4/4 v2] KVM: PPC: Add userspace debug stub
> > >> support
> > >>
> > >>
> > >> How does the normal debug register switching code work in Linux?
> > >> Can't we just reuse that? Or rely on it to restore working state  
> when
> > >> another process gets scheduled in?
> > >
> > > Good point, I can see debug registers loading in function  
> __switch_to()-
> > >switch_booke_debug_regs() in file arch/powerpc/kernel/process.c.
> > > So as long as assume that host will not use debug resources we  
> can rely on
> > this restore. But I am not sure that this is a fare assumption. As  
> Scott earlier
> > mentioned someone can use debug resource for kernel debugging also.
> >
> > Someone in the kernel can also use floating point registers. But  
> then it's his
> > responsibility to clean up the mess he leaves behind.
> 
> I am neither convinced by what you said and nor even have much reason  
> to oppose :)
> 
> Scott,
> 	I remember you mentioned that host can use debug resources, you  
> comment on this ?

I thought the conclusion we reached was that it was OK as long as KVM  
waits until it actually needs the debug resources to mess with the  
registers.

-Scott
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bharat Bhushan April 3, 2013, 10:03 a.m. UTC | #7
> -----Original Message-----
> From: Wood Scott-B07421
> Sent: Tuesday, April 02, 2013 11:30 PM
> To: Bhushan Bharat-R65777
> Cc: Alexander Graf; kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Wood Scott-
> B07421
> Subject: Re: [PATCH 4/4 v2] KVM: PPC: Add userspace debug stub support
> 
> On 04/02/2013 09:09:34 AM, Bhushan Bharat-R65777 wrote:
> >
> >
> > > -----Original Message-----
> > > From: Alexander Graf [mailto:agraf@suse.de]
> > > Sent: Tuesday, April 02, 2013 1:57 PM
> > > To: Bhushan Bharat-R65777
> > > Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Wood Scott-B07421
> > > Subject: Re: [PATCH 4/4 v2] KVM: PPC: Add userspace debug stub
> > support
> > >
> > >
> > > On 29.03.2013, at 07:04, Bhushan Bharat-R65777 wrote:
> > >
> > > >
> > > >
> > > >> -----Original Message-----
> > > >> From: Alexander Graf [mailto:agraf@suse.de]
> > > >> Sent: Thursday, March 28, 2013 10:06 PM
> > > >> To: Bhushan Bharat-R65777
> > > >> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Wood
> > Scott-B07421;
> > > >> Bhushan
> > > >> Bharat-R65777
> > > >> Subject: Re: [PATCH 4/4 v2] KVM: PPC: Add userspace debug stub
> > > >> support
> > > >>
> > > >>
> > > >> How does the normal debug register switching code work in Linux?
> > > >> Can't we just reuse that? Or rely on it to restore working state
> > when
> > > >> another process gets scheduled in?
> > > >
> > > > Good point, I can see debug registers loading in function
> > __switch_to()-
> > > >switch_booke_debug_regs() in file arch/powerpc/kernel/process.c.
> > > > So as long as assume that host will not use debug resources we
> > can rely on
> > > this restore. But I am not sure that this is a fare assumption. As
> > Scott earlier
> > > mentioned someone can use debug resource for kernel debugging also.
> > >
> > > Someone in the kernel can also use floating point registers. But
> > then it's his
> > > responsibility to clean up the mess he leaves behind.
> >
> > I am neither convinced by what you said and nor even have much reason
> > to oppose :)
> >
> > Scott,
> > 	I remember you mentioned that host can use debug resources, you
> > comment on this ?
> 
> I thought the conclusion we reached was that it was OK as long as KVM waits
> until it actually needs the debug resources to mess with the registers.

Right,  Are we also agreeing on that KVM will not save/restore host debug context on vcpu_load/vcpu_put()? KVM will load its context in vcpu_load() if needed and on vcpu_put() it will clear DBCR0 and DBSR.

Thanks
-Bharat




--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Graf April 3, 2013, 10:28 a.m. UTC | #8
Am 03.04.2013 um 12:03 schrieb Bhushan Bharat-R65777 <R65777@freescale.com>:

> 
> 
>> -----Original Message-----
>> From: Wood Scott-B07421
>> Sent: Tuesday, April 02, 2013 11:30 PM
>> To: Bhushan Bharat-R65777
>> Cc: Alexander Graf; kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Wood Scott-
>> B07421
>> Subject: Re: [PATCH 4/4 v2] KVM: PPC: Add userspace debug stub support
>> 
>> On 04/02/2013 09:09:34 AM, Bhushan Bharat-R65777 wrote:
>>> 
>>> 
>>>> -----Original Message-----
>>>> From: Alexander Graf [mailto:agraf@suse.de]
>>>> Sent: Tuesday, April 02, 2013 1:57 PM
>>>> To: Bhushan Bharat-R65777
>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Wood Scott-B07421
>>>> Subject: Re: [PATCH 4/4 v2] KVM: PPC: Add userspace debug stub
>>> support
>>>> 
>>>> 
>>>> On 29.03.2013, at 07:04, Bhushan Bharat-R65777 wrote:
>>>> 
>>>>> 
>>>>> 
>>>>>> -----Original Message-----
>>>>>> From: Alexander Graf [mailto:agraf@suse.de]
>>>>>> Sent: Thursday, March 28, 2013 10:06 PM
>>>>>> To: Bhushan Bharat-R65777
>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Wood
>>> Scott-B07421;
>>>>>> Bhushan
>>>>>> Bharat-R65777
>>>>>> Subject: Re: [PATCH 4/4 v2] KVM: PPC: Add userspace debug stub
>>>>>> support
>>>>>> 
>>>>>> 
>>>>>> How does the normal debug register switching code work in Linux?
>>>>>> Can't we just reuse that? Or rely on it to restore working state
>>> when
>>>>>> another process gets scheduled in?
>>>>> 
>>>>> Good point, I can see debug registers loading in function
>>> __switch_to()-
>>>>> switch_booke_debug_regs() in file arch/powerpc/kernel/process.c.
>>>>> So as long as assume that host will not use debug resources we
>>> can rely on
>>>> this restore. But I am not sure that this is a fare assumption. As
>>> Scott earlier
>>>> mentioned someone can use debug resource for kernel debugging also.
>>>> 
>>>> Someone in the kernel can also use floating point registers. But
>>> then it's his
>>>> responsibility to clean up the mess he leaves behind.
>>> 
>>> I am neither convinced by what you said and nor even have much reason
>>> to oppose :)
>>> 
>>> Scott,
>>>    I remember you mentioned that host can use debug resources, you
>>> comment on this ?
>> 
>> I thought the conclusion we reached was that it was OK as long as KVM waits
>> until it actually needs the debug resources to mess with the registers.
> 
> Right,  Are we also agreeing on that KVM will not save/restore host debug context on vcpu_load/vcpu_put()? KVM will load its context in vcpu_load() if needed and on vcpu_put() it will clear DBCR0 and DBSR.

That depends on whether the kernel restores the debug registers. Please double-check that.

Also, someone could want to gdb QEMU, so the debug registers might have to get restored on a heavy weight exit. I'd hope Linux just provides helpers to restore a process's debug state that we can call here.


Alex

> 
> Thanks
> -Bharat
> 
> 
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bharat Bhushan April 3, 2013, 1:50 p.m. UTC | #9
> -----Original Message-----
> From: kvm-ppc-owner@vger.kernel.org [mailto:kvm-ppc-owner@vger.kernel.org] On
> Behalf Of Alexander Graf
> Sent: Wednesday, April 03, 2013 3:58 PM
> To: Bhushan Bharat-R65777
> Cc: Wood Scott-B07421; kvm-ppc@vger.kernel.org; kvm@vger.kernel.org
> Subject: Re: [PATCH 4/4 v2] KVM: PPC: Add userspace debug stub support
> 
> 
> 
> Am 03.04.2013 um 12:03 schrieb Bhushan Bharat-R65777 <R65777@freescale.com>:
> 
> >
> >
> >> -----Original Message-----
> >> From: Wood Scott-B07421
> >> Sent: Tuesday, April 02, 2013 11:30 PM
> >> To: Bhushan Bharat-R65777
> >> Cc: Alexander Graf; kvm-ppc@vger.kernel.org; kvm@vger.kernel.org;
> >> Wood Scott-
> >> B07421
> >> Subject: Re: [PATCH 4/4 v2] KVM: PPC: Add userspace debug stub
> >> support
> >>
> >> On 04/02/2013 09:09:34 AM, Bhushan Bharat-R65777 wrote:
> >>>
> >>>
> >>>> -----Original Message-----
> >>>> From: Alexander Graf [mailto:agraf@suse.de]
> >>>> Sent: Tuesday, April 02, 2013 1:57 PM
> >>>> To: Bhushan Bharat-R65777
> >>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Wood Scott-B07421
> >>>> Subject: Re: [PATCH 4/4 v2] KVM: PPC: Add userspace debug stub
> >>> support
> >>>>
> >>>>
> >>>> On 29.03.2013, at 07:04, Bhushan Bharat-R65777 wrote:
> >>>>
> >>>>>
> >>>>>
> >>>>>> -----Original Message-----
> >>>>>> From: Alexander Graf [mailto:agraf@suse.de]
> >>>>>> Sent: Thursday, March 28, 2013 10:06 PM
> >>>>>> To: Bhushan Bharat-R65777
> >>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Wood
> >>> Scott-B07421;
> >>>>>> Bhushan
> >>>>>> Bharat-R65777
> >>>>>> Subject: Re: [PATCH 4/4 v2] KVM: PPC: Add userspace debug stub
> >>>>>> support
> >>>>>>
> >>>>>>
> >>>>>> How does the normal debug register switching code work in Linux?
> >>>>>> Can't we just reuse that? Or rely on it to restore working state
> >>> when
> >>>>>> another process gets scheduled in?
> >>>>>
> >>>>> Good point, I can see debug registers loading in function
> >>> __switch_to()-
> >>>>> switch_booke_debug_regs() in file arch/powerpc/kernel/process.c.
> >>>>> So as long as assume that host will not use debug resources we
> >>> can rely on
> >>>> this restore. But I am not sure that this is a fare assumption. As
> >>> Scott earlier
> >>>> mentioned someone can use debug resource for kernel debugging also.
> >>>>
> >>>> Someone in the kernel can also use floating point registers. But
> >>> then it's his
> >>>> responsibility to clean up the mess he leaves behind.
> >>>
> >>> I am neither convinced by what you said and nor even have much
> >>> reason to oppose :)
> >>>
> >>> Scott,
> >>>    I remember you mentioned that host can use debug resources, you
> >>> comment on this ?
> >>
> >> I thought the conclusion we reached was that it was OK as long as KVM
> >> waits until it actually needs the debug resources to mess with the registers.
> >
> > Right,  Are we also agreeing on that KVM will not save/restore host debug
> context on vcpu_load/vcpu_put()? KVM will load its context in vcpu_load() if
> needed and on vcpu_put() it will clear DBCR0 and DBSR.
> 
> That depends on whether the kernel restores the debug registers. Please double-
> check that.

Currently the kernel code restore the debug state of new schedule process in context_switch(). 

switch_booke_debug_regs() from __switch_to() and defined as :
/*
 * Unless neither the old or new thread are making use of the
 * debug registers, set the debug registers from the values
 * stored in the new thread.
 */
static void switch_booke_debug_regs(struct thread_struct *new_thread)
{
        if ((current->thread.dbcr0 & DBCR0_IDM)
                || (new_thread->dbcr0 & DBCR0_IDM))
                        prime_debug_regs(new_thread);
}

static void prime_debug_regs(struct thread_struct *thread)
{
        mtspr(SPRN_IAC1, thread->iac1);
        mtspr(SPRN_IAC2, thread->iac2);
#if CONFIG_PPC_ADV_DEBUG_IACS > 2
        mtspr(SPRN_IAC3, thread->iac3);
        mtspr(SPRN_IAC4, thread->iac4);
#endif
        mtspr(SPRN_DAC1, thread->dac1);
        mtspr(SPRN_DAC2, thread->dac2);
#if CONFIG_PPC_ADV_DEBUG_DVCS > 0
        mtspr(SPRN_DVC1, thread->dvc1);
        mtspr(SPRN_DVC2, thread->dvc2);
#endif
        mtspr(SPRN_DBCR0, thread->dbcr0);
        mtspr(SPRN_DBCR1, thread->dbcr1);
#ifdef CONFIG_BOOKE
        mtspr(SPRN_DBCR2, thread->dbcr2);
#endif
}
This is analogous to moving from guest to/from QEMU. so we can make prime_debug_regs() available to kvm code for heavyweight_exit. And vcpu_load() will load guest state and save host state (update thread->debug_registers).

And the kernel exception handling code clear DBSR and load DBCR0 with 0 (global_dbcr0 variable, which is zero) in transfer_to_handler in entry_32.S
This is analogous to switching from KVM to kernel.
But I do not same (clearing DBCR0 and DBSR) in 64bit exception handler. Is this a problem or I am missing something.  

Thanks
-Bharat

> 
> Also, someone could want to gdb QEMU, so the debug registers might have to get
> restored on a heavy weight exit. I'd hope Linux just provides helpers to restore
> a process's debug state that we can call here.
> 
> 
> Alex
> 
> >
> > Thanks
> > -Bharat
> >
> >
> >
> >
> > --
> > To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
> > the body of a message to majordomo@vger.kernel.org More majordomo info
> > at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in the body
> of a message to majordomo@vger.kernel.org More majordomo info at
> http://vger.kernel.org/majordomo-info.html


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Graf April 3, 2013, 2:09 p.m. UTC | #10
On 03.04.2013, at 15:50, Bhushan Bharat-R65777 wrote:

> 
> 
>> -----Original Message-----
>> From: kvm-ppc-owner@vger.kernel.org [mailto:kvm-ppc-owner@vger.kernel.org] On
>> Behalf Of Alexander Graf
>> Sent: Wednesday, April 03, 2013 3:58 PM
>> To: Bhushan Bharat-R65777
>> Cc: Wood Scott-B07421; kvm-ppc@vger.kernel.org; kvm@vger.kernel.org
>> Subject: Re: [PATCH 4/4 v2] KVM: PPC: Add userspace debug stub support
>> 
>> 
>> 
>> Am 03.04.2013 um 12:03 schrieb Bhushan Bharat-R65777 <R65777@freescale.com>:
>> 
>>> 
>>> 
>>>> -----Original Message-----
>>>> From: Wood Scott-B07421
>>>> Sent: Tuesday, April 02, 2013 11:30 PM
>>>> To: Bhushan Bharat-R65777
>>>> Cc: Alexander Graf; kvm-ppc@vger.kernel.org; kvm@vger.kernel.org;
>>>> Wood Scott-
>>>> B07421
>>>> Subject: Re: [PATCH 4/4 v2] KVM: PPC: Add userspace debug stub
>>>> support
>>>> 
>>>> On 04/02/2013 09:09:34 AM, Bhushan Bharat-R65777 wrote:
>>>>> 
>>>>> 
>>>>>> -----Original Message-----
>>>>>> From: Alexander Graf [mailto:agraf@suse.de]
>>>>>> Sent: Tuesday, April 02, 2013 1:57 PM
>>>>>> To: Bhushan Bharat-R65777
>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Wood Scott-B07421
>>>>>> Subject: Re: [PATCH 4/4 v2] KVM: PPC: Add userspace debug stub
>>>>> support
>>>>>> 
>>>>>> 
>>>>>> On 29.03.2013, at 07:04, Bhushan Bharat-R65777 wrote:
>>>>>> 
>>>>>>> 
>>>>>>> 
>>>>>>>> -----Original Message-----
>>>>>>>> From: Alexander Graf [mailto:agraf@suse.de]
>>>>>>>> Sent: Thursday, March 28, 2013 10:06 PM
>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Wood
>>>>> Scott-B07421;
>>>>>>>> Bhushan
>>>>>>>> Bharat-R65777
>>>>>>>> Subject: Re: [PATCH 4/4 v2] KVM: PPC: Add userspace debug stub
>>>>>>>> support
>>>>>>>> 
>>>>>>>> 
>>>>>>>> How does the normal debug register switching code work in Linux?
>>>>>>>> Can't we just reuse that? Or rely on it to restore working state
>>>>> when
>>>>>>>> another process gets scheduled in?
>>>>>>> 
>>>>>>> Good point, I can see debug registers loading in function
>>>>> __switch_to()-
>>>>>>> switch_booke_debug_regs() in file arch/powerpc/kernel/process.c.
>>>>>>> So as long as assume that host will not use debug resources we
>>>>> can rely on
>>>>>> this restore. But I am not sure that this is a fare assumption. As
>>>>> Scott earlier
>>>>>> mentioned someone can use debug resource for kernel debugging also.
>>>>>> 
>>>>>> Someone in the kernel can also use floating point registers. But
>>>>> then it's his
>>>>>> responsibility to clean up the mess he leaves behind.
>>>>> 
>>>>> I am neither convinced by what you said and nor even have much
>>>>> reason to oppose :)
>>>>> 
>>>>> Scott,
>>>>>   I remember you mentioned that host can use debug resources, you
>>>>> comment on this ?
>>>> 
>>>> I thought the conclusion we reached was that it was OK as long as KVM
>>>> waits until it actually needs the debug resources to mess with the registers.
>>> 
>>> Right,  Are we also agreeing on that KVM will not save/restore host debug
>> context on vcpu_load/vcpu_put()? KVM will load its context in vcpu_load() if
>> needed and on vcpu_put() it will clear DBCR0 and DBSR.
>> 
>> That depends on whether the kernel restores the debug registers. Please double-
>> check that.
> 
> Currently the kernel code restore the debug state of new schedule process in context_switch(). 
> 
> switch_booke_debug_regs() from __switch_to() and defined as :
> /*
> * Unless neither the old or new thread are making use of the
> * debug registers, set the debug registers from the values
> * stored in the new thread.
> */
> static void switch_booke_debug_regs(struct thread_struct *new_thread)
> {
>        if ((current->thread.dbcr0 & DBCR0_IDM)
>                || (new_thread->dbcr0 & DBCR0_IDM))
>                        prime_debug_regs(new_thread);
> }
> 
> static void prime_debug_regs(struct thread_struct *thread)
> {
>        mtspr(SPRN_IAC1, thread->iac1);
>        mtspr(SPRN_IAC2, thread->iac2);
> #if CONFIG_PPC_ADV_DEBUG_IACS > 2
>        mtspr(SPRN_IAC3, thread->iac3);
>        mtspr(SPRN_IAC4, thread->iac4);
> #endif
>        mtspr(SPRN_DAC1, thread->dac1);
>        mtspr(SPRN_DAC2, thread->dac2);
> #if CONFIG_PPC_ADV_DEBUG_DVCS > 0
>        mtspr(SPRN_DVC1, thread->dvc1);
>        mtspr(SPRN_DVC2, thread->dvc2);
> #endif
>        mtspr(SPRN_DBCR0, thread->dbcr0);
>        mtspr(SPRN_DBCR1, thread->dbcr1);
> #ifdef CONFIG_BOOKE
>        mtspr(SPRN_DBCR2, thread->dbcr2);
> #endif
> }
> This is analogous to moving from guest to/from QEMU. so we can make prime_debug_regs() available to kvm code for heavyweight_exit. And vcpu_load() will load guest state and save host state (update thread->debug_registers).

I don't think we need to do anything on vcpu_load if we just swap the thread->debug_registers. Just make sure to restore them before you return from a heavy weight exit.

> And the kernel exception handling code clear DBSR and load DBCR0 with 0 (global_dbcr0 variable, which is zero) in transfer_to_handler in entry_32.S
> This is analogous to switching from KVM to kernel.
> But I do not same (clearing DBCR0 and DBSR) in 64bit exception handler. Is this a problem or I am missing something.  

That one's up for the normal ppc ML to answer :).


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bharat Bhushan April 3, 2013, 3:18 p.m. UTC | #11
> -----Original Message-----
> From: Alexander Graf [mailto:agraf@suse.de]
> Sent: Wednesday, April 03, 2013 7:39 PM
> To: Bhushan Bharat-R65777
> Cc: Wood Scott-B07421; kvm-ppc@vger.kernel.org; kvm@vger.kernel.org
> Subject: Re: [PATCH 4/4 v2] KVM: PPC: Add userspace debug stub support
> 
> 
> On 03.04.2013, at 15:50, Bhushan Bharat-R65777 wrote:
> 
> >
> >
> >> -----Original Message-----
> >> From: kvm-ppc-owner@vger.kernel.org
> >> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of Alexander Graf
> >> Sent: Wednesday, April 03, 2013 3:58 PM
> >> To: Bhushan Bharat-R65777
> >> Cc: Wood Scott-B07421; kvm-ppc@vger.kernel.org; kvm@vger.kernel.org
> >> Subject: Re: [PATCH 4/4 v2] KVM: PPC: Add userspace debug stub
> >> support
> >>
> >>
> >>
> >> Am 03.04.2013 um 12:03 schrieb Bhushan Bharat-R65777 <R65777@freescale.com>:
> >>
> >>>
> >>>
> >>>> -----Original Message-----
> >>>> From: Wood Scott-B07421
> >>>> Sent: Tuesday, April 02, 2013 11:30 PM
> >>>> To: Bhushan Bharat-R65777
> >>>> Cc: Alexander Graf; kvm-ppc@vger.kernel.org; kvm@vger.kernel.org;
> >>>> Wood Scott-
> >>>> B07421
> >>>> Subject: Re: [PATCH 4/4 v2] KVM: PPC: Add userspace debug stub
> >>>> support
> >>>>
> >>>> On 04/02/2013 09:09:34 AM, Bhushan Bharat-R65777 wrote:
> >>>>>
> >>>>>
> >>>>>> -----Original Message-----
> >>>>>> From: Alexander Graf [mailto:agraf@suse.de]
> >>>>>> Sent: Tuesday, April 02, 2013 1:57 PM
> >>>>>> To: Bhushan Bharat-R65777
> >>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Wood
> >>>>>> Scott-B07421
> >>>>>> Subject: Re: [PATCH 4/4 v2] KVM: PPC: Add userspace debug stub
> >>>>> support
> >>>>>>
> >>>>>>
> >>>>>> On 29.03.2013, at 07:04, Bhushan Bharat-R65777 wrote:
> >>>>>>
> >>>>>>>
> >>>>>>>
> >>>>>>>> -----Original Message-----
> >>>>>>>> From: Alexander Graf [mailto:agraf@suse.de]
> >>>>>>>> Sent: Thursday, March 28, 2013 10:06 PM
> >>>>>>>> To: Bhushan Bharat-R65777
> >>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Wood
> >>>>> Scott-B07421;
> >>>>>>>> Bhushan
> >>>>>>>> Bharat-R65777
> >>>>>>>> Subject: Re: [PATCH 4/4 v2] KVM: PPC: Add userspace debug stub
> >>>>>>>> support
> >>>>>>>>
> >>>>>>>>
> >>>>>>>> How does the normal debug register switching code work in Linux?
> >>>>>>>> Can't we just reuse that? Or rely on it to restore working
> >>>>>>>> state
> >>>>> when
> >>>>>>>> another process gets scheduled in?
> >>>>>>>
> >>>>>>> Good point, I can see debug registers loading in function
> >>>>> __switch_to()-
> >>>>>>> switch_booke_debug_regs() in file arch/powerpc/kernel/process.c.
> >>>>>>> So as long as assume that host will not use debug resources we
> >>>>> can rely on
> >>>>>> this restore. But I am not sure that this is a fare assumption.
> >>>>>> As
> >>>>> Scott earlier
> >>>>>> mentioned someone can use debug resource for kernel debugging also.
> >>>>>>
> >>>>>> Someone in the kernel can also use floating point registers. But
> >>>>> then it's his
> >>>>>> responsibility to clean up the mess he leaves behind.
> >>>>>
> >>>>> I am neither convinced by what you said and nor even have much
> >>>>> reason to oppose :)
> >>>>>
> >>>>> Scott,
> >>>>>   I remember you mentioned that host can use debug resources, you
> >>>>> comment on this ?
> >>>>
> >>>> I thought the conclusion we reached was that it was OK as long as
> >>>> KVM waits until it actually needs the debug resources to mess with the
> registers.
> >>>
> >>> Right,  Are we also agreeing on that KVM will not save/restore host
> >>> debug
> >> context on vcpu_load/vcpu_put()? KVM will load its context in
> >> vcpu_load() if needed and on vcpu_put() it will clear DBCR0 and DBSR.
> >>
> >> That depends on whether the kernel restores the debug registers.
> >> Please double- check that.
> >
> > Currently the kernel code restore the debug state of new schedule process in
> context_switch().
> >
> > switch_booke_debug_regs() from __switch_to() and defined as :
> > /*
> > * Unless neither the old or new thread are making use of the
> > * debug registers, set the debug registers from the values
> > * stored in the new thread.
> > */
> > static void switch_booke_debug_regs(struct thread_struct *new_thread)
> > {
> >        if ((current->thread.dbcr0 & DBCR0_IDM)
> >                || (new_thread->dbcr0 & DBCR0_IDM))
> >                        prime_debug_regs(new_thread); }
> >
> > static void prime_debug_regs(struct thread_struct *thread) {
> >        mtspr(SPRN_IAC1, thread->iac1);
> >        mtspr(SPRN_IAC2, thread->iac2); #if CONFIG_PPC_ADV_DEBUG_IACS >
> > 2
> >        mtspr(SPRN_IAC3, thread->iac3);
> >        mtspr(SPRN_IAC4, thread->iac4); #endif
> >        mtspr(SPRN_DAC1, thread->dac1);
> >        mtspr(SPRN_DAC2, thread->dac2); #if CONFIG_PPC_ADV_DEBUG_DVCS >
> > 0
> >        mtspr(SPRN_DVC1, thread->dvc1);
> >        mtspr(SPRN_DVC2, thread->dvc2); #endif
> >        mtspr(SPRN_DBCR0, thread->dbcr0);
> >        mtspr(SPRN_DBCR1, thread->dbcr1); #ifdef CONFIG_BOOKE
> >        mtspr(SPRN_DBCR2, thread->dbcr2); #endif } This is analogous to
> > moving from guest to/from QEMU. so we can make prime_debug_regs() available to
> kvm code for heavyweight_exit. And vcpu_load() will load guest state and save
> host state (update thread->debug_registers).
> 
> I don't think we need to do anything on vcpu_load if we just swap the thread-
> >debug_registers. Just make sure to restore them before you return from a heavy
> weight exit.

My understanding is :

1)
When VCPU is running      -> h/w debug registers have vcpu->arch.debug_registers
Goes for heavyweight_exit -> h/w debug registers are loaded with thread->debug_registers
Return from heavyweight_exit and run vcpu -> h/w debug registers have vcpu->arch.debug_registers. Do we need to save thread->debug_registers , probably no as the thread->debug_registers are upto date.

2)
KVM to kernel -> clear DBSR and DBCR0
Kernel to KVM -> load vcpu->arch.debug_registers.

> 
> > And the kernel exception handling code clear DBSR and load DBCR0 with
> > 0 (global_dbcr0 variable, which is zero) in transfer_to_handler in entry_32.S
> This is analogous to switching from KVM to kernel.
> > But I do not same (clearing DBCR0 and DBSR) in 64bit exception handler. Is
> this a problem or I am missing something.
> 
> That one's up for the normal ppc ML to answer :).

Will take this on ppc mailing list.

Thanks
-Bharat

> 
> 
> Alex
> 


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Graf April 3, 2013, 4:26 p.m. UTC | #12
On 03.04.2013, at 17:18, Bhushan Bharat-R65777 wrote:

> 
> 
>> -----Original Message-----
>> From: Alexander Graf [mailto:agraf@suse.de]
>> Sent: Wednesday, April 03, 2013 7:39 PM
>> To: Bhushan Bharat-R65777
>> Cc: Wood Scott-B07421; kvm-ppc@vger.kernel.org; kvm@vger.kernel.org
>> Subject: Re: [PATCH 4/4 v2] KVM: PPC: Add userspace debug stub support
>> 
>> 
>> On 03.04.2013, at 15:50, Bhushan Bharat-R65777 wrote:
>> 
>>> 
>>> 
>>>> -----Original Message-----
>>>> From: kvm-ppc-owner@vger.kernel.org
>>>> [mailto:kvm-ppc-owner@vger.kernel.org] On Behalf Of Alexander Graf
>>>> Sent: Wednesday, April 03, 2013 3:58 PM
>>>> To: Bhushan Bharat-R65777
>>>> Cc: Wood Scott-B07421; kvm-ppc@vger.kernel.org; kvm@vger.kernel.org
>>>> Subject: Re: [PATCH 4/4 v2] KVM: PPC: Add userspace debug stub
>>>> support
>>>> 
>>>> 
>>>> 
>>>> Am 03.04.2013 um 12:03 schrieb Bhushan Bharat-R65777 <R65777@freescale.com>:
>>>> 
>>>>> 
>>>>> 
>>>>>> -----Original Message-----
>>>>>> From: Wood Scott-B07421
>>>>>> Sent: Tuesday, April 02, 2013 11:30 PM
>>>>>> To: Bhushan Bharat-R65777
>>>>>> Cc: Alexander Graf; kvm-ppc@vger.kernel.org; kvm@vger.kernel.org;
>>>>>> Wood Scott-
>>>>>> B07421
>>>>>> Subject: Re: [PATCH 4/4 v2] KVM: PPC: Add userspace debug stub
>>>>>> support
>>>>>> 
>>>>>> On 04/02/2013 09:09:34 AM, Bhushan Bharat-R65777 wrote:
>>>>>>> 
>>>>>>> 
>>>>>>>> -----Original Message-----
>>>>>>>> From: Alexander Graf [mailto:agraf@suse.de]
>>>>>>>> Sent: Tuesday, April 02, 2013 1:57 PM
>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Wood
>>>>>>>> Scott-B07421
>>>>>>>> Subject: Re: [PATCH 4/4 v2] KVM: PPC: Add userspace debug stub
>>>>>>> support
>>>>>>>> 
>>>>>>>> 
>>>>>>>> On 29.03.2013, at 07:04, Bhushan Bharat-R65777 wrote:
>>>>>>>> 
>>>>>>>>> 
>>>>>>>>> 
>>>>>>>>>> -----Original Message-----
>>>>>>>>>> From: Alexander Graf [mailto:agraf@suse.de]
>>>>>>>>>> Sent: Thursday, March 28, 2013 10:06 PM
>>>>>>>>>> To: Bhushan Bharat-R65777
>>>>>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Wood
>>>>>>> Scott-B07421;
>>>>>>>>>> Bhushan
>>>>>>>>>> Bharat-R65777
>>>>>>>>>> Subject: Re: [PATCH 4/4 v2] KVM: PPC: Add userspace debug stub
>>>>>>>>>> support
>>>>>>>>>> 
>>>>>>>>>> 
>>>>>>>>>> How does the normal debug register switching code work in Linux?
>>>>>>>>>> Can't we just reuse that? Or rely on it to restore working
>>>>>>>>>> state
>>>>>>> when
>>>>>>>>>> another process gets scheduled in?
>>>>>>>>> 
>>>>>>>>> Good point, I can see debug registers loading in function
>>>>>>> __switch_to()-
>>>>>>>>> switch_booke_debug_regs() in file arch/powerpc/kernel/process.c.
>>>>>>>>> So as long as assume that host will not use debug resources we
>>>>>>> can rely on
>>>>>>>> this restore. But I am not sure that this is a fare assumption.
>>>>>>>> As
>>>>>>> Scott earlier
>>>>>>>> mentioned someone can use debug resource for kernel debugging also.
>>>>>>>> 
>>>>>>>> Someone in the kernel can also use floating point registers. But
>>>>>>> then it's his
>>>>>>>> responsibility to clean up the mess he leaves behind.
>>>>>>> 
>>>>>>> I am neither convinced by what you said and nor even have much
>>>>>>> reason to oppose :)
>>>>>>> 
>>>>>>> Scott,
>>>>>>>  I remember you mentioned that host can use debug resources, you
>>>>>>> comment on this ?
>>>>>> 
>>>>>> I thought the conclusion we reached was that it was OK as long as
>>>>>> KVM waits until it actually needs the debug resources to mess with the
>> registers.
>>>>> 
>>>>> Right,  Are we also agreeing on that KVM will not save/restore host
>>>>> debug
>>>> context on vcpu_load/vcpu_put()? KVM will load its context in
>>>> vcpu_load() if needed and on vcpu_put() it will clear DBCR0 and DBSR.
>>>> 
>>>> That depends on whether the kernel restores the debug registers.
>>>> Please double- check that.
>>> 
>>> Currently the kernel code restore the debug state of new schedule process in
>> context_switch().
>>> 
>>> switch_booke_debug_regs() from __switch_to() and defined as :
>>> /*
>>> * Unless neither the old or new thread are making use of the
>>> * debug registers, set the debug registers from the values
>>> * stored in the new thread.
>>> */
>>> static void switch_booke_debug_regs(struct thread_struct *new_thread)
>>> {
>>>       if ((current->thread.dbcr0 & DBCR0_IDM)
>>>               || (new_thread->dbcr0 & DBCR0_IDM))
>>>                       prime_debug_regs(new_thread); }
>>> 
>>> static void prime_debug_regs(struct thread_struct *thread) {
>>>       mtspr(SPRN_IAC1, thread->iac1);
>>>       mtspr(SPRN_IAC2, thread->iac2); #if CONFIG_PPC_ADV_DEBUG_IACS >
>>> 2
>>>       mtspr(SPRN_IAC3, thread->iac3);
>>>       mtspr(SPRN_IAC4, thread->iac4); #endif
>>>       mtspr(SPRN_DAC1, thread->dac1);
>>>       mtspr(SPRN_DAC2, thread->dac2); #if CONFIG_PPC_ADV_DEBUG_DVCS >
>>> 0
>>>       mtspr(SPRN_DVC1, thread->dvc1);
>>>       mtspr(SPRN_DVC2, thread->dvc2); #endif
>>>       mtspr(SPRN_DBCR0, thread->dbcr0);
>>>       mtspr(SPRN_DBCR1, thread->dbcr1); #ifdef CONFIG_BOOKE
>>>       mtspr(SPRN_DBCR2, thread->dbcr2); #endif } This is analogous to
>>> moving from guest to/from QEMU. so we can make prime_debug_regs() available to
>> kvm code for heavyweight_exit. And vcpu_load() will load guest state and save
>> host state (update thread->debug_registers).
>> 
>> I don't think we need to do anything on vcpu_load if we just swap the thread-
>>> debug_registers. Just make sure to restore them before you return from a heavy
>> weight exit.
> 
> My understanding is :
> 
> 1)
> When VCPU is running      -> h/w debug registers have vcpu->arch.debug_registers
> Goes for heavyweight_exit -> h/w debug registers are loaded with thread->debug_registers
> Return from heavyweight_exit and run vcpu -> h/w debug registers have vcpu->arch.debug_registers. Do we need to save thread->debug_registers , probably no as the thread->debug_registers are upto date.

I think I had a thinko here :). I figured that when we put the guest debug register state into thread->debug_registers, we don't need to worry about save/restore on vcpu_load/put. But that won't update the register values in thread->debug_registers when the guest modifies them, so we would need to write them back on vcpu_put again, rendering the whole exercise pointless.

So yes, we should only make sure that we set thread->dbcr0 = 0 on vcpu_run and return it to its old value on a heavy weight exit. That way we don't swap in debug registers we don't care about if someone debugs the QEMU process.

> 
> 2)
> KVM to kernel -> clear DBSR and DBCR0

if debugging is active

> Kernel to KVM -> load vcpu->arch.debug_registers.

if debugging is active :)


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bharat Bhushan April 3, 2013, 5:14 p.m. UTC | #13
> -----Original Message-----
> From: Alexander Graf [mailto:agraf@suse.de]
> Sent: Tuesday, April 02, 2013 9:11 PM
> To: Bhushan Bharat-R65777
> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Wood Scott-B07421
> Subject: Re: [PATCH 4/4 v2] KVM: PPC: Add userspace debug stub support
> 
> On 04/02/2013 04:09 PM, Bhushan Bharat-R65777 wrote:
> >
> >> -----Original Message-----
> >> From: Alexander Graf [mailto:agraf@suse.de]
> >> Sent: Tuesday, April 02, 2013 1:57 PM
> >> To: Bhushan Bharat-R65777
> >> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Wood Scott-B07421
> >> Subject: Re: [PATCH 4/4 v2] KVM: PPC: Add userspace debug stub
> >> support
> >>
> >>
> >> On 29.03.2013, at 07:04, Bhushan Bharat-R65777 wrote:
> >>
> >>>
> >>>> -----Original Message-----
> >>>> From: Alexander Graf [mailto:agraf@suse.de]
> >>>> Sent: Thursday, March 28, 2013 10:06 PM
> >>>> To: Bhushan Bharat-R65777
> >>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Wood
> >>>> Scott-B07421; Bhushan
> >>>> Bharat-R65777
> >>>> Subject: Re: [PATCH 4/4 v2] KVM: PPC: Add userspace debug stub
> >>>> support
> >>>>
> >>>>
> >>>> On 21.03.2013, at 07:25, Bharat Bhushan wrote:
> >>>>
> >>>>> From: Bharat Bhushan<bharat.bhushan@freescale.com>
> >>>>>
> >>>>> This patch adds the debug stub support on booke/bookehv.
> >>>>> Now QEMU debug stub can use hw breakpoint, watchpoint and software
> >>>>> breakpoint to debug guest.
> >>>>>
> >>>>> Debug registers are saved/restored on vcpu_put()/vcpu_get().
> >>>>> Also the debug registers are saved restored only if guest is using
> >>>>> debug resources.
> >>>>>
> >>>>> Signed-off-by: Bharat Bhushan<bharat.bhushan@freescale.com>
> >>>>> ---
> >>>>> v2:
> >>>>> - save/restore in vcpu_get()/vcpu_put()
> >>>>> - some more minor cleanup based on review comments.
> >>>>>
> >>>>> arch/powerpc/include/asm/kvm_host.h |   10 ++
> >>>>> arch/powerpc/include/uapi/asm/kvm.h |   22 +++-
> >>>>> arch/powerpc/kvm/booke.c            |  252
> ++++++++++++++++++++++++++++++++-
> >> --
> >>>>> arch/powerpc/kvm/e500_emulate.c     |   10 ++
> >>>>> 4 files changed, 272 insertions(+), 22 deletions(-)
> >>>>>
> >>>>> diff --git a/arch/powerpc/include/asm/kvm_host.h
> >>>>> b/arch/powerpc/include/asm/kvm_host.h
> >>>>> index f4ba881..8571952 100644
> >>>>> --- a/arch/powerpc/include/asm/kvm_host.h
> >>>>> +++ b/arch/powerpc/include/asm/kvm_host.h
> >>>>> @@ -504,7 +504,17 @@ struct kvm_vcpu_arch {
> >>>>> 	u32 mmucfg;
> >>>>> 	u32 epr;
> >>>>> 	u32 crit_save;
> >>>>> +	/* guest debug registers*/
> >>>>> 	struct kvmppc_booke_debug_reg dbg_reg;
> >>>>> +	/* shadow debug registers */
> >>>>> +	struct kvmppc_booke_debug_reg shadow_dbg_reg;
> >>>>> +	/* host debug registers*/
> >>>>> +	struct kvmppc_booke_debug_reg host_dbg_reg;
> >>>>> +	/*
> >>>>> +	 * Flag indicating that debug registers are used by guest
> >>>>> +	 * and requires save restore.
> >>>>> +	*/
> >>>>> +	bool debug_save_restore;
> >>>>> #endif
> >>>>> 	gpa_t paddr_accessed;
> >>>>> 	gva_t vaddr_accessed;
> >>>>> diff --git a/arch/powerpc/include/uapi/asm/kvm.h
> >>>>> b/arch/powerpc/include/uapi/asm/kvm.h
> >>>>> index 15f9a00..d7ce449 100644
> >>>>> --- a/arch/powerpc/include/uapi/asm/kvm.h
> >>>>> +++ b/arch/powerpc/include/uapi/asm/kvm.h
> >>>>> @@ -25,6 +25,7 @@
> >>>>> /* Select powerpc specific features in<linux/kvm.h>  */ #define
> >>>>> __KVM_HAVE_SPAPR_TCE #define __KVM_HAVE_PPC_SMT
> >>>>> +#define __KVM_HAVE_GUEST_DEBUG
> >>>>>
> >>>>> struct kvm_regs {
> >>>>> 	__u64 pc;
> >>>>> @@ -267,7 +268,24 @@ struct kvm_fpu {
> >>>>> 	__u64 fpr[32];
> >>>>> };
> >>>>>
> >>>>> +/*
> >>>>> + * Defines for h/w breakpoint, watchpoint (read, write or both)
> >>>>> +and
> >>>>> + * software breakpoint.
> >>>>> + * These are used as "type" in KVM_SET_GUEST_DEBUG ioctl and "status"
> >>>>> + * for KVM_DEBUG_EXIT.
> >>>>> + */
> >>>>> +#define KVMPPC_DEBUG_NONE		0x0
> >>>>> +#define KVMPPC_DEBUG_BREAKPOINT		(1UL<<  1)
> >>>>> +#define KVMPPC_DEBUG_WATCH_WRITE	(1UL<<  2)
> >>>>> +#define KVMPPC_DEBUG_WATCH_READ		(1UL<<  3)
> >>>>> struct kvm_debug_exit_arch {
> >>>>> +	__u64 address;
> >>>>> +	/*
> >>>>> +	 * exiting to userspace because of h/w breakpoint, watchpoint
> >>>>> +	 * (read, write or both) and software breakpoint.
> >>>>> +	 */
> >>>>> +	__u32 status;
> >>>>> +	__u32 reserved;
> >>>>> };
> >>>>>
> >>>>> /* for KVM_SET_GUEST_DEBUG */
> >>>>> @@ -279,10 +297,6 @@ struct kvm_guest_debug_arch {
> >>>>> 		 * Type denotes h/w breakpoint, read watchpoint, write
> >>>>> 		 * watchpoint or watchpoint (both read and write).
> >>>>> 		 */
> >>>>> -#define KVMPPC_DEBUG_NOTYPE		0x0
> >>>>> -#define KVMPPC_DEBUG_BREAKPOINT		(1UL<<  1)
> >>>>> -#define KVMPPC_DEBUG_WATCH_WRITE	(1UL<<  2)
> >>>>> -#define KVMPPC_DEBUG_WATCH_READ		(1UL<<  3)
> >>>>> 		__u32 type;
> >>>>> 		__u32 reserved;
> >>>>> 	} bp[16];
> >>>>> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
> >>>>> index
> >>>>> 1de93a8..bf20056 100644
> >>>>> --- a/arch/powerpc/kvm/booke.c
> >>>>> +++ b/arch/powerpc/kvm/booke.c
> >>>>> @@ -133,6 +133,30 @@ static void kvmppc_vcpu_sync_fpu(struct
> >>>>> kvm_vcpu
> >>>>> *vcpu) #endif }
> >>>>>
> >>>>> +static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu) {
> >>>>> +	/* Synchronize guest's desire to get debug interrupts into
> >>>>> +shadow MSR */ #ifndef CONFIG_KVM_BOOKE_HV
> >>>>> +	vcpu->arch.shadow_msr&= ~MSR_DE;
> >>>>> +	vcpu->arch.shadow_msr |= vcpu->arch.shared->msr&  MSR_DE; #endif
> >>>>> +
> >>>>> +	/* Force enable debug interrupts when user space wants to debug */
> >>>>> +	if (vcpu->guest_debug) {
> >>>>> +#ifdef CONFIG_KVM_BOOKE_HV
> >>>>> +		/*
> >>>>> +		 * Since there is no shadow MSR, sync MSR_DE into the guest
> >>>>> +		 * visible MSR. Do not allow guest to change MSR[DE].
> >>>>> +		 */
> >>>>> +		vcpu->arch.shared->msr |= MSR_DE;
> >>>>> +		mtspr(SPRN_MSRP, mfspr(SPRN_MSRP) | MSRP_DEP);
> >>>> This mtspr should really just be a bit or in shadow_mspr when
> >>>> guest_debug gets enabled. It should automatically get synchronized
> >>>> as soon as the next
> >>>> vpcu_load() happens.
> >>> I think this is not required here as shadow_dbsr already have MSRP_DEP set.
> >>>
> >>> Will setup shadow_msrp when setting guest_debug and clear
> >>> shadow_msrp when
> >> guest_debug is cleared.
> >>> But that will also not be sufficient as it not sure when vcpu_load()
> >>> will be called after the shadow_msrp is changed. So
> >>>
> >>> When guest_debug is set in debug_ioctl()
> >>> /* Set MSRP_DEP in shadow_msrp so that guest cannot change MSR.DE.
> >>> As shadow_msrp is loaded on vcpu_load(), and it is not guaranteed
> >>> that
> >>> vcpu_load() will be called just after debug_ioctl(), so also set
> >>> MSRP_DEP in SPRN_MSRP. */
> >> debug_ioctl is an ioctl. So while that ioctl is on, the vcpu is not running.
> >> After that ioctl user space calls another VCPU_RUN ioctl which
> >> invokes vcpu_load.
> > That's good. I was not aware of vcpu_load() is being called before
> > ioctl handling in KVM :)
> >
> >>> - shadow_msrp | MSRP_DEP;
> >>> - change SPRN_MSRP.MSRP_DEP as you do not know when vcpu_load() will
> >>> be
> >> called?
> >>>
> >>> When guest_debug is cleared in debug_ioctl()
> >>> /* Clear MSRP_DEP in shadow_msrp so that guest can change MSR.DE. As
> >>> shadow_msrp is loaded on vcpu_load(), and it is not guaranteed that
> >>> vcpu_load() will be called just after debug_ioctl(), so also clear
> >>> MSRP_DEP in SPRN_MSRP. */
> >> Same thing here again.
> >>
> >>> - shadow_msrp | MSRP_DEP;
> >>> - change SPRN_MSRP.MSRP_DEP as you do not know when vcpu_load() will
> >>> be
> >> called?
> >>>> Also, what happens when user space disables guest_debug?
> >>>>
> >>>>> +#else
> >>>>> +		vcpu->arch.shadow_msr |= MSR_DE;
> >>>>> +		vcpu->arch.shared->msr&= ~MSR_DE; #endif
> >>>>> +	}
> >>>>> +}
> >>>>> +
> >>>>> /*
> >>>>> * Helper function for "full" MSR writes.  No need to call this if
> >>>>> only
> >>>>> * EE/CE/ME/DE/RI are changing.
> >>>>> @@ -150,6 +174,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32
> new_msr)
> >>>>> 	kvmppc_mmu_msr_notify(vcpu, old_msr);
> >>>>> 	kvmppc_vcpu_sync_spe(vcpu);
> >>>>> 	kvmppc_vcpu_sync_fpu(vcpu);
> >>>>> +	kvmppc_vcpu_sync_debug(vcpu);
> >>>>> }
> >>>>>
> >>>>> static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu, @@
> >>>>> -736,6 +761,9 @@ static int emulation_exit(struct kvm_run *run,
> >>>>> struct
> >>>> kvm_vcpu *vcpu)
> >>>>> 		run->exit_reason = KVM_EXIT_DCR;
> >>>>> 		return RESUME_HOST;
> >>>>>
> >>>>> +	case EMULATE_EXIT_USER:
> >>>>> +		return RESUME_HOST;
> >>>> This should get folded into the previous patch or be a separate one.
> >>> ok
> >>>
> >>>>> +
> >>>>> 	case EMULATE_FAIL:
> >>>>> 		printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
> >>>>> 		       __func__, vcpu->arch.pc, vcpu->arch.last_inst); @@ -751,6
> >>>>> +779,30 @@ static int emulation_exit(struct kvm_run *run, struct
> >>>>> +kvm_vcpu
> >>>> *vcpu)
> >>>>> 	}
> >>>>> }
> >>>>>
> >>>>> +static int kvmppc_handle_debug(struct kvm_run *run, struct
> >>>>> +kvm_vcpu
> >>>>> +*vcpu) {
> >>>>> +	u32 dbsr = mfspr(SPRN_DBSR);
> >>>>> +	mtspr(SPRN_DBSR, dbsr);
> >>>> This definitely deserves a comment :).
> >>> Ok.
> >>>
> >>>> Also, are we non-preemptible here?
> >>> This is called from kvmppc_handle_exit and interrupts are enabled,
> >>> so I think
> >> this can be preempted.
> >>> But I do not think there is any issue as we clear DBSR in vpu_put().
> >> Ok, make clear_dbsr a function then that you call from both places.
> >> That way it's more obvious.
> > Ok
> >
> >>>>> +
> >>>>> +	run->debug.arch.status = 0;
> >>>>> +	run->debug.arch.address = vcpu->arch.pc;
> >>>>> +
> >>>>> +	if (dbsr&  (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) {
> >>>>> +		run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT;
> >>>>> +	} else {
> >>>>> +		if (dbsr&  (DBSR_DAC1W | DBSR_DAC2W))
> >>>>> +			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_WRITE;
> >>>>> +		else if (dbsr&  (DBSR_DAC1R | DBSR_DAC2R))
> >>>>> +			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_READ;
> >>>>> +		if (dbsr&  (DBSR_DAC1R | DBSR_DAC1W))
> >>>>> +			run->debug.arch.address = vcpu-
> >arch.shadow_dbg_reg.dac[0];
> >>>>> +		else if (dbsr&  (DBSR_DAC2R | DBSR_DAC2W))
> >>>>> +			run->debug.arch.address = vcpu-
> >arch.shadow_dbg_reg.dac[1];
> >>>>> +	}
> >>>>> +
> >>>>> +	return RESUME_HOST;
> >>>> Shouldn't this check for guest_debug and only go to user space if
> >>>> it asked for debugging? Can't you just leave it at the old code for
> >> !guest_debug?
> >>> Will add, I thought that it is redundant as of now as debug
> >>> interrupt in guest
> >> can come only when user space is debugging guest.
> >>
> >> The problem is that user space might not expect a guest debug exit.
> > Ok
> >
> >>> clear DBSR
> >>> if (!guest_debug)
> >>> 	return RESUME_GUEST;
> >> Can't you just do exactly what was done before when !guest_debug? We
> >> usually try to change semantics one step at a time.
> > What you mean by "done before" ? do you mean the removed code in "case
> BOOKE_INTERRUPT_DEBUG" code in kvmppc_handle_exit() ? If yes then that code was
> meaningless and not correct.
> 
> Then change it in a separate patch. One patch, one change. Otherwise
> bisectability suffers.
> 
> > Right thing should be clear the pending exception and resume the
> > guest. when no one is in state to handle the exception. May be we can
> > have WARN_ON_ONCE()
> >
> >>>>> +}
> >>>>> +
> >>>>> static void kvmppc_fill_pt_regs(struct pt_regs *regs) {
> >>>>> 	ulong r1, ip, msr, lr;
> >>>>> @@ -1110,18 +1162,10 @@ int kvmppc_handle_exit(struct kvm_run
> >>>>> *run, struct
> >>>> kvm_vcpu *vcpu,
> >>>>> 	}
> >>>>>
> >>>>> 	case BOOKE_INTERRUPT_DEBUG: {
> >>>>> -		u32 dbsr;
> >>>>> -
> >>>>> -		vcpu->arch.pc = mfspr(SPRN_CSRR0);
> >>>>> -
> >>>>> -		/* clear IAC events in DBSR register */
> >>>>> -		dbsr = mfspr(SPRN_DBSR);
> >>>>> -		dbsr&= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4;
> >>>>> -		mtspr(SPRN_DBSR, dbsr);
> >>>>> -
> >>>>> -		run->exit_reason = KVM_EXIT_DEBUG;
> >>>>> +		r = kvmppc_handle_debug(run, vcpu);
> >>>>> +		if (r == RESUME_HOST)
> >>>>> +			run->exit_reason = KVM_EXIT_DEBUG;
> >>>>> 		kvmppc_account_exit(vcpu, DEBUG_EXITS);
> >>>>> -		r = RESUME_HOST;
> >>>>> 		break;
> >>>>> 	}
> >>>>>
> >>>>> @@ -1172,7 +1216,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
> >>>>> 	kvmppc_set_msr(vcpu, 0);
> >>>>>
> >>>>> #ifndef CONFIG_KVM_BOOKE_HV
> >>>>> -	vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS;
> >>>>> +	vcpu->arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS;
> >>>>> 	vcpu->arch.shadow_pid = 1;
> >>>>> 	vcpu->arch.shared->msr = 0;
> >>>>> #endif
> >>>>> @@ -1527,12 +1571,6 @@ int kvm_vcpu_ioctl_set_one_reg(struct
> >>>>> kvm_vcpu *vcpu,
> >>>> struct kvm_one_reg *reg)
> >>>>> 	return r;
> >>>>> }
> >>>>>
> >>>>> -int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
> >>>>> -					 struct kvm_guest_debug *dbg)
> >>>>> -{
> >>>>> -	return -EINVAL;
> >>>>> -}
> >>>>> -
> >>>>> int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct
> >>>>> kvm_fpu
> >>>>> *fpu) {
> >>>>> 	return -ENOTSUPP;
> >>>>> @@ -1638,16 +1676,194 @@ void kvmppc_decrementer_func(unsigned long data)
> >>>>> 	kvmppc_set_tsr_bits(vcpu, TSR_DIS); }
> >>>>>
> >>>>> +static void kvmppc_booke_vcpu_put_debug_regs(struct kvm_vcpu *vcpu) {
> >>>>> +	if (!vcpu->arch.debug_save_restore)
> >>>>> +		return;
> >>>>> +
> >>>>> +	/* Restore Host Context. Disable all debug events First */
> >>>>> +	mtspr(SPRN_DBCR0, 0x0);
> >>>>> +	/* Disable pending debug event by Clearing DBSR */
> >>>>> +	mtspr(SPRN_DBSR, mfspr(SPRN_DBSR));
> >>>>> +
> >>>>> +	mtspr(SPRN_DBCR1, vcpu->arch.host_dbg_reg.dbcr1);
> >>>>> +	mtspr(SPRN_DBCR2, vcpu->arch.host_dbg_reg.dbcr2); #ifdef
> >>>>> +CONFIG_KVM_E500MC
> >>>>> +	mtspr(SPRN_DBCR4, vcpu->arch.host_dbg_reg.dbcr4); #endif
> >>>>> +	mtspr(SPRN_IAC1, vcpu->arch.host_dbg_reg.iac[0]);
> >>>>> +	mtspr(SPRN_IAC2, vcpu->arch.host_dbg_reg.iac[1]); #if
> >>>>> +CONFIG_PPC_ADV_DEBUG_IACS>  2
> >>>>> +	mtspr(SPRN_IAC3, vcpu->arch.host_dbg_reg.iac[2]);
> >>>>> +	mtspr(SPRN_IAC4, vcpu->arch.host_dbg_reg.iac[3]); #endif
> >>>>> +	mtspr(SPRN_DAC1, vcpu->arch.host_dbg_reg.dac[0]);
> >>>>> +	mtspr(SPRN_DAC2, vcpu->arch.host_dbg_reg.dac[1]);
> >>>>> +
> >>>>> +	/* Enable debug events after all other debug registers restored */
> >>>>> +	mtspr(SPRN_DBCR0, vcpu->arch.host_dbg_reg.dbcr0);
> >>>> How does the normal debug register switching code work in Linux?
> >>>> Can't we just reuse that? Or rely on it to restore working state
> >>>> when another process gets scheduled in?
> >>> Good point, I can see debug registers loading in function
> >>> __switch_to()-
> >>> switch_booke_debug_regs() in file arch/powerpc/kernel/process.c.
> >>> So as long as assume that host will not use debug resources we can
> >>> rely on
> >> this restore. But I am not sure that this is a fare assumption. As
> >> Scott earlier mentioned someone can use debug resource for kernel debugging
> also.
> >>
> >> Someone in the kernel can also use floating point registers. But then
> >> it's his responsibility to clean up the mess he leaves behind.
> > I am neither convinced by what you said and nor even have much reason
> > to oppose :)
> >
> > Scott,
> > 	I remember you mentioned that host can use debug resources, you comment on
> this ?
> >
> >>>>> +
> >>>>> +	/* Host debug register are restored */
> >>>>> +	vcpu->arch.debug_save_restore = false; }
> >>>>> +
> >>>>> +static void kvmppc_booke_vcpu_load_debug_regs(struct kvm_vcpu
> >>>>> +*vcpu) {
> >>>>> +	/*
> >>>>> +	 * Check whether guest still need debug resource, if not then there
> >>>>> +	 * is no need to resotre guest context.
> >>>> restore
> >>>>
> >>>>> +	 */
> >>>>> +	if (!vcpu->arch.shadow_dbg_reg.dbcr0)
> >>>>> +		return;
> >>>> Are we guaranteed that debugging is disabled here?
> >>> Ah, I should be clearing the hw DBCR0 and DBSR.
> >>>
> >>>> We don't want to get debug
> >>>> exceptions that were meant for some other process.
> >>>>
> >>>>> +
> >>>>> +	/*
> >>>>> +	 * Save Host debug register only after restore. If host debug
> >>>>> +	 * registers are saved and not restored then do not save again.
> >>>>> +	 */
> >>>>> +	if (!vcpu->arch.debug_save_restore) {
> >>>>> +		/* Save Host context */
> >>>>> +		vcpu->arch.host_dbg_reg.dbcr0 = mfspr(SPRN_DBCR0);
> >>>>> +		vcpu->arch.host_dbg_reg.dbcr1 = mfspr(SPRN_DBCR1);
> >>>>> +		vcpu->arch.host_dbg_reg.dbcr2 = mfspr(SPRN_DBCR2); #ifdef
> >>>>> +CONFIG_KVM_E500MC
> >>>>> +		vcpu->arch.host_dbg_reg.dbcr4 = mfspr(SPRN_DBCR4); #endif
> >>>>> +		vcpu->arch.host_dbg_reg.iac[0] = mfspr(SPRN_IAC1);
> >>>>> +		vcpu->arch.host_dbg_reg.iac[1] = mfspr(SPRN_IAC2); #if
> >>>>> +CONFIG_PPC_ADV_DEBUG_IACS>  2
> >>>>> +		vcpu->arch.host_dbg_reg.iac[2] = mfspr(SPRN_IAC3);
> >>>>> +		vcpu->arch.host_dbg_reg.iac[3] = mfspr(SPRN_IAC4); #endif
> >>>>> +		vcpu->arch.host_dbg_reg.dac[0] = mfspr(SPRN_DAC1);
> >>>>> +		vcpu->arch.host_dbg_reg.dac[1] = mfspr(SPRN_DAC2);
> >>>>> +		vcpu->arch.debug_save_restore = true;
> >>>>> +	}
> >>>>> +
> >>>>> +	/* Restore Guest Context. Disable all debug events First */
> >>>>> +	mtspr(SPRN_DBCR0, 0x0);
> >>>>> +	/* Clear h/w DBSR */
> >>>>> +	mtspr(SPRN_DBSR, mfspr(SPRN_DBSR));
> >>>>> +
> >>>>> +	mtspr(SPRN_DBCR1, vcpu->arch.shadow_dbg_reg.dbcr1);
> >>>>> +	mtspr(SPRN_DBCR2, vcpu->arch.shadow_dbg_reg.dbcr2); #ifdef
> >>>>> +CONFIG_KVM_E500MC
> >>>>> +	mtspr(SPRN_DBCR4, vcpu->arch.shadow_dbg_reg.dbcr4); #endif
> >>>>> +	mtspr(SPRN_IAC1, vcpu->arch.shadow_dbg_reg.iac[0]);
> >>>>> +	mtspr(SPRN_IAC2, vcpu->arch.shadow_dbg_reg.iac[1]);
> >>>>> +#if CONFIG_PPC_ADV_DEBUG_IACS>  2
> >>>>> +	mtspr(SPRN_IAC3, vcpu->arch.shadow_dbg_reg.iac[2]);
> >>>>> +	mtspr(SPRN_IAC4, vcpu->arch.shadow_dbg_reg.iac[3]);
> >>>>> +#endif
> >>>>> +	mtspr(SPRN_DAC1, vcpu->arch.shadow_dbg_reg.dac[0]);
> >>>>> +	mtspr(SPRN_DAC2, vcpu->arch.shadow_dbg_reg.dac[1]);
> >>>>> +
> >>>>> +	/* Enable debug events after other debug registers restored */
> >>>>> +	mtspr(SPRN_DBCR0, vcpu->arch.shadow_dbg_reg.dbcr0); }
> >>>>> +
> >>>>> +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
> >>>>> +					 struct kvm_guest_debug *dbg) {
> >>>>> +	struct kvmppc_booke_debug_reg *dbg_reg;
> >>>>> +	int n, b = 0, w = 0;
> >>>>> +	const u32 bp_code[] = {
> >>>>> +		DBCR0_IAC1 | DBCR0_IDM,
> >>>>> +		DBCR0_IAC2 | DBCR0_IDM,
> >>>>> +		DBCR0_IAC3 | DBCR0_IDM,
> >>>>> +		DBCR0_IAC4 | DBCR0_IDM
> >>>>> +	};
> >>>>> +	const u32 wp_code[] = {
> >>>>> +		DBCR0_DAC1W | DBCR0_IDM,
> >>>>> +		DBCR0_DAC2W | DBCR0_IDM,
> >>>>> +		DBCR0_DAC1R | DBCR0_IDM,
> >>>>> +		DBCR0_DAC2R | DBCR0_IDM
> >>>>> +	};
> >>>>> +
> >>>>> +	if (!(dbg->control&  KVM_GUESTDBG_ENABLE)) {
> >>>>> +		/* Clear All debug events */
> >>>>> +		vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
> >>>>> +		vcpu->guest_debug = 0;
> >>>> Ah, this is where we disable guest_debug. This needs to enable
> >>>> guest_debug for the guest again, so you need to remove the DE bit
> >>>> from
> >> shadow_msrp here.
> >>>>> +		return 0;
> >>>>> +	}
> >>>>> +
> >>>>> +	vcpu->guest_debug = dbg->control;
> >>>>> +	vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
> >>>>> +	/* Set DBCR0_EDM in guest visible DBCR0 register. */
> >>>>> +	vcpu->arch.dbg_reg.dbcr0 = DBCR0_EDM;
> >>>>> +
> >>>>> +	if (vcpu->guest_debug&  KVM_GUESTDBG_SINGLESTEP)
> >>>>> +		vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC;
> >>>>> +
> >>>>> +	if (!(vcpu->guest_debug&  KVM_GUESTDBG_USE_HW_BP)) {
> >>>>> +		/* Code below handles only HW breakpoints */
> >>>>> +		kvmppc_booke_vcpu_load_debug_regs(vcpu);
> >>>>> +		return 0;
> >>>>> +	}
> >>>>> +
> >>>>> +	dbg_reg =&(vcpu->arch.shadow_dbg_reg);
> >>>>> +
> >>>>> +	/*
> >>>>> +	 * On BOOKE (e500v2); Set DBCR1 and DBCR2 to allow debug events
> >>>>> +	 * to occur when MSR.PR is set.
> >>>>> +	 * On BOOKE-HV (e500mc+); MSR.PR = 0 when guest is running. So we
> >>>>> +	 * should clear DBCR1 and DBCR2.
> >>>>> +	 */
> >>>>> +#ifdef CONFIG_KVM_BOOKE_HV
> >>>>> +	dbg_reg->dbcr1 = 0;
> >>>>> +	dbg_reg->dbcr2 = 0;
> >>>> Does that mean we can't debug guest user space?
> >>> Yes
> >> This is wrong.
> > Really, So far I am assuming qemu debug stub is not mean for debugging guest
> application.
> 
> Ok, let me rephrase: This is confusing. You do trap in PR mode on e500v2. IIRC
> x86 also traps in kernel and user space. I don't see why e500 hv should be
> different.

I am sorry, I think did not read the document correctly.

DBCR1 = 0 ; means the "00 IAC1 debug conditions unaffected by MSR[PR],MSR[GS].

Similarly for dbcr2. 

So yes the guest user space can be debugged.

Thanks
-Bharat




--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Graf April 3, 2013, 5:35 p.m. UTC | #14
On 03.04.2013, at 19:14, Bhushan Bharat-R65777 wrote:

> 
> 
>> -----Original Message-----
>> From: Alexander Graf [mailto:agraf@suse.de]
>> Sent: Tuesday, April 02, 2013 9:11 PM
>> To: Bhushan Bharat-R65777
>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Wood Scott-B07421
>> Subject: Re: [PATCH 4/4 v2] KVM: PPC: Add userspace debug stub support
>> 
>> On 04/02/2013 04:09 PM, Bhushan Bharat-R65777 wrote:
>>> 
>>>> -----Original Message-----
>>>> From: Alexander Graf [mailto:agraf@suse.de]
>>>> Sent: Tuesday, April 02, 2013 1:57 PM
>>>> To: Bhushan Bharat-R65777
>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Wood Scott-B07421
>>>> Subject: Re: [PATCH 4/4 v2] KVM: PPC: Add userspace debug stub
>>>> support
>>>> 
>>>> 
>>>> On 29.03.2013, at 07:04, Bhushan Bharat-R65777 wrote:
>>>> 
>>>>> 
>>>>>> -----Original Message-----
>>>>>> From: Alexander Graf [mailto:agraf@suse.de]
>>>>>> Sent: Thursday, March 28, 2013 10:06 PM
>>>>>> To: Bhushan Bharat-R65777
>>>>>> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Wood
>>>>>> Scott-B07421; Bhushan
>>>>>> Bharat-R65777
>>>>>> Subject: Re: [PATCH 4/4 v2] KVM: PPC: Add userspace debug stub
>>>>>> support
>>>>>> 
>>>>>> 
>>>>>> On 21.03.2013, at 07:25, Bharat Bhushan wrote:
>>>>>> 
>>>>>>> From: Bharat Bhushan<bharat.bhushan@freescale.com>
>>>>>>> 
>>>>>>> This patch adds the debug stub support on booke/bookehv.
>>>>>>> Now QEMU debug stub can use hw breakpoint, watchpoint and software
>>>>>>> breakpoint to debug guest.
>>>>>>> 
>>>>>>> Debug registers are saved/restored on vcpu_put()/vcpu_get().
>>>>>>> Also the debug registers are saved restored only if guest is using
>>>>>>> debug resources.
>>>>>>> 
>>>>>>> Signed-off-by: Bharat Bhushan<bharat.bhushan@freescale.com>
>>>>>>> ---
>>>>>>> v2:
>>>>>>> - save/restore in vcpu_get()/vcpu_put()
>>>>>>> - some more minor cleanup based on review comments.
>>>>>>> 
>>>>>>> arch/powerpc/include/asm/kvm_host.h |   10 ++
>>>>>>> arch/powerpc/include/uapi/asm/kvm.h |   22 +++-
>>>>>>> arch/powerpc/kvm/booke.c            |  252
>> ++++++++++++++++++++++++++++++++-
>>>> --
>>>>>>> arch/powerpc/kvm/e500_emulate.c     |   10 ++
>>>>>>> 4 files changed, 272 insertions(+), 22 deletions(-)
>>>>>>> 
>>>>>>> diff --git a/arch/powerpc/include/asm/kvm_host.h
>>>>>>> b/arch/powerpc/include/asm/kvm_host.h
>>>>>>> index f4ba881..8571952 100644
>>>>>>> --- a/arch/powerpc/include/asm/kvm_host.h
>>>>>>> +++ b/arch/powerpc/include/asm/kvm_host.h
>>>>>>> @@ -504,7 +504,17 @@ struct kvm_vcpu_arch {
>>>>>>> 	u32 mmucfg;
>>>>>>> 	u32 epr;
>>>>>>> 	u32 crit_save;
>>>>>>> +	/* guest debug registers*/
>>>>>>> 	struct kvmppc_booke_debug_reg dbg_reg;
>>>>>>> +	/* shadow debug registers */
>>>>>>> +	struct kvmppc_booke_debug_reg shadow_dbg_reg;
>>>>>>> +	/* host debug registers*/
>>>>>>> +	struct kvmppc_booke_debug_reg host_dbg_reg;
>>>>>>> +	/*
>>>>>>> +	 * Flag indicating that debug registers are used by guest
>>>>>>> +	 * and requires save restore.
>>>>>>> +	*/
>>>>>>> +	bool debug_save_restore;
>>>>>>> #endif
>>>>>>> 	gpa_t paddr_accessed;
>>>>>>> 	gva_t vaddr_accessed;
>>>>>>> diff --git a/arch/powerpc/include/uapi/asm/kvm.h
>>>>>>> b/arch/powerpc/include/uapi/asm/kvm.h
>>>>>>> index 15f9a00..d7ce449 100644
>>>>>>> --- a/arch/powerpc/include/uapi/asm/kvm.h
>>>>>>> +++ b/arch/powerpc/include/uapi/asm/kvm.h
>>>>>>> @@ -25,6 +25,7 @@
>>>>>>> /* Select powerpc specific features in<linux/kvm.h>  */ #define
>>>>>>> __KVM_HAVE_SPAPR_TCE #define __KVM_HAVE_PPC_SMT
>>>>>>> +#define __KVM_HAVE_GUEST_DEBUG
>>>>>>> 
>>>>>>> struct kvm_regs {
>>>>>>> 	__u64 pc;
>>>>>>> @@ -267,7 +268,24 @@ struct kvm_fpu {
>>>>>>> 	__u64 fpr[32];
>>>>>>> };
>>>>>>> 
>>>>>>> +/*
>>>>>>> + * Defines for h/w breakpoint, watchpoint (read, write or both)
>>>>>>> +and
>>>>>>> + * software breakpoint.
>>>>>>> + * These are used as "type" in KVM_SET_GUEST_DEBUG ioctl and "status"
>>>>>>> + * for KVM_DEBUG_EXIT.
>>>>>>> + */
>>>>>>> +#define KVMPPC_DEBUG_NONE		0x0
>>>>>>> +#define KVMPPC_DEBUG_BREAKPOINT		(1UL<<  1)
>>>>>>> +#define KVMPPC_DEBUG_WATCH_WRITE	(1UL<<  2)
>>>>>>> +#define KVMPPC_DEBUG_WATCH_READ		(1UL<<  3)
>>>>>>> struct kvm_debug_exit_arch {
>>>>>>> +	__u64 address;
>>>>>>> +	/*
>>>>>>> +	 * exiting to userspace because of h/w breakpoint, watchpoint
>>>>>>> +	 * (read, write or both) and software breakpoint.
>>>>>>> +	 */
>>>>>>> +	__u32 status;
>>>>>>> +	__u32 reserved;
>>>>>>> };
>>>>>>> 
>>>>>>> /* for KVM_SET_GUEST_DEBUG */
>>>>>>> @@ -279,10 +297,6 @@ struct kvm_guest_debug_arch {
>>>>>>> 		 * Type denotes h/w breakpoint, read watchpoint, write
>>>>>>> 		 * watchpoint or watchpoint (both read and write).
>>>>>>> 		 */
>>>>>>> -#define KVMPPC_DEBUG_NOTYPE		0x0
>>>>>>> -#define KVMPPC_DEBUG_BREAKPOINT		(1UL<<  1)
>>>>>>> -#define KVMPPC_DEBUG_WATCH_WRITE	(1UL<<  2)
>>>>>>> -#define KVMPPC_DEBUG_WATCH_READ		(1UL<<  3)
>>>>>>> 		__u32 type;
>>>>>>> 		__u32 reserved;
>>>>>>> 	} bp[16];
>>>>>>> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
>>>>>>> index
>>>>>>> 1de93a8..bf20056 100644
>>>>>>> --- a/arch/powerpc/kvm/booke.c
>>>>>>> +++ b/arch/powerpc/kvm/booke.c
>>>>>>> @@ -133,6 +133,30 @@ static void kvmppc_vcpu_sync_fpu(struct
>>>>>>> kvm_vcpu
>>>>>>> *vcpu) #endif }
>>>>>>> 
>>>>>>> +static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu) {
>>>>>>> +	/* Synchronize guest's desire to get debug interrupts into
>>>>>>> +shadow MSR */ #ifndef CONFIG_KVM_BOOKE_HV
>>>>>>> +	vcpu->arch.shadow_msr&= ~MSR_DE;
>>>>>>> +	vcpu->arch.shadow_msr |= vcpu->arch.shared->msr&  MSR_DE; #endif
>>>>>>> +
>>>>>>> +	/* Force enable debug interrupts when user space wants to debug */
>>>>>>> +	if (vcpu->guest_debug) {
>>>>>>> +#ifdef CONFIG_KVM_BOOKE_HV
>>>>>>> +		/*
>>>>>>> +		 * Since there is no shadow MSR, sync MSR_DE into the guest
>>>>>>> +		 * visible MSR. Do not allow guest to change MSR[DE].
>>>>>>> +		 */
>>>>>>> +		vcpu->arch.shared->msr |= MSR_DE;
>>>>>>> +		mtspr(SPRN_MSRP, mfspr(SPRN_MSRP) | MSRP_DEP);
>>>>>> This mtspr should really just be a bit or in shadow_mspr when
>>>>>> guest_debug gets enabled. It should automatically get synchronized
>>>>>> as soon as the next
>>>>>> vpcu_load() happens.
>>>>> I think this is not required here as shadow_dbsr already have MSRP_DEP set.
>>>>> 
>>>>> Will setup shadow_msrp when setting guest_debug and clear
>>>>> shadow_msrp when
>>>> guest_debug is cleared.
>>>>> But that will also not be sufficient as it not sure when vcpu_load()
>>>>> will be called after the shadow_msrp is changed. So
>>>>> 
>>>>> When guest_debug is set in debug_ioctl()
>>>>> /* Set MSRP_DEP in shadow_msrp so that guest cannot change MSR.DE.
>>>>> As shadow_msrp is loaded on vcpu_load(), and it is not guaranteed
>>>>> that
>>>>> vcpu_load() will be called just after debug_ioctl(), so also set
>>>>> MSRP_DEP in SPRN_MSRP. */
>>>> debug_ioctl is an ioctl. So while that ioctl is on, the vcpu is not running.
>>>> After that ioctl user space calls another VCPU_RUN ioctl which
>>>> invokes vcpu_load.
>>> That's good. I was not aware of vcpu_load() is being called before
>>> ioctl handling in KVM :)
>>> 
>>>>> - shadow_msrp | MSRP_DEP;
>>>>> - change SPRN_MSRP.MSRP_DEP as you do not know when vcpu_load() will
>>>>> be
>>>> called?
>>>>> 
>>>>> When guest_debug is cleared in debug_ioctl()
>>>>> /* Clear MSRP_DEP in shadow_msrp so that guest can change MSR.DE. As
>>>>> shadow_msrp is loaded on vcpu_load(), and it is not guaranteed that
>>>>> vcpu_load() will be called just after debug_ioctl(), so also clear
>>>>> MSRP_DEP in SPRN_MSRP. */
>>>> Same thing here again.
>>>> 
>>>>> - shadow_msrp | MSRP_DEP;
>>>>> - change SPRN_MSRP.MSRP_DEP as you do not know when vcpu_load() will
>>>>> be
>>>> called?
>>>>>> Also, what happens when user space disables guest_debug?
>>>>>> 
>>>>>>> +#else
>>>>>>> +		vcpu->arch.shadow_msr |= MSR_DE;
>>>>>>> +		vcpu->arch.shared->msr&= ~MSR_DE; #endif
>>>>>>> +	}
>>>>>>> +}
>>>>>>> +
>>>>>>> /*
>>>>>>> * Helper function for "full" MSR writes.  No need to call this if
>>>>>>> only
>>>>>>> * EE/CE/ME/DE/RI are changing.
>>>>>>> @@ -150,6 +174,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32
>> new_msr)
>>>>>>> 	kvmppc_mmu_msr_notify(vcpu, old_msr);
>>>>>>> 	kvmppc_vcpu_sync_spe(vcpu);
>>>>>>> 	kvmppc_vcpu_sync_fpu(vcpu);
>>>>>>> +	kvmppc_vcpu_sync_debug(vcpu);
>>>>>>> }
>>>>>>> 
>>>>>>> static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu, @@
>>>>>>> -736,6 +761,9 @@ static int emulation_exit(struct kvm_run *run,
>>>>>>> struct
>>>>>> kvm_vcpu *vcpu)
>>>>>>> 		run->exit_reason = KVM_EXIT_DCR;
>>>>>>> 		return RESUME_HOST;
>>>>>>> 
>>>>>>> +	case EMULATE_EXIT_USER:
>>>>>>> +		return RESUME_HOST;
>>>>>> This should get folded into the previous patch or be a separate one.
>>>>> ok
>>>>> 
>>>>>>> +
>>>>>>> 	case EMULATE_FAIL:
>>>>>>> 		printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
>>>>>>> 		       __func__, vcpu->arch.pc, vcpu->arch.last_inst); @@ -751,6
>>>>>>> +779,30 @@ static int emulation_exit(struct kvm_run *run, struct
>>>>>>> +kvm_vcpu
>>>>>> *vcpu)
>>>>>>> 	}
>>>>>>> }
>>>>>>> 
>>>>>>> +static int kvmppc_handle_debug(struct kvm_run *run, struct
>>>>>>> +kvm_vcpu
>>>>>>> +*vcpu) {
>>>>>>> +	u32 dbsr = mfspr(SPRN_DBSR);
>>>>>>> +	mtspr(SPRN_DBSR, dbsr);
>>>>>> This definitely deserves a comment :).
>>>>> Ok.
>>>>> 
>>>>>> Also, are we non-preemptible here?
>>>>> This is called from kvmppc_handle_exit and interrupts are enabled,
>>>>> so I think
>>>> this can be preempted.
>>>>> But I do not think there is any issue as we clear DBSR in vpu_put().
>>>> Ok, make clear_dbsr a function then that you call from both places.
>>>> That way it's more obvious.
>>> Ok
>>> 
>>>>>>> +
>>>>>>> +	run->debug.arch.status = 0;
>>>>>>> +	run->debug.arch.address = vcpu->arch.pc;
>>>>>>> +
>>>>>>> +	if (dbsr&  (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) {
>>>>>>> +		run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT;
>>>>>>> +	} else {
>>>>>>> +		if (dbsr&  (DBSR_DAC1W | DBSR_DAC2W))
>>>>>>> +			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_WRITE;
>>>>>>> +		else if (dbsr&  (DBSR_DAC1R | DBSR_DAC2R))
>>>>>>> +			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_READ;
>>>>>>> +		if (dbsr&  (DBSR_DAC1R | DBSR_DAC1W))
>>>>>>> +			run->debug.arch.address = vcpu-
>>> arch.shadow_dbg_reg.dac[0];
>>>>>>> +		else if (dbsr&  (DBSR_DAC2R | DBSR_DAC2W))
>>>>>>> +			run->debug.arch.address = vcpu-
>>> arch.shadow_dbg_reg.dac[1];
>>>>>>> +	}
>>>>>>> +
>>>>>>> +	return RESUME_HOST;
>>>>>> Shouldn't this check for guest_debug and only go to user space if
>>>>>> it asked for debugging? Can't you just leave it at the old code for
>>>> !guest_debug?
>>>>> Will add, I thought that it is redundant as of now as debug
>>>>> interrupt in guest
>>>> can come only when user space is debugging guest.
>>>> 
>>>> The problem is that user space might not expect a guest debug exit.
>>> Ok
>>> 
>>>>> clear DBSR
>>>>> if (!guest_debug)
>>>>> 	return RESUME_GUEST;
>>>> Can't you just do exactly what was done before when !guest_debug? We
>>>> usually try to change semantics one step at a time.
>>> What you mean by "done before" ? do you mean the removed code in "case
>> BOOKE_INTERRUPT_DEBUG" code in kvmppc_handle_exit() ? If yes then that code was
>> meaningless and not correct.
>> 
>> Then change it in a separate patch. One patch, one change. Otherwise
>> bisectability suffers.
>> 
>>> Right thing should be clear the pending exception and resume the
>>> guest. when no one is in state to handle the exception. May be we can
>>> have WARN_ON_ONCE()
>>> 
>>>>>>> +}
>>>>>>> +
>>>>>>> static void kvmppc_fill_pt_regs(struct pt_regs *regs) {
>>>>>>> 	ulong r1, ip, msr, lr;
>>>>>>> @@ -1110,18 +1162,10 @@ int kvmppc_handle_exit(struct kvm_run
>>>>>>> *run, struct
>>>>>> kvm_vcpu *vcpu,
>>>>>>> 	}
>>>>>>> 
>>>>>>> 	case BOOKE_INTERRUPT_DEBUG: {
>>>>>>> -		u32 dbsr;
>>>>>>> -
>>>>>>> -		vcpu->arch.pc = mfspr(SPRN_CSRR0);
>>>>>>> -
>>>>>>> -		/* clear IAC events in DBSR register */
>>>>>>> -		dbsr = mfspr(SPRN_DBSR);
>>>>>>> -		dbsr&= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4;
>>>>>>> -		mtspr(SPRN_DBSR, dbsr);
>>>>>>> -
>>>>>>> -		run->exit_reason = KVM_EXIT_DEBUG;
>>>>>>> +		r = kvmppc_handle_debug(run, vcpu);
>>>>>>> +		if (r == RESUME_HOST)
>>>>>>> +			run->exit_reason = KVM_EXIT_DEBUG;
>>>>>>> 		kvmppc_account_exit(vcpu, DEBUG_EXITS);
>>>>>>> -		r = RESUME_HOST;
>>>>>>> 		break;
>>>>>>> 	}
>>>>>>> 
>>>>>>> @@ -1172,7 +1216,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
>>>>>>> 	kvmppc_set_msr(vcpu, 0);
>>>>>>> 
>>>>>>> #ifndef CONFIG_KVM_BOOKE_HV
>>>>>>> -	vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS;
>>>>>>> +	vcpu->arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS;
>>>>>>> 	vcpu->arch.shadow_pid = 1;
>>>>>>> 	vcpu->arch.shared->msr = 0;
>>>>>>> #endif
>>>>>>> @@ -1527,12 +1571,6 @@ int kvm_vcpu_ioctl_set_one_reg(struct
>>>>>>> kvm_vcpu *vcpu,
>>>>>> struct kvm_one_reg *reg)
>>>>>>> 	return r;
>>>>>>> }
>>>>>>> 
>>>>>>> -int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
>>>>>>> -					 struct kvm_guest_debug *dbg)
>>>>>>> -{
>>>>>>> -	return -EINVAL;
>>>>>>> -}
>>>>>>> -
>>>>>>> int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct
>>>>>>> kvm_fpu
>>>>>>> *fpu) {
>>>>>>> 	return -ENOTSUPP;
>>>>>>> @@ -1638,16 +1676,194 @@ void kvmppc_decrementer_func(unsigned long data)
>>>>>>> 	kvmppc_set_tsr_bits(vcpu, TSR_DIS); }
>>>>>>> 
>>>>>>> +static void kvmppc_booke_vcpu_put_debug_regs(struct kvm_vcpu *vcpu) {
>>>>>>> +	if (!vcpu->arch.debug_save_restore)
>>>>>>> +		return;
>>>>>>> +
>>>>>>> +	/* Restore Host Context. Disable all debug events First */
>>>>>>> +	mtspr(SPRN_DBCR0, 0x0);
>>>>>>> +	/* Disable pending debug event by Clearing DBSR */
>>>>>>> +	mtspr(SPRN_DBSR, mfspr(SPRN_DBSR));
>>>>>>> +
>>>>>>> +	mtspr(SPRN_DBCR1, vcpu->arch.host_dbg_reg.dbcr1);
>>>>>>> +	mtspr(SPRN_DBCR2, vcpu->arch.host_dbg_reg.dbcr2); #ifdef
>>>>>>> +CONFIG_KVM_E500MC
>>>>>>> +	mtspr(SPRN_DBCR4, vcpu->arch.host_dbg_reg.dbcr4); #endif
>>>>>>> +	mtspr(SPRN_IAC1, vcpu->arch.host_dbg_reg.iac[0]);
>>>>>>> +	mtspr(SPRN_IAC2, vcpu->arch.host_dbg_reg.iac[1]); #if
>>>>>>> +CONFIG_PPC_ADV_DEBUG_IACS>  2
>>>>>>> +	mtspr(SPRN_IAC3, vcpu->arch.host_dbg_reg.iac[2]);
>>>>>>> +	mtspr(SPRN_IAC4, vcpu->arch.host_dbg_reg.iac[3]); #endif
>>>>>>> +	mtspr(SPRN_DAC1, vcpu->arch.host_dbg_reg.dac[0]);
>>>>>>> +	mtspr(SPRN_DAC2, vcpu->arch.host_dbg_reg.dac[1]);
>>>>>>> +
>>>>>>> +	/* Enable debug events after all other debug registers restored */
>>>>>>> +	mtspr(SPRN_DBCR0, vcpu->arch.host_dbg_reg.dbcr0);
>>>>>> How does the normal debug register switching code work in Linux?
>>>>>> Can't we just reuse that? Or rely on it to restore working state
>>>>>> when another process gets scheduled in?
>>>>> Good point, I can see debug registers loading in function
>>>>> __switch_to()-
>>>>> switch_booke_debug_regs() in file arch/powerpc/kernel/process.c.
>>>>> So as long as assume that host will not use debug resources we can
>>>>> rely on
>>>> this restore. But I am not sure that this is a fare assumption. As
>>>> Scott earlier mentioned someone can use debug resource for kernel debugging
>> also.
>>>> 
>>>> Someone in the kernel can also use floating point registers. But then
>>>> it's his responsibility to clean up the mess he leaves behind.
>>> I am neither convinced by what you said and nor even have much reason
>>> to oppose :)
>>> 
>>> Scott,
>>> 	I remember you mentioned that host can use debug resources, you comment on
>> this ?
>>> 
>>>>>>> +
>>>>>>> +	/* Host debug register are restored */
>>>>>>> +	vcpu->arch.debug_save_restore = false; }
>>>>>>> +
>>>>>>> +static void kvmppc_booke_vcpu_load_debug_regs(struct kvm_vcpu
>>>>>>> +*vcpu) {
>>>>>>> +	/*
>>>>>>> +	 * Check whether guest still need debug resource, if not then there
>>>>>>> +	 * is no need to resotre guest context.
>>>>>> restore
>>>>>> 
>>>>>>> +	 */
>>>>>>> +	if (!vcpu->arch.shadow_dbg_reg.dbcr0)
>>>>>>> +		return;
>>>>>> Are we guaranteed that debugging is disabled here?
>>>>> Ah, I should be clearing the hw DBCR0 and DBSR.
>>>>> 
>>>>>> We don't want to get debug
>>>>>> exceptions that were meant for some other process.
>>>>>> 
>>>>>>> +
>>>>>>> +	/*
>>>>>>> +	 * Save Host debug register only after restore. If host debug
>>>>>>> +	 * registers are saved and not restored then do not save again.
>>>>>>> +	 */
>>>>>>> +	if (!vcpu->arch.debug_save_restore) {
>>>>>>> +		/* Save Host context */
>>>>>>> +		vcpu->arch.host_dbg_reg.dbcr0 = mfspr(SPRN_DBCR0);
>>>>>>> +		vcpu->arch.host_dbg_reg.dbcr1 = mfspr(SPRN_DBCR1);
>>>>>>> +		vcpu->arch.host_dbg_reg.dbcr2 = mfspr(SPRN_DBCR2); #ifdef
>>>>>>> +CONFIG_KVM_E500MC
>>>>>>> +		vcpu->arch.host_dbg_reg.dbcr4 = mfspr(SPRN_DBCR4); #endif
>>>>>>> +		vcpu->arch.host_dbg_reg.iac[0] = mfspr(SPRN_IAC1);
>>>>>>> +		vcpu->arch.host_dbg_reg.iac[1] = mfspr(SPRN_IAC2); #if
>>>>>>> +CONFIG_PPC_ADV_DEBUG_IACS>  2
>>>>>>> +		vcpu->arch.host_dbg_reg.iac[2] = mfspr(SPRN_IAC3);
>>>>>>> +		vcpu->arch.host_dbg_reg.iac[3] = mfspr(SPRN_IAC4); #endif
>>>>>>> +		vcpu->arch.host_dbg_reg.dac[0] = mfspr(SPRN_DAC1);
>>>>>>> +		vcpu->arch.host_dbg_reg.dac[1] = mfspr(SPRN_DAC2);
>>>>>>> +		vcpu->arch.debug_save_restore = true;
>>>>>>> +	}
>>>>>>> +
>>>>>>> +	/* Restore Guest Context. Disable all debug events First */
>>>>>>> +	mtspr(SPRN_DBCR0, 0x0);
>>>>>>> +	/* Clear h/w DBSR */
>>>>>>> +	mtspr(SPRN_DBSR, mfspr(SPRN_DBSR));
>>>>>>> +
>>>>>>> +	mtspr(SPRN_DBCR1, vcpu->arch.shadow_dbg_reg.dbcr1);
>>>>>>> +	mtspr(SPRN_DBCR2, vcpu->arch.shadow_dbg_reg.dbcr2); #ifdef
>>>>>>> +CONFIG_KVM_E500MC
>>>>>>> +	mtspr(SPRN_DBCR4, vcpu->arch.shadow_dbg_reg.dbcr4); #endif
>>>>>>> +	mtspr(SPRN_IAC1, vcpu->arch.shadow_dbg_reg.iac[0]);
>>>>>>> +	mtspr(SPRN_IAC2, vcpu->arch.shadow_dbg_reg.iac[1]);
>>>>>>> +#if CONFIG_PPC_ADV_DEBUG_IACS>  2
>>>>>>> +	mtspr(SPRN_IAC3, vcpu->arch.shadow_dbg_reg.iac[2]);
>>>>>>> +	mtspr(SPRN_IAC4, vcpu->arch.shadow_dbg_reg.iac[3]);
>>>>>>> +#endif
>>>>>>> +	mtspr(SPRN_DAC1, vcpu->arch.shadow_dbg_reg.dac[0]);
>>>>>>> +	mtspr(SPRN_DAC2, vcpu->arch.shadow_dbg_reg.dac[1]);
>>>>>>> +
>>>>>>> +	/* Enable debug events after other debug registers restored */
>>>>>>> +	mtspr(SPRN_DBCR0, vcpu->arch.shadow_dbg_reg.dbcr0); }
>>>>>>> +
>>>>>>> +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
>>>>>>> +					 struct kvm_guest_debug *dbg) {
>>>>>>> +	struct kvmppc_booke_debug_reg *dbg_reg;
>>>>>>> +	int n, b = 0, w = 0;
>>>>>>> +	const u32 bp_code[] = {
>>>>>>> +		DBCR0_IAC1 | DBCR0_IDM,
>>>>>>> +		DBCR0_IAC2 | DBCR0_IDM,
>>>>>>> +		DBCR0_IAC3 | DBCR0_IDM,
>>>>>>> +		DBCR0_IAC4 | DBCR0_IDM
>>>>>>> +	};
>>>>>>> +	const u32 wp_code[] = {
>>>>>>> +		DBCR0_DAC1W | DBCR0_IDM,
>>>>>>> +		DBCR0_DAC2W | DBCR0_IDM,
>>>>>>> +		DBCR0_DAC1R | DBCR0_IDM,
>>>>>>> +		DBCR0_DAC2R | DBCR0_IDM
>>>>>>> +	};
>>>>>>> +
>>>>>>> +	if (!(dbg->control&  KVM_GUESTDBG_ENABLE)) {
>>>>>>> +		/* Clear All debug events */
>>>>>>> +		vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
>>>>>>> +		vcpu->guest_debug = 0;
>>>>>> Ah, this is where we disable guest_debug. This needs to enable
>>>>>> guest_debug for the guest again, so you need to remove the DE bit
>>>>>> from
>>>> shadow_msrp here.
>>>>>>> +		return 0;
>>>>>>> +	}
>>>>>>> +
>>>>>>> +	vcpu->guest_debug = dbg->control;
>>>>>>> +	vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
>>>>>>> +	/* Set DBCR0_EDM in guest visible DBCR0 register. */
>>>>>>> +	vcpu->arch.dbg_reg.dbcr0 = DBCR0_EDM;
>>>>>>> +
>>>>>>> +	if (vcpu->guest_debug&  KVM_GUESTDBG_SINGLESTEP)
>>>>>>> +		vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC;
>>>>>>> +
>>>>>>> +	if (!(vcpu->guest_debug&  KVM_GUESTDBG_USE_HW_BP)) {
>>>>>>> +		/* Code below handles only HW breakpoints */
>>>>>>> +		kvmppc_booke_vcpu_load_debug_regs(vcpu);
>>>>>>> +		return 0;
>>>>>>> +	}
>>>>>>> +
>>>>>>> +	dbg_reg =&(vcpu->arch.shadow_dbg_reg);
>>>>>>> +
>>>>>>> +	/*
>>>>>>> +	 * On BOOKE (e500v2); Set DBCR1 and DBCR2 to allow debug events
>>>>>>> +	 * to occur when MSR.PR is set.
>>>>>>> +	 * On BOOKE-HV (e500mc+); MSR.PR = 0 when guest is running. So we
>>>>>>> +	 * should clear DBCR1 and DBCR2.
>>>>>>> +	 */
>>>>>>> +#ifdef CONFIG_KVM_BOOKE_HV
>>>>>>> +	dbg_reg->dbcr1 = 0;
>>>>>>> +	dbg_reg->dbcr2 = 0;
>>>>>> Does that mean we can't debug guest user space?
>>>>> Yes
>>>> This is wrong.
>>> Really, So far I am assuming qemu debug stub is not mean for debugging guest
>> application.
>> 
>> Ok, let me rephrase: This is confusing. You do trap in PR mode on e500v2. IIRC
>> x86 also traps in kernel and user space. I don't see why e500 hv should be
>> different.
> 
> I am sorry, I think did not read the document correctly.
> 
> DBCR1 = 0 ; means the "00 IAC1 debug conditions unaffected by MSR[PR],MSR[GS].
> 
> Similarly for dbcr2. 
> 
> So yes the guest user space can be debugged.

So why is this conditional on BOOKE_HV then? Wouldn't it make things easier to treat HV and PR identical?


Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bharat Bhushan April 3, 2013, 5:47 p.m. UTC | #15
> >>>>>>> +	dbg_reg =&(vcpu->arch.shadow_dbg_reg);
> >>>>>>> +
> >>>>>>> +	/*
> >>>>>>> +	 * On BOOKE (e500v2); Set DBCR1 and DBCR2 to allow debug events
> >>>>>>> +	 * to occur when MSR.PR is set.
> >>>>>>> +	 * On BOOKE-HV (e500mc+); MSR.PR = 0 when guest is running. So we
> >>>>>>> +	 * should clear DBCR1 and DBCR2.
> >>>>>>> +	 */
> >>>>>>> +#ifdef CONFIG_KVM_BOOKE_HV
> >>>>>>> +	dbg_reg->dbcr1 = 0;
> >>>>>>> +	dbg_reg->dbcr2 = 0;
> >>>>>> Does that mean we can't debug guest user space?
> >>>>> Yes
> >>>> This is wrong.
> >>> Really, So far I am assuming qemu debug stub is not mean for
> >>> debugging guest
> >> application.
> >>
> >> Ok, let me rephrase: This is confusing. You do trap in PR mode on
> >> e500v2. IIRC
> >> x86 also traps in kernel and user space. I don't see why e500 hv
> >> should be different.
> >
> > I am sorry, I think did not read the document correctly.
> >
> > DBCR1 = 0 ; means the "00 IAC1 debug conditions unaffected by MSR[PR],MSR[GS].
> >
> > Similarly for dbcr2.
> >
> > So yes the guest user space can be debugged.
> 
> So why is this conditional on BOOKE_HV then? Wouldn't it make things easier to
> treat HV and PR identical?
> 

On BOOKE-HV we have to keep these to 0, so guest and guest application both can be debugged. Also on HV we have EPCR.DUVD to control that debug events will not come in hypervisor (GS = 0).

On BOOKE; guest and guest application both runs in PR = 1 and hypervisor in PR = 0. So with dbcr1/dbcr2 on booke we control debug exception not to come in hypervisor mode still allow guest and its application debugging.

Thanks
-Bharat





--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Alexander Graf April 3, 2013, 5:56 p.m. UTC | #16
Am 03.04.2013 um 19:47 schrieb Bhushan Bharat-R65777 <R65777@freescale.com>:

>>>>>>>>> +    dbg_reg =&(vcpu->arch.shadow_dbg_reg);
>>>>>>>>> +
>>>>>>>>> +    /*
>>>>>>>>> +     * On BOOKE (e500v2); Set DBCR1 and DBCR2 to allow debug events
>>>>>>>>> +     * to occur when MSR.PR is set.
>>>>>>>>> +     * On BOOKE-HV (e500mc+); MSR.PR = 0 when guest is running. So we
>>>>>>>>> +     * should clear DBCR1 and DBCR2.
>>>>>>>>> +     */
>>>>>>>>> +#ifdef CONFIG_KVM_BOOKE_HV
>>>>>>>>> +    dbg_reg->dbcr1 = 0;
>>>>>>>>> +    dbg_reg->dbcr2 = 0;
>>>>>>>> Does that mean we can't debug guest user space?
>>>>>>> Yes
>>>>>> This is wrong.
>>>>> Really, So far I am assuming qemu debug stub is not mean for
>>>>> debugging guest
>>>> application.
>>>> 
>>>> Ok, let me rephrase: This is confusing. You do trap in PR mode on
>>>> e500v2. IIRC
>>>> x86 also traps in kernel and user space. I don't see why e500 hv
>>>> should be different.
>>> 
>>> I am sorry, I think did not read the document correctly.
>>> 
>>> DBCR1 = 0 ; means the "00 IAC1 debug conditions unaffected by MSR[PR],MSR[GS].
>>> 
>>> Similarly for dbcr2.
>>> 
>>> So yes the guest user space can be debugged.
>> 
>> So why is this conditional on BOOKE_HV then? Wouldn't it make things easier to
>> treat HV and PR identical?
> 
> On BOOKE-HV we have to keep these to 0, so guest and guest application both can be debugged. Also on HV we have EPCR.DUVD to control that debug events will not come in hypervisor (GS = 0).
> 
> On BOOKE; guest and guest application both runs in PR = 1 and hypervisor in PR = 0. So with dbcr1/dbcr2 on booke we control debug exception not to come in hypervisor mode still allow guest and its application debugging.

Ah, can we group these 2 overrides next to each other with an #ifdef ... #else to make this obvious from the code?

Alex

> 
> Thanks
> -Bharat
> 
> 
> 
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Bharat Bhushan April 3, 2013, 6 p.m. UTC | #17
> -----Original Message-----
> From: kvm-owner@vger.kernel.org [mailto:kvm-owner@vger.kernel.org] On Behalf Of
> Alexander Graf
> Sent: Wednesday, April 03, 2013 11:26 PM
> To: Bhushan Bharat-R65777
> Cc: kvm-ppc@vger.kernel.org; kvm@vger.kernel.org; Wood Scott-B07421
> Subject: Re: [PATCH 4/4 v2] KVM: PPC: Add userspace debug stub support
> 
> 
> 
> Am 03.04.2013 um 19:47 schrieb Bhushan Bharat-R65777 <R65777@freescale.com>:
> 
> >>>>>>>>> +    dbg_reg =&(vcpu->arch.shadow_dbg_reg);
> >>>>>>>>> +
> >>>>>>>>> +    /*
> >>>>>>>>> +     * On BOOKE (e500v2); Set DBCR1 and DBCR2 to allow debug events
> >>>>>>>>> +     * to occur when MSR.PR is set.
> >>>>>>>>> +     * On BOOKE-HV (e500mc+); MSR.PR = 0 when guest is running. So we
> >>>>>>>>> +     * should clear DBCR1 and DBCR2.
> >>>>>>>>> +     */
> >>>>>>>>> +#ifdef CONFIG_KVM_BOOKE_HV
> >>>>>>>>> +    dbg_reg->dbcr1 = 0;
> >>>>>>>>> +    dbg_reg->dbcr2 = 0;
> >>>>>>>> Does that mean we can't debug guest user space?
> >>>>>>> Yes
> >>>>>> This is wrong.
> >>>>> Really, So far I am assuming qemu debug stub is not mean for
> >>>>> debugging guest
> >>>> application.
> >>>>
> >>>> Ok, let me rephrase: This is confusing. You do trap in PR mode on
> >>>> e500v2. IIRC
> >>>> x86 also traps in kernel and user space. I don't see why e500 hv
> >>>> should be different.
> >>>
> >>> I am sorry, I think did not read the document correctly.
> >>>
> >>> DBCR1 = 0 ; means the "00 IAC1 debug conditions unaffected by
> MSR[PR],MSR[GS].
> >>>
> >>> Similarly for dbcr2.
> >>>
> >>> So yes the guest user space can be debugged.
> >>
> >> So why is this conditional on BOOKE_HV then? Wouldn't it make things
> >> easier to treat HV and PR identical?
> >
> > On BOOKE-HV we have to keep these to 0, so guest and guest application both
> can be debugged. Also on HV we have EPCR.DUVD to control that debug events will
> not come in hypervisor (GS = 0).
> >
> > On BOOKE; guest and guest application both runs in PR = 1 and hypervisor in PR
> = 0. So with dbcr1/dbcr2 on booke we control debug exception not to come in
> hypervisor mode still allow guest and its application debugging.
> 
> Ah, can we group these 2 overrides next to each other with an #ifdef ... #else
> to make this obvious from the code?

I will try :)

Thanks
-Bharat


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index f4ba881..8571952 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -504,7 +504,17 @@  struct kvm_vcpu_arch {
 	u32 mmucfg;
 	u32 epr;
 	u32 crit_save;
+	/* guest debug registers*/
 	struct kvmppc_booke_debug_reg dbg_reg;
+	/* shadow debug registers */
+	struct kvmppc_booke_debug_reg shadow_dbg_reg;
+	/* host debug registers*/
+	struct kvmppc_booke_debug_reg host_dbg_reg;
+	/*
+	 * Flag indicating that debug registers are used by guest
+	 * and requires save restore.
+	*/
+	bool debug_save_restore;
 #endif
 	gpa_t paddr_accessed;
 	gva_t vaddr_accessed;
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 15f9a00..d7ce449 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -25,6 +25,7 @@ 
 /* Select powerpc specific features in <linux/kvm.h> */
 #define __KVM_HAVE_SPAPR_TCE
 #define __KVM_HAVE_PPC_SMT
+#define __KVM_HAVE_GUEST_DEBUG
 
 struct kvm_regs {
 	__u64 pc;
@@ -267,7 +268,24 @@  struct kvm_fpu {
 	__u64 fpr[32];
 };
 
+/*
+ * Defines for h/w breakpoint, watchpoint (read, write or both) and
+ * software breakpoint.
+ * These are used as "type" in KVM_SET_GUEST_DEBUG ioctl and "status"
+ * for KVM_DEBUG_EXIT.
+ */
+#define KVMPPC_DEBUG_NONE		0x0
+#define KVMPPC_DEBUG_BREAKPOINT		(1UL << 1)
+#define KVMPPC_DEBUG_WATCH_WRITE	(1UL << 2)
+#define KVMPPC_DEBUG_WATCH_READ		(1UL << 3)
 struct kvm_debug_exit_arch {
+	__u64 address;
+	/*
+	 * exiting to userspace because of h/w breakpoint, watchpoint
+	 * (read, write or both) and software breakpoint.
+	 */
+	__u32 status;
+	__u32 reserved;
 };
 
 /* for KVM_SET_GUEST_DEBUG */
@@ -279,10 +297,6 @@  struct kvm_guest_debug_arch {
 		 * Type denotes h/w breakpoint, read watchpoint, write
 		 * watchpoint or watchpoint (both read and write).
 		 */
-#define KVMPPC_DEBUG_NOTYPE		0x0
-#define KVMPPC_DEBUG_BREAKPOINT		(1UL << 1)
-#define KVMPPC_DEBUG_WATCH_WRITE	(1UL << 2)
-#define KVMPPC_DEBUG_WATCH_READ		(1UL << 3)
 		__u32 type;
 		__u32 reserved;
 	} bp[16];
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 1de93a8..bf20056 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -133,6 +133,30 @@  static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu)
 #endif
 }
 
+static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu)
+{
+	/* Synchronize guest's desire to get debug interrupts into shadow MSR */
+#ifndef CONFIG_KVM_BOOKE_HV
+	vcpu->arch.shadow_msr &= ~MSR_DE;
+	vcpu->arch.shadow_msr |= vcpu->arch.shared->msr & MSR_DE;
+#endif
+
+	/* Force enable debug interrupts when user space wants to debug */
+	if (vcpu->guest_debug) {
+#ifdef CONFIG_KVM_BOOKE_HV
+		/*
+		 * Since there is no shadow MSR, sync MSR_DE into the guest
+		 * visible MSR. Do not allow guest to change MSR[DE].
+		 */
+		vcpu->arch.shared->msr |= MSR_DE;
+		mtspr(SPRN_MSRP, mfspr(SPRN_MSRP) | MSRP_DEP);
+#else
+		vcpu->arch.shadow_msr |= MSR_DE;
+		vcpu->arch.shared->msr &= ~MSR_DE;
+#endif
+	}
+}
+
 /*
  * Helper function for "full" MSR writes.  No need to call this if only
  * EE/CE/ME/DE/RI are changing.
@@ -150,6 +174,7 @@  void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
 	kvmppc_mmu_msr_notify(vcpu, old_msr);
 	kvmppc_vcpu_sync_spe(vcpu);
 	kvmppc_vcpu_sync_fpu(vcpu);
+	kvmppc_vcpu_sync_debug(vcpu);
 }
 
 static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
@@ -736,6 +761,9 @@  static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
 		run->exit_reason = KVM_EXIT_DCR;
 		return RESUME_HOST;
 
+	case EMULATE_EXIT_USER:
+		return RESUME_HOST;
+
 	case EMULATE_FAIL:
 		printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
 		       __func__, vcpu->arch.pc, vcpu->arch.last_inst);
@@ -751,6 +779,30 @@  static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	}
 }
 
+static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu *vcpu)
+{
+	u32 dbsr = mfspr(SPRN_DBSR);
+	mtspr(SPRN_DBSR, dbsr);
+
+	run->debug.arch.status = 0;
+	run->debug.arch.address = vcpu->arch.pc;
+
+	if (dbsr & (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) {
+		run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT;
+	} else {
+		if (dbsr & (DBSR_DAC1W | DBSR_DAC2W))
+			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_WRITE;
+		else if (dbsr & (DBSR_DAC1R | DBSR_DAC2R))
+			run->debug.arch.status |= KVMPPC_DEBUG_WATCH_READ;
+		if (dbsr & (DBSR_DAC1R | DBSR_DAC1W))
+			run->debug.arch.address = vcpu->arch.shadow_dbg_reg.dac[0];
+		else if (dbsr & (DBSR_DAC2R | DBSR_DAC2W))
+			run->debug.arch.address = vcpu->arch.shadow_dbg_reg.dac[1];
+	}
+
+	return RESUME_HOST;
+}
+
 static void kvmppc_fill_pt_regs(struct pt_regs *regs)
 {
 	ulong r1, ip, msr, lr;
@@ -1110,18 +1162,10 @@  int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	}
 
 	case BOOKE_INTERRUPT_DEBUG: {
-		u32 dbsr;
-
-		vcpu->arch.pc = mfspr(SPRN_CSRR0);
-
-		/* clear IAC events in DBSR register */
-		dbsr = mfspr(SPRN_DBSR);
-		dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4;
-		mtspr(SPRN_DBSR, dbsr);
-
-		run->exit_reason = KVM_EXIT_DEBUG;
+		r = kvmppc_handle_debug(run, vcpu);
+		if (r == RESUME_HOST)
+			run->exit_reason = KVM_EXIT_DEBUG;
 		kvmppc_account_exit(vcpu, DEBUG_EXITS);
-		r = RESUME_HOST;
 		break;
 	}
 
@@ -1172,7 +1216,7 @@  int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	kvmppc_set_msr(vcpu, 0);
 
 #ifndef CONFIG_KVM_BOOKE_HV
-	vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS;
+	vcpu->arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS;
 	vcpu->arch.shadow_pid = 1;
 	vcpu->arch.shared->msr = 0;
 #endif
@@ -1527,12 +1571,6 @@  int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 	return r;
 }
 
-int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
-					 struct kvm_guest_debug *dbg)
-{
-	return -EINVAL;
-}
-
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
 	return -ENOTSUPP;
@@ -1638,16 +1676,194 @@  void kvmppc_decrementer_func(unsigned long data)
 	kvmppc_set_tsr_bits(vcpu, TSR_DIS);
 }
 
+static void kvmppc_booke_vcpu_put_debug_regs(struct kvm_vcpu *vcpu)
+{
+	if (!vcpu->arch.debug_save_restore)
+		return;
+
+	/* Restore Host Context. Disable all debug events First */
+	mtspr(SPRN_DBCR0, 0x0);
+	/* Disable pending debug event by Clearing DBSR */
+	mtspr(SPRN_DBSR, mfspr(SPRN_DBSR));
+
+	mtspr(SPRN_DBCR1, vcpu->arch.host_dbg_reg.dbcr1);
+	mtspr(SPRN_DBCR2, vcpu->arch.host_dbg_reg.dbcr2);
+#ifdef CONFIG_KVM_E500MC
+	mtspr(SPRN_DBCR4, vcpu->arch.host_dbg_reg.dbcr4);
+#endif
+	mtspr(SPRN_IAC1, vcpu->arch.host_dbg_reg.iac[0]);
+	mtspr(SPRN_IAC2, vcpu->arch.host_dbg_reg.iac[1]);
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+	mtspr(SPRN_IAC3, vcpu->arch.host_dbg_reg.iac[2]);
+	mtspr(SPRN_IAC4, vcpu->arch.host_dbg_reg.iac[3]);
+#endif
+	mtspr(SPRN_DAC1, vcpu->arch.host_dbg_reg.dac[0]);
+	mtspr(SPRN_DAC2, vcpu->arch.host_dbg_reg.dac[1]);
+
+	/* Enable debug events after all other debug registers restored */
+	mtspr(SPRN_DBCR0, vcpu->arch.host_dbg_reg.dbcr0);
+
+	/* Host debug register are restored */
+	vcpu->arch.debug_save_restore = false;
+}
+
+static void kvmppc_booke_vcpu_load_debug_regs(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * Check whether guest still need debug resource, if not then there
+	 * is no need to resotre guest context.
+	 */
+	if (!vcpu->arch.shadow_dbg_reg.dbcr0)
+		return;
+
+	/*
+	 * Save Host debug register only after restore. If host debug
+	 * registers are saved and not restored then do not save again.
+	 */
+	if (!vcpu->arch.debug_save_restore) {
+		/* Save Host context */
+		vcpu->arch.host_dbg_reg.dbcr0 = mfspr(SPRN_DBCR0);
+		vcpu->arch.host_dbg_reg.dbcr1 = mfspr(SPRN_DBCR1);
+		vcpu->arch.host_dbg_reg.dbcr2 = mfspr(SPRN_DBCR2);
+#ifdef CONFIG_KVM_E500MC
+		vcpu->arch.host_dbg_reg.dbcr4 = mfspr(SPRN_DBCR4);
+#endif
+		vcpu->arch.host_dbg_reg.iac[0] = mfspr(SPRN_IAC1);
+		vcpu->arch.host_dbg_reg.iac[1] = mfspr(SPRN_IAC2);
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+		vcpu->arch.host_dbg_reg.iac[2] = mfspr(SPRN_IAC3);
+		vcpu->arch.host_dbg_reg.iac[3] = mfspr(SPRN_IAC4);
+#endif
+		vcpu->arch.host_dbg_reg.dac[0] = mfspr(SPRN_DAC1);
+		vcpu->arch.host_dbg_reg.dac[1] = mfspr(SPRN_DAC2);
+		vcpu->arch.debug_save_restore = true;
+	}
+
+	/* Restore Guest Context. Disable all debug events First */
+	mtspr(SPRN_DBCR0, 0x0);
+	/* Clear h/w DBSR */
+	mtspr(SPRN_DBSR, mfspr(SPRN_DBSR));
+
+	mtspr(SPRN_DBCR1, vcpu->arch.shadow_dbg_reg.dbcr1);
+	mtspr(SPRN_DBCR2, vcpu->arch.shadow_dbg_reg.dbcr2);
+#ifdef CONFIG_KVM_E500MC
+	mtspr(SPRN_DBCR4, vcpu->arch.shadow_dbg_reg.dbcr4);
+#endif
+	mtspr(SPRN_IAC1, vcpu->arch.shadow_dbg_reg.iac[0]);
+	mtspr(SPRN_IAC2, vcpu->arch.shadow_dbg_reg.iac[1]);
+#if CONFIG_PPC_ADV_DEBUG_IACS > 2
+	mtspr(SPRN_IAC3, vcpu->arch.shadow_dbg_reg.iac[2]);
+	mtspr(SPRN_IAC4, vcpu->arch.shadow_dbg_reg.iac[3]);
+#endif
+	mtspr(SPRN_DAC1, vcpu->arch.shadow_dbg_reg.dac[0]);
+	mtspr(SPRN_DAC2, vcpu->arch.shadow_dbg_reg.dac[1]);
+
+	/* Enable debug events after other debug registers restored */
+	mtspr(SPRN_DBCR0, vcpu->arch.shadow_dbg_reg.dbcr0);
+}
+
+int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
+					 struct kvm_guest_debug *dbg)
+{
+	struct kvmppc_booke_debug_reg *dbg_reg;
+	int n, b = 0, w = 0;
+	const u32 bp_code[] = {
+		DBCR0_IAC1 | DBCR0_IDM,
+		DBCR0_IAC2 | DBCR0_IDM,
+		DBCR0_IAC3 | DBCR0_IDM,
+		DBCR0_IAC4 | DBCR0_IDM
+	};
+	const u32 wp_code[] = {
+		DBCR0_DAC1W | DBCR0_IDM,
+		DBCR0_DAC2W | DBCR0_IDM,
+		DBCR0_DAC1R | DBCR0_IDM,
+		DBCR0_DAC2R | DBCR0_IDM
+	};
+
+	if (!(dbg->control & KVM_GUESTDBG_ENABLE)) {
+		/* Clear All debug events */
+		vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
+		vcpu->guest_debug = 0;
+		return 0;
+	}
+
+	vcpu->guest_debug = dbg->control;
+	vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
+	/* Set DBCR0_EDM in guest visible DBCR0 register. */
+	vcpu->arch.dbg_reg.dbcr0 = DBCR0_EDM;
+
+	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
+		vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC;
+
+	if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
+		/* Code below handles only HW breakpoints */
+		kvmppc_booke_vcpu_load_debug_regs(vcpu);
+		return 0;
+	}
+
+	dbg_reg = &(vcpu->arch.shadow_dbg_reg);
+
+	/*
+	 * On BOOKE (e500v2); Set DBCR1 and DBCR2 to allow debug events
+	 * to occur when MSR.PR is set.
+	 * On BOOKE-HV (e500mc+); MSR.PR = 0 when guest is running. So we
+	 * should clear DBCR1 and DBCR2.
+	 */
+#ifdef CONFIG_KVM_BOOKE_HV
+	dbg_reg->dbcr1 = 0;
+	dbg_reg->dbcr2 = 0;
+#else
+	dbg_reg->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US | DBCR1_IAC3US |
+			  DBCR1_IAC4US;
+	dbg_reg->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US;
+#endif
+
+	for (n = 0; n < (KVMPPC_BOOKE_IAC_NUM + KVMPPC_BOOKE_DAC_NUM); n++) {
+		u32 type = dbg->arch.bp[n].type;
+
+		if (!type)
+			break;
+
+		if (type & (KVMPPC_DEBUG_WATCH_READ |
+			    KVMPPC_DEBUG_WATCH_WRITE)) {
+			if (w >= KVMPPC_BOOKE_DAC_NUM)
+				continue;
+
+			if (type & KVMPPC_DEBUG_WATCH_READ)
+				dbg_reg->dbcr0 |= wp_code[w + 2];
+			if (type & KVMPPC_DEBUG_WATCH_WRITE)
+				dbg_reg->dbcr0 |= wp_code[w];
+
+			dbg_reg->dac[w] = dbg->arch.bp[n].addr;
+			w++;
+		} else if (type & KVMPPC_DEBUG_BREAKPOINT) {
+			if (b >= KVMPPC_BOOKE_IAC_NUM)
+				continue;
+
+			dbg_reg->dbcr0 |= bp_code[b];
+			dbg_reg->iac[b] = dbg->arch.bp[n].addr;
+			b++;
+		}
+	}
+
+	kvmppc_booke_vcpu_load_debug_regs(vcpu);
+	return 0;
+}
+
 void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	vcpu->cpu = smp_processor_id();
 	current->thread.kvm_vcpu = vcpu;
+
+	kvmppc_booke_vcpu_load_debug_regs(vcpu);
 }
 
 void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu)
 {
 	current->thread.kvm_vcpu = NULL;
 	vcpu->cpu = -1;
+
+	kvmppc_booke_vcpu_put_debug_regs(vcpu);
 }
 
 int __init kvmppc_booke_init(void)
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index e78f353..cefdd38 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -26,6 +26,7 @@ 
 #define XOP_TLBRE   946
 #define XOP_TLBWE   978
 #define XOP_TLBILX  18
+#define XOP_EHPRIV  270
 
 #ifdef CONFIG_KVM_E500MC
 static int dbell2prio(ulong param)
@@ -130,6 +131,15 @@  int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			emulated = kvmppc_e500_emul_tlbivax(vcpu, ea);
 			break;
 
+		case XOP_EHPRIV:
+			run->exit_reason = KVM_EXIT_DEBUG;
+			run->debug.arch.address = vcpu->arch.pc;
+			run->debug.arch.status = 0;
+			kvmppc_account_exit(vcpu, DEBUG_EXITS);
+			emulated = EMULATE_EXIT_USER;
+			*advance = 0;
+			break;
+
 		default:
 			emulated = EMULATE_FAIL;
 		}