[v4,09/10] ARM: KVM: Handle I/O aborts

Message ID 20110806104007.27198.63426.stgit@localhost6.localdomain6

Commit Message

Christoffer Dall Aug. 6, 2011, 10:40 a.m. UTC
When the guest accesses I/O memory, this creates data abort
exceptions, which are handled by decoding the HSR information
(physical address, read/write, length, register) and forwarding reads
and writes to QEMU, which performs the device emulation.

Certain classes of load/store operations do not support the syndrome
information provided in the HSR and we therefore must be able to fetch
the offending instruction from guest memory and decode it manually.

This requires changing the general flow somewhat since new calls to run
the VCPU must check if there's a pending MMIO load and perform the write
after userspace has made the data available.
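
In rough sketch form, the reworked run loop looks like this (mirroring
the arch/arm/kvm/arm.c hunk in the patch below):

	for (;;) {
		/* complete a pending MMIO read before re-entering the guest */
		if (run->exit_reason == KVM_EXIT_MMIO) {
			ret = kvm_handle_mmio_return(vcpu, vcpu->run);
			if (ret)
				break;
		}

		run->exit_reason = KVM_EXIT_UNKNOWN;

		/* ... enter the guest as before ... */
	}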

Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
---
 arch/arm/include/asm/kvm_emulate.h |    2 
 arch/arm/include/asm/kvm_host.h    |    1 
 arch/arm/include/asm/kvm_mmu.h     |    1 
 arch/arm/kvm/arm.c                 |    8 +
 arch/arm/kvm/arm_emulate.c         |  279 ++++++++++++++++++++++++++++++++++++
 arch/arm/kvm/arm_mmu.c             |  155 ++++++++++++++++++++
 arch/arm/kvm/trace.h               |   15 ++
 7 files changed, 457 insertions(+), 4 deletions(-)



Comments

Avi Kivity Aug. 9, 2011, 11:34 a.m. UTC | #1
On 08/06/2011 01:40 PM, Christoffer Dall wrote:
> When the guest accesses I/O memory, this creates data abort
> exceptions, which are handled by decoding the HSR information
> (physical address, read/write, length, register) and forwarding reads
> and writes to QEMU, which performs the device emulation.
>
> Certain classes of load/store operations do not support the syndrome
> information provided in the HSR and we therefore must be able to fetch
> the offending instruction from guest memory and decode it manually.
>
> This requires changing the general flow somewhat since new calls to run
> the VCPU must check if there's a pending MMIO load and perform the write
> after userspace has made the data available.

We need to move this to arch-independent code.  Outside the scope of
these patches, of course.

>   /******************************************************************************
> - * Co-processor emulation
> + * Utility functions common for all emulation code
> + *****************************************************************************/
> +
> +/*
> + * This one accepts a matrix where the first element is the
> + * bits as they must be, and the second element is the bitmask.
>    */
> +#define INSTR_NONE	-1
> +static int kvm_instr_index(u32 instr, u32 table[][2], int table_entries)
> +{
> +	int i;
> +	u32 mask;
> +
> +	for (i = 0; i < table_entries; i++) {
> +		mask = table[i][1];
> +		if ((table[i][0] & mask) == (instr & mask))
> +			return i;
> +	}
> +	return INSTR_NONE;
> +}

Seems somewhat inefficient to do this for insn emulation.  Is there not 
a common prefix that can be used to determine the mask?

> +
> +/*
> + * Must be ordered with LOADS first and WRITES afterwards
> + * for easy distinction when doing MMIO.
> + */
> +#define NUM_LD_INSTR  9
> +enum INSTR_LS_INDEXES {
> +	INSTR_LS_LDRBT, INSTR_LS_LDRT, INSTR_LS_LDR, INSTR_LS_LDRB,
> +	INSTR_LS_LDRD, INSTR_LS_LDREX, INSTR_LS_LDRH, INSTR_LS_LDRSB,
> +	INSTR_LS_LDRSH,
> +	INSTR_LS_STRBT, INSTR_LS_STRT, INSTR_LS_STR, INSTR_LS_STRB,
> +	INSTR_LS_STRD, INSTR_LS_STREX, INSTR_LS_STRH,
> +	NUM_LS_INSTR
> +};
> +
> +static u32 ls_instr[NUM_LS_INSTR][2] = {
> +	{0x04700000, 0x0d700000}, /* LDRBT */
> +	{0x04300000, 0x0d700000}, /* LDRT  */
> +	{0x04100000, 0x0c500000}, /* LDR   */
> +	{0x04500000, 0x0c500000}, /* LDRB  */
> +	{0x000000d0, 0x0e1000f0}, /* LDRD  */
> +	{0x01900090, 0x0ff000f0}, /* LDREX */
> +	{0x001000b0, 0x0e1000f0}, /* LDRH  */
> +	{0x001000d0, 0x0e1000f0}, /* LDRSB */
> +	{0x001000f0, 0x0e1000f0}, /* LDRSH */
> +	{0x04600000, 0x0d700000}, /* STRBT */
> +	{0x04200000, 0x0d700000}, /* STRT  */
> +	{0x04000000, 0x0c500000}, /* STR   */
> +	{0x04400000, 0x0c500000}, /* STRB  */
> +	{0x000000f0, 0x0e1000f0}, /* STRD  */
> +	{0x01800090, 0x0ff000f0}, /* STREX */
> +	{0x000000b0, 0x0e1000f0}  /* STRH  */
> +};
> +

Okay, maybe not.  But surely there's some clever arithmetic the cpu uses 
to decode this.
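
One illustrative possibility, as a sketch only (the split tables and
their sizes are hypothetical; kvm_instr_index is the helper from the
patch): the A32 encoding groups these load/stores by bits 27:25, so a
switch on that field could narrow the mask/value scan to a handful of
entries:

	/* hypothetical dispatch on bits 27:25 of the instruction word */
	switch ((instr >> 25) & 0x7) {
	case 0: /* extra load/stores: LDRH/STRH, LDRD/STRD, LDRSB/LDRSH,
		 * LDREX/STREX */
		return kvm_instr_index(instr, extra_ls_instr, NUM_EXTRA_LS);
	case 2: /* load/store word/unsigned byte, immediate offset */
	case 3: /* load/store word/unsigned byte, register offset */
		return kvm_instr_index(instr, word_byte_ls_instr, NUM_WB_LS);
	default:
		return INSTR_NONE;
	}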

> diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
> index 381ea4a..4f20d75 100644
> --- a/arch/arm/kvm/trace.h
> +++ b/arch/arm/kvm/trace.h
> @@ -39,6 +39,21 @@ TRACE_EVENT(kvm_exit,
>   	TP_printk("PC: 0x%08lx", __entry->vcpu_pc)
>   );
>
> +TRACE_EVENT(kvm_mmio_emulate,
> +	TP_PROTO(unsigned long vcpu_pc),
> +	TP_ARGS(vcpu_pc),

Please add the instruction bytes and any other information needed to 
decode the opcode (e.g. thumb mode).  For x86 we have a trace-cmd plugin 
that disassembles guest instructions into the trace; it's very useful.
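
A sketch of one way to extend the event (the extra fields and names
are only a suggestion; the CPSR would let such a plugin see the thumb
state):

	TRACE_EVENT(kvm_mmio_emulate,
		TP_PROTO(unsigned long vcpu_pc, unsigned long instr,
			 unsigned long cpsr),
		TP_ARGS(vcpu_pc, instr, cpsr),

		TP_STRUCT__entry(
			__field(	unsigned long,	vcpu_pc	)
			__field(	unsigned long,	instr	)
			__field(	unsigned long,	cpsr	)
		),

		TP_fast_assign(
			__entry->vcpu_pc	= vcpu_pc;
			__entry->instr		= instr;
			__entry->cpsr		= cpsr;
		),

		TP_printk("Emulate MMIO at: 0x%08lx (instr: 0x%08lx, cpsr: 0x%08lx)",
			  __entry->vcpu_pc, __entry->instr, __entry->cpsr)
	);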

> +
> +	TP_STRUCT__entry(
> +		__field(	unsigned long,	vcpu_pc		)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->vcpu_pc		= vcpu_pc;
> +	),
> +
> +	TP_printk("Emulate MMIO at: 0x%08lx", __entry->vcpu_pc)
> +);
> +
>   TRACE_EVENT(kvm_emulate_cp15_imp,
>   	TP_PROTO(unsigned long Op1, unsigned long Rt1, unsigned long CRn,
>   		 unsigned long CRm, unsigned long Op2, bool is_write),
>
>
Christoffer Dall Aug. 9, 2011, 11:39 a.m. UTC | #2
On Aug 9, 2011, at 1:34 PM, Avi Kivity wrote:

> On 08/06/2011 01:40 PM, Christoffer Dall wrote:
>> When the guest accesses I/O memory, this creates data abort
>> exceptions, which are handled by decoding the HSR information
>> (physical address, read/write, length, register) and forwarding reads
>> and writes to QEMU, which performs the device emulation.
>> 
>> Certain classes of load/store operations do not support the syndrome
>> information provided in the HSR and we therefore must be able to fetch
>> the offending instruction from guest memory and decode it manually.
>> 
>> This requires changing the general flow somewhat since new calls to run
>> the VCPU must check if there's a pending MMIO load and perform the write
>> after userspace has made the data available.
> 
> We need to move this to arch-independent code.  Outside the scope of these patches, of course.

OK, let me know what I can do to make this fit with the ARM implementation nicely.

> 
>>  /******************************************************************************
>> - * Co-processor emulation
>> + * Utility functions common for all emulation code
>> + *****************************************************************************/
>> +
>> +/*
>> + * This one accepts a matrix where the first element is the
>> + * bits as they must be, and the second element is the bitmask.
>>   */
>> +#define INSTR_NONE	-1
>> +static int kvm_instr_index(u32 instr, u32 table[][2], int table_entries)
>> +{
>> +	int i;
>> +	u32 mask;
>> +
>> +	for (i = 0; i < table_entries; i++) {
>> +		mask = table[i][1];
>> +		if ((table[i][0] & mask) == (instr & mask))
>> +			return i;
>> +	}
>> +	return INSTR_NONE;
>> +}
> 
> Seems somewhat inefficient to do this for insn emulation.  Is there not a common prefix that can be used to determine the mask?

hehe, not so much.

> 
>> +
>> +/*
>> + * Must be ordered with LOADS first and WRITES afterwards
>> + * for easy distinction when doing MMIO.
>> + */
>> +#define NUM_LD_INSTR  9
>> +enum INSTR_LS_INDEXES {
>> +	INSTR_LS_LDRBT, INSTR_LS_LDRT, INSTR_LS_LDR, INSTR_LS_LDRB,
>> +	INSTR_LS_LDRD, INSTR_LS_LDREX, INSTR_LS_LDRH, INSTR_LS_LDRSB,
>> +	INSTR_LS_LDRSH,
>> +	INSTR_LS_STRBT, INSTR_LS_STRT, INSTR_LS_STR, INSTR_LS_STRB,
>> +	INSTR_LS_STRD, INSTR_LS_STREX, INSTR_LS_STRH,
>> +	NUM_LS_INSTR
>> +};
>> +
>> +static u32 ls_instr[NUM_LS_INSTR][2] = {
>> +	{0x04700000, 0x0d700000}, /* LDRBT */
>> +	{0x04300000, 0x0d700000}, /* LDRT  */
>> +	{0x04100000, 0x0c500000}, /* LDR   */
>> +	{0x04500000, 0x0c500000}, /* LDRB  */
>> +	{0x000000d0, 0x0e1000f0}, /* LDRD  */
>> +	{0x01900090, 0x0ff000f0}, /* LDREX */
>> +	{0x001000b0, 0x0e1000f0}, /* LDRH  */
>> +	{0x001000d0, 0x0e1000f0}, /* LDRSB */
>> +	{0x001000f0, 0x0e1000f0}, /* LDRSH */
>> +	{0x04600000, 0x0d700000}, /* STRBT */
>> +	{0x04200000, 0x0d700000}, /* STRT  */
>> +	{0x04000000, 0x0c500000}, /* STR   */
>> +	{0x04400000, 0x0c500000}, /* STRB  */
>> +	{0x000000f0, 0x0e1000f0}, /* STRD  */
>> +	{0x01800090, 0x0ff000f0}, /* STREX */
>> +	{0x000000b0, 0x0e1000f0}  /* STRH  */
>> +};
>> +
> 
> Okay, maybe not.  But surely there's some clever arithmetic the cpu uses to decode this.

Probably, but this is only used in the rare case when the virt. extensions don't support the fault information. I highly doubt that this is in any critical path for any sane guest OS, but surely one could write a VM that would run very slowly. I would prefer not to spend time on this right now and perhaps get back to it when we have all sorts of other features in place. Or, what do you think?

> 
>> diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
>> index 381ea4a..4f20d75 100644
>> --- a/arch/arm/kvm/trace.h
>> +++ b/arch/arm/kvm/trace.h
>> @@ -39,6 +39,21 @@ TRACE_EVENT(kvm_exit,
>>  	TP_printk("PC: 0x%08lx", __entry->vcpu_pc)
>>  );
>> 
>> +TRACE_EVENT(kvm_mmio_emulate,
>> +	TP_PROTO(unsigned long vcpu_pc),
>> +	TP_ARGS(vcpu_pc),
> 
> Please add the instruction bytes and any other information needed to decode the opcode (e.g. thumb mode).  For x86 we have a trace-cmd plugin that disassembles guest instructions into the trace; it's very useful.

that's a good idea. I will look into it.

> 
>> +
>> +	TP_STRUCT__entry(
>> +		__field(	unsigned long,	vcpu_pc		)
>> +	),
>> +
>> +	TP_fast_assign(
>> +		__entry->vcpu_pc		= vcpu_pc;
>> +	),
>> +
>> +	TP_printk("Emulate MMIO at: 0x%08lx", __entry->vcpu_pc)
>> +);
>> +
>>  TRACE_EVENT(kvm_emulate_cp15_imp,
>>  	TP_PROTO(unsigned long Op1, unsigned long Rt1, unsigned long CRn,
>>  		 unsigned long CRm, unsigned long Op2, bool is_write),
>> 
>> 
> 
> -- 
> error compiling committee.c: too many arguments to function
> 

Avi Kivity Aug. 9, 2011, 11:46 a.m. UTC | #3
On 08/09/2011 02:39 PM, Christoffer Dall wrote:
> >>  +
> >>  +static u32 ls_instr[NUM_LS_INSTR][2] = {
> >>  +	{0x04700000, 0x0d700000}, /* LDRBT */
> >>  +	{0x04300000, 0x0d700000}, /* LDRT  */
> >>  +	{0x04100000, 0x0c500000}, /* LDR   */
> >>  +	{0x04500000, 0x0c500000}, /* LDRB  */
> >>  +	{0x000000d0, 0x0e1000f0}, /* LDRD  */
> >>  +	{0x01900090, 0x0ff000f0}, /* LDREX */
> >>  +	{0x001000b0, 0x0e1000f0}, /* LDRH  */
> >>  +	{0x001000d0, 0x0e1000f0}, /* LDRSB */
> >>  +	{0x001000f0, 0x0e1000f0}, /* LDRSH */
> >>  +	{0x04600000, 0x0d700000}, /* STRBT */
> >>  +	{0x04200000, 0x0d700000}, /* STRT  */
> >>  +	{0x04000000, 0x0c500000}, /* STR   */
> >>  +	{0x04400000, 0x0c500000}, /* STRB  */
> >>  +	{0x000000f0, 0x0e1000f0}, /* STRD  */
> >>  +	{0x01800090, 0x0ff000f0}, /* STREX */
> >>  +	{0x000000b0, 0x0e1000f0}  /* STRH  */
> >>  +};
> >>  +
> >
> >  Okay, maybe not.  But surely there's some clever arithmetic the cpu uses to decode this.
>
> Probably, but this is only used in the rare case when the virt. extensions don't support the fault information. I highly doubt that this is in any critical path for any sane guest OS, but surely one could write a VM that would run very slowly. I would prefer not to spend time on this right now and perhaps get back to it when we have all sorts of other features in place. Or, what do you think?

It's the ordinary case of premature optimization that afflicts even the 
best of us.  Best to keep it simple.

Patch

diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index af21fd5..9899474 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -46,6 +46,8 @@  int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int kvm_handle_cp14_access(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int kvm_handle_cp15_access(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run);
+int kvm_emulate_mmio_ls(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
+			unsigned long instr);
 
 /*
  * Return the SPSR for the specified mode of the virtual CPU.
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 59fcd15..86f6cf1 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -88,6 +88,7 @@  struct kvm_vcpu_arch {
 	u64 pc_ipa;		/* IPA for the current PC (VA to PA result) */
 
 	/* IO related fields */
+	bool mmio_sign_extend;	/* for byte/halfword loads */
 	u32 mmio_rd;
 
 	/* Misc. fields */
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index a64ab2d..f06f42d 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -40,6 +40,7 @@  void free_hyp_pmds(pgd_t *hyp_pgd);
 int kvm_alloc_stage2_pgd(struct kvm *kvm);
 void kvm_free_stage2_pgd(struct kvm *kvm);
 
+int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run);
 
 #endif /* __ARM_KVM_MMU_H__ */
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index a28de12..3e3f6d7 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -385,6 +385,14 @@  int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 	int ret;
 
 	for (;;) {
+		if (run->exit_reason == KVM_EXIT_MMIO) {
+			ret = kvm_handle_mmio_return(vcpu, vcpu->run);
+			if (ret)
+				break;
+		}
+
+		run->exit_reason = KVM_EXIT_UNKNOWN;
+
 		trace_kvm_entry(vcpu->arch.regs.pc);
 		debug_ws_enter(vcpu->arch.regs.pc);
 		kvm_guest_enter();
diff --git a/arch/arm/kvm/arm_emulate.c b/arch/arm/kvm/arm_emulate.c
index 37fe029..0c99360 100644
--- a/arch/arm/kvm/arm_emulate.c
+++ b/arch/arm/kvm/arm_emulate.c
@@ -20,6 +20,7 @@ 
 #include <asm/kvm_emulate.h>
 #include <trace/events/kvm.h>
 
+#include "trace.h"
 #include "debug.h"
 #include "trace.h"
 
@@ -128,8 +129,30 @@  u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode)
 }
 
 /******************************************************************************
- * Co-processor emulation
+ * Utility functions common for all emulation code
+ *****************************************************************************/
+
+/*
+ * This one accepts a matrix where the first element is the
+ * bits as they must be, and the second element is the bitmask.
  */
+#define INSTR_NONE	-1
+static int kvm_instr_index(u32 instr, u32 table[][2], int table_entries)
+{
+	int i;
+	u32 mask;
+
+	for (i = 0; i < table_entries; i++) {
+		mask = table[i][1];
+		if ((table[i][0] & mask) == (instr & mask))
+			return i;
+	}
+	return INSTR_NONE;
+}
+
+/******************************************************************************
+ * Co-processor emulation
+ *****************************************************************************/
 
 struct coproc_params {
 	unsigned long CRm;
@@ -314,3 +337,257 @@  int kvm_handle_wfi(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
 	return 0;
 }
+
+
+/******************************************************************************
+ * Load-Store instruction emulation
+ *****************************************************************************/
+
+/*
+ * Must be ordered with LOADS first and WRITES afterwards
+ * for easy distinction when doing MMIO.
+ */
+#define NUM_LD_INSTR  9
+enum INSTR_LS_INDEXES {
+	INSTR_LS_LDRBT, INSTR_LS_LDRT, INSTR_LS_LDR, INSTR_LS_LDRB,
+	INSTR_LS_LDRD, INSTR_LS_LDREX, INSTR_LS_LDRH, INSTR_LS_LDRSB,
+	INSTR_LS_LDRSH,
+	INSTR_LS_STRBT, INSTR_LS_STRT, INSTR_LS_STR, INSTR_LS_STRB,
+	INSTR_LS_STRD, INSTR_LS_STREX, INSTR_LS_STRH,
+	NUM_LS_INSTR
+};
+
+static u32 ls_instr[NUM_LS_INSTR][2] = {
+	{0x04700000, 0x0d700000}, /* LDRBT */
+	{0x04300000, 0x0d700000}, /* LDRT  */
+	{0x04100000, 0x0c500000}, /* LDR   */
+	{0x04500000, 0x0c500000}, /* LDRB  */
+	{0x000000d0, 0x0e1000f0}, /* LDRD  */
+	{0x01900090, 0x0ff000f0}, /* LDREX */
+	{0x001000b0, 0x0e1000f0}, /* LDRH  */
+	{0x001000d0, 0x0e1000f0}, /* LDRSB */
+	{0x001000f0, 0x0e1000f0}, /* LDRSH */
+	{0x04600000, 0x0d700000}, /* STRBT */
+	{0x04200000, 0x0d700000}, /* STRT  */
+	{0x04000000, 0x0c500000}, /* STR   */
+	{0x04400000, 0x0c500000}, /* STRB  */
+	{0x000000f0, 0x0e1000f0}, /* STRD  */
+	{0x01800090, 0x0ff000f0}, /* STREX */
+	{0x000000b0, 0x0e1000f0}  /* STRH  */
+};
+
+static inline int get_arm_ls_instr_index(u32 instr)
+{
+	return kvm_instr_index(instr, ls_instr, NUM_LS_INSTR);
+}
+
+/*
+ * Load-Store instruction decoding
+ */
+#define INSTR_LS_TYPE_BIT		26
+#define INSTR_LS_RD_MASK		0x0000f000
+#define INSTR_LS_RD_SHIFT		12
+#define INSTR_LS_RN_MASK		0x000f0000
+#define INSTR_LS_RN_SHIFT		16
+#define INSTR_LS_RM_MASK		0x0000000f
+#define INSTR_LS_OFFSET12_MASK		0x00000fff
+
+#define INSTR_LS_BIT_P			24
+#define INSTR_LS_BIT_U			23
+#define INSTR_LS_BIT_B			22
+#define INSTR_LS_BIT_W			21
+#define INSTR_LS_BIT_L			20
+#define INSTR_LS_BIT_S			 6
+#define INSTR_LS_BIT_H			 5
+
+/*
+ * ARM addressing mode defines
+ */
+#define OFFSET_IMM_MASK			0x0e000000
+#define OFFSET_IMM_VALUE		0x04000000
+#define OFFSET_REG_MASK			0x0e000ff0
+#define OFFSET_REG_VALUE		0x06000000
+#define OFFSET_SCALE_MASK		0x0e000010
+#define OFFSET_SCALE_VALUE		0x06000000
+
+#define SCALE_SHIFT_MASK		0x000000a0
+#define SCALE_SHIFT_SHIFT		5
+#define SCALE_SHIFT_LSL			0x0
+#define SCALE_SHIFT_LSR			0x1
+#define SCALE_SHIFT_ASR			0x2
+#define SCALE_SHIFT_ROR_RRX		0x3
+#define SCALE_SHIFT_IMM_MASK		0x00000f80
+#define SCALE_SHIFT_IMM_SHIFT		6
+
+#define PSR_BIT_C			29
+
+static unsigned long ls_word_calc_offset(struct kvm_vcpu *vcpu,
+					 unsigned long instr)
+{
+	int offset = 0;
+
+	if ((instr & OFFSET_IMM_MASK) == OFFSET_IMM_VALUE) {
+		/* Immediate offset/index */
+		offset = instr & INSTR_LS_OFFSET12_MASK;
+
+		if (!(instr & (1U << INSTR_LS_BIT_U)))
+			offset = -offset;
+	}
+
+	if ((instr & OFFSET_REG_MASK) == OFFSET_REG_VALUE) {
+		/* Register offset/index */
+		u8 rm = instr & INSTR_LS_RM_MASK;
+		offset = *vcpu_reg(vcpu, rm);
+
+		if (!(instr & (1U << INSTR_LS_BIT_P)))
+			offset = 0;
+	}
+
+	if ((instr & OFFSET_SCALE_MASK) == OFFSET_SCALE_VALUE) {
+		/* Scaled register offset */
+		int asr_test;
+		u8 rm = instr & INSTR_LS_RM_MASK;
+		u8 shift = (instr & SCALE_SHIFT_MASK) >> SCALE_SHIFT_SHIFT;
+		u32 shift_imm = (instr & SCALE_SHIFT_IMM_MASK)
+				>> SCALE_SHIFT_IMM_SHIFT;
+		offset = *vcpu_reg(vcpu, rm);
+
+		switch (shift) {
+		case SCALE_SHIFT_LSL:
+			offset = offset << shift_imm;
+			break;
+		case SCALE_SHIFT_LSR:
+			if (shift_imm == 0)
+				offset = 0;
+			else
+				offset = ((u32)offset) >> shift_imm;
+			break;
+		case SCALE_SHIFT_ASR:
+			/* Test that the compiler used arithmetic right shift
+			 * for signed values. */
+			asr_test = 0xffffffff;
+			BUG_ON((asr_test >> 2) >= 0);
+			if (shift_imm == 0) {
+				if (offset & (1U << 31))
+					offset = 0xffffffff;
+				else
+					offset = 0;
+			} else {
+				offset = offset >> shift_imm;
+			}
+			break;
+		case SCALE_SHIFT_ROR_RRX:
+			/* Test that the compiler used arithmetic right shift
+			 * for signed values. */
+			asr_test = 0xffffffff;
+			BUG_ON((asr_test >> 2) >= 0);
+			if (shift_imm == 0) {
+				u32 C = (vcpu->arch.regs.cpsr &
+						(1U << PSR_BIT_C));
+				offset = (C << 31) | offset >> 1;
+			} else {
+				offset = ror32(offset, shift_imm);
+			}
+			break;
+		}
+
+		if (instr & (1U << INSTR_LS_BIT_U))
+			return offset;
+		else
+			return -offset;
+	}
+
+	if (instr & (1U << INSTR_LS_BIT_U))
+		return offset;
+	else
+		return -offset;
+
+	BUG();
+}
+
+static int kvm_ls_length(struct kvm_vcpu *vcpu, u32 instr)
+{
+	int index;
+
+	index = get_arm_ls_instr_index(instr);
+	BUG_ON(index == INSTR_NONE);
+
+	if (instr & (1U << INSTR_LS_TYPE_BIT)) {
+		/* LS word or unsigned byte */
+		if (instr & (1U << INSTR_LS_BIT_B))
+			return sizeof(unsigned char);
+		else
+			return sizeof(u32);
+	} else {
+		/* LS halfword, doubleword or signed byte */
+		u32 H = (instr & (1U << INSTR_LS_BIT_H));
+		u32 S = (instr & (1U << INSTR_LS_BIT_S));
+		u32 L = (instr & (1U << INSTR_LS_BIT_L));
+
+		if (!L && S) {
+			kvm_msg("WARNING: d-word for MMIO");
+			return 2 * sizeof(u32);
+		} else if (L && S && !H)
+			return sizeof(char);
+		else
+			return sizeof(u16);
+	}
+
+	BUG();
+}
+
+/**
+ * kvm_emulate_mmio_ls - emulates load/store instructions made to I/O memory
+ * @vcpu:	The vcpu pointer
+ * @fault_ipa:	The IPA that caused the 2nd stage fault
+ * @instr:	The instruction that caused the fault
+ *
+ * Handles emulation of load/store instructions which cannot be emulated through
+ * information found in the HSR on faults. It is necessary in this case to
+ * simply decode the offending instruction in software and determine the
+ * required operands.
+ */
+int kvm_emulate_mmio_ls(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
+			unsigned long instr)
+{
+	unsigned long rd, rn, offset, len;
+	int index;
+	bool is_write;
+
+	index = get_arm_ls_instr_index(instr);
+	if (index == INSTR_NONE) {
+		kvm_err(-EINVAL, "Unknown load/store instruction");
+		return -EINVAL;
+	}
+
+	is_write = (index < NUM_LD_INSTR) ? false : true;
+	rd = (instr & INSTR_LS_RD_MASK) >> INSTR_LS_RD_SHIFT;
+	len = kvm_ls_length(vcpu, instr);
+
+	vcpu->run->exit_reason = KVM_EXIT_MMIO;
+	vcpu->run->mmio.is_write = is_write;
+	vcpu->run->mmio.phys_addr = fault_ipa;
+	vcpu->run->mmio.len = len;
+	vcpu->arch.mmio_sign_extend = false;
+	vcpu->arch.mmio_rd = rd;
+
+	trace_kvm_mmio_emulate(vcpu->arch.regs.pc);
+	trace_kvm_mmio((is_write) ? KVM_TRACE_MMIO_WRITE :
+				    KVM_TRACE_MMIO_READ_UNSATISFIED,
+			len, fault_ipa, (is_write) ? *vcpu_reg(vcpu, rd) : 0);
+
+	/* Handle base register writeback */
+	if (!(instr & (1U << INSTR_LS_BIT_P)) ||
+	     (instr & (1U << INSTR_LS_BIT_W))) {
+		rn = (instr & INSTR_LS_RN_MASK) >> INSTR_LS_RN_SHIFT;
+		offset = ls_word_calc_offset(vcpu, instr);
+		*vcpu_reg(vcpu, rn) += offset;
+	}
+
+	/*
+	 * The MMIO instruction is emulated and should not be re-executed
+	 * in the guest. (XXX We don't support Thumb instructions yet).
+	 */
+	*vcpu_reg(vcpu, 15) += 4;
+	return 0;
+}
diff --git a/arch/arm/kvm/arm_mmu.c b/arch/arm/kvm/arm_mmu.c
index 6040aff..032133a 100644
--- a/arch/arm/kvm/arm_mmu.c
+++ b/arch/arm/kvm/arm_mmu.c
@@ -16,10 +16,13 @@ 
 
 #include <linux/mman.h>
 #include <linux/kvm_host.h>
+#include <trace/events/kvm.h>
 #include <asm/pgalloc.h>
 #include <asm/kvm_arm.h>
 #include <asm/kvm_mmu.h>
+#include <asm/kvm_emulate.h>
 
+#include "trace.h"
 #include "debug.h"
 
 pgd_t *kvm_hyp_pgd;
@@ -341,6 +344,152 @@  static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	return 0;
 }
 
+/**
+ * kvm_handle_mmio_return -- Handle MMIO loads after user space emulation
+ * @vcpu: The VCPU pointer
+ * @run:  The VCPU run struct containing the mmio data
+ *
+ * This should only be called after returning to QEMU for MMIO load emulation.
+ */
+int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	int *dest;
+	unsigned int len;
+	int mask;
+
+	if (!run->mmio.is_write) {
+		dest = vcpu_reg(vcpu, vcpu->arch.mmio_rd);
+		memset(dest, 0, sizeof(int));
+
+		if (run->mmio.len > 4) {
+			kvm_err(-EINVAL, "Incorrect mmio length");
+			return -EINVAL;
+		}
+
+		len = run->mmio.len;
+		memcpy(dest, run->mmio.data, len);
+
+		trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
+				*((u64 *)run->mmio.data));
+
+		if (vcpu->arch.mmio_sign_extend && len < 4) {
+			mask = 1U << ((len * 8) - 1);
+			*dest = (*dest ^ mask) - mask;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * invalid_io_mem_abort -- Handle I/O aborts when the ISV bit is clear
+ *
+ * @vcpu:      The vcpu pointer
+ * @fault_ipa: The IPA that caused the 2nd stage fault
+ *
+ * Some load/store instructions cannot be emulated using the information
+ * presented in the HSR, for instance, register write-back instructions are not
+ * supported. We therefore need to fetch the instruction, decode it, and then
+ * emulate its behavior.
+ */
+static int invalid_io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
+{
+	unsigned long instr;
+	phys_addr_t pc_ipa;
+
+	if (vcpu->arch.pc_ipa & (1U << 11)) {
+		/* LPAE PAR format */
+		pc_ipa = vcpu->arch.pc_ipa & PAGE_MASK & ((1ULL << 32) - 1);
+	} else {
+		/* VMSAv7 PAR format */
+		pc_ipa = vcpu->arch.pc_ipa & PAGE_MASK & ((1ULL << 40) - 1);
+	}
+	pc_ipa += vcpu->arch.regs.pc & ~PAGE_MASK;
+
+	if (kvm_read_guest(vcpu->kvm, pc_ipa, &instr, sizeof(instr))) {
+		kvm_err(-EFAULT, "Could not copy guest instruction");
+		return -EFAULT;
+	}
+
+	if (vcpu->arch.regs.cpsr & PSR_T_BIT) {
+		/* Need to decode thumb instructions as well */
+		KVMARM_NOT_IMPLEMENTED();
+		return -EINVAL;
+	}
+
+	return kvm_emulate_mmio_ls(vcpu, fault_ipa, instr);
+}
+
+static int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
+			phys_addr_t fault_ipa, struct kvm_memory_slot *memslot)
+{
+	unsigned long rd, len, instr_len;
+	bool is_write, sign_extend;
+
+	if (!(vcpu->arch.hsr & HSR_ISV))
+		return invalid_io_mem_abort(vcpu, fault_ipa);
+
+	if (((vcpu->arch.hsr >> 8) & 1)) {
+		kvm_err(-EFAULT, "Not supported, Cache operation on I/O addr.");
+		return -EFAULT;
+	}
+
+	if ((vcpu->arch.hsr >> 7) & 1) {
+		kvm_err(-EFAULT, "Translation table accesses I/O memory");
+		return -EFAULT;
+	}
+
+	switch ((vcpu->arch.hsr >> 22) & 0x3) {
+	case 0:
+		len = 1;
+		break;
+	case 1:
+		len = 2;
+		break;
+	case 2:
+		len = 4;
+		break;
+	default:
+		kvm_err(-EFAULT, "Invalid I/O abort");
+		return -EFAULT;
+	}
+
+	is_write = ((vcpu->arch.hsr >> 6) & 1);
+	sign_extend = ((vcpu->arch.hsr >> 21) & 1);
+	rd = (vcpu->arch.hsr >> 16) & 0xf;
+	BUG_ON(rd > 15);
+
+	if (rd == 15) {
+		kvm_err(-EFAULT, "I/O memory trying to read/write pc");
+		return -EFAULT;
+	}
+
+	/* Get instruction length in bytes */
+	instr_len = ((vcpu->arch.hsr >> 25) & 1) ? 4 : 2;
+
+	/* Export MMIO operations to user space */
+	run->exit_reason = KVM_EXIT_MMIO;
+	run->mmio.is_write = is_write;
+	run->mmio.phys_addr = fault_ipa;
+	run->mmio.len = len;
+	vcpu->arch.mmio_sign_extend = sign_extend;
+	vcpu->arch.mmio_rd = rd;
+
+	trace_kvm_mmio((is_write) ? KVM_TRACE_MMIO_WRITE :
+				    KVM_TRACE_MMIO_READ_UNSATISFIED,
+			len, fault_ipa, (is_write) ? *vcpu_reg(vcpu, rd) : 0);
+
+	if (is_write)
+		memcpy(run->mmio.data, vcpu_reg(vcpu, rd), len);
+
+	/*
+	 * The MMIO instruction is emulated and should not be re-executed
+	 * in the guest.
+	 */
+	*vcpu_reg(vcpu, 15) += instr_len;
+	return 0;
+}
+
 #define HSR_ABT_FS	(0x3f)
 #define HPFAR_MASK	(~0xf)
 
@@ -385,9 +534,9 @@  int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 			return -EFAULT;
 		}
 
-		kvm_msg("I/O address abort...");
-		KVMARM_NOT_IMPLEMENTED();
-		return -EINVAL;
+		/* Adjust page offset */
+		fault_ipa += vcpu->arch.hdfar % PAGE_SIZE;
+		return io_mem_abort(vcpu, run, fault_ipa, memslot);
 	}
 
 	memslot = gfn_to_memslot(vcpu->kvm, gfn);
diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
index 381ea4a..4f20d75 100644
--- a/arch/arm/kvm/trace.h
+++ b/arch/arm/kvm/trace.h
@@ -39,6 +39,21 @@  TRACE_EVENT(kvm_exit,
 	TP_printk("PC: 0x%08lx", __entry->vcpu_pc)
 );
 
+TRACE_EVENT(kvm_mmio_emulate,
+	TP_PROTO(unsigned long vcpu_pc),
+	TP_ARGS(vcpu_pc),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	vcpu_pc		)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_pc		= vcpu_pc;
+	),
+
+	TP_printk("Emulate MMIO at: 0x%08lx", __entry->vcpu_pc)
+);
+
 TRACE_EVENT(kvm_emulate_cp15_imp,
 	TP_PROTO(unsigned long Op1, unsigned long Rt1, unsigned long CRn,
 		 unsigned long CRm, unsigned long Op2, bool is_write),