
[v4,06/10] ARM: KVM: World-switch implementation

Message ID 20110806103941.27198.33157.stgit@localhost6.localdomain6 (mailing list archive)
State New, archived

Commit Message

Christoffer Dall Aug. 6, 2011, 10:39 a.m. UTC
Provides a complete world-switch implementation to switch to other guests
running in non-secure modes. Includes Hyp exception handlers that
capture the necessary exception information and store the information in
the VCPU and KVM structures.

Switching to Hyp mode is done through a simple HVC instruction. The
exception vector code will check that the HVC comes from VMID==0 and if
so will store the necessary state on the Hyp stack, which will look like
this (see hyp_hvc):
  ...
  Hyp_Sp + 4: lr_usr
  Hyp_Sp    : spsr (Host-SVC cpsr)

When returning from Hyp mode to SVC mode, another HVC instruction is
executed from Hyp mode, which is taken in the Hyp_Svc handler. The Hyp
stack pointer should be where it was left from the above initial call,
since the values on the stack will be used to restore state (see
hyp_svc).

Otherwise, the world-switch is pretty straightforward. All state that
can be modified by the guest is first backed up on the Hyp stack and the
VCPU values are loaded onto the hardware. State that is not loaded, but
theoretically modifiable by the guest, is protected through the
virtualization features to generate a trap and cause software emulation.
Upon guest return, all state is restored from hardware onto the VCPU
struct and the original state is restored from the Hyp stack onto the
hardware.

One controversy may be the back-door call to __irq_svc (the host
kernel's own physical IRQ handler) which is called when a physical IRQ
exception is taken in Hyp mode while running in the guest.
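
For illustration only, the three words pushed before the branch to __irq_svc
(see the comment above irq_kernel_resume in arm_interrupts.S below) can be
written as a C struct; no such type exists in the patch, it just names the
slots the handler reads back through r0:

  /* Illustration only: the 3-word frame built before "b __irq_svc".
   * r0 is pointed at this frame (mov r0, sp) before the branch. */
  struct hyp_irq_frame {
          unsigned long r0;        /* sp + 0x0: saved r0 */
          unsigned long ret_addr;  /* sp + 0x4: where to resume (irq_kernel_resume) */
          unsigned long cpsr;      /* sp + 0x8: CPSR to restore on return */
  };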

Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
---
 arch/arm/include/asm/kvm.h      |    1 
 arch/arm/include/asm/kvm_arm.h  |   26 ++
 arch/arm/include/asm/kvm_host.h |    8 +
 arch/arm/kernel/armksyms.c      |    6 +
 arch/arm/kernel/asm-offsets.c   |   33 +++
 arch/arm/kernel/entry-armv.S    |    1 
 arch/arm/kvm/arm.c              |   55 ++++-
 arch/arm/kvm/arm_guest.c        |    2 
 arch/arm/kvm/arm_interrupts.S   |  443 +++++++++++++++++++++++++++++++++++++++
 9 files changed, 570 insertions(+), 5 deletions(-)



Comments

Avi Kivity Aug. 9, 2011, 11:09 a.m. UTC | #1
On 08/06/2011 01:39 PM, Christoffer Dall wrote:
> Provides a complete world-switch implementation to switch to other guests
> running in non-secure modes. Includes Hyp exception handlers that
> capture the necessary exception information and store the information in
> the VCPU and KVM structures.
>
> Switching to Hyp mode is done through a simple HVC instruction. The
> exception vector code will check that the HVC comes from VMID==0 and if
> so will store the necessary state on the Hyp stack, which will look like
> this (see hyp_hvc):
>    ...
>    Hyp_Sp + 4: lr_usr
>    Hyp_Sp    : spsr (Host-SVC cpsr)
>
> When returning from Hyp mode to SVC mode, another HVC instruction is
> executed from Hyp mode, which is taken in the Hyp_Svc handler. The Hyp
> stack pointer should be where it was left from the above initial call,
> since the values on the stack will be used to restore state (see
> hyp_svc).
>
> Otherwise, the world-switch is pretty straightforward. All state that
> can be modified by the guest is first backed up on the Hyp stack and the
> VCPU values are loaded onto the hardware. State that is not loaded, but
> theoretically modifiable by the guest, is protected through the
> virtualization features to generate a trap and cause software emulation.
> Upon guest return, all state is restored from hardware onto the VCPU
> struct and the original state is restored from the Hyp stack onto the
> hardware.
>
> One controversy may be the back-door call to __irq_svc (the host
> kernel's own physical IRQ handler) which is called when a physical IRQ
> exception is taken in Hyp mode while running in the guest.
>
>
>   void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
>   {
> +	unsigned long start, end;
> +
>   	latest_vcpu = NULL;
> -	KVMARM_NOT_IMPLEMENTED();
> +
> +	start = (unsigned long)vcpu,
> +	end = start + sizeof(struct kvm_vcpu);
> +	remove_hyp_mappings(kvm_hyp_pgd, start, end);

What if vcpu shares a page with another mapped structure?
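
To make the concern concrete: remove_hyp_mappings() presumably works at page
granularity, while the vcpu comes from a kmem_cache, so the page-rounded range
can cover other objects that are still mapped into Hyp mode. A hypothetical
sketch of the effective range (an illustration of the problem, not a fix):

  	/* Hypothetical: the vcpu allocation need not be page aligned, so a
  	 * page-granular unmap can also tear down mappings of its neighbours. */
  	start = (unsigned long)vcpu & PAGE_MASK;
  	end = PAGE_ALIGN((unsigned long)vcpu + sizeof(struct kvm_vcpu));
  	remove_hyp_mappings(kvm_hyp_pgd, start, end);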

> +
> +	kmem_cache_free(kvm_vcpu_cache, vcpu);
>   }

>   	return 0;
>   }
>
> +/**
> + * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
> + * @vcpu:	The VCPU pointer
> + * @run:	The kvm_run structure pointer used for userspace state exchange
> + *
> + * This function is called through the VCPU_RUN ioctl called from user space. It
> + * will execute VM code in a loop until the time slice for the process is used
> + * or some emulation is needed from user space in which case the function will
> + * return with return value 0 and with the kvm_run structure filled in with the
> + * required data for the requested emulation.
> + */
>   int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
>   {
> -	KVMARM_NOT_IMPLEMENTED();
> -	return -EINVAL;
> +	unsigned long flags;
> +	int ret;
> +
> +	for (;;) {
> +		trace_kvm_entry(vcpu->arch.regs.pc);
> +		debug_ws_enter(vcpu->arch.regs.pc);

why both trace_kvm and debug_ws?

> +		kvm_guest_enter();
> +
> +		local_irq_save(flags);

local_irq_disable() is likely sufficient - the call path never changes.
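
A minimal sketch of the loop body with that change applied, assuming the entry
path always runs with interrupts enabled (tracing/debug calls omitted):

  		/* Sketch of the suggested simplification, not the posted patch:
  		 * interrupts are known to be enabled here, so there is no need
  		 * to save and restore the flags word. */
  		kvm_guest_enter();

  		local_irq_disable();
  		ret = __kvm_vcpu_run(vcpu);
  		local_irq_enable();

  		kvm_guest_exit();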

> +		ret = __kvm_vcpu_run(vcpu);
> +		local_irq_restore(flags);
> +
> +		kvm_guest_exit();
> +		debug_ws_exit(vcpu->arch.regs.pc);
> +		trace_kvm_exit(vcpu->arch.regs.pc);
> +	}
> +
> +	return ret;
>   }
>
>
Christoffer Dall Aug. 9, 2011, 11:29 a.m. UTC | #2
On Aug 9, 2011, at 1:09 PM, Avi Kivity wrote:

> On 08/06/2011 01:39 PM, Christoffer Dall wrote:
>> Provides a complete world-switch implementation to switch to other guests
>> running in non-secure modes. Includes Hyp exception handlers that
>> capture the necessary exception information and store the information in
>> the VCPU and KVM structures.
>> 
>> Switching to Hyp mode is done through a simple HVC instruction. The
>> exception vector code will check that the HVC comes from VMID==0 and if
>> so will store the necessary state on the Hyp stack, which will look like
>> this (see hyp_hvc):
>>   ...
>>   Hyp_Sp + 4: lr_usr
>>   Hyp_Sp    : spsr (Host-SVC cpsr)
>> 
>> When returning from Hyp mode to SVC mode, another HVC instruction is
>> executed from Hyp mode, which is taken in the Hyp_Svc handler. The Hyp
>> stack pointer should be where it was left from the above initial call,
>> since the values on the stack will be used to restore state (see
>> hyp_svc).
>> 
>> Otherwise, the world-switch is pretty straightforward. All state that
>> can be modified by the guest is first backed up on the Hyp stack and the
>> VCPU values are loaded onto the hardware. State that is not loaded, but
>> theoretically modifiable by the guest, is protected through the
>> virtualization features to generate a trap and cause software emulation.
>> Upon guest return, all state is restored from hardware onto the VCPU
>> struct and the original state is restored from the Hyp stack onto the
>> hardware.
>> 
>> One controversy may be the back-door call to __irq_svc (the host
>> kernel's own physical IRQ handler) which is called when a physical IRQ
>> exception is taken in Hyp mode while running in the guest.
>> 
>> 
>>  void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
>>  {
>> +	unsigned long start, end;
>> +
>>  	latest_vcpu = NULL;
>> -	KVMARM_NOT_IMPLEMENTED();
>> +
>> +	start = (unsigned long)vcpu,
>> +	end = start + sizeof(struct kvm_vcpu);
>> +	remove_hyp_mappings(kvm_hyp_pgd, start, end);
> 
> What if vcpu shares a page with another mapped structure?

Then we have a problem. I will think about this. Thanks.

> 
>> +
>> +	kmem_cache_free(kvm_vcpu_cache, vcpu);
>>  }
> 
>>  	return 0;
>>  }
>> 
>> +/**
>> + * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
>> + * @vcpu:	The VCPU pointer
>> + * @run:	The kvm_run structure pointer used for userspace state exchange
>> + *
>> + * This function is called through the VCPU_RUN ioctl called from user space. It
>> + * will execute VM code in a loop until the time slice for the process is used
>> + * or some emulation is needed from user space in which case the function will
>> + * return with return value 0 and with the kvm_run structure filled in with the
>> + * required data for the requested emulation.
>> + */
>>  int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
>>  {
>> -	KVMARM_NOT_IMPLEMENTED();
>> -	return -EINVAL;
>> +	unsigned long flags;
>> +	int ret;
>> +
>> +	for (;;) {
>> +		trace_kvm_entry(vcpu->arch.regs.pc);
>> +		debug_ws_enter(vcpu->arch.regs.pc);
> 
> why both trace_kvm and debug_ws?

Because tracepoints were a bit impractical to use for debugging some problems, but this is actually not relevant anymore and I will clean it up for the next series. Sorry.

> 
>> +		kvm_guest_enter();
>> +
>> +		local_irq_save(flags);
> 
> local_irq_disable() is likely sufficient - the call path never changes.

good point.

> 
>> +		ret = __kvm_vcpu_run(vcpu);
>> +		local_irq_restore(flags);
>> +
>> +		kvm_guest_exit();
>> +		debug_ws_exit(vcpu->arch.regs.pc);
>> +		trace_kvm_exit(vcpu->arch.regs.pc);
>> +	}
>> +
>> +	return ret;
>>  }
>> 
>> 
> 
> -- 
> error compiling committee.c: too many arguments to function
> 


Patch

diff --git a/arch/arm/include/asm/kvm.h b/arch/arm/include/asm/kvm.h
index 8935062..ff88ca0 100644
--- a/arch/arm/include/asm/kvm.h
+++ b/arch/arm/include/asm/kvm.h
@@ -51,6 +51,7 @@  struct kvm_regs {
 	__u32 cpsr;
 	__u32 spsr[5];		/* Banked SPSR,  indexed by MODE_  */
 	struct {
+		__u32 c0_midr;
 		__u32 c1_sys;
 		__u32 c2_base0;
 		__u32 c2_base1;
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index e378a37..1769187 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -100,5 +100,31 @@ 
 #define VTTBR_X		(5 - VTCR_GUEST_T0SZ)
 #endif
 
+/* Hyp Syndrome Register (HSR) bits */
+#define HSR_EC_SHIFT	(26)
+#define HSR_EC		(0x3fU << HSR_EC_SHIFT)
+#define HSR_IL		(1U << 25)
+#define HSR_ISS		(HSR_IL - 1)
+#define HSR_ISV_SHIFT	(24)
+#define HSR_ISV		(1U << HSR_ISV_SHIFT)
+
+#define HSR_EC_UNKNOWN	(0x00)
+#define HSR_EC_WFI	(0x01)
+#define HSR_EC_CP15_32	(0x03)
+#define HSR_EC_CP15_64	(0x04)
+#define HSR_EC_CP14_MR	(0x05)
+#define HSR_EC_CP14_LS	(0x06)
+#define HSR_EC_CP_0_13	(0x07)
+#define HSR_EC_CP10_ID	(0x08)
+#define HSR_EC_JAZELLE	(0x09)
+#define HSR_EC_BXJ	(0x0A)
+#define HSR_EC_CP14_64	(0x0C)
+#define HSR_EC_SVC_HYP	(0x11)
+#define HSR_EC_HVC	(0x12)
+#define HSR_EC_SMC	(0x13)
+#define HSR_EC_IABT	(0x20)
+#define HSR_EC_IABT_HYP	(0x21)
+#define HSR_EC_DABT	(0x24)
+#define HSR_EC_DABT_HYP	(0x25)
 
 #endif /* __KVM_ARM_H__ */
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 06d1263..59fcd15 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -62,6 +62,7 @@  struct kvm_vcpu_arch {
 
 	/* System control coprocessor (cp15) */
 	struct {
+		u32 c0_MIDR;		/* Main ID Register */
 		u32 c1_SCTLR;		/* System Control Register */
 		u32 c1_ACTLR;		/* Auxilliary Control Register */
 		u32 c1_CPACR;		/* Coprocessor Access Control */
@@ -69,6 +70,12 @@  struct kvm_vcpu_arch {
 		u64 c2_TTBR1;		/* Translation Table Base Register 1 */
 		u32 c2_TTBCR;		/* Translation Table Base Control R. */
 		u32 c3_DACR;		/* Domain Access Control Register */
+		u32 c10_PRRR;		/* Primary Region Remap Register */
+		u32 c10_NMRR;		/* Normal Memory Remap Register */
+		u32 c13_CID;		/* Context ID Register */
+		u32 c13_TID_URW;	/* Thread ID, User R/W */
+		u32 c13_TID_URO;	/* Thread ID, User R/O */
+		u32 c13_TID_PRIV;	/* Thread ID, Privileged */
 	} cp15;
 
 	u32 virt_irq;		/* HCR exception mask */
@@ -78,6 +85,7 @@  struct kvm_vcpu_arch {
 	u32 hdfar;		/* Hyp Data Fault Address Register */
 	u32 hifar;		/* Hyp Inst. Fault Address Register */
 	u32 hpfar;		/* Hyp IPA Fault Address Register */
+	u64 pc_ipa;		/* IPA for the current PC (VA to PA result) */
 
 	/* IO related fields */
 	u32 mmio_rd;
diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c
index acca35a..819b78c 100644
--- a/arch/arm/kernel/armksyms.c
+++ b/arch/arm/kernel/armksyms.c
@@ -49,6 +49,12 @@  extern void __aeabi_ulcmp(void);
 
 extern void fpundefinstr(void);
 
+#ifdef CONFIG_KVM_ARM_HOST
+/* This is needed for KVM */
+extern void __irq_svc(void);
+
+EXPORT_SYMBOL_GPL(__irq_svc);
+#endif
 
 EXPORT_SYMBOL(__backtrace);
 
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 927522c..cfa4a52 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -16,6 +16,7 @@ 
 #include <asm/cacheflush.h>
 #include <asm/glue-df.h>
 #include <asm/glue-pf.h>
+#include <linux/kvm_host.h>
 #include <asm/mach/arch.h>
 #include <asm/thread_info.h>
 #include <asm/memory.h>
@@ -129,5 +130,37 @@  int main(void)
   DEFINE(DMA_BIDIRECTIONAL,	DMA_BIDIRECTIONAL);
   DEFINE(DMA_TO_DEVICE,		DMA_TO_DEVICE);
   DEFINE(DMA_FROM_DEVICE,	DMA_FROM_DEVICE);
+#ifdef CONFIG_KVM_ARM_HOST
+  DEFINE(VCPU_KVM,		offsetof(struct kvm_vcpu, kvm));
+  DEFINE(VCPU_MIDR,		offsetof(struct kvm_vcpu, arch.cp15.c0_MIDR));
+  DEFINE(VCPU_SCTLR,		offsetof(struct kvm_vcpu, arch.cp15.c1_SCTLR));
+  DEFINE(VCPU_CPACR,		offsetof(struct kvm_vcpu, arch.cp15.c1_CPACR));
+  DEFINE(VCPU_TTBR0,		offsetof(struct kvm_vcpu, arch.cp15.c2_TTBR0));
+  DEFINE(VCPU_TTBR1,		offsetof(struct kvm_vcpu, arch.cp15.c2_TTBR1));
+  DEFINE(VCPU_TTBCR,		offsetof(struct kvm_vcpu, arch.cp15.c2_TTBCR));
+  DEFINE(VCPU_DACR,		offsetof(struct kvm_vcpu, arch.cp15.c3_DACR));
+  DEFINE(VCPU_PRRR,		offsetof(struct kvm_vcpu, arch.cp15.c10_PRRR));
+  DEFINE(VCPU_NMRR,		offsetof(struct kvm_vcpu, arch.cp15.c10_NMRR));
+  DEFINE(VCPU_CID,		offsetof(struct kvm_vcpu, arch.cp15.c13_CID));
+  DEFINE(VCPU_TID_URW,		offsetof(struct kvm_vcpu, arch.cp15.c13_TID_URW));
+  DEFINE(VCPU_TID_URO,		offsetof(struct kvm_vcpu, arch.cp15.c13_TID_URO));
+  DEFINE(VCPU_TID_PRIV,		offsetof(struct kvm_vcpu, arch.cp15.c13_TID_PRIV));
+  DEFINE(VCPU_REGS,		offsetof(struct kvm_vcpu, arch.regs));
+  DEFINE(VCPU_USR_REGS,		offsetof(struct kvm_vcpu, arch.regs.usr_regs));
+  DEFINE(VCPU_SVC_REGS,		offsetof(struct kvm_vcpu, arch.regs.svc_regs));
+  DEFINE(VCPU_ABT_REGS,		offsetof(struct kvm_vcpu, arch.regs.abt_regs));
+  DEFINE(VCPU_UND_REGS,		offsetof(struct kvm_vcpu, arch.regs.und_regs));
+  DEFINE(VCPU_IRQ_REGS,		offsetof(struct kvm_vcpu, arch.regs.irq_regs));
+  DEFINE(VCPU_FIQ_REGS,		offsetof(struct kvm_vcpu, arch.regs.fiq_regs));
+  DEFINE(VCPU_PC,		offsetof(struct kvm_vcpu, arch.regs.pc));
+  DEFINE(VCPU_CPSR,		offsetof(struct kvm_vcpu, arch.regs.cpsr));
+  DEFINE(VCPU_VIRT_IRQ,		offsetof(struct kvm_vcpu, arch.virt_irq));
+  DEFINE(VCPU_HSR,		offsetof(struct kvm_vcpu, arch.hsr));
+  DEFINE(VCPU_HDFAR,		offsetof(struct kvm_vcpu, arch.hdfar));
+  DEFINE(VCPU_HIFAR,		offsetof(struct kvm_vcpu, arch.hifar));
+  DEFINE(VCPU_HPFAR,		offsetof(struct kvm_vcpu, arch.hpfar));
+  DEFINE(VCPU_PC_IPA,		offsetof(struct kvm_vcpu, arch.pc_ipa));
+  DEFINE(KVM_VTTBR,		offsetof(struct kvm, arch.vttbr));
+#endif
   return 0; 
 }
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index e8d8856..c40f3b5 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -198,6 +198,7 @@  __dabt_svc:
 ENDPROC(__dabt_svc)
 
 	.align	5
+	.globl __irq_svc
 __irq_svc:
 	svc_entry
 
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 071912e..196eace 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -235,8 +235,15 @@  out:
 
 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 {
+	unsigned long start, end;
+
 	latest_vcpu = NULL;
-	KVMARM_NOT_IMPLEMENTED();
+
+	start = (unsigned long)vcpu,
+	end = start + sizeof(struct kvm_vcpu);
+	remove_hyp_mappings(kvm_hyp_pgd, start, end);
+
+	kmem_cache_free(kvm_vcpu_cache, vcpu);
 }
 
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
@@ -251,7 +258,20 @@  int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 {
-	KVMARM_NOT_IMPLEMENTED();
+	unsigned long cpsr;
+	unsigned long sctlr;
+
+	/* Init execution CPSR */
+	asm volatile ("mrs	%[cpsr], cpsr" :
+			[cpsr] "=r" (cpsr));
+	vcpu->arch.regs.cpsr = SVC_MODE | PSR_I_BIT | PSR_F_BIT | PSR_A_BIT |
+				(cpsr & PSR_E_BIT);
+
+	/* Init SCTLR with MMU disabled */
+	asm volatile ("mrc	p15, 0, %[sctlr], c1, c0, 0" :
+			[sctlr] "=r" (sctlr));
+	vcpu->arch.cp15.c1_SCTLR = sctlr & ~1U;
+
 	return 0;
 }
 
@@ -291,10 +311,37 @@  int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
 	return 0;
 }
 
+/**
+ * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
+ * @vcpu:	The VCPU pointer
+ * @run:	The kvm_run structure pointer used for userspace state exchange
+ *
+ * This function is called through the VCPU_RUN ioctl called from user space. It
+ * will execute VM code in a loop until the time slice for the process is used
+ * or some emulation is needed from user space in which case the function will
+ * return with return value 0 and with the kvm_run structure filled in with the
+ * required data for the requested emulation.
+ */
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
-	KVMARM_NOT_IMPLEMENTED();
-	return -EINVAL;
+	unsigned long flags;
+	int ret;
+
+	for (;;) {
+		trace_kvm_entry(vcpu->arch.regs.pc);
+		debug_ws_enter(vcpu->arch.regs.pc);
+		kvm_guest_enter();
+
+		local_irq_save(flags);
+		ret = __kvm_vcpu_run(vcpu);
+		local_irq_restore(flags);
+
+		kvm_guest_exit();
+		debug_ws_exit(vcpu->arch.regs.pc);
+		trace_kvm_exit(vcpu->arch.regs.pc);
+	}
+
+	return ret;
 }
 
 static int kvm_arch_vm_ioctl_irq_line(struct kvm *kvm,
diff --git a/arch/arm/kvm/arm_guest.c b/arch/arm/kvm/arm_guest.c
index 94a5c54..3a23bee 100644
--- a/arch/arm/kvm/arm_guest.c
+++ b/arch/arm/kvm/arm_guest.c
@@ -73,6 +73,7 @@  int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	/*
 	 * Co-processor registers.
 	 */
+	regs->cp15.c0_midr = vcpu->arch.cp15.c0_MIDR;
 	regs->cp15.c1_sys = vcpu->arch.cp15.c1_SCTLR;
 	regs->cp15.c2_base0 = vcpu->arch.cp15.c2_TTBR0;
 	regs->cp15.c2_base1 = vcpu->arch.cp15.c2_TTBR1;
@@ -111,6 +112,7 @@  int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	/*
 	 * Co-processor registers.
 	 */
+	vcpu->arch.cp15.c0_MIDR = regs->cp15.c0_midr;
 	vcpu->arch.cp15.c1_SCTLR = regs->cp15.c1_sys;
 
 	vcpu_regs->pc = regs->reg15;
diff --git a/arch/arm/kvm/arm_interrupts.S b/arch/arm/kvm/arm_interrupts.S
index 2edc49b..d516bf4 100644
--- a/arch/arm/kvm/arm_interrupts.S
+++ b/arch/arm/kvm/arm_interrupts.S
@@ -21,6 +21,12 @@ 
 #include <asm/kvm_asm.h>
 #include <asm/kvm_arm.h>
 
+#define VCPU_USR_REG(_reg_nr)	(VCPU_USR_REGS + (_reg_nr * 4))
+#define VCPU_USR_SP		(VCPU_USR_REG(13))
+#define VCPU_FIQ_REG(_reg_nr)	(VCPU_FIQ_REGS + (_reg_nr * 4))
+#define VCPU_FIQ_SPSR		(VCPU_FIQ_REG(7))
+
+
 @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 @  Hypervisor world-switch code
 @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@ -28,9 +34,317 @@ 
 	.text
 	.arm
 
+/* These are simply for the macros to work - values don't have meaning */
+.equ usr, 0
+.equ svc, 1
+.equ abt, 2
+.equ und, 3
+.equ irq, 4
+.equ fiq, 5
+
+.macro store_mode_state base_reg, mode
+	.if \mode == usr
+	mrs	r2, SP_usr
+	mov	r3, lr
+	stmdb	\base_reg!, {r2, r3}
+	.elseif \mode != fiq
+	mrs	r2, SP_\mode
+	mrs	r3, LR_\mode
+	mrs	r4, SPSR_\mode
+	stmdb	\base_reg!, {r2, r3, r4}
+	.else
+	mrs	r2, r8_fiq
+	mrs	r3, r9_fiq
+	mrs	r4, r10_fiq
+	mrs	r5, r11_fiq
+	mrs	r6, r12_fiq
+	mrs	r7, SP_fiq
+	mrs	r8, LR_fiq
+	mrs	r9, SPSR_fiq
+	stmdb	\base_reg!, {r2-r9}
+	.endif
+.endm
+
+.macro load_mode_state base_reg, mode
+	.if \mode == usr
+	ldmia	\base_reg!, {r2, r3}
+	msr	SP_usr, r2
+	mov	lr, r3
+	.elseif \mode != fiq
+	ldmia	\base_reg!, {r2, r3, r4}
+	msr	SP_\mode, r2
+	msr	LR_\mode, r3
+	msr	SPSR_\mode, r4
+	.else
+	ldmia	\base_reg!, {r2-r9}
+	msr	r8_fiq, r2
+	msr	r9_fiq, r3
+	msr	r10_fiq, r4
+	msr	r11_fiq, r5
+	msr	r12_fiq, r6
+	msr	SP_fiq, r7
+	msr	LR_fiq, r8
+	msr	SPSR_fiq, r9
+	.endif
+.endm
+
+/* Reads cp15 registers from hardware and stores them in memory
+ * @vcpu:   If 0, registers are written in-order to the stack,
+ * 	    otherwise to the VCPU struct pointed to by vcpup
+ * @vcpup:  Register pointing to VCPU struct
+ */
+.macro read_cp15_state vcpu=0, vcpup
+	mrc	p15, 0, r2, c1, c0, 0	@ SCTLR
+	mrc	p15, 0, r3, c1, c0, 2	@ CPACR
+	mrc	p15, 0, r4, c2, c0, 2	@ TTBCR
+	mrc	p15, 0, r5, c3, c0, 0	@ DACR
+	mrrc	p15, 0, r6, r7, c2	@ TTBR 0
+	mrrc	p15, 1, r8, r9, c2	@ TTBR 1
+	mrc	p15, 0, r10, c10, c2, 0	@ PRRR
+	mrc	p15, 0, r11, c10, c2, 1	@ NMRR
+
+	.if \vcpu == 0
+	push	{r2-r11}		@ Push CP15 registers
+	.else
+	str	r2, [\vcpup, #VCPU_SCTLR]
+	str	r3, [\vcpup, #VCPU_CPACR]
+	str	r4, [\vcpup, #VCPU_TTBCR]
+	str	r5, [\vcpup, #VCPU_DACR]
+	add	\vcpup, \vcpup, #VCPU_TTBR0
+	strd	r6, r7, [\vcpup]
+	add	\vcpup, \vcpup, #(VCPU_TTBR1 - VCPU_TTBR0)
+	strd	r8, r9, [\vcpup]
+	sub	\vcpup, \vcpup, #(VCPU_TTBR1)
+	str	r10, [\vcpup, #VCPU_PRRR]
+	str	r11, [\vcpup, #VCPU_NMRR]
+	.endif
+
+	mrc	p15, 0, r2, c13, c0, 1	@ CID
+	mrc	p15, 0, r3, c13, c0, 2	@ TID_URW
+	mrc	p15, 0, r4, c13, c0, 3	@ TID_URO
+	mrc	p15, 0, r5, c13, c0, 4	@ TID_PRIV
+	.if \vcpu == 0
+	push	{r2-r5}			@ Push CP15 registers
+	.else
+	str	r2, [\vcpup, #VCPU_CID]
+	str	r3, [\vcpup, #VCPU_TID_URW]
+	str	r4, [\vcpup, #VCPU_TID_URO]
+	str	r5, [\vcpup, #VCPU_TID_PRIV]
+	.endif
+.endm
+
+/* Reads cp15 registers from memory and writes them to hardware
+ * @vcpu:   If 0, registers are read in-order from the stack,
+ * 	    otherwise from the VCPU struct pointed to by vcpup
+ * @vcpup:  Register pointing to VCPU struct
+ */
+.macro write_cp15_state vcpu=0, vcpup
+	.if \vcpu == 0
+	pop	{r2-r5}
+	.else
+	ldr	r2, [\vcpup, #VCPU_CID]
+	ldr	r3, [\vcpup, #VCPU_TID_URW]
+	ldr	r4, [\vcpup, #VCPU_TID_URO]
+	ldr	r5, [\vcpup, #VCPU_TID_PRIV]
+	.endif
+
+	mcr	p15, 0, r2, c13, c0, 1	@ CID
+	mcr	p15, 0, r3, c13, c0, 2	@ TID_URW
+	mcr	p15, 0, r4, c13, c0, 3	@ TID_URO
+	mcr	p15, 0, r5, c13, c0, 4	@ TID_PRIV
+
+	.if \vcpu == 0
+	pop	{r2-r11}
+	.else
+	ldr	r2, [\vcpup, #VCPU_SCTLR]
+	ldr	r3, [\vcpup, #VCPU_CPACR]
+	ldr	r4, [\vcpup, #VCPU_TTBCR]
+	ldr	r5, [\vcpup, #VCPU_DACR]
+	add	\vcpup, \vcpup, #VCPU_TTBR0
+	ldrd	r6, r7, [\vcpup]
+	add	\vcpup, \vcpup, #(VCPU_TTBR1 - VCPU_TTBR0)
+	ldrd	r8, r9, [\vcpup]
+	sub	\vcpup, \vcpup, #(VCPU_TTBR1)
+	ldr	r10, [\vcpup, #VCPU_PRRR]
+	ldr	r11, [\vcpup, #VCPU_NMRR]
+	.endif
+
+	mcr	p15, 0, r2, c1, c0, 0	@ SCTLR
+	mcr	p15, 0, r3, c1, c0, 2	@ CPACR
+	mcr	p15, 0, r4, c2, c0, 2	@ TTBCR
+	mcr	p15, 0, r5, c3, c0, 0	@ DACR
+	mcrr	p15, 0, r6, r7, c2	@ TTBR 0
+	mcrr	p15, 1, r8, r9, c2	@ TTBR 1
+	mcr	p15, 0, r10, c10, c2, 0	@ PRRR
+	mcr	p15, 0, r11, c10, c2, 1	@ NMRR
+.endm
+
+/* Configures the HSTR (Hyp System Trap Register) on entry/return
+ * (hardware reset value is 0) */
+.macro set_hstr entry
+	mrc	p15, 4, r2, c1, c1, 3
+	ldr	r3, =0x9e00
+	.if \entry == 1
+	orr	r2, r2, r3		@ Trap CR{9,10,11,12,15}
+	.else
+	bic	r2, r2, r3		@ Don't trap any CRx accesses
+	.endif
+	mcr	p15, 4, r2, c1, c1, 3
+.endm
+
+/* Enable/Disable: stage-2 trans., trap interrupts, trap wfi/wfe, trap smc */
+.macro configure_hyp_role entry, vcpu_ptr
+	mrc	p15, 4, r2, c1, c1, 0	@ HCR
+	bic	r2, r2, #HCR_VIRT_EXCP_MASK
+	ldr	r3, =HCR_GUEST_MASK
+	.if \entry == 1
+	orr	r2, r2, r3
+	ldr	r3, [\vcpu_ptr, #VCPU_VIRT_IRQ]
+	orr	r2, r2, r3
+	.else
+	bic	r2, r2, r3
+	.endif
+	mcr	p15, 4, r2, c1, c1, 0
+.endm
+
+@ This must be called from Hyp mode!
+@ Arguments:
+@  r0: pointer to vcpu struct
 ENTRY(__kvm_vcpu_run)
+	hvc	#0			@ Change to Hyp-mode
+
+	@ Now we're in Hyp-mode and lr_usr, spsr_hyp are on the stack
+	mrs	r2, sp_usr
+	push	{r2}			@ Push r13_usr
+	push	{r4-r12}		@ Push r4-r12
+
+	store_mode_state sp, svc
+	store_mode_state sp, abt
+	store_mode_state sp, und
+	store_mode_state sp, irq
+	store_mode_state sp, fiq
+
+	@ Store hardware CP15 state and load guest state
+	read_cp15_state
+	write_cp15_state 1, r0
+
+	push	{r0}			@ Push the VCPU pointer
+
+	@ Set up guest memory translation
+	ldr	r1, [r0, #VCPU_KVM]	@ r1 points to kvm struct
+	ldrd	r2, r3, [r1, #KVM_VTTBR]
+	mcrr	p15, 6, r2, r3, c2	@ Write VTTBR
+
+	@ Configure Hyp-role
+	configure_hyp_role 1, r0
+
+	@ Trap coprocessor CRx for all x except 2 and 14
+	set_hstr 1
+
+	@ Write standard A-9 CPU id in MIDR
+	ldr	r1, [r0, #VCPU_MIDR]
+	mcr	p15, 4, r1, c0, c0, 0
+
+	@ Load guest registers
+	add	r0, r0, #(VCPU_USR_SP)
+	load_mode_state r0, usr
+	load_mode_state r0, svc
+	load_mode_state r0, abt
+	load_mode_state r0, und
+	load_mode_state r0, irq
+	load_mode_state r0, fiq
+
+	@ Load return state (r0 now points to vcpu->arch.regs.pc)
+	ldmia	r0, {r2, r3}
+	msr	ELR_hyp, r2
+	msr	spsr, r3
+
+	@ Load remaining registers and do the switch
+	sub	r0, r0, #(VCPU_PC - VCPU_USR_REGS)
+	ldmia	r0, {r0-r12}
+	eret
+
+__kvm_vcpu_return:
+	@ Store return state
+	mrs	r2, ELR_hyp
+	mrs	r3, spsr
+	str	r2, [r1, #VCPU_PC]
+	str	r3, [r1, #VCPU_CPSR]
+
+	@ Store guest registers
+	add	r1, r1, #(VCPU_FIQ_SPSR + 4)
+	store_mode_state r1, fiq
+	store_mode_state r1, irq
+	store_mode_state r1, und
+	store_mode_state r1, abt
+	store_mode_state r1, svc
+	store_mode_state r1, usr
+	sub	r1, r1, #(VCPU_USR_REG(13))
+
+	@ Don't trap coprocessor accesses for host kernel
+	set_hstr 0
+
+	@ Reset Hyp-role
+	configure_hyp_role 0, r1
+
+	@ Let guest read hardware MIDR
+	mrc	p15, 0, r2, c0, c0, 0
+	mcr	p15, 4, r2, c0, c0, 0
+
+	@ Set VMID == 0
+	mov	r2, #0
+	mov	r3, #0
+	mcrr	p15, 6, r2, r3, c2	@ Write VTTBR
+
+	@ Store guest CP15 state and restore host state
+	read_cp15_state 1, r1
+	write_cp15_state
+
+	load_mode_state sp, fiq
+	load_mode_state sp, irq
+	load_mode_state sp, und
+	load_mode_state sp, abt
+	load_mode_state sp, svc
+
+	pop	{r4-r12}		@ Pop r4-r12
+	pop	{r2}			@ Pop r13_usr
+	msr	sp_usr, r2
+
+	hvc	#0
+
+	cmp	r0, #ARM_EXCEPTION_IRQ
+	bne	return_to_ioctl
+
+	/*
+	 * It's time to launch the kernel IRQ handler for IRQ exceptions. This
+	 * requires some manipulation though.
+	 *
+	 *  - The easiest entry point to the host handler is __irq_svc.
+	 *  - The __irq_svc expects to be called from SVC mode, which has been
+	 *    switched to from vector_stub code in entry-armv.S. The __irq_svc
+	 *    calls svc_entry which uses values stored in memory and pointed to
+	 *    by r0 to return from handler. We allocate this memory on the
+	 *    stack, which will contain these values:
+	 *      0x8:   cpsr
+	 *      0x4:   return_address
+	 *      0x0:   r0
+	 */
+	adr	r1, irq_kernel_resume	@ Where to resume
+	mrs	r2, cpsr		@ CPSR when we return
+	push	{r0 - r2}
+	mov	r0, sp
+	b	__irq_svc
+
+irq_kernel_resume:
+	pop	{r0}
+	add	sp, sp, #8
+
+return_to_ioctl:
 THUMB(	orr	lr, lr, #1)
 	mov	pc, lr
+
+	.ltorg
+
 __kvm_vcpu_run_end:
 	.globl __kvm_vcpu_run_end
 
@@ -39,9 +353,136 @@  __kvm_vcpu_run_end:
 @  Hypervisor exception vector and handlers
 @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 
+	.text
+	.arm
+
 	.align 5
 __kvm_hyp_vector:
 	.globl __kvm_hyp_vector
-	nop
+
+	@ Hyp-mode exception vector
+	b	hyp_reset
+	b	hyp_undef
+	b	hyp_svc
+	b	hyp_pabt
+	b	hyp_dabt
+	b	hyp_hvc
+	b	hyp_irq
+	b	hyp_fiq
+
+	.align
+hyp_reset:
+	sub	pc, pc, #8
+
+	.align
+hyp_undef:
+	sub	pc, pc, #8
+
+	.align
+hyp_svc:
+	@ Can only get here if HVC or SVC is called from Hyp mode, which means
+	@ we want to change mode back to SVC mode.
+	@ NB: Stack pointer should be where hyp_hvc handler left it!
+	ldr	lr, [sp, #4]
+	msr	spsr, lr
+	ldr	lr, [sp]
+	add	sp, sp, #8
+	eret
+
+	.align
+hyp_pabt:
+	sub	pc, pc, #8
+
+	.align
+hyp_dabt:
+	sub	pc, pc, #8
+
+	.align
+hyp_hvc:
+	@ Getting here is either because of a trap from a guest or from calling
+	@ HVC from the host kernel, which means "switch to Hyp mode".
+	push	{r0, r1, r2}
+
+	@ Check syndrome register
+	mrc	p15, 4, r0, c5, c2, 0	@ HSR
+	lsr	r1, r0, #HSR_EC_SHIFT
+	cmp	r1, #HSR_EC_HVC
+	bne	guest_trap		@ Not HVC instr.
+
+	@ Let's check if the HVC came from VMID 0 and allow simple
+	@ switch to Hyp mode
+	mrrc    p15, 6, r1, r2, c2
+	lsr     r2, r2, #16
+	and     r2, r2, #0xff
+	cmp     r2, #0
+	bne	guest_trap		@ Guest called HVC
+
+	pop	{r0, r1, r2}
+
+	@ Store lr_usr,spsr (svc cpsr) on stack
+	sub	sp, sp, #8
+	str	lr, [sp]
+	mrs	lr, spsr
+	str	lr, [sp, #4]
+
+	@ Return to caller in Hyp mode
+	mrs	lr, ELR_hyp
+	mov	pc, lr
+
+guest_trap:
+	ldr	r1, [sp, #12]		@ Load VCPU pointer
+	str	r0, [r1, #VCPU_HSR]
+	add	r1, r1, #VCPU_USR_REG(3)
+	stmia	r1, {r3-r12}
+	sub	r1, r1, #(VCPU_USR_REG(3) - VCPU_USR_REG(0))
+	pop	{r3, r4, r5}
+	add	sp, sp, #4		@ We loaded the VCPU pointer above
+	stmia	r1, {r3, r4, r5}
+	sub	r1, r1, #VCPU_USR_REG(0)
+
+	@ Check if we need the fault information
+	lsr	r2, r0, #HSR_EC_SHIFT
+	cmp	r2, #HSR_EC_IABT
+	beq	2f
+	cmpne	r2, #HSR_EC_DABT
+	bne	1f
+
+	@ For non-valid data aborts, get the offending instr. PA
+	lsr	r2, r0, #HSR_ISV_SHIFT
+	ands	r2, r2, #1
+	bne	2f
+	mrs	r3, ELR_hyp
+	mcr	p15, 0, r3, c7, c8, 0	@ VA to PA, V2PCWPR
+	mrrc	p15, 0, r4, r5, c7	@ PAR
+	add	r6, r1, #VCPU_PC_IPA
+	strd	r4, r5, [r6]
+
+2:	mrc	p15, 4, r2, c6, c0, 0	@ HDFAR
+	mrc	p15, 4, r3, c6, c0, 2	@ HIFAR
+	mrc	p15, 4, r4, c6, c0, 4	@ HPFAR
+	add	r5, r1, #VCPU_HDFAR
+	stmia	r5, {r2, r3, r4}
+
+1:	mov	r0, #ARM_EXCEPTION_HVC
+	b	__kvm_vcpu_return
+
+	.align
+hyp_irq:
+	push	{r0}
+	ldr	r0, [sp, #4]		@ Load VCPU pointer
+	add	r0, r0, #(VCPU_USR_REG(1))
+	stmia	r0, {r1-r12}
+	pop	{r0, r1}		@ r1 == vcpu pointer
+	str	r0, [r1, #VCPU_USR_REG(0)]
+
+	mov	r0, #ARM_EXCEPTION_IRQ
+	b	__kvm_vcpu_return
+
+	.align
+hyp_fiq:
+	sub	pc, pc, #8
+
+	.ltorg
+
 __kvm_hyp_vector_end:
 	.globl __kvm_hyp_vector_end