diff mbox

[4/7] powerpc/kvm/xics: Add kernel emulation for the XICS interrupt controller

Message ID 1345192724.11751.72.camel@pasglop (mailing list archive)
State New, archived
Headers show

Commit Message

Benjamin Herrenschmidt Aug. 17, 2012, 8:38 a.m. UTC
This is an initial variant of the in-kernel XICS emulation
for both HV and PR KVM running in PAPR mode.

This is based on an initial implementation by Michael Ellerman
<michael@ellerman.id.au> reworked by myself.

It supports up to 4095 "BUID" (blocks of interrupts) of up to
4096 interrupts each.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/kvm.h      |   39 ++
 arch/powerpc/include/asm/kvm_host.h |    8 +
 arch/powerpc/include/asm/kvm_ppc.h  |   30 +-
 arch/powerpc/kvm/Makefile           |    1 +
 arch/powerpc/kvm/book3s.c           |    3 +-
 arch/powerpc/kvm/book3s_hv.c        |   20 +
 arch/powerpc/kvm/book3s_pr.c        |   13 +
 arch/powerpc/kvm/book3s_pr_papr.c   |   19 +-
 arch/powerpc/kvm/book3s_rtas.c      |   51 +-
 arch/powerpc/kvm/book3s_xics.c      |  882 +++++++++++++++++++++++++++++++++++
 arch/powerpc/kvm/booke.c            |    3 +-
 arch/powerpc/kvm/powerpc.c          |   28 +-
 include/linux/kvm.h                 |    9 +-
 13 files changed, 1090 insertions(+), 16 deletions(-)
 create mode 100644 arch/powerpc/kvm/book3s_xics.c




--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
index 3dc91df..f653424 100644
--- a/arch/powerpc/include/asm/kvm.h
+++ b/arch/powerpc/include/asm/kvm.h
@@ -296,6 +296,45 @@  struct kvm_rtas_token_args {
 	__u64 token;	/* Use a token of 0 to undefine a mapping */
 };
 
+/* for KVM_CAP_SPAPR_XICS */
+#define __KVM_HAVE_IRQCHIP_ARGS
+/*
+ * Argument block passed with KVM_CREATE_IRQCHIP for XICS. 'type' selects
+ * which member of the anonymous union below is meaningful.
+ */
+struct kvm_irqchip_args {
+#define KVM_IRQCHIP_TYPE_ICP	0	/* XICS: ICP (presentation controller) */
+#define KVM_IRQCHIP_TYPE_ICS	1	/* XICS: ICS (source controller) */
+	__u32 type;
+	union {
+		/* XICS ICP arguments. This needs to be called once before
+		 * creating any VCPU to initialize the main kernel XICS data
+		 * structures.
+		 */
+		struct {
+#define KVM_ICP_FLAG_NOREALMODE		0x00000001 /* Disable real mode ICP */
+			/* NOTE(review): not read by the handlers in this patch */
+			__u32 flags;
+		} icp;
+
+		/* XICS ICS arguments. You can call this for every BUID you
+		 * want to make available.
+		 *
+		 * The BUID is 12 bits, the interrupt number within a BUID
+		 * is up to 12 bits as well. The resulting interrupt numbers
+		 * exposed to the guest are BUID || IRQ which is 24 bit
+		 *
+		 * BUID cannot be 0.
+		 */
+		struct {
+			/* NOTE(review): not read by the handlers in this patch */
+			__u32 flags;
+			__u16 buid;	/* block to create */
+			__u16 nr_irqs;	/* number of sources in that block */
+		} ics;
+	};
+};
+
+/*
+ * Routing description for one interrupt source: target server and priority.
+ * NOTE(review): no user of this struct is visible in this patch - confirm
+ * which interface is meant to carry it.
+ */
+struct kvm_spapr_xics_xive {
+	__u32 irq;
+	__u32 server;
+	__u32 priority;
+};
+
 struct kvm_book3e_206_tlb_entry {
 	__u32 mas8;
 	__u32 mas1;
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index d9c3f63..ccbf3dc 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -187,6 +187,10 @@  struct kvmppc_linear_info {
 	int		 type;
 };
 
+/* XICS components, defined in book3s_xics.c */
+struct kvmppc_xics;
+struct kvmppc_icp;
+
 /*
  * The reverse mapping array has one entry for each HPTE,
  * which stores the guest's view of the second word of the HPTE
@@ -251,6 +255,7 @@  struct kvm_arch {
 #ifdef CONFIG_PPC_BOOK3S_64
 	struct list_head spapr_tce_tables;
 	struct list_head rtas_tokens;
+	struct kvmppc_xics *xics;
 #endif
 };
 
@@ -532,6 +537,9 @@  struct kvm_vcpu_arch {
 	u64 stolen_logged;
 	struct kvmppc_vpa slb_shadow;
 #endif
+#ifdef CONFIG_PPC_BOOK3S_64
+	struct kvmppc_icp *icp; /* XICS presentation controller */
+#endif
 };
 
 /* Values for vcpu->arch.state */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index e23bfc6..ce81d91 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -102,8 +102,7 @@  extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu);
 extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu);
 extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
                                        struct kvm_interrupt *irq);
-extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
-                                         struct kvm_interrupt *irq);
+extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu);
 
 extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
                                   unsigned int op, int *advance);
@@ -127,6 +126,12 @@  extern long kvmppc_prepare_vrma(struct kvm *kvm,
 extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
 			struct kvm_memory_slot *memslot, unsigned long porder);
 extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
+extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd);
+extern int kvmppc_xics_ioctl(struct kvm *kvm, unsigned ioctl, unsigned long arg);
+extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu);
+extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu);
+extern void kvmppc_xics_free(struct kvm *kvm);
+
 extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
 				struct kvm_create_spapr_tce *args);
 extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
@@ -152,6 +157,8 @@  extern void kvmppc_bookehv_exit(void);
 extern int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp);
 extern int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu);
 extern void kvmppc_rtas_tokens_free(struct kvm *kvm);
+extern int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority);
+extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority);
 
 /*
  * Cuts out inst bits with ordering according to spec.
@@ -213,6 +220,25 @@  static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
 
 static inline void kvm_linear_init(void)
 {}
+
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_64
+
+extern int kvmppc_xics_ioctl(struct kvm *kvm, unsigned ioctl, unsigned long arg);
+
+/* Non-zero once the in-kernel XICS has been instantiated for this VM. */
+static inline int kvmppc_xics_enabled(struct kvm *kvm)
+{
+	struct kvmppc_xics *xics = kvm->arch.xics;
+
+	return xics ? 1 : 0;
+}
+
+#else
+/*
+ * Stubs for builds without CONFIG_PPC_BOOK3S_64: there is no in-kernel
+ * XICS, so the irqchip ioctls are unknown and XICS is never enabled.
+ */
+static inline int kvmppc_xics_ioctl(struct kvm *kvm, unsigned ioctl,
+				    unsigned long arg)
+{
+	return -ENOTTY;
+}
+static inline int kvmppc_xics_enabled(struct kvm *kvm) { return 0; }
 #endif
 
 int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 536f65f..ec2f8da 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -81,6 +81,7 @@  kvm-book3s_64-module-objs := \
 	book3s.o \
 	book3s_64_vio.o \
 	book3s_rtas.o \
+	book3s_xics.o \
 	$(kvm-book3s_64-objs-y)
 
 kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs)
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 3f2a836..5c631e4 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -160,8 +160,7 @@  void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
 	kvmppc_book3s_queue_irqprio(vcpu, vec);
 }
 
-void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
-                                  struct kvm_interrupt *irq)
+void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu)
 {
 	kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
 	kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 6199063..b41e586 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -465,6 +465,14 @@  int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 
 		/* Send the error out to userspace via KVM_RUN */
 		return rc;
+	case H_XIRR:
+	case H_CPPR:
+	case H_EOI:
+	case H_IPI:
+		if (kvmppc_xics_enabled(vcpu->kvm)) {
+			ret = kvmppc_xics_hcall(vcpu, req);
+			break;
+		} /* fallthrough */
 	default:
 		return RESUME_HOST;
 	}
@@ -673,6 +681,13 @@  struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 	kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
 	spin_lock_init(&vcpu->arch.vpa_update_lock);
 
+	/* Create the XICS */
+	if (kvmppc_xics_enabled(kvm)) {
+		err = kvmppc_xics_create_icp(vcpu);
+		if (err < 0)
+			goto free_vcpu;
+	}
+
 	kvmppc_mmu_book3s_hv_init(vcpu);
 
 	/*
@@ -727,6 +742,8 @@  void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
 		kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa.pinned_addr);
 	spin_unlock(&vcpu->arch.vpa_update_lock);
 	kvm_vcpu_uninit(vcpu);
+	if (kvmppc_xics_enabled(vcpu->kvm))
+		kvmppc_xics_free_icp(vcpu);
 	kmem_cache_free(kvm_vcpu_cache, vcpu);
 }
 
@@ -1602,6 +1619,9 @@  void kvmppc_core_destroy_vm(struct kvm *kvm)
 
 	kvmppc_rtas_tokens_free(kvm);
 
+	if (kvmppc_xics_enabled(kvm))
+		kvmppc_xics_free(kvm);
+
 	kvmppc_free_hpt(kvm);
 	WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
 }
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 9f4c13f..ab9776b 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -984,6 +984,13 @@  struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 	if (err < 0)
 		goto uninit_vcpu;
 
+	/* Create the XICS */
+	if (kvmppc_xics_enabled(kvm)) {
+		err = kvmppc_xics_create_icp(vcpu);
+		if (err < 0)
+			goto free_vcpu;
+	}
+
 	return vcpu;
 
 uninit_vcpu:
@@ -1000,6 +1007,8 @@  void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
 
+	if (kvmppc_xics_enabled(vcpu->kvm))
+		kvmppc_xics_free_icp(vcpu);
 	free_page((unsigned long)vcpu->arch.shared & PAGE_MASK);
 	kvm_vcpu_uninit(vcpu);
 	kfree(vcpu_book3s->shadow_vcpu);
@@ -1199,6 +1208,7 @@  int kvmppc_core_init_vm(struct kvm *kvm)
 {
 #ifdef CONFIG_PPC64
 	INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
+	INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
 #endif
 
 	return 0;
@@ -1209,6 +1219,9 @@  void kvmppc_core_destroy_vm(struct kvm *kvm)
 #ifdef CONFIG_PPC64
 	WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
 #endif
+	if (kvmppc_xics_enabled(kvm))
+		kvmppc_xics_free(kvm);
+
 }
 
 static int kvmppc_book3s_init(void)
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
index 175404a..8352cac 100644
--- a/arch/powerpc/kvm/book3s_pr_papr.c
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -227,6 +227,15 @@  static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu)
 	return EMULATE_DONE;
 }
 
+/*
+ * PR-KVM wrapper around the XICS hcall handler: H_TOO_HARD is turned into
+ * EMULATE_FAIL, any other status is handed to the guest in r3.
+ */
+static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
+{
+	long status;
+
+	status = kvmppc_xics_hcall(vcpu, cmd);
+	if (status != H_TOO_HARD) {
+		kvmppc_set_gpr(vcpu, 3, status);
+		return EMULATE_DONE;
+	}
+	return EMULATE_FAIL;
+}
+
 int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
 {
 	switch (cmd) {
@@ -246,11 +255,17 @@  int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
 		clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
 		vcpu->stat.halt_wakeup++;
 		return EMULATE_DONE;
+	case H_XIRR:
+	case H_CPPR:
+	case H_EOI:
+	case H_IPI:
+		if (kvmppc_xics_enabled(vcpu->kvm))
+			return kvmppc_h_pr_xics_hcall(vcpu, cmd);
+		break;
 	case H_RTAS:
 		if (list_empty(&vcpu->kvm->arch.rtas_tokens))
 			return RESUME_HOST;
-		rc = kvmppc_rtas_hcall(vcpu);
-		if (rc != 0)
+		if (kvmppc_rtas_hcall(vcpu) != 0)
 			break;
 		kvmppc_set_gpr(vcpu, 3, 0);
 		return EMULATE_DONE;
diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c
index 8a324e8..6a6c1fe 100644
--- a/arch/powerpc/kvm/book3s_rtas.c
+++ b/arch/powerpc/kvm/book3s_rtas.c
@@ -18,12 +18,61 @@ 
 #include <asm/rtas.h>
 
 
+/*
+ * RTAS "ibm,set-xive": takes three arguments (irq, server, priority) and
+ * one return slot. rets[0] is 0 on success and -3 on a malformed call or
+ * when the XICS layer refuses the request.
+ */
+static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
+{
+	int status = -3;
+
+	if (args->nargs == 3 && args->nret == 1) {
+		u32 irq = args->args[0];
+		u32 server = args->args[1];
+		u32 priority = args->args[2];
+
+		if (!kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority))
+			status = 0;
+	}
+
+	args->rets[0] = status;
+}
+
+/*
+ * RTAS "ibm,get-xive": takes one argument (irq) and three return slots.
+ * On success rets[1]/rets[2] receive server and priority and rets[0] is 0;
+ * on any failure only rets[0] is written, with -3.
+ */
+static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
+{
+	u32 server = 0, priority = 0;
+	int status = -3;
+
+	if (args->nargs != 1 || args->nret != 3)
+		goto done;
+
+	if (kvmppc_xics_get_xive(vcpu->kvm, args->args[0], &server, &priority))
+		goto done;
+
+	args->rets[1] = server;
+	args->rets[2] = priority;
+	status = 0;
+done:
+	args->rets[0] = status;
+}
+
 struct rtas_handler {
 	void (*handler)(struct kvm_vcpu *vcpu, struct rtas_args *args);
 	char *name;
 };
 
-static struct rtas_handler rtas_handlers[] = { };
+/* Table pairing RTAS service names with their in-kernel handlers */
+static struct rtas_handler rtas_handlers[] = {
+	{ .name = "ibm,set-xive", .handler = kvm_rtas_set_xive },
+	{ .name = "ibm,get-xive", .handler = kvm_rtas_get_xive },
+};
 
 struct rtas_token_definition {
 	struct list_head list;
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
new file mode 100644
index 0000000..5638e21
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -0,0 +1,882 @@ 
+/*
+ * Copyright 2012 Michael Ellerman, IBM Corporation.
+ * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+#include <linux/gfp.h>
+
+#include <asm/uaccess.h>
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_ppc.h>
+#include <asm/hvcall.h>
+#include <asm/xics.h>
+#include <asm/debug.h>
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#define MASKED	0xff
+
+#define XICS_DBG(fmt...) do { } while(0)
+
+#undef DEBUG_REALMODE
+
+/*
+ * LOCKING
+ * =======
+ *
+ * Each ICP has its own lock, and each ICS (one per BUID) has its own lock
+ * protecting the state of its irq sources.
+ *
+ * The ICS lock nests inside any of the ICP locks. ie. you are allowed
+ * to take the ICS lock while holding an ICP lock, but not vice versa.
+ */
+
+/*
+ * Interrupt numbering
+ * ===================
+ *
+ * The 24-bit global interrupt numbers are divided in two components:
+ * the upper 12 bits are the BUID and the lower 12 bits are the interrupt
+ * source within that BUID (see KVMPPC_XICS_BUID_SHIFT / _SRC_MASK below).
+ */
+
+#define KVMPPC_XICS_MAX_BUID	0xfff
+#define KVMPPC_XICS_IRQ_COUNT	0x1000
+#define KVMPPC_XICS_BUID_SHIFT	12
+#define KVMPPC_XICS_SRC_MASK	0xfff
+
+/* State for one irq in an ics */
+struct ics_irq_state {
+	u32 number;		/* global irq number: (buid << shift) | index */
+	u32 server;		/* target server, matched against vcpu_id */
+	u8  priority;		/* MASKED (0xff) means the source is masked */
+	u8  saved_priority; /* currently unused */
+	u8  resend;		/* set on reject, cleared by the resend scan */
+	u8  masked_pending;	/* event arrived while the source was masked */
+	u8  asserted; /* Only for LSI */
+};
+
+/* Number of longs in the per-ICP resend bitmap, which is indexed by BUID */
+#define ICP_RESEND_MAP_SIZE	\
+	((KVMPPC_XICS_MAX_BUID + BITS_PER_LONG - 1) / BITS_PER_LONG)
+
+/* Per-vcpu interrupt presentation controller state */
+struct kvmppc_icp {
+	struct mutex lock;
+	struct kvm_vcpu *vcpu;	/* vcpu this ICP presents interrupts to */
+	u32 pending_irq;	/* XISR */
+	u8  pending_priority;	/* priority of pending_irq */
+	u8  current_priority;	/* CPPR */
+	u8  mfrr;		/* MFRR */
+	bool need_resend;	/* some bit in resend_map wants a rescan */
+	unsigned long resend_map[ICP_RESEND_MAP_SIZE];
+};
+
+
+/* One interrupt source controller: the sources of a single BUID */
+struct kvmppc_ics {
+	struct mutex lock;	/* protects irq_state[] */
+	u16 buid;
+	u16 nr_irqs;		/* number of entries in irq_state[] */
+	struct ics_irq_state irq_state[];
+};
+
+/* Per-VM XICS state; reachable through kvm->arch.xics */
+struct kvmppc_xics {
+	struct kvm *kvm;
+	struct dentry *dentry;	/* debugfs file, see xics_debugfs_init() */
+	u32 max_buid;		/* highest BUID created so far */
+	/* Slot (buid - 1) holds the ICS for that BUID, NULL if not created */
+	struct kvmppc_ics *ics[KVMPPC_XICS_MAX_BUID]; /* [1...MAX_BUID] */
+};
+
+/*
+ * Translate an XICS server number into its ICP. Server numbers are vcpu
+ * ids; NULL is returned when no vcpu carries that id.
+ */
+static struct kvmppc_icp *kvmppc_xics_find_server(struct kvm *kvm, u32 nr)
+{
+	struct kvmppc_icp *found = NULL;
+	struct kvm_vcpu *cur = NULL;
+	int idx;
+
+	kvm_for_each_vcpu(idx, cur, kvm) {
+		if (nr != cur->vcpu_id)
+			continue;
+		found = cur->arch.icp;
+		break;
+	}
+	return found;
+}
+
+/*
+ * Look up the ICS owning global interrupt number @irq.
+ *
+ * The BUID is taken from the bits above KVMPPC_XICS_BUID_SHIFT and the
+ * source index from the low KVMPPC_XICS_SRC_MASK bits. Returns NULL when
+ * the BUID is outside 1..KVMPPC_XICS_MAX_BUID, when no ICS was created for
+ * it, or when the source index is beyond that ICS's nr_irqs. On success the
+ * source index is stored through @source if it is non-NULL.
+ */
+static struct kvmppc_ics *kvmppc_xics_find_ics(struct kvmppc_xics *xics,
+					       u32 irq, u16 *source)
+{
+	/* Keep the full width: a u16 would silently alias large irq numbers */
+	u32 buid = irq >> KVMPPC_XICS_BUID_SHIFT;
+	u16 src = irq & KVMPPC_XICS_SRC_MASK;
+	struct kvmppc_ics *ics;
+
+	/* ics[] is indexed by buid - 1; reject anything outside the array */
+	if (buid == 0 || buid > KVMPPC_XICS_MAX_BUID)
+		return NULL;
+
+	ics = xics->ics[buid - 1];
+	if (!ics)
+		return NULL;
+	if (src >= ics->nr_irqs)
+		return NULL;
+	if (source)
+		*source = src;
+	return ics;
+}
+
+
+/* -- ICS routines -- */
+
+static void icp_deliver_irq(struct kvmppc_xics *xics,
+			    struct kvmppc_icp *icp,
+			    struct kvmppc_ics *ics, u16 src);
+
+/*
+ * Flag source @src of @ics for a later resend to @icp: set the per-irq
+ * resend flag, mark the ICS's BUID in the ICP resend map and raise
+ * need_resend. Takes no lock itself; the callers in this file hold
+ * ics->lock (and icp->lock) around it.
+ */
+static void __ics_reject_irq(struct kvmppc_icp *icp,
+			     struct kvmppc_ics *ics, u16 src)
+{
+	struct ics_irq_state *state = &ics->irq_state[src];
+
+	XICS_DBG("server %d reject src %#x\n", icp->vcpu->vcpu_id, src);
+
+	/* XXX check if it still level & asserted ? */
+	state->resend = 1;
+	set_bit(ics->buid, icp->resend_map);
+	icp->need_resend = true;
+}
+
+/*
+ * Reject global interrupt @irq on behalf of @icp so that it is resent
+ * later. Must be called with icp->lock held; takes the owning ICS's lock.
+ *
+ * The callers pass icp->pending_irq, which can be XICS_IPI. The IPI is not
+ * backed by any ICS source (its state is the ICP's mfrr, re-evaluated by
+ * icp_check_ipi()), so there is nothing to flag and no ICS to look up.
+ */
+static void ics_reject_irq(struct kvmppc_xics *xics,
+			   struct kvmppc_icp *icp, u32 irq)
+{
+	struct kvmppc_ics *ics;
+	u16 src;
+
+	lockdep_assert_held(&icp->lock);
+
+	/* Avoid a bogus BUID 0 lookup for the IPI */
+	if (irq == XICS_IPI)
+		return;
+
+	ics = kvmppc_xics_find_ics(xics, irq, &src);
+	if (!ics) {
+		pr_warning("ics_reject_irq: IRQ 0x%06x not found !\n", irq);
+		return;
+	}
+
+	mutex_lock(&ics->lock);
+	__ics_reject_irq(icp, ics, src);
+	mutex_unlock(&ics->lock);
+}
+
+/*
+ * ICS side of a guest EOI for global interrupt @irq. Must be called with
+ * icp->lock held; takes the owning ICS's lock.
+ *
+ * If the source is still asserted (LSI) it is flagged for resend to @icp.
+ * An EOI of XICS_IPI is ignored here: the IPI has no ICS source, so there
+ * is no per-source state to update and no ICS to look up.
+ */
+static void ics_eoi(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+		    u32 irq)
+{
+	struct ics_irq_state *state;
+	struct kvmppc_ics *ics;
+	u16 src;
+
+	XICS_DBG("ics_eoi 0x%06x\n", irq);
+
+	lockdep_assert_held(&icp->lock);
+
+	/* Avoid a bogus BUID 0 lookup for the IPI */
+	if (irq == XICS_IPI)
+		return;
+
+	ics = kvmppc_xics_find_ics(xics, irq, &src);
+	if (!ics) {
+		pr_warning("ics_eoi: IRQ 0x%06x not found !\n", irq);
+		return;
+	}
+	state = &ics->irq_state[src];
+
+	mutex_lock(&ics->lock);
+
+	/* If it's an LSI and still asserted we resend */
+	if (state->asserted) {
+		state->resend = 1;
+		set_bit(ics->buid, icp->resend_map);
+		icp->need_resend = true;
+	}
+
+	mutex_unlock(&ics->lock);
+}
+
+/*
+ * Handle a source event for global interrupt @irq, as requested through
+ * KVM_IRQ_LINE. @level is one of KVM_INTERRUPT_SET, KVM_INTERRUPT_SET_LEVEL
+ * or KVM_INTERRUPT_UNSET.
+ *
+ * The per-source state is updated under ics->lock; the actual delivery to
+ * the ICP happens after that lock is dropped, because icp_deliver_irq()
+ * takes icp->lock first and ics->lock second.
+ */
+static void ics_deliver_irq(struct kvmppc_xics *xics,
+			    u32 irq, u32 level)
+{
+	struct kvmppc_icp *icp;
+	struct ics_irq_state *state;
+	struct kvmppc_ics *ics;	
+	bool deliver = false;
+	u32 server;
+	u16 src;
+
+	XICS_DBG("ics deliver 0x%06x (level: %d)\n", irq, level);
+
+	ics = kvmppc_xics_find_ics(xics, irq, &src);
+	if (!ics) {
+		pr_warning("ics_deliver_irq: IRQ 0x%06x not found !\n", irq);
+		return;
+	}
+	state = &ics->irq_state[src];
+
+	mutex_lock(&ics->lock);
+
+	/* Track the line state; a de-assert delivers nothing */
+	if (level == KVM_INTERRUPT_SET_LEVEL)
+		state->asserted = 1;
+	else if (level == KVM_INTERRUPT_UNSET) {
+		state->asserted = 0;
+		goto unlock;
+	}
+
+	/* Masked sources only remember that an event happened */
+	if (state->priority != MASKED) {
+		deliver = true;
+		server = state->server;
+	} else {
+		XICS_DBG("masked pending\n");
+		state->masked_pending = 1;
+	}	
+
+unlock:
+	mutex_unlock(&ics->lock);
+
+	/* 'server' is only read when 'deliver' was set together with it */
+	if (deliver) {
+		 icp = kvmppc_xics_find_server(xics->kvm, server);
+		 /* Configured server not found... XXX FALLBACK */
+		 if (icp) 
+			 icp_deliver_irq(xics, icp, ics, src);
+	}
+}
+
+/*
+ * Scan every source of @ics and re-deliver those that are flagged for
+ * resend and routed to @icp's vcpu. The resend flag is cleared even when
+ * the source turns out to be masked.
+ */
+static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
+			     struct kvmppc_icp *icp)
+{
+	u32 server = icp->vcpu->vcpu_id;
+	int i;
+
+	mutex_lock(&ics->lock);
+
+	for (i = 0; i < ics->nr_irqs; i++) {
+		struct ics_irq_state *state = &ics->irq_state[i];
+
+		if (!state->resend || state->server != server)
+			continue;
+
+		XICS_DBG("resend 0x%06x prio %d\n", state->number,
+			      state->priority);
+
+		state->resend = 0;
+		if (state->priority == MASKED)
+			continue;
+
+		/*
+		 * icp_deliver_irq() takes icp->lock and then ics->lock, so
+		 * our ics->lock has to be released around the call.
+		 */
+		mutex_unlock(&ics->lock);
+		icp_deliver_irq(xics, icp, ics, i);
+		mutex_lock(&ics->lock);
+	}
+
+	mutex_unlock(&ics->lock);
+}
+
+/*
+ * Walk the BUID bits set in @icp's resend map; for each one, clear the bit
+ * and rescan the matching ICS for sources to resend. Called by h_cppr()
+ * and h_eoi() after they have dropped icp->lock.
+ */
+static void icp_check_resend(struct kvmppc_xics *xics,
+			     struct kvmppc_icp *icp)
+{
+	u32 buid;
+	
+	for_each_set_bit(buid, icp->resend_map, xics->max_buid + 1) {
+		/* ics[] is indexed by buid - 1 */
+		struct kvmppc_ics *ics = xics->ics[buid - 1];
+
+		/* Skip if the bit was cleared again since the scan saw it */
+		if (!test_and_clear_bit(buid, icp->resend_map))
+			continue;
+		if (!ics)
+			continue;
+		ics_check_resend(xics, ics, icp);
+	}
+}
+
+/*
+ * Set the target server and priority of global interrupt @irq.
+ *
+ * Returns -ENODEV when the VM has no XICS, -EINVAL when @irq or @server
+ * cannot be resolved, and 0 otherwise. If the source had an event recorded
+ * while masked and the new priority is not MASKED, the interrupt is
+ * delivered to the new server once ics->lock has been released.
+ */
+int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority)
+{
+	struct kvmppc_xics *xics = kvm->arch.xics;
+	struct kvmppc_icp *icp;
+	struct kvmppc_ics *ics;
+	struct ics_irq_state *state;
+	u16 src;
+	bool deliver;
+
+	if (!xics)
+		return -ENODEV;
+
+	ics = kvmppc_xics_find_ics(xics, irq, &src);
+	if (!ics)
+		return -EINVAL;
+	state = &ics->irq_state[src];
+
+	icp = kvmppc_xics_find_server(kvm, server);
+	if (!icp)
+		return -EINVAL;
+
+	mutex_lock(&ics->lock);
+
+	state->server = server;
+	/* The u32 argument is stored into a u8 field */
+	state->priority = priority;
+	deliver = false;
+	if (state->masked_pending && state->priority != MASKED) {
+		state->masked_pending = 0;
+		deliver = true;
+	}
+
+	mutex_unlock(&ics->lock);
+
+	XICS_DBG("irq 0x%06x server %d prio %#x\n", irq, server, priority);
+
+	if (deliver)
+		icp_deliver_irq(xics, icp, ics, src);
+
+	return 0;
+}
+
+/*
+ * Read back the server and priority currently configured for global
+ * interrupt @irq. Returns -ENODEV without an XICS, -EINVAL for an unknown
+ * irq, 0 on success with *server and *priority filled in.
+ */
+int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority)
+{
+	struct kvmppc_xics *xics = kvm->arch.xics;
+	struct ics_irq_state *entry;
+	struct kvmppc_ics *ics;
+	u16 idx;
+
+	if (xics == NULL)
+		return -ENODEV;
+
+	ics = kvmppc_xics_find_ics(xics, irq, &idx);
+	if (ics == NULL)
+		return -EINVAL;
+
+	entry = ics->irq_state + idx;
+
+	/* Take a consistent snapshot of both fields */
+	mutex_lock(&ics->lock);
+	*server = entry->server;
+	*priority = entry->priority;
+	mutex_unlock(&ics->lock);
+
+	XICS_DBG("irq 0x%06x server %d prio %#x\n",
+		      irq, entry->server, entry->priority);
+
+	return 0;
+}
+
+/* -- ICP routines, including hcalls -- */
+
+/*
+ * Raise the level-type external interrupt on the vcpu behind @icp and kick
+ * it. Must be called with icp->lock held.
+ */
+static void icp_external_interrupt(struct kvmppc_icp *icp)
+{
+	struct kvm_vcpu *target = icp->vcpu;
+
+	lockdep_assert_held(&icp->lock);
+
+	kvmppc_book3s_queue_irqprio(target, BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
+	kvm_vcpu_kick(target);
+}
+
+/*
+ * Try to present source @src of @ics to @icp.
+ *
+ * Takes icp->lock, then ics->lock (the documented nesting order). The
+ * interrupt is rejected (flagged for resend) when its priority value is
+ * greater than the CPPR, or when an interrupt with a lower-or-equal
+ * priority value is already pending. Otherwise it replaces whatever was
+ * pending, which is rejected in turn, and the vcpu is interrupted.
+ */
+static void icp_deliver_irq(struct kvmppc_xics *xics,
+			    struct kvmppc_icp *icp,
+			    struct kvmppc_ics *ics, u16 src)
+{
+	struct ics_irq_state state_copy;
+
+	mutex_lock(&icp->lock);
+
+	/* Snapshot irq state */
+	mutex_lock(&ics->lock);
+	state_copy = ics->irq_state[src];
+
+	/* NOTE(review): a priority equal to the CPPR is accepted here, while
+	 * icp_check_ipi() refuses an mfrr equal to the CPPR - confirm. */
+	if (state_copy.priority > icp->current_priority) {
+		/* CPU is not interested in us */
+		__ics_reject_irq(icp, ics, src);
+		mutex_unlock(&ics->lock);
+		goto out;
+	}
+
+	if (icp->pending_irq) {
+		/* An interrupt is pending */
+		if (icp->pending_priority <= state_copy.priority) {
+			/* pending irq is equally or more favoured */
+			__ics_reject_irq(icp, ics, src);
+			mutex_unlock(&ics->lock);
+			goto out;
+		}
+	}
+	/* Drop ics->lock: the pending irq may belong to another ICS */
+	mutex_unlock(&ics->lock);
+
+	/* We are more favoured, reject pending irq */
+	if (icp->pending_irq)
+		ics_reject_irq(xics, icp, icp->pending_irq);
+
+	icp->pending_irq = state_copy.number;
+	icp->pending_priority = state_copy.priority;
+
+	XICS_DBG("irq 0x%06x pending on %d prio %#x\n",
+		     state_copy.number, state_copy.server, state_copy.priority);
+
+	icp_external_interrupt(icp);
+
+out:
+	mutex_unlock(&icp->lock);
+}
+
+/*
+ * Present the IPI on @icp if its mfrr is more favoured (numerically lower)
+ * than the CPPR and than any interrupt already pending. Must be called
+ * with icp->lock held.
+ */
+static void icp_check_ipi(struct kvmppc_xics *xics, struct kvmppc_icp *icp)
+{
+	lockdep_assert_held(&icp->lock);
+
+	/* Nothing to do unless mfrr beats the current priority */
+	if (icp->mfrr >= icp->current_priority)
+		return;
+
+	XICS_DBG("cpu %d can take IPI mfrr=%#x\n",
+		     icp->vcpu->vcpu_id, icp->mfrr);
+
+	if (icp->pending_irq) {
+		/* IPI is less favoured */
+		if (icp->pending_priority <= icp->mfrr) {
+			XICS_DBG("ODD: pending_prio=%#x pending_irq=%#x\n",
+				     icp->pending_priority, icp->pending_irq);
+			return;
+		}
+
+		/* IPI is more favoured, reject the other interrupt */
+		/* NOTE(review): pending_irq can itself be XICS_IPI here (IPI
+		 * re-sent with a more favoured mfrr); ics_reject_irq() is then
+		 * asked to find an ICS source for the IPI number - confirm
+		 * that case is handled. */
+		ics_reject_irq(xics, icp, icp->pending_irq);
+	}
+
+	icp->pending_irq = XICS_IPI;
+	icp->pending_priority = icp->mfrr;
+	icp_external_interrupt(icp);
+}
+
+/*
+ * Accept the pending interrupt on @icp (H_XIRR): dequeue the external
+ * interrupt from @vcpu, build the XIRR value from the pending irq number
+ * and the CPPR as it was before the accept, make the pending priority the
+ * new CPPR and clear the pending irq. Returns the XIRR value.
+ */
+static u32 icp_accept(struct kvm_vcpu *vcpu, struct kvmppc_icp *icp)
+{
+	u32 xirr;
+
+	mutex_lock(&icp->lock);
+
+	kvmppc_core_dequeue_external(vcpu);
+
+	/* The XIRR is the pending interrupt & current priority */
+	xirr = icp->pending_irq | (icp->current_priority << 24);
+
+	/* The pending priority becomes current */
+	/* NOTE(review): this also happens when pending_irq is 0, in which
+	 * case pending_priority is whatever was left from earlier - confirm
+	 * that is intended. */
+	icp->current_priority = icp->pending_priority;
+
+	/* Clear the pending interrupt */
+	icp->pending_irq = 0;
+
+	mutex_unlock(&icp->lock);
+
+	return xirr;
+}
+
+/* H_XIRR: accept the pending interrupt on the calling vcpu's own ICP. */
+static unsigned long h_xirr(struct kvm_vcpu *vcpu)
+{
+	u32 xirr = icp_accept(vcpu, vcpu->arch.icp);
+
+	XICS_DBG("h_xirr vcpu %d xirr %#x\n", vcpu->vcpu_id, xirr);
+
+	return xirr;
+}
+
+/*
+ * H_IPI: store @mfrr into the ICP of server @server and check whether the
+ * IPI can be presented right away. Returns H_PARAMETER when no vcpu has
+ * that server number, H_SUCCESS otherwise.
+ */
+static int h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
+		 unsigned long mfrr)
+{
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	struct kvmppc_icp *icp;
+
+	XICS_DBG("h_ipi vcpu %d to server %lu mfrr %#lx\n",
+			vcpu->vcpu_id, server, mfrr);
+
+	icp = kvmppc_xics_find_server(vcpu->kvm, server);
+	if (!icp)
+		return H_PARAMETER;
+
+	mutex_lock(&icp->lock);
+
+	/* The unsigned long argument is stored into a u8 field */
+	icp->mfrr = mfrr;
+	icp_check_ipi(xics, icp);
+
+	mutex_unlock(&icp->lock);
+
+	return H_SUCCESS;
+}
+
+/*
+ * H_CPPR: set the calling vcpu's current processor priority to @cppr.
+ *
+ * A pending interrupt that is no longer favoured enough is withdrawn and
+ * rejected back to its source. Afterwards, if nothing is pending, the IPI
+ * and then the resend map are checked for something that can be presented
+ * at the new priority. The resend scan runs after icp->lock is dropped.
+ */
+static void h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
+{
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	struct kvmppc_icp *icp = vcpu->arch.icp;
+	u8 old_priority;
+	bool check_resend = false;
+
+	XICS_DBG("h_cppr vcpu %d cppr %#lx\n", vcpu->vcpu_id, cppr);
+
+	mutex_lock(&icp->lock);
+
+	/* NOTE(review): old_priority is assigned but never read below */
+	old_priority = icp->current_priority;
+	icp->current_priority = cppr;
+
+	if (icp->pending_irq &&
+	    icp->current_priority < icp->pending_priority) {
+		u32 pending = icp->pending_irq;
+		/* Pending irq is less favoured than our new priority */
+		icp->pending_irq = 0;
+		kvmppc_core_dequeue_external(vcpu);
+		ics_reject_irq(xics, icp, pending);
+	}
+
+	/* Check if there is anything we can accept now */
+	if (!icp->pending_irq)
+		icp_check_ipi(xics, icp);
+	if (!icp->pending_irq && icp->need_resend) {
+		check_resend = true;
+		icp->need_resend = false;
+	}
+	
+	mutex_unlock(&icp->lock);
+
+	/* icp_check_resend() ends up taking icp->lock itself */
+	if (check_resend)
+		icp_check_resend(xics, icp);
+}
+
+/*
+ * H_EOI: end-of-interrupt from the calling vcpu. The top byte of @xirr is
+ * the priority to restore into the CPPR, the low 24 bits the interrupt
+ * being EOI'd. The resend scan runs after icp->lock is dropped.
+ */
+static void h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
+{
+	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+	struct kvmppc_icp *icp = vcpu->arch.icp;
+	bool check_resend = false;
+
+	XICS_DBG("h_eoi vcpu %d eoi %#lx\n", vcpu->vcpu_id, xirr);
+
+	mutex_lock(&icp->lock);
+
+	icp->current_priority = xirr >> 24;
+
+	/* If nothing is pending since accept, check for an IPI */
+	if (!icp->pending_irq)
+		icp_check_ipi(xics, icp);
+
+	if (!icp->pending_irq && icp->need_resend) {
+		check_resend = true;
+		icp->need_resend = false;
+	}
+
+	/* NOTE(review): the irq number is passed on unfiltered, so an EOI
+	 * of the IPI also reaches the ICS lookup - confirm that is handled. */
+	ics_eoi(xics, icp, xirr & 0xFFFFFF);
+
+	mutex_unlock(&icp->lock);
+
+	if (check_resend)
+		icp_check_resend(xics, icp);
+}
+
+/*
+ * Entry point for the XICS hypercalls (H_XIRR, H_CPPR, H_EOI, H_IPI).
+ * Arguments are read from r4/r5 and the H_XIRR result is written to r4.
+ * Returns H_HARDWARE when the VM or vcpu has no XICS state, the h_ipi()
+ * status for H_IPI, and H_SUCCESS in every other case (including an
+ * unrecognised @req).
+ */
+int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
+{
+	/* Check if we have an ICP */
+	if (!(vcpu->arch.icp && vcpu->kvm->arch.xics))
+		return H_HARDWARE;
+
+	if (req == H_XIRR) {
+		unsigned long res = h_xirr(vcpu);
+
+		kvmppc_set_gpr(vcpu, 4, res);
+	} else if (req == H_CPPR) {
+		h_cppr(vcpu, kvmppc_get_gpr(vcpu, 4));
+	} else if (req == H_EOI) {
+		h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4));
+	} else if (req == H_IPI) {
+		return h_ipi(vcpu, kvmppc_get_gpr(vcpu, 4),
+			     kvmppc_get_gpr(vcpu, 5));
+	}
+
+	return H_SUCCESS;
+}
+
+
+/* -- Initialisation code etc. -- */
+
+/*
+ * seq_file show callback for the debugfs file: dumps the state of every
+ * vcpu's ICP followed by every source of every created ICS. Each ICP and
+ * ICS is printed under its own lock.
+ */
+static int xics_debug_show(struct seq_file *m, void *private)
+{
+	struct kvmppc_xics *xics = m->private;
+	struct kvm *kvm = xics->kvm;
+	struct kvm_vcpu *vcpu;
+	int buid, i;
+
+	/* xics->kvm is cleared by kvmppc_xics_free() */
+	if (!kvm)
+		return 0;
+
+	seq_printf(m, "=========\nICP state\n=========\n");
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		struct kvmppc_icp *icp = vcpu->arch.icp;
+
+		if (!icp)
+			continue;
+
+		mutex_lock(&icp->lock);
+
+		seq_printf(m, "cpu server %#x pending %#x pending prio %#x cppr %#x "
+			   "mfrr %#x\n", vcpu->vcpu_id, icp->pending_irq,
+			   icp->pending_priority, icp->current_priority,
+			   icp->mfrr);
+
+		mutex_unlock(&icp->lock);
+	}
+
+	/* BUIDs start at 1; slot (buid - 1) of ics[] holds the ICS */
+	for (buid = 1; buid <= KVMPPC_XICS_MAX_BUID; buid++) {
+		struct kvmppc_ics *ics = xics->ics[buid - 1];
+
+		if (!ics)
+			continue;
+
+		seq_printf(m, "=========\nICS state for BUID 0x%x\n=========\n", buid);
+
+		mutex_lock(&ics->lock);
+
+		for (i = 0; i < ics->nr_irqs; i++) {
+			struct ics_irq_state *irq = &ics->irq_state[i];
+
+			seq_printf(m, "irq 0x%06x: server %#x prio %#x save prio %#x "
+				   "asserted %d resend %d masked pending %d\n",
+				   irq->number, irq->server, irq->priority,
+				   irq->saved_priority, irq->asserted, irq->resend,
+				   irq->masked_pending);
+
+		}
+		mutex_unlock(&ics->lock);
+	}
+	return 0;
+}
+
+/* open() for the debugfs file: bind xics_debug_show() to the xics pointer
+ * stored in the inode's private data. */
+static int xics_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, xics_debug_show, inode->i_private);
+}
+
+/* File operations of the read-only debugfs state dump */
+static const struct file_operations xics_debug_fops = {
+	.open = xics_debug_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+/*
+ * Create the debugfs file "kvm-xics-<pointer>" under powerpc_debugfs_root
+ * for @xics and remember its dentry. Failure to allocate the name is
+ * logged and otherwise ignored; the result of debugfs_create_file() is
+ * stored without being checked.
+ */
+static void xics_debugfs_init(struct kvmppc_xics *xics)
+{
+	char *name;
+
+	name = kasprintf(GFP_KERNEL, "kvm-xics-%p", xics);
+	if (!name) {
+		pr_err("%s: no memory for name\n", __func__);
+		return;
+	}
+
+	xics->dentry = debugfs_create_file(name, S_IRUGO, powerpc_debugfs_root,
+					   xics, &xics_debug_fops);
+
+	pr_debug("%s: created %s\n", __func__, name);
+	/* The name buffer is only needed for the calls above */
+	kfree(name);
+}
+
+/*
+ * Allocate and publish the ICS for @buid with @nr_irqs sources.
+ *
+ * Every source starts out masked, with its global number set to
+ * (buid << KVMPPC_XICS_BUID_SHIFT) | index. The caller has validated
+ * @buid and @nr_irqs and holds kvm->lock. Returns 0 or -ENOMEM.
+ */
+static int kvmppc_xics_create_ics(struct kvmppc_xics *xics, u16 buid, u16 nr_irqs)
+{
+	struct kvmppc_ics *ics;
+	int i, size;
+
+	/* Create the ICS */
+	size = sizeof(struct kvmppc_ics) + sizeof(struct ics_irq_state) * nr_irqs;
+	ics = kzalloc(size, GFP_KERNEL);
+	if (!ics)
+		return -ENOMEM;
+
+	mutex_init(&ics->lock);
+	ics->buid = buid;
+	ics->nr_irqs = nr_irqs;
+
+	for (i = 0; i < nr_irqs; i++) {
+		ics->irq_state[i].number = (buid << KVMPPC_XICS_BUID_SHIFT) | i;
+		ics->irq_state[i].priority = MASKED;
+		ics->irq_state[i].saved_priority = MASKED;
+	}
+	/* Order the initialisation above before the pointer is published */
+	smp_wmb();
+	xics->ics[buid - 1] = ics;
+
+	if (buid > xics->max_buid)
+		xics->max_buid = buid;
+
+	return 0;
+}
+
+/*
+ * Allocate the ICP for @vcpu and attach it as vcpu->arch.icp. The new ICP
+ * starts with its mfrr masked. Returns 0 or -ENOMEM.
+ */
+int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_icp *new_icp = kzalloc(sizeof(*new_icp), GFP_KERNEL);
+
+	if (new_icp == NULL)
+		return -ENOMEM;
+
+	new_icp->vcpu = vcpu;
+	new_icp->mfrr = MASKED;
+	mutex_init(&new_icp->lock);
+
+	vcpu->arch.icp = new_icp;
+
+	XICS_DBG("created server for vcpu %d\n", vcpu->vcpu_id);
+
+	return 0;
+}
+
+/* Release @vcpu's ICP, if it has one, and clear the pointer. */
+void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_icp *icp = vcpu->arch.icp;
+
+	if (icp) {
+		kfree(icp);
+		vcpu->arch.icp = NULL;
+	}
+}
+
+/*
+ * Tear down the VM's XICS: remove the debugfs file, detach the XICS from
+ * the VM, then free every ICS and the XICS itself. Does nothing when the
+ * VM has no XICS.
+ */
+void kvmppc_xics_free(struct kvm *kvm)
+{
+	struct kvmppc_xics *xics = kvm->arch.xics;
+	struct kvm *owner;
+	int slot;
+
+	if (!xics)
+		return;
+
+	lockdep_assert_held(&kvm->lock);
+
+	debugfs_remove(xics->dentry);
+
+	/* Break the links in both directions before freeing anything */
+	owner = xics->kvm;
+	if (owner) {
+		owner->arch.xics = NULL;
+		xics->kvm = NULL;
+	}
+
+	/* Slots 0..max_buid-1 cover every BUID ever created; kfree(NULL) is a no-op */
+	for (slot = 0; slot < xics->max_buid; slot++)
+		kfree(xics->ics[slot]);
+
+	kfree(xics);
+}
+
+/* -- ioctls -- */
+
+/*
+ * KVM_CREATE_IRQCHIP, type ICP: create the per-VM XICS structure.
+ *
+ * Runs under kvm->lock. Returns -EEXIST if the VM already has an XICS,
+ * -ENOMEM on allocation failure, 0 on success. @args is not consulted.
+ */
+static int kvm_vm_ioctl_create_icp(struct kvm *kvm,
+				   struct kvm_irqchip_args *args)
+{
+	struct kvmppc_xics *xics;
+	int rc = 0;
+
+	mutex_lock(&kvm->lock);
+
+	/* Already there ? */
+	if (kvm->arch.xics) {
+		/* Leave through 'out' so that kvm->lock is released */
+		rc = -EEXIST;
+		goto out;
+	}
+
+	xics = kzalloc(sizeof(*xics), GFP_KERNEL);
+	if (!xics) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	xics->kvm = kvm;
+	kvm->arch.xics = xics;
+	xics_debugfs_init(xics);
+
+out:
+	mutex_unlock(&kvm->lock);
+	return rc;
+}
+
+/*
+ * KVM_CREATE_IRQCHIP, type ICS: create the source controller for one BUID.
+ *
+ * Returns -ENODEV if the ICP/XICS has not been created yet, -EINVAL for a
+ * BUID outside 1..KVMPPC_XICS_MAX_BUID or an irq count outside
+ * 1..KVMPPC_XICS_IRQ_COUNT, -EEXIST if the BUID already has an ICS,
+ * -ENOMEM on allocation failure, 0 on success.
+ *
+ * All argument validation happens before kvm->lock is taken, so every
+ * path that reaches the unlock actually holds the lock.
+ */
+static int kvm_vm_ioctl_create_ics(struct kvm *kvm,
+				   struct kvm_irqchip_args *args)
+{
+	struct kvmppc_xics *xics = kvm->arch.xics;
+	u16 nr_irqs, buid;
+	int rc;
+
+	if (!xics)
+		return -ENODEV;
+
+	nr_irqs = args->ics.nr_irqs;
+	buid = args->ics.buid;
+
+	/* Sanity checks; BUID 0 is bogus and must not index ics[buid - 1] */
+	if (buid == 0 || buid > KVMPPC_XICS_MAX_BUID ||
+	    nr_irqs == 0 || nr_irqs > KVMPPC_XICS_IRQ_COUNT)
+		return -EINVAL;
+
+	mutex_lock(&kvm->lock);
+
+	if (xics->ics[buid - 1]) {
+		/* BUID already exists */
+		rc = -EEXIST;
+	} else {
+		/* Create the ICS */
+		rc = kvmppc_xics_create_ics(xics, buid, nr_irqs);
+	}
+
+	mutex_unlock(&kvm->lock);
+	return rc;
+}
+
+/*
+ * KVM_IRQ_LINE: forward a source event to the ICS layer. Returns -ENODEV
+ * without an XICS, -EINVAL for a level other than KVM_INTERRUPT_SET,
+ * KVM_INTERRUPT_SET_LEVEL or KVM_INTERRUPT_UNSET, and 0 otherwise.
+ */
+static int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args)
+{
+	/* locking against multiple callers? */
+	struct kvmppc_xics *xics = kvm->arch.xics;
+	bool known_level;
+
+	if (!xics)
+		return -ENODEV;
+
+	known_level = args->level == KVM_INTERRUPT_SET ||
+		      args->level == KVM_INTERRUPT_SET_LEVEL ||
+		      args->level == KVM_INTERRUPT_UNSET;
+	if (!known_level)
+		return -EINVAL;
+
+	ics_deliver_irq(xics, args->irq, args->level);
+
+	return 0;
+}
+
+/*
+ * VM ioctl dispatcher for the XICS: handles KVM_CREATE_IRQCHIP (ICP or ICS
+ * creation, selected by args.type) and KVM_IRQ_LINE. @arg is the user
+ * pointer to the ioctl's argument structure. Returns -EFAULT when that
+ * structure cannot be copied in, -EINVAL for an unknown irqchip type,
+ * -ENOTTY for any other ioctl, otherwise the handler's result.
+ */
+int kvmppc_xics_ioctl(struct kvm *kvm, unsigned ioctl, unsigned long arg)
+{
+	void __user *argp = (void __user *)arg;
+	int rc;
+
+	switch (ioctl) {
+	case KVM_CREATE_IRQCHIP: {
+		struct kvm_irqchip_args args;
+
+		rc = -EFAULT;
+		if (copy_from_user(&args, argp, sizeof(args)))
+			break;
+		/* Stays -EINVAL if 'type' matches neither controller */
+		rc = -EINVAL;
+		if (args.type == KVM_IRQCHIP_TYPE_ICP)
+			rc = kvm_vm_ioctl_create_icp(kvm, &args);
+		else if (args.type == KVM_IRQCHIP_TYPE_ICS)
+			rc = kvm_vm_ioctl_create_ics(kvm, &args);
+		break;
+	}
+
+	case KVM_IRQ_LINE: {
+		struct kvm_irq_level args;
+
+		rc = -EFAULT;
+		if (copy_from_user(&args, argp, sizeof(args)))
+			break;
+		rc = kvm_vm_ioctl_xics_irq(kvm, &args);
+		break;
+	}
+
+	default:
+		rc = -ENOTTY;
+		break;
+	}
+
+	return rc;
+}
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 5ecfd80..507c9f5 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -199,8 +199,7 @@  void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
 	kvmppc_booke_queue_irqprio(vcpu, prio);
 }
 
-void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
-                                  struct kvm_interrupt *irq)
+void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu)
 {
 	clear_bit(BOOKE_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions);
 	clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 4b7522f..89e3572 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -286,6 +286,7 @@  int kvm_dev_ioctl_check_extension(long ext)
 		break;
 #ifdef CONFIG_PPC_BOOK3S_64
 	case KVM_CAP_PPC_GET_SMMU_INFO:
+	case KVM_CAP_SPAPR_XICS:
 		r = 1;
 		break;
 #endif
@@ -611,7 +612,7 @@  int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
 int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
 {
 	if (irq->irq == KVM_INTERRUPT_UNSET) {
-		kvmppc_core_dequeue_external(vcpu, irq);
+		kvmppc_core_dequeue_external(vcpu);
 		return 0;
 	}
 
@@ -841,11 +842,6 @@  long kvm_arch_vm_ioctl(struct file *filp,
 		r = 0;
 		break;
 	}
-
-	case KVM_PPC_RTAS_DEFINE_TOKEN:
-		r = kvm_vm_ioctl_rtas_define_token(kvm, argp);
-		break;
-
 #endif /* CONFIG_KVM_BOOK3S_64_HV */
 
 #ifdef CONFIG_PPC_BOOK3S_64
@@ -859,7 +855,27 @@  long kvm_arch_vm_ioctl(struct file *filp,
 			r = -EFAULT;
 		break;
 	}
+	case KVM_PPC_RTAS_DEFINE_TOKEN:
+		r = kvm_vm_ioctl_rtas_define_token(kvm, argp);
+		break;
 #endif /* CONFIG_PPC_BOOK3S_64 */
+	case KVM_IRQ_LINE:
+		if (kvmppc_xics_enabled(kvm))
+			r = kvmppc_xics_ioctl(kvm, ioctl, arg);
+		else
+			r = -ENOTTY;
+		break;
+	case KVM_CREATE_IRQCHIP: {
+		u32 type;
+
+		r = -EFAULT;
+		if (get_user(type, (u32 __user *)argp))
+			break;
+		r = -EINVAL;
+		if (type == KVM_IRQCHIP_TYPE_ICP || type == KVM_IRQCHIP_TYPE_ICS)
+			r = kvmppc_xics_ioctl(kvm, ioctl, arg);
+		break;
+	}
 	default:
 		r = -ENOTTY;
 	}
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 35c063a..f9a396f 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -111,6 +111,7 @@  struct kvm_irq_level {
 	 * ACPI gsi notion of irq.
 	 * For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47..
 	 * For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23..
+	 * On powerpc SPAPR, the ICS source number, level is ignored.
 	 */
 	union {
 		__u32 irq;
@@ -620,6 +621,7 @@  struct kvm_ppc_smmu_info {
 #define KVM_CAP_PPC_ALLOC_HTAB 80
 #define KVM_CAP_PPC_VPA 81
 #define KVM_CAP_PPC_RTAS 82
+#define KVM_CAP_SPAPR_XICS 83
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -753,6 +755,11 @@  struct kvm_msi {
 	__u8  pad[16];
 };
 
+#ifndef __KVM_HAVE_IRQCHIP_ARGS
+/* Allow arch code to optionally define args for KVM_CREATE_IRQCHIP */
+struct kvm_irqchip_args { };
+#endif
+
 /*
  * ioctls for VM fds
  */
@@ -783,7 +790,7 @@  struct kvm_s390_ucas_mapping {
 #define KVM_S390_VCPU_FAULT	 _IOW(KVMIO, 0x52, unsigned long)
 
 /* Device model IOC */
-#define KVM_CREATE_IRQCHIP        _IO(KVMIO,   0x60)
+#define KVM_CREATE_IRQCHIP        _IOW(KVMIO,  0x60, struct kvm_irqchip_args)
 #define KVM_IRQ_LINE              _IOW(KVMIO,  0x61, struct kvm_irq_level)
 #define KVM_GET_IRQCHIP           _IOWR(KVMIO, 0x62, struct kvm_irqchip)
 #define KVM_SET_IRQCHIP           _IOR(KVMIO,  0x63, struct kvm_irqchip)