@@ -296,6 +296,45 @@ struct kvm_rtas_token_args {
__u64 token; /* Use a token of 0 to undefine a mapping */
};
+/* for KVM_CAP_SPAPR_XICS */
+#define __KVM_HAVE_IRQCHIP_ARGS
+struct kvm_irqchip_args {
+#define KVM_IRQCHIP_TYPE_ICP 0 /* XICS: ICP (presentation controller) */
+#define KVM_IRQCHIP_TYPE_ICS 1 /* XICS: ICS (source controller) */
+ __u32 type;
+ union {
+ /* XICS ICP arguments. This needs to be called once before
+ * creating any VCPU to initialize the main kernel XICS data
+ * structures.
+ */
+ struct {
+#define KVM_ICP_FLAG_NOREALMODE 0x00000001 /* Disable real mode ICP */
+ __u32 flags;
+ } icp;
+
+ /* XICS ICS arguments. You can call this for every BUID you
+ * want to make available.
+ *
+ * The BUID is 12 bits, the interrupt number within a BUID
+ * is up to 12 bits as well. The resulting interrupt numbers
+ * exposed to the guest are BUID || IRQ, which is 24 bits.
+ *
+ * BUID cannot be 0.
+ */
+ struct {
+ __u32 flags;
+ __u16 buid;
+ __u16 nr_irqs;
+ } ics;
+ };
+};
+
+struct kvm_spapr_xics_xive {
+ __u32 irq;
+ __u32 server;
+ __u32 priority;
+};
+
struct kvm_book3e_206_tlb_entry {
__u32 mas8;
__u32 mas1;
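
(Illustration only, not part of the patch: a minimal userspace sequence
against the API above might look like the following sketch. Error
handling is elided, and "vmfd" is assumed to be an open KVM VM file
descriptor; KVM_CREATE_IRQCHIP takes a struct kvm_irqchip_args as per
the include/linux/kvm.h change at the end of this patch.)

	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	static int setup_xics(int vmfd)
	{
		struct kvm_irqchip_args args;

		/* Create the ICP first, before any VCPU is created */
		memset(&args, 0, sizeof(args));
		args.type = KVM_IRQCHIP_TYPE_ICP;
		if (ioctl(vmfd, KVM_CREATE_IRQCHIP, &args) < 0)
			return -1;

		/* Then one ICS: BUID 1, 256 sources -> irqs 0x001000..0x0010ff */
		memset(&args, 0, sizeof(args));
		args.type = KVM_IRQCHIP_TYPE_ICS;
		args.ics.buid = 1;
		args.ics.nr_irqs = 256;
		return ioctl(vmfd, KVM_CREATE_IRQCHIP, &args);
	}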
@@ -187,6 +187,10 @@ struct kvmppc_linear_info {
int type;
};
+/* XICS components, defined in book3s_xics.c */
+struct kvmppc_xics;
+struct kvmppc_icp;
+
/*
* The reverse mapping array has one entry for each HPTE,
* which stores the guest's view of the second word of the HPTE
@@ -251,6 +255,7 @@ struct kvm_arch {
#ifdef CONFIG_PPC_BOOK3S_64
struct list_head spapr_tce_tables;
struct list_head rtas_tokens;
+ struct kvmppc_xics *xics;
#endif
};
@@ -532,6 +537,9 @@ struct kvm_vcpu_arch {
u64 stolen_logged;
struct kvmppc_vpa slb_shadow;
#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+ struct kvmppc_icp *icp; /* XICS presentation controller */
+#endif
};
/* Values for vcpu->arch.state */
@@ -102,8 +102,7 @@ extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu);
extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu);
extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq);
-extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
- struct kvm_interrupt *irq);
+extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu);
extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int op, int *advance);
@@ -127,6 +126,12 @@ extern long kvmppc_prepare_vrma(struct kvm *kvm,
extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
struct kvm_memory_slot *memslot, unsigned long porder);
extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
+extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd);
+extern int kvmppc_xics_ioctl(struct kvm *kvm, unsigned ioctl, unsigned long arg);
+extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu);
+extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu);
+extern void kvmppc_xics_free(struct kvm *kvm);
+
extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
struct kvm_create_spapr_tce *args);
extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
@@ -152,6 +157,8 @@ extern void kvmppc_bookehv_exit(void);
extern int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp);
extern int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu);
extern void kvmppc_rtas_tokens_free(struct kvm *kvm);
+extern int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority);
+extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority);
/*
* Cuts out inst bits with ordering according to spec.
@@ -213,6 +220,25 @@ static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr)
static inline void kvm_linear_init(void)
{}
+
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_64
+
+extern int kvmppc_xics_ioctl(struct kvm *kvm, unsigned ioctl, unsigned long arg);
+
+static inline int kvmppc_xics_enabled(struct kvm *kvm)
+{
+ return kvm->arch.xics != NULL;
+}
+
+#else
+static inline int kvmppc_xics_ioctl(struct kvm *kvm, unsigned ioctl,
+ unsigned long arg)
+{
+ return -ENOTTY;
+}
+static inline int kvmppc_xics_enabled(struct kvm *kvm) { return 0; }
#endif
int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
@@ -81,6 +81,7 @@ kvm-book3s_64-module-objs := \
book3s.o \
book3s_64_vio.o \
book3s_rtas.o \
+ book3s_xics.o \
$(kvm-book3s_64-objs-y)
kvm-objs-$(CONFIG_KVM_BOOK3S_64) := $(kvm-book3s_64-module-objs)
@@ -160,8 +160,7 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
kvmppc_book3s_queue_irqprio(vcpu, vec);
}
-void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
- struct kvm_interrupt *irq)
+void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu)
{
kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL);
kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL);
@@ -465,6 +465,14 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
/* Send the error out to userspace via KVM_RUN */
return rc;
+ case H_XIRR:
+ case H_CPPR:
+ case H_EOI:
+ case H_IPI:
+ if (kvmppc_xics_enabled(vcpu->kvm)) {
+ ret = kvmppc_xics_hcall(vcpu, req);
+ break;
+ } /* fallthrough */
default:
return RESUME_HOST;
}
@@ -673,6 +681,13 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
spin_lock_init(&vcpu->arch.vpa_update_lock);
+ /* Create the XICS */
+ if (kvmppc_xics_enabled(kvm)) {
+ err = kvmppc_xics_create_icp(vcpu);
+ if (err < 0)
+ goto free_vcpu;
+ }
+
kvmppc_mmu_book3s_hv_init(vcpu);
/*
@@ -727,6 +742,8 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
kvmppc_unpin_guest_page(vcpu->kvm, vcpu->arch.vpa.pinned_addr);
spin_unlock(&vcpu->arch.vpa_update_lock);
kvm_vcpu_uninit(vcpu);
+ if (kvmppc_xics_enabled(vcpu->kvm))
+ kvmppc_xics_free_icp(vcpu);
kmem_cache_free(kvm_vcpu_cache, vcpu);
}
@@ -1602,6 +1619,9 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
kvmppc_rtas_tokens_free(kvm);
+ if (kvmppc_xics_enabled(kvm))
+ kvmppc_xics_free(kvm);
+
kvmppc_free_hpt(kvm);
WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
}
@@ -984,6 +984,13 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
if (err < 0)
goto uninit_vcpu;
+ /* Create the XICS */
+ if (kvmppc_xics_enabled(kvm)) {
+ err = kvmppc_xics_create_icp(vcpu);
+ if (err < 0)
+ goto free_vcpu;
+ }
+
return vcpu;
uninit_vcpu:
@@ -1000,6 +1007,8 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
+ if (kvmppc_xics_enabled(vcpu->kvm))
+ kvmppc_xics_free_icp(vcpu);
free_page((unsigned long)vcpu->arch.shared & PAGE_MASK);
kvm_vcpu_uninit(vcpu);
kfree(vcpu_book3s->shadow_vcpu);
@@ -1199,6 +1208,7 @@ int kvmppc_core_init_vm(struct kvm *kvm)
{
#ifdef CONFIG_PPC64
INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
+ INIT_LIST_HEAD(&kvm->arch.rtas_tokens);
#endif
return 0;
@@ -1209,6 +1219,9 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
#ifdef CONFIG_PPC64
WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
#endif
+ if (kvmppc_xics_enabled(kvm))
+ kvmppc_xics_free(kvm);
+
}
static int kvmppc_book3s_init(void)
@@ -227,6 +227,15 @@ static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu)
return EMULATE_DONE;
}
+static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
+{
+ long rc = kvmppc_xics_hcall(vcpu, cmd);
+ if (rc == H_TOO_HARD)
+ return EMULATE_FAIL;
+ kvmppc_set_gpr(vcpu, 3, rc);
+ return EMULATE_DONE;
+}
+
int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
{
switch (cmd) {
@@ -246,11 +255,17 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
vcpu->stat.halt_wakeup++;
return EMULATE_DONE;
+ case H_XIRR:
+ case H_CPPR:
+ case H_EOI:
+ case H_IPI:
+ if (kvmppc_xics_enabled(vcpu->kvm))
+ return kvmppc_h_pr_xics_hcall(vcpu, cmd);
+ break;
case H_RTAS:
if (list_empty(&vcpu->kvm->arch.rtas_tokens))
return RESUME_HOST;
- rc = kvmppc_rtas_hcall(vcpu);
- if (rc != 0)
+ if (kvmppc_rtas_hcall(vcpu) != 0)
break;
kvmppc_set_gpr(vcpu, 3, 0);
return EMULATE_DONE;
@@ -18,12 +18,61 @@
#include <asm/rtas.h>
+static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
+{
+ u32 irq, server, priority;
+ int rc;
+
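+ /* RTAS status -3 is "parameter error" in PAPR */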
+ if (args->nargs != 3 || args->nret != 1) {
+ rc = -3;
+ goto out;
+ }
+
+ irq = args->args[0];
+ server = args->args[1];
+ priority = args->args[2];
+
+ rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority);
+ if (rc)
+ rc = -3;
+out:
+ args->rets[0] = rc;
+}
+
+static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args)
+{
+ u32 irq, server, priority;
+ int rc;
+
+ if (args->nargs != 1 || args->nret != 3) {
+ rc = -3;
+ goto out;
+ }
+
+ irq = args->args[0];
+
+ server = priority = 0;
+ rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority);
+ if (rc) {
+ rc = -3;
+ goto out;
+ }
+
+ args->rets[1] = server;
+ args->rets[2] = priority;
+out:
+ args->rets[0] = rc;
+}
+
struct rtas_handler {
void (*handler)(struct kvm_vcpu *vcpu, struct rtas_args *args);
char *name;
};
-static struct rtas_handler rtas_handlers[] = { };
+static struct rtas_handler rtas_handlers[] = {
+ { .name = "ibm,set-xive", .handler = kvm_rtas_set_xive },
+ { .name = "ibm,get-xive", .handler = kvm_rtas_get_xive },
+};
struct rtas_token_definition {
struct list_head list;
new file mode 100644
@@ -0,0 +1,882 @@
+/*
+ * Copyright 2012 Michael Ellerman, IBM Corporation.
+ * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+#include <linux/gfp.h>
+
+#include <asm/uaccess.h>
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_ppc.h>
+#include <asm/hvcall.h>
+#include <asm/xics.h>
+#include <asm/debug.h>
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#define MASKED 0xff
+
+#define XICS_DBG(fmt...) do { } while (0)
+
+#undef DEBUG_REALMODE
+
+/*
+ * LOCKING
+ * =======
+ *
+ * Each ICP has its own lock, and there is one lock for the ICS (ie. all
+ * information about irq sources).
+ *
+ * The ICS lock nests inside any of the ICP locks. ie. you are allowed
+ * to take the ICS lock while holding an ICP lock, but not vice versa.
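+ *
+ * icp_deliver_irq() below illustrates the nesting: it takes the ICP
+ * lock first, then the ICS lock inside it while snapshotting the
+ * source state.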
+ */
+
+/*
+ * Interrupt numbering
+ * ===================
+ *
+ * The 24-bit global interrupt numbers are divided in two components,
+ * the BUID and the interrupt source. We have arbitrarily chosen a
+ * 12-bit BUID and a 12-bit source number within each BUID, as
+ * encoded by the macros below.
+ */
+
+#define KVMPPC_XICS_MAX_BUID 0xfff
+#define KVMPPC_XICS_IRQ_COUNT 0x1000
+#define KVMPPC_XICS_BUID_SHIFT 12
+#define KVMPPC_XICS_SRC_MASK 0xfff
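+
+/*
+ * Example (illustrative): BUID 0x001, source 0x003 yields the guest
+ * visible interrupt number (0x001 << 12) | 0x003 = 0x001003.
+ */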
+
+/* State for one irq in an ics */
+struct ics_irq_state {
+ u32 number;
+ u32 server;
+ u8 priority;
+ u8 saved_priority; /* currently unused */
+ u8 resend;
+ u8 masked_pending;
+ u8 asserted; /* Only for LSI */
+};
+
+#define ICP_RESEND_MAP_SIZE \
+ ((KVMPPC_XICS_MAX_BUID + BITS_PER_LONG - 1) / BITS_PER_LONG)
+
+struct kvmppc_icp {
+ struct mutex lock;
+ struct kvm_vcpu *vcpu;
+ u32 pending_irq; /* XISR */
+ u8 pending_priority;
+ u8 current_priority; /* CPPR */
+ u8 mfrr; /* MFRR */
+ bool need_resend;
+ unsigned long resend_map[ICP_RESEND_MAP_SIZE];
+};
+
+struct kvmppc_ics {
+ struct mutex lock;
+ u16 buid;
+ u16 nr_irqs;
+ struct ics_irq_state irq_state[];
+};
+
+struct kvmppc_xics {
+ struct kvm *kvm;
+ struct dentry *dentry;
+ u32 max_buid;
+ struct kvmppc_ics *ics[KVMPPC_XICS_MAX_BUID]; /* [1...MAX_BUID] */
+};
+
+static struct kvmppc_icp *kvmppc_xics_find_server(struct kvm *kvm, u32 nr)
+{
+ struct kvm_vcpu *vcpu = NULL;
+ int i;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (nr == vcpu->vcpu_id)
+ return vcpu->arch.icp;
+ }
+ return NULL;
+}
+
+static struct kvmppc_ics *kvmppc_xics_find_ics(struct kvmppc_xics *xics,
+ u32 irq, u16 *source)
+{
+ u16 buid = irq >> KVMPPC_XICS_BUID_SHIFT;
+ u16 src = irq & KVMPPC_XICS_SRC_MASK;
+ struct kvmppc_ics *ics;
+
+ ics = xics->ics[buid - 1];
+ if (!ics)
+ return NULL;
+ if (src >= ics->nr_irqs)
+ return NULL;
+ if (source)
+ *source = src;
+ return ics;
+}
+
+/* -- ICS routines -- */
+
+static void icp_deliver_irq(struct kvmppc_xics *xics,
+ struct kvmppc_icp *icp,
+ struct kvmppc_ics *ics, u16 src);
+
+static void __ics_reject_irq(struct kvmppc_icp *icp,
+ struct kvmppc_ics *ics, u16 src)
+{
+ struct ics_irq_state *state = &ics->irq_state[src];
+
+ XICS_DBG("server %d reject src %#x\n", icp->vcpu->vcpu_id, src);
+
+ /* XXX check if it is still level-triggered & asserted? */
+ state->resend = 1;
+ set_bit(ics->buid, icp->resend_map);
+ icp->need_resend = true;
+}
+
+static void ics_reject_irq(struct kvmppc_xics *xics,
+ struct kvmppc_icp *icp, u32 irq)
+{
+ struct kvmppc_ics *ics;
+ u16 src;
+
+ lockdep_assert_held(&icp->lock);
+
+ ics = kvmppc_xics_find_ics(xics, irq, &src);
+ if (!ics) {
+ pr_warning("ics_reject_irq: IRQ 0x%06x not found!\n", irq);
+ return;
+ }
+
+ mutex_lock(&ics->lock);
+ __ics_reject_irq(icp, ics, src);
+ mutex_unlock(&ics->lock);
+}
+
+static void ics_eoi(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+ u32 irq)
+{
+ struct ics_irq_state *state;
+ struct kvmppc_ics *ics;
+ u16 src;
+
+ XICS_DBG("ics_eoi 0x%06x\n", irq);
+
+ lockdep_assert_held(&icp->lock);
+
+ ics = kvmppc_xics_find_ics(xics, irq, &src);
+ if (!ics) {
+ pr_warning("ics_eoi: IRQ 0x%06x not found!\n", irq);
+ return;
+ }
+ state = &ics->irq_state[src];
+
+ mutex_lock(&ics->lock);
+
+ /* If it's an LSI and still asserted we resend */
+ if (state->asserted) {
+ state->resend = 1;
+ set_bit(ics->buid, icp->resend_map);
+ icp->need_resend = true;
+ }
+
+ mutex_unlock(&ics->lock);
+}
+
+static void ics_deliver_irq(struct kvmppc_xics *xics,
+ u32 irq, u32 level)
+{
+ struct kvmppc_icp *icp;
+ struct ics_irq_state *state;
+ struct kvmppc_ics *ics;
+ bool deliver = false;
+ u32 server;
+ u16 src;
+
+ XICS_DBG("ics deliver 0x%06x (level: %d)\n", irq, level);
+
+ ics = kvmppc_xics_find_ics(xics, irq, &src);
+ if (!ics) {
+ pr_warning("ics_deliver_irq: IRQ 0x%06x not found!\n", irq);
+ return;
+ }
+ state = &ics->irq_state[src];
+
+ mutex_lock(&ics->lock);
+
+ if (level == KVM_INTERRUPT_SET_LEVEL)
+ state->asserted = 1;
+ else if (level == KVM_INTERRUPT_UNSET) {
+ state->asserted = 0;
+ goto unlock;
+ }
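+ /* A plain KVM_INTERRUPT_SET is edge-like: delivered but not latched */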
+
+ if (state->priority != MASKED) {
+ deliver = true;
+ server = state->server;
+ } else {
+ XICS_DBG("masked pending\n");
+ state->masked_pending = 1;
+ }
+
+unlock:
+ mutex_unlock(&ics->lock);
+
+ if (deliver) {
+ icp = kvmppc_xics_find_server(xics->kvm, server);
+ /* Configured server not found... XXX FALLBACK */
+ if (icp)
+ icp_deliver_irq(xics, icp, ics, src);
+ }
+}
+
+static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
+ struct kvmppc_icp *icp)
+{
+ u32 server = icp->vcpu->vcpu_id;
+ int i;
+
+ mutex_lock(&ics->lock);
+
+ for (i = 0; i < ics->nr_irqs; i++) {
+ struct ics_irq_state *state = &ics->irq_state[i];
+
+ if (!state->resend || state->server != server)
+ continue;
+
+ XICS_DBG("resend 0x%06x prio %d\n", state->number,
+ state->priority);
+
+ state->resend = 0;
+ if (state->priority == MASKED)
+ continue;
+
+ mutex_unlock(&ics->lock);
+ icp_deliver_irq(xics, icp, ics, i);
+ mutex_lock(&ics->lock);
+ }
+
+ mutex_unlock(&ics->lock);
+}
+
+static void icp_check_resend(struct kvmppc_xics *xics,
+ struct kvmppc_icp *icp)
+{
+ u32 buid;
+
+ for_each_set_bit(buid, icp->resend_map, xics->max_buid + 1) {
+ struct kvmppc_ics *ics = xics->ics[buid - 1];
+
+ if (!test_and_clear_bit(buid, icp->resend_map))
+ continue;
+ if (!ics)
+ continue;
+ ics_check_resend(xics, ics, icp);
+ }
+}
+
+int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority)
+{
+ struct kvmppc_xics *xics = kvm->arch.xics;
+ struct kvmppc_icp *icp;
+ struct kvmppc_ics *ics;
+ struct ics_irq_state *state;
+ u16 src;
+ bool deliver;
+
+ if (!xics)
+ return -ENODEV;
+
+ ics = kvmppc_xics_find_ics(xics, irq, &src);
+ if (!ics)
+ return -EINVAL;
+ state = &ics->irq_state[src];
+
+ icp = kvmppc_xics_find_server(kvm, server);
+ if (!icp)
+ return -EINVAL;
+
+ mutex_lock(&ics->lock);
+
+ state->server = server;
+ state->priority = priority;
+ deliver = false;
+ if (state->masked_pending && state->priority != MASKED) {
+ state->masked_pending = 0;
+ deliver = true;
+ }
+
+ mutex_unlock(&ics->lock);
+
+ XICS_DBG("irq 0x%06x server %d prio %#x\n", irq, server, priority);
+
+ if (deliver)
+ icp_deliver_irq(xics, icp, ics, src);
+
+ return 0;
+}
+
+int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority)
+{
+ struct kvmppc_xics *xics = kvm->arch.xics;
+ struct kvmppc_ics *ics;
+ struct ics_irq_state *state;
+ u16 src;
+
+ if (!xics)
+ return -ENODEV;
+
+ ics = kvmppc_xics_find_ics(xics, irq, &src);
+ if (!ics)
+ return -EINVAL;
+ state = &ics->irq_state[src];
+
+ mutex_lock(&ics->lock);
+ *server = state->server;
+ *priority = state->priority;
+ mutex_unlock(&ics->lock);
+
+ XICS_DBG("irq 0x%06x server %d prio %#x\n",
+ irq, state->server, state->priority);
+
+ return 0;
+}
+
+/* -- ICP routines, including hcalls -- */
+
+static void icp_external_interrupt(struct kvmppc_icp *icp)
+{
+ unsigned int vec = BOOK3S_INTERRUPT_EXTERNAL_LEVEL;
+
+ lockdep_assert_held(&icp->lock);
+
+ kvmppc_book3s_queue_irqprio(icp->vcpu, vec);
+ kvm_vcpu_kick(icp->vcpu);
+}
+
+static void icp_deliver_irq(struct kvmppc_xics *xics,
+ struct kvmppc_icp *icp,
+ struct kvmppc_ics *ics, u16 src)
+{
+ struct ics_irq_state state_copy;
+
+ mutex_lock(&icp->lock);
+
+ /* Snapshot irq state */
+ mutex_lock(&ics->lock);
+ state_copy = ics->irq_state[src];
+
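+ /* Numerically lower priority values are more favoured */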
+ if (state_copy.priority > icp->current_priority) {
+ /* CPU is not interested in us */
+ __ics_reject_irq(icp, ics, src);
+ mutex_unlock(&ics->lock);
+ goto out;
+ }
+
+ if (icp->pending_irq) {
+ /* An interrupt is pending */
+ if (icp->pending_priority <= state_copy.priority) {
+ /* pending irq is equally or more favoured */
+ __ics_reject_irq(icp, ics, src);
+ mutex_unlock(&ics->lock);
+ goto out;
+ }
+ }
+ mutex_unlock(&ics->lock);
+
+ /* We are more favoured, reject pending irq */
+ if (icp->pending_irq)
+ ics_reject_irq(xics, icp, icp->pending_irq);
+
+ icp->pending_irq = state_copy.number;
+ icp->pending_priority = state_copy.priority;
+
+ XICS_DBG("irq 0x%06x pending on %d prio %#x\n",
+ state_copy.number, state_copy.server, state_copy.priority);
+
+ icp_external_interrupt(icp);
+
+out:
+ mutex_unlock(&icp->lock);
+}
+
+static void icp_check_ipi(struct kvmppc_xics *xics, struct kvmppc_icp *icp)
+{
+ lockdep_assert_held(&icp->lock);
+
+ if (icp->mfrr >= icp->current_priority)
+ return;
+
+ XICS_DBG("cpu %d can take IPI mfrr=%#x\n",
+ icp->vcpu->vcpu_id, icp->mfrr);
+
+ if (icp->pending_irq) {
+ /* IPI is less favoured */
+ if (icp->pending_priority <= icp->mfrr) {
+ XICS_DBG("ODD: pending_prio=%#x pending_irq=%#x\n",
+ icp->pending_priority, icp->pending_irq);
+ return;
+ }
+
+ /* IPI is more favoured, reject the other interrupt */
+ ics_reject_irq(xics, icp, icp->pending_irq);
+ }
+
+ icp->pending_irq = XICS_IPI;
+ icp->pending_priority = icp->mfrr;
+ icp_external_interrupt(icp);
+}
+
+static u32 icp_accept(struct kvm_vcpu *vcpu, struct kvmppc_icp *icp)
+{
+ u32 xirr;
+
+ mutex_lock(&icp->lock);
+
+ kvmppc_core_dequeue_external(vcpu);
+
+ /* The XIRR is the pending interrupt & current priority */
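+ /* e.g. pending irq 0x001003 with CPPR 0x05 -> XIRR 0x05001003 */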
+ xirr = icp->pending_irq | (icp->current_priority << 24);
+
+ /* The pending priority becomes current */
+ icp->current_priority = icp->pending_priority;
+
+ /* Clear the pending interrupt */
+ icp->pending_irq = 0;
+
+ mutex_unlock(&icp->lock);
+
+ return xirr;
+}
+
+static unsigned long h_xirr(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_icp *icp = vcpu->arch.icp;
+ u32 xirr;
+
+ xirr = icp_accept(vcpu, icp);
+
+ XICS_DBG("h_xirr vcpu %d xirr %#x\n", vcpu->vcpu_id, xirr);
+
+ return xirr;
+}
+
+static int h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
+ unsigned long mfrr)
+{
+ struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+ struct kvmppc_icp *icp;
+
+ XICS_DBG("h_ipi vcpu %d to server %lu mfrr %#lx\n",
+ vcpu->vcpu_id, server, mfrr);
+
+ icp = kvmppc_xics_find_server(vcpu->kvm, server);
+ if (!icp)
+ return H_PARAMETER;
+
+ mutex_lock(&icp->lock);
+
+ icp->mfrr = mfrr;
+ icp_check_ipi(xics, icp);
+
+ mutex_unlock(&icp->lock);
+
+ return H_SUCCESS;
+}
+
+static void h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
+{
+ struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+ struct kvmppc_icp *icp = vcpu->arch.icp;
+ u8 old_priority;
+ bool check_resend = false;
+
+ XICS_DBG("h_cppr vcpu %d cppr %#lx\n", vcpu->vcpu_id, cppr);
+
+ mutex_lock(&icp->lock);
+
+ old_priority = icp->current_priority;
+ icp->current_priority = cppr;
+
+ if (icp->pending_irq &&
+ icp->current_priority < icp->pending_priority) {
+ u32 pending = icp->pending_irq;
+ /* Pending irq is less favoured than our new priority */
+ icp->pending_irq = 0;
+ kvmppc_core_dequeue_external(vcpu);
+ ics_reject_irq(xics, icp, pending);
+ }
+
+ /* Check if there is anything we can accept now */
+ if (!icp->pending_irq)
+ icp_check_ipi(xics, icp);
+ if (!icp->pending_irq && icp->need_resend) {
+ check_resend = true;
+ icp->need_resend = false;
+ }
+
+ mutex_unlock(&icp->lock);
+
+ if (check_resend)
+ icp_check_resend(xics, icp);
+}
+
+static void h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
+{
+ struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+ struct kvmppc_icp *icp = vcpu->arch.icp;
+ bool check_resend = false;
+
+ XICS_DBG("h_eoi vcpu %d eoi %#lx\n", vcpu->vcpu_id, xirr);
+
+ mutex_lock(&icp->lock);
+
+ icp->current_priority = xirr >> 24;
+
+ /* If nothing is pending since accept, check for an IPI */
+ if (!icp->pending_irq)
+ icp_check_ipi(xics, icp);
+
+ if (!icp->pending_irq && icp->need_resend) {
+ check_resend = true;
+ icp->need_resend = false;
+ }
+
+ ics_eoi(xics, icp, xirr & 0xFFFFFF);
+
+ mutex_unlock(&icp->lock);
+
+ if (check_resend)
+ icp_check_resend(xics, icp);
+}
+
+int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
+{
+ unsigned long res;
+ int rc = H_SUCCESS;
+
+ /* Check if we have an ICP */
+ if (!vcpu->arch.icp || !vcpu->kvm->arch.xics)
+ return H_HARDWARE;
+
+ switch (req) {
+ case H_XIRR:
+ res = h_xirr(vcpu);
+ kvmppc_set_gpr(vcpu, 4, res);
+ break;
+ case H_CPPR:
+ h_cppr(vcpu, kvmppc_get_gpr(vcpu, 4));
+ break;
+ case H_EOI:
+ h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4));
+ break;
+ case H_IPI:
+ rc = h_ipi(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5));
+ break;
+ }
+
+ return rc;
+}
+
+/* -- Initialisation code etc. -- */
+
+static int xics_debug_show(struct seq_file *m, void *private)
+{
+ struct kvmppc_xics *xics = m->private;
+ struct kvm *kvm = xics->kvm;
+ struct kvm_vcpu *vcpu;
+ int buid, i;
+
+ if (!kvm)
+ return 0;
+
+ seq_printf(m, "=========\nICP state\n=========\n");
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ struct kvmppc_icp *icp = vcpu->arch.icp;
+
+ if (!icp)
+ continue;
+
+ mutex_lock(&icp->lock);
+
+ seq_printf(m, "cpu server %#x pending %#x pending prio %#x cppr %#x "
+ "mfrr %#x\n", vcpu->vcpu_id, icp->pending_irq,
+ icp->pending_priority, icp->current_priority,
+ icp->mfrr);
+
+ mutex_unlock(&icp->lock);
+ }
+
+ for (buid = 1; buid <= KVMPPC_XICS_MAX_BUID; buid++) {
+ struct kvmppc_ics *ics = xics->ics[buid - 1];
+
+ if (!ics)
+ continue;
+
+ seq_printf(m, "=========\nICS state for BUID 0x%x\n=========\n", buid);
+
+ mutex_lock(&ics->lock);
+
+ for (i = 0; i < ics->nr_irqs; i++) {
+ struct ics_irq_state *irq = &ics->irq_state[i];
+
+ seq_printf(m, "irq 0x%06x: server %#x prio %#x save prio %#x "
+ "asserted %d resend %d masked pending %d\n",
+ irq->number, irq->server, irq->priority,
+ irq->saved_priority, irq->asserted, irq->resend,
+ irq->masked_pending);
+
+ }
+ mutex_unlock(&ics->lock);
+ }
+ return 0;
+}
+
+static int xics_debug_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, xics_debug_show, inode->i_private);
+}
+
+static const struct file_operations xics_debug_fops = {
+ .open = xics_debug_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static void xics_debugfs_init(struct kvmppc_xics *xics)
+{
+ char *name;
+
+ name = kasprintf(GFP_KERNEL, "kvm-xics-%p", xics);
+ if (!name) {
+ pr_err("%s: no memory for name\n", __func__);
+ return;
+ }
+
+ xics->dentry = debugfs_create_file(name, S_IRUGO, powerpc_debugfs_root,
+ xics, &xics_debug_fops);
+
+ pr_debug("%s: created %s\n", __func__, name);
+ kfree(name);
+}
+
+static int kvmppc_xics_create_ics(struct kvmppc_xics *xics, u16 buid, u16 nr_irqs)
+{
+ struct kvmppc_ics *ics;
+ int i, size;
+
+ /* Create the ICS */
+ size = sizeof(struct kvmppc_ics) + sizeof(struct ics_irq_state) * nr_irqs;
+ ics = kzalloc(size, GFP_KERNEL);
+ if (!ics)
+ return -ENOMEM;
+
+ mutex_init(&ics->lock);
+ ics->buid = buid;
+ ics->nr_irqs = nr_irqs;
+
+ for (i = 0; i < nr_irqs; i++) {
+ ics->irq_state[i].number = (buid << KVMPPC_XICS_BUID_SHIFT) | i;
+ ics->irq_state[i].priority = MASKED;
+ ics->irq_state[i].saved_priority = MASKED;
+ }
+ smp_wmb();
+ xics->ics[buid - 1] = ics;
+
+ if (buid > xics->max_buid)
+ xics->max_buid = buid;
+
+ return 0;
+}
+
+int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu)
+{
+ struct kvmppc_icp *icp;
+
+ icp = kzalloc(sizeof(struct kvmppc_icp), GFP_KERNEL);
+ if (!icp)
+ return -ENOMEM;
+
+ mutex_init(&icp->lock);
+ icp->vcpu = vcpu;
+ icp->mfrr = MASKED;
+ vcpu->arch.icp = icp;
+
+ XICS_DBG("created server for vcpu %d\n", vcpu->vcpu_id);
+
+ return 0;
+}
+
+void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu)
+{
+ if (!vcpu->arch.icp)
+ return;
+ kfree(vcpu->arch.icp);
+ vcpu->arch.icp = NULL;
+}
+
+void kvmppc_xics_free(struct kvm *kvm)
+{
+ struct kvmppc_xics *xics = kvm->arch.xics;
+ int i;
+
+ if (!xics)
+ return;
+
+ lockdep_assert_held(&kvm->lock);
+
+ debugfs_remove(xics->dentry);
+
+ if (xics->kvm) {
+ xics->kvm->arch.xics = NULL;
+ xics->kvm = NULL;
+ }
+
+ for (i = 0; i < xics->max_buid; i++) {
+ if (xics->ics[i])
+ kfree(xics->ics[i]);
+ }
+ kfree(xics);
+}
+
+/* -- ioctls -- */
+
+static int kvm_vm_ioctl_create_icp(struct kvm *kvm,
+ struct kvm_irqchip_args *args)
+{
+ struct kvmppc_xics *xics;
+ int rc = 0;
+
+ mutex_lock(&kvm->lock);
+
+ /* Already there? */
+ if (kvm->arch.xics) {
+ rc = -EEXIST;
+ goto out;
+ }
+
+ xics = kzalloc(sizeof(*xics), GFP_KERNEL);
+ if (!xics) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ xics->kvm = kvm;
+ kvm->arch.xics = xics;
+ xics_debugfs_init(xics);
+
+out:
+ mutex_unlock(&kvm->lock);
+ return rc;
+}
+
+static int kvm_vm_ioctl_create_ics(struct kvm *kvm,
+ struct kvm_irqchip_args *args)
+{
+ struct kvmppc_xics *xics = kvm->arch.xics;
+ u16 nr_irqs, buid;
+ int rc;
+
+ if (!xics)
+ return -ENODEV;
+
+ nr_irqs = args->ics.nr_irqs;
+ buid = args->ics.buid;
+
+ /* BUID 0 is bogus */
+ if (buid == 0)
+ return -EINVAL;
+
+ /* Sanity checks */
+ if (nr_irqs == 0 || nr_irqs > KVMPPC_XICS_IRQ_COUNT ||
+ buid > KVMPPC_XICS_MAX_BUID)
+ return -EINVAL;
+
+ mutex_lock(&kvm->lock);
+
+ /* BUID already exists */
+ if (xics->ics[buid - 1]) {
+ rc = -EEXIST;
+ goto out;
+ }
+
+ /* Create the ICS */
+ rc = kvmppc_xics_create_ics(xics, buid, nr_irqs);
+out:
+ mutex_unlock(&kvm->lock);
+ return rc;
+}
+
+static int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args)
+{
+ struct kvmppc_xics *xics;
+
+ /* locking against multiple callers? */
+
+ xics = kvm->arch.xics;
+ if (!xics)
+ return -ENODEV;
+
+ switch (args->level) {
+ case KVM_INTERRUPT_SET:
+ case KVM_INTERRUPT_SET_LEVEL:
+ case KVM_INTERRUPT_UNSET:
+ ics_deliver_irq(xics, args->irq, args->level);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int kvmppc_xics_ioctl(struct kvm *kvm, unsigned ioctl, unsigned long arg)
+{
+ void __user *argp = (void __user *)arg;
+ int rc;
+
+ switch (ioctl) {
+ case KVM_CREATE_IRQCHIP: {
+ struct kvm_irqchip_args args;
+
+ rc = -EFAULT;
+ if (copy_from_user(&args, argp, sizeof(args)))
+ break;
+ rc = -EINVAL;
+ if (args.type == KVM_IRQCHIP_TYPE_ICP)
+ rc = kvm_vm_ioctl_create_icp(kvm, &args);
+ else if (args.type == KVM_IRQCHIP_TYPE_ICS)
+ rc = kvm_vm_ioctl_create_ics(kvm, &args);
+ break;
+ }
+
+ case KVM_IRQ_LINE: {
+ struct kvm_irq_level args;
+
+ rc = -EFAULT;
+ if (copy_from_user(&args, argp, sizeof(args)))
+ break;
+ rc = kvm_vm_ioctl_xics_irq(kvm, &args);
+ break;
+ }
+
+ default:
+ rc = -ENOTTY;
+ break;
+ }
+
+ return rc;
+}
@@ -199,8 +199,7 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
kvmppc_booke_queue_irqprio(vcpu, prio);
}
-void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
- struct kvm_interrupt *irq)
+void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu)
{
clear_bit(BOOKE_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions);
clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
@@ -286,6 +286,7 @@ int kvm_dev_ioctl_check_extension(long ext)
break;
#ifdef CONFIG_PPC_BOOK3S_64
case KVM_CAP_PPC_GET_SMMU_INFO:
+ case KVM_CAP_SPAPR_XICS:
r = 1;
break;
#endif
@@ -611,7 +612,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
{
if (irq->irq == KVM_INTERRUPT_UNSET) {
- kvmppc_core_dequeue_external(vcpu, irq);
+ kvmppc_core_dequeue_external(vcpu);
return 0;
}
@@ -841,11 +842,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = 0;
break;
}
-
- case KVM_PPC_RTAS_DEFINE_TOKEN:
- r = kvm_vm_ioctl_rtas_define_token(kvm, argp);
- break;
-
#endif /* CONFIG_KVM_BOOK3S_64_HV */
#ifdef CONFIG_PPC_BOOK3S_64
@@ -859,7 +855,27 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = -EFAULT;
break;
}
+ case KVM_PPC_RTAS_DEFINE_TOKEN:
+ r = kvm_vm_ioctl_rtas_define_token(kvm, argp);
+ break;
#endif /* CONFIG_PPC_BOOK3S_64 */
+ case KVM_IRQ_LINE:
+ if (kvmppc_xics_enabled(kvm))
+ r = kvmppc_xics_ioctl(kvm, ioctl, arg);
+ else
+ r = -ENOTTY;
+ break;
+ case KVM_CREATE_IRQCHIP: {
+ u32 type;
+
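+ /* args.type is the first field of struct kvm_irqchip_args */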
+ r = -EFAULT;
+ if (get_user(type, (u32 __user *)argp))
+ break;
+ r = -EINVAL;
+ if (type == KVM_IRQCHIP_TYPE_ICP || type == KVM_IRQCHIP_TYPE_ICS)
+ r = kvmppc_xics_ioctl(kvm, ioctl, arg);
+ break;
+ }
default:
r = -ENOTTY;
}
@@ -111,6 +111,7 @@ struct kvm_irq_level {
* ACPI gsi notion of irq.
* For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47..
* For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23..
+ * On powerpc sPAPR, the global ICS source number (level carries a KVM_INTERRUPT_* value).
*/
union {
__u32 irq;
@@ -620,6 +621,7 @@ struct kvm_ppc_smmu_info {
#define KVM_CAP_PPC_ALLOC_HTAB 80
#define KVM_CAP_PPC_VPA 81
#define KVM_CAP_PPC_RTAS 82
+#define KVM_CAP_SPAPR_XICS 83
#ifdef KVM_CAP_IRQ_ROUTING
@@ -753,6 +755,11 @@ struct kvm_msi {
__u8 pad[16];
};
+#ifndef __KVM_HAVE_IRQCHIP_ARGS
+/* Allow arch code to optionally define args for KVM_CREATE_IRQCHIP */
+struct kvm_irqchip_args { };
+#endif
+
/*
* ioctls for VM fds
*/
@@ -783,7 +790,7 @@ struct kvm_s390_ucas_mapping {
#define KVM_S390_VCPU_FAULT _IOW(KVMIO, 0x52, unsigned long)
/* Device model IOC */
-#define KVM_CREATE_IRQCHIP _IO(KVMIO, 0x60)
+#define KVM_CREATE_IRQCHIP _IOW(KVMIO, 0x60, struct kvm_irqchip_args)
#define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level)
#define KVM_GET_IRQCHIP _IOWR(KVMIO, 0x62, struct kvm_irqchip)
#define KVM_SET_IRQCHIP _IOR(KVMIO, 0x63, struct kvm_irqchip)
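
(Illustration only, not part of the patch: with the extended
KVM_IRQ_LINE above, userspace could assert a level-triggered source as
in this hypothetical sketch, where 0x001003 is source 3 of BUID 1 and
"vmfd" is an open VM file descriptor.)

	struct kvm_irq_level irq_arg = {
		.irq = 0x001003, /* BUID 1, source 3 */
		.level = KVM_INTERRUPT_SET_LEVEL, /* KVM_INTERRUPT_UNSET deasserts */
	};
	ioctl(vmfd, KVM_IRQ_LINE, &irq_arg);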
This is an initial variant of the in-kernel XICS emulation for both HV
and PR KVM running in PAPR mode. It is based on an initial
implementation by Michael Ellerman <michael@ellerman.id.au>, reworked
by myself.

It supports up to 4095 "BUIDs" (blocks of interrupts) of up to 4096
interrupts each.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/powerpc/include/asm/kvm.h      |  39 ++
 arch/powerpc/include/asm/kvm_host.h |   8 +
 arch/powerpc/include/asm/kvm_ppc.h  |  30 +-
 arch/powerpc/kvm/Makefile           |   1 +
 arch/powerpc/kvm/book3s.c           |   3 +-
 arch/powerpc/kvm/book3s_hv.c        |  20 +
 arch/powerpc/kvm/book3s_pr.c        |  13 +
 arch/powerpc/kvm/book3s_pr_papr.c   |  19 +-
 arch/powerpc/kvm/book3s_rtas.c      |  51 +-
 arch/powerpc/kvm/book3s_xics.c      | 882 +++++++++++++++++++++++++++++++++++
 arch/powerpc/kvm/booke.c            |   3 +-
 arch/powerpc/kvm/powerpc.c          |  28 +-
 include/linux/kvm.h                 |   9 +-
 13 files changed, 1090 insertions(+), 16 deletions(-)
 create mode 100644 arch/powerpc/kvm/book3s_xics.c