| Message ID | 1483163161-2402-7-git-send-email-suravee.suthikulpanit@amd.com (mailing list archive) |
|---|---|
| State | New, archived |
On 31/12/2016 05:45, Suravee Suthikulpanit wrote:
> VMEXIT_DO_NOACCEL:
> This oocurs when a guest access to an APIC register that cannot be

"occurs"

> accelerated by AVIC. In this case, Xen tries to emulate register accesses.
>
> This fault is also generated if an EOI isattempted when the highest priority

"is attempted"

> @@ -122,6 +125,8 @@ int svm_avic_dom_init(struct domain *d)
>      clear_domain_page(_mfn(mfn));
>      d->arch.hvm_domain.svm.avic_phy_apic_id_table_mfn = mfn;
>
> +    spin_lock_init(&d->arch.hvm_domain.svm.avic_ldr_mode_lock);
> +

I think this hunk belongs in the previous patch.

>      return ret;
>  err_out:
>      svm_avic_dom_destroy(d);
> @@ -222,6 +227,338 @@ int svm_avic_init_vmcb(struct vcpu *v)
>  }
>
>  /*
> + * Note:
> + * This function handles the AVIC_INCOMP_IPI #vmexit when AVIC is enabled.
> + * The hardware generates this fault when an IPI could not be delivered
> + * to all targeted guest virtual processors because at least one guest
> + * virtual processor was not allocated to a physical core at the time.
> + */
> +void svm_avic_vmexit_do_incomp_ipi(struct cpu_user_regs *regs)
> +{
> +    struct vcpu *curr = current;
> +    struct vmcb_struct *vmcb = curr->arch.hvm_svm.vmcb;
> +    u32 icrh = vmcb->exitinfo1 >> 32;
> +    u32 icrl = vmcb->exitinfo1;
> +    u32 id = vmcb->exitinfo2 >> 32;
> +    u32 index = vmcb->exitinfo2 && 0xFF;
> +
> +    switch ( id )
> +    {
> +    case AVIC_INCMP_IPI_ERR_INVALID_INT_TYPE:
> +        /*
> +         * AVIC hardware handles the delivery of
> +         * IPIs when the specified Message Type is Fixed
> +         * (also known as fixed delivery mode) and
> +         * the Trigger Mode is edge-triggered. The hardware
> +         * also supports self and broadcast delivery modes
> +         * specified via the Destination Shorthand(DSH)
> +         * field of the ICRL. Logical and physical APIC ID
> +         * formats are supported. All other IPI types cause
> +         * a #VMEXIT, which needs to emulated.
> +         */
> +        vlapic_reg_write(curr, APIC_ICR2, icrh);
> +        vlapic_reg_write(curr, APIC_ICR, icrl);
> +        break;

Please have a newline between break and case.

> +    case AVIC_INCMP_IPI_ERR_TARGET_NOT_RUN:
> +    {
> +        /*
> +         * At this point, we expect that the AVIC HW has already
> +         * set the appropriate IRR bits on the valid target
> +         * vcpus. So, we just need to kick the appropriate vcpu.
> +         */
> +        struct vcpu *curc;
> +        struct domain *curd = curr->domain;

currd is the more common name for this, and I would use a plain v
instead of curc as it isn't curr.

> +        uint32_t dest = GET_xAPIC_DEST_FIELD(icrh);
> +        uint32_t short_hand = icrl & APIC_SHORT_MASK;
> +        bool dest_mode = !!(icrl & APIC_DEST_MASK);
> +
> +        for_each_vcpu ( curd, curc )
> +        {
> +            if ( curc != curr &&
> +                 vlapic_match_dest(vcpu_vlapic(curc), vcpu_vlapic(curr),
> +                                   short_hand, dest, dest_mode) )
> +            {
> +                vcpu_kick(curc);
> +                break;
> +            }
> +        }
> +        break;
> +    }
> +    case AVIC_INCMP_IPI_ERR_INV_TARGET:
> +        dprintk(XENLOG_ERR,
> +                "SVM: %s: Invalid IPI target (icr=%#08x:%08x, idx=%u)\n",
> +                __func__, icrh, icrl, index);
> +        break;

Shouldn't this case be emulated to raise appropriate APIC errors in the
guest's view?

> +    case AVIC_INCMP_IPI_ERR_INV_BK_PAGE:
> +        dprintk(XENLOG_ERR,
> +                "SVM: %s: Invalid bk page (icr=%#08x:%08x, idx=%u)\n",
> +                __func__, icrh, icrl, index);
> +        break;
> +    default:
> +        dprintk(XENLOG_ERR, "SVM: %s: Unknown IPI interception (%#x)\n",
> +                __func__, id);

These two cases are an error in Xen's setup, or the hardware.  They
should be gprintk()s (so as not to disappear in release builds), and
cause a domain_crash().
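A minimal sketch of the shape being asked for here (gprintk() and
domain_crash() are the existing Xen interfaces; the message text is
illustrative only, and merging the two cases drops the icr details from
the bk-page message):

    case AVIC_INCMP_IPI_ERR_INV_BK_PAGE:
    default:
        /* An error in Xen's setup or the hardware: log it (also in
         * release builds) and crash only the offending guest. */
        gprintk(XENLOG_ERR, "SVM: %s: unexpected IPI interception (%#x)\n",
                __func__, id);
        domain_crash(curr->domain);
        break;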
> +    }
> +}
> +
> +static struct avic_log_apic_id_ent *
> +avic_get_logical_id_entry(const struct vcpu *v, u32 ldr, bool flat)
> +{
> +    unsigned int index;
> +    struct avic_log_apic_id_ent *avic_log_apid_id_table;
> +    const struct svm_domain *d = &v->domain->arch.hvm_domain.svm;
> +    unsigned int dest_id = GET_APIC_LOGICAL_ID(ldr);
> +
> +    if ( !dest_id )
> +        return NULL;
> +
> +    if ( flat )
> +    {
> +        index = ffs(dest_id) - 1;
> +        if ( index > 7 )
> +            return NULL;
> +    }
> +    else
> +    {
> +        unsigned int cluster = (dest_id & 0xf0) >> 4;
> +        int apic = ffs(dest_id & 0x0f) - 1;
> +
> +        if ( (apic < 0) || (apic > 7) || (cluster >= 0xf) )
> +            return NULL;
> +        index = (cluster << 2) + apic;
> +    }
> +
> +    ASSERT(index <= 255);
> +
> +    avic_log_apid_id_table = mfn_to_virt(d->avic_log_apic_id_table_mfn);

As with the physical table, this is buggy above 5TB, and should probably
be a global mapping to start with.
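For reference, a sketch of the global-mapping alternative using Xen's
map_domain_page_global()/unmap_domain_page_global() (the
avic_log_apic_id_table pointer field is hypothetical; error handling
omitted):

    /* At table allocation time, e.g. in svm_avic_dom_init(): */
    d->avic_log_apic_id_table = map_domain_page_global(_mfn(mfn));

    /* The lookup above then avoids mfn_to_virt()/the direct map: */
    return &d->avic_log_apic_id_table[index];

    /* And on domain destruction: */
    unmap_domain_page_global(d->avic_log_apic_id_table);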
> +
> +    return &avic_log_apid_id_table[index];
> +}
> +
> +static int avic_ldr_write(struct vcpu *v, u8 g_phy_id, u32 ldr, bool valid)
> +{
> +    struct avic_log_apic_id_ent *entry, new_entry;
> +    u32 *bp = avic_get_bk_page_entry(v, APIC_DFR);
> +
> +    if ( !bp )
> +        return -EINVAL;
> +
> +    entry = avic_get_logical_id_entry(v, ldr, (*bp == APIC_DFR_FLAT));
> +    if (!entry)
> +        return -EINVAL;
> +
> +    new_entry = *entry;
> +    smp_rmb();
> +    new_entry.guest_phy_apic_id = g_phy_id;
> +    new_entry.valid = valid;
> +    *entry = new_entry;
> +    smp_wmb();
> +
> +    return 0;
> +}
> +
> +static int avic_handle_ldr_update(struct vcpu *v)
> +{
> +    int ret = 0;
> +    u32 *ldr = avic_get_bk_page_entry(v, APIC_LDR);
> +    u32 *apic_id_reg = avic_get_bk_page_entry(v, APIC_ID);
> +
> +    if ( !ldr || !*ldr || !apic_id_reg )
> +        return -EINVAL;
> +
> +    ret = avic_ldr_write(v, GET_APIC_PHYSICAL_ID(*apic_id_reg), *ldr, true);
> +    if ( ret && v->arch.hvm_svm.avic_last_ldr )
> +    {
> +        /*
> +         * Note:
> +         * In case of failure to update LDR register,
> +         * we set the guest physical APIC ID to 0,
> +         * and set the entry logical APID ID entry
> +         * to invalid (false).
> +         */
> +        avic_ldr_write(v, 0, v->arch.hvm_svm.avic_last_ldr, false);
> +        v->arch.hvm_svm.avic_last_ldr = 0;
> +    }
> +    else
> +    {
> +        /*
> +         * Note:
> +         * This saves the last valid LDR so that we
> +         * know which entry in the local APIC ID
> +         * to clean up when the LDR is updated.
> +         */
> +        v->arch.hvm_svm.avic_last_ldr = *ldr;
> +    }
> +
> +    return ret;
> +}
> +
> +static int avic_handle_apic_id_update(struct vcpu *v, bool init)
> +{
> +    struct avic_phy_apic_id_ent *old, *new;
> +    struct arch_svm_struct *s = &v->arch.hvm_svm;
> +    u32 *apic_id_reg = avic_get_bk_page_entry(v, APIC_ID);
> +
> +    if ( !apic_id_reg )
> +        return -EINVAL;
> +
> +    old = s->avic_last_phy_id;
> +    ASSERT(old);
> +
> +    new = avic_get_phy_apic_id_ent(v, GET_APIC_PHYSICAL_ID(*apic_id_reg));
> +    if ( !new )
> +        return 0;
> +
> +    /* We need to move physical_id_entry to new offset */
> +    *new = *old;
> +    *((u64 *)old) = 0ULL;
> +    s->avic_last_phy_id = new;
> +
> +    /*
> +     * Update the guest physical APIC ID in the logical
> +     * APIC ID table entry if LDR is already setup.
> +     */
> +    if ( v->arch.hvm_svm.avic_last_ldr )
> +        avic_handle_ldr_update(v);
> +
> +    return 0;
> +}
> +
> +static int avic_handle_dfr_update(struct vcpu *v)
> +{
> +    u32 mod;
> +    struct svm_domain *d = &v->domain->arch.hvm_domain.svm;
> +    u32 *dfr = avic_get_bk_page_entry(v, APIC_DFR);
> +
> +    if ( !dfr )
> +        return -EINVAL;
> +
> +    mod = (*dfr >> 28) & 0xFu;
> +
> +    spin_lock(&d->avic_ldr_mode_lock);
> +    if ( d->avic_ldr_mode != mod )
> +    {
> +        /*
> +         * We assume that all local APICs are using the same type.
> +         * If LDR mode changes, we need to flush the domain AVIC logical
> +         * APIC id table.
> +         */

The logical APIC ID table is per-domain, yet LDR mode is per-vcpu.  How
can these coexist if we flush the table like this?  How would a
multi-vcpu domain actually change its mode without this logic in Xen
corrupting the table?

> +        clear_domain_page(_mfn(d->avic_log_apic_id_table_mfn));
> +        smp_wmb();
> +        d->avic_ldr_mode = mod;
> +    }
> +    spin_unlock(&d->avic_ldr_mode_lock);
> +
> +    if ( v->arch.hvm_svm.avic_last_ldr )
> +        avic_handle_ldr_update(v);
> +
> +    return 0;
> +}
> +
> +static int avic_unaccel_trap_write(struct vcpu *v)
> +{
> +    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
> +    u32 offset = vmcb->exitinfo1 & AVIC_UNACCEL_ACCESS_OFFSET_MASK;
> +    u32 *reg = avic_get_bk_page_entry(v, offset);
> +
> +    if ( !reg )
> +        return X86EMUL_UNHANDLEABLE;
> +
> +    switch ( offset )
> +    {
> +    case APIC_ID:
> +        if ( avic_handle_apic_id_update(v, false) )
> +            return X86EMUL_UNHANDLEABLE;
> +        break;
> +    case APIC_LDR:
> +        if ( avic_handle_ldr_update(v) )
> +            return X86EMUL_UNHANDLEABLE;
> +        break;
> +    case APIC_DFR:
> +        if ( avic_handle_dfr_update(v) )
> +            return X86EMUL_UNHANDLEABLE;
> +        break;
> +    default:
> +        break;
> +    }
> +
> +    vlapic_reg_write(v, offset, *reg);
> +
> +    return X86EMUL_OKAY;
> +}
> +
> +/*
> + * Note:
> + * This function handles the AVIC_NOACCEL #vmexit when AVIC is enabled.
> + * The hardware generates this fault when :
> + * - A guest access to an APIC register that is not accelerated
> + *   by AVIC hardware.
> + * - EOI is attempted when the highest priority in-service interrupt
> + *   is level-triggered.
> + */
> +void svm_avic_vmexit_do_noaccel(struct cpu_user_regs *regs)
> +{
> +    struct vcpu *curr = current;
> +    struct vmcb_struct *vmcb = curr->arch.hvm_svm.vmcb;
> +    u32 offset = vmcb->exitinfo1 & 0xFF0;
> +    u32 rw = (vmcb->exitinfo1 >> 32) & 0x1;
> +
> +    switch ( offset )
> +    {
> +    case APIC_ID:
> +    case APIC_EOI:
> +    case APIC_RRR:
> +    case APIC_LDR:
> +    case APIC_DFR:
> +    case APIC_SPIV:
> +    case APIC_ESR:
> +    case APIC_ICR:
> +    case APIC_LVTT:
> +    case APIC_LVTTHMR:
> +    case APIC_LVTPC:
> +    case APIC_LVT0:
> +    case APIC_LVT1:
> +    case APIC_LVTERR:
> +    case APIC_TMICT:
> +    case APIC_TDCR:

Please use a bitmap in a similar way to hvm_x2apic_msr_read().  It
generates far more efficient code.
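A sketch of that bitmap approach, modelled on hvm_x2apic_msr_read()
(untested; all of the listed registers live below offset 0x400, so
offset >> 4 indexes a small bitmap):

    static const unsigned long avic_trap_reg[] =
    {
#define REG(x) (1UL << (APIC_ ## x >> 4))
        REG(ID)   | REG(EOI)     | REG(RRR)   | REG(LDR)  |
        REG(DFR)  | REG(SPIV)    | REG(ESR)   | REG(ICR)  |
        REG(LVTT) | REG(LVTTHMR) | REG(LVTPC) | REG(LVT0) |
        REG(LVT1) | REG(LVTERR)  | REG(TMICT) | REG(TDCR)
#undef REG
    };

    if ( offset < 0x400 && test_bit(offset >> 4, avic_trap_reg) )
        /* trap-style handling (intercept right after the access) */;
    else
        /* fault-style handling (intercept before the access) */;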
> +        /*
> +         * Handling AVIC Trap (intercept right after the access).
> +         */
> +        if ( !rw )
> +        {
> +            /*
> +             * If a read trap happens, the CPU microcode does not
> +             * implement the spec.

So it is all microcode then :)

> +             */
> +            BUG();

domain_crash() please, and a suitable gprintk().  There is no need to
take down the entire system, as non-avic guests should be able to
continue functioning.

> +        }
> +        if ( avic_unaccel_trap_write(curr) != X86EMUL_OKAY )
> +        {
> +            gprintk(XENLOG_ERR, "%s: Failed to handle trap write (%#x)\n",
> +                    __func__, offset);
> +            return;
> +        }
> +        break;
> +    default:
> +        /*
> +         * Handling AVIC Fault (intercept before the access).
> +         */
> +        if ( !rw )
> +        {
> +            u32 *entry = avic_get_bk_page_entry(curr, offset);
> +
> +            if ( !entry )
> +                return;
> +
> +            *entry = vlapic_read_aligned(vcpu_vlapic(curr), offset);
> +        }
> +        hvm_emulate_one_vm_event(EMUL_KIND_NORMAL, TRAP_invalid_op,
> +                                 HVM_DELIVER_NO_ERROR_CODE);

What is this doing here?  I don't see any connection.

~Andrew
> +
> +static int avic_ldr_write(struct vcpu *v, u8 g_phy_id, u32 ldr, bool valid)
> +{
> +    struct avic_log_apic_id_ent *entry, new_entry;
> +    u32 *bp = avic_get_bk_page_entry(v, APIC_DFR);

dfr would be a better name (and you use it in avic_handle_dfr_update()).

Also, 'logical' instead of 'log' in avic_log_apic_id_ent would be far
less confusing imo.

> +
> +    if ( !bp )
> +        return -EINVAL;
> +
> +    entry = avic_get_logical_id_entry(v, ldr, (*bp == APIC_DFR_FLAT));
> +    if (!entry)
> +        return -EINVAL;
> +
> +    new_entry = *entry;
> +    smp_rmb();
> +    new_entry.guest_phy_apic_id = g_phy_id;
> +    new_entry.valid = valid;
> +    *entry = new_entry;
> +    smp_wmb();
> +
> +    return 0;
> +}
> +
> +static int avic_handle_apic_id_update(struct vcpu *v, bool init)
> +{
> +    struct avic_phy_apic_id_ent *old, *new;
> +    struct arch_svm_struct *s = &v->arch.hvm_svm;
> +    u32 *apic_id_reg = avic_get_bk_page_entry(v, APIC_ID);
> +
> +    if ( !apic_id_reg )
> +        return -EINVAL;
> +
> +    old = s->avic_last_phy_id;
> +    ASSERT(old);
> +
> +    new = avic_get_phy_apic_id_ent(v, GET_APIC_PHYSICAL_ID(*apic_id_reg));
> +    if ( !new )
> +        return 0;
> +
> +    /* We need to move physical_id_entry to new offset */
> +    *new = *old;
> +    *((u64 *)old) = 0ULL;

This is pretty ugly. Can you define an invalid entry and assign it here
instead?

> +    s->avic_last_phy_id = new;
> +
> +    /*
> +     * Update the guest physical APIC ID in the logical
> +     * APIC ID table entry if LDR is already setup.
> +     */
> +    if ( v->arch.hvm_svm.avic_last_ldr )
> +        avic_handle_ldr_update(v);
> +
> +    return 0;
> +}
> +
> +static int avic_handle_dfr_update(struct vcpu *v)
> +{
> +    u32 mod;
> +    struct svm_domain *d = &v->domain->arch.hvm_domain.svm;
> +    u32 *dfr = avic_get_bk_page_entry(v, APIC_DFR);
> +
> +    if ( !dfr )
> +        return -EINVAL;
> +
> +    mod = (*dfr >> 28) & 0xFu;
> +
> +    spin_lock(&d->avic_ldr_mode_lock);
> +    if ( d->avic_ldr_mode != mod )
> +    {
> +        /*
> +         * We assume that all local APICs are using the same type.
> +         * If LDR mode changes, we need to flush the domain AVIC logical
> +         * APIC id table.
> +         */
> +        clear_domain_page(_mfn(d->avic_log_apic_id_table_mfn));
> +        smp_wmb();

Is this needed? I think clear_page() guarantees visibility/ordering (we
have SFENCE in clear_page_sse2() for this reason).

-boris
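For the "invalid entry" suggestion above, something along these lines
(a sketch only; whether plain struct assignment preserves the
single-write atomicity of the u64 store would need checking):

    /* A named all-zeroes entry, equivalent to the 0ULL store. */
    static const struct avic_phy_apic_id_ent invalid_phy_ent = { };

    /* We need to move physical_id_entry to new offset */
    *new = *old;
    *old = invalid_phy_ent;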
Hi Andrew,

On 1/3/17 00:28, Andrew Cooper wrote:
> On 31/12/2016 05:45, Suravee Suthikulpanit wrote:
>> [...]
>> +    case AVIC_INCMP_IPI_ERR_INV_TARGET:
>> +        dprintk(XENLOG_ERR,
>> +                "SVM: %s: Invalid IPI target (icr=%#08x:%08x, idx=%u)\n",
>> +                __func__, icrh, icrl, index);
>> +        break;
>
> Shouldn't this case be emulated to raise appropriate APIC errors in the
> guest's view?

Actually, I think we would need to domain_crash().

>> [...]
>> +static int avic_handle_dfr_update(struct vcpu *v)
>> +{
>> +    u32 mod;
>> +    struct svm_domain *d = &v->domain->arch.hvm_domain.svm;
>> +    u32 *dfr = avic_get_bk_page_entry(v, APIC_DFR);
>> +
>> +    if ( !dfr )
>> +        return -EINVAL;
>> +
>> +    mod = (*dfr >> 28) & 0xFu;
>> +
>> +    spin_lock(&d->avic_ldr_mode_lock);
>> +    if ( d->avic_ldr_mode != mod )
>> +    {
>> +        /*
>> +         * We assume that all local APICs are using the same type.
>> +         * If LDR mode changes, we need to flush the domain AVIC logical

My apology.... s/LDR mode/DFR mode/

>> +         * APIC id table.
>> +         */
>
> The logical APIC ID table is per-domain, yet LDR mode is per-vcpu.  How
> can these coexist if we flush the table like this?  How would a
> multi-vcpu domain actually change its mode without this logic in Xen
> corrupting the table?

My apology.... s/avic_ldr_mode/avic_dfr_mode/

The per-domain avic_dfr_mode is used to store the value of DFR. The idea
here is that if the DFR is changed, we need to flush the logical APIC ID
table and update the avic_dfr_mode. In a multi-vcpu domain, this will be
done when the first vcpu updates its DFR register. Flushing the table will
automatically invalidate the entries in the table, which should cause a
#VMEXIT. A sketch of the renamed logic follows below.

>> [...]
>> +        /*
>> +         * Handling AVIC Fault (intercept before the access).
>> +         */
>> +        if ( !rw )
>> +        {
>> +            u32 *entry = avic_get_bk_page_entry(curr, offset);
>> +
>> +            if ( !entry )
>> +                return;
>> +
>> +            *entry = vlapic_read_aligned(vcpu_vlapic(curr), offset);
>> +        }

This part is not needed. I will remove it.

>> +        hvm_emulate_one_vm_event(EMUL_KIND_NORMAL, TRAP_invalid_op,
>> +                                 HVM_DELIVER_NO_ERROR_CODE);
>
> What is this doing here?  I don't see any connection.
>
> ~Andrew

We need to emulate the instruction for the #vmexit do noaccel fault case.

Thanks,
Suravee
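For concreteness, the check in avic_handle_dfr_update() after the
described s/ldr/dfr/ rename would read something like this (field names
assumed to follow the rename; the smp_wmb() is dropped per Boris'
clear_page() observation elsewhere in this thread):

    spin_lock(&d->avic_dfr_mode_lock);
    if ( d->avic_dfr_mode != mod )
    {
        /*
         * DFR mode changed: flush the per-domain logical APIC ID table.
         * Cleared entries are invalid, so the next use faults back into
         * Xen and is re-populated via avic_handle_ldr_update().
         */
        clear_domain_page(_mfn(d->avic_log_apic_id_table_mfn));
        d->avic_dfr_mode = mod;
    }
    spin_unlock(&d->avic_dfr_mode_lock);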
Hi Boris,

On 1/3/17 22:34, Boris Ostrovsky wrote:
>> +static int avic_handle_dfr_update(struct vcpu *v)
>> +{
>> +    u32 mod;
>> +    struct svm_domain *d = &v->domain->arch.hvm_domain.svm;
>> +    u32 *dfr = avic_get_bk_page_entry(v, APIC_DFR);
>> +
>> +    if ( !dfr )
>> +        return -EINVAL;
>> +
>> +    mod = (*dfr >> 28) & 0xFu;
>> +
>> +    spin_lock(&d->avic_ldr_mode_lock);
>> +    if ( d->avic_ldr_mode != mod )
>> +    {
>> +        /*
>> +         * We assume that all local APICs are using the same type.
>> +         * If LDR mode changes, we need to flush the domain AVIC logical
>> +         * APIC id table.
>> +         */
>> +        clear_domain_page(_mfn(d->avic_log_apic_id_table_mfn));
>> +        smp_wmb();
>
> Is this needed? I think clear_page() guarantees visibility/ordering (we
> have SFENCE in clear_page_sse2() for this reason).
>
> -boris

Ah... Okay. Thanks for pointing out.

Suravee
diff --git a/xen/arch/x86/hvm/svm/avic.c b/xen/arch/x86/hvm/svm/avic.c
index b62f982..c0b7151 100644
--- a/xen/arch/x86/hvm/svm/avic.c
+++ b/xen/arch/x86/hvm/svm/avic.c
@@ -19,6 +19,7 @@
 #include <xen/sched.h>
 #include <xen/stdbool.h>
 #include <asm/acpi.h>
+#include <asm/apic.h>
 #include <asm/apicdef.h>
 #include <asm/event.h>
 #include <asm/hvm/emulate.h>
@@ -40,6 +41,8 @@
 #define AVIC_HPA_MASK      (((1ULL << 40) - 1) << AVIC_HPA_SHIFT)
 #define AVIC_VAPIC_BAR_MASK     AVIC_HPA_MASK
 
+#define AVIC_UNACCEL_ACCESS_OFFSET_MASK    0xFF0
+
 /*
  * Note:
  * Currently, svm-avic mode is not supported with nested virtualization.
@@ -122,6 +125,8 @@ int svm_avic_dom_init(struct domain *d)
     clear_domain_page(_mfn(mfn));
     d->arch.hvm_domain.svm.avic_phy_apic_id_table_mfn = mfn;
 
+    spin_lock_init(&d->arch.hvm_domain.svm.avic_ldr_mode_lock);
+
     return ret;
  err_out:
     svm_avic_dom_destroy(d);
@@ -222,6 +227,338 @@ int svm_avic_init_vmcb(struct vcpu *v)
 }
 
 /*
+ * Note:
+ * This function handles the AVIC_INCOMP_IPI #vmexit when AVIC is enabled.
+ * The hardware generates this fault when an IPI could not be delivered
+ * to all targeted guest virtual processors because at least one guest
+ * virtual processor was not allocated to a physical core at the time.
+ */
+void svm_avic_vmexit_do_incomp_ipi(struct cpu_user_regs *regs)
+{
+    struct vcpu *curr = current;
+    struct vmcb_struct *vmcb = curr->arch.hvm_svm.vmcb;
+    u32 icrh = vmcb->exitinfo1 >> 32;
+    u32 icrl = vmcb->exitinfo1;
+    u32 id = vmcb->exitinfo2 >> 32;
+    u32 index = vmcb->exitinfo2 && 0xFF;
+
+    switch ( id )
+    {
+    case AVIC_INCMP_IPI_ERR_INVALID_INT_TYPE:
+        /*
+         * AVIC hardware handles the delivery of
+         * IPIs when the specified Message Type is Fixed
+         * (also known as fixed delivery mode) and
+         * the Trigger Mode is edge-triggered. The hardware
+         * also supports self and broadcast delivery modes
+         * specified via the Destination Shorthand(DSH)
+         * field of the ICRL. Logical and physical APIC ID
+         * formats are supported. All other IPI types cause
+         * a #VMEXIT, which needs to emulated.
+         */
+        vlapic_reg_write(curr, APIC_ICR2, icrh);
+        vlapic_reg_write(curr, APIC_ICR, icrl);
+        break;
+    case AVIC_INCMP_IPI_ERR_TARGET_NOT_RUN:
+    {
+        /*
+         * At this point, we expect that the AVIC HW has already
+         * set the appropriate IRR bits on the valid target
+         * vcpus. So, we just need to kick the appropriate vcpu.
+         */
+        struct vcpu *curc;
+        struct domain *curd = curr->domain;
+        uint32_t dest = GET_xAPIC_DEST_FIELD(icrh);
+        uint32_t short_hand = icrl & APIC_SHORT_MASK;
+        bool dest_mode = !!(icrl & APIC_DEST_MASK);
+
+        for_each_vcpu ( curd, curc )
+        {
+            if ( curc != curr &&
+                 vlapic_match_dest(vcpu_vlapic(curc), vcpu_vlapic(curr),
+                                   short_hand, dest, dest_mode) )
+            {
+                vcpu_kick(curc);
+                break;
+            }
+        }
+        break;
+    }
+    case AVIC_INCMP_IPI_ERR_INV_TARGET:
+        dprintk(XENLOG_ERR,
+                "SVM: %s: Invalid IPI target (icr=%#08x:%08x, idx=%u)\n",
+                __func__, icrh, icrl, index);
+        break;
+    case AVIC_INCMP_IPI_ERR_INV_BK_PAGE:
+        dprintk(XENLOG_ERR,
+                "SVM: %s: Invalid bk page (icr=%#08x:%08x, idx=%u)\n",
+                __func__, icrh, icrl, index);
+        break;
+    default:
+        dprintk(XENLOG_ERR, "SVM: %s: Unknown IPI interception (%#x)\n",
+                __func__, id);
+    }
+}
+
+static struct avic_log_apic_id_ent *
+avic_get_logical_id_entry(const struct vcpu *v, u32 ldr, bool flat)
+{
+    unsigned int index;
+    struct avic_log_apic_id_ent *avic_log_apid_id_table;
+    const struct svm_domain *d = &v->domain->arch.hvm_domain.svm;
+    unsigned int dest_id = GET_APIC_LOGICAL_ID(ldr);
+
+    if ( !dest_id )
+        return NULL;
+
+    if ( flat )
+    {
+        index = ffs(dest_id) - 1;
+        if ( index > 7 )
+            return NULL;
+    }
+    else
+    {
+        unsigned int cluster = (dest_id & 0xf0) >> 4;
+        int apic = ffs(dest_id & 0x0f) - 1;
+
+        if ( (apic < 0) || (apic > 7) || (cluster >= 0xf) )
+            return NULL;
+        index = (cluster << 2) + apic;
+    }
+
+    ASSERT(index <= 255);
+
+    avic_log_apid_id_table = mfn_to_virt(d->avic_log_apic_id_table_mfn);
+
+    return &avic_log_apid_id_table[index];
+}
+
+static int avic_ldr_write(struct vcpu *v, u8 g_phy_id, u32 ldr, bool valid)
+{
+    struct avic_log_apic_id_ent *entry, new_entry;
+    u32 *bp = avic_get_bk_page_entry(v, APIC_DFR);
+
+    if ( !bp )
+        return -EINVAL;
+
+    entry = avic_get_logical_id_entry(v, ldr, (*bp == APIC_DFR_FLAT));
+    if (!entry)
+        return -EINVAL;
+
+    new_entry = *entry;
+    smp_rmb();
+    new_entry.guest_phy_apic_id = g_phy_id;
+    new_entry.valid = valid;
+    *entry = new_entry;
+    smp_wmb();
+
+    return 0;
+}
+
+static int avic_handle_ldr_update(struct vcpu *v)
+{
+    int ret = 0;
+    u32 *ldr = avic_get_bk_page_entry(v, APIC_LDR);
+    u32 *apic_id_reg = avic_get_bk_page_entry(v, APIC_ID);
+
+    if ( !ldr || !*ldr || !apic_id_reg )
+        return -EINVAL;
+
+    ret = avic_ldr_write(v, GET_APIC_PHYSICAL_ID(*apic_id_reg), *ldr, true);
+    if ( ret && v->arch.hvm_svm.avic_last_ldr )
+    {
+        /*
+         * Note:
+         * In case of failure to update LDR register,
+         * we set the guest physical APIC ID to 0,
+         * and set the entry logical APID ID entry
+         * to invalid (false).
+         */
+        avic_ldr_write(v, 0, v->arch.hvm_svm.avic_last_ldr, false);
+        v->arch.hvm_svm.avic_last_ldr = 0;
+    }
+    else
+    {
+        /*
+         * Note:
+         * This saves the last valid LDR so that we
+         * know which entry in the local APIC ID
+         * to clean up when the LDR is updated.
+         */
+        v->arch.hvm_svm.avic_last_ldr = *ldr;
+    }
+
+    return ret;
+}
+
+static int avic_handle_apic_id_update(struct vcpu *v, bool init)
+{
+    struct avic_phy_apic_id_ent *old, *new;
+    struct arch_svm_struct *s = &v->arch.hvm_svm;
+    u32 *apic_id_reg = avic_get_bk_page_entry(v, APIC_ID);
+
+    if ( !apic_id_reg )
+        return -EINVAL;
+
+    old = s->avic_last_phy_id;
+    ASSERT(old);
+
+    new = avic_get_phy_apic_id_ent(v, GET_APIC_PHYSICAL_ID(*apic_id_reg));
+    if ( !new )
+        return 0;
+
+    /* We need to move physical_id_entry to new offset */
+    *new = *old;
+    *((u64 *)old) = 0ULL;
+    s->avic_last_phy_id = new;
+
+    /*
+     * Update the guest physical APIC ID in the logical
+     * APIC ID table entry if LDR is already setup.
+     */
+    if ( v->arch.hvm_svm.avic_last_ldr )
+        avic_handle_ldr_update(v);
+
+    return 0;
+}
+
+static int avic_handle_dfr_update(struct vcpu *v)
+{
+    u32 mod;
+    struct svm_domain *d = &v->domain->arch.hvm_domain.svm;
+    u32 *dfr = avic_get_bk_page_entry(v, APIC_DFR);
+
+    if ( !dfr )
+        return -EINVAL;
+
+    mod = (*dfr >> 28) & 0xFu;
+
+    spin_lock(&d->avic_ldr_mode_lock);
+    if ( d->avic_ldr_mode != mod )
+    {
+        /*
+         * We assume that all local APICs are using the same type.
+         * If LDR mode changes, we need to flush the domain AVIC logical
+         * APIC id table.
+         */
+        clear_domain_page(_mfn(d->avic_log_apic_id_table_mfn));
+        smp_wmb();
+        d->avic_ldr_mode = mod;
+    }
+    spin_unlock(&d->avic_ldr_mode_lock);
+
+    if ( v->arch.hvm_svm.avic_last_ldr )
+        avic_handle_ldr_update(v);
+
+    return 0;
+}
+
+static int avic_unaccel_trap_write(struct vcpu *v)
+{
+    struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
+    u32 offset = vmcb->exitinfo1 & AVIC_UNACCEL_ACCESS_OFFSET_MASK;
+    u32 *reg = avic_get_bk_page_entry(v, offset);
+
+    if ( !reg )
+        return X86EMUL_UNHANDLEABLE;
+
+    switch ( offset )
+    {
+    case APIC_ID:
+        if ( avic_handle_apic_id_update(v, false) )
+            return X86EMUL_UNHANDLEABLE;
+        break;
+    case APIC_LDR:
+        if ( avic_handle_ldr_update(v) )
+            return X86EMUL_UNHANDLEABLE;
+        break;
+    case APIC_DFR:
+        if ( avic_handle_dfr_update(v) )
+            return X86EMUL_UNHANDLEABLE;
+        break;
+    default:
+        break;
+    }
+
+    vlapic_reg_write(v, offset, *reg);
+
+    return X86EMUL_OKAY;
+}
+
+/*
+ * Note:
+ * This function handles the AVIC_NOACCEL #vmexit when AVIC is enabled.
+ * The hardware generates this fault when :
+ * - A guest access to an APIC register that is not accelerated
+ *   by AVIC hardware.
+ * - EOI is attempted when the highest priority in-service interrupt
+ *   is level-triggered.
+ */
+void svm_avic_vmexit_do_noaccel(struct cpu_user_regs *regs)
+{
+    struct vcpu *curr = current;
+    struct vmcb_struct *vmcb = curr->arch.hvm_svm.vmcb;
+    u32 offset = vmcb->exitinfo1 & 0xFF0;
+    u32 rw = (vmcb->exitinfo1 >> 32) & 0x1;
+
+    switch ( offset )
+    {
+    case APIC_ID:
+    case APIC_EOI:
+    case APIC_RRR:
+    case APIC_LDR:
+    case APIC_DFR:
+    case APIC_SPIV:
+    case APIC_ESR:
+    case APIC_ICR:
+    case APIC_LVTT:
+    case APIC_LVTTHMR:
+    case APIC_LVTPC:
+    case APIC_LVT0:
+    case APIC_LVT1:
+    case APIC_LVTERR:
+    case APIC_TMICT:
+    case APIC_TDCR:
+        /*
+         * Handling AVIC Trap (intercept right after the access).
+         */
+        if ( !rw )
+        {
+            /*
+             * If a read trap happens, the CPU microcode does not
+             * implement the spec.
+             */
+            BUG();
+        }
+        if ( avic_unaccel_trap_write(curr) != X86EMUL_OKAY )
+        {
+            gprintk(XENLOG_ERR, "%s: Failed to handle trap write (%#x)\n",
+                    __func__, offset);
+            return;
+        }
+        break;
+    default:
+        /*
+         * Handling AVIC Fault (intercept before the access).
+         */
+        if ( !rw )
+        {
+            u32 *entry = avic_get_bk_page_entry(curr, offset);
+
+            if ( !entry )
+                return;
+
+            *entry = vlapic_read_aligned(vcpu_vlapic(curr), offset);
+        }
+        hvm_emulate_one_vm_event(EMUL_KIND_NORMAL, TRAP_invalid_op,
+                                 HVM_DELIVER_NO_ERROR_CODE);
+    }
+
+    return;
+}
+
+/*
  * Local variables:
  * mode: C
  * c-file-style: "BSD"
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index cef29b2..cb5281c 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -2682,6 +2682,14 @@ void svm_vmexit_handler(struct cpu_user_regs *regs)
         svm_vmexit_do_pause(regs);
         break;
 
+    case VMEXIT_AVIC_INCOMP_IPI:
+        svm_avic_vmexit_do_incomp_ipi(regs);
+        break;
+
+    case VMEXIT_AVIC_NOACCEL:
+        svm_avic_vmexit_do_noaccel(regs);
+        break;
+
     default:
     unexpected_exit_type:
         gdprintk(XENLOG_ERR, "unexpected VMEXIT: exit reason = %#"PRIx64", "
diff --git a/xen/include/asm-x86/apic.h b/xen/include/asm-x86/apic.h
index be9a535..aa5e7ec 100644
--- a/xen/include/asm-x86/apic.h
+++ b/xen/include/asm-x86/apic.h
@@ -16,6 +16,8 @@
 #define APIC_DEBUG 2
 
 #define SET_APIC_LOGICAL_ID(x) (((x)<<24))
+#define GET_APIC_LOGICAL_ID(x) (((x)>>24) & 0xFFu)
+#define GET_APIC_PHYSICAL_ID(x) (((x)>>24) & 0xFFu)
 
 #define IO_APIC_REDIR_VECTOR_MASK 0x000FF
 #define IO_APIC_REDIR_DEST_LOGICAL 0x00800
diff --git a/xen/include/asm-x86/hvm/svm/avic.h b/xen/include/asm-x86/hvm/svm/avic.h
index 16b433c..7e0abcb 100644
--- a/xen/include/asm-x86/hvm/svm/avic.h
+++ b/xen/include/asm-x86/hvm/svm/avic.h
@@ -37,4 +37,7 @@ bool_t svm_avic_vcpu_enabled(const struct vcpu *v);
 void svm_avic_update_vapic_bar(const struct vcpu *v,uint64_t data);
 int svm_avic_init_vmcb(struct vcpu *v);
 
+void svm_avic_vmexit_do_incomp_ipi(struct cpu_user_regs *regs);
+void svm_avic_vmexit_do_noaccel(struct cpu_user_regs *regs);
+
 #endif /* _SVM_AVIC_H_ */
diff --git a/xen/include/asm-x86/hvm/svm/vmcb.h b/xen/include/asm-x86/hvm/svm/vmcb.h
index d3d045f..9abf077 100644
--- a/xen/include/asm-x86/hvm/svm/vmcb.h
+++ b/xen/include/asm-x86/hvm/svm/vmcb.h
@@ -302,6 +302,8 @@ enum VMEXIT_EXITCODE
     VMEXIT_MWAIT_CONDITIONAL= 140, /* 0x8c */
     VMEXIT_XSETBV           = 141, /* 0x8d */
     VMEXIT_NPF              = 1024, /* 0x400, nested paging fault */
+    VMEXIT_AVIC_INCOMP_IPI  = 1025, /* 0x401 */
+    VMEXIT_AVIC_NOACCEL     = 1026, /* 0x402 */
     VMEXIT_INVALID          =  -1
 };
AVIC introduces two new #vmexit handlers:

VMEXIT_INCOMP_IPI:
This occurs when an IPI could not be delivered to all targeted guest
virtual processors because at least one guest virtual processor
was not allocated to a physical core at the time. In this case,
Xen would try to emulate IPI.

VMEXIT_DO_NOACCEL:
This oocurs when a guest access to an APIC register that cannot be
accelerated by AVIC. In this case, Xen tries to emulate register accesses.

This fault is also generated if an EOI isattempted when the highest priority
in-service interrupt is set for level-triggered mode.

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Jan Beulich <JBeulich@suse.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
 xen/arch/x86/hvm/svm/avic.c        | 337 +++++++++++++++++++++++++++++++++++++
 xen/arch/x86/hvm/svm/svm.c         |   8 +
 xen/include/asm-x86/apic.h         |   2 +
 xen/include/asm-x86/hvm/svm/avic.h |   3 +
 xen/include/asm-x86/hvm/svm/vmcb.h |   2 +
 5 files changed, 352 insertions(+)