Message ID | 1479434244-10223-3-git-send-email-feng.wu@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
> From: Wu, Feng > Sent: Friday, November 18, 2016 9:57 AM > > This patch handles some corner cases when the last assigned device > is removed from the domain. In this case we should carefully handle > pi descriptor and the per-cpu blocking list, to make sure: > - all the PI descriptor are in the right state when next time a > devices is assigned to the domain again. > - No remaining vcpus of the domain in the per-cpu blocking list. > > Here we call vmx_pi_unblock_vcpu() to remove the vCPU from the blocking list > if it is on the list. However, this could happen when vmx_vcpu_block() is > being called, hence we might incorrectly add the vCPU to the blocking list > while the last devcie is detached from the domain. Consider that the situation > can only occur when detaching the last device from the domain and it is not > a frequent operation, so we use domain_pause before that, which is considered > as an clean and maintainable solution for the situation. > > Signed-off-by: Feng Wu <feng.wu@intel.com> > Reviewed-by: Jan Beulich <jbeulich@suse.com> > --- > v7: > - Prevent the domain from pausing itself. > > v6: > - Comments changes > - Rename vmx_pi_list_remove() to vmx_pi_unblock_vcpu() > > v5: > - Remove a no-op wrapper > > v4: > - Rename some functions: > vmx_pi_remove_vcpu_from_blocking_list() -> vmx_pi_list_remove() > vmx_pi_blocking_cleanup() -> vmx_pi_list_cleanup() > - Remove the check in vmx_pi_list_cleanup() > - Comments adjustment > > xen/arch/x86/hvm/vmx/vmx.c | 28 ++++++++++++++++++++++++---- > xen/drivers/passthrough/pci.c | 14 ++++++++++++++ > 2 files changed, 38 insertions(+), 4 deletions(-) > > diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c > index f3911f2..a8dcabe 100644 > --- a/xen/arch/x86/hvm/vmx/vmx.c > +++ b/xen/arch/x86/hvm/vmx/vmx.c > @@ -158,14 +158,12 @@ static void vmx_pi_switch_to(struct vcpu *v) > pi_clear_sn(pi_desc); > } > > -static void vmx_pi_do_resume(struct vcpu *v) > +static void vmx_pi_unblock_vcpu(struct vcpu *v) > { > unsigned long flags; > spinlock_t *pi_blocking_list_lock; > struct pi_desc *pi_desc = &v->arch.hvm_vmx.pi_desc; > > - ASSERT(!test_bit(_VPF_blocked, &v->pause_flags)); > - > /* > * Set 'NV' field back to posted_intr_vector, so the > * Posted-Interrupts can be delivered to the vCPU when > @@ -173,12 +171,12 @@ static void vmx_pi_do_resume(struct vcpu *v) > */ > write_atomic(&pi_desc->nv, posted_intr_vector); > > - /* The vCPU is not on any blocking list. */ > pi_blocking_list_lock = v->arch.hvm_vmx.pi_blocking.lock; > > /* Prevent the compiler from eliminating the local variable.*/ > smp_rmb(); > > + /* The vCPU is not on any blocking list. */ > if ( pi_blocking_list_lock == NULL ) > return; > > @@ -198,6 +196,13 @@ static void vmx_pi_do_resume(struct vcpu *v) > spin_unlock_irqrestore(pi_blocking_list_lock, flags); > } > > +static void vmx_pi_do_resume(struct vcpu *v) > +{ > + ASSERT(!test_bit(_VPF_blocked, &v->pause_flags)); > + > + vmx_pi_unblock_vcpu(v); > +} > + > /* This function is called when pcidevs_lock is held */ > void vmx_pi_hooks_assign(struct domain *d) > { > @@ -215,11 +220,21 @@ void vmx_pi_hooks_assign(struct domain *d) > /* This function is called when pcidevs_lock is held */ > void vmx_pi_hooks_deassign(struct domain *d) > { > + struct vcpu *v; > + > if ( !iommu_intpost || !has_hvm_container_domain(d) ) > return; > > ASSERT(d->arch.hvm_domain.vmx.vcpu_block); > > + /* > + * Pausing the domain can make sure the vCPU is not "the vCPUs are" > + * running and hence not calling the hooks simultaneously > + * when deassigning the PI hooks and removing the vCPU > + * from the blocking list. > + */ > + domain_pause(d); > + > d->arch.hvm_domain.vmx.vcpu_block = NULL; > d->arch.hvm_domain.vmx.pi_switch_from = NULL; > d->arch.hvm_domain.vmx.pi_do_resume = NULL; > @@ -230,6 +245,11 @@ void vmx_pi_hooks_deassign(struct domain *d) > * assigned and "from" hook is NULL. However, it is not straightforward > * to find a clear solution, so just leave it here. > */ > + > + for_each_vcpu ( d, v ) > + vmx_pi_unblock_vcpu(v); > + > + domain_unpause(d); > } > > static int vmx_domain_initialise(struct domain *d) > diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c > index 8bce213..e71732f 100644 > --- a/xen/drivers/passthrough/pci.c > +++ b/xen/drivers/passthrough/pci.c > @@ -1602,6 +1602,13 @@ int iommu_do_pci_domctl( > break; > > case XEN_DOMCTL_assign_device: > + /* no domain_pause() */ > + if ( d == current->domain ) > + { > + ret = -EINVAL; > + break; > + } > + don't understand why adding above check, and why "no domain_pause" matters in this change. > ret = -ENODEV; > if ( domctl->u.assign_device.dev != XEN_DOMCTL_DEV_PCI ) > break; > @@ -1642,6 +1649,13 @@ int iommu_do_pci_domctl( > break; > > case XEN_DOMCTL_deassign_device: > + /* no domain_pause() */ > + if ( d == current->domain ) > + { > + ret = -EINVAL; > + break; > + } > + > ret = -ENODEV; > if ( domctl->u.assign_device.dev != XEN_DOMCTL_DEV_PCI ) > break; > -- > 2.1.0
> -----Original Message----- > From: Tian, Kevin > Sent: Friday, November 18, 2016 11:19 AM > To: Wu, Feng <feng.wu@intel.com>; xen-devel@lists.xen.org > Cc: jbeulich@suse.com; andrew.cooper3@citrix.com; > george.dunlap@eu.citrix.com; dario.faggioli@citrix.com > Subject: RE: [PATCH v8 2/7] VMX: Properly handle pi when all the assigned > devices are removed > > > From: Wu, Feng > > Sent: Friday, November 18, 2016 9:57 AM > > > > This patch handles some corner cases when the last assigned device > > is removed from the domain. In this case we should carefully handle > > pi descriptor and the per-cpu blocking list, to make sure: > > - all the PI descriptor are in the right state when next time a > > devices is assigned to the domain again. > > - No remaining vcpus of the domain in the per-cpu blocking list. > > > > Here we call vmx_pi_unblock_vcpu() to remove the vCPU from the blocking list > > if it is on the list. However, this could happen when vmx_vcpu_block() is > > being called, hence we might incorrectly add the vCPU to the blocking list > > while the last devcie is detached from the domain. Consider that the situation > > can only occur when detaching the last device from the domain and it is not > > a frequent operation, so we use domain_pause before that, which is > considered > > as an clean and maintainable solution for the situation. > > > > Signed-off-by: Feng Wu <feng.wu@intel.com> > > Reviewed-by: Jan Beulich <jbeulich@suse.com> > > --- > > v7: > > - Prevent the domain from pausing itself. > > > > v6: > > - Comments changes > > - Rename vmx_pi_list_remove() to vmx_pi_unblock_vcpu() > > > > v5: > > - Remove a no-op wrapper > > > > v4: > > - Rename some functions: > > vmx_pi_remove_vcpu_from_blocking_list() -> vmx_pi_list_remove() > > vmx_pi_blocking_cleanup() -> vmx_pi_list_cleanup() > > - Remove the check in vmx_pi_list_cleanup() > > - Comments adjustment > > > > xen/arch/x86/hvm/vmx/vmx.c | 28 ++++++++++++++++++++++++---- > > xen/drivers/passthrough/pci.c | 14 ++++++++++++++ > > 2 files changed, 38 insertions(+), 4 deletions(-) > > > > diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c > > index f3911f2..a8dcabe 100644 > > --- a/xen/arch/x86/hvm/vmx/vmx.c > > +++ b/xen/arch/x86/hvm/vmx/vmx.c > > @@ -158,14 +158,12 @@ static void vmx_pi_switch_to(struct vcpu *v) > > pi_clear_sn(pi_desc); > > } > > > > -static void vmx_pi_do_resume(struct vcpu *v) > > +static void vmx_pi_unblock_vcpu(struct vcpu *v) > > { > > unsigned long flags; > > spinlock_t *pi_blocking_list_lock; > > struct pi_desc *pi_desc = &v->arch.hvm_vmx.pi_desc; > > > > - ASSERT(!test_bit(_VPF_blocked, &v->pause_flags)); > > - > > /* > > * Set 'NV' field back to posted_intr_vector, so the > > * Posted-Interrupts can be delivered to the vCPU when > > @@ -173,12 +171,12 @@ static void vmx_pi_do_resume(struct vcpu *v) > > */ > > write_atomic(&pi_desc->nv, posted_intr_vector); > > > > - /* The vCPU is not on any blocking list. */ > > pi_blocking_list_lock = v->arch.hvm_vmx.pi_blocking.lock; > > > > /* Prevent the compiler from eliminating the local variable.*/ > > smp_rmb(); > > > > + /* The vCPU is not on any blocking list. */ > > if ( pi_blocking_list_lock == NULL ) > > return; > > > > @@ -198,6 +196,13 @@ static void vmx_pi_do_resume(struct vcpu *v) > > spin_unlock_irqrestore(pi_blocking_list_lock, flags); > > } > > > > +static void vmx_pi_do_resume(struct vcpu *v) > > +{ > > + ASSERT(!test_bit(_VPF_blocked, &v->pause_flags)); > > + > > + vmx_pi_unblock_vcpu(v); > > +} > > + > > /* This function is called when pcidevs_lock is held */ > > void vmx_pi_hooks_assign(struct domain *d) > > { > > @@ -215,11 +220,21 @@ void vmx_pi_hooks_assign(struct domain *d) > > /* This function is called when pcidevs_lock is held */ > > void vmx_pi_hooks_deassign(struct domain *d) > > { > > + struct vcpu *v; > > + > > if ( !iommu_intpost || !has_hvm_container_domain(d) ) > > return; > > > > ASSERT(d->arch.hvm_domain.vmx.vcpu_block); > > > > + /* > > + * Pausing the domain can make sure the vCPU is not > > "the vCPUs are" > > > + * running and hence not calling the hooks simultaneously > > + * when deassigning the PI hooks and removing the vCPU > > + * from the blocking list. > > + */ > > + domain_pause(d); > > + > > d->arch.hvm_domain.vmx.vcpu_block = NULL; > > d->arch.hvm_domain.vmx.pi_switch_from = NULL; > > d->arch.hvm_domain.vmx.pi_do_resume = NULL; > > @@ -230,6 +245,11 @@ void vmx_pi_hooks_deassign(struct domain *d) > > * assigned and "from" hook is NULL. However, it is not straightforward > > * to find a clear solution, so just leave it here. > > */ > > + > > + for_each_vcpu ( d, v ) > > + vmx_pi_unblock_vcpu(v); > > + > > + domain_unpause(d); > > } > > > > static int vmx_domain_initialise(struct domain *d) > > diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c > > index 8bce213..e71732f 100644 > > --- a/xen/drivers/passthrough/pci.c > > +++ b/xen/drivers/passthrough/pci.c > > @@ -1602,6 +1602,13 @@ int iommu_do_pci_domctl( > > break; > > > > case XEN_DOMCTL_assign_device: > > + /* no domain_pause() */ > > + if ( d == current->domain ) > > + { > > + ret = -EINVAL; > > + break; > > + } > > + > > don't understand why adding above check, and why "no domain_pause" > matters in this change. In fact, this change is according Jan's following comments on v6: " There's one additional caveat here which no-one of us so far thought of: Currently there's nothing preventing the domctl-s under which this sits from being issued by the control domain for itself. Various other domctl-s, however, guard against this case when intending to pause the target domain. The same needs to be done for the ones leading here." We need to prevent the domain from pausing itself. Thanks, Feng > > > ret = -ENODEV; > > if ( domctl->u.assign_device.dev != XEN_DOMCTL_DEV_PCI ) > > break; > > @@ -1642,6 +1649,13 @@ int iommu_do_pci_domctl( > > break; > > > > case XEN_DOMCTL_deassign_device: > > + /* no domain_pause() */ > > + if ( d == current->domain ) > > + { > > + ret = -EINVAL; > > + break; > > + } > > + > > ret = -ENODEV; > > if ( domctl->u.assign_device.dev != XEN_DOMCTL_DEV_PCI ) > > break; > > -- > > 2.1.0
> From: Wu, Feng > Sent: Friday, November 18, 2016 12:27 PM > > > diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c > > > index 8bce213..e71732f 100644 > > > --- a/xen/drivers/passthrough/pci.c > > > +++ b/xen/drivers/passthrough/pci.c > > > @@ -1602,6 +1602,13 @@ int iommu_do_pci_domctl( > > > break; > > > > > > case XEN_DOMCTL_assign_device: > > > + /* no domain_pause() */ > > > + if ( d == current->domain ) > > > + { > > > + ret = -EINVAL; > > > + break; > > > + } > > > + > > > > don't understand why adding above check, and why "no domain_pause" > > matters in this change. > > In fact, this change is according Jan's following comments on v6: > > " There's one additional caveat here which no-one of us so far thought > of: Currently there's nothing preventing the domctl-s under which > this sits from being issued by the control domain for itself. Various > other domctl-s, however, guard against this case when intending > to pause the target domain. The same needs to be done for the > ones leading here." > > We need to prevent the domain from pausing itself. > XEN_DOMCTL_assign_device doesn't imply a domain_pause operation, at least not obvious in this level. If we don't have PI enabled underneath, is above guard still necessary? If the answer is yes, the comment should be elaborated for easy understanding... :-) Thanks Kevin
> -----Original Message----- > From: Tian, Kevin > Sent: Friday, November 18, 2016 12:59 PM > To: Wu, Feng <feng.wu@intel.com>; xen-devel@lists.xen.org > Cc: jbeulich@suse.com; andrew.cooper3@citrix.com; > george.dunlap@eu.citrix.com; dario.faggioli@citrix.com > Subject: RE: [PATCH v8 2/7] VMX: Properly handle pi when all the assigned > devices are removed > > > From: Wu, Feng > > Sent: Friday, November 18, 2016 12:27 PM > > > > diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c > > > > index 8bce213..e71732f 100644 > > > > --- a/xen/drivers/passthrough/pci.c > > > > +++ b/xen/drivers/passthrough/pci.c > > > > @@ -1602,6 +1602,13 @@ int iommu_do_pci_domctl( > > > > break; > > > > > > > > case XEN_DOMCTL_assign_device: > > > > + /* no domain_pause() */ > > > > + if ( d == current->domain ) > > > > + { > > > > + ret = -EINVAL; > > > > + break; > > > > + } > > > > + > > > > > > don't understand why adding above check, and why "no domain_pause" > > > matters in this change. > > > > In fact, this change is according Jan's following comments on v6: > > > > " There's one additional caveat here which no-one of us so far thought > > of: Currently there's nothing preventing the domctl-s under which > > this sits from being issued by the control domain for itself. Various > > other domctl-s, however, guard against this case when intending > > to pause the target domain. The same needs to be done for the > > ones leading here." > > > > We need to prevent the domain from pausing itself. > > > > XEN_DOMCTL_assign_device doesn't imply a domain_pause operation, > at least not obvious in this level. If we don't have PI enabled underneath, > is above guard still necessary? If the answer is yes, the comment should > be elaborated for easy understanding... :-) I think the domain_pause() is introduced in PI related logic. So maybe I need Jan's comments about whether we need to add this check unconditionally here. Anyway, the comments need to be more detailed. Thanks, Feng > > Thanks > Kevin
>>> On 18.11.16 at 06:22, <feng.wu@intel.com> wrote: >> From: Tian, Kevin >> Sent: Friday, November 18, 2016 12:59 PM >> > From: Wu, Feng >> > Sent: Friday, November 18, 2016 12:27 PM >> > > > diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c >> > > > index 8bce213..e71732f 100644 >> > > > --- a/xen/drivers/passthrough/pci.c >> > > > +++ b/xen/drivers/passthrough/pci.c >> > > > @@ -1602,6 +1602,13 @@ int iommu_do_pci_domctl( >> > > > break; >> > > > >> > > > case XEN_DOMCTL_assign_device: >> > > > + /* no domain_pause() */ >> > > > + if ( d == current->domain ) >> > > > + { >> > > > + ret = -EINVAL; >> > > > + break; >> > > > + } >> > > > + >> > > >> > > don't understand why adding above check, and why "no domain_pause" >> > > matters in this change. >> > >> > In fact, this change is according Jan's following comments on v6: >> > >> > " There's one additional caveat here which no-one of us so far thought >> > of: Currently there's nothing preventing the domctl-s under which >> > this sits from being issued by the control domain for itself. Various >> > other domctl-s, however, guard against this case when intending >> > to pause the target domain. The same needs to be done for the >> > ones leading here." >> > >> > We need to prevent the domain from pausing itself. >> > >> >> XEN_DOMCTL_assign_device doesn't imply a domain_pause operation, >> at least not obvious in this level. If we don't have PI enabled underneath, >> is above guard still necessary? If the answer is yes, the comment should >> be elaborated for easy understanding... :-) > > I think the domain_pause() is introduced in PI related logic. So maybe I > need Jan's comments about whether we need to add this check unconditionally > here. Anyway, the comments need to be more detailed. I've never said anything about the specifics of the check (e.g. whether it needs to be unconditional), I had only pointed out that such a check is needed if you're adding pausing. That said, I don't think there's anything wrong with having it unconditional: In the end we don't mean to support self-(de)assignment of devices. Jan
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c index f3911f2..a8dcabe 100644 --- a/xen/arch/x86/hvm/vmx/vmx.c +++ b/xen/arch/x86/hvm/vmx/vmx.c @@ -158,14 +158,12 @@ static void vmx_pi_switch_to(struct vcpu *v) pi_clear_sn(pi_desc); } -static void vmx_pi_do_resume(struct vcpu *v) +static void vmx_pi_unblock_vcpu(struct vcpu *v) { unsigned long flags; spinlock_t *pi_blocking_list_lock; struct pi_desc *pi_desc = &v->arch.hvm_vmx.pi_desc; - ASSERT(!test_bit(_VPF_blocked, &v->pause_flags)); - /* * Set 'NV' field back to posted_intr_vector, so the * Posted-Interrupts can be delivered to the vCPU when @@ -173,12 +171,12 @@ static void vmx_pi_do_resume(struct vcpu *v) */ write_atomic(&pi_desc->nv, posted_intr_vector); - /* The vCPU is not on any blocking list. */ pi_blocking_list_lock = v->arch.hvm_vmx.pi_blocking.lock; /* Prevent the compiler from eliminating the local variable.*/ smp_rmb(); + /* The vCPU is not on any blocking list. */ if ( pi_blocking_list_lock == NULL ) return; @@ -198,6 +196,13 @@ static void vmx_pi_do_resume(struct vcpu *v) spin_unlock_irqrestore(pi_blocking_list_lock, flags); } +static void vmx_pi_do_resume(struct vcpu *v) +{ + ASSERT(!test_bit(_VPF_blocked, &v->pause_flags)); + + vmx_pi_unblock_vcpu(v); +} + /* This function is called when pcidevs_lock is held */ void vmx_pi_hooks_assign(struct domain *d) { @@ -215,11 +220,21 @@ void vmx_pi_hooks_assign(struct domain *d) /* This function is called when pcidevs_lock is held */ void vmx_pi_hooks_deassign(struct domain *d) { + struct vcpu *v; + if ( !iommu_intpost || !has_hvm_container_domain(d) ) return; ASSERT(d->arch.hvm_domain.vmx.vcpu_block); + /* + * Pausing the domain can make sure the vCPU is not + * running and hence not calling the hooks simultaneously + * when deassigning the PI hooks and removing the vCPU + * from the blocking list. + */ + domain_pause(d); + d->arch.hvm_domain.vmx.vcpu_block = NULL; d->arch.hvm_domain.vmx.pi_switch_from = NULL; d->arch.hvm_domain.vmx.pi_do_resume = NULL; @@ -230,6 +245,11 @@ void vmx_pi_hooks_deassign(struct domain *d) * assigned and "from" hook is NULL. However, it is not straightforward * to find a clear solution, so just leave it here. */ + + for_each_vcpu ( d, v ) + vmx_pi_unblock_vcpu(v); + + domain_unpause(d); } static int vmx_domain_initialise(struct domain *d) diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c index 8bce213..e71732f 100644 --- a/xen/drivers/passthrough/pci.c +++ b/xen/drivers/passthrough/pci.c @@ -1602,6 +1602,13 @@ int iommu_do_pci_domctl( break; case XEN_DOMCTL_assign_device: + /* no domain_pause() */ + if ( d == current->domain ) + { + ret = -EINVAL; + break; + } + ret = -ENODEV; if ( domctl->u.assign_device.dev != XEN_DOMCTL_DEV_PCI ) break; @@ -1642,6 +1649,13 @@ int iommu_do_pci_domctl( break; case XEN_DOMCTL_deassign_device: + /* no domain_pause() */ + if ( d == current->domain ) + { + ret = -EINVAL; + break; + } + ret = -ENODEV; if ( domctl->u.assign_device.dev != XEN_DOMCTL_DEV_PCI ) break;