
Domctl and physdevop for passthrough (Was: Re: Stabilising some tools only HVMOPs?)

Message ID 56D586B502000078000D7C28@prv-mh.provo.novell.com (mailing list archive)
State New, archived

Commit Message

Jan Beulich March 1, 2016, 11:10 a.m. UTC
>>> On 01.03.16 at 11:52, <wei.liu2@citrix.com> wrote:
> On Tue, Mar 01, 2016 at 12:54:09AM -0700, Jan Beulich wrote:
>> >>> On 29.02.16 at 19:12, <wei.liu2@citrix.com> wrote:
>> > I read the XSA-154 patch and thought a little about whether making a
>> >    dedicated hypercall is feasible.
>> > 
>> > 1. The patch for XSA-154 mentions that only MMIO mappings with
>> >    inconsistent attributes can cause system instability.
>> > 2. The PV case is hard, but the device model library is only of interest
>> >    to HVM domains, so PV can be ignored.
>> > 3. We want to continue honoring pinned cacheability attributes for HVM
>> >    domains.
>> > 
>> > It seems we have a way forward. Say we have a new hypercall just for
>> >    pinning the video RAM cacheability attribute.
>> > 
>> > The new hypercall has following properties:
>> > 
>> > 1. It can only be used on HVM domains.
>> > 2. It can only be used on mfns that are not in MMIO ranges, because
>> >    vram is just normal ram.
>> > 3. It can only set the cacheability attribute to WC (used by video RAM).
>> > 4. It is not considered stable.
>> > 
>> > so that it won't be abused to change cacheability attributes of MMIO
>> > mappings in a PV guest to make the host unstable. The stale data issue
>> > is of no relevance, as stated in the XSA-154 patch.
>> > 
>> > Does this sound plausible?
>> 
>> Yes, it does, but it extends our dependency on the assumption that what
>> we've been told in the context of XSA-154 is actually true (and has been
>> true for all earlier processor generations, and will continue to be true
>> in the future).
>> But then I don't immediately see why the existing
>> pinning operation won't suffice: It's a domctl (i.e. we can change
>> it), you say you don't need it to be stable, and it's already
>> documented as being intended for RAM only (albeit iirc that's not
>> getting enforced anywhere right now). The main present
>> problem (which I don't see a new hypercall solving) is that it's
>> GFN-based, and the GFN->MFN mapping can change after such
>> pinning got established. Otoh I think that by changing the
>> placement of the hvm_get_mem_pinned_cacheattr() calls we
>> could enforce the RAM-only aspect quite easily. Let me put
>> together a patch ...
> 
> That would be good. Thank you very much.

Actually, here you go, albeit compile-tested only for now. Maybe
you, Andrew, or someone else has some early comments or opinions
on it nevertheless. One thing to consider, cache-flushing-wise, is
whether, when deleting a WC range, it wouldn't suffice to just force
the write-combining buffers to be drained instead of doing a full
cache flush. But I guess that had better be a second patch anyway.

Jan

- call hvm_get_mem_pinned_cacheattr() for RAM ranges only (requires
  some re-ordering in epte_get_entry_emt(), to fully handle all MMIO
  aspects first)
- remove unnecessary indirection for hvm_get_mem_pinned_cacheattr()'s
  return of the type (see the caller sketch after this list)
- make hvm_set_mem_pinned_cacheattr() return an error on bad domain
  kind or obviously bad GFN range
- also avoid cache flush on EPT when removing a UC- range
- other code structure adjustments without intended functional change
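
To illustrate the changed return convention in the second bullet, a
hypothetical caller (the surrounding logic here is made up, not taken from
the patch) would now do:

    int type = hvm_get_mem_pinned_cacheattr(d, gfn, order);

    if ( type >= 0 )
    {
        /* The whole 2^order GFN range lies inside one pinned range;
         * 'type' is its PAT type (e.g. PAT_TYPE_WRCOMB for pinned vram). */
    }
    else if ( type == -EADDRNOTAVAIL )
    {
        /* Only part of the (order > 0) range is covered by a pinned range;
         * epte_get_entry_emt() turns this into -1, i.e. no single memory
         * type can be used for the whole superpage. */
    }
    else /* -ENXIO */
    {
        /* No pinned range covers the GFN; fall back to gMTRR/gPAT. */
    }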

--- unstable.orig/xen/arch/x86/hvm/mtrr.c
+++ unstable/xen/arch/x86/hvm/mtrr.c
@@ -521,14 +521,12 @@ struct hvm_mem_pinned_cacheattr_range {
 
 static DEFINE_RCU_READ_LOCK(pinned_cacheattr_rcu_lock);
 
-void hvm_init_cacheattr_region_list(
-    struct domain *d)
+void hvm_init_cacheattr_region_list(struct domain *d)
 {
     INIT_LIST_HEAD(&d->arch.hvm_domain.pinned_cacheattr_ranges);
 }
 
-void hvm_destroy_cacheattr_region_list(
-    struct domain *d)
+void hvm_destroy_cacheattr_region_list(struct domain *d)
 {
     struct list_head *head = &d->arch.hvm_domain.pinned_cacheattr_ranges;
     struct hvm_mem_pinned_cacheattr_range *range;
@@ -543,20 +541,14 @@ void hvm_destroy_cacheattr_region_list(
     }
 }
 
-int hvm_get_mem_pinned_cacheattr(
-    struct domain *d,
-    uint64_t guest_fn,
-    unsigned int order,
-    uint32_t *type)
+int hvm_get_mem_pinned_cacheattr(struct domain *d, uint64_t guest_fn,
+                                 unsigned int order)
 {
     struct hvm_mem_pinned_cacheattr_range *range;
     uint64_t mask = ~(uint64_t)0 << order;
-    int rc = 0;
+    int rc = -ENXIO;
 
-    *type = ~0;
-
-    if ( !is_hvm_domain(d) )
-        return 0;
+    ASSERT(has_hvm_container_domain(d));
 
     rcu_read_lock(&pinned_cacheattr_rcu_lock);
     list_for_each_entry_rcu ( range,
@@ -566,14 +558,13 @@ int hvm_get_mem_pinned_cacheattr(
         if ( ((guest_fn & mask) >= range->start) &&
              ((guest_fn | ~mask) <= range->end) )
         {
-            *type = range->type;
-            rc = 1;
+            rc = range->type;
             break;
         }
         if ( ((guest_fn & mask) <= range->end) &&
              (range->start <= (guest_fn | ~mask)) )
         {
-            rc = -1;
+            rc = -EADDRNOTAVAIL;
             break;
         }
     }
@@ -587,20 +578,21 @@ static void free_pinned_cacheattr_entry(
     xfree(container_of(rcu, struct hvm_mem_pinned_cacheattr_range, rcu));
 }
 
-int32_t hvm_set_mem_pinned_cacheattr(
-    struct domain *d,
-    uint64_t gfn_start,
-    uint64_t gfn_end,
-    uint32_t  type)
+int hvm_set_mem_pinned_cacheattr(struct domain *d, uint64_t gfn_start,
+                                 uint64_t gfn_end, uint32_t type)
 {
     struct hvm_mem_pinned_cacheattr_range *range;
     int rc = 1;
 
-    if ( !is_hvm_domain(d) || gfn_end < gfn_start )
-        return 0;
+    if ( !is_hvm_domain(d) )
+        return -EOPNOTSUPP;
+
+    if ( gfn_end < gfn_start || (gfn_start | gfn_end) >> paddr_bits )
+        return -EINVAL;
 
-    if ( type == XEN_DOMCTL_DELETE_MEM_CACHEATTR )
+    switch ( type )
     {
+    case XEN_DOMCTL_DELETE_MEM_CACHEATTR:
         /* Remove the requested range. */
         rcu_read_lock(&pinned_cacheattr_rcu_lock);
         list_for_each_entry_rcu ( range,
@@ -613,22 +605,37 @@ int32_t hvm_set_mem_pinned_cacheattr(
                 type = range->type;
                 call_rcu(&range->rcu, free_pinned_cacheattr_entry);
                 p2m_memory_type_changed(d);
-                if ( type != PAT_TYPE_UNCACHABLE )
+                switch ( type )
+                {
+                case PAT_TYPE_UC_MINUS:
+                    /*
+                     * For EPT we can also avoid the flush in this case;
+                     * see epte_get_entry_emt().
+                     */
+                    if ( hap_enabled(d) && cpu_has_vmx )
+                case PAT_TYPE_UNCACHABLE:
+                        break;
+                    /* fall through */
+                default:
                     flush_all(FLUSH_CACHE);
+                    break;
+                }
                 return 0;
             }
         rcu_read_unlock(&pinned_cacheattr_rcu_lock);
         return -ENOENT;
-    }
 
-    if ( !((type == PAT_TYPE_UNCACHABLE) ||
-           (type == PAT_TYPE_WRCOMB) ||
-           (type == PAT_TYPE_WRTHROUGH) ||
-           (type == PAT_TYPE_WRPROT) ||
-           (type == PAT_TYPE_WRBACK) ||
-           (type == PAT_TYPE_UC_MINUS)) ||
-         !is_hvm_domain(d) )
+    case PAT_TYPE_UC_MINUS:
+    case PAT_TYPE_UNCACHABLE:
+    case PAT_TYPE_WRBACK:
+    case PAT_TYPE_WRCOMB:
+    case PAT_TYPE_WRPROT:
+    case PAT_TYPE_WRTHROUGH:
+        break;
+
+    default:
         return -EINVAL;
+    }
 
     rcu_read_lock(&pinned_cacheattr_rcu_lock);
     list_for_each_entry_rcu ( range,
@@ -762,7 +769,6 @@ int epte_get_entry_emt(struct domain *d,
                        unsigned int order, uint8_t *ipat, bool_t direct_mmio)
 {
     int gmtrr_mtype, hmtrr_mtype;
-    uint32_t type;
     struct vcpu *v = current;
 
     *ipat = 0;
@@ -782,30 +788,28 @@ int epte_get_entry_emt(struct domain *d,
                                  mfn_x(mfn) + (1UL << order) - 1) )
         return -1;
 
-    switch ( hvm_get_mem_pinned_cacheattr(d, gfn, order, &type) )
+    if ( direct_mmio )
     {
-    case 1:
+        if ( (mfn_x(mfn) ^ d->arch.hvm_domain.vmx.apic_access_mfn) >> order )
+            return MTRR_TYPE_UNCACHABLE;
+        if ( order )
+            return -1;
         *ipat = 1;
-        return type != PAT_TYPE_UC_MINUS ? type : PAT_TYPE_UNCACHABLE;
-    case -1:
-        return -1;
+        return MTRR_TYPE_WRBACK;
     }
 
-    if ( !need_iommu(d) && !cache_flush_permitted(d) )
+    gmtrr_mtype = hvm_get_mem_pinned_cacheattr(d, gfn, order);
+    if ( gmtrr_mtype >= 0 )
     {
-        ASSERT(!direct_mmio ||
-               !((mfn_x(mfn) ^ d->arch.hvm_domain.vmx.apic_access_mfn) >>
-                 order));
         *ipat = 1;
-        return MTRR_TYPE_WRBACK;
+        return gmtrr_mtype != PAT_TYPE_UC_MINUS ? gmtrr_mtype
+                                                : MTRR_TYPE_UNCACHABLE;
     }
+    if ( gmtrr_mtype == -EADDRNOTAVAIL )
+        return -1;
 
-    if ( direct_mmio )
+    if ( !need_iommu(d) && !cache_flush_permitted(d) )
     {
-        if ( (mfn_x(mfn) ^ d->arch.hvm_domain.vmx.apic_access_mfn) >> order )
-            return MTRR_TYPE_UNCACHABLE;
-        if ( order )
-            return -1;
         *ipat = 1;
         return MTRR_TYPE_WRBACK;
     }
--- unstable.orig/xen/arch/x86/mm/shadow/multi.c
+++ unstable/xen/arch/x86/mm/shadow/multi.c
@@ -607,7 +607,7 @@ _sh_propagate(struct vcpu *v,
     if ( (level == 1) && is_hvm_domain(d) &&
          !is_xen_heap_mfn(mfn_x(target_mfn)) )
     {
-        unsigned int type;
+        int type;
 
         ASSERT(!(sflags & (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT)));
 
@@ -618,7 +618,9 @@ _sh_propagate(struct vcpu *v,
          * 3) if disables snoop control, compute the PAT index with
          *    gMTRR and gPAT.
          */
-        if ( hvm_get_mem_pinned_cacheattr(d, gfn_x(target_gfn), 0, &type) )
+        if ( !mmio_mfn &&
+             (type = hvm_get_mem_pinned_cacheattr(d, gfn_x(target_gfn),
+                                                  0)) >= 0 )
             sflags |= pat_type_2_pte_flags(type);
         else if ( d->arch.hvm_domain.is_in_uc_mode )
             sflags |= pat_type_2_pte_flags(PAT_TYPE_UNCACHABLE);
--- unstable.orig/xen/include/asm-x86/hvm/cacheattr.h
+++ unstable/xen/include/asm-x86/hvm/cacheattr.h
@@ -1,29 +1,23 @@
 #ifndef __HVM_CACHEATTR_H__
 #define __HVM_CACHEATTR_H__
 
-void hvm_init_cacheattr_region_list(
-    struct domain *d);
-void hvm_destroy_cacheattr_region_list(
-    struct domain *d);
+#include <xen/types.h>
+
+struct domain;
+void hvm_init_cacheattr_region_list(struct domain *d);
+void hvm_destroy_cacheattr_region_list(struct domain *d);
 
 /*
  * To see guest_fn is in the pinned range or not,
- * if yes, return 1, and set type to value in this range
- * if no,  return 0, setting type to ~0
- * if ambiguous, return -1, setting type to ~0 (possible only for order > 0)
+ * if yes, return the (non-negative) type
+ * if no or ambiguous, return a negative error code
  */
-int hvm_get_mem_pinned_cacheattr(
-    struct domain *d,
-    uint64_t guest_fn,
-    unsigned int order,
-    uint32_t *type);
+int hvm_get_mem_pinned_cacheattr(struct domain *d, uint64_t guest_fn,
+                                 unsigned int order);
 
 
 /* Set pinned caching type for a domain. */
-int32_t hvm_set_mem_pinned_cacheattr(
-    struct domain *d,
-    uint64_t gfn_start,
-    uint64_t gfn_end,
-    uint32_t  type);
+int hvm_set_mem_pinned_cacheattr(struct domain *d, uint64_t gfn_start,
+                                 uint64_t gfn_end, uint32_t type);
 
 #endif /* __HVM_CACHEATTR_H__ */
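
Going back to the start of the thread (using the existing pinning domctl,
rather than a new hypercall, to mark guest video RAM as WC), a hedged
sketch of the device-model/toolstack side via libxc. The helper name
pin_vram_wc() and the VRAM layout are made up; to my understanding,
xc_domain_pin_memory_cacheattr() and XEN_DOMCTL_MEM_CACHEATTR_WC are the
existing interface, with start/end passed as inclusive GFNs:

    #include <xenctrl.h>

    /* Pin the guest's video RAM (guest-physical address vram_gpa,
     * vram_size bytes) as write-combining, via the existing
     * XEN_DOMCTL_pin_mem_cacheattr interface. */
    static int pin_vram_wc(xc_interface *xch, uint32_t domid,
                           uint64_t vram_gpa, uint64_t vram_size)
    {
        uint64_t start = vram_gpa >> XC_PAGE_SHIFT;
        uint64_t end   = (vram_gpa + vram_size - 1) >> XC_PAGE_SHIFT;

        return xc_domain_pin_memory_cacheattr(xch, domid, start, end,
                                              XEN_DOMCTL_MEM_CACHEATTR_WC);
    }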