diff mbox

[v7,08/16] x86: implement set value flow for MBA

Message ID 1507884068-18757-9-git-send-email-yi.y.sun@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Yi Sun Oct. 13, 2017, 8:41 a.m. UTC
This patch implements the set value flow for MBA, including its callback
function and the domctl interface.

Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
---
CC: Jan Beulich <jbeulich@suse.com>
CC: Andrew Cooper <andrew.cooper3@citrix.com>
CC: Wei Liu <wei.liu2@citrix.com>
CC: Roger Pau Monné <roger.pau@citrix.com>
CC: Chao Peng <chao.p.peng@linux.intel.com>

v7:
    - change name of 'check_val' to 'sanitize'.
      (suggested by Jan Beulich)
    - fix comments.
      (suggested by Jan Beulich)
    - add parentheses and change '== 0' to '!'.
      (suggested by Jan Beulich)
    - remove unnecessary check of 'mba.thrtl_max'.
      (suggested by Jan Beulich)
    - remove unnecessary intermediate variable 'mod'.
      (suggested by Jan Beulich)
    - refine an assignment statement to use '&='.
      (suggested by Jan Beulich)
    - change type of last parameter of 'sanitize' to 'uint32_t' and
      apply same change to 'cat_check_cbm'.
      (suggested by Jan Beulich)
v6:
    - split the value setting flow of co-existing features into a new patch.
      (suggested by Jan Beulich)
    - restore code related to 'mba_check_thrtl' and 'check_value'.
      (suggested by Jan Beulich)
v5:
    - adjust position of 'cat_check_cbm' to avoid making the changes so big.
      (suggested by Roger Pau Monné)
    - remove 'props' from 'struct cos_write_info'.
      (suggested by Roger Pau Monné)
    - make a single return statement in 'mba_check_thrtl'.
      (suggested by Jan Beulich)
v4:
    - remove 'ALLOC_' from macro names.
      (suggested by Roger Pau Monné)
    - join two checks into a single if.
      (suggested by Roger Pau Monné)
    - remove redundant local variable 'array_len'.
      (suggested by Roger Pau Monné)
v3:
    - modify commit message to make it clear.
      (suggested by Roger Pau Monné)
    - simplify the functionality of 'check_val' so that it only checks the value.
      Change the last parameter type from 'unsigned long *' to 'unsigned long'.
      (suggested by Roger Pau Monné)
    - call rdmsrl to get the value just written into the MSR for MBA, because
      HW can automatically change the input value to what it wants.
      (suggested by Roger Pau Monné)
    - change type of 'write_msr' to 'uint32_t' to return the value actually
      written into MSR. Then, change 'do_write_psr_msrs' to set the returned
      value into 'cos_reg_val[]'
    - move the declaration of 'j' into loop in 'do_write_psr_msrs'.
      (suggested by Roger Pau Monné)
    - change 'mba_info' to 'mba'.
      (suggested by Roger Pau Monné)
    - change 'cat_info' to 'cat'.
      (suggested by Roger Pau Monné)
    - rename 'psr_cat/PSR_CAT' to 'psr_alloc/PSR_ALLOC' and remove 'op/OP'
      from name.
      (suggested by Roger Pau Monné)
    - change 'PSR_VAL_TYPE_MBA' to 'PSR_TYPE_MBA_THRTL'.
      (suggested by Roger Pau Monné)
v2:
    - remove linear mode 'thrtl_max' check in 'mba_check_thrtl' because it has
      been checked in 'mba_init_feature'.
      (suggested by Chao Peng)
    - for non-linear mode, check if '*thrtl' is not 0 in 'mba_check_thrtl'. If
      it is 0, we do not need to change it.
      (suggested by Chao Peng)
    - move comments to explain changes of 'cos_write_info' from psr.c to commit
      message.
      (suggested by Chao Peng)
---
 xen/arch/x86/domctl.c       |  6 +++++
 xen/arch/x86/psr.c          | 58 ++++++++++++++++++++++++++++++++++++++-------
 xen/include/public/domctl.h |  1 +
 3 files changed, 56 insertions(+), 9 deletions(-)

Comments

Jan Beulich Oct. 13, 2017, 3:56 p.m. UTC | #1
>>> On 13.10.17 at 10:41, <yi.y.sun@linux.intel.com> wrote:
> @@ -274,16 +280,18 @@ static enum psr_feat_type psr_type_to_feat_type(enum psr_type type)
>      return feat_type;
>  }
>  
> -static bool psr_check_cbm(unsigned int cbm_len, unsigned long cbm)
> +/* Implementation of allocation features' functions. */
> +static bool cat_check_cbm(const struct feat_node *feat, uint32_t *val)
>  {
>      unsigned int first_bit, zero_bit;
> +    unsigned int cbm_len = feat->cat.cbm_len;
> +    unsigned long cbm = *val;

These are necessary changes.

> -    /* Set bits should only in the range of [0, cbm_len]. */
> -    if ( cbm & (~0ul << cbm_len) )
> -        return false;
> -
> -    /* At least one bit need to be set. */
> -    if ( cbm == 0 )
> +    /*
> +     * Set bits should be only in the range of [0, cbm_len).
> +     * And, at least one bit need to be set.
> +     */
> +    if ( (cbm & (~0ul << cbm_len)) || !cbm )

But all of this doesn't really belong here. I don't outright object to
you leaving it the way it is, but I'd prefer if you dropped these
changes, or moved them to a separate patch if you think this is
worthwhile.

> @@ -501,6 +511,35 @@ static bool mba_get_feat_info(const struct feat_node *feat,
>  static void mba_write_msr(unsigned int cos, uint32_t val,
>                            enum psr_type type)
>  {
> +    wrmsrl(MSR_IA32_PSR_MBA_MASK(cos), val);
> +}
> +
> +static bool mba_sanitize_thrtl(const struct feat_node *feat, uint32_t *thrtl)
> +{
> +    if ( *thrtl > feat->mba.thrtl_max )
> +        return false;
> +
> +    /*
> +     * Per SDM (chapter "Memory Bandwidth Allocation Configuration"):
> +     * 1. Linear mode: In the linear mode the input precision is defined
> +     *    as 100-(MBA_MAX). For instance, if the MBA_MAX value is 90, the
> +     *    input precision is 10%. Values not an even multiple of the
> +     *    precision (e.g., 12%) will be rounded down (e.g., to 10% delay
> +     *    applied).
> +     * 2. Non-linear mode: Input delay values are powers-of-two from zero
> +     *    to the MBA_MAX value from CPUID. In this case any values not a
> +     *    power of two will be rounded down the next nearest power of two.
> +     */
> +    if ( feat->mba.linear )
> +        *thrtl -= *thrtl % (100 - feat->mba.thrtl_max);
> +    else
> +    {
> +        /* Not power of 2. */
> +        if ( *thrtl & (*thrtl - 1) )
> +            *thrtl &= 1 << (flsl(*thrtl) - 1);

fls() will do now that the parameter type is uint32_t.

Also why do you think &= is better than plain = here?

Jan
Yi Sun Oct. 16, 2017, 1:44 a.m. UTC | #2
On 17-10-13 09:56:14, Jan Beulich wrote:
> >>> On 13.10.17 at 10:41, <yi.y.sun@linux.intel.com> wrote:
> > @@ -274,16 +280,18 @@ static enum psr_feat_type psr_type_to_feat_type(enum psr_type type)
> >      return feat_type;
> >  }
> >  
> > -static bool psr_check_cbm(unsigned int cbm_len, unsigned long cbm)
> > +/* Implementation of allocation features' functions. */
> > +static bool cat_check_cbm(const struct feat_node *feat, uint32_t *val)
> >  {
> >      unsigned int first_bit, zero_bit;
> > +    unsigned int cbm_len = feat->cat.cbm_len;
> > +    unsigned long cbm = *val;
> 
> These are necessary changes.
> 
> > -    /* Set bits should only in the range of [0, cbm_len]. */
> > -    if ( cbm & (~0ul << cbm_len) )
> > -        return false;
> > -
> > -    /* At least one bit need to be set. */
> > -    if ( cbm == 0 )
> > +    /*
> > +     * Set bits should be only in the range of [0, cbm_len).
> > +     * And, at least one bit need to be set.
> > +     */
> > +    if ( (cbm & (~0ul << cbm_len)) || !cbm )
> 
> But all of this doesn't really belong here. I don't outright object to
> you leaving it the way it is, but I'd prefer if you dropped these
> changes, or moved them to a separate patch if you think this is
> worthwhile.
> 
Then, I would prefer to drop these changes.

> > @@ -501,6 +511,35 @@ static bool mba_get_feat_info(const struct feat_node *feat,
> >  static void mba_write_msr(unsigned int cos, uint32_t val,
> >                            enum psr_type type)
> >  {
> > +    wrmsrl(MSR_IA32_PSR_MBA_MASK(cos), val);
> > +}
> > +
> > +static bool mba_sanitize_thrtl(const struct feat_node *feat, uint32_t *thrtl)
> > +{
> > +    if ( *thrtl > feat->mba.thrtl_max )
> > +        return false;
> > +
> > +    /*
> > +     * Per SDM (chapter "Memory Bandwidth Allocation Configuration"):
> > +     * 1. Linear mode: In the linear mode the input precision is defined
> > +     *    as 100-(MBA_MAX). For instance, if the MBA_MAX value is 90, the
> > +     *    input precision is 10%. Values not an even multiple of the
> > +     *    precision (e.g., 12%) will be rounded down (e.g., to 10% delay
> > +     *    applied).
> > +     * 2. Non-linear mode: Input delay values are powers-of-two from zero
> > +     *    to the MBA_MAX value from CPUID. In this case any values not a
> > +     *    power of two will be rounded down the next nearest power of two.
> > +     */
> > +    if ( feat->mba.linear )
> > +        *thrtl -= *thrtl % (100 - feat->mba.thrtl_max);
> > +    else
> > +    {
> > +        /* Not power of 2. */
> > +        if ( *thrtl & (*thrtl - 1) )
> > +            *thrtl &= 1 << (flsl(*thrtl) - 1);
> 
> fls() will do now that the parameter type is uint32_t.
> 
Yes, you are right. Sorry for missing it.

> Also why do you think &= is better than plain = here?

Not better. Will change it to '='.

> 
> Jan
diff mbox

Patch

diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
index ffb038c..e95004b 100644
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -1465,6 +1465,12 @@  long arch_do_domctl(
                               PSR_TYPE_L2_CBM);
             break;
 
+        case XEN_DOMCTL_PSR_SET_MBA_THRTL:
+            ret = psr_set_val(d, domctl->u.psr_alloc.target,
+                              domctl->u.psr_alloc.data,
+                              PSR_TYPE_MBA_THRTL);
+            break;
+
 #define domctl_psr_get_val(d, domctl, type, copyback) ({    \
     uint32_t v_;                                            \
     int r_ = psr_get_val((d), (domctl)->u.psr_alloc.target, \
diff --git a/xen/arch/x86/psr.c b/xen/arch/x86/psr.c
index f5b395b..c80ba25 100644
--- a/xen/arch/x86/psr.c
+++ b/xen/arch/x86/psr.c
@@ -138,6 +138,12 @@  static const struct feat_props {
 
     /* write_msr is used to write out feature MSR register. */
     void (*write_msr)(unsigned int cos, uint32_t val, enum psr_type type);
+
+    /*
+     * sanitize is used to check if input val fulfills SDM requirement.
+     * And change it to valid value if SDM allows.
+     */
+    bool (*sanitize)(const struct feat_node *feat, uint32_t *val);
 } *feat_props[FEAT_TYPE_NUM];
 
 /*
@@ -274,16 +280,18 @@  static enum psr_feat_type psr_type_to_feat_type(enum psr_type type)
     return feat_type;
 }
 
-static bool psr_check_cbm(unsigned int cbm_len, unsigned long cbm)
+/* Implementation of allocation features' functions. */
+static bool cat_check_cbm(const struct feat_node *feat, uint32_t *val)
 {
     unsigned int first_bit, zero_bit;
+    unsigned int cbm_len = feat->cat.cbm_len;
+    unsigned long cbm = *val;
 
-    /* Set bits should only in the range of [0, cbm_len]. */
-    if ( cbm & (~0ul << cbm_len) )
-        return false;
-
-    /* At least one bit need to be set. */
-    if ( cbm == 0 )
+    /*
+     * Set bits should be only in the range of [0, cbm_len).
+     * And, at least one bit need to be set.
+     */
+    if ( (cbm & (~0ul << cbm_len)) || !cbm )
         return false;
 
     first_bit = find_first_bit(&cbm, cbm_len);
@@ -297,7 +305,6 @@  static bool psr_check_cbm(unsigned int cbm_len, unsigned long cbm)
     return true;
 }
 
-/* Implementation of allocation features' functions. */
 static bool cat_init_feature(const struct cpuid_leaf *regs,
                              struct feat_node *feat,
                              struct psr_socket_info *info,
@@ -436,6 +443,7 @@  static const struct feat_props l3_cat_props = {
     .alt_type = PSR_TYPE_UNKNOWN,
     .get_feat_info = cat_get_feat_info,
     .write_msr = l3_cat_write_msr,
+    .sanitize = cat_check_cbm,
 };
 
 /* L3 CDP props */
@@ -466,6 +474,7 @@  static const struct feat_props l3_cdp_props = {
     .alt_type = PSR_TYPE_L3_CBM,
     .get_feat_info = l3_cdp_get_feat_info,
     .write_msr = l3_cdp_write_msr,
+    .sanitize = cat_check_cbm,
 };
 
 /* L2 CAT props */
@@ -481,6 +490,7 @@  static const struct feat_props l2_cat_props = {
     .alt_type = PSR_TYPE_UNKNOWN,
     .get_feat_info = cat_get_feat_info,
     .write_msr = l2_cat_write_msr,
+    .sanitize = cat_check_cbm,
 };
 
 /* MBA props */
@@ -501,6 +511,35 @@  static bool mba_get_feat_info(const struct feat_node *feat,
 static void mba_write_msr(unsigned int cos, uint32_t val,
                           enum psr_type type)
 {
+    wrmsrl(MSR_IA32_PSR_MBA_MASK(cos), val);
+}
+
+static bool mba_sanitize_thrtl(const struct feat_node *feat, uint32_t *thrtl)
+{
+    if ( *thrtl > feat->mba.thrtl_max )
+        return false;
+
+    /*
+     * Per SDM (chapter "Memory Bandwidth Allocation Configuration"):
+     * 1. Linear mode: In the linear mode the input precision is defined
+     *    as 100-(MBA_MAX). For instance, if the MBA_MAX value is 90, the
+     *    input precision is 10%. Values not an even multiple of the
+     *    precision (e.g., 12%) will be rounded down (e.g., to 10% delay
+     *    applied).
+     * 2. Non-linear mode: Input delay values are powers-of-two from zero
+     *    to the MBA_MAX value from CPUID. In this case any values not a
+     *    power of two will be rounded down the next nearest power of two.
+     */
+    if ( feat->mba.linear )
+        *thrtl -= *thrtl % (100 - feat->mba.thrtl_max);
+    else
+    {
+        /* Not power of 2. */
+        if ( *thrtl & (*thrtl - 1) )
+            *thrtl &= 1 << (flsl(*thrtl) - 1);
+    }
+
+    return true;
 }
 
 static const struct feat_props mba_props = {
@@ -509,6 +548,7 @@  static const struct feat_props mba_props = {
     .alt_type = PSR_TYPE_UNKNOWN,
     .get_feat_info = mba_get_feat_info,
     .write_msr = mba_write_msr,
+    .sanitize = mba_sanitize_thrtl,
 };
 
 static bool __init parse_psr_bool(const char *s, const char *delim,
@@ -974,7 +1014,7 @@  static int insert_val_into_array(uint32_t val[],
     if ( array_len < props->cos_num )
         return -ENOSPC;
 
-    if ( !psr_check_cbm(feat->cat.cbm_len, new_val) )
+    if ( !props->sanitize(feat, &new_val) )
         return -EINVAL;
 
     /*
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index e8f4c4c..fb57e64 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -1069,6 +1069,7 @@  struct xen_domctl_psr_alloc {
 #define XEN_DOMCTL_PSR_GET_L3_DATA    5
 #define XEN_DOMCTL_PSR_SET_L2_CBM     6
 #define XEN_DOMCTL_PSR_GET_L2_CBM     7
+#define XEN_DOMCTL_PSR_SET_MBA_THRTL  8
 #define XEN_DOMCTL_PSR_GET_MBA_THRTL  9
     uint32_t cmd;       /* IN: XEN_DOMCTL_PSR_* */
     uint32_t target;    /* IN */