Message ID | 1506160104-5890-8-git-send-email-yi.y.sun@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Sat, Sep 23, 2017 at 09:48:16AM +0000, Yi Sun wrote: > @@ -274,29 +277,6 @@ static enum psr_feat_type psr_type_to_feat_type(enum psr_type type) > return feat_type; > } > > -static bool psr_check_cbm(unsigned int cbm_len, unsigned long cbm) > -{ > - unsigned int first_bit, zero_bit; > - > - /* Set bits should only in the range of [0, cbm_len]. */ > - if ( cbm & (~0ul << cbm_len) ) > - return false; > - > - /* At least one bit need to be set. */ > - if ( cbm == 0 ) > - return false; > - > - first_bit = find_first_bit(&cbm, cbm_len); > - zero_bit = find_next_zero_bit(&cbm, cbm_len, first_bit); > - > - /* Set bits should be contiguous. */ > - if ( zero_bit < cbm_len && > - find_next_bit(&cbm, cbm_len, zero_bit) < cbm_len ) > - return false; > - > - return true; > -} > - > /* Implementation of allocation features' functions. */ > static bool cat_init_feature(const struct cpuid_leaf *regs, > struct feat_node *feat, > @@ -426,11 +406,36 @@ static bool cat_get_feat_info(const struct feat_node *feat, > return true; > } > > +static bool cat_check_cbm(const struct feat_node *feat, unsigned long cbm) > +{ > + unsigned int first_bit, zero_bit; > + unsigned int cbm_len = feat->cat.cbm_len; > + > + /* > + * Set bits should only in the range of [0, cbm_len]. > + * And, at least one bit need to be set. > + */ > + if ( cbm & (~0ul << cbm_len) || cbm == 0 ) > + return false; > + > + first_bit = find_first_bit(&cbm, cbm_len); > + zero_bit = find_next_zero_bit(&cbm, cbm_len, first_bit); > + > + /* Set bits should be contiguous. */ > + if ( zero_bit < cbm_len && > + find_next_bit(&cbm, cbm_len, zero_bit) < cbm_len ) > + return false; > + > + return true; > +} Why do you need to move the code apart from renaming it? > @@ -1210,25 +1237,39 @@ static unsigned int get_socket_cpu(unsigned int socket) > struct cos_write_info > { > unsigned int cos; > - struct feat_node *feature; > + struct feat_node **features; > const uint32_t *val; > - const struct feat_props *props; > + unsigned int array_len; > + const struct feat_props **props; Why do you need props here, from the usage below it's just pointing to feat_props, which is already available in this context. > }; > > static void do_write_psr_msrs(void *data) > { > const struct cos_write_info *info = data; > - struct feat_node *feat = info->feature; > - const struct feat_props *props = info->props; > - unsigned int i, cos = info->cos, cos_num = props->cos_num; > + unsigned int i, index = 0, cos = info->cos; > + const uint32_t *val_array = info->val; > > - for ( i = 0; i < cos_num; i++ ) > + for ( i = 0; i < ARRAY_SIZE(feat_props); i++ ) > { > - if ( feat->cos_reg_val[cos * cos_num + i] != info->val[i] ) > + struct feat_node *feat = info->features[i]; > + const struct feat_props *props = info->props[i]; If you use ARRAY_SIZE(feat_props), the above should be feat_props[i]. Also I'm worried about the size of the props array, isn't there a possibility that the props array is smaller than the feature array? Thanks, Roger.
On 17-09-26 10:39:31, Roger Pau Monn� wrote: > On Sat, Sep 23, 2017 at 09:48:16AM +0000, Yi Sun wrote: > > @@ -274,29 +277,6 @@ static enum psr_feat_type psr_type_to_feat_type(enum psr_type type) > > return feat_type; > > } > > > > -static bool psr_check_cbm(unsigned int cbm_len, unsigned long cbm) > > -{ > > - unsigned int first_bit, zero_bit; > > - > > - /* Set bits should only in the range of [0, cbm_len]. */ > > - if ( cbm & (~0ul << cbm_len) ) > > - return false; > > - > > - /* At least one bit need to be set. */ > > - if ( cbm == 0 ) > > - return false; > > - > > - first_bit = find_first_bit(&cbm, cbm_len); > > - zero_bit = find_next_zero_bit(&cbm, cbm_len, first_bit); > > - > > - /* Set bits should be contiguous. */ > > - if ( zero_bit < cbm_len && > > - find_next_bit(&cbm, cbm_len, zero_bit) < cbm_len ) > > - return false; > > - > > - return true; > > -} > > - > > /* Implementation of allocation features' functions. */ > > static bool cat_init_feature(const struct cpuid_leaf *regs, > > struct feat_node *feat, > > @@ -426,11 +406,36 @@ static bool cat_get_feat_info(const struct feat_node *feat, > > return true; > > } > > > > +static bool cat_check_cbm(const struct feat_node *feat, unsigned long cbm) > > +{ > > + unsigned int first_bit, zero_bit; > > + unsigned int cbm_len = feat->cat.cbm_len; > > + > > + /* > > + * Set bits should only in the range of [0, cbm_len]. > > + * And, at least one bit need to be set. > > + */ > > + if ( cbm & (~0ul << cbm_len) || cbm == 0 ) > > + return false; > > + > > + first_bit = find_first_bit(&cbm, cbm_len); > > + zero_bit = find_next_zero_bit(&cbm, cbm_len, first_bit); > > + > > + /* Set bits should be contiguous. */ > > + if ( zero_bit < cbm_len && > > + find_next_bit(&cbm, cbm_len, zero_bit) < cbm_len ) > > + return false; > > + > > + return true; > > +} > > Why do you need to move the code apart from renaming it? > Because it is CAT specific function now. I moved it into CAT section. '/* Implementation of allocation features' functions. */' > > @@ -1210,25 +1237,39 @@ static unsigned int get_socket_cpu(unsigned int socket) > > struct cos_write_info > > { > > unsigned int cos; > > - struct feat_node *feature; > > + struct feat_node **features; > > const uint32_t *val; > > - const struct feat_props *props; > > + unsigned int array_len; > > + const struct feat_props **props; > > Why do you need props here, from the usage below it's just pointing > to feat_props, which is already available in this context. > I may drop it. > > }; > > > > static void do_write_psr_msrs(void *data) > > { > > const struct cos_write_info *info = data; > > - struct feat_node *feat = info->feature; > > - const struct feat_props *props = info->props; > > - unsigned int i, cos = info->cos, cos_num = props->cos_num; > > + unsigned int i, index = 0, cos = info->cos; > > + const uint32_t *val_array = info->val; > > > > - for ( i = 0; i < cos_num; i++ ) > > + for ( i = 0; i < ARRAY_SIZE(feat_props); i++ ) > > { > > - if ( feat->cos_reg_val[cos * cos_num + i] != info->val[i] ) > > + struct feat_node *feat = info->features[i]; > > + const struct feat_props *props = info->props[i]; > > If you use ARRAY_SIZE(feat_props), the above should be feat_props[i]. Ok. > Also I'm worried about the size of the props array, isn't there a > possibility that the props array is smaller than the feature array? > No, every member is inserted into props array if the feature is initialized successfully and inserted into feature array. So, they are 1:1. > > Thanks, Roger.
>>> On 28.09.17 at 04:39, <yi.y.sun@linux.intel.com> wrote: > On 17-09-26 10:39:31, Roger Pau Monn wrote: >> On Sat, Sep 23, 2017 at 09:48:16AM +0000, Yi Sun wrote: >> > @@ -274,29 +277,6 @@ static enum psr_feat_type psr_type_to_feat_type(enum psr_type type) >> > return feat_type; >> > } >> > >> > -static bool psr_check_cbm(unsigned int cbm_len, unsigned long cbm) >> > -{ >> > - unsigned int first_bit, zero_bit; >> > - >> > - /* Set bits should only in the range of [0, cbm_len]. */ >> > - if ( cbm & (~0ul << cbm_len) ) >> > - return false; >> > - >> > - /* At least one bit need to be set. */ >> > - if ( cbm == 0 ) >> > - return false; >> > - >> > - first_bit = find_first_bit(&cbm, cbm_len); >> > - zero_bit = find_next_zero_bit(&cbm, cbm_len, first_bit); >> > - >> > - /* Set bits should be contiguous. */ >> > - if ( zero_bit < cbm_len && >> > - find_next_bit(&cbm, cbm_len, zero_bit) < cbm_len ) >> > - return false; >> > - >> > - return true; >> > -} >> > - >> > /* Implementation of allocation features' functions. */ >> > static bool cat_init_feature(const struct cpuid_leaf *regs, >> > struct feat_node *feat, >> > @@ -426,11 +406,36 @@ static bool cat_get_feat_info(const struct feat_node *feat, >> > return true; >> > } >> > >> > +static bool cat_check_cbm(const struct feat_node *feat, unsigned long cbm) >> > +{ >> > + unsigned int first_bit, zero_bit; >> > + unsigned int cbm_len = feat->cat.cbm_len; >> > + >> > + /* >> > + * Set bits should only in the range of [0, cbm_len]. >> > + * And, at least one bit need to be set. >> > + */ >> > + if ( cbm & (~0ul << cbm_len) || cbm == 0 ) >> > + return false; >> > + >> > + first_bit = find_first_bit(&cbm, cbm_len); >> > + zero_bit = find_next_zero_bit(&cbm, cbm_len, first_bit); >> > + >> > + /* Set bits should be contiguous. */ >> > + if ( zero_bit < cbm_len && >> > + find_next_bit(&cbm, cbm_len, zero_bit) < cbm_len ) >> > + return false; >> > + >> > + return true; >> > +} >> >> Why do you need to move the code apart from renaming it? >> > Because it is CAT specific function now. I moved it into CAT section. > '/* Implementation of allocation features' functions. */' The easier thing then would have been to move the comment line up. That way actual changes to the function also become obvious. Besides that - isn't the A in MBA also "Allocation"? Jan
>>> On 23.09.17 at 11:48, <yi.y.sun@linux.intel.com> wrote: > This patch implements set value flow for MBA including its callback > function and domctl interface. > > It also changes the memebers in 'cos_write_info' to transfer the > feature array, feature properties array and value array. Then, we > can write all features values on the cos id into MSRs. > > Because multiple features may co-exist, we need handle all features to write > values of them into a COS register with new COS ID. E.g: > 1. L3 CAT and MBA co-exist. > 2. Dom1 and Dom2 share a same COS ID (2). The L3 CAT CBM of Dom1 is 0x1ff, > the MBA Thrtle of Dom1 is 0xa. > 3. User wants to change MBA Thrtl of Dom1 to be 0x14. Because COS ID 2 is > used by Dom2 too, we have to pick a new COS ID 3. The values of Dom1 on > COS ID 3 are all default values as below: > --------- > | COS 3 | > --------- > L3 CAT | 0x7ff | > --------- > MBA | 0x0 | > --------- > 4. After setting, the L3 CAT CBM value of Dom1 should be kept and the new MBA > Thrtl is set. So, the values on COS ID 3 should be below. > --------- > | COS 3 | > --------- > L3 CAT | 0x1ff | > --------- > MBA | 0x14 | > --------- > > So, we should write all features values into their MSRs. That requires the > feature array, feature properties array and value array are input. How is this last aspect (and the respective changes) related to MBA? I.e. why isn't this needed with the (also independent but possibly co-existing) L2/L3 CAT features? > --- a/xen/arch/x86/psr.c > +++ b/xen/arch/x86/psr.c > @@ -137,7 +137,10 @@ static const struct feat_props { > uint32_t data[], unsigned int array_len); > > /* write_msr is used to write out feature MSR register. */ > - void (*write_msr)(unsigned int cos, uint32_t val, enum psr_type type); > + uint32_t (*write_msr)(unsigned int cos, uint32_t val, enum psr_type type); Again the type change would better be a prereq patch, to allow the focus here to be on MBA. > @@ -502,9 +514,23 @@ static bool mba_get_feat_info(const struct feat_node *feat, > return true; > } > > -static void mba_write_msr(unsigned int cos, uint32_t val, > - enum psr_type type) > +static uint32_t mba_write_msr(unsigned int cos, uint32_t val, > + enum psr_type type) > +{ > + wrmsrl(MSR_IA32_PSR_MBA_MASK(cos), val); > + > + /* Read actual value set by hardware. */ > + rdmsrl(MSR_IA32_PSR_MBA_MASK(cos), val); > + > + return val; > +} > + > +static bool mba_check_thrtl(const struct feat_node *feat, unsigned long thrtl) > { > + if ( thrtl > feat->mba.thrtl_max ) > + return false; > + > + return true; This can be had with a single return statement. > static void do_write_psr_msrs(void *data) > { > const struct cos_write_info *info = data; > - struct feat_node *feat = info->feature; > - const struct feat_props *props = info->props; > - unsigned int i, cos = info->cos, cos_num = props->cos_num; > + unsigned int i, index = 0, cos = info->cos; > + const uint32_t *val_array = info->val; > > - for ( i = 0; i < cos_num; i++ ) > + for ( i = 0; i < ARRAY_SIZE(feat_props); i++ ) > { > - if ( feat->cos_reg_val[cos * cos_num + i] != info->val[i] ) > + struct feat_node *feat = info->features[i]; > + const struct feat_props *props = info->props[i]; > + unsigned int cos_num, j; > + > + if ( !feat || !props ) > + continue; > + > + cos_num = props->cos_num; > + if ( info->array_len < index + cos_num ) > + return; > + > + for ( j = 0; j < cos_num; j++ ) > { > - feat->cos_reg_val[cos * cos_num + i] = info->val[i]; > - props->write_msr(cos, info->val[i], props->type[i]); > + if ( feat->cos_reg_val[cos * cos_num + j] != val_array[index + j] ) > + feat->cos_reg_val[cos * cos_num + j] = > + props->write_msr(cos, val_array[index + j], props->type[j]); This renders partly useless the check: If hardware can alter the value, repeatedly requesting the same value to be written will no longer guarantee the MSR write to be skipped. If hardware behavior can't be predicted you may want to consider recording both the value in found by reading back the register written and the value that was written - a match with either would eliminate the need to do the write. Jan
On 17-09-28 05:36:11, Jan Beulich wrote: > >>> On 23.09.17 at 11:48, <yi.y.sun@linux.intel.com> wrote: > > This patch implements set value flow for MBA including its callback > > function and domctl interface. > > > > It also changes the memebers in 'cos_write_info' to transfer the > > feature array, feature properties array and value array. Then, we > > can write all features values on the cos id into MSRs. > > > > Because multiple features may co-exist, we need handle all features to write > > values of them into a COS register with new COS ID. E.g: > > 1. L3 CAT and MBA co-exist. > > 2. Dom1 and Dom2 share a same COS ID (2). The L3 CAT CBM of Dom1 is 0x1ff, > > the MBA Thrtle of Dom1 is 0xa. > > 3. User wants to change MBA Thrtl of Dom1 to be 0x14. Because COS ID 2 is > > used by Dom2 too, we have to pick a new COS ID 3. The values of Dom1 on > > COS ID 3 are all default values as below: > > --------- > > | COS 3 | > > --------- > > L3 CAT | 0x7ff | > > --------- > > MBA | 0x0 | > > --------- > > 4. After setting, the L3 CAT CBM value of Dom1 should be kept and the new MBA > > Thrtl is set. So, the values on COS ID 3 should be below. > > --------- > > | COS 3 | > > --------- > > L3 CAT | 0x1ff | > > --------- > > MBA | 0x14 | > > --------- > > > > So, we should write all features values into their MSRs. That requires the > > feature array, feature properties array and value array are input. > > How is this last aspect (and the respective changes) related to MBA? > I.e. why isn't this needed with the (also independent but possibly > co-existing) L2/L3 CAT features? > I tried to introduce this in L2 CAT patch set but did not succeed. As there is no HW that L2 CAT and L3 CAT co-exist so far, I did not insist on this. > > --- a/xen/arch/x86/psr.c > > +++ b/xen/arch/x86/psr.c > > @@ -137,7 +137,10 @@ static const struct feat_props { > > uint32_t data[], unsigned int array_len); > > > > /* write_msr is used to write out feature MSR register. */ > > - void (*write_msr)(unsigned int cos, uint32_t val, enum psr_type type); > > + uint32_t (*write_msr)(unsigned int cos, uint32_t val, enum psr_type type); > > Again the type change would better be a prereq patch, to allow the > focus here to be on MBA. > Sure, will move it to a new patch. > > @@ -502,9 +514,23 @@ static bool mba_get_feat_info(const struct feat_node *feat, > > return true; > > } > > > > -static void mba_write_msr(unsigned int cos, uint32_t val, > > - enum psr_type type) > > +static uint32_t mba_write_msr(unsigned int cos, uint32_t val, > > + enum psr_type type) > > +{ > > + wrmsrl(MSR_IA32_PSR_MBA_MASK(cos), val); > > + > > + /* Read actual value set by hardware. */ > > + rdmsrl(MSR_IA32_PSR_MBA_MASK(cos), val); > > + > > + return val; > > +} > > + > > +static bool mba_check_thrtl(const struct feat_node *feat, unsigned long thrtl) > > { > > + if ( thrtl > feat->mba.thrtl_max ) > > + return false; > > + > > + return true; > > This can be had with a single return statement. > Sure. > > static void do_write_psr_msrs(void *data) > > { > > const struct cos_write_info *info = data; > > - struct feat_node *feat = info->feature; > > - const struct feat_props *props = info->props; > > - unsigned int i, cos = info->cos, cos_num = props->cos_num; > > + unsigned int i, index = 0, cos = info->cos; > > + const uint32_t *val_array = info->val; > > > > - for ( i = 0; i < cos_num; i++ ) > > + for ( i = 0; i < ARRAY_SIZE(feat_props); i++ ) > > { > > - if ( feat->cos_reg_val[cos * cos_num + i] != info->val[i] ) > > + struct feat_node *feat = info->features[i]; > > + const struct feat_props *props = info->props[i]; > > + unsigned int cos_num, j; > > + > > + if ( !feat || !props ) > > + continue; > > + > > + cos_num = props->cos_num; > > + if ( info->array_len < index + cos_num ) > > + return; > > + > > + for ( j = 0; j < cos_num; j++ ) > > { > > - feat->cos_reg_val[cos * cos_num + i] = info->val[i]; > > - props->write_msr(cos, info->val[i], props->type[i]); > > + if ( feat->cos_reg_val[cos * cos_num + j] != val_array[index + j] ) > > + feat->cos_reg_val[cos * cos_num + j] = > > + props->write_msr(cos, val_array[index + j], props->type[j]); > > This renders partly useless the check: If hardware can alter the > value, repeatedly requesting the same value to be written will > no longer guarantee the MSR write to be skipped. If hardware > behavior can't be predicted you may want to consider recording > both the value in found by reading back the register written and > the value that was written - a match with either would eliminate > the need to do the write. > The hardware behavior is explicitly defined by SDM and mentioned in 'xl-psr.markdown' and 'intel_psr_mba.pandoc'. User should know that HW can alter MBA value if the value is not valid. This check is not only for MBA but also for CAT features that the HW cannot alter CAT value. Although this check is not a critical check, it can prevent some non-necessary MSR write. If you still think we should handle the case that user inputs an invalid value every time, I think we can restore the codes in 'mba_check_thrtl' to change invalid value to valid one, then insert the valid value into val_array. Then, this check is always valid. > Jan
>>> Yi Sun <yi.y.sun@linux.intel.com> 09/29/17 4:58 AM >>> >On 17-09-28 05:36:11, Jan Beulich wrote: >> >>> On 23.09.17 at 11:48, <yi.y.sun@linux.intel.com> wrote: >> > This patch implements set value flow for MBA including its callback >> > function and domctl interface. >> > >> > It also changes the memebers in 'cos_write_info' to transfer the >> > feature array, feature properties array and value array. Then, we >> > can write all features values on the cos id into MSRs. >> > >> > Because multiple features may co-exist, we need handle all features to write >> > values of them into a COS register with new COS ID. E.g: >> > 1. L3 CAT and MBA co-exist. >> > 2. Dom1 and Dom2 share a same COS ID (2). The L3 CAT CBM of Dom1 is 0x1ff, >> > the MBA Thrtle of Dom1 is 0xa. >> > 3. User wants to change MBA Thrtl of Dom1 to be 0x14. Because COS ID 2 is >> > used by Dom2 too, we have to pick a new COS ID 3. The values of Dom1 on >> > COS ID 3 are all default values as below: >> > --------- >> > | COS 3 | >> > --------- >> > L3 CAT | 0x7ff | >> > --------- >> > MBA | 0x0 | >> > --------- >> > 4. After setting, the L3 CAT CBM value of Dom1 should be kept and the new MBA >> > Thrtl is set. So, the values on COS ID 3 should be below. >> > --------- >> > | COS 3 | >> > --------- >> > L3 CAT | 0x1ff | >> > --------- >> > MBA | 0x14 | >> > --------- >> > >> > So, we should write all features values into their MSRs. That requires the >> > feature array, feature properties array and value array are input. >> >> How is this last aspect (and the respective changes) related to MBA? >> I.e. why isn't this needed with the (also independent but possibly >> co-existing) L2/L3 CAT features? >> >I tried to introduce this in L2 CAT patch set but did not succeed. As there is >no HW that L2 CAT and L3 CAT co-exist so far, I did not insist on this. Hmm, I'm afraid this wasn't then made clear enough to understand. I would certainly not have been against something that could in theory occur with L2/L3 CAT alone. In any event this means you don't want to mix this into this MBA specific change here. >> > static void do_write_psr_msrs(void *data) >> > { >> > const struct cos_write_info *info = data; >> > - struct feat_node *feat = info->feature; >> > - const struct feat_props *props = info->props; >> > - unsigned int i, cos = info->cos, cos_num = props->cos_num; >> > + unsigned int i, index = 0, cos = info->cos; >> > + const uint32_t *val_array = info->val; >> > >> > - for ( i = 0; i < cos_num; i++ ) >> > + for ( i = 0; i < ARRAY_SIZE(feat_props); i++ ) >> > { >> > - if ( feat->cos_reg_val[cos * cos_num + i] != info->val[i] ) >> > + struct feat_node *feat = info->features[i]; >> > + const struct feat_props *props = info->props[i]; >> > + unsigned int cos_num, j; >> > + >> > + if ( !feat || !props ) >> > + continue; >> > + >> > + cos_num = props->cos_num; >> > + if ( info->array_len < index + cos_num ) >> > + return; >> > + >> > + for ( j = 0; j < cos_num; j++ ) >> > { >> > - feat->cos_reg_val[cos * cos_num + i] = info->val[i]; >> > - props->write_msr(cos, info->val[i], props->type[i]); >> > + if ( feat->cos_reg_val[cos * cos_num + j] != val_array[index + j] ) >> > + feat->cos_reg_val[cos * cos_num + j] = >> > + props->write_msr(cos, val_array[index + j], props->type[j]); >> >> This renders partly useless the check: If hardware can alter the >> value, repeatedly requesting the same value to be written will >> no longer guarantee the MSR write to be skipped. If hardware >> behavior can't be predicted you may want to consider recording >> both the value in found by reading back the register written and >> the value that was written - a match with either would eliminate >> the need to do the write. >> >The hardware behavior is explicitly defined by SDM and mentioned in >'xl-psr.markdown' and 'intel_psr_mba.pandoc'. User should know that HW >can alter MBA value if the value is not valid. So if hardware behavior is fully defined, why don't you pre-adjust what is to be written to the value hardware would alter it to? >This check is not only for MBA but also for CAT features that the HW >cannot alter CAT value. I don't understand this part. > Although this check is not a critical check, >it can prevent some non-necessary MSR write. That's my point - while previously all unnecessary writes were avoided, you now avoid only some. >If you still think we should handle the case that user inputs an invalid >value every time, I think we can restore the codes in 'mba_check_thrtl' >to change invalid value to valid one, then insert the valid value into >val_array. Then, this check is always valid. I don't think I've asked to deal with "invalid" values here (which should be rejected anyway, but that's a different topic). Values adjusted by hardware don't fall into the "invalid" category for me. Jan
On 17-10-03 23:59:46, Jan Beulich wrote: > >>> Yi Sun <yi.y.sun@linux.intel.com> 09/29/17 4:58 AM >>> > >On 17-09-28 05:36:11, Jan Beulich wrote: > >> >>> On 23.09.17 at 11:48, <yi.y.sun@linux.intel.com> wrote: > >> > This patch implements set value flow for MBA including its callback > >> > function and domctl interface. > >> > > >> > It also changes the memebers in 'cos_write_info' to transfer the > >> > feature array, feature properties array and value array. Then, we > >> > can write all features values on the cos id into MSRs. > >> > > >> > Because multiple features may co-exist, we need handle all features to write > >> > values of them into a COS register with new COS ID. E.g: > >> > 1. L3 CAT and MBA co-exist. > >> > 2. Dom1 and Dom2 share a same COS ID (2). The L3 CAT CBM of Dom1 is 0x1ff, > >> > the MBA Thrtle of Dom1 is 0xa. > >> > 3. User wants to change MBA Thrtl of Dom1 to be 0x14. Because COS ID 2 is > >> > used by Dom2 too, we have to pick a new COS ID 3. The values of Dom1 on > >> > COS ID 3 are all default values as below: > >> > --------- > >> > | COS 3 | > >> > --------- > >> > L3 CAT | 0x7ff | > >> > --------- > >> > MBA | 0x0 | > >> > --------- > >> > 4. After setting, the L3 CAT CBM value of Dom1 should be kept and the new MBA > >> > Thrtl is set. So, the values on COS ID 3 should be below. > >> > --------- > >> > | COS 3 | > >> > --------- > >> > L3 CAT | 0x1ff | > >> > --------- > >> > MBA | 0x14 | > >> > --------- > >> > > >> > So, we should write all features values into their MSRs. That requires the > >> > feature array, feature properties array and value array are input. > >> > >> How is this last aspect (and the respective changes) related to MBA? > >> I.e. why isn't this needed with the (also independent but possibly > >> co-existing) L2/L3 CAT features? > >> > >I tried to introduce this in L2 CAT patch set but did not succeed. As there is > >no HW that L2 CAT and L3 CAT co-exist so far, I did not insist on this. > > Hmm, I'm afraid this wasn't then made clear enough to understand. I would > certainly not have been against something that could in theory occur with > L2/L3 CAT alone. In any event this means you don't want to mix this into this > MBA specific change here. > Anyway, I think you suggest to split this as a new patch, right? > >> > static void do_write_psr_msrs(void *data) > >> > { > >> > const struct cos_write_info *info = data; > >> > - struct feat_node *feat = info->feature; > >> > - const struct feat_props *props = info->props; > >> > - unsigned int i, cos = info->cos, cos_num = props->cos_num; > >> > + unsigned int i, index = 0, cos = info->cos; > >> > + const uint32_t *val_array = info->val; > >> > > >> > - for ( i = 0; i < cos_num; i++ ) > >> > + for ( i = 0; i < ARRAY_SIZE(feat_props); i++ ) > >> > { > >> > - if ( feat->cos_reg_val[cos * cos_num + i] != info->val[i] ) > >> > + struct feat_node *feat = info->features[i]; > >> > + const struct feat_props *props = info->props[i]; > >> > + unsigned int cos_num, j; > >> > + > >> > + if ( !feat || !props ) > >> > + continue; > >> > + > >> > + cos_num = props->cos_num; > >> > + if ( info->array_len < index + cos_num ) > >> > + return; > >> > + > >> > + for ( j = 0; j < cos_num; j++ ) > >> > { > >> > - feat->cos_reg_val[cos * cos_num + i] = info->val[i]; > >> > - props->write_msr(cos, info->val[i], props->type[i]); > >> > + if ( feat->cos_reg_val[cos * cos_num + j] != val_array[index + j] ) > >> > + feat->cos_reg_val[cos * cos_num + j] = > >> > + props->write_msr(cos, val_array[index + j], props->type[j]); > >> > >> This renders partly useless the check: If hardware can alter the > >> value, repeatedly requesting the same value to be written will > >> no longer guarantee the MSR write to be skipped. If hardware > >> behavior can't be predicted you may want to consider recording > >> both the value in found by reading back the register written and > >> the value that was written - a match with either would eliminate > >> the need to do the write. > >> > >The hardware behavior is explicitly defined by SDM and mentioned in > >'xl-psr.markdown' and 'intel_psr_mba.pandoc'. User should know that HW > >can alter MBA value if the value is not valid. > > So if hardware behavior is fully defined, why don't you pre-adjust what is > to be written to the value hardware would alter it to? > In previous version of MBA patch set, I pre-adjust the value in 'mba_check_thrtl'. But Roger did not like that. So, the pre-adjust codes are removed. > >This check is not only for MBA but also for CAT features that the HW > >cannot alter CAT value. > > I don't understand this part. > I mean the check here are for all features so we cannot remove it. > > Although this check is not a critical check, > >it can prevent some non-necessary MSR write. > > That's my point - while previously all unnecessary writes were avoided, > you now avoid only some. > Without the pre-adjust codes in 'mba_check_thrtl', if user inputs value, e.g. 11/22/33/..., this check cannot prevent the write action. So, only some can be avoided in current codes. > >If you still think we should handle the case that user inputs an invalid > >value every time, I think we can restore the codes in 'mba_check_thrtl' > >to change invalid value to valid one, then insert the valid value into > >val_array. Then, this check is always valid. > > I don't think I've asked to deal with "invalid" values here (which should be > rejected anyway, but that's a different topic). Values adjusted by hardware > don't fall into the "invalid" category for me. > If the pre-adjust codes in 'mba_check_thrtl' are restored, all values written to HW are valid. > Jan
On Thu, Oct 05, 2017 at 04:48:12AM +0000, Yi Sun wrote: > On 17-10-03 23:59:46, Jan Beulich wrote: > > >>> Yi Sun <yi.y.sun@linux.intel.com> 09/29/17 4:58 AM >>> > > >On 17-09-28 05:36:11, Jan Beulich wrote: > > >> >>> On 23.09.17 at 11:48, <yi.y.sun@linux.intel.com> wrote: > > >> > - feat->cos_reg_val[cos * cos_num + i] = info->val[i]; > > >> > - props->write_msr(cos, info->val[i], props->type[i]); > > >> > + if ( feat->cos_reg_val[cos * cos_num + j] != val_array[index + j] ) > > >> > + feat->cos_reg_val[cos * cos_num + j] = > > >> > + props->write_msr(cos, val_array[index + j], props->type[j]); > > >> > > >> This renders partly useless the check: If hardware can alter the > > >> value, repeatedly requesting the same value to be written will > > >> no longer guarantee the MSR write to be skipped. If hardware > > >> behavior can't be predicted you may want to consider recording > > >> both the value in found by reading back the register written and > > >> the value that was written - a match with either would eliminate > > >> the need to do the write. > > >> > > >The hardware behavior is explicitly defined by SDM and mentioned in > > >'xl-psr.markdown' and 'intel_psr_mba.pandoc'. User should know that HW > > >can alter MBA value if the value is not valid. > > > > So if hardware behavior is fully defined, why don't you pre-adjust what is > > to be written to the value hardware would alter it to? > > > In previous version of MBA patch set, I pre-adjust the value in 'mba_check_thrtl'. > But Roger did not like that. So, the pre-adjust codes are removed. IMHO it's quite pointless to do such adjustments when the hardware performs them already. Also, I fear that our adjustments might get out-of-sync in the future with what hardware actually does. Maybe the result read back from the hardware (ie: adjusted) can be stored and used in order to check whether a new value should be written or not when switching? (I think this is the same that Jan suggested above). Roger.
>>> On 05.10.17 at 06:48, <yi.y.sun@linux.intel.com> wrote: > On 17-10-03 23:59:46, Jan Beulich wrote: >> >>> Yi Sun <yi.y.sun@linux.intel.com> 09/29/17 4:58 AM >>> >> >On 17-09-28 05:36:11, Jan Beulich wrote: >> >> >>> On 23.09.17 at 11:48, <yi.y.sun@linux.intel.com> wrote: >> >> > This patch implements set value flow for MBA including its callback >> >> > function and domctl interface. >> >> > >> >> > It also changes the memebers in 'cos_write_info' to transfer the >> >> > feature array, feature properties array and value array. Then, we >> >> > can write all features values on the cos id into MSRs. >> >> > >> >> > Because multiple features may co-exist, we need handle all features to write >> >> > values of them into a COS register with new COS ID. E.g: >> >> > 1. L3 CAT and MBA co-exist. >> >> > 2. Dom1 and Dom2 share a same COS ID (2). The L3 CAT CBM of Dom1 is 0x1ff, >> >> > the MBA Thrtle of Dom1 is 0xa. >> >> > 3. User wants to change MBA Thrtl of Dom1 to be 0x14. Because COS ID 2 is >> >> > used by Dom2 too, we have to pick a new COS ID 3. The values of Dom1 on >> >> > COS ID 3 are all default values as below: >> >> > --------- >> >> > | COS 3 | >> >> > --------- >> >> > L3 CAT | 0x7ff | >> >> > --------- >> >> > MBA | 0x0 | >> >> > --------- >> >> > 4. After setting, the L3 CAT CBM value of Dom1 should be kept and the new MBA >> >> > Thrtl is set. So, the values on COS ID 3 should be below. >> >> > --------- >> >> > | COS 3 | >> >> > --------- >> >> > L3 CAT | 0x1ff | >> >> > --------- >> >> > MBA | 0x14 | >> >> > --------- >> >> > >> >> > So, we should write all features values into their MSRs. That requires the >> >> > feature array, feature properties array and value array are input. >> >> >> >> How is this last aspect (and the respective changes) related to MBA? >> >> I.e. why isn't this needed with the (also independent but possibly >> >> co-existing) L2/L3 CAT features? >> >> >> >I tried to introduce this in L2 CAT patch set but did not succeed. As there is >> >no HW that L2 CAT and L3 CAT co-exist so far, I did not insist on this. >> >> Hmm, I'm afraid this wasn't then made clear enough to understand. I would >> certainly not have been against something that could in theory occur with >> L2/L3 CAT alone. In any event this means you don't want to mix this into this >> MBA specific change here. >> > Anyway, I think you suggest to split this as a new patch, right? Yes indeed. >> >> > static void do_write_psr_msrs(void *data) >> >> > { >> >> > const struct cos_write_info *info = data; >> >> > - struct feat_node *feat = info->feature; >> >> > - const struct feat_props *props = info->props; >> >> > - unsigned int i, cos = info->cos, cos_num = props->cos_num; >> >> > + unsigned int i, index = 0, cos = info->cos; >> >> > + const uint32_t *val_array = info->val; >> >> > >> >> > - for ( i = 0; i < cos_num; i++ ) >> >> > + for ( i = 0; i < ARRAY_SIZE(feat_props); i++ ) >> >> > { >> >> > - if ( feat->cos_reg_val[cos * cos_num + i] != info->val[i] ) >> >> > + struct feat_node *feat = info->features[i]; >> >> > + const struct feat_props *props = info->props[i]; >> >> > + unsigned int cos_num, j; >> >> > + >> >> > + if ( !feat || !props ) >> >> > + continue; >> >> > + >> >> > + cos_num = props->cos_num; >> >> > + if ( info->array_len < index + cos_num ) >> >> > + return; >> >> > + >> >> > + for ( j = 0; j < cos_num; j++ ) >> >> > { >> >> > - feat->cos_reg_val[cos * cos_num + i] = info->val[i]; >> >> > - props->write_msr(cos, info->val[i], props->type[i]); >> >> > + if ( feat->cos_reg_val[cos * cos_num + j] != val_array[index + j] ) >> >> > + feat->cos_reg_val[cos * cos_num + j] = >> >> > + props->write_msr(cos, val_array[index + j], props->type[j]); >> >> >> >> This renders partly useless the check: If hardware can alter the >> >> value, repeatedly requesting the same value to be written will >> >> no longer guarantee the MSR write to be skipped. If hardware >> >> behavior can't be predicted you may want to consider recording >> >> both the value in found by reading back the register written and >> >> the value that was written - a match with either would eliminate >> >> the need to do the write. >> >> >> >The hardware behavior is explicitly defined by SDM and mentioned in >> >'xl-psr.markdown' and 'intel_psr_mba.pandoc'. User should know that HW >> >can alter MBA value if the value is not valid. >> >> So if hardware behavior is fully defined, why don't you pre-adjust what is >> to be written to the value hardware would alter it to? >> > In previous version of MBA patch set, I pre-adjust the value in 'mba_check_thrtl'. > But Roger did not like that. So, the pre-adjust codes are removed. Could you point me at or repeat the reason(s) of his dislike? >> >This check is not only for MBA but also for CAT features that the HW >> >cannot alter CAT value. >> >> I don't understand this part. >> > I mean the check here are for all features so we cannot remove it. I _still_ don't understand: If the check can't be removed (even without MBA in mind), then the implication would be that the code prior to this series is buggy. In which case I'd expect you to submit a standalone bug fix, rather than mixing the fix into here. >> > Although this check is not a critical check, >> >it can prevent some non-necessary MSR write. >> >> That's my point - while previously all unnecessary writes were avoided, >> you now avoid only some. >> > Without the pre-adjust codes in 'mba_check_thrtl', if user inputs value, e.g. > 11/22/33/..., this check cannot prevent the write action. So, only some can > be avoided in current codes. Right. If it's worthwhile avoiding the writes, all of them should be avoided when the resulting value isn't going to change. Otherwise the write avoidance logic can/should be dropped altogether. Jan
>>> On 05.10.17 at 10:39, <roger.pau@citrix.com> wrote: > On Thu, Oct 05, 2017 at 04:48:12AM +0000, Yi Sun wrote: >> On 17-10-03 23:59:46, Jan Beulich wrote: >> > >>> Yi Sun <yi.y.sun@linux.intel.com> 09/29/17 4:58 AM >>> >> > >On 17-09-28 05:36:11, Jan Beulich wrote: >> > >> >>> On 23.09.17 at 11:48, <yi.y.sun@linux.intel.com> wrote: >> > >> > - feat->cos_reg_val[cos * cos_num + i] = info->val[i]; >> > >> > - props->write_msr(cos, info->val[i], props->type[i]); >> > >> > + if ( feat->cos_reg_val[cos * cos_num + j] != val_array[index + > j] ) >> > >> > + feat->cos_reg_val[cos * cos_num + j] = >> > >> > + props->write_msr(cos, val_array[index + j], > props->type[j]); >> > >> >> > >> This renders partly useless the check: If hardware can alter the >> > >> value, repeatedly requesting the same value to be written will >> > >> no longer guarantee the MSR write to be skipped. If hardware >> > >> behavior can't be predicted you may want to consider recording >> > >> both the value in found by reading back the register written and >> > >> the value that was written - a match with either would eliminate >> > >> the need to do the write. >> > >> >> > >The hardware behavior is explicitly defined by SDM and mentioned in >> > >'xl-psr.markdown' and 'intel_psr_mba.pandoc'. User should know that HW >> > >can alter MBA value if the value is not valid. >> > >> > So if hardware behavior is fully defined, why don't you pre-adjust what is >> > to be written to the value hardware would alter it to? >> > >> In previous version of MBA patch set, I pre-adjust the value in > 'mba_check_thrtl'. >> But Roger did not like that. So, the pre-adjust codes are removed. > > IMHO it's quite pointless to do such adjustments when the hardware > performs them already. Also, I fear that our adjustments might get > out-of-sync in the future with what hardware actually does. > > Maybe the result read back from the hardware (ie: adjusted) can be > stored and used in order to check whether a new value should be > written or not when switching? (I think this is the same that Jan > suggested above). Not exactly, no - I'd like to avoid the write for _any_ value resulting in the one currently stored in the hardware register. Hence my earlier question on whether the transformation done by hardware is well defined (i.e. _not_ model dependent or fully defined by CPUID output). Jan
On 17-10-05 03:39:06, Jan Beulich wrote: > >>> On 05.10.17 at 10:39, <roger.pau@citrix.com> wrote: > > On Thu, Oct 05, 2017 at 04:48:12AM +0000, Yi Sun wrote: > >> On 17-10-03 23:59:46, Jan Beulich wrote: > >> > >>> Yi Sun <yi.y.sun@linux.intel.com> 09/29/17 4:58 AM >>> > >> > >On 17-09-28 05:36:11, Jan Beulich wrote: > >> > >> >>> On 23.09.17 at 11:48, <yi.y.sun@linux.intel.com> wrote: > >> > >> > - feat->cos_reg_val[cos * cos_num + i] = info->val[i]; > >> > >> > - props->write_msr(cos, info->val[i], props->type[i]); > >> > >> > + if ( feat->cos_reg_val[cos * cos_num + j] != val_array[index + > > j] ) > >> > >> > + feat->cos_reg_val[cos * cos_num + j] = > >> > >> > + props->write_msr(cos, val_array[index + j], > > props->type[j]); > >> > >> > >> > >> This renders partly useless the check: If hardware can alter the > >> > >> value, repeatedly requesting the same value to be written will > >> > >> no longer guarantee the MSR write to be skipped. If hardware > >> > >> behavior can't be predicted you may want to consider recording > >> > >> both the value in found by reading back the register written and > >> > >> the value that was written - a match with either would eliminate > >> > >> the need to do the write. > >> > >> > >> > >The hardware behavior is explicitly defined by SDM and mentioned in > >> > >'xl-psr.markdown' and 'intel_psr_mba.pandoc'. User should know that HW > >> > >can alter MBA value if the value is not valid. > >> > > >> > So if hardware behavior is fully defined, why don't you pre-adjust what is > >> > to be written to the value hardware would alter it to? > >> > > >> In previous version of MBA patch set, I pre-adjust the value in > > 'mba_check_thrtl'. > >> But Roger did not like that. So, the pre-adjust codes are removed. > > > > IMHO it's quite pointless to do such adjustments when the hardware > > performs them already. Also, I fear that our adjustments might get > > out-of-sync in the future with what hardware actually does. > > > > Maybe the result read back from the hardware (ie: adjusted) can be > > stored and used in order to check whether a new value should be > > written or not when switching? (I think this is the same that Jan > > suggested above). > > Not exactly, no - I'd like to avoid the write for _any_ value > resulting in the one currently stored in the hardware register. > Hence my earlier question on whether the transformation > done by hardware is well defined (i.e. _not_ model dependent > or fully defined by CPUID output). > SDM does not mention it is model dependent. So, it is _not_ model dependent. > Jan
On 17-10-05 02:55:17, Jan Beulich wrote: > >>> On 05.10.17 at 06:48, <yi.y.sun@linux.intel.com> wrote: > > On 17-10-03 23:59:46, Jan Beulich wrote: > >> >>> Yi Sun <yi.y.sun@linux.intel.com> 09/29/17 4:58 AM >>> > >> >On 17-09-28 05:36:11, Jan Beulich wrote: > >> >> >>> On 23.09.17 at 11:48, <yi.y.sun@linux.intel.com> wrote: [...] > >> >> > { > >> >> > const struct cos_write_info *info = data; > >> >> > - struct feat_node *feat = info->feature; > >> >> > - const struct feat_props *props = info->props; > >> >> > - unsigned int i, cos = info->cos, cos_num = props->cos_num; > >> >> > + unsigned int i, index = 0, cos = info->cos; > >> >> > + const uint32_t *val_array = info->val; > >> >> > > >> >> > - for ( i = 0; i < cos_num; i++ ) > >> >> > + for ( i = 0; i < ARRAY_SIZE(feat_props); i++ ) > >> >> > { > >> >> > - if ( feat->cos_reg_val[cos * cos_num + i] != info->val[i] ) > >> >> > + struct feat_node *feat = info->features[i]; > >> >> > + const struct feat_props *props = info->props[i]; > >> >> > + unsigned int cos_num, j; > >> >> > + > >> >> > + if ( !feat || !props ) > >> >> > + continue; > >> >> > + > >> >> > + cos_num = props->cos_num; > >> >> > + if ( info->array_len < index + cos_num ) > >> >> > + return; > >> >> > + > >> >> > + for ( j = 0; j < cos_num; j++ ) > >> >> > { > >> >> > - feat->cos_reg_val[cos * cos_num + i] = info->val[i]; > >> >> > - props->write_msr(cos, info->val[i], props->type[i]); > >> >> > + if ( feat->cos_reg_val[cos * cos_num + j] != val_array[index + j] ) > >> >> > + feat->cos_reg_val[cos * cos_num + j] = > >> >> > + props->write_msr(cos, val_array[index + j], props->type[j]); > >> >> > >> >> This renders partly useless the check: If hardware can alter the > >> >> value, repeatedly requesting the same value to be written will > >> >> no longer guarantee the MSR write to be skipped. If hardware > >> >> behavior can't be predicted you may want to consider recording > >> >> both the value in found by reading back the register written and > >> >> the value that was written - a match with either would eliminate > >> >> the need to do the write. > >> >> > >> >The hardware behavior is explicitly defined by SDM and mentioned in > >> >'xl-psr.markdown' and 'intel_psr_mba.pandoc'. User should know that HW > >> >can alter MBA value if the value is not valid. > >> > >> So if hardware behavior is fully defined, why don't you pre-adjust what is > >> to be written to the value hardware would alter it to? > >> > > In previous version of MBA patch set, I pre-adjust the value in 'mba_check_thrtl'. > > But Roger did not like that. So, the pre-adjust codes are removed. > > Could you point me at or repeat the reason(s) of his dislike? > Roger has replied. > >> >This check is not only for MBA but also for CAT features that the HW > >> >cannot alter CAT value. > >> > >> I don't understand this part. > >> > > I mean the check here are for all features so we cannot remove it. > > I _still_ don't understand: If the check can't be removed (even > without MBA in mind), then the implication would be that the > code prior to this series is buggy. In which case I'd expect you to > submit a standalone bug fix, rather than mixing the fix into here. > Ok, I will send out a stand alone patch to fix this. > >> > Although this check is not a critical check, > >> >it can prevent some non-necessary MSR write. > >> > >> That's my point - while previously all unnecessary writes were avoided, > >> you now avoid only some. > >> > > Without the pre-adjust codes in 'mba_check_thrtl', if user inputs value, e.g. > > 11/22/33/..., this check cannot prevent the write action. So, only some can > > be avoided in current codes. > > Right. If it's worthwhile avoiding the writes, all of them should be > avoided when the resulting value isn't going to change. Otherwise > the write avoidance logic can/should be dropped altogether. > Per discussion in other mails, I think I will restore codes in 'mba_check_thrtl'. > Jan
diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c index 22650d7..66c9cab 100644 --- a/xen/arch/x86/domctl.c +++ b/xen/arch/x86/domctl.c @@ -1468,6 +1468,12 @@ long arch_do_domctl( PSR_TYPE_L2_CBM); break; + case XEN_DOMCTL_PSR_SET_MBA_THRTL: + ret = psr_set_val(d, domctl->u.psr_alloc.target, + domctl->u.psr_alloc.data, + PSR_TYPE_MBA_THRTL); + break; + case XEN_DOMCTL_PSR_GET_L3_CBM: ret = psr_get_val(d, domctl->u.psr_alloc.target, &val32, PSR_TYPE_L3_CBM); diff --git a/xen/arch/x86/psr.c b/xen/arch/x86/psr.c index 1d0a215..c6d4e31 100644 --- a/xen/arch/x86/psr.c +++ b/xen/arch/x86/psr.c @@ -137,7 +137,10 @@ static const struct feat_props { uint32_t data[], unsigned int array_len); /* write_msr is used to write out feature MSR register. */ - void (*write_msr)(unsigned int cos, uint32_t val, enum psr_type type); + uint32_t (*write_msr)(unsigned int cos, uint32_t val, enum psr_type type); + + /* check_val is used to check if input val fulfills SDM requirement. */ + bool (*check_val)(const struct feat_node *feat, unsigned long val); } *feat_props[FEAT_TYPE_NUM]; /* @@ -274,29 +277,6 @@ static enum psr_feat_type psr_type_to_feat_type(enum psr_type type) return feat_type; } -static bool psr_check_cbm(unsigned int cbm_len, unsigned long cbm) -{ - unsigned int first_bit, zero_bit; - - /* Set bits should only in the range of [0, cbm_len]. */ - if ( cbm & (~0ul << cbm_len) ) - return false; - - /* At least one bit need to be set. */ - if ( cbm == 0 ) - return false; - - first_bit = find_first_bit(&cbm, cbm_len); - zero_bit = find_next_zero_bit(&cbm, cbm_len, first_bit); - - /* Set bits should be contiguous. */ - if ( zero_bit < cbm_len && - find_next_bit(&cbm, cbm_len, zero_bit) < cbm_len ) - return false; - - return true; -} - /* Implementation of allocation features' functions. */ static bool cat_init_feature(const struct cpuid_leaf *regs, struct feat_node *feat, @@ -426,11 +406,36 @@ static bool cat_get_feat_info(const struct feat_node *feat, return true; } +static bool cat_check_cbm(const struct feat_node *feat, unsigned long cbm) +{ + unsigned int first_bit, zero_bit; + unsigned int cbm_len = feat->cat.cbm_len; + + /* + * Set bits should only in the range of [0, cbm_len]. + * And, at least one bit need to be set. + */ + if ( cbm & (~0ul << cbm_len) || cbm == 0 ) + return false; + + first_bit = find_first_bit(&cbm, cbm_len); + zero_bit = find_next_zero_bit(&cbm, cbm_len, first_bit); + + /* Set bits should be contiguous. */ + if ( zero_bit < cbm_len && + find_next_bit(&cbm, cbm_len, zero_bit) < cbm_len ) + return false; + + return true; +} + /* L3 CAT props */ -static void l3_cat_write_msr(unsigned int cos, uint32_t val, - enum psr_type type) +static uint32_t l3_cat_write_msr(unsigned int cos, uint32_t val, + enum psr_type type) { wrmsrl(MSR_IA32_PSR_L3_MASK(cos), val); + + return val; } static const struct feat_props l3_cat_props = { @@ -439,6 +444,7 @@ static const struct feat_props l3_cat_props = { .alt_type = PSR_TYPE_UNKNOWN, .get_feat_info = cat_get_feat_info, .write_msr = l3_cat_write_msr, + .check_val = cat_check_cbm, }; /* L3 CDP props */ @@ -453,13 +459,15 @@ static bool l3_cdp_get_feat_info(const struct feat_node *feat, return true; } -static void l3_cdp_write_msr(unsigned int cos, uint32_t val, - enum psr_type type) +static uint32_t l3_cdp_write_msr(unsigned int cos, uint32_t val, + enum psr_type type) { wrmsrl(((type == PSR_TYPE_L3_DATA) ? MSR_IA32_PSR_L3_MASK_DATA(cos) : MSR_IA32_PSR_L3_MASK_CODE(cos)), val); + + return val; } static const struct feat_props l3_cdp_props = { @@ -469,13 +477,16 @@ static const struct feat_props l3_cdp_props = { .alt_type = PSR_TYPE_L3_CBM, .get_feat_info = l3_cdp_get_feat_info, .write_msr = l3_cdp_write_msr, + .check_val = cat_check_cbm, }; /* L2 CAT props */ -static void l2_cat_write_msr(unsigned int cos, uint32_t val, - enum psr_type type) +static uint32_t l2_cat_write_msr(unsigned int cos, uint32_t val, + enum psr_type type) { wrmsrl(MSR_IA32_PSR_L2_MASK(cos), val); + + return val; } static const struct feat_props l2_cat_props = { @@ -484,6 +495,7 @@ static const struct feat_props l2_cat_props = { .alt_type = PSR_TYPE_UNKNOWN, .get_feat_info = cat_get_feat_info, .write_msr = l2_cat_write_msr, + .check_val = cat_check_cbm, }; /* MBA props */ @@ -502,9 +514,23 @@ static bool mba_get_feat_info(const struct feat_node *feat, return true; } -static void mba_write_msr(unsigned int cos, uint32_t val, - enum psr_type type) +static uint32_t mba_write_msr(unsigned int cos, uint32_t val, + enum psr_type type) +{ + wrmsrl(MSR_IA32_PSR_MBA_MASK(cos), val); + + /* Read actual value set by hardware. */ + rdmsrl(MSR_IA32_PSR_MBA_MASK(cos), val); + + return val; +} + +static bool mba_check_thrtl(const struct feat_node *feat, unsigned long thrtl) { + if ( thrtl > feat->mba.thrtl_max ) + return false; + + return true; } static const struct feat_props mba_props = { @@ -513,6 +539,7 @@ static const struct feat_props mba_props = { .alt_type = PSR_TYPE_UNKNOWN, .get_feat_info = mba_get_feat_info, .write_msr = mba_write_msr, + .check_val = mba_check_thrtl, }; static bool __init parse_psr_bool(const char *s, const char *delim, @@ -978,7 +1005,7 @@ static int insert_val_into_array(uint32_t val[], if ( array_len < props->cos_num ) return -ENOSPC; - if ( !psr_check_cbm(feat->cat.cbm_len, new_val) ) + if ( !props->check_val(feat, new_val) ) return -EINVAL; /* @@ -1210,25 +1237,39 @@ static unsigned int get_socket_cpu(unsigned int socket) struct cos_write_info { unsigned int cos; - struct feat_node *feature; + struct feat_node **features; const uint32_t *val; - const struct feat_props *props; + unsigned int array_len; + const struct feat_props **props; }; static void do_write_psr_msrs(void *data) { const struct cos_write_info *info = data; - struct feat_node *feat = info->feature; - const struct feat_props *props = info->props; - unsigned int i, cos = info->cos, cos_num = props->cos_num; + unsigned int i, index = 0, cos = info->cos; + const uint32_t *val_array = info->val; - for ( i = 0; i < cos_num; i++ ) + for ( i = 0; i < ARRAY_SIZE(feat_props); i++ ) { - if ( feat->cos_reg_val[cos * cos_num + i] != info->val[i] ) + struct feat_node *feat = info->features[i]; + const struct feat_props *props = info->props[i]; + unsigned int cos_num, j; + + if ( !feat || !props ) + continue; + + cos_num = props->cos_num; + if ( info->array_len < index + cos_num ) + return; + + for ( j = 0; j < cos_num; j++ ) { - feat->cos_reg_val[cos * cos_num + i] = info->val[i]; - props->write_msr(cos, info->val[i], props->type[i]); + if ( feat->cos_reg_val[cos * cos_num + j] != val_array[index + j] ) + feat->cos_reg_val[cos * cos_num + j] = + props->write_msr(cos, val_array[index + j], props->type[j]); } + + index += cos_num; } } @@ -1236,30 +1277,19 @@ static int write_psr_msrs(unsigned int socket, unsigned int cos, const uint32_t val[], unsigned int array_len, enum psr_feat_type feat_type) { - int ret; struct psr_socket_info *info = get_socket_info(socket); struct cos_write_info data = { .cos = cos, - .feature = info->features[feat_type], - .props = feat_props[feat_type], + .features = info->features, + .val = val, + .array_len = array_len, + .props = feat_props, }; if ( cos > info->features[feat_type]->cos_max ) return -EINVAL; - /* Skip to the feature's value head. */ - ret = skip_prior_features(&array_len, feat_type); - if ( ret < 0 ) - return ret; - - val += ret; - - if ( array_len < feat_props[feat_type]->cos_num ) - return -ENOSPC; - - data.val = val; - if ( socket == cpu_to_socket(smp_processor_id()) ) do_write_psr_msrs(&data); else diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h index ec6d2de..714b3e8 100644 --- a/xen/include/public/domctl.h +++ b/xen/include/public/domctl.h @@ -1156,6 +1156,7 @@ struct xen_domctl_psr_alloc { #define XEN_DOMCTL_PSR_GET_L3_DATA 5 #define XEN_DOMCTL_PSR_SET_L2_CBM 6 #define XEN_DOMCTL_PSR_GET_L2_CBM 7 +#define XEN_DOMCTL_PSR_SET_MBA_THRTL 8 #define XEN_DOMCTL_PSR_GET_MBA_THRTL 9 uint32_t cmd; /* IN: XEN_DOMCTL_PSR_* */ uint32_t target; /* IN */
This patch implements set value flow for MBA including its callback function and domctl interface. It also changes the memebers in 'cos_write_info' to transfer the feature array, feature properties array and value array. Then, we can write all features values on the cos id into MSRs. Because multiple features may co-exist, we need handle all features to write values of them into a COS register with new COS ID. E.g: 1. L3 CAT and MBA co-exist. 2. Dom1 and Dom2 share a same COS ID (2). The L3 CAT CBM of Dom1 is 0x1ff, the MBA Thrtle of Dom1 is 0xa. 3. User wants to change MBA Thrtl of Dom1 to be 0x14. Because COS ID 2 is used by Dom2 too, we have to pick a new COS ID 3. The values of Dom1 on COS ID 3 are all default values as below: --------- | COS 3 | --------- L3 CAT | 0x7ff | --------- MBA | 0x0 | --------- 4. After setting, the L3 CAT CBM value of Dom1 should be kept and the new MBA Thrtl is set. So, the values on COS ID 3 should be below. --------- | COS 3 | --------- L3 CAT | 0x1ff | --------- MBA | 0x14 | --------- So, we should write all features values into their MSRs. That requires the feature array, feature properties array and value array are input. Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com> --- CC: Jan Beulich <jbeulich@suse.com> CC: Andrew Cooper <andrew.cooper3@citrix.com> CC: Wei Liu <wei.liu2@citrix.com> CC: Roger Pau Monné <roger.pau@citrix.com> CC: Chao Peng <chao.p.peng@linux.intel.com> v4: - remove 'ALLOC_' from macro names. (suggested by Roger Pau Monné) - join two checks into a single if. (suggested by Roger Pau Monné) - remove redundant local variable 'array_len'. (suggested by Roger Pau Monné) v3: - modify commit message to make it clear. (suggested by Roger Pau Monné) - modify functionality of 'check_val' to make it simple to only check value. Change the last parameter type from 'unsigned long *' to 'unsigned long'. (suggested by Roger Pau Monné) - call rdmsrl to get value just written into MSR for MBA. Because HW can automatically change input value to what it wants. (suggested by Roger Pau Monné) - change type of 'write_msr' to 'uint32_t' to return the value actually written into MSR. Then, change 'do_write_psr_msrs' to set the returned value into 'cos_reg_val[]' - move the declaration of 'j' into loop in 'do_write_psr_msrs'. (suggested by Roger Pau Monné) - change 'mba_info' to 'mba'. (suggested by Roger Pau Monné) - change 'cat_info' to 'cat'. (suggested by Roger Pau Monné) - rename 'psr_cat/PSR_CAT' to 'psr_alloc/PSR_ALLOC' and remove 'op/OP' from name. (suggested by Roger Pau Monné) - change 'PSR_VAL_TYPE_MBA' to 'PSR_TYPE_MBA_THRTL'. (suggested by Roger Pau Monné) v2: - remove linear mode 'thrtl_max' check in 'mba_check_thrtl' because it has been checked in 'mba_init_feature'. (suggested by Chao Peng) - for non-linear mode, check if '*thrtl' is not 0 in 'mba_check_thrtl'. If it is 0, we do not need to change it. (suggested by Chao Peng) - move comments to explain changes of 'cos_write_info' from psr.c to commit message. (suggested by Chao Peng) --- xen/arch/x86/domctl.c | 6 ++ xen/arch/x86/psr.c | 144 ++++++++++++++++++++++++++------------------ xen/include/public/domctl.h | 1 + 3 files changed, 94 insertions(+), 57 deletions(-)