
[RESEND,v5,04/24] x86: refactor psr: implement CPU init and free flow.

Message ID 1484805686-7249-5-git-send-email-yi.y.sun@linux.intel.com (mailing list archive)
State New, archived

Commit Message

Yi Sun Jan. 19, 2017, 6:01 a.m. UTC
This patch implements the CPU init and free flow, including L3 CAT
initialization and freeing of the feature list.

Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
---
v5:
    - modify commit message because of code changes.
    - add 'struct cpuid_leaf_regs' to save CPU register values and reduce
      the number of parameters of the init_feature function.
    - modify comments to make them accurate.
    - rename variables for clarity, e.g. 'feat_tmp' to 'feat'.
    - use 'list_for_each_entry_safe' when freeing features.
    - do not delete 'feat_l3_cat' so it can be reused when a CPU comes online.
    - use 'current_cpu_data'.
    - clear 'X86_FEATURE_PQE' if cpuid_level is not right.
    - Print socket info when 'opt_cpu_info' is true.
    - remove 'cpu_prepare_work' function and move contents of it into
      'psr_cpu_prepare'.
---
 xen/arch/x86/psr.c | 176 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 174 insertions(+), 2 deletions(-)

Comments

Konrad Rzeszutek Wilk Jan. 31, 2017, 2:44 a.m. UTC | #1
On Thu, Jan 19, 2017 at 02:01:06PM +0800, Yi Sun wrote:
> This patch implements the CPU init and free flow including L3 CAT
> initialization and feature list free.
> 
> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
> ---
> v5:
>     - modify commit message beacuse of code changes.
>     - add 'struct cpuid_leaf_regs' to save cpu registers value to reduce
>       parameters of init_feature function.
>     - modify comments to make them accurate.
>     - modify variables names to make them better, e.g. 'feat_tmp' to 'feat'.
>     - use 'list_for_each_entry_safe' when free features.
>     - do not delete 'feat_l3_cat' to make it can be reused when cpu online.
>     - use 'current_cpu_data'.
>     - clear 'X86_FEATURE_PQE' if cpuid_level is not right.
>     - Print socket info when 'opt_cpu_info' is true.
>     - remove 'cpu_prepare_work' function and move contents of it into
>       'psr_cpu_prepare'.
> ---
>  xen/arch/x86/psr.c | 176 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 174 insertions(+), 2 deletions(-)
> 
> diff --git a/xen/arch/x86/psr.c b/xen/arch/x86/psr.c
> index f7ff3fc..e9dc07a 100644
> --- a/xen/arch/x86/psr.c
> +++ b/xen/arch/x86/psr.c
> @@ -35,6 +35,9 @@
>  #define PSR_CAT        (1<<1)
>  #define PSR_CDP        (1<<2)
>  
> +#define CAT_CBM_LEN_MASK 0x1f
> +#define CAT_COS_MAX_MASK 0xffff
> +
>  /*
>   * Per SDM chapter 'Cache Allocation Technology: Cache Mask Configuration',
>   * the MSRs range from 0C90H through 0D0FH (inclusive), enables support for
> @@ -127,6 +130,13 @@ struct feat_node {
>      struct list_head list;
>  };
>  
> +struct cpuid_leaf_regs {
> +    unsigned int eax;
> +    unsigned int ebx;
> +    unsigned int ecx;
> +    unsigned int edx;
> +};

Could you use 'struct cpuid_leaf' in x86_emulate.h ?

It would only require "#include <asm/x86_emulate.h>" I believe.
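For illustration, the substitution could look roughly like this, assuming
struct cpuid_leaf exposes the a/b/c/d members referred to elsewhere in this
thread (a sketch of the suggestion, not the applied patch):

    /*
     * Sketch only: reuse the existing struct cpuid_leaf (fields a, b, c, d)
     * from x86_emulate.h instead of defining a new cpuid_leaf_regs.
     */
    #include <asm/x86_emulate.h>

    static void l3_cat_init_feature(struct cpuid_leaf regs,
                                    struct feat_node *feat,
                                    struct psr_socket_info *info)
    {
        struct psr_cat_hw_info l3_cat;

        /* No valid value so do not enable the feature. */
        if ( !regs.a || !regs.d )
            return;

        l3_cat.cbm_len = (regs.a & CAT_CBM_LEN_MASK) + 1;
        l3_cat.cos_max = min(opt_cos_max, regs.d & CAT_COS_MAX_MASK);

        /* cos=0 is reserved as default cbm (all bits within cbm_len are 1). */
        feat->cos_reg_val[0] = (1ull << l3_cat.cbm_len) - 1;
        feat->info.l3_cat_info = l3_cat;
        /* Remainder (feat_mask, list insertion, printk) as in the patch above. */
    }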

> +
>  struct psr_assoc {
>      uint64_t val;
>      uint64_t cos_mask;
> @@ -134,11 +144,76 @@ struct psr_assoc {
>  
>  struct psr_cmt *__read_mostly psr_cmt;
>  
> +static struct psr_socket_info *__read_mostly socket_info;
> +
>  static unsigned int opt_psr;
>  static unsigned int __initdata opt_rmid_max = 255;
> +static unsigned int __read_mostly opt_cos_max = MAX_COS_REG_CNT;
>  static uint64_t rmid_mask;
>  static DEFINE_PER_CPU(struct psr_assoc, psr_assoc);
>  
> +/*
> + * Declare global feature list entry for every feature to facilitate the
> + * feature list creation. It will be allocated in psr_cpu_prepare() and
> + * inserted into feature list in cpu_init_work().

You may also want to say it is protected by the cpu_add_remove_lock
spinlock.

> + */
> +static struct feat_node *feat_l3_cat;
> +
> +/* Common functions. */
> +static void free_feature(struct psr_socket_info *info)
> +{
> +    struct feat_node *feat, *next;
> +
> +    if ( !info )
> +        return;
> +
> +    list_for_each_entry_safe(feat, next, &info->feat_list, list)
> +    {
> +        clear_bit(feat->feature, &info->feat_mask);

Would it make sense to use __clear_bit?

As in you have already a lock (cpu_add_remove_lock) so nobody
can change the feat_mask. Hence thinking there is no need for the
lock operation?
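If that locking assumption holds, the loop could use the non-atomic helper; a
minimal sketch (not the applied patch):

    static void free_feature(struct psr_socket_info *info)
    {
        struct feat_node *feat, *next;

        if ( !info )
            return;

        list_for_each_entry_safe(feat, next, &info->feat_list, list)
        {
            /*
             * Non-atomic variant, assuming callers are serialised by
             * cpu_add_remove_lock so a locked RMW is not needed.
             */
            __clear_bit(feat->feature, &info->feat_mask);
            list_del(&feat->list);
            xfree(feat);
        }
    }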

> +        list_del(&feat->list);
> +        xfree(feat);
> +    }
> +}
> +
> +/* L3 CAT functions implementation. */
> +static void l3_cat_init_feature(struct cpuid_leaf_regs regs,
> +                                struct feat_node *feat,
> +                                struct psr_socket_info *info)
> +{
> +    struct psr_cat_hw_info l3_cat;
> +    unsigned int socket;
> +
> +    /* No valid value so do not enable feature. */
> +    if ( !regs.eax || !regs.edx )
> +        return;
> +
> +    l3_cat.cbm_len = (regs.eax & CAT_CBM_LEN_MASK) + 1;
> +    l3_cat.cos_max = min(opt_cos_max, regs.edx & CAT_COS_MAX_MASK);
> +
> +    /* cos=0 is reserved as default cbm(all bits within cbm_len are 1). */
> +    feat->cos_reg_val[0] = (1ull << l3_cat.cbm_len) - 1;
> +
> +    feat->feature = PSR_SOCKET_L3_CAT;
> +    __set_bit(PSR_SOCKET_L3_CAT, &info->feat_mask);

Aha! So you do reuse the enum! In which case you really
want to make sure you mention that in 'struct psr_socket_info'
the 'feat_mask' is actually the bit values of enum psr_feat_type.

Also do you want to add an ASSERT before you call the __set_bit:

ASSERT ( !test_bit(PSR_SOCKET_L3_CAT, &info->feat_mask) );

?
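i.e. roughly (a sketch of the suggestion, not the applied patch):

    /* The feature must not already be registered for this socket. */
    ASSERT(!test_bit(PSR_SOCKET_L3_CAT, &info->feat_mask));
    __set_bit(PSR_SOCKET_L3_CAT, &info->feat_mask);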
> +
> +    feat->info.l3_cat_info = l3_cat;
> +
> +    info->nr_feat++;
> +
> +    /* Add this feature into list. */
> +    list_add_tail(&feat->list, &info->feat_list);
> +
> +    socket = cpu_to_socket(smp_processor_id());
> +    if ( opt_cpu_info )
> +        printk(XENLOG_INFO
> +           "L3 CAT: enabled on socket %u, cos_max:%u, cbm_len:%u\n",

Odd spacing. I would think that the "L3 CAT: .. "
would be on the same column as XENLOG_INFO ?

Or you could flip this logic (if you are worried about the silly
80 characters requirement) around and do:

if ( !opt_cpu_info )
	return

And then do the printk on its own line?
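The early-return variant could look like this (sketch only; the printk is
already the last statement of l3_cat_init_feature(), so returning early is
safe):

    socket = cpu_to_socket(smp_processor_id());

    if ( !opt_cpu_info )
        return;

    printk(XENLOG_INFO
           "L3 CAT: enabled on socket %u, cos_max:%u, cbm_len:%u\n",
           socket, feat->info.l3_cat_info.cos_max,
           feat->info.l3_cat_info.cbm_len);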
> +           socket, feat->info.l3_cat_info.cos_max,
> +           feat->info.l3_cat_info.cbm_len);
> +}
> +
> +static const struct feat_ops l3_cat_ops = {
> +};
> +
>  static void __init parse_psr_bool(char *s, char *value, char *feature,
>                                    unsigned int mask)
>  {
> @@ -178,6 +253,9 @@ static void __init parse_psr_param(char *s)
>          if ( val_str && !strcmp(s, "rmid_max") )
>              opt_rmid_max = simple_strtoul(val_str, NULL, 0);
>  
> +        if ( val_str && !strcmp(s, "cos_max") )
> +            opt_cos_max = simple_strtoul(val_str, NULL, 0);
> +
>          s = ss + 1;
>      } while ( ss );
>  }
> @@ -333,18 +411,108 @@ void psr_domain_free(struct domain *d)
>      psr_free_rmid(d);
>  }
>  
> +static void cpu_init_work(void)
> +{
> +    struct psr_socket_info *info;
> +    unsigned int socket;
> +    unsigned int cpu = smp_processor_id();
> +    struct feat_node *feat;
> +    struct cpuid_leaf_regs regs;

Could you use the x86_emulate.h one?

Would it also make sense to make sure that you initialize
to default values:

	struct cpuid_leaf regs = { .a = 0, .b = 0, .c = 0, .d = 0 };
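Put together, the leaf reads in cpu_init_work() might then look like this
(sketch, assuming the a/b/c/d field names of struct cpuid_leaf):

    struct cpuid_leaf regs = { };   /* zero-initialised, as suggested */

    cpuid_count(PSR_CPUID_LEVEL_CAT, 0, &regs.a, &regs.b, &regs.c, &regs.d);
    if ( regs.b & PSR_RESOURCE_TYPE_L3 )
    {
        cpuid_count(PSR_CPUID_LEVEL_CAT, 1,
                    &regs.a, &regs.b, &regs.c, &regs.d);
        /* ... hand regs to l3_cat_init_feature() as in the patch ... */
    }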
> +
> +    if ( !cpu_has(&current_cpu_data, X86_FEATURE_PQE) )
> +        return;
> +    else if ( current_cpu_data.cpuid_level < PSR_CPUID_LEVEL_CAT )
> +    {
> +        clear_bit(X86_FEATURE_PQE, current_cpu_data.x86_capability);
> +        return;
> +    }
> +
> +    socket = cpu_to_socket(cpu);
> +    info = socket_info + socket;
> +    if ( info->feat_mask )
> +        return;
> +
> +    INIT_LIST_HEAD(&info->feat_list);
> +    spin_lock_init(&info->ref_lock);
> +
> +    cpuid_count(PSR_CPUID_LEVEL_CAT, 0,
> +                &regs.eax, &regs.ebx, &regs.ecx, &regs.edx);
> +    if ( regs.ebx & PSR_RESOURCE_TYPE_L3 )
> +    {
> +        cpuid_count(PSR_CPUID_LEVEL_CAT, 1,
> +                    &regs.eax, &regs.ebx, &regs.ecx, &regs.edx);
> +
> +        feat = feat_l3_cat;
> +        feat_l3_cat = NULL;

.. so how does this work when this is initialized on a third
socket? Won't feat_l3_cat at that point be NULL leading to this
one below becoming a NULL pointer?

Ah wait. psr_cpu_prepare will allocate it.

Hm, could you add a comment about this?

/* psr_cpu_prepare will allocate it on subsequent CPU onlining. */
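For instance (sketch of where such a comment could sit):

    feat = feat_l3_cat;
    /* psr_cpu_prepare() allocates a fresh node when further CPUs come online. */
    feat_l3_cat = NULL;
    feat->ops = l3_cat_ops;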

> +        feat->ops = l3_cat_ops;
> +
> +        l3_cat_init_feature(regs, feat, info);
> +    }
> +}
> +
> +static void cpu_fini_work(unsigned int cpu)
> +{
> +    unsigned int socket = cpu_to_socket(cpu);
> +
> +    if ( !socket_cpumask[socket] || cpumask_empty(socket_cpumask[socket]) )
> +    {
> +        free_feature(socket_info + socket);
> +    }
> +}
> +
> +static void __init init_psr(void)
> +{
> +    if ( opt_cos_max < 1 )
> +    {
> +        printk(XENLOG_INFO "CAT: disabled, cos_max is too small\n");
> +        return;
> +    }
> +
> +    socket_info = xzalloc_array(struct psr_socket_info, nr_sockets);
> +
> +    if ( !socket_info )
> +    {
> +        printk(XENLOG_INFO "Fail to alloc socket_info!\n");

Failed
> +        return;
> +    }
> +}
> +
> +static void __init psr_free(void)
> +{
> +    unsigned int i;
> +
> +    for ( i = 0; i < nr_sockets; i++ )
> +        free_feature(&socket_info[i]);
> +
> +    xfree(socket_info);
> +    socket_info = NULL;
> +}
> +
>  static int psr_cpu_prepare(unsigned int cpu)
>  {
> +    if ( !socket_info )
> +        return 0;
> +
> +    /* Malloc memory for the global feature head here. */
> +    if ( feat_l3_cat == NULL &&
> +         (feat_l3_cat = xzalloc(struct feat_node)) == NULL )

/me scratches his head.

Lets say we have a two socket machine. Each socket has
two CPUs (so total of 4 CPUs).

On CPU0  (in psr_presmp_init) it allocates feat_l3_cat. Then
later in (in psr_presmp_init) you call psr_cpu_init which
sets feat_l3_cat to NULL (and 'feat' is feat_l3_cat).

When CPU1 starts up then, psr_cpu_prepare is called
which means we allocate a new feat_l3_cat. 'cpu_init_work'
short-circuits (as info->feat_mask has a value) and does not
do anything.

When CPU2 (socket two), starts up then feat_l3_cat (as 'feat')
is used (in psr_cpu_init and again sets feat_l3_cat to NULL).

The last CPU3 (socket #2) starts up, and since feat_l3_cat is
NULL - it is allocated. But in 'cpu_init_work' it short-circuits
so we don't use third allocated 'feat_l3_cat' .

And we have an 'feat_l3_cat' that is not used for anything..

In other words - a memory leak. Granted a very small one.


> +        return -ENOMEM;
> +
>      return 0;
>  }
>  
>  static void psr_cpu_init(void)
>  {
> +    if ( socket_info )
> +        cpu_init_work();
> +
>      psr_assoc_init();
>  }
>  
>  static void psr_cpu_fini(unsigned int cpu)
>  {
> +    if ( socket_info )
> +        cpu_fini_work(cpu);
>      return;
>  }
>  
> @@ -386,10 +554,14 @@ static int __init psr_presmp_init(void)
>      if ( (opt_psr & PSR_CMT) && opt_rmid_max )
>          init_psr_cmt(opt_rmid_max);
>  
> -    psr_cpu_prepare(0);
> +    if ( opt_psr & PSR_CAT )
> +        init_psr();
> +
> +    if ( psr_cpu_prepare(0) )
> +        psr_free();
>  
>      psr_cpu_init();
> -    if ( psr_cmt_enabled() )
> +    if ( psr_cmt_enabled() || socket_info )
>          register_cpu_notifier(&cpu_nfb);
>  
>      return 0;
> -- 
> 1.9.1
>
Jan Beulich Jan. 31, 2017, 10:14 a.m. UTC | #2
>>> On 31.01.17 at 03:44, <konrad.wilk@oracle.com> wrote:
> On Thu, Jan 19, 2017 at 02:01:06PM +0800, Yi Sun wrote:
>> @@ -127,6 +130,13 @@ struct feat_node {
>>      struct list_head list;
>>  };
>>  
>> +struct cpuid_leaf_regs {
>> +    unsigned int eax;
>> +    unsigned int ebx;
>> +    unsigned int ecx;
>> +    unsigned int edx;
>> +};
> 
> Could you use 'struct cpuid_leaf' in x86_emulate.h ?
> 
> It would only require "#include <asm/x86_emulate.h>" I believe.

Indeed - I recall specifically having asked for that.

>>  static int psr_cpu_prepare(unsigned int cpu)
>>  {
>> +    if ( !socket_info )
>> +        return 0;
>> +
>> +    /* Malloc memory for the global feature head here. */
>> +    if ( feat_l3_cat == NULL &&
>> +         (feat_l3_cat = xzalloc(struct feat_node)) == NULL )
> 
> /me scratches his head.
> 
> Lets say we have a two socket machine. Each socket has
> two CPUs (so total of 4 CPUs).
> 
> On CPU0  (in psr_presmp_init) it allocates feat_l3_cat. Then
> later in (in psr_presmp_init) you call psr_cpu_init which
> sets feat_l3_cat to NULL (and 'feat' is feat_l3_cat).
> 
> When CPU1 starts up then, psr_cpu_prepare is called
> which means we allocate a new feat_l3_cat. 'cpu_init_work'
> short-circuits (as info->feat_mask has a value) and does not
> do anything.
> 
> When CPU2 (socket two), starts up then feat_l3_cat (as 'feat')
> is used (in psr_cpu_init and again sets feat_l3_cat to NULL).
> 
> The last CPU3 (socket #2) starts up, and since feat_l3_cat is
> NULL - it is allocated. But in 'cpu_init_work' it short-circuits
> so we don't use third allocated 'feat_l3_cat' .
> 
> And we have an 'feat_l3_cat' that is not used for anything..
> 
> In other words - a memory leak. Granted a very small one.

I did recommend to do it that way, to simplify things a little.

Jan
Andrew Cooper Jan. 31, 2017, 10:53 a.m. UTC | #3
On 31/01/17 02:44, Konrad Rzeszutek Wilk wrote:
> On Thu, Jan 19, 2017 at 02:01:06PM +0800, Yi Sun wrote:
>> This patch implements the CPU init and free flow including L3 CAT
>> initialization and feature list free.
>>
>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
>> ---
>> v5:
>>     - modify commit message beacuse of code changes.
>>     - add 'struct cpuid_leaf_regs' to save cpu registers value to reduce
>>       parameters of init_feature function.
>>     - modify comments to make them accurate.
>>     - modify variables names to make them better, e.g. 'feat_tmp' to 'feat'.
>>     - use 'list_for_each_entry_safe' when free features.
>>     - do not delete 'feat_l3_cat' to make it can be reused when cpu online.
>>     - use 'current_cpu_data'.
>>     - clear 'X86_FEATURE_PQE' if cpuid_level is not right.
>>     - Print socket info when 'opt_cpu_info' is true.
>>     - remove 'cpu_prepare_work' function and move contents of it into
>>       'psr_cpu_prepare'.
>> ---
>>  xen/arch/x86/psr.c | 176 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
>>  1 file changed, 174 insertions(+), 2 deletions(-)
>>
>> diff --git a/xen/arch/x86/psr.c b/xen/arch/x86/psr.c
>> index f7ff3fc..e9dc07a 100644
>> --- a/xen/arch/x86/psr.c
>> +++ b/xen/arch/x86/psr.c
>> @@ -35,6 +35,9 @@
>>  #define PSR_CAT        (1<<1)
>>  #define PSR_CDP        (1<<2)
>>  
>> +#define CAT_CBM_LEN_MASK 0x1f
>> +#define CAT_COS_MAX_MASK 0xffff
>> +
>>  /*
>>   * Per SDM chapter 'Cache Allocation Technology: Cache Mask Configuration',
>>   * the MSRs range from 0C90H through 0D0FH (inclusive), enables support for
>> @@ -127,6 +130,13 @@ struct feat_node {
>>      struct list_head list;
>>  };
>>  
>> +struct cpuid_leaf_regs {
>> +    unsigned int eax;
>> +    unsigned int ebx;
>> +    unsigned int ecx;
>> +    unsigned int edx;
>> +};
> Could you use 'struct cpuid_leaf' in x86_emulate.h ?
>
> It would only require "#include <asm/x86_emulate.h>" I believe.

I expect my cpuid_leaf work is newer than this series.

If there are going to be external users, I would also recommend moving
the cpuid_{,count}_leaf() helpers to static inlines in cpuid.h.

~Andrew
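Such a helper might look roughly like this (sketch; the exact name and header
placement are assumptions based on the suggestion above, not existing API):

    /* Sketch of a static-inline wrapper filling a struct cpuid_leaf. */
    static inline void cpuid_count_leaf(uint32_t leaf, uint32_t subleaf,
                                        struct cpuid_leaf *l)
    {
        cpuid_count(leaf, subleaf, &l->a, &l->b, &l->c, &l->d);
    }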
Konrad Rzeszutek Wilk Jan. 31, 2017, 2:13 p.m. UTC | #4
On Tue, Jan 31, 2017 at 03:14:25AM -0700, Jan Beulich wrote:
> >>> On 31.01.17 at 03:44, <konrad.wilk@oracle.com> wrote:
> > On Thu, Jan 19, 2017 at 02:01:06PM +0800, Yi Sun wrote:
> >> @@ -127,6 +130,13 @@ struct feat_node {
> >>      struct list_head list;
> >>  };
> >>  
> >> +struct cpuid_leaf_regs {
> >> +    unsigned int eax;
> >> +    unsigned int ebx;
> >> +    unsigned int ecx;
> >> +    unsigned int edx;
> >> +};
> > 
> > Could you use 'struct cpuid_leaf' in x86_emulate.h ?
> > 
> > It would only require "#include <asm/x86_emulate.h>" I believe.
> 
> Indeed - I recall specifically having asked for that.
> 
> >>  static int psr_cpu_prepare(unsigned int cpu)
> >>  {
> >> +    if ( !socket_info )
> >> +        return 0;
> >> +
> >> +    /* Malloc memory for the global feature head here. */
> >> +    if ( feat_l3_cat == NULL &&
> >> +         (feat_l3_cat = xzalloc(struct feat_node)) == NULL )
> > 
> > /me scratches his head.
> > 
> > Lets say we have a two socket machine. Each socket has
> > two CPUs (so total of 4 CPUs).
> > 
> > On CPU0  (in psr_presmp_init) it allocates feat_l3_cat. Then
> > later in (in psr_presmp_init) you call psr_cpu_init which
> > sets feat_l3_cat to NULL (and 'feat' is feat_l3_cat).
> > 
> > When CPU1 starts up then, psr_cpu_prepare is called
> > which means we allocate a new feat_l3_cat. 'cpu_init_work'
> > short-circuits (as info->feat_mask has a value) and does not
> > do anything.
> > 
> > When CPU2 (socket two), starts up then feat_l3_cat (as 'feat')
> > is used (in psr_cpu_init and again sets feat_l3_cat to NULL).
> > 
> > The last CPU3 (socket #2) starts up, and since feat_l3_cat is
> > NULL - it is allocated. But in 'cpu_init_work' it short-circuits
> > so we don't use third allocated 'feat_l3_cat' .
> > 
> > And we have an 'feat_l3_cat' that is not used for anything..
> > 
> > In other words - a memory leak. Granted a very small one.
> 
> I did recommend to do it that way, to simplify things a little.

Aha! In which case perhaps a little comment saying that it is
OK to have this one memory leak to make the code simpler.


> 
> Jan
>
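Such a comment in psr_cpu_prepare() could read along these lines (wording is
illustrative only):

    /*
     * Allocate the global feature node. On a socket that is already
     * initialised this allocation may never be consumed; that single,
     * bounded leak is accepted to keep the code simple.
     */
    if ( feat_l3_cat == NULL &&
         (feat_l3_cat = xzalloc(struct feat_node)) == NULL )
        return -ENOMEM;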

Patch

diff --git a/xen/arch/x86/psr.c b/xen/arch/x86/psr.c
index f7ff3fc..e9dc07a 100644
--- a/xen/arch/x86/psr.c
+++ b/xen/arch/x86/psr.c
@@ -35,6 +35,9 @@ 
 #define PSR_CAT        (1<<1)
 #define PSR_CDP        (1<<2)
 
+#define CAT_CBM_LEN_MASK 0x1f
+#define CAT_COS_MAX_MASK 0xffff
+
 /*
  * Per SDM chapter 'Cache Allocation Technology: Cache Mask Configuration',
  * the MSRs range from 0C90H through 0D0FH (inclusive), enables support for
@@ -127,6 +130,13 @@  struct feat_node {
     struct list_head list;
 };
 
+struct cpuid_leaf_regs {
+    unsigned int eax;
+    unsigned int ebx;
+    unsigned int ecx;
+    unsigned int edx;
+};
+
 struct psr_assoc {
     uint64_t val;
     uint64_t cos_mask;
@@ -134,11 +144,76 @@  struct psr_assoc {
 
 struct psr_cmt *__read_mostly psr_cmt;
 
+static struct psr_socket_info *__read_mostly socket_info;
+
 static unsigned int opt_psr;
 static unsigned int __initdata opt_rmid_max = 255;
+static unsigned int __read_mostly opt_cos_max = MAX_COS_REG_CNT;
 static uint64_t rmid_mask;
 static DEFINE_PER_CPU(struct psr_assoc, psr_assoc);
 
+/*
+ * Declare global feature list entry for every feature to facilitate the
+ * feature list creation. It will be allocated in psr_cpu_prepare() and
+ * inserted into feature list in cpu_init_work().
+ */
+static struct feat_node *feat_l3_cat;
+
+/* Common functions. */
+static void free_feature(struct psr_socket_info *info)
+{
+    struct feat_node *feat, *next;
+
+    if ( !info )
+        return;
+
+    list_for_each_entry_safe(feat, next, &info->feat_list, list)
+    {
+        clear_bit(feat->feature, &info->feat_mask);
+        list_del(&feat->list);
+        xfree(feat);
+    }
+}
+
+/* L3 CAT functions implementation. */
+static void l3_cat_init_feature(struct cpuid_leaf_regs regs,
+                                struct feat_node *feat,
+                                struct psr_socket_info *info)
+{
+    struct psr_cat_hw_info l3_cat;
+    unsigned int socket;
+
+    /* No valid value so do not enable feature. */
+    if ( !regs.eax || !regs.edx )
+        return;
+
+    l3_cat.cbm_len = (regs.eax & CAT_CBM_LEN_MASK) + 1;
+    l3_cat.cos_max = min(opt_cos_max, regs.edx & CAT_COS_MAX_MASK);
+
+    /* cos=0 is reserved as default cbm(all bits within cbm_len are 1). */
+    feat->cos_reg_val[0] = (1ull << l3_cat.cbm_len) - 1;
+
+    feat->feature = PSR_SOCKET_L3_CAT;
+    __set_bit(PSR_SOCKET_L3_CAT, &info->feat_mask);
+
+    feat->info.l3_cat_info = l3_cat;
+
+    info->nr_feat++;
+
+    /* Add this feature into list. */
+    list_add_tail(&feat->list, &info->feat_list);
+
+    socket = cpu_to_socket(smp_processor_id());
+    if ( opt_cpu_info )
+        printk(XENLOG_INFO
+           "L3 CAT: enabled on socket %u, cos_max:%u, cbm_len:%u\n",
+           socket, feat->info.l3_cat_info.cos_max,
+           feat->info.l3_cat_info.cbm_len);
+}
+
+static const struct feat_ops l3_cat_ops = {
+};
+
 static void __init parse_psr_bool(char *s, char *value, char *feature,
                                   unsigned int mask)
 {
@@ -178,6 +253,9 @@  static void __init parse_psr_param(char *s)
         if ( val_str && !strcmp(s, "rmid_max") )
             opt_rmid_max = simple_strtoul(val_str, NULL, 0);
 
+        if ( val_str && !strcmp(s, "cos_max") )
+            opt_cos_max = simple_strtoul(val_str, NULL, 0);
+
         s = ss + 1;
     } while ( ss );
 }
@@ -333,18 +411,108 @@  void psr_domain_free(struct domain *d)
     psr_free_rmid(d);
 }
 
+static void cpu_init_work(void)
+{
+    struct psr_socket_info *info;
+    unsigned int socket;
+    unsigned int cpu = smp_processor_id();
+    struct feat_node *feat;
+    struct cpuid_leaf_regs regs;
+
+    if ( !cpu_has(&current_cpu_data, X86_FEATURE_PQE) )
+        return;
+    else if ( current_cpu_data.cpuid_level < PSR_CPUID_LEVEL_CAT )
+    {
+        clear_bit(X86_FEATURE_PQE, current_cpu_data.x86_capability);
+        return;
+    }
+
+    socket = cpu_to_socket(cpu);
+    info = socket_info + socket;
+    if ( info->feat_mask )
+        return;
+
+    INIT_LIST_HEAD(&info->feat_list);
+    spin_lock_init(&info->ref_lock);
+
+    cpuid_count(PSR_CPUID_LEVEL_CAT, 0,
+                &regs.eax, &regs.ebx, &regs.ecx, &regs.edx);
+    if ( regs.ebx & PSR_RESOURCE_TYPE_L3 )
+    {
+        cpuid_count(PSR_CPUID_LEVEL_CAT, 1,
+                    &regs.eax, &regs.ebx, &regs.ecx, &regs.edx);
+
+        feat = feat_l3_cat;
+        feat_l3_cat = NULL;
+        feat->ops = l3_cat_ops;
+
+        l3_cat_init_feature(regs, feat, info);
+    }
+}
+
+static void cpu_fini_work(unsigned int cpu)
+{
+    unsigned int socket = cpu_to_socket(cpu);
+
+    if ( !socket_cpumask[socket] || cpumask_empty(socket_cpumask[socket]) )
+    {
+        free_feature(socket_info + socket);
+    }
+}
+
+static void __init init_psr(void)
+{
+    if ( opt_cos_max < 1 )
+    {
+        printk(XENLOG_INFO "CAT: disabled, cos_max is too small\n");
+        return;
+    }
+
+    socket_info = xzalloc_array(struct psr_socket_info, nr_sockets);
+
+    if ( !socket_info )
+    {
+        printk(XENLOG_INFO "Fail to alloc socket_info!\n");
+        return;
+    }
+}
+
+static void __init psr_free(void)
+{
+    unsigned int i;
+
+    for ( i = 0; i < nr_sockets; i++ )
+        free_feature(&socket_info[i]);
+
+    xfree(socket_info);
+    socket_info = NULL;
+}
+
 static int psr_cpu_prepare(unsigned int cpu)
 {
+    if ( !socket_info )
+        return 0;
+
+    /* Malloc memory for the global feature head here. */
+    if ( feat_l3_cat == NULL &&
+         (feat_l3_cat = xzalloc(struct feat_node)) == NULL )
+        return -ENOMEM;
+
     return 0;
 }
 
 static void psr_cpu_init(void)
 {
+    if ( socket_info )
+        cpu_init_work();
+
     psr_assoc_init();
 }
 
 static void psr_cpu_fini(unsigned int cpu)
 {
+    if ( socket_info )
+        cpu_fini_work(cpu);
     return;
 }
 
@@ -386,10 +554,14 @@  static int __init psr_presmp_init(void)
     if ( (opt_psr & PSR_CMT) && opt_rmid_max )
         init_psr_cmt(opt_rmid_max);
 
-    psr_cpu_prepare(0);
+    if ( opt_psr & PSR_CAT )
+        init_psr();
+
+    if ( psr_cpu_prepare(0) )
+        psr_free();
 
     psr_cpu_init();
-    if ( psr_cmt_enabled() )
+    if ( psr_cmt_enabled() || socket_info )
         register_cpu_notifier(&cpu_nfb);
 
     return 0;