diff mbox series

[-V2] cxl/region: Support to calculate memory tier abstract distance

Message ID 20240611055423.470574-1-ying.huang@intel.com
State Superseded
Headers show
Series [-V2] cxl/region: Support to calculate memory tier abstract distance | expand

Commit Message

Huang, Ying June 11, 2024, 5:54 a.m. UTC
Place memory nodes backed by CXL regions in the appropriate memory
tiers, so that pages can be promoted/demoted with the existing
memory tiering mechanism.

The abstract distance is calculated based on the memory access latency
and bandwidth of CXL regions, which in turn come from the HMAT,
CDAT, etc.

Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Bharata B Rao <bharata@amd.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
Changes:

v2:

- Added comments to struct cxl_region and minor fixes (Thanks Jonathan!)
- Link to v1: https://lore.kernel.org/linux-cxl/20240531024852.282767-1-ying.huang@intel.com/

---
 drivers/cxl/core/region.c | 40 +++++++++++++++++++++++++++++++++++----
 drivers/cxl/cxl.h         |  2 ++
 2 files changed, 38 insertions(+), 4 deletions(-)

Comments

Alison Schofield June 11, 2024, 9:28 p.m. UTC | #1
On Tue, Jun 11, 2024 at 01:54:23PM +0800, Ying Huang wrote:
> To place memory nodes backed by CXL regions in the appropriate memory
> tiers.  So that, pages can be promoted/demoted with the existing
> memory tiering mechanism.

How about more context for the unfamiliar because my first lookup was
what's an 'abstract distance'? (not sure I even got it right below ;))

An abstract distance value must be assigned by the driver that makes
the memory available to the system. It reflects relative performance
and is used to place memory nodes backed by CXL regions in the appropriate
memory tiers allowing promotion/demotion within the existing memory tiering
mechanism.

> 
> The abstract distance is calculated based on the memory access latency
> and bandwidth of CXL regions.  Which in turn comes from the HMAT
> and CDAT, etc.

etc ?  
I think the latency and bandwidth were already calculated, so you
don't need to explain those. Perhaps drop the whole "Which in turn
comes from..." part.

a bit below...

> 
> Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Cc: Dave Jiang <dave.jiang@intel.com>
> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: Bharata B Rao <bharata@amd.com>
> Cc: Alistair Popple <apopple@nvidia.com>
> Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> ---
> Changes:
> 
> v2:
> 
> - Added comments to struct cxl_region and minor fixes (Thanks Jonathan!)
> - Link to v1: https://lore.kernel.org/linux-cxl/20240531024852.282767-1-ying.huang@intel.com/
> 
> ---
>  drivers/cxl/core/region.c | 40 +++++++++++++++++++++++++++++++++++----
>  drivers/cxl/cxl.h         |  2 ++
>  2 files changed, 38 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index 3c2b6144be23..81d0910c0a02 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -9,6 +9,7 @@
>  #include <linux/uuid.h>
>  #include <linux/sort.h>
>  #include <linux/idr.h>
> +#include <linux/memory-tiers.h>
>  #include <cxlmem.h>
>  #include <cxl.h>
>  #include "core.h"
> @@ -2304,14 +2305,20 @@ static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid)
>  	return true;
>  }
>  
> +static int cxl_region_nid(struct cxl_region *cxlr)
> +{
> +	struct cxl_region_params *p = &cxlr->params;
> +	struct cxl_endpoint_decoder *cxled = p->targets[0];
> +	struct cxl_decoder *cxld = &cxled->cxld;
> +
> +	return phys_to_target_node(cxld->hpa_range.start);
> +}
> +

I believe it's OK to send a resource_size_t to phys_to_target_node()
like this:

--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -2308,10 +2308,8 @@ static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid)
 static int cxl_region_nid(struct cxl_region *cxlr)
 {
        struct cxl_region_params *p = &cxlr->params;
-       struct cxl_endpoint_decoder *cxled = p->targets[0];
-       struct cxl_decoder *cxld = &cxled->cxld;

-       return phys_to_target_node(cxld->hpa_range.start);
+       return phys_to_target_node(p->res->start);
 }


>  static int cxl_region_perf_attrs_callback(struct notifier_block *nb,
>  					  unsigned long action, void *arg)
>  {
>  	struct cxl_region *cxlr = container_of(nb, struct cxl_region,
>  					       memory_notifier);
> -	struct cxl_region_params *p = &cxlr->params;
> -	struct cxl_endpoint_decoder *cxled = p->targets[0];
> -	struct cxl_decoder *cxld = &cxled->cxld;
>  	struct memory_notify *mnb = arg;
>  	int nid = mnb->status_change_nid;
>  	int region_nid;
> @@ -2319,7 +2326,7 @@ static int cxl_region_perf_attrs_callback(struct notifier_block *nb,
>  	if (nid == NUMA_NO_NODE || action != MEM_ONLINE)
>  		return NOTIFY_DONE;
>  
> -	region_nid = phys_to_target_node(cxld->hpa_range.start);
> +	region_nid = cxl_region_nid(cxlr);
>  	if (nid != region_nid)
>  		return NOTIFY_DONE;
>  
> @@ -2329,6 +2336,27 @@ static int cxl_region_perf_attrs_callback(struct notifier_block *nb,
>  	return NOTIFY_OK;
>  }
>  
> +static int cxl_region_calculate_adistance(struct notifier_block *nb,
> +					  unsigned long nid, void *data)
> +{
> +	struct cxl_region *cxlr = container_of(nb, struct cxl_region,
> +					       adist_notifier);
> +	struct access_coordinate *perf;
> +	int *adist = data;
> +	int region_nid;
> +
> +	region_nid = cxl_region_nid(cxlr);
> +	if (nid != region_nid)
> +		return NOTIFY_OK;
> +
> +	perf = &cxlr->coord[ACCESS_COORDINATE_CPU];
> +
> +	if (mt_perf_to_adistance(perf, adist))
> +		return NOTIFY_OK;
> +
> +	return NOTIFY_STOP;
> +}
> +
>  /**
>   * devm_cxl_add_region - Adds a region to a decoder
>   * @cxlrd: root decoder
> @@ -2371,6 +2399,10 @@ static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
>  	cxlr->memory_notifier.priority = CXL_CALLBACK_PRI;
>  	register_memory_notifier(&cxlr->memory_notifier);
>  
> +	cxlr->adist_notifier.notifier_call = cxl_region_calculate_adistance;
> +	cxlr->adist_notifier.priority = 100;
> +	register_mt_adistance_algorithm(&cxlr->adist_notifier);
> +
>  	rc = devm_add_action_or_reset(port->uport_dev, unregister_region, cxlr);
>  	if (rc)
>  		return ERR_PTR(rc);
> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> index 603c0120cff8..f46252373159 100644
> --- a/drivers/cxl/cxl.h
> +++ b/drivers/cxl/cxl.h
> @@ -522,6 +522,7 @@ struct cxl_region_params {
>   * @params: active + config params for the region
>   * @coord: QoS access coordinates for the region
>   * @memory_notifier: notifier for setting the access coordinates to node
> + * @adist_notifier: notifier for calculating the abstract distance of node
>   */
>  struct cxl_region {
>  	struct device dev;
> @@ -534,6 +535,7 @@ struct cxl_region {
>  	struct cxl_region_params params;
>  	struct access_coordinate coord[ACCESS_COORDINATE_MAX];
>  	struct notifier_block memory_notifier;
> +	struct notifier_block adist_notifier;
>  };
>  
>  struct cxl_nvdimm_bridge {
> -- 
> 2.39.2
> 
>
Huang, Ying June 12, 2024, 2:09 a.m. UTC | #2
Hi, Alison,

Thanks for review!

Alison Schofield <alison.schofield@intel.com> writes:

> On Tue, Jun 11, 2024 at 01:54:23PM +0800, Ying Huang wrote:
>> To place memory nodes backed by CXL regions in the appropriate memory
>> tiers.  So that, pages can be promoted/demoted with the existing
>> memory tiering mechanism.
>
> How about more context for the unfamiliar because my first lookup was
> what's an 'abstract distance'? (not sure I even got it right below ;))

Sorry about this.

> An abstract distance value must be assigned by the driver that makes
> the memory available to the system. It reflects relative performance
> and is used to place memory nodes backed by CXL regions in the appropriate
> memory tiers allowing promotion/demotion within the existing memory tiering
> mechanism.

This looks good.  Will use it.  Thanks!

>> 
>> The abstract distance is calculated based on the memory access latency
>> and bandwidth of CXL regions.  Which in turn comes from the HMAT
>> and CDAT, etc.
>
> etc ?  
> I think the latency and bandwidth were already calculated, so you
> don't need to explain those. Perhaps drop the whole 'While in turn
> come from..." part.

Sure.

> a bit below...
>
>> 
>> Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
>> Cc: Andrew Morton <akpm@linux-foundation.org>
>> Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
>> Cc: Dave Jiang <dave.jiang@intel.com>
>> Cc: Dan Williams <dan.j.williams@intel.com>
>> Cc: Bharata B Rao <bharata@amd.com>
>> Cc: Alistair Popple <apopple@nvidia.com>
>> Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
>> ---
>> Changes:
>> 
>> v2:
>> 
>> - Added comments to struct cxl_region and minor fixes (Thanks Jonathan!)
>> - Link to v1: https://lore.kernel.org/linux-cxl/20240531024852.282767-1-ying.huang@intel.com/
>> 
>> ---
>>  drivers/cxl/core/region.c | 40 +++++++++++++++++++++++++++++++++++----
>>  drivers/cxl/cxl.h         |  2 ++
>>  2 files changed, 38 insertions(+), 4 deletions(-)
>> 
>> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
>> index 3c2b6144be23..81d0910c0a02 100644
>> --- a/drivers/cxl/core/region.c
>> +++ b/drivers/cxl/core/region.c
>> @@ -9,6 +9,7 @@
>>  #include <linux/uuid.h>
>>  #include <linux/sort.h>
>>  #include <linux/idr.h>
>> +#include <linux/memory-tiers.h>
>>  #include <cxlmem.h>
>>  #include <cxl.h>
>>  #include "core.h"
>> @@ -2304,14 +2305,20 @@ static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid)
>>  	return true;
>>  }
>>  
>> +static int cxl_region_nid(struct cxl_region *cxlr)
>> +{
>> +	struct cxl_region_params *p = &cxlr->params;
>> +	struct cxl_endpoint_decoder *cxled = p->targets[0];
>> +	struct cxl_decoder *cxld = &cxled->cxld;
>> +
>> +	return phys_to_target_node(cxld->hpa_range.start);
>> +}
>> +
>
> I believe it's OK to send a resource_size_t to phys_to_target_node()
> like this:
>
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -2308,10 +2308,8 @@ static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid)
>  static int cxl_region_nid(struct cxl_region *cxlr)
>  {
>         struct cxl_region_params *p = &cxlr->params;
> -       struct cxl_endpoint_decoder *cxled = p->targets[0];
> -       struct cxl_decoder *cxld = &cxled->cxld;
>
> -       return phys_to_target_node(cxld->hpa_range.start);
> +       return phys_to_target_node(p->res->start);
>  }
>

I believe this works.  But the original implementation is just a
mechanical code movement from cxl_region_perf_attrs_callback().  So, I
prefer to keep it stupid.  Then, further optimization can be done on top
of it.  Is that OK with you?

>
>>  static int cxl_region_perf_attrs_callback(struct notifier_block *nb,
>>  					  unsigned long action, void *arg)
>>  {
>>  	struct cxl_region *cxlr = container_of(nb, struct cxl_region,
>>  					       memory_notifier);
>> -	struct cxl_region_params *p = &cxlr->params;
>> -	struct cxl_endpoint_decoder *cxled = p->targets[0];
>> -	struct cxl_decoder *cxld = &cxled->cxld;
>>  	struct memory_notify *mnb = arg;
>>  	int nid = mnb->status_change_nid;
>>  	int region_nid;
>> @@ -2319,7 +2326,7 @@ static int cxl_region_perf_attrs_callback(struct notifier_block *nb,
>>  	if (nid == NUMA_NO_NODE || action != MEM_ONLINE)
>>  		return NOTIFY_DONE;
>>  
>> -	region_nid = phys_to_target_node(cxld->hpa_range.start);
>> +	region_nid = cxl_region_nid(cxlr);
>>  	if (nid != region_nid)
>>  		return NOTIFY_DONE;
>>  
>> @@ -2329,6 +2336,27 @@ static int cxl_region_perf_attrs_callback(struct notifier_block *nb,
>>  	return NOTIFY_OK;
>>  }
>>  
>> +static int cxl_region_calculate_adistance(struct notifier_block *nb,
>> +					  unsigned long nid, void *data)
>> +{
>> +	struct cxl_region *cxlr = container_of(nb, struct cxl_region,
>> +					       adist_notifier);
>> +	struct access_coordinate *perf;
>> +	int *adist = data;
>> +	int region_nid;
>> +
>> +	region_nid = cxl_region_nid(cxlr);
>> +	if (nid != region_nid)
>> +		return NOTIFY_OK;
>> +
>> +	perf = &cxlr->coord[ACCESS_COORDINATE_CPU];
>> +
>> +	if (mt_perf_to_adistance(perf, adist))
>> +		return NOTIFY_OK;
>> +
>> +	return NOTIFY_STOP;
>> +}
>> +
>>  /**
>>   * devm_cxl_add_region - Adds a region to a decoder
>>   * @cxlrd: root decoder
>> @@ -2371,6 +2399,10 @@ static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
>>  	cxlr->memory_notifier.priority = CXL_CALLBACK_PRI;
>>  	register_memory_notifier(&cxlr->memory_notifier);
>>  
>> +	cxlr->adist_notifier.notifier_call = cxl_region_calculate_adistance;
>> +	cxlr->adist_notifier.priority = 100;
>> +	register_mt_adistance_algorithm(&cxlr->adist_notifier);
>> +
>>  	rc = devm_add_action_or_reset(port->uport_dev, unregister_region, cxlr);
>>  	if (rc)
>>  		return ERR_PTR(rc);
>> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
>> index 603c0120cff8..f46252373159 100644
>> --- a/drivers/cxl/cxl.h
>> +++ b/drivers/cxl/cxl.h
>> @@ -522,6 +522,7 @@ struct cxl_region_params {
>>   * @params: active + config params for the region
>>   * @coord: QoS access coordinates for the region
>>   * @memory_notifier: notifier for setting the access coordinates to node
>> + * @adist_notifier: notifier for calculating the abstract distance of node
>>   */
>>  struct cxl_region {
>>  	struct device dev;
>> @@ -534,6 +535,7 @@ struct cxl_region {
>>  	struct cxl_region_params params;
>>  	struct access_coordinate coord[ACCESS_COORDINATE_MAX];
>>  	struct notifier_block memory_notifier;
>> +	struct notifier_block adist_notifier;
>>  };
>>  
>>  struct cxl_nvdimm_bridge {
>> -- 
>> 2.39.2
>> 
>> 

--
Best Regards,
Huang, Ying
Dan Williams June 12, 2024, 4:22 a.m. UTC | #3
Huang Ying wrote:
> To place memory nodes backed by CXL regions in the appropriate memory
> tiers.  So that, pages can be promoted/demoted with the existing
> memory tiering mechanism.
> 
> The abstract distance is calculated based on the memory access latency
> and bandwidth of CXL regions.  Which in turn comes from the HMAT
> and CDAT, etc.
> 
> Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Cc: Dave Jiang <dave.jiang@intel.com>
> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: Bharata B Rao <bharata@amd.com>
> Cc: Alistair Popple <apopple@nvidia.com>
> Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> ---
> Changes:
> 
> v2:
> 
> - Added comments to struct cxl_region and minor fixes (Thanks Jonathan!)
> - Link to v1: https://lore.kernel.org/linux-cxl/20240531024852.282767-1-ying.huang@intel.com/
> 
> ---
>  drivers/cxl/core/region.c | 40 +++++++++++++++++++++++++++++++++++----
>  drivers/cxl/cxl.h         |  2 ++
>  2 files changed, 38 insertions(+), 4 deletions(-)

Looks straightforward to me,

Acked-by: Dan Williams <dan.j.williams@intel.com>
Alison Schofield June 12, 2024, 5:38 p.m. UTC | #4
On Wed, Jun 12, 2024 at 10:09:14AM +0800, Ying Huang wrote:

snip

> >> ---
> >>  drivers/cxl/core/region.c | 40 +++++++++++++++++++++++++++++++++++----
> >>  drivers/cxl/cxl.h         |  2 ++
> >>  2 files changed, 38 insertions(+), 4 deletions(-)
> >> 
> >> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> >> index 3c2b6144be23..81d0910c0a02 100644
> >> --- a/drivers/cxl/core/region.c
> >> +++ b/drivers/cxl/core/region.c
> >> @@ -9,6 +9,7 @@
> >>  #include <linux/uuid.h>
> >>  #include <linux/sort.h>
> >>  #include <linux/idr.h>
> >> +#include <linux/memory-tiers.h>
> >>  #include <cxlmem.h>
> >>  #include <cxl.h>
> >>  #include "core.h"
> >> @@ -2304,14 +2305,20 @@ static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid)
> >>  	return true;
> >>  }
> >>  
> >> +static int cxl_region_nid(struct cxl_region *cxlr)
> >> +{
> >> +	struct cxl_region_params *p = &cxlr->params;
> >> +	struct cxl_endpoint_decoder *cxled = p->targets[0];
> >> +	struct cxl_decoder *cxld = &cxled->cxld;
> >> +
> >> +	return phys_to_target_node(cxld->hpa_range.start);
> >> +}
> >> +
> >
> > I believe it's OK to send a resource_size_t to phys_to_target_node()
> > like this:
> >
> > --- a/drivers/cxl/core/region.c
> > +++ b/drivers/cxl/core/region.c
> > @@ -2308,10 +2308,8 @@ static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid)
> >  static int cxl_region_nid(struct cxl_region *cxlr)
> >  {
> >         struct cxl_region_params *p = &cxlr->params;
> > -       struct cxl_endpoint_decoder *cxled = p->targets[0];
> > -       struct cxl_decoder *cxld = &cxled->cxld;
> >
> > -       return phys_to_target_node(cxld->hpa_range.start);
> > +       return phys_to_target_node(p->res->start);
> >  }
> >
> 
> I believe this works.  But the original implementation is just a
> mechanical code movement from cxl_region_perf_attrs_callback().  So, I
> prefer to keep it stupid. Then, further optimization can be done on top
> of it.  Is it good for you?

I prefer to do it now while we are thinking about it.

How about a precursor patch:
Patch 1/2: cxl/region: Add a region to node id helper

--and then in that commit log you can say it's a simplified lookup 
and is being done in preparation for adding another caller.

-- Alison

> 
snip
> >
Huang, Ying June 13, 2024, 12:40 a.m. UTC | #5
Alison Schofield <alison.schofield@intel.com> writes:

> On Wed, Jun 12, 2024 at 10:09:14AM +0800, Ying Huang wrote:
>
> snip
>
>> >> ---
>> >>  drivers/cxl/core/region.c | 40 +++++++++++++++++++++++++++++++++++----
>> >>  drivers/cxl/cxl.h         |  2 ++
>> >>  2 files changed, 38 insertions(+), 4 deletions(-)
>> >> 
>> >> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
>> >> index 3c2b6144be23..81d0910c0a02 100644
>> >> --- a/drivers/cxl/core/region.c
>> >> +++ b/drivers/cxl/core/region.c
>> >> @@ -9,6 +9,7 @@
>> >>  #include <linux/uuid.h>
>> >>  #include <linux/sort.h>
>> >>  #include <linux/idr.h>
>> >> +#include <linux/memory-tiers.h>
>> >>  #include <cxlmem.h>
>> >>  #include <cxl.h>
>> >>  #include "core.h"
>> >> @@ -2304,14 +2305,20 @@ static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid)
>> >>  	return true;
>> >>  }
>> >>  
>> >> +static int cxl_region_nid(struct cxl_region *cxlr)
>> >> +{
>> >> +	struct cxl_region_params *p = &cxlr->params;
>> >> +	struct cxl_endpoint_decoder *cxled = p->targets[0];
>> >> +	struct cxl_decoder *cxld = &cxled->cxld;
>> >> +
>> >> +	return phys_to_target_node(cxld->hpa_range.start);
>> >> +}
>> >> +
>> >
>> > I believe it's OK to send a resource_size_t to phys_to_target_node()
>> > like this:
>> >
>> > --- a/drivers/cxl/core/region.c
>> > +++ b/drivers/cxl/core/region.c
>> > @@ -2308,10 +2308,8 @@ static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid)
>> >  static int cxl_region_nid(struct cxl_region *cxlr)
>> >  {
>> >         struct cxl_region_params *p = &cxlr->params;
>> > -       struct cxl_endpoint_decoder *cxled = p->targets[0];
>> > -       struct cxl_decoder *cxld = &cxled->cxld;
>> >
>> > -       return phys_to_target_node(cxld->hpa_range.start);
>> > +       return phys_to_target_node(p->res->start);
>> >  }
>> >
>> 
>> I believe this works.  But the original implementation is just a
>> mechanical code movement from cxl_region_perf_attrs_callback().  So, I
>> prefer to keep it stupid. Then, further optimization can be done on top
>> of it.  Is it good for you?
>
> I prefer to do it now while we are thinking about it.
>
> How about a precursor patch:
> Patch 1/2: cxl/region: Add a region to node id helper
>
> --and then in that commit log you can say it's a simplified lookup 
> and is being done in preparation for adding another caller.

This works.  Will do it.

--
Best Regards,
Huang, Ying
Huang, Ying June 17, 2024, 2:10 a.m. UTC | #6
Alison Schofield <alison.schofield@intel.com> writes:

> On Tue, Jun 11, 2024 at 01:54:23PM +0800, Ying Huang wrote:

[snip]

>> ---
>>  drivers/cxl/core/region.c | 40 +++++++++++++++++++++++++++++++++++----
>>  drivers/cxl/cxl.h         |  2 ++
>>  2 files changed, 38 insertions(+), 4 deletions(-)
>> 
>> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
>> index 3c2b6144be23..81d0910c0a02 100644
>> --- a/drivers/cxl/core/region.c
>> +++ b/drivers/cxl/core/region.c
>> @@ -9,6 +9,7 @@
>>  #include <linux/uuid.h>
>>  #include <linux/sort.h>
>>  #include <linux/idr.h>
>> +#include <linux/memory-tiers.h>
>>  #include <cxlmem.h>
>>  #include <cxl.h>
>>  #include "core.h"
>> @@ -2304,14 +2305,20 @@ static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid)
>>  	return true;
>>  }
>>  
>> +static int cxl_region_nid(struct cxl_region *cxlr)
>> +{
>> +	struct cxl_region_params *p = &cxlr->params;
>> +	struct cxl_endpoint_decoder *cxled = p->targets[0];
>> +	struct cxl_decoder *cxld = &cxled->cxld;
>> +
>> +	return phys_to_target_node(cxld->hpa_range.start);
>> +}
>> +
>
> I believe it's OK to send a resource_size_t to phys_to_target_node()
> like this:
>
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -2308,10 +2308,8 @@ static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid)
>  static int cxl_region_nid(struct cxl_region *cxlr)
>  {
>         struct cxl_region_params *p = &cxlr->params;
> -       struct cxl_endpoint_decoder *cxled = p->targets[0];
> -       struct cxl_decoder *cxld = &cxled->cxld;
>
> -       return phys_to_target_node(cxld->hpa_range.start);
> +       return phys_to_target_node(p->res->start);
>  }
>

Reading the related code again, it appears that there's a theoretical
race condition here.  register_memory_notifier() is called in
devm_cxl_add_region(), where p->targets[] and p->res haven't been
set up yet.  And, IIUC, p->targets[] or p->res may go away during the
life cycle of a region too.  If so, we need to use
guard(rwsem_read)(&cxl_region_rwsem) to protect the p->targets[] and
p->res references, because the memory notifier may also be called when
other nodes go online/offline.

--
Best Regards,
Huang, Ying
Dave Jiang June 17, 2024, 5:06 p.m. UTC | #7
On 6/16/24 7:10 PM, Huang, Ying wrote:
> Alison Schofield <alison.schofield@intel.com> writes:
> 
>> On Tue, Jun 11, 2024 at 01:54:23PM +0800, Ying Huang wrote:
> 
> [snip]
> 
>>> ---
>>>  drivers/cxl/core/region.c | 40 +++++++++++++++++++++++++++++++++++----
>>>  drivers/cxl/cxl.h         |  2 ++
>>>  2 files changed, 38 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
>>> index 3c2b6144be23..81d0910c0a02 100644
>>> --- a/drivers/cxl/core/region.c
>>> +++ b/drivers/cxl/core/region.c
>>> @@ -9,6 +9,7 @@
>>>  #include <linux/uuid.h>
>>>  #include <linux/sort.h>
>>>  #include <linux/idr.h>
>>> +#include <linux/memory-tiers.h>
>>>  #include <cxlmem.h>
>>>  #include <cxl.h>
>>>  #include "core.h"
>>> @@ -2304,14 +2305,20 @@ static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid)
>>>  	return true;
>>>  }
>>>  
>>> +static int cxl_region_nid(struct cxl_region *cxlr)
>>> +{
>>> +	struct cxl_region_params *p = &cxlr->params;
>>> +	struct cxl_endpoint_decoder *cxled = p->targets[0];
>>> +	struct cxl_decoder *cxld = &cxled->cxld;
>>> +
>>> +	return phys_to_target_node(cxld->hpa_range.start);
>>> +}
>>> +
>>
>> I believe it's OK to send a resource_size_t to phys_to_target_node()
>> like this:
>>
>> --- a/drivers/cxl/core/region.c
>> +++ b/drivers/cxl/core/region.c
>> @@ -2308,10 +2308,8 @@ static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid)
>>  static int cxl_region_nid(struct cxl_region *cxlr)
>>  {
>>         struct cxl_region_params *p = &cxlr->params;
>> -       struct cxl_endpoint_decoder *cxled = p->targets[0];
>> -       struct cxl_decoder *cxld = &cxled->cxld;
>>
>> -       return phys_to_target_node(cxld->hpa_range.start);
>> +       return phys_to_target_node(p->res->start);
>>  }
>>
> 
> Read the related code again, it appears that there's a theoretical race
> condition here.  The register_memory_notifier() is called in
> devm_cxl_add_region(), where p->targets[] and p->res haven't been
> setupped yet.  And, IIUC, p->targets[] or p->res may be gone during the
> life cycle of regions too.  If so, we need to use
> guard(rwsem_read)(&cxl_region_rwsem) to protect p->targets[] and p->res
> references.  Because the memory notifier may be called for other nodes
> online/offline.

You mind sending a patch? :)

> 
> --
> Best Regards,
> Huang, Ying
Huang, Ying June 18, 2024, 12:28 a.m. UTC | #8
Dave Jiang <dave.jiang@intel.com> writes:

> On 6/16/24 7:10 PM, Huang, Ying wrote:
>> Alison Schofield <alison.schofield@intel.com> writes:
>> 
>>> On Tue, Jun 11, 2024 at 01:54:23PM +0800, Ying Huang wrote:
>> 
>> [snip]
>> 
>>>> ---
>>>>  drivers/cxl/core/region.c | 40 +++++++++++++++++++++++++++++++++++----
>>>>  drivers/cxl/cxl.h         |  2 ++
>>>>  2 files changed, 38 insertions(+), 4 deletions(-)
>>>>
>>>> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
>>>> index 3c2b6144be23..81d0910c0a02 100644
>>>> --- a/drivers/cxl/core/region.c
>>>> +++ b/drivers/cxl/core/region.c
>>>> @@ -9,6 +9,7 @@
>>>>  #include <linux/uuid.h>
>>>>  #include <linux/sort.h>
>>>>  #include <linux/idr.h>
>>>> +#include <linux/memory-tiers.h>
>>>>  #include <cxlmem.h>
>>>>  #include <cxl.h>
>>>>  #include "core.h"
>>>> @@ -2304,14 +2305,20 @@ static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid)
>>>>  	return true;
>>>>  }
>>>>  
>>>> +static int cxl_region_nid(struct cxl_region *cxlr)
>>>> +{
>>>> +	struct cxl_region_params *p = &cxlr->params;
>>>> +	struct cxl_endpoint_decoder *cxled = p->targets[0];
>>>> +	struct cxl_decoder *cxld = &cxled->cxld;
>>>> +
>>>> +	return phys_to_target_node(cxld->hpa_range.start);
>>>> +}
>>>> +
>>>
>>> I believe it's OK to send a resource_size_t to phys_to_target_node()
>>> like this:
>>>
>>> --- a/drivers/cxl/core/region.c
>>> +++ b/drivers/cxl/core/region.c
>>> @@ -2308,10 +2308,8 @@ static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid)
>>>  static int cxl_region_nid(struct cxl_region *cxlr)
>>>  {
>>>         struct cxl_region_params *p = &cxlr->params;
>>> -       struct cxl_endpoint_decoder *cxled = p->targets[0];
>>> -       struct cxl_decoder *cxld = &cxled->cxld;
>>>
>>> -       return phys_to_target_node(cxld->hpa_range.start);
>>> +       return phys_to_target_node(p->res->start);
>>>  }
>>>
>> 
>> Read the related code again, it appears that there's a theoretical race
>> condition here.  The register_memory_notifier() is called in
>> devm_cxl_add_region(), where p->targets[] and p->res haven't been
>> setupped yet.  And, IIUC, p->targets[] or p->res may be gone during the
>> life cycle of regions too.  If so, we need to use
>> guard(rwsem_read)(&cxl_region_rwsem) to protect p->targets[] and p->res
>> references.  Because the memory notifier may be called for other nodes
>> online/offline.
>
> You mind sending a patch? :)

Sure.  Will do it.

--
Best Regards,
Huang, Ying
diff mbox series

Patch

diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 3c2b6144be23..81d0910c0a02 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -9,6 +9,7 @@ 
 #include <linux/uuid.h>
 #include <linux/sort.h>
 #include <linux/idr.h>
+#include <linux/memory-tiers.h>
 #include <cxlmem.h>
 #include <cxl.h>
 #include "core.h"
@@ -2304,14 +2305,20 @@  static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid)
 	return true;
 }
 
+static int cxl_region_nid(struct cxl_region *cxlr)
+{
+	struct cxl_region_params *p = &cxlr->params;
+	struct cxl_endpoint_decoder *cxled = p->targets[0];
+	struct cxl_decoder *cxld = &cxled->cxld;
+
+	return phys_to_target_node(cxld->hpa_range.start);
+}
+
 static int cxl_region_perf_attrs_callback(struct notifier_block *nb,
 					  unsigned long action, void *arg)
 {
 	struct cxl_region *cxlr = container_of(nb, struct cxl_region,
 					       memory_notifier);
-	struct cxl_region_params *p = &cxlr->params;
-	struct cxl_endpoint_decoder *cxled = p->targets[0];
-	struct cxl_decoder *cxld = &cxled->cxld;
 	struct memory_notify *mnb = arg;
 	int nid = mnb->status_change_nid;
 	int region_nid;
@@ -2319,7 +2326,7 @@  static int cxl_region_perf_attrs_callback(struct notifier_block *nb,
 	if (nid == NUMA_NO_NODE || action != MEM_ONLINE)
 		return NOTIFY_DONE;
 
-	region_nid = phys_to_target_node(cxld->hpa_range.start);
+	region_nid = cxl_region_nid(cxlr);
 	if (nid != region_nid)
 		return NOTIFY_DONE;
 
@@ -2329,6 +2336,27 @@  static int cxl_region_perf_attrs_callback(struct notifier_block *nb,
 	return NOTIFY_OK;
 }
 
+static int cxl_region_calculate_adistance(struct notifier_block *nb,
+					  unsigned long nid, void *data)
+{
+	struct cxl_region *cxlr = container_of(nb, struct cxl_region,
+					       adist_notifier);
+	struct access_coordinate *perf;
+	int *adist = data;
+	int region_nid;
+
+	region_nid = cxl_region_nid(cxlr);
+	if (nid != region_nid)
+		return NOTIFY_OK;
+
+	perf = &cxlr->coord[ACCESS_COORDINATE_CPU];
+
+	if (mt_perf_to_adistance(perf, adist))
+		return NOTIFY_OK;
+
+	return NOTIFY_STOP;
+}
+
 /**
  * devm_cxl_add_region - Adds a region to a decoder
  * @cxlrd: root decoder
@@ -2371,6 +2399,10 @@  static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
 	cxlr->memory_notifier.priority = CXL_CALLBACK_PRI;
 	register_memory_notifier(&cxlr->memory_notifier);
 
+	cxlr->adist_notifier.notifier_call = cxl_region_calculate_adistance;
+	cxlr->adist_notifier.priority = 100;
+	register_mt_adistance_algorithm(&cxlr->adist_notifier);
+
 	rc = devm_add_action_or_reset(port->uport_dev, unregister_region, cxlr);
 	if (rc)
 		return ERR_PTR(rc);
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 603c0120cff8..f46252373159 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -522,6 +522,7 @@  struct cxl_region_params {
  * @params: active + config params for the region
  * @coord: QoS access coordinates for the region
  * @memory_notifier: notifier for setting the access coordinates to node
+ * @adist_notifier: notifier for calculating the abstract distance of node
  */
 struct cxl_region {
 	struct device dev;
@@ -534,6 +535,7 @@  struct cxl_region {
 	struct cxl_region_params params;
 	struct access_coordinate coord[ACCESS_COORDINATE_MAX];
 	struct notifier_block memory_notifier;
+	struct notifier_block adist_notifier;
 };
 
 struct cxl_nvdimm_bridge {