
[v2,1/3] cxl: Change 'struct cxl_memdev_state' *_perf_list to single 'struct cxl_dpa_perf'

Message ID 20240130222905.946109-1-dave.jiang@intel.com
State Superseded
Series [v2,1/3] cxl: Change 'struct cxl_memdev_state' *_perf_list to single 'struct cxl_dpa_perf'

Commit Message

Dave Jiang Jan. 30, 2024, 10:29 p.m. UTC
In order to expose the qos_class sysfs attributes under the 'ram' and
'pmem' sub-directories, the attributes must be defined as static
attributes rather than registered through driver->dev_groups. To avoid
implementing locking for access to the 'struct cxl_dpa_perf' lists,
convert each list to a single 'struct cxl_dpa_perf' entry in
preparation for moving the attributes to static definitions.

Link: https://lore.kernel.org/linux-cxl/65b200ba228f_2d43c29468@dwillia2-mobl3.amr.corp.intel.com.notmuch/
Suggested-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
 drivers/cxl/core/cdat.c | 90 +++++++++++++----------------------------
 drivers/cxl/core/mbox.c |  4 +-
 drivers/cxl/cxlmem.h    | 10 ++---
 drivers/cxl/mem.c       | 25 ++++--------
 4 files changed, 42 insertions(+), 87 deletions(-)
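
As context for where this preparation is headed: a statically defined, named
attribute_group is what produces a sysfs sub-directory, with is_visible()
gating the attributes at device-add time. A minimal sketch of that pattern
follows; it is not part of this patch, and the group and helper names are
assumptions for illustration only.

/* Illustrative only: a named group creates the 'ram' sub-directory */
static struct attribute *cxl_ram_attrs[] = {
	&dev_attr_ram_qos_class.attr,
	NULL,
};

static umode_t cxl_ram_visible(struct kobject *kobj, struct attribute *a, int n)
{
	struct device *dev = kobj_to_dev(kobj);
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);

	/* hide the attribute when no DSMAS entry matched the RAM partition */
	if (mds->ram_perf.qos_class == CXL_QOS_CLASS_INVALID)
		return 0;
	return a->mode;
}

static const struct attribute_group cxl_ram_group = {
	.name = "ram",
	.attrs = cxl_ram_attrs,
	.is_visible = cxl_ram_visible,
};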

Comments

Dan Williams Jan. 31, 2024, 10:35 p.m. UTC | #1
Dave Jiang wrote:
> In order to address the issue with being able to expose qos_class sysfs
> attributes under 'ram' and 'pmem' sub-directories, the attributes must
> be defined as static attributes rather than under driver->dev_groups.
> To avoid implementing locking for accessing the 'struct cxl_dpa_perf`
> lists, convert the list to a single 'struct cxl_dpa_perf' entry in
> preparation to move the attributes to statically defined.
> 
> Link: https://lore.kernel.org/linux-cxl/65b200ba228f_2d43c29468@dwillia2-mobl3.amr.corp.intel.com.notmuch/
> Suggested-by: Dan Williams <dan.j.williams@intel.com>
> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
> ---
>  drivers/cxl/core/cdat.c | 90 +++++++++++++----------------------------
>  drivers/cxl/core/mbox.c |  4 +-
>  drivers/cxl/cxlmem.h    | 10 ++---
>  drivers/cxl/mem.c       | 25 ++++--------
>  4 files changed, 42 insertions(+), 87 deletions(-)

Oh, wow, this looks wonderful!

I was expecting the lists to still be there, just moved out of 'struct
cxl_dev_state'. Am I reading this right, the work to select and validate
the "best" performance per partition can be done without list walking?
If so, great!

[..]
> diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
> index c5c9d8e0d88d..a62099e47d71 100644
> --- a/drivers/cxl/mem.c
> +++ b/drivers/cxl/mem.c
> @@ -221,16 +221,13 @@ static ssize_t ram_qos_class_show(struct device *dev,
>  	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
>  	struct cxl_dev_state *cxlds = cxlmd->cxlds;
>  	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
> -	struct cxl_dpa_perf *dpa_perf;
> +	struct cxl_dpa_perf *dpa_perf = &mds->ram_perf;
>  
>  	if (!dev->driver)
>  		return -ENOENT;

This can be deleted since it is racy being referenced without the
device_lock(), and nothing in this routine requires the device to be
locked.

>  
> -	if (list_empty(&mds->ram_perf_list))
> -		return -ENOENT;
> -
> -	dpa_perf = list_first_entry(&mds->ram_perf_list, struct cxl_dpa_perf,
> -				    list);
> +	if (dpa_perf->qos_class == CXL_QOS_CLASS_INVALID)
> +		return -ENODATA;

As long as is_visible() checks for CXL_QOS_CLASS_INVALID there is no
need to add error handling in this _show() routine.
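
For reference, a minimal sketch of what the _show() routine could reduce to if
both the dev->driver check and the CXL_QOS_CLASS_INVALID check are dropped,
assuming cxl_mem_visible() continues to hide the attribute while qos_class is
invalid (a suggestion sketch, not the code as posted):

static ssize_t ram_qos_class_show(struct device *dev,
				  struct device_attribute *attr, char *buf)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);

	/* visibility is already gated by cxl_mem_visible() */
	return sysfs_emit(buf, "%d\n", mds->ram_perf.qos_class);
}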

>  
>  	return sysfs_emit(buf, "%d\n", dpa_perf->qos_class);
>  }
> @@ -244,16 +241,10 @@ static ssize_t pmem_qos_class_show(struct device *dev,
>  	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
>  	struct cxl_dev_state *cxlds = cxlmd->cxlds;
>  	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
> -	struct cxl_dpa_perf *dpa_perf;
> +	struct cxl_dpa_perf *dpa_perf = &mds->pmem_perf;
>  
> -	if (!dev->driver)
> -		return -ENOENT;

Ah, good, you deleted it this time.

> -
> -	if (list_empty(&mds->pmem_perf_list))
> -		return -ENOENT;
> -
> -	dpa_perf = list_first_entry(&mds->pmem_perf_list, struct cxl_dpa_perf,
> -				    list);
> +	if (dpa_perf->qos_class == CXL_QOS_CLASS_INVALID)
> +		return -ENODATA;

This can go.

Dave Jiang Jan. 31, 2024, 11:35 p.m. UTC | #2
On 1/31/24 15:35, Dan Williams wrote:
> Dave Jiang wrote:
>> In order to address the issue with being able to expose qos_class sysfs
>> attributes under 'ram' and 'pmem' sub-directories, the attributes must
>> be defined as static attributes rather than under driver->dev_groups.
>> To avoid implementing locking for accessing the 'struct cxl_dpa_perf`
>> lists, convert the list to a single 'struct cxl_dpa_perf' entry in
>> preparation to move the attributes to statically defined.
>>
>> Link: https://lore.kernel.org/linux-cxl/65b200ba228f_2d43c29468@dwillia2-mobl3.amr.corp.intel.com.notmuch/
>> Suggested-by: Dan Williams <dan.j.williams@intel.com>
>> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
>> ---
>>  drivers/cxl/core/cdat.c | 90 +++++++++++++----------------------------
>>  drivers/cxl/core/mbox.c |  4 +-
>>  drivers/cxl/cxlmem.h    | 10 ++---
>>  drivers/cxl/mem.c       | 25 ++++--------
>>  4 files changed, 42 insertions(+), 87 deletions(-)
> 
> Oh, wow, this looks wonderful!
> 
> I was expecting the lists to still be there, just moved out of 'struct
> cxl_dev_state'. Am I reading this right, the work to select and validate
> the "best" performance per partition can be done without list walking?
> If so, great!

I've not encountered more than 1 DSMAS per partition in the CDAT on hardware so far. I don't see why we can't just have the simple case until we need something more complex.

DJ

> 
> [..]
>> diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
>> index c5c9d8e0d88d..a62099e47d71 100644
>> --- a/drivers/cxl/mem.c
>> +++ b/drivers/cxl/mem.c
>> @@ -221,16 +221,13 @@ static ssize_t ram_qos_class_show(struct device *dev,
>>  	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
>>  	struct cxl_dev_state *cxlds = cxlmd->cxlds;
>>  	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
>> -	struct cxl_dpa_perf *dpa_perf;
>> +	struct cxl_dpa_perf *dpa_perf = &mds->ram_perf;
>>  
>>  	if (!dev->driver)
>>  		return -ENOENT;
> 
> This can be deleted since it is racy being referenced without the
> device_lock(), and nothing in this routine requires the device to be
> locked.
> 
>>  
>> -	if (list_empty(&mds->ram_perf_list))
>> -		return -ENOENT;
>> -
>> -	dpa_perf = list_first_entry(&mds->ram_perf_list, struct cxl_dpa_perf,
>> -				    list);
>> +	if (dpa_perf->qos_class == CXL_QOS_CLASS_INVALID)
>> +		return -ENODATA;
> 
> As long as is_visible() checks for CXL_QOS_CLASS_INVALID there is no
> need to add error handling in this _show() routine.
> 
>>  
>>  	return sysfs_emit(buf, "%d\n", dpa_perf->qos_class);
>>  }
>> @@ -244,16 +241,10 @@ static ssize_t pmem_qos_class_show(struct device *dev,
>>  	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
>>  	struct cxl_dev_state *cxlds = cxlmd->cxlds;
>>  	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
>> -	struct cxl_dpa_perf *dpa_perf;
>> +	struct cxl_dpa_perf *dpa_perf = &mds->pmem_perf;
>>  
>> -	if (!dev->driver)
>> -		return -ENOENT;
> 
> Ah, good, you deleted it this time.
> 
>> -
>> -	if (list_empty(&mds->pmem_perf_list))
>> -		return -ENOENT;
>> -
>> -	dpa_perf = list_first_entry(&mds->pmem_perf_list, struct cxl_dpa_perf,
>> -				    list);
>> +	if (dpa_perf->qos_class == CXL_QOS_CLASS_INVALID)
>> +		return -ENODATA;
> 
> This can go.

Patch

diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c
index 6fe11546889f..99e4c8170c13 100644
--- a/drivers/cxl/core/cdat.c
+++ b/drivers/cxl/core/cdat.c
@@ -210,19 +210,12 @@  static int cxl_port_perf_data_calculate(struct cxl_port *port,
 	return 0;
 }
 
-static void add_perf_entry(struct device *dev, struct dsmas_entry *dent,
-			   struct list_head *list)
+static void update_perf_entry(struct device *dev, struct dsmas_entry *dent,
+			      struct cxl_dpa_perf *dpa_perf)
 {
-	struct cxl_dpa_perf *dpa_perf;
-
-	dpa_perf = kzalloc(sizeof(*dpa_perf), GFP_KERNEL);
-	if (!dpa_perf)
-		return;
-
 	dpa_perf->dpa_range = dent->dpa_range;
 	dpa_perf->coord = dent->coord;
 	dpa_perf->qos_class = dent->qos_class;
-	list_add_tail(&dpa_perf->list, list);
 	dev_dbg(dev,
 		"DSMAS: dpa: %#llx qos: %d read_bw: %d write_bw %d read_lat: %d write_lat: %d\n",
 		dent->dpa_range.start, dpa_perf->qos_class,
@@ -230,20 +223,6 @@  static void add_perf_entry(struct device *dev, struct dsmas_entry *dent,
 		dent->coord.read_latency, dent->coord.write_latency);
 }
 
-static void free_perf_ents(void *data)
-{
-	struct cxl_memdev_state *mds = data;
-	struct cxl_dpa_perf *dpa_perf, *n;
-	LIST_HEAD(discard);
-
-	list_splice_tail_init(&mds->ram_perf_list, &discard);
-	list_splice_tail_init(&mds->pmem_perf_list, &discard);
-	list_for_each_entry_safe(dpa_perf, n, &discard, list) {
-		list_del(&dpa_perf->list);
-		kfree(dpa_perf);
-	}
-}
-
 static void cxl_memdev_set_qos_class(struct cxl_dev_state *cxlds,
 				     struct xarray *dsmas_xa)
 {
@@ -262,17 +241,16 @@  static void cxl_memdev_set_qos_class(struct cxl_dev_state *cxlds,
 
 	xa_for_each(dsmas_xa, index, dent) {
 		if (resource_size(&cxlds->ram_res) &&
-		    range_contains(&ram_range, &dent->dpa_range))
-			add_perf_entry(dev, dent, &mds->ram_perf_list);
-		else if (resource_size(&cxlds->pmem_res) &&
-			 range_contains(&pmem_range, &dent->dpa_range))
-			add_perf_entry(dev, dent, &mds->pmem_perf_list);
-		else
+		    range_contains(&ram_range, &dent->dpa_range)) {
+			update_perf_entry(dev, dent, &mds->ram_perf);
+		} else if (resource_size(&cxlds->pmem_res) &&
+			   range_contains(&pmem_range, &dent->dpa_range)) {
+			update_perf_entry(dev, dent, &mds->pmem_perf);
+		} else {
 			dev_dbg(dev, "no partition for dsmas dpa: %#llx\n",
 				dent->dpa_range.start);
+		}
 	}
-
-	devm_add_action_or_reset(&cxlds->cxlmd->dev, free_perf_ents, mds);
 }
 
 static int match_cxlrd_qos_class(struct device *dev, void *data)
@@ -293,24 +271,25 @@  static int match_cxlrd_qos_class(struct device *dev, void *data)
 	return 0;
 }
 
+static void reset_dpa_perf(struct cxl_dpa_perf *dpa_perf)
+{
+	memset(dpa_perf, 0, sizeof(*dpa_perf));
+	dpa_perf->qos_class = CXL_QOS_CLASS_INVALID;
+}
+
 static void cxl_qos_match(struct cxl_port *root_port,
-			  struct list_head *work_list,
-			  struct list_head *discard_list)
+			  struct cxl_dpa_perf *dpa_perf)
 {
-	struct cxl_dpa_perf *dpa_perf, *n;
+	int rc;
 
-	list_for_each_entry_safe(dpa_perf, n, work_list, list) {
-		int rc;
+	if (dpa_perf->qos_class == CXL_QOS_CLASS_INVALID)
+		return;
 
-		if (dpa_perf->qos_class == CXL_QOS_CLASS_INVALID)
-			return;
-
-		rc = device_for_each_child(&root_port->dev,
-					   (void *)&dpa_perf->qos_class,
-					   match_cxlrd_qos_class);
-		if (!rc)
-			list_move_tail(&dpa_perf->list, discard_list);
-	}
+	rc = device_for_each_child(&root_port->dev,
+				   &dpa_perf->qos_class,
+				   match_cxlrd_qos_class);
+	if (!rc)
+		reset_dpa_perf(dpa_perf);
 }
 
 static int match_cxlrd_hb(struct device *dev, void *data)
@@ -334,23 +313,10 @@  static int match_cxlrd_hb(struct device *dev, void *data)
 	return 0;
 }
 
-static void discard_dpa_perf(struct list_head *list)
-{
-	struct cxl_dpa_perf *dpa_perf, *n;
-
-	list_for_each_entry_safe(dpa_perf, n, list, list) {
-		list_del(&dpa_perf->list);
-		kfree(dpa_perf);
-	}
-}
-DEFINE_FREE(dpa_perf, struct list_head *, if (!list_empty(_T)) discard_dpa_perf(_T))
-
 static int cxl_qos_class_verify(struct cxl_memdev *cxlmd)
 {
 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
 	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
-	LIST_HEAD(__discard);
-	struct list_head *discard __free(dpa_perf) = &__discard;
 	struct cxl_port *root_port;
 	int rc;
 
@@ -363,16 +329,16 @@  static int cxl_qos_class_verify(struct cxl_memdev *cxlmd)
 	root_port = &cxl_root->port;
 
 	/* Check that the QTG IDs are all sane between end device and root decoders */
-	cxl_qos_match(root_port, &mds->ram_perf_list, discard);
-	cxl_qos_match(root_port, &mds->pmem_perf_list, discard);
+	cxl_qos_match(root_port, &mds->ram_perf);
+	cxl_qos_match(root_port, &mds->pmem_perf);
 
 	/* Check to make sure that the device's host bridge is under a root decoder */
 	rc = device_for_each_child(&root_port->dev,
 				   (void *)cxlmd->endpoint->host_bridge,
 				   match_cxlrd_hb);
 	if (!rc) {
-		list_splice_tail_init(&mds->ram_perf_list, discard);
-		list_splice_tail_init(&mds->pmem_perf_list, discard);
+		reset_dpa_perf(&mds->ram_perf);
+		reset_dpa_perf(&mds->pmem_perf);
 	}
 
 	return rc;
diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index 27166a411705..9adda4795eb7 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -1391,8 +1391,8 @@  struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev)
 	mds->cxlds.reg_map.host = dev;
 	mds->cxlds.reg_map.resource = CXL_RESOURCE_NONE;
 	mds->cxlds.type = CXL_DEVTYPE_CLASSMEM;
-	INIT_LIST_HEAD(&mds->ram_perf_list);
-	INIT_LIST_HEAD(&mds->pmem_perf_list);
+	mds->ram_perf.qos_class = CXL_QOS_CLASS_INVALID;
+	mds->pmem_perf.qos_class = CXL_QOS_CLASS_INVALID;
 
 	return mds;
 }
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 5303d6942b88..20fb3b35e89e 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -395,13 +395,11 @@  enum cxl_devtype {
 
 /**
  * struct cxl_dpa_perf - DPA performance property entry
- * @list - list entry
  * @dpa_range - range for DPA address
  * @coord - QoS performance data (i.e. latency, bandwidth)
  * @qos_class - QoS Class cookies
  */
 struct cxl_dpa_perf {
-	struct list_head list;
 	struct range dpa_range;
 	struct access_coordinate coord;
 	int qos_class;
@@ -471,8 +469,8 @@  struct cxl_dev_state {
  * @security: security driver state info
  * @fw: firmware upload / activation state
  * @mbox_send: @dev specific transport for transmitting mailbox commands
- * @ram_perf_list: performance data entries matched to RAM
- * @pmem_perf_list: performance data entries matched to PMEM
+ * @ram_perf: performance data entry matched to RAM partition
+ * @pmem_perf: performance data entry matched to PMEM partition
  *
  * See CXL 3.0 8.2.9.8.2 Capacity Configuration and Label Storage for
  * details on capacity parameters.
@@ -494,8 +492,8 @@  struct cxl_memdev_state {
 	u64 next_volatile_bytes;
 	u64 next_persistent_bytes;
 
-	struct list_head ram_perf_list;
-	struct list_head pmem_perf_list;
+	struct cxl_dpa_perf ram_perf;
+	struct cxl_dpa_perf pmem_perf;
 
 	struct cxl_event_state event;
 	struct cxl_poison_state poison;
diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
index c5c9d8e0d88d..a62099e47d71 100644
--- a/drivers/cxl/mem.c
+++ b/drivers/cxl/mem.c
@@ -221,16 +221,13 @@  static ssize_t ram_qos_class_show(struct device *dev,
 	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
 	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
-	struct cxl_dpa_perf *dpa_perf;
+	struct cxl_dpa_perf *dpa_perf = &mds->ram_perf;
 
 	if (!dev->driver)
 		return -ENOENT;
 
-	if (list_empty(&mds->ram_perf_list))
-		return -ENOENT;
-
-	dpa_perf = list_first_entry(&mds->ram_perf_list, struct cxl_dpa_perf,
-				    list);
+	if (dpa_perf->qos_class == CXL_QOS_CLASS_INVALID)
+		return -ENODATA;
 
 	return sysfs_emit(buf, "%d\n", dpa_perf->qos_class);
 }
@@ -244,16 +241,10 @@  static ssize_t pmem_qos_class_show(struct device *dev,
 	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
 	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
-	struct cxl_dpa_perf *dpa_perf;
+	struct cxl_dpa_perf *dpa_perf = &mds->pmem_perf;
 
-	if (!dev->driver)
-		return -ENOENT;
-
-	if (list_empty(&mds->pmem_perf_list))
-		return -ENOENT;
-
-	dpa_perf = list_first_entry(&mds->pmem_perf_list, struct cxl_dpa_perf,
-				    list);
+	if (dpa_perf->qos_class == CXL_QOS_CLASS_INVALID)
+		return -ENODATA;
 
 	return sysfs_emit(buf, "%d\n", dpa_perf->qos_class);
 }
@@ -273,11 +264,11 @@  static umode_t cxl_mem_visible(struct kobject *kobj, struct attribute *a, int n)
 			return 0;
 
 	if (a == &dev_attr_pmem_qos_class.attr)
-		if (list_empty(&mds->pmem_perf_list))
+		if (mds->pmem_perf.qos_class == CXL_QOS_CLASS_INVALID)
 			return 0;
 
 	if (a == &dev_attr_ram_qos_class.attr)
-		if (list_empty(&mds->ram_perf_list))
+		if (mds->ram_perf.qos_class == CXL_QOS_CLASS_INVALID)
 			return 0;
 
 	return a->mode;