diff mbox series

[3/3] drivers/perf: hisi: Add TLP filter support

Message ID 20221025113242.58271-4-yangyicong@huawei.com (mailing list archive)
State New, archived
Headers show
Series Add TLP filter support and some fixes for HiSilicon PCIe PMU | expand

Commit Message

Yicong Yang Oct. 25, 2022, 11:32 a.m. UTC
From: Yicong Yang <yangyicong@hisilicon.com>

The PMU supports filtering TLPs when counting bandwidth, with the options
below:

- only count the TLP headers
- only count the TLP payloads
- count both TLP headers and payloads

The current driver defaults to counting TLP payloads only, which has the
implicit side effect that we get no data when the traffic consists only of
header-only TLPs.

Make this user-configurable through the "len_mode" parameter and default to
counting both TLP headers and payloads when the user does not specify it.
Also update the documentation accordingly.

Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
---
 .../admin-guide/perf/hisi-pcie-pmu.rst        | 20 +++++++++++++++++++
 drivers/perf/hisilicon/hisi_pcie_pmu.c        | 14 ++++++++++++-
 2 files changed, 33 insertions(+), 1 deletion(-)

Comments

Jonathan Cameron Nov. 9, 2022, 5:09 p.m. UTC | #1
On Tue, 25 Oct 2022 19:32:42 +0800
Yicong Yang <yangyicong@huawei.com> wrote:

> From: Yicong Yang <yangyicong@hisilicon.com>
> 
> The PMU support to filter the TLP when counting the bandwidth with below
> options:
> 
> - only count the TLP headers
> - only count the TLP payloads
> - count both TLP headers and payloads
> 
> In the current driver it's default to count the TLP payloads only, which
> will have an implicity side effects that on the traffic only have header
> only TLPs, we'll get no data.
> 
> Make this user configuration through "len_mode" parameter and make it
> default to count both TLP headers and payloads when user not specified.
> Also update the documentation for it.

I suppose this filter option is useful.   Default of "both" makes sense.

A few minor suggestions inline.

Jonathan

> 
> Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
> ---
>  .../admin-guide/perf/hisi-pcie-pmu.rst        | 20 +++++++++++++++++++
>  drivers/perf/hisilicon/hisi_pcie_pmu.c        | 14 ++++++++++++-
>  2 files changed, 33 insertions(+), 1 deletion(-)
> 
> diff --git a/Documentation/admin-guide/perf/hisi-pcie-pmu.rst b/Documentation/admin-guide/perf/hisi-pcie-pmu.rst
> index bbe66480ff85..83a2ef11b1a0 100644
> --- a/Documentation/admin-guide/perf/hisi-pcie-pmu.rst
> +++ b/Documentation/admin-guide/perf/hisi-pcie-pmu.rst
> @@ -104,3 +104,23 @@ when TLP length < threshold.
>  Example usage of perf::
>  
>    $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,thr_len=0x4,thr_mode=1/ sleep 5
> +
> +4. TLP Length filter
> +When counting bandwidth, the data can be composed of certain parts of TLP
> +packets. You can specify it through "len_mode":
> +
> +- 2'b00: Reserved (Do not use this since the behaviour is undefined)
> +- 2'b01: Bandwidth of TLP payloads
> +- 2'b10: Bandwidth of TLP headers
> +- 2'b11: Bandwidth of both TLP payloads and headers

We could describe this as a bitmap, but I think you are right in thinking
it is clearer to present it as 3 values, as it is not always obvious that the
bitmap is controlling the sum of two different things.

> +
> +For example, "len_mode=2" means only counting the bandwidth of TLP headers
> +and "len_mode=3" means the final bandwidth data is composed of both TLP
> +headers and payloads. You need to carefully using this to avoid losing
> +data.

I'm not sure this warning makes sense.  If user has set this filter then
they don't want to measure the types they haven't picked.  I would drop
the warning and example.  It is fairly obvious that no_counts == nothing
to count.
 
> For example you're likely to get no counts by "len_mode=1" if the TLPs
> +on the traffic has no payload.
Drop this example.

> This config is optional, by default it'll
> +be 2'b11.

Anything with a default is inherently optional, so could we just say
Default value if not specified is 2'b11.

> +
> +Example usage of perf::
> +
> +  $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,len_mode=0x1/ sleep 5
> diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c
> index 071e63d9a9ac..6fee0b6e163b 100644
> --- a/drivers/perf/hisilicon/hisi_pcie_pmu.c
> +++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c
> @@ -47,10 +47,14 @@
>  #define HISI_PCIE_EVENT_M		GENMASK_ULL(15, 0)
>  #define HISI_PCIE_THR_MODE_M		GENMASK_ULL(27, 27)
>  #define HISI_PCIE_THR_M			GENMASK_ULL(31, 28)
> +#define HISI_PCIE_LEN_M			GENMASK_ULL(35, 34)
>  #define HISI_PCIE_TARGET_M		GENMASK_ULL(52, 36)
>  #define HISI_PCIE_TRIG_MODE_M		GENMASK_ULL(53, 53)
>  #define HISI_PCIE_TRIG_M		GENMASK_ULL(59, 56)
>  
> +/* Default config of TLP length mode, will count both TLP headers and payloads */
> +#define HISI_PCIE_LEN_M_DEFAULT		3ULL
> +
>  #define HISI_PCIE_MAX_COUNTERS		8
>  #define HISI_PCIE_REG_STEP		8
>  #define HISI_PCIE_THR_MAX_VAL		10
> @@ -91,6 +95,7 @@ HISI_PCIE_PMU_FILTER_ATTR(thr_len, config1, 3, 0);
>  HISI_PCIE_PMU_FILTER_ATTR(thr_mode, config1, 4, 4);
>  HISI_PCIE_PMU_FILTER_ATTR(trig_len, config1, 8, 5);
>  HISI_PCIE_PMU_FILTER_ATTR(trig_mode, config1, 9, 9);
> +HISI_PCIE_PMU_FILTER_ATTR(len_mode, config1, 11, 10);
>  HISI_PCIE_PMU_FILTER_ATTR(port, config2, 15, 0);
>  HISI_PCIE_PMU_FILTER_ATTR(bdf, config2, 31, 16);
>  
> @@ -215,8 +220,8 @@ static void hisi_pcie_pmu_config_filter(struct perf_event *event)
>  {
>  	struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu);
>  	struct hw_perf_event *hwc = &event->hw;
> +	u64 port, trig_len, thr_len, len_mode;
>  	u64 reg = HISI_PCIE_INIT_SET;
> -	u64 port, trig_len, thr_len;
>  
>  	/* Config HISI_PCIE_EVENT_CTRL according to event. */
>  	reg |= FIELD_PREP(HISI_PCIE_EVENT_M, hisi_pcie_get_real_event(event));
> @@ -245,6 +250,12 @@ static void hisi_pcie_pmu_config_filter(struct perf_event *event)
>  		reg |= HISI_PCIE_THR_EN;
>  	}
>  
> +	len_mode = hisi_pcie_get_len_mode(event);
> +	if (len_mode)
> +		reg |= FIELD_PREP(HISI_PCIE_LEN_M, len_mode);
> +	else
> +		reg |= FIELD_PREP(HISI_PCIE_LEN_M, HISI_PCIE_LEN_M_DEFAULT);
> +
>  	hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_EVENT_CTRL, hwc->idx, reg);
>  }
>  
> @@ -711,6 +722,7 @@ static struct attribute *hisi_pcie_pmu_format_attr[] = {
>  	HISI_PCIE_PMU_FORMAT_ATTR(thr_mode, "config1:4"),
>  	HISI_PCIE_PMU_FORMAT_ATTR(trig_len, "config1:5-8"),
>  	HISI_PCIE_PMU_FORMAT_ATTR(trig_mode, "config1:9"),
> +	HISI_PCIE_PMU_FORMAT_ATTR(len_mode, "config1:10-11"),
>  	HISI_PCIE_PMU_FORMAT_ATTR(port, "config2:0-15"),
>  	HISI_PCIE_PMU_FORMAT_ATTR(bdf, "config2:16-31"),
>  	NULL
Yicong Yang Nov. 10, 2022, 2:45 a.m. UTC | #2
On 2022/11/10 1:09, Jonathan Cameron wrote:
> On Tue, 25 Oct 2022 19:32:42 +0800
> Yicong Yang <yangyicong@huawei.com> wrote:
> 
>> From: Yicong Yang <yangyicong@hisilicon.com>
>>
>> The PMU support to filter the TLP when counting the bandwidth with below
>> options:
>>
>> - only count the TLP headers
>> - only count the TLP payloads
>> - count both TLP headers and payloads
>>
>> In the current driver it's default to count the TLP payloads only, which
>> will have an implicity side effects that on the traffic only have header
>> only TLPs, we'll get no data.
>>
>> Make this user configuration through "len_mode" parameter and make it
>> default to count both TLP headers and payloads when user not specified.
>> Also update the documentation for it.
> 
> I suppose this filter option is useful.   Default of "both" makes sense.
> 
> A few minor suggestions inline.
> 
> Jonathan
> 
>>
>> Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
>> ---
>>  .../admin-guide/perf/hisi-pcie-pmu.rst        | 20 +++++++++++++++++++
>>  drivers/perf/hisilicon/hisi_pcie_pmu.c        | 14 ++++++++++++-
>>  2 files changed, 33 insertions(+), 1 deletion(-)
>>
>> diff --git a/Documentation/admin-guide/perf/hisi-pcie-pmu.rst b/Documentation/admin-guide/perf/hisi-pcie-pmu.rst
>> index bbe66480ff85..83a2ef11b1a0 100644
>> --- a/Documentation/admin-guide/perf/hisi-pcie-pmu.rst
>> +++ b/Documentation/admin-guide/perf/hisi-pcie-pmu.rst
>> @@ -104,3 +104,23 @@ when TLP length < threshold.
>>  Example usage of perf::
>>  
>>    $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,thr_len=0x4,thr_mode=1/ sleep 5
>> +
>> +4. TLP Length filter
>> +When counting bandwidth, the data can be composed of certain parts of TLP
>> +packets. You can specify it through "len_mode":
>> +
>> +- 2'b00: Reserved (Do not use this since the behaviour is undefined)
>> +- 2'b01: Bandwidth of TLP payloads
>> +- 2'b10: Bandwidth of TLP headers
>> +- 2'b11: Bandwidth of both TLP payloads and headers
> 
> We could describe this as a bitmap, but i think you are right in thinking
> it is clearer to present it as 3 values as not always obvious that the bitmap
> is controlling sum of two different things.
> 

I just wanted to keep the style consistent with the existing doc, and yes, I
also think it's clearer.

>> +
>> +For example, "len_mode=2" means only counting the bandwidth of TLP headers
>> +and "len_mode=3" means the final bandwidth data is composed of both TLP
>> +headers and payloads. You need to carefully using this to avoid losing
>> +data.
> 
> I'm not sure this warning makes sense.  If user has set this filter then
> they don't want to measure the types they haven't picked.  I would drop
> the warning and example.  It is fairly obvious that no_counts == nothing
> to count.
>  

I just hoped users would not set "len_mode=0", as it's reserved. Will drop it.

>> For example you're likely to get no counts by "len_mode=1" if the TLPs
>> +on the traffic has no payload.
> Drop this example.
> 

ok.

>> This config is optional, by default it'll
>> +be 2'b11.
> 
> Anything with a default is inherently optional, so could we just say
> Default value if not specified is 2'b11.
> 

ok, it'll be better.

Will update the doc. Thanks for the comments!

Regards.

>> +
>> +Example usage of perf::
>> +
>> +  $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,len_mode=0x1/ sleep 5
>> diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c
>> index 071e63d9a9ac..6fee0b6e163b 100644
>> --- a/drivers/perf/hisilicon/hisi_pcie_pmu.c
>> +++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c
>> @@ -47,10 +47,14 @@
>>  #define HISI_PCIE_EVENT_M		GENMASK_ULL(15, 0)
>>  #define HISI_PCIE_THR_MODE_M		GENMASK_ULL(27, 27)
>>  #define HISI_PCIE_THR_M			GENMASK_ULL(31, 28)
>> +#define HISI_PCIE_LEN_M			GENMASK_ULL(35, 34)
>>  #define HISI_PCIE_TARGET_M		GENMASK_ULL(52, 36)
>>  #define HISI_PCIE_TRIG_MODE_M		GENMASK_ULL(53, 53)
>>  #define HISI_PCIE_TRIG_M		GENMASK_ULL(59, 56)
>>  
>> +/* Default config of TLP length mode, will count both TLP headers and payloads */
>> +#define HISI_PCIE_LEN_M_DEFAULT		3ULL
>> +
>>  #define HISI_PCIE_MAX_COUNTERS		8
>>  #define HISI_PCIE_REG_STEP		8
>>  #define HISI_PCIE_THR_MAX_VAL		10
>> @@ -91,6 +95,7 @@ HISI_PCIE_PMU_FILTER_ATTR(thr_len, config1, 3, 0);
>>  HISI_PCIE_PMU_FILTER_ATTR(thr_mode, config1, 4, 4);
>>  HISI_PCIE_PMU_FILTER_ATTR(trig_len, config1, 8, 5);
>>  HISI_PCIE_PMU_FILTER_ATTR(trig_mode, config1, 9, 9);
>> +HISI_PCIE_PMU_FILTER_ATTR(len_mode, config1, 11, 10);
>>  HISI_PCIE_PMU_FILTER_ATTR(port, config2, 15, 0);
>>  HISI_PCIE_PMU_FILTER_ATTR(bdf, config2, 31, 16);
>>  
>> @@ -215,8 +220,8 @@ static void hisi_pcie_pmu_config_filter(struct perf_event *event)
>>  {
>>  	struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu);
>>  	struct hw_perf_event *hwc = &event->hw;
>> +	u64 port, trig_len, thr_len, len_mode;
>>  	u64 reg = HISI_PCIE_INIT_SET;
>> -	u64 port, trig_len, thr_len;
>>  
>>  	/* Config HISI_PCIE_EVENT_CTRL according to event. */
>>  	reg |= FIELD_PREP(HISI_PCIE_EVENT_M, hisi_pcie_get_real_event(event));
>> @@ -245,6 +250,12 @@ static void hisi_pcie_pmu_config_filter(struct perf_event *event)
>>  		reg |= HISI_PCIE_THR_EN;
>>  	}
>>  
>> +	len_mode = hisi_pcie_get_len_mode(event);
>> +	if (len_mode)
>> +		reg |= FIELD_PREP(HISI_PCIE_LEN_M, len_mode);
>> +	else
>> +		reg |= FIELD_PREP(HISI_PCIE_LEN_M, HISI_PCIE_LEN_M_DEFAULT);
>> +
>>  	hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_EVENT_CTRL, hwc->idx, reg);
>>  }
>>  
>> @@ -711,6 +722,7 @@ static struct attribute *hisi_pcie_pmu_format_attr[] = {
>>  	HISI_PCIE_PMU_FORMAT_ATTR(thr_mode, "config1:4"),
>>  	HISI_PCIE_PMU_FORMAT_ATTR(trig_len, "config1:5-8"),
>>  	HISI_PCIE_PMU_FORMAT_ATTR(trig_mode, "config1:9"),
>> +	HISI_PCIE_PMU_FORMAT_ATTR(len_mode, "config1:10-11"),
>>  	HISI_PCIE_PMU_FORMAT_ATTR(port, "config2:0-15"),
>>  	HISI_PCIE_PMU_FORMAT_ATTR(bdf, "config2:16-31"),
>>  	NULL
> 
> .
>
Bagas Sanjaya Nov. 10, 2022, 4:16 a.m. UTC | #3
On Tue, Oct 25, 2022 at 07:32:42PM +0800, Yicong Yang wrote:
> +4. TLP Length filter
> +When counting bandwidth, the data can be composed of certain parts of TLP
> +packets. You can specify it through "len_mode":
> +
> +- 2'b00: Reserved (Do not use this since the behaviour is undefined)
> +- 2'b01: Bandwidth of TLP payloads
> +- 2'b10: Bandwidth of TLP headers
> +- 2'b11: Bandwidth of both TLP payloads and headers
> +
> +For example, "len_mode=2" means only counting the bandwidth of TLP headers
> +and "len_mode=3" means the final bandwidth data is composed of both TLP
> +headers and payloads. You need to carefully using this to avoid losing
> +data. For example you're likely to get no counts by "len_mode=1" if the TLPs
> +on the traffic has no payload. This config is optional, by default it'll
> +be 2'b11.
> +
> +Example usage of perf::
> +
> +  $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,len_mode=0x1/ sleep 5

The indentation is rather ugly, so I have applied the fixup:

---- >8 ----

diff --git a/Documentation/admin-guide/perf/hisi-pcie-pmu.rst b/Documentation/admin-guide/perf/hisi-pcie-pmu.rst
index 83a2ef11b1a08d..bae690dddbebfd 100644
--- a/Documentation/admin-guide/perf/hisi-pcie-pmu.rst
+++ b/Documentation/admin-guide/perf/hisi-pcie-pmu.rst
@@ -106,21 +106,22 @@ Example usage of perf::
   $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,thr_len=0x4,thr_mode=1/ sleep 5
 
 4. TLP Length filter
-When counting bandwidth, the data can be composed of certain parts of TLP
-packets. You can specify it through "len_mode":
 
-- 2'b00: Reserved (Do not use this since the behaviour is undefined)
-- 2'b01: Bandwidth of TLP payloads
-- 2'b10: Bandwidth of TLP headers
-- 2'b11: Bandwidth of both TLP payloads and headers
+   When counting bandwidth, the data can be composed of certain parts of TLP
+   packets. You can specify it through "len_mode":
 
-For example, "len_mode=2" means only counting the bandwidth of TLP headers
-and "len_mode=3" means the final bandwidth data is composed of both TLP
-headers and payloads. You need to carefully using this to avoid losing
-data. For example you're likely to get no counts by "len_mode=1" if the TLPs
-on the traffic has no payload. This config is optional, by default it'll
-be 2'b11.
+   - 2'b00: Reserved (Do not use this since the behaviour is undefined)
+   - 2'b01: Bandwidth of TLP payloads
+   - 2'b10: Bandwidth of TLP headers
+   - 2'b11: Bandwidth of both TLP payloads and headers
 
-Example usage of perf::
+   For example, "len_mode=2" means only counting the bandwidth of TLP headers
+   and "len_mode=3" means the final bandwidth data is composed of both TLP
+   headers and payloads. You need to carefully using this to avoid losing
+   data. For example you're likely to get no counts by "len_mode=1" if the TLPs
+   on the traffic has no payload. This config is optional, by default it'll
+   be 2'b11.
 
-  $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,len_mode=0x1/ sleep 5
+   Example usage of perf::
+
+     $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,len_mode=0x1/ sleep 5

Also, for formatting consistency, you need to pick up the fix for the rest of
the documentation (as a separate patch in your series):

---- >8 ----

From 5f286aceb959a7c70a56cb5c453d3ac0b844fb49 Mon Sep 17 00:00:00 2001
From: Bagas Sanjaya <bagasdotme@gmail.com>
Date: Thu, 10 Nov 2022 11:03:08 +0700
Subject: [PATCH] Documentation: perf: Indent filter options list

The "Filter options" list has rather ugly indentation. Also, the first
paragraph after each list name is rendered without a separator (as a
continuation of the name).

Align the list by indenting the list items and add a blank line
separator for each list name.

Fixes: c8602008e247f5 ("docs: perf: Add description for HiSilicon PCIe PMU driver")
Signed-off-by: Bagas Sanjaya <bagasdotme@gmail.com>
---
 .../admin-guide/perf/hisi-pcie-pmu.rst        | 80 ++++++++++---------
 1 file changed, 43 insertions(+), 37 deletions(-)

diff --git a/Documentation/admin-guide/perf/hisi-pcie-pmu.rst b/Documentation/admin-guide/perf/hisi-pcie-pmu.rst
index bae690dddbebfd..54fab870a2aefb 100644
--- a/Documentation/admin-guide/perf/hisi-pcie-pmu.rst
+++ b/Documentation/admin-guide/perf/hisi-pcie-pmu.rst
@@ -48,62 +48,68 @@ Filter options
 --------------
 
 1. Target filter
-PMU could only monitor the performance of traffic downstream target Root Ports
-or downstream target Endpoint. PCIe PMU driver support "port" and "bdf"
-interfaces for users, and these two interfaces aren't supported at the same
-time.
 
--port
-"port" filter can be used in all PCIe PMU events, target Root Port can be
-selected by configuring the 16-bits-bitmap "port". Multi ports can be selected
-for AP-layer-events, and only one port can be selected for TL/DL-layer-events.
+   PMU could only monitor the performance of traffic downstream target Root
+   Ports or downstream target Endpoint. PCIe PMU driver support "port" and
+   "bdf" interfaces for users, and these two interfaces aren't supported at the
+   same time.
 
-For example, if target Root Port is 0000:00:00.0 (x8 lanes), bit0 of bitmap
-should be set, port=0x1; if target Root Port is 0000:00:04.0 (x4 lanes),
-bit8 is set, port=0x100; if these two Root Ports are both monitored, port=0x101.
+   - port
 
-Example usage of perf::
+     "port" filter can be used in all PCIe PMU events, target Root Port can be
+     selected by configuring the 16-bits-bitmap "port". Multi ports can be
+     selected for AP-layer-events, and only one port can be selected for
+     TL/DL-layer-events.
 
-  $# perf stat -e hisi_pcie0_core0/rx_mwr_latency,port=0x1/ sleep 5
+     For example, if target Root Port is 0000:00:00.0 (x8 lanes), bit0 of
+     bitmap should be set, port=0x1; if target Root Port is 0000:00:04.0 (x4
+     lanes), bit8 is set, port=0x100; if these two Root Ports are both
+     monitored, port=0x101.
 
--bdf
+     Example usage of perf::
 
-"bdf" filter can only be used in bandwidth events, target Endpoint is selected
-by configuring BDF to "bdf". Counter only counts the bandwidth of message
-requested by target Endpoint.
+       $# perf stat -e hisi_pcie0_core0/rx_mwr_latency,port=0x1/ sleep 5
 
-For example, "bdf=0x3900" means BDF of target Endpoint is 0000:39:00.0.
+   - bdf
 
-Example usage of perf::
+     "bdf" filter can only be used in bandwidth events, target Endpoint is
+     selected by configuring BDF to "bdf". Counter only counts the bandwidth of
+     message requested by target Endpoint.
 
-  $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,bdf=0x3900/ sleep 5
+     For example, "bdf=0x3900" means BDF of target Endpoint is 0000:39:00.0.
+
+     Example usage of perf::
+
+       $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,bdf=0x3900/ sleep 5
 
 2. Trigger filter
-Event statistics start when the first time TLP length is greater/smaller
-than trigger condition. You can set the trigger condition by writing "trig_len",
-and set the trigger mode by writing "trig_mode". This filter can only be used
-in bandwidth events.
 
-For example, "trig_len=4" means trigger condition is 2^4 DW, "trig_mode=0"
-means statistics start when TLP length > trigger condition, "trig_mode=1"
-means start when TLP length < condition.
+   Event statistics start when the first time TLP length is greater/smaller
+   than trigger condition. You can set the trigger condition by writing
+   "trig_len", and set the trigger mode by writing "trig_mode". This filter can
+   only be used in bandwidth events.
 
-Example usage of perf::
+   For example, "trig_len=4" means trigger condition is 2^4 DW, "trig_mode=0"
+   means statistics start when TLP length > trigger condition, "trig_mode=1"
+   means start when TLP length < condition.
 
-  $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,trig_len=0x4,trig_mode=1/ sleep 5
+   Example usage of perf::
+
+     $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,trig_len=0x4,trig_mode=1/ sleep 5
 
 3. Threshold filter
-Counter counts when TLP length within the specified range. You can set the
-threshold by writing "thr_len", and set the threshold mode by writing
-"thr_mode". This filter can only be used in bandwidth events.
 
-For example, "thr_len=4" means threshold is 2^4 DW, "thr_mode=0" means
-counter counts when TLP length >= threshold, and "thr_mode=1" means counts
-when TLP length < threshold.
+   Counter counts when TLP length within the specified range. You can set the
+   threshold by writing "thr_len", and set the threshold mode by writing
+   "thr_mode". This filter can only be used in bandwidth events.
 
-Example usage of perf::
+   For example, "thr_len=4" means threshold is 2^4 DW, "thr_mode=0" means
+   counter counts when TLP length >= threshold, and "thr_mode=1" means counts
+   when TLP length < threshold.
 
-  $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,thr_len=0x4,thr_mode=1/ sleep 5
+   Example usage of perf::
+
+     $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,thr_len=0x4,thr_mode=1/ sleep 5
 
 4. TLP Length filter
Yicong Yang Nov. 10, 2022, 8:34 a.m. UTC | #4
On 2022/11/10 12:16, Bagas Sanjaya wrote:
> On Tue, Oct 25, 2022 at 07:32:42PM +0800, Yicong Yang wrote:
>> +4. TLP Length filter
>> +When counting bandwidth, the data can be composed of certain parts of TLP
>> +packets. You can specify it through "len_mode":
>> +
>> +- 2'b00: Reserved (Do not use this since the behaviour is undefined)
>> +- 2'b01: Bandwidth of TLP payloads
>> +- 2'b10: Bandwidth of TLP headers
>> +- 2'b11: Bandwidth of both TLP payloads and headers
>> +
>> +For example, "len_mode=2" means only counting the bandwidth of TLP headers
>> +and "len_mode=3" means the final bandwidth data is composed of both TLP
>> +headers and payloads. You need to carefully using this to avoid losing
>> +data. For example you're likely to get no counts by "len_mode=1" if the TLPs
>> +on the traffic has no payload. This config is optional, by default it'll
>> +be 2'b11.
>> +
>> +Example usage of perf::
>> +
>> +  $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,len_mode=0x1/ sleep 5
> 
> The indentation is rather ugly, so I have applied the fixup:
> 

will apply the fix and include your patch in v2.

Thanks.

> ---- >8 ----
> 
> diff --git a/Documentation/admin-guide/perf/hisi-pcie-pmu.rst b/Documentation/admin-guide/perf/hisi-pcie-pmu.rst
> index 83a2ef11b1a08d..bae690dddbebfd 100644
> --- a/Documentation/admin-guide/perf/hisi-pcie-pmu.rst
> +++ b/Documentation/admin-guide/perf/hisi-pcie-pmu.rst
> @@ -106,21 +106,22 @@ Example usage of perf::
>    $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,thr_len=0x4,thr_mode=1/ sleep 5
>  
>  4. TLP Length filter
> -When counting bandwidth, the data can be composed of certain parts of TLP
> -packets. You can specify it through "len_mode":
>  
> -- 2'b00: Reserved (Do not use this since the behaviour is undefined)
> -- 2'b01: Bandwidth of TLP payloads
> -- 2'b10: Bandwidth of TLP headers
> -- 2'b11: Bandwidth of both TLP payloads and headers
> +   When counting bandwidth, the data can be composed of certain parts of TLP
> +   packets. You can specify it through "len_mode":
>  
> -For example, "len_mode=2" means only counting the bandwidth of TLP headers
> -and "len_mode=3" means the final bandwidth data is composed of both TLP
> -headers and payloads. You need to carefully using this to avoid losing
> -data. For example you're likely to get no counts by "len_mode=1" if the TLPs
> -on the traffic has no payload. This config is optional, by default it'll
> -be 2'b11.
> +   - 2'b00: Reserved (Do not use this since the behaviour is undefined)
> +   - 2'b01: Bandwidth of TLP payloads
> +   - 2'b10: Bandwidth of TLP headers
> +   - 2'b11: Bandwidth of both TLP payloads and headers
>  
> -Example usage of perf::
> +   For example, "len_mode=2" means only counting the bandwidth of TLP headers
> +   and "len_mode=3" means the final bandwidth data is composed of both TLP
> +   headers and payloads. You need to carefully using this to avoid losing
> +   data. For example you're likely to get no counts by "len_mode=1" if the TLPs
> +   on the traffic has no payload. This config is optional, by default it'll
> +   be 2'b11.
>  
> -  $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,len_mode=0x1/ sleep 5
> +   Example usage of perf::
> +
> +     $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,len_mode=0x1/ sleep 5
> 
> Also, for formatting consistency, you need to pick up the fix for rest of
> documentation (as separate patch in your series):
> 
> ---- >8 ----
> 
> From 5f286aceb959a7c70a56cb5c453d3ac0b844fb49 Mon Sep 17 00:00:00 2001
> From: Bagas Sanjaya <bagasdotme@gmail.com>
> Date: Thu, 10 Nov 2022 11:03:08 +0700
> Subject: [PATCH] Documentation: perf: Indent filter options list
> 
> The "Filter options" list have a rather ugly indentation. Also, the first
> paragraph after list name is rendered without separator (as continuation
> from the name).
> 
> Align the list by indenting the list items and add a blank line
> separator for each list name.
> 
> Fixes: c8602008e247f5 ("docs: perf: Add description for HiSilicon PCIe PMU driver")
> Signed-off-by: Bagas Sanjaya <bagasdotme@gmail.com>
> ---
>  .../admin-guide/perf/hisi-pcie-pmu.rst        | 80 ++++++++++---------
>  1 file changed, 43 insertions(+), 37 deletions(-)
> 
> diff --git a/Documentation/admin-guide/perf/hisi-pcie-pmu.rst b/Documentation/admin-guide/perf/hisi-pcie-pmu.rst
> index bae690dddbebfd..54fab870a2aefb 100644
> --- a/Documentation/admin-guide/perf/hisi-pcie-pmu.rst
> +++ b/Documentation/admin-guide/perf/hisi-pcie-pmu.rst
> @@ -48,62 +48,68 @@ Filter options
>  --------------
>  
>  1. Target filter
> -PMU could only monitor the performance of traffic downstream target Root Ports
> -or downstream target Endpoint. PCIe PMU driver support "port" and "bdf"
> -interfaces for users, and these two interfaces aren't supported at the same
> -time.
>  
> --port
> -"port" filter can be used in all PCIe PMU events, target Root Port can be
> -selected by configuring the 16-bits-bitmap "port". Multi ports can be selected
> -for AP-layer-events, and only one port can be selected for TL/DL-layer-events.
> +   PMU could only monitor the performance of traffic downstream target Root
> +   Ports or downstream target Endpoint. PCIe PMU driver support "port" and
> +   "bdf" interfaces for users, and these two interfaces aren't supported at the
> +   same time.
>  
> -For example, if target Root Port is 0000:00:00.0 (x8 lanes), bit0 of bitmap
> -should be set, port=0x1; if target Root Port is 0000:00:04.0 (x4 lanes),
> -bit8 is set, port=0x100; if these two Root Ports are both monitored, port=0x101.
> +   - port
>  
> -Example usage of perf::
> +     "port" filter can be used in all PCIe PMU events, target Root Port can be
> +     selected by configuring the 16-bits-bitmap "port". Multi ports can be
> +     selected for AP-layer-events, and only one port can be selected for
> +     TL/DL-layer-events.
>  
> -  $# perf stat -e hisi_pcie0_core0/rx_mwr_latency,port=0x1/ sleep 5
> +     For example, if target Root Port is 0000:00:00.0 (x8 lanes), bit0 of
> +     bitmap should be set, port=0x1; if target Root Port is 0000:00:04.0 (x4
> +     lanes), bit8 is set, port=0x100; if these two Root Ports are both
> +     monitored, port=0x101.
>  
> --bdf
> +     Example usage of perf::
>  
> -"bdf" filter can only be used in bandwidth events, target Endpoint is selected
> -by configuring BDF to "bdf". Counter only counts the bandwidth of message
> -requested by target Endpoint.
> +       $# perf stat -e hisi_pcie0_core0/rx_mwr_latency,port=0x1/ sleep 5
>  
> -For example, "bdf=0x3900" means BDF of target Endpoint is 0000:39:00.0.
> +   - bdf
>  
> -Example usage of perf::
> +     "bdf" filter can only be used in bandwidth events, target Endpoint is
> +     selected by configuring BDF to "bdf". Counter only counts the bandwidth of
> +     message requested by target Endpoint.
>  
> -  $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,bdf=0x3900/ sleep 5
> +     For example, "bdf=0x3900" means BDF of target Endpoint is 0000:39:00.0.
> +
> +     Example usage of perf::
> +
> +       $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,bdf=0x3900/ sleep 5
>  
>  2. Trigger filter
> -Event statistics start when the first time TLP length is greater/smaller
> -than trigger condition. You can set the trigger condition by writing "trig_len",
> -and set the trigger mode by writing "trig_mode". This filter can only be used
> -in bandwidth events.
>  
> -For example, "trig_len=4" means trigger condition is 2^4 DW, "trig_mode=0"
> -means statistics start when TLP length > trigger condition, "trig_mode=1"
> -means start when TLP length < condition.
> +   Event statistics start when the first time TLP length is greater/smaller
> +   than trigger condition. You can set the trigger condition by writing
> +   "trig_len", and set the trigger mode by writing "trig_mode". This filter can
> +   only be used in bandwidth events.
>  
> -Example usage of perf::
> +   For example, "trig_len=4" means trigger condition is 2^4 DW, "trig_mode=0"
> +   means statistics start when TLP length > trigger condition, "trig_mode=1"
> +   means start when TLP length < condition.
>  
> -  $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,trig_len=0x4,trig_mode=1/ sleep 5
> +   Example usage of perf::
> +
> +     $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,trig_len=0x4,trig_mode=1/ sleep 5
>  
>  3. Threshold filter
> -Counter counts when TLP length within the specified range. You can set the
> -threshold by writing "thr_len", and set the threshold mode by writing
> -"thr_mode". This filter can only be used in bandwidth events.
>  
> -For example, "thr_len=4" means threshold is 2^4 DW, "thr_mode=0" means
> -counter counts when TLP length >= threshold, and "thr_mode=1" means counts
> -when TLP length < threshold.
> +   Counter counts when TLP length within the specified range. You can set the
> +   threshold by writing "thr_len", and set the threshold mode by writing
> +   "thr_mode". This filter can only be used in bandwidth events.
>  
> -Example usage of perf::
> +   For example, "thr_len=4" means threshold is 2^4 DW, "thr_mode=0" means
> +   counter counts when TLP length >= threshold, and "thr_mode=1" means counts
> +   when TLP length < threshold.
>  
> -  $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,thr_len=0x4,thr_mode=1/ sleep 5
> +   Example usage of perf::
> +
> +     $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,thr_len=0x4,thr_mode=1/ sleep 5
>  
>  4. TLP Length filter
>  
>
diff mbox series

Patch

diff --git a/Documentation/admin-guide/perf/hisi-pcie-pmu.rst b/Documentation/admin-guide/perf/hisi-pcie-pmu.rst
index bbe66480ff85..83a2ef11b1a0 100644
--- a/Documentation/admin-guide/perf/hisi-pcie-pmu.rst
+++ b/Documentation/admin-guide/perf/hisi-pcie-pmu.rst
@@ -104,3 +104,23 @@  when TLP length < threshold.
 Example usage of perf::
 
   $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,thr_len=0x4,thr_mode=1/ sleep 5
+
+4. TLP Length filter
+When counting bandwidth, the data can be composed of certain parts of TLP
+packets. You can specify it through "len_mode":
+
+- 2'b00: Reserved (Do not use this since the behaviour is undefined)
+- 2'b01: Bandwidth of TLP payloads
+- 2'b10: Bandwidth of TLP headers
+- 2'b11: Bandwidth of both TLP payloads and headers
+
+For example, "len_mode=2" means only counting the bandwidth of TLP headers
+and "len_mode=3" means the final bandwidth data is composed of both TLP
+headers and payloads. You need to carefully using this to avoid losing
+data. For example you're likely to get no counts by "len_mode=1" if the TLPs
+on the traffic has no payload. This config is optional, by default it'll
+be 2'b11.
+
+Example usage of perf::
+
+  $# perf stat -e hisi_pcie0_core0/rx_mrd_flux,len_mode=0x1/ sleep 5
diff --git a/drivers/perf/hisilicon/hisi_pcie_pmu.c b/drivers/perf/hisilicon/hisi_pcie_pmu.c
index 071e63d9a9ac..6fee0b6e163b 100644
--- a/drivers/perf/hisilicon/hisi_pcie_pmu.c
+++ b/drivers/perf/hisilicon/hisi_pcie_pmu.c
@@ -47,10 +47,14 @@ 
 #define HISI_PCIE_EVENT_M		GENMASK_ULL(15, 0)
 #define HISI_PCIE_THR_MODE_M		GENMASK_ULL(27, 27)
 #define HISI_PCIE_THR_M			GENMASK_ULL(31, 28)
+#define HISI_PCIE_LEN_M			GENMASK_ULL(35, 34)
 #define HISI_PCIE_TARGET_M		GENMASK_ULL(52, 36)
 #define HISI_PCIE_TRIG_MODE_M		GENMASK_ULL(53, 53)
 #define HISI_PCIE_TRIG_M		GENMASK_ULL(59, 56)
 
+/* Default config of TLP length mode, will count both TLP headers and payloads */
+#define HISI_PCIE_LEN_M_DEFAULT		3ULL
+
 #define HISI_PCIE_MAX_COUNTERS		8
 #define HISI_PCIE_REG_STEP		8
 #define HISI_PCIE_THR_MAX_VAL		10
@@ -91,6 +95,7 @@  HISI_PCIE_PMU_FILTER_ATTR(thr_len, config1, 3, 0);
 HISI_PCIE_PMU_FILTER_ATTR(thr_mode, config1, 4, 4);
 HISI_PCIE_PMU_FILTER_ATTR(trig_len, config1, 8, 5);
 HISI_PCIE_PMU_FILTER_ATTR(trig_mode, config1, 9, 9);
+HISI_PCIE_PMU_FILTER_ATTR(len_mode, config1, 11, 10);
 HISI_PCIE_PMU_FILTER_ATTR(port, config2, 15, 0);
 HISI_PCIE_PMU_FILTER_ATTR(bdf, config2, 31, 16);
 
@@ -215,8 +220,8 @@  static void hisi_pcie_pmu_config_filter(struct perf_event *event)
 {
 	struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu);
 	struct hw_perf_event *hwc = &event->hw;
+	u64 port, trig_len, thr_len, len_mode;
 	u64 reg = HISI_PCIE_INIT_SET;
-	u64 port, trig_len, thr_len;
 
 	/* Config HISI_PCIE_EVENT_CTRL according to event. */
 	reg |= FIELD_PREP(HISI_PCIE_EVENT_M, hisi_pcie_get_real_event(event));
@@ -245,6 +250,12 @@  static void hisi_pcie_pmu_config_filter(struct perf_event *event)
 		reg |= HISI_PCIE_THR_EN;
 	}
 
+	len_mode = hisi_pcie_get_len_mode(event);
+	if (len_mode)
+		reg |= FIELD_PREP(HISI_PCIE_LEN_M, len_mode);
+	else
+		reg |= FIELD_PREP(HISI_PCIE_LEN_M, HISI_PCIE_LEN_M_DEFAULT);
+
 	hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_EVENT_CTRL, hwc->idx, reg);
 }
 
@@ -711,6 +722,7 @@  static struct attribute *hisi_pcie_pmu_format_attr[] = {
 	HISI_PCIE_PMU_FORMAT_ATTR(thr_mode, "config1:4"),
 	HISI_PCIE_PMU_FORMAT_ATTR(trig_len, "config1:5-8"),
 	HISI_PCIE_PMU_FORMAT_ATTR(trig_mode, "config1:9"),
+	HISI_PCIE_PMU_FORMAT_ATTR(len_mode, "config1:10-11"),
 	HISI_PCIE_PMU_FORMAT_ATTR(port, "config2:0-15"),
 	HISI_PCIE_PMU_FORMAT_ATTR(bdf, "config2:16-31"),
 	NULL