diff mbox series

[1/3] cxl/acpi: Restore XOR'd position bits during address translation

Message ID d4e689f9a9645cbddb943188d508807da33bf91f.1714159486.git.alison.schofield@intel.com
State Superseded
Headers show
Series XOR Math Fixups: translation & position | expand

Commit Message

Alison Schofield April 26, 2024, 7:51 p.m. UTC
From: Alison Schofield <alison.schofield@intel.com>

When a CXL region is created in a CXL Window (CFMWS) that uses XOR
interleave arithmetic XOR maps are applied during the HPA->DPA
translation. The XOR function changes the interleave selector
bit (aka position bit) in the HPA thereby varying which host bridge
services an HPA. The purpose is to minimize hot spots thereby
improving performance.

When a device reports a DPA in events such as poison, general_media,
and dram, the driver translates that DPA back to an HPA. Presently,
the CXL driver translation only considers the modulo position and
will report the wrong HPA for XOR configured CFMWS's.

Add a helper function that restores the XOR'd bits during DPA->HPA
address translation. Plumb a root decoder callback to the new helper
when XOR interleave arithmetic is in use. For MODULO arithmetic, just
let the callback be NULL - as in no extra work required.

Fixes: 28a3ae4ff66c ("cxl/trace: Add an HPA to cxl_poison trace events")
Signed-off-by: Alison Schofield <alison.schofield@intel.com>
---
 drivers/cxl/acpi.c       | 49 +++++++++++++++++++++++++++++++++++++---
 drivers/cxl/core/port.c  |  5 +++-
 drivers/cxl/core/trace.c |  5 ++++
 drivers/cxl/cxl.h        |  6 ++++-
 4 files changed, 60 insertions(+), 5 deletions(-)

Comments

Dan Williams April 27, 2024, 1:07 a.m. UTC | #1
alison.schofield@ wrote:
> From: Alison Schofield <alison.schofield@intel.com>
> 
> When a CXL region is created in a CXL Window (CFMWS) that uses XOR
> interleave arithmetic XOR maps are applied during the HPA->DPA
> translation. The XOR function changes the interleave selector
> bit (aka position bit) in the HPA thereby varying which host bridge
> services an HPA. The purpose is to minimize hot spots thereby
> improving performance.
> 
> When a device reports a DPA in events such as poison, general_media,
> and dram, the driver translates that DPA back to an HPA. Presently,
> the CXL driver translation only considers the modulo position and
> will report the wrong HPA for XOR configured CFMWS's.
> 
> Add a helper function that restores the XOR'd bits during DPA->HPA
> address translation. Plumb a root decoder callback to the new helper
> when XOR interleave arithmetic is in use. For MODULO arithmetic, just
> let the callback be NULL - as in no extra work required.
> 
> Fixes: 28a3ae4ff66c ("cxl/trace: Add an HPA to cxl_poison trace events")
> Signed-off-by: Alison Schofield <alison.schofield@intel.com>
> ---
>  drivers/cxl/acpi.c       | 49 +++++++++++++++++++++++++++++++++++++---
>  drivers/cxl/core/port.c  |  5 +++-
>  drivers/cxl/core/trace.c |  5 ++++
>  drivers/cxl/cxl.h        |  6 ++++-
>  4 files changed, 60 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
> index af5cb818f84d..519e933b5a4b 100644
> --- a/drivers/cxl/acpi.c
> +++ b/drivers/cxl/acpi.c
> @@ -74,6 +74,44 @@ static struct cxl_dport *cxl_hb_xor(struct cxl_root_decoder *cxlrd, int pos)
>  	return cxlrd->cxlsd.target[n];
>  }
>  
> +static u64 restore_xor_pos(u64 hpa, u64 map)
> +{
> +	int restore_value, restore_pos = 0;
> +
> +	/*
> +	 * Restore the position bit to its value before the
> +	 * xormap was applied at HPA->DPA translation.
> +	 *
> +	 * restore_pos is the lowest set bit in the map
> +	 * restore_value is the XORALLBITS in (hpa AND map)

Might be worth finally clarifying why there is no "XOR" operation in
this xor_pos routine, i.e. that XORALLBITS is identical to asking if the
hweight of the (hpa & map) is odd or even.

> +	 */
> +
> +	while ((map & (1ULL << restore_pos)) == 0)
> +		restore_pos++;

This is just open coded ffs()?

> +
> +	restore_value = (hweight64(hpa & map) & 1);
> +	if (restore_value)
> +		hpa |= (1ULL << restore_pos);
> +	else
> +		hpa &= ~(1ULL << restore_pos);

It feels like this conditional mask / set can just be an xor operation?

    hpa ^= ((hweight64(hpa & map) & 1) << restore_pos);

Otherwise I question the & and | operations relative to the HPA bit
position already being 0 or 1.

> +
> +	return hpa;
> +}
> +
> +static u64 cxl_xor_trans(struct cxl_root_decoder *cxlrd, u64 hpa, int iw)

Ok, so the driver has cxl_trace_hpa() and now cxl_xor_trans() and
"addr_trans". Can these all just be called "translate" because "trace"
feels like tracing, "trans" is only saving 4 characters, and "addr" is
redundant as nothing else needs translating besides addresses in CXL
land.

[..]
> diff --git a/drivers/cxl/core/trace.c b/drivers/cxl/core/trace.c
> index d0403dc3c8ab..a7ea4a256036 100644
> --- a/drivers/cxl/core/trace.c
> +++ b/drivers/cxl/core/trace.c
> @@ -36,6 +36,7 @@ static bool cxl_is_hpa_in_range(u64 hpa, struct cxl_region *cxlr, int pos)
>  static u64 cxl_dpa_to_hpa(u64 dpa,  struct cxl_region *cxlr,
>  			  struct cxl_endpoint_decoder *cxled)
>  {
> +	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
>  	u64 dpa_offset, hpa_offset, bits_upper, mask_upper, hpa;
>  	struct cxl_region_params *p = &cxlr->params;
>  	int pos = cxled->pos;
> @@ -75,6 +76,10 @@ static u64 cxl_dpa_to_hpa(u64 dpa,  struct cxl_region *cxlr,
>  	/* Apply the hpa_offset to the region base address */
>  	hpa = hpa_offset + p->res->start;
>  
> +	/* An addr_trans helper is defined for XOR math */

Rather then calling out XOR math since that is an ACPI'ism I would just
say something like: "root decoder overrides typical modulo decode"
Alison Schofield May 1, 2024, 3:41 a.m. UTC | #2
On Fri, Apr 26, 2024 at 06:07:59PM -0700, Dan Williams wrote:
> alison.schofield@ wrote:
> > From: Alison Schofield <alison.schofield@intel.com>
> > 
> > When a CXL region is created in a CXL Window (CFMWS) that uses XOR
> > interleave arithmetic XOR maps are applied during the HPA->DPA
> > translation. The XOR function changes the interleave selector
> > bit (aka position bit) in the HPA thereby varying which host bridge
> > services an HPA. The purpose is to minimize hot spots thereby
> > improving performance.
> > 
> > When a device reports a DPA in events such as poison, general_media,
> > and dram, the driver translates that DPA back to an HPA. Presently,
> > the CXL driver translation only considers the modulo position and
> > will report the wrong HPA for XOR configured CFMWS's.
> > 
> > Add a helper function that restores the XOR'd bits during DPA->HPA
> > address translation. Plumb a root decoder callback to the new helper
> > when XOR interleave arithmetic is in use. For MODULO arithmetic, just
> > let the callback be NULL - as in no extra work required.
> > 
> > Fixes: 28a3ae4ff66c ("cxl/trace: Add an HPA to cxl_poison trace events")
> > Signed-off-by: Alison Schofield <alison.schofield@intel.com>
> > ---
> >  drivers/cxl/acpi.c       | 49 +++++++++++++++++++++++++++++++++++++---
> >  drivers/cxl/core/port.c  |  5 +++-
> >  drivers/cxl/core/trace.c |  5 ++++
> >  drivers/cxl/cxl.h        |  6 ++++-
> >  4 files changed, 60 insertions(+), 5 deletions(-)
> > 
> > diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
> > index af5cb818f84d..519e933b5a4b 100644
> > --- a/drivers/cxl/acpi.c
> > +++ b/drivers/cxl/acpi.c
> > @@ -74,6 +74,44 @@ static struct cxl_dport *cxl_hb_xor(struct cxl_root_decoder *cxlrd, int pos)
> >  	return cxlrd->cxlsd.target[n];
> >  }
> >  
> > +static u64 restore_xor_pos(u64 hpa, u64 map)
> > +{
> > +	int restore_value, restore_pos = 0;
> > +
> > +	/*
> > +	 * Restore the position bit to its value before the
> > +	 * xormap was applied at HPA->DPA translation.
> > +	 *
> > +	 * restore_pos is the lowest set bit in the map
> > +	 * restore_value is the XORALLBITS in (hpa AND map)
> 
> Might be worth finally clarifying why there is no "XOR" operation in
> this xor_pos routine, i.e. that XORALLBITS is identical to asking if the
> hweight of the (hpa & map) is odd or even.
> 

Thanks for the review Dan!

Well, I would except that a few lines below, you suggest I use
an XOR operand ;) but I know what you mean. Edited in v2.

Actually, I've abandoned this separate function in v2. Since
the LOC have been whittled down, seems useless.

> > +	 */
> > +
> > +	while ((map & (1ULL << restore_pos)) == 0)
> > +		restore_pos++;
> 
> This is just open coded ffs()?
> 
So it is!  Using ffs() in v2.

> > +
> > +	restore_value = (hweight64(hpa & map) & 1);
> > +	if (restore_value)
> > +		hpa |= (1ULL << restore_pos);
> > +	else
> > +		hpa &= ~(1ULL << restore_pos);
> 
> It feels like this conditional mask / set can just be an xor operation?
> 
>     hpa ^= ((hweight64(hpa & map) & 1) << restore_pos);

I've taken the XOR operand piece, but didn't collapse into the
one-liner you suggest.  See what you think in v1 please.

> 
> Otherwise I question the & and | operations relative to the HPA bit
> position already being 0 or 1.
> 

They are gone in next rev, but FWIW here's the truth about those ops:

if restore_value == 1, using |= always sets the bit at restore_pos
like this:
	restore_value  current_value  |= value
	1		0		1
	1		1		1

if restore_value == 0, using &= always clears the bit at restore_pos
like this:
	restore_value  current_value  |= value
	0		0		0
	0		1		0

> > +
> > +	return hpa;
> > +}
> > +
> > +static u64 cxl_xor_trans(struct cxl_root_decoder *cxlrd, u64 hpa, int iw)
> 
> Ok, so the driver has cxl_trace_hpa() and now cxl_xor_trans() and
> "addr_trans". Can these all just be called "translate" because "trace"
> feels like tracing, "trans" is only saving 4 characters, and "addr" is
> redundant as nothing else needs translating besides addresses in CXL
> land.

I think i've got it:

Existing:
cxl_trace_hpa() -> cxl_translate()

Introduced in this set:
cxl_addr_trans_fn -> cxl_translate_fn
addr_trans -> translate
cxl_xor_trans() -> cxl_xor_translate()

> 
> [..]
> > diff --git a/drivers/cxl/core/trace.c b/drivers/cxl/core/trace.c
> > index d0403dc3c8ab..a7ea4a256036 100644
> > --- a/drivers/cxl/core/trace.c
> > +++ b/drivers/cxl/core/trace.c
> > @@ -36,6 +36,7 @@ static bool cxl_is_hpa_in_range(u64 hpa, struct cxl_region *cxlr, int pos)
> >  static u64 cxl_dpa_to_hpa(u64 dpa,  struct cxl_region *cxlr,
> >  			  struct cxl_endpoint_decoder *cxled)
> >  {
> > +	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
> >  	u64 dpa_offset, hpa_offset, bits_upper, mask_upper, hpa;
> >  	struct cxl_region_params *p = &cxlr->params;
> >  	int pos = cxled->pos;
> > @@ -75,6 +76,10 @@ static u64 cxl_dpa_to_hpa(u64 dpa,  struct cxl_region *cxlr,
> >  	/* Apply the hpa_offset to the region base address */
> >  	hpa = hpa_offset + p->res->start;
> >  
> > +	/* An addr_trans helper is defined for XOR math */
> 
> Rather then calling out XOR math since that is an ACPI'ism I would just
> say something like: "root decoder overrides typical modulo decode"

Got it.

-- Alison
Alison Schofield May 1, 2024, 5 a.m. UTC | #3
On Tue, Apr 30, 2024 at 08:41:19PM -0700, Alison Schofield wrote:
> On Fri, Apr 26, 2024 at 06:07:59PM -0700, Dan Williams wrote:
> > alison.schofield@ wrote:
> > > From: Alison Schofield <alison.schofield@intel.com>
> > > 
> > > When a CXL region is created in a CXL Window (CFMWS) that uses XOR
> > > interleave arithmetic XOR maps are applied during the HPA->DPA
> > > translation. The XOR function changes the interleave selector
> > > bit (aka position bit) in the HPA thereby varying which host bridge
> > > services an HPA. The purpose is to minimize hot spots thereby
> > > improving performance.
> > > 
> > > When a device reports a DPA in events such as poison, general_media,
> > > and dram, the driver translates that DPA back to an HPA. Presently,
> > > the CXL driver translation only considers the modulo position and
> > > will report the wrong HPA for XOR configured CFMWS's.
> > > 
> > > Add a helper function that restores the XOR'd bits during DPA->HPA
> > > address translation. Plumb a root decoder callback to the new helper
> > > when XOR interleave arithmetic is in use. For MODULO arithmetic, just
> > > let the callback be NULL - as in no extra work required.
> > > 
> > > Fixes: 28a3ae4ff66c ("cxl/trace: Add an HPA to cxl_poison trace events")
> > > Signed-off-by: Alison Schofield <alison.schofield@intel.com>
> > > ---
> > >  drivers/cxl/acpi.c       | 49 +++++++++++++++++++++++++++++++++++++---
> > >  drivers/cxl/core/port.c  |  5 +++-
> > >  drivers/cxl/core/trace.c |  5 ++++
> > >  drivers/cxl/cxl.h        |  6 ++++-
> > >  4 files changed, 60 insertions(+), 5 deletions(-)
> > > 
> > > diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
> > > index af5cb818f84d..519e933b5a4b 100644
> > > --- a/drivers/cxl/acpi.c
> > > +++ b/drivers/cxl/acpi.c
> > > @@ -74,6 +74,44 @@ static struct cxl_dport *cxl_hb_xor(struct cxl_root_decoder *cxlrd, int pos)
> > >  	return cxlrd->cxlsd.target[n];
> > >  }
> > >  
> > > +static u64 restore_xor_pos(u64 hpa, u64 map)
> > > +{
> > > +	int restore_value, restore_pos = 0;
> > > +
> > > +	/*
> > > +	 * Restore the position bit to its value before the
> > > +	 * xormap was applied at HPA->DPA translation.
> > > +	 *
> > > +	 * restore_pos is the lowest set bit in the map
> > > +	 * restore_value is the XORALLBITS in (hpa AND map)
> > 
> > Might be worth finally clarifying why there is no "XOR" operation in
> > this xor_pos routine, i.e. that XORALLBITS is identical to asking if the
> > hweight of the (hpa & map) is odd or even.
> > 
> 
> Thanks for the review Dan!
> 
> Well, I would except that a few lines below, you suggest I use
> an XOR operand ;) but I know what you mean. Edited in v2.
> 
> Actually, I've abandoned this separate function in v2. Since
> the LOC have been whittled down, seems useless.
> 
> > > +	 */
> > > +
> > > +	while ((map & (1ULL << restore_pos)) == 0)
> > > +		restore_pos++;
> > 
> > This is just open coded ffs()?
> > 
> So it is!  Using ffs() in v2.
> 
> > > +
> > > +	restore_value = (hweight64(hpa & map) & 1);
> > > +	if (restore_value)
> > > +		hpa |= (1ULL << restore_pos);
> > > +	else
> > > +		hpa &= ~(1ULL << restore_pos);
> > 
> > It feels like this conditional mask / set can just be an xor operation?
> > 
> >     hpa ^= ((hweight64(hpa & map) & 1) << restore_pos);
> 
> I've taken the XOR operand piece, but didn't collapse into the
> one-liner you suggest.  See what you think in v1 please.
> 
> > 
> > Otherwise I question the & and | operations relative to the HPA bit
> > position already being 0 or 1.
> > 
> 
> They are gone in next rev, but FWIW here's the truth about those ops:
> 
> if restore_value == 1, using |= always sets the bit at restore_pos
> like this:
> 	restore_value  current_value  |= value
> 	1		0		1
> 	1		1		1
> 
> if restore_value == 0, using &= always clears the bit at restore_pos
> like this:
> 	restore_value  current_value  |= value
                                      ^^ should be &= 
> 	0		0		0
> 	0		1		0
>

I guess my own typo proves ^= is cleaner ;)



> > > +
> > > +	return hpa;
> > > +}
> > > +
> > > +static u64 cxl_xor_trans(struct cxl_root_decoder *cxlrd, u64 hpa, int iw)
> > 
> > Ok, so the driver has cxl_trace_hpa() and now cxl_xor_trans() and
> > "addr_trans". Can these all just be called "translate" because "trace"
> > feels like tracing, "trans" is only saving 4 characters, and "addr" is
> > redundant as nothing else needs translating besides addresses in CXL
> > land.
> 
> I think i've got it:
> 
> Existing:
> cxl_trace_hpa() -> cxl_translate()
> 
> Introduced in this set:
> cxl_addr_trans_fn -> cxl_translate_fn
> addr_trans -> translate
> cxl_xor_trans() -> cxl_xor_translate()
> 
> > 
> > [..]
> > > diff --git a/drivers/cxl/core/trace.c b/drivers/cxl/core/trace.c
> > > index d0403dc3c8ab..a7ea4a256036 100644
> > > --- a/drivers/cxl/core/trace.c
> > > +++ b/drivers/cxl/core/trace.c
> > > @@ -36,6 +36,7 @@ static bool cxl_is_hpa_in_range(u64 hpa, struct cxl_region *cxlr, int pos)
> > >  static u64 cxl_dpa_to_hpa(u64 dpa,  struct cxl_region *cxlr,
> > >  			  struct cxl_endpoint_decoder *cxled)
> > >  {
> > > +	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
> > >  	u64 dpa_offset, hpa_offset, bits_upper, mask_upper, hpa;
> > >  	struct cxl_region_params *p = &cxlr->params;
> > >  	int pos = cxled->pos;
> > > @@ -75,6 +76,10 @@ static u64 cxl_dpa_to_hpa(u64 dpa,  struct cxl_region *cxlr,
> > >  	/* Apply the hpa_offset to the region base address */
> > >  	hpa = hpa_offset + p->res->start;
> > >  
> > > +	/* An addr_trans helper is defined for XOR math */
> > 
> > Rather then calling out XOR math since that is an ACPI'ism I would just
> > say something like: "root decoder overrides typical modulo decode"
> 
> Got it.
> 
> -- Alison
>
Alison Schofield May 2, 2024, 4:34 a.m. UTC | #4
On Tue, Apr 30, 2024 at 08:41:19PM -0700, Alison Schofield wrote:
> On Fri, Apr 26, 2024 at 06:07:59PM -0700, Dan Williams wrote:
> > alison.schofield@ wrote:
> > > From: Alison Schofield <alison.schofield@intel.com>
> > > 
> > > When a CXL region is created in a CXL Window (CFMWS) that uses XOR
> > > interleave arithmetic XOR maps are applied during the HPA->DPA
> > > translation. The XOR function changes the interleave selector
> > > bit (aka position bit) in the HPA thereby varying which host bridge
> > > services an HPA. The purpose is to minimize hot spots thereby
> > > improving performance.
> > > 
> > > When a device reports a DPA in events such as poison, general_media,
> > > and dram, the driver translates that DPA back to an HPA. Presently,
> > > the CXL driver translation only considers the modulo position and
> > > will report the wrong HPA for XOR configured CFMWS's.
> > > 
> > > Add a helper function that restores the XOR'd bits during DPA->HPA
> > > address translation. Plumb a root decoder callback to the new helper
> > > when XOR interleave arithmetic is in use. For MODULO arithmetic, just
> > > let the callback be NULL - as in no extra work required.
> > > 
> > > Fixes: 28a3ae4ff66c ("cxl/trace: Add an HPA to cxl_poison trace events")
> > > Signed-off-by: Alison Schofield <alison.schofield@intel.com>
> > > ---
> > >  drivers/cxl/acpi.c       | 49 +++++++++++++++++++++++++++++++++++++---
> > >  drivers/cxl/core/port.c  |  5 +++-
> > >  drivers/cxl/core/trace.c |  5 ++++
> > >  drivers/cxl/cxl.h        |  6 ++++-
> > >  4 files changed, 60 insertions(+), 5 deletions(-)
> > > 
> > > diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
> > > index af5cb818f84d..519e933b5a4b 100644
> > > --- a/drivers/cxl/acpi.c
> > > +++ b/drivers/cxl/acpi.c
> > > @@ -74,6 +74,44 @@ static struct cxl_dport *cxl_hb_xor(struct cxl_root_decoder *cxlrd, int pos)
> > >  	return cxlrd->cxlsd.target[n];
> > >  }
> > >  
> > > +static u64 restore_xor_pos(u64 hpa, u64 map)
> > > +{
> > > +	int restore_value, restore_pos = 0;
> > > +
> > > +	/*
> > > +	 * Restore the position bit to its value before the
> > > +	 * xormap was applied at HPA->DPA translation.
> > > +	 *
> > > +	 * restore_pos is the lowest set bit in the map
> > > +	 * restore_value is the XORALLBITS in (hpa AND map)
> > 
> > Might be worth finally clarifying why there is no "XOR" operation in
> > this xor_pos routine, i.e. that XORALLBITS is identical to asking if the
> > hweight of the (hpa & map) is odd or even.
> > 
> 
> Thanks for the review Dan!
> 
> Well, I would except that a few lines below, you suggest I use
> an XOR operand ;) but I know what you mean. Edited in v2.
> 
> Actually, I've abandoned this separate function in v2. Since
> the LOC have been whittled down, seems useless.
> 
> > > +	 */
> > > +
> > > +	while ((map & (1ULL << restore_pos)) == 0)
> > > +		restore_pos++;
> > 
> > This is just open coded ffs()?
> > 
> So it is!  Using ffs() in v2.
> 
> > > +
> > > +	restore_value = (hweight64(hpa & map) & 1);
> > > +	if (restore_value)
> > > +		hpa |= (1ULL << restore_pos);
> > > +	else
> > > +		hpa &= ~(1ULL << restore_pos);
> > 
> > It feels like this conditional mask / set can just be an xor operation?
> > 
> >     hpa ^= ((hweight64(hpa & map) & 1) << restore_pos);
> 
> I've taken the XOR operand piece, but didn't collapse into the
> one-liner you suggest.  See what you think in v1 please.

I jumped on the Dan-Bandwagon too hastily. The hpa ^= doesn't
work because it toggles the bit. ie It won't clear hpa[restore_pos]
if restore_value is not set, and it needs to.

This works and is more concise than the original if-else:
	hpa = (hpa & ~(1ULL << pos)) | (val << pos);

I've changed a few things around it, but didn't want to leave
this dangling out here for another reviewer to ponder.

> 
> > 
> > Otherwise I question the & and | operations relative to the HPA bit
> > position already being 0 or 1.
> > 
> 
> They are gone in next rev, but FWIW here's the truth about those ops:
> 
> if restore_value == 1, using |= always sets the bit at restore_pos
> like this:
> 	restore_value  current_value  |= value
> 	1		0		1
> 	1		1		1
> 
> if restore_value == 0, using &= always clears the bit at restore_pos
> like this:
> 	restore_value  current_value  |= value
> 	0		0		0
> 	0		1		0
> 
> > > +
> > > +	return hpa;
> > > +}
> > > +
> > > +static u64 cxl_xor_trans(struct cxl_root_decoder *cxlrd, u64 hpa, int iw)
> > 
> > Ok, so the driver has cxl_trace_hpa() and now cxl_xor_trans() and
> > "addr_trans". Can these all just be called "translate" because "trace"
> > feels like tracing, "trans" is only saving 4 characters, and "addr" is
> > redundant as nothing else needs translating besides addresses in CXL
> > land.
> 
> I think i've got it:
> 
> Existing:
> cxl_trace_hpa() -> cxl_translate()
> 
> Introduced in this set:
> cxl_addr_trans_fn -> cxl_translate_fn
> addr_trans -> translate
> cxl_xor_trans() -> cxl_xor_translate()
> 
> > 
> > [..]
> > > diff --git a/drivers/cxl/core/trace.c b/drivers/cxl/core/trace.c
> > > index d0403dc3c8ab..a7ea4a256036 100644
> > > --- a/drivers/cxl/core/trace.c
> > > +++ b/drivers/cxl/core/trace.c
> > > @@ -36,6 +36,7 @@ static bool cxl_is_hpa_in_range(u64 hpa, struct cxl_region *cxlr, int pos)
> > >  static u64 cxl_dpa_to_hpa(u64 dpa,  struct cxl_region *cxlr,
> > >  			  struct cxl_endpoint_decoder *cxled)
> > >  {
> > > +	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
> > >  	u64 dpa_offset, hpa_offset, bits_upper, mask_upper, hpa;
> > >  	struct cxl_region_params *p = &cxlr->params;
> > >  	int pos = cxled->pos;
> > > @@ -75,6 +76,10 @@ static u64 cxl_dpa_to_hpa(u64 dpa,  struct cxl_region *cxlr,
> > >  	/* Apply the hpa_offset to the region base address */
> > >  	hpa = hpa_offset + p->res->start;
> > >  
> > > +	/* An addr_trans helper is defined for XOR math */
> > 
> > Rather then calling out XOR math since that is an ACPI'ism I would just
> > say something like: "root decoder overrides typical modulo decode"
> 
> Got it.
> 
> -- Alison
>
diff mbox series

Patch

diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
index af5cb818f84d..519e933b5a4b 100644
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -74,6 +74,44 @@  static struct cxl_dport *cxl_hb_xor(struct cxl_root_decoder *cxlrd, int pos)
 	return cxlrd->cxlsd.target[n];
 }
 
+static u64 restore_xor_pos(u64 hpa, u64 map)
+{
+	int restore_value, restore_pos = 0;
+
+	/*
+	 * Restore the position bit to its value before the
+	 * xormap was applied at HPA->DPA translation.
+	 *
+	 * restore_pos is the lowest set bit in the map
+	 * restore_value is the XORALLBITS in (hpa AND map)
+	 */
+
+	while ((map & (1ULL << restore_pos)) == 0)
+		restore_pos++;
+
+	restore_value = (hweight64(hpa & map) & 1);
+	if (restore_value)
+		hpa |= (1ULL << restore_pos);
+	else
+		hpa &= ~(1ULL << restore_pos);
+
+	return hpa;
+}
+
+static u64 cxl_xor_trans(struct cxl_root_decoder *cxlrd, u64 hpa, int iw)
+{
+	struct cxl_cxims_data *cximsd = cxlrd->platform_data;
+
+	/* No xormaps for ways of 1 or 3 */
+	if (iw == 1 || iw == 3)
+		return hpa;
+
+	for (int i = 0; i < cximsd->nr_maps; i++)
+		hpa = restore_xor_pos(hpa, cximsd->xormaps[i]);
+
+	return hpa;
+}
+
 struct cxl_cxims_context {
 	struct device *dev;
 	struct cxl_root_decoder *cxlrd;
@@ -325,6 +363,7 @@  static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws,
 	struct cxl_cxims_context cxims_ctx;
 	struct cxl_root_decoder *cxlrd;
 	struct device *dev = ctx->dev;
+	cxl_addr_trans_fn addr_trans;
 	cxl_calc_hb_fn cxl_calc_hb;
 	struct cxl_decoder *cxld;
 	unsigned int ways, i, ig;
@@ -365,12 +404,16 @@  static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws,
 	if (rc)
 		goto err_insert;
 
-	if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_MODULO)
+	if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_MODULO) {
 		cxl_calc_hb = cxl_hb_modulo;
-	else
+		addr_trans = NULL;
+
+	} else {
 		cxl_calc_hb = cxl_hb_xor;
+		addr_trans = cxl_xor_trans;
+	}
 
-	cxlrd = cxl_root_decoder_alloc(root_port, ways, cxl_calc_hb);
+	cxlrd = cxl_root_decoder_alloc(root_port, ways, cxl_calc_hb, addr_trans);
 	if (IS_ERR(cxlrd))
 		return PTR_ERR(cxlrd);
 
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 2b0cab556072..cd4f004f5372 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -1808,6 +1808,7 @@  static int cxl_switch_decoder_init(struct cxl_port *port,
  * @port: owning CXL root of this decoder
  * @nr_targets: static number of downstream targets
  * @calc_hb: which host bridge covers the n'th position by granularity
+ * @addr_trans: address translation helper function
  *
  * Return: A new cxl decoder to be registered by cxl_decoder_add(). A
  * 'CXL root' decoder is one that decodes from a top-level / static platform
@@ -1816,7 +1817,8 @@  static int cxl_switch_decoder_init(struct cxl_port *port,
  */
 struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port,
 						unsigned int nr_targets,
-						cxl_calc_hb_fn calc_hb)
+						cxl_calc_hb_fn calc_hb,
+						cxl_addr_trans_fn addr_trans)
 {
 	struct cxl_root_decoder *cxlrd;
 	struct cxl_switch_decoder *cxlsd;
@@ -1839,6 +1841,7 @@  struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port,
 	}
 
 	cxlrd->calc_hb = calc_hb;
+	cxlrd->addr_trans = addr_trans;
 	mutex_init(&cxlrd->range_lock);
 
 	cxld = &cxlsd->cxld;
diff --git a/drivers/cxl/core/trace.c b/drivers/cxl/core/trace.c
index d0403dc3c8ab..a7ea4a256036 100644
--- a/drivers/cxl/core/trace.c
+++ b/drivers/cxl/core/trace.c
@@ -36,6 +36,7 @@  static bool cxl_is_hpa_in_range(u64 hpa, struct cxl_region *cxlr, int pos)
 static u64 cxl_dpa_to_hpa(u64 dpa,  struct cxl_region *cxlr,
 			  struct cxl_endpoint_decoder *cxled)
 {
+	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
 	u64 dpa_offset, hpa_offset, bits_upper, mask_upper, hpa;
 	struct cxl_region_params *p = &cxlr->params;
 	int pos = cxled->pos;
@@ -75,6 +76,10 @@  static u64 cxl_dpa_to_hpa(u64 dpa,  struct cxl_region *cxlr,
 	/* Apply the hpa_offset to the region base address */
 	hpa = hpa_offset + p->res->start;
 
+	/* An addr_trans helper is defined for XOR math */
+	if (cxlrd->addr_trans)
+		hpa = cxlrd->addr_trans(cxlrd, hpa, p->interleave_ways);
+
 	if (!cxl_is_hpa_in_range(hpa, cxlr, cxled->pos))
 		return ULLONG_MAX;
 
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 534e25e2f0a4..f0c3bd377259 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -432,12 +432,14 @@  struct cxl_switch_decoder {
 struct cxl_root_decoder;
 typedef struct cxl_dport *(*cxl_calc_hb_fn)(struct cxl_root_decoder *cxlrd,
 					    int pos);
+typedef u64 (*cxl_addr_trans_fn)(struct cxl_root_decoder *cxlrd, u64 hpa, int ways);
 
 /**
  * struct cxl_root_decoder - Static platform CXL address decoder
  * @res: host / parent resource for region allocations
  * @region_id: region id for next region provisioning event
  * @calc_hb: which host bridge covers the n'th position by granularity
+ * @addr_trans: dpa->hpa address translation helper
  * @platform_data: platform specific configuration data
  * @range_lock: sync region autodiscovery by address range
  * @qos_class: QoS performance class cookie
@@ -447,6 +449,7 @@  struct cxl_root_decoder {
 	struct resource *res;
 	atomic_t region_id;
 	cxl_calc_hb_fn calc_hb;
+	cxl_addr_trans_fn addr_trans;
 	void *platform_data;
 	struct mutex range_lock;
 	int qos_class;
@@ -773,7 +776,8 @@  bool is_switch_decoder(struct device *dev);
 bool is_endpoint_decoder(struct device *dev);
 struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port,
 						unsigned int nr_targets,
-						cxl_calc_hb_fn calc_hb);
+						cxl_calc_hb_fn calc_hb,
+						cxl_addr_trans_fn addr_trans);
 struct cxl_dport *cxl_hb_modulo(struct cxl_root_decoder *cxlrd, int pos);
 struct cxl_switch_decoder *cxl_switch_decoder_alloc(struct cxl_port *port,
 						    unsigned int nr_targets);