diff mbox series

[03/14] cxl/mem: Find device capabilities

Message ID 20210130002438.1872527-4-ben.widawsky@intel.com
State Superseded
Headers show
Series CXL 2.0 Support | expand

Commit Message

Ben Widawsky Jan. 30, 2021, 12:24 a.m. UTC
CXL devices contain an array of capabilities that describe the
interactions software can have with the device or firmware running on
the device. A CXL compliant device must implement the device status and
the mailbox capability. A CXL compliant memory device must implement the
memory device capability.

Each of the capabilities can [will] provide an offset within the MMIO
region for interacting with the CXL device.

For more details see 8.2.8 of the CXL 2.0 specification (see Link).

Link: https://www.computeexpresslink.org/download-the-specification
Signed-off-by: Ben Widawsky <ben.widawsky@intel.com>
---
 drivers/cxl/cxl.h |  78 ++++++++++++++++++++++++++++++++++-
 drivers/cxl/mem.c | 102 +++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 178 insertions(+), 2 deletions(-)

Comments

David Rientjes Jan. 30, 2021, 11:51 p.m. UTC | #1
On Fri, 29 Jan 2021, Ben Widawsky wrote:

> +static int cxl_mem_setup_mailbox(struct cxl_mem *cxlm)
> +{
> +	const int cap = cxl_read_mbox_reg32(cxlm, CXLDEV_MB_CAPS_OFFSET);
> +
> +	cxlm->mbox.payload_size =
> +		1 << CXL_GET_FIELD(cap, CXLDEV_MB_CAP_PAYLOAD_SIZE);
> +
> +	/* 8.2.8.4.3 */
> +	if (cxlm->mbox.payload_size < 256) {
> +		dev_err(&cxlm->pdev->dev, "Mailbox is too small (%zub)",
> +			cxlm->mbox.payload_size);
> +		return -ENXIO;
> +	}

Any reason not to check cxlm->mbox.payload_size > (1 << 20) as well and 
return ENXIO if true?
Ben Widawsky Feb. 1, 2021, 4:53 p.m. UTC | #2
On 21-01-30 15:51:49, David Rientjes wrote:
> On Fri, 29 Jan 2021, Ben Widawsky wrote:
> 
> > +static int cxl_mem_setup_mailbox(struct cxl_mem *cxlm)
> > +{
> > +	const int cap = cxl_read_mbox_reg32(cxlm, CXLDEV_MB_CAPS_OFFSET);
> > +
> > +	cxlm->mbox.payload_size =
> > +		1 << CXL_GET_FIELD(cap, CXLDEV_MB_CAP_PAYLOAD_SIZE);
> > +
> > +	/* 8.2.8.4.3 */
> > +	if (cxlm->mbox.payload_size < 256) {
> > +		dev_err(&cxlm->pdev->dev, "Mailbox is too small (%zub)",
> > +			cxlm->mbox.payload_size);
> > +		return -ENXIO;
> > +	}
> 
> Any reason not to check cxlm->mbox.payload_size > (1 << 20) as well and 
> return ENXIO if true?

If some crazy vendor wanted to ship a mailbox larger than 1M, why should the
driver not allow it?

I'm open to changing it, it just seems like a larger mailbox wouldn't be fatal.
Konrad Rzeszutek Wilk Feb. 1, 2021, 5:41 p.m. UTC | #3
> +static int cxl_mem_setup_regs(struct cxl_mem *cxlm)
> +{
> +	struct device *dev = &cxlm->pdev->dev;
> +	int cap, cap_count;
> +	u64 cap_array;
> +
> +	cap_array = readq(cxlm->regs + CXLDEV_CAP_ARRAY_OFFSET);
> +	if (CXL_GET_FIELD(cap_array, CXLDEV_CAP_ARRAY_ID) != CXLDEV_CAP_ARRAY_CAP_ID)
> +		return -ENODEV;
> +
> +	cap_count = CXL_GET_FIELD(cap_array, CXLDEV_CAP_ARRAY_COUNT);
> +
> +	for (cap = 1; cap <= cap_count; cap++) {
> +		void __iomem *register_block;
> +		u32 offset;
> +		u16 cap_id;
> +
> +		cap_id = readl(cxlm->regs + cap * 0x10) & 0xffff;
> +		offset = readl(cxlm->regs + cap * 0x10 + 0x4);
> +		register_block = cxlm->regs + offset;
> +
> +		switch (cap_id) {
> +		case CXLDEV_CAP_CAP_ID_DEVICE_STATUS:
> +			dev_dbg(dev, "found Status capability (0x%x)\n",
> +				offset);

That 80 character limit is no longer a requirement. Can you just make
this one line? And perhaps change 'found' to 'Found' ?

> +			cxlm->status.regs = register_block;
> +			break;
> +		case CXLDEV_CAP_CAP_ID_PRIMARY_MAILBOX:
> +			dev_dbg(dev, "found Mailbox capability (0x%x)\n",
> +				offset);
> +			cxlm->mbox.regs = register_block;
> +			break;
> +		case CXLDEV_CAP_CAP_ID_SECONDARY_MAILBOX:
> +			dev_dbg(dev,
> +				"found Secondary Mailbox capability (0x%x)\n",
> +				offset);
> +			break;
> +		case CXLDEV_CAP_CAP_ID_MEMDEV:
> +			dev_dbg(dev, "found Memory Device capability (0x%x)\n",
> +				offset);
> +			cxlm->mem.regs = register_block;
> +			break;
> +		default:
> +			dev_warn(dev, "Unknown cap ID: %d (0x%x)\n", cap_id,
> +				 offset);
> +			break;
> +		}
> +	}
> +
> +	if (!cxlm->status.regs || !cxlm->mbox.regs || !cxlm->mem.regs) {
> +		dev_err(dev, "registers not found: %s%s%s\n",
> +			!cxlm->status.regs ? "status " : "",
> +			!cxlm->mbox.regs ? "mbox " : "",
> +			!cxlm->mem.regs ? "mem" : "");
> +		return -ENXIO;
> +	}
> +
> +	return 0;
> +}
> +
> +static int cxl_mem_setup_mailbox(struct cxl_mem *cxlm)
> +{
> +	const int cap = cxl_read_mbox_reg32(cxlm, CXLDEV_MB_CAPS_OFFSET);
> +
> +	cxlm->mbox.payload_size =
> +		1 << CXL_GET_FIELD(cap, CXLDEV_MB_CAP_PAYLOAD_SIZE);
> +

I think the static analyzers are not going to be happy that you are not
checking the value of `cap` before using it.

Perhaps you should check that first before doing the manipulations?

> +	/* 8.2.8.4.3 */
> +	if (cxlm->mbox.payload_size < 256) {

#define for 256?
Ben Widawsky Feb. 1, 2021, 5:50 p.m. UTC | #4
On 21-02-01 12:41:36, Konrad Rzeszutek Wilk wrote:
> > +static int cxl_mem_setup_regs(struct cxl_mem *cxlm)
> > +{
> > +	struct device *dev = &cxlm->pdev->dev;
> > +	int cap, cap_count;
> > +	u64 cap_array;
> > +
> > +	cap_array = readq(cxlm->regs + CXLDEV_CAP_ARRAY_OFFSET);
> > +	if (CXL_GET_FIELD(cap_array, CXLDEV_CAP_ARRAY_ID) != CXLDEV_CAP_ARRAY_CAP_ID)
> > +		return -ENODEV;
> > +
> > +	cap_count = CXL_GET_FIELD(cap_array, CXLDEV_CAP_ARRAY_COUNT);
> > +
> > +	for (cap = 1; cap <= cap_count; cap++) {
> > +		void __iomem *register_block;
> > +		u32 offset;
> > +		u16 cap_id;
> > +
> > +		cap_id = readl(cxlm->regs + cap * 0x10) & 0xffff;
> > +		offset = readl(cxlm->regs + cap * 0x10 + 0x4);
> > +		register_block = cxlm->regs + offset;
> > +
> > +		switch (cap_id) {
> > +		case CXLDEV_CAP_CAP_ID_DEVICE_STATUS:
> > +			dev_dbg(dev, "found Status capability (0x%x)\n",
> > +				offset);
> 
> That 80 character limit is no longer a requirement. Can you just make
> this one line? And perhaps change 'found' to 'Found' ?
> 

Funny that.
https://lore.kernel.org/linux-cxl/20201111073449.GA16235@infradead.org/

> > +			cxlm->status.regs = register_block;
> > +			break;
> > +		case CXLDEV_CAP_CAP_ID_PRIMARY_MAILBOX:
> > +			dev_dbg(dev, "found Mailbox capability (0x%x)\n",
> > +				offset);
> > +			cxlm->mbox.regs = register_block;
> > +			break;
> > +		case CXLDEV_CAP_CAP_ID_SECONDARY_MAILBOX:
> > +			dev_dbg(dev,
> > +				"found Secondary Mailbox capability (0x%x)\n",
> > +				offset);
> > +			break;
> > +		case CXLDEV_CAP_CAP_ID_MEMDEV:
> > +			dev_dbg(dev, "found Memory Device capability (0x%x)\n",
> > +				offset);
> > +			cxlm->mem.regs = register_block;
> > +			break;
> > +		default:
> > +			dev_warn(dev, "Unknown cap ID: %d (0x%x)\n", cap_id,
> > +				 offset);
> > +			break;
> > +		}
> > +	}
> > +
> > +	if (!cxlm->status.regs || !cxlm->mbox.regs || !cxlm->mem.regs) {
> > +		dev_err(dev, "registers not found: %s%s%s\n",
> > +			!cxlm->status.regs ? "status " : "",
> > +			!cxlm->mbox.regs ? "mbox " : "",
> > +			!cxlm->mem.regs ? "mem" : "");
> > +		return -ENXIO;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static int cxl_mem_setup_mailbox(struct cxl_mem *cxlm)
> > +{
> > +	const int cap = cxl_read_mbox_reg32(cxlm, CXLDEV_MB_CAPS_OFFSET);
> > +
> > +	cxlm->mbox.payload_size =
> > +		1 << CXL_GET_FIELD(cap, CXLDEV_MB_CAP_PAYLOAD_SIZE);
> > +
> 
> I think the static analyzers are not going to be happy that you are not
> checking the value of `cap` before using it.
> 
> Perhaps you should check that first before doing the manipulations?
> 

I'm not following the request. CXL_GET_FIELD is just doing the shift and mask on
cap.

Can you explain what you're hoping to see?

> > +	/* 8.2.8.4.3 */
> > +	if (cxlm->mbox.payload_size < 256) {
> 
> #define for 256?
Konrad Rzeszutek Wilk Feb. 1, 2021, 6:08 p.m. UTC | #5
On Mon, Feb 01, 2021 at 09:50:41AM -0800, Ben Widawsky wrote:
> On 21-02-01 12:41:36, Konrad Rzeszutek Wilk wrote:
> > > +static int cxl_mem_setup_regs(struct cxl_mem *cxlm)
> > > +{
> > > +	struct device *dev = &cxlm->pdev->dev;
> > > +	int cap, cap_count;
> > > +	u64 cap_array;
> > > +
> > > +	cap_array = readq(cxlm->regs + CXLDEV_CAP_ARRAY_OFFSET);
> > > +	if (CXL_GET_FIELD(cap_array, CXLDEV_CAP_ARRAY_ID) != CXLDEV_CAP_ARRAY_CAP_ID)
> > > +		return -ENODEV;
> > > +
> > > +	cap_count = CXL_GET_FIELD(cap_array, CXLDEV_CAP_ARRAY_COUNT);
> > > +
> > > +	for (cap = 1; cap <= cap_count; cap++) {
> > > +		void __iomem *register_block;
> > > +		u32 offset;
> > > +		u16 cap_id;
> > > +
> > > +		cap_id = readl(cxlm->regs + cap * 0x10) & 0xffff;
> > > +		offset = readl(cxlm->regs + cap * 0x10 + 0x4);
> > > +		register_block = cxlm->regs + offset;
> > > +
> > > +		switch (cap_id) {
> > > +		case CXLDEV_CAP_CAP_ID_DEVICE_STATUS:
> > > +			dev_dbg(dev, "found Status capability (0x%x)\n",
> > > +				offset);
> > 
> > That 80 character limit is no longer a requirement. Can you just make
> > this one line? And perhaps change 'found' to 'Found' ?
> > 
> 
> Funny that.
> https://lore.kernel.org/linux-cxl/20201111073449.GA16235@infradead.org/

 "If there is a good reason to go against the
style (a line which becomes far less readable if split to fit within the
80-column limit, for example), just do it.
"

I would say that having an offset on its own line is kind of silly.

> 
> > > +			cxlm->status.regs = register_block;
> > > +			break;
> > > +		case CXLDEV_CAP_CAP_ID_PRIMARY_MAILBOX:
> > > +			dev_dbg(dev, "found Mailbox capability (0x%x)\n",
> > > +				offset);
> > > +			cxlm->mbox.regs = register_block;
> > > +			break;
> > > +		case CXLDEV_CAP_CAP_ID_SECONDARY_MAILBOX:
> > > +			dev_dbg(dev,
> > > +				"found Secondary Mailbox capability (0x%x)\n",
> > > +				offset);
> > > +			break;
> > > +		case CXLDEV_CAP_CAP_ID_MEMDEV:
> > > +			dev_dbg(dev, "found Memory Device capability (0x%x)\n",
> > > +				offset);
> > > +			cxlm->mem.regs = register_block;
> > > +			break;
> > > +		default:
> > > +			dev_warn(dev, "Unknown cap ID: %d (0x%x)\n", cap_id,
> > > +				 offset);
> > > +			break;
> > > +		}
> > > +	}
> > > +
> > > +	if (!cxlm->status.regs || !cxlm->mbox.regs || !cxlm->mem.regs) {
> > > +		dev_err(dev, "registers not found: %s%s%s\n",
> > > +			!cxlm->status.regs ? "status " : "",
> > > +			!cxlm->mbox.regs ? "mbox " : "",
> > > +			!cxlm->mem.regs ? "mem" : "");
> > > +		return -ENXIO;
> > > +	}
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +static int cxl_mem_setup_mailbox(struct cxl_mem *cxlm)
> > > +{
> > > +	const int cap = cxl_read_mbox_reg32(cxlm, CXLDEV_MB_CAPS_OFFSET);
> > > +
> > > +	cxlm->mbox.payload_size =
> > > +		1 << CXL_GET_FIELD(cap, CXLDEV_MB_CAP_PAYLOAD_SIZE);
> > > +
> > 
> > I think the static analyzers are not going to be happy that you are not
> > checking the value of `cap` before using it.
> > 
> > Perhaps you should check that first before doing the manipulations?
> > 
> 
> I'm not following the request. CXL_GET_FIELD is just doing the shift and mask on
> cap.
> 
> Can you explain what you're hoping to see?

My thoughts were that if cxl_read_mbox_reg32 gave you -1 (would be wacky
but we live in the world of having a healthy vision of devices not
always giving right values).

Then your payload_size bit shifting will get bent out of shape as you
are effectively casting cap to unsigned, which means it will end up
being 0xfffffffffffffffff.. and your bit shifting end up with a bunch
of zeros at the end.
> 
> > > +	/* 8.2.8.4.3 */
> > > +	if (cxlm->mbox.payload_size < 256) {

If this ends up being casted back to signed, this conditional will
catch it (-4096 < 256, true).

But if it does not (so 0xffffffff<256, false), then this wacky
number will pass this check and you may reference a payload_size that is
larger than the reality and copy the wrong set of values (buffer
overflow).

So what I was thinking is that you want to check `cap` to make sure it
is not negative nor to large?

> > 
> > #define for 256?
David Rientjes Feb. 1, 2021, 9:51 p.m. UTC | #6
On Mon, 1 Feb 2021, Ben Widawsky wrote:

> On 21-01-30 15:51:49, David Rientjes wrote:
> > On Fri, 29 Jan 2021, Ben Widawsky wrote:
> > 
> > > +static int cxl_mem_setup_mailbox(struct cxl_mem *cxlm)
> > > +{
> > > +	const int cap = cxl_read_mbox_reg32(cxlm, CXLDEV_MB_CAPS_OFFSET);
> > > +
> > > +	cxlm->mbox.payload_size =
> > > +		1 << CXL_GET_FIELD(cap, CXLDEV_MB_CAP_PAYLOAD_SIZE);
> > > +
> > > +	/* 8.2.8.4.3 */
> > > +	if (cxlm->mbox.payload_size < 256) {
> > > +		dev_err(&cxlm->pdev->dev, "Mailbox is too small (%zub)",
> > > +			cxlm->mbox.payload_size);
> > > +		return -ENXIO;
> > > +	}
> > 
> > Any reason not to check cxlm->mbox.payload_size > (1 << 20) as well and 
> > return ENXIO if true?
> 
> If some crazy vendor wanted to ship a mailbox larger than 1M, why should the
> driver not allow it?
> 

Because the spec disallows it :)
Ben Widawsky Feb. 1, 2021, 9:58 p.m. UTC | #7
On 21-02-01 13:51:16, David Rientjes wrote:
> On Mon, 1 Feb 2021, Ben Widawsky wrote:
> 
> > On 21-01-30 15:51:49, David Rientjes wrote:
> > > On Fri, 29 Jan 2021, Ben Widawsky wrote:
> > > 
> > > > +static int cxl_mem_setup_mailbox(struct cxl_mem *cxlm)
> > > > +{
> > > > +	const int cap = cxl_read_mbox_reg32(cxlm, CXLDEV_MB_CAPS_OFFSET);
> > > > +
> > > > +	cxlm->mbox.payload_size =
> > > > +		1 << CXL_GET_FIELD(cap, CXLDEV_MB_CAP_PAYLOAD_SIZE);
> > > > +
> > > > +	/* 8.2.8.4.3 */
> > > > +	if (cxlm->mbox.payload_size < 256) {
> > > > +		dev_err(&cxlm->pdev->dev, "Mailbox is too small (%zub)",
> > > > +			cxlm->mbox.payload_size);
> > > > +		return -ENXIO;
> > > > +	}
> > > 
> > > Any reason not to check cxlm->mbox.payload_size > (1 << 20) as well and 
> > > return ENXIO if true?
> > 
> > If some crazy vendor wanted to ship a mailbox larger than 1M, why should the
> > driver not allow it?
> > 
> 
> Because the spec disallows it :)

I don't see it being the driver's responsibility to enforce spec correctness
though. In certain cases, I need to use the spec, like I have to pick /some/
mailbox timeout. For other cases... 

I'm not too familiar with what other similar drivers may or may not do in
situations like this. The current 256 limit is mostly a reflection of that being
too small to even support advertised mandatory commands. So things can't work in
that scenario, but things can work if they have a larger register size (so long
as the BAR advertises enough space).
Dan Williams Feb. 1, 2021, 10:02 p.m. UTC | #8
On Mon, Feb 1, 2021 at 1:51 PM David Rientjes <rientjes@google.com> wrote:
>
> On Mon, 1 Feb 2021, Ben Widawsky wrote:
>
> > On 21-01-30 15:51:49, David Rientjes wrote:
> > > On Fri, 29 Jan 2021, Ben Widawsky wrote:
> > >
> > > > +static int cxl_mem_setup_mailbox(struct cxl_mem *cxlm)
> > > > +{
> > > > + const int cap = cxl_read_mbox_reg32(cxlm, CXLDEV_MB_CAPS_OFFSET);
> > > > +
> > > > + cxlm->mbox.payload_size =
> > > > +         1 << CXL_GET_FIELD(cap, CXLDEV_MB_CAP_PAYLOAD_SIZE);
> > > > +
> > > > + /* 8.2.8.4.3 */
> > > > + if (cxlm->mbox.payload_size < 256) {
> > > > +         dev_err(&cxlm->pdev->dev, "Mailbox is too small (%zub)",
> > > > +                 cxlm->mbox.payload_size);
> > > > +         return -ENXIO;
> > > > + }
> > >
> > > Any reason not to check cxlm->mbox.payload_size > (1 << 20) as well and
> > > return ENXIO if true?
> >
> > If some crazy vendor wanted to ship a mailbox larger than 1M, why should the
> > driver not allow it?
> >
>
> Because the spec disallows it :)

Unless it causes an operational failure in practice I'd go with the
Robustness Principle and be liberal in accepting hardware geometries.
David Rientjes Feb. 1, 2021, 10:23 p.m. UTC | #9
On Mon, 1 Feb 2021, Ben Widawsky wrote:

> > > > > +static int cxl_mem_setup_mailbox(struct cxl_mem *cxlm)
> > > > > +{
> > > > > +	const int cap = cxl_read_mbox_reg32(cxlm, CXLDEV_MB_CAPS_OFFSET);
> > > > > +
> > > > > +	cxlm->mbox.payload_size =
> > > > > +		1 << CXL_GET_FIELD(cap, CXLDEV_MB_CAP_PAYLOAD_SIZE);
> > > > > +
> > > > > +	/* 8.2.8.4.3 */
> > > > > +	if (cxlm->mbox.payload_size < 256) {
> > > > > +		dev_err(&cxlm->pdev->dev, "Mailbox is too small (%zub)",
> > > > > +			cxlm->mbox.payload_size);
> > > > > +		return -ENXIO;
> > > > > +	}
> > > > 
> > > > Any reason not to check cxlm->mbox.payload_size > (1 << 20) as well and 
> > > > return ENXIO if true?
> > > 
> > > If some crazy vendor wanted to ship a mailbox larger than 1M, why should the
> > > driver not allow it?
> > > 
> > 
> > Because the spec disallows it :)
> 
> I don't see it being the driver's responsibility to enforce spec correctness
> though. In certain cases, I need to use the spec, like I have to pick /some/
> mailbox timeout. For other cases... 
> 
> I'm not too familiar with what other similar drivers may or may not do in
> situations like this. The current 256 limit is mostly a reflection of that being
> too small to even support advertised mandatory commands. So things can't work in
> that scenario, but things can work if they have a larger register size (so long
> as the BAR advertises enough space).
> 

I don't think things can work above 1MB, either, though.  Section 
8.2.8.4.5 specifies 20 bits to define the payload length, if this is 
larger than cxlm->mbox.payload_size it would venture into the reserved 
bits of the command register.

So is the idea to allow cxl_mem_setup_mailbox() to succeed with a payload 
size > 1MB and then only check 20 bits for the command register?
Ben Widawsky Feb. 1, 2021, 10:28 p.m. UTC | #10
On 21-02-01 14:23:47, David Rientjes wrote:
> On Mon, 1 Feb 2021, Ben Widawsky wrote:
> 
> > > > > > +static int cxl_mem_setup_mailbox(struct cxl_mem *cxlm)
> > > > > > +{
> > > > > > +	const int cap = cxl_read_mbox_reg32(cxlm, CXLDEV_MB_CAPS_OFFSET);
> > > > > > +
> > > > > > +	cxlm->mbox.payload_size =
> > > > > > +		1 << CXL_GET_FIELD(cap, CXLDEV_MB_CAP_PAYLOAD_SIZE);
> > > > > > +
> > > > > > +	/* 8.2.8.4.3 */
> > > > > > +	if (cxlm->mbox.payload_size < 256) {
> > > > > > +		dev_err(&cxlm->pdev->dev, "Mailbox is too small (%zub)",
> > > > > > +			cxlm->mbox.payload_size);
> > > > > > +		return -ENXIO;
> > > > > > +	}
> > > > > 
> > > > > Any reason not to check cxlm->mbox.payload_size > (1 << 20) as well and 
> > > > > return ENXIO if true?
> > > > 
> > > > If some crazy vendor wanted to ship a mailbox larger than 1M, why should the
> > > > driver not allow it?
> > > > 
> > > 
> > > Because the spec disallows it :)
> > 
> > I don't see it being the driver's responsibility to enforce spec correctness
> > though. In certain cases, I need to use the spec, like I have to pick /some/
> > mailbox timeout. For other cases... 
> > 
> > I'm not too familiar with what other similar drivers may or may not do in
> > situations like this. The current 256 limit is mostly a reflection of that being
> > too small to even support advertised mandatory commands. So things can't work in
> > that scenario, but things can work if they have a larger register size (so long
> > as the BAR advertises enough space).
> > 
> 
> I don't think things can work above 1MB, either, though.  Section 
> 8.2.8.4.5 specifies 20 bits to define the payload length, if this is 
> larger than cxlm->mbox.payload_size it would venture into the reserved 
> bits of the command register.
> 
> So is the idea to allow cxl_mem_setup_mailbox() to succeed with a payload 
> size > 1MB and then only check 20 bits for the command register?

So it's probably a spec bug, but actually the payload size is 21 bits... I'll
check if that was a mistake.
Ben Widawsky Feb. 1, 2021, 10:33 p.m. UTC | #11
On 21-02-01 14:28:59, Ben Widawsky wrote:
> On 21-02-01 14:23:47, David Rientjes wrote:
> > On Mon, 1 Feb 2021, Ben Widawsky wrote:
> > 
> > > > > > > +static int cxl_mem_setup_mailbox(struct cxl_mem *cxlm)
> > > > > > > +{
> > > > > > > +	const int cap = cxl_read_mbox_reg32(cxlm, CXLDEV_MB_CAPS_OFFSET);
> > > > > > > +
> > > > > > > +	cxlm->mbox.payload_size =
> > > > > > > +		1 << CXL_GET_FIELD(cap, CXLDEV_MB_CAP_PAYLOAD_SIZE);
> > > > > > > +
> > > > > > > +	/* 8.2.8.4.3 */
> > > > > > > +	if (cxlm->mbox.payload_size < 256) {
> > > > > > > +		dev_err(&cxlm->pdev->dev, "Mailbox is too small (%zub)",
> > > > > > > +			cxlm->mbox.payload_size);
> > > > > > > +		return -ENXIO;
> > > > > > > +	}
> > > > > > 
> > > > > > Any reason not to check cxlm->mbox.payload_size > (1 << 20) as well and 
> > > > > > return ENXIO if true?
> > > > > 
> > > > > If some crazy vendor wanted to ship a mailbox larger than 1M, why should the
> > > > > driver not allow it?
> > > > > 
> > > > 
> > > > Because the spec disallows it :)
> > > 
> > > I don't see it being the driver's responsibility to enforce spec correctness
> > > though. In certain cases, I need to use the spec, like I have to pick /some/
> > > mailbox timeout. For other cases... 
> > > 
> > > I'm not too familiar with what other similar drivers may or may not do in
> > > situations like this. The current 256 limit is mostly a reflection of that being
> > > too small to even support advertised mandatory commands. So things can't work in
> > > that scenario, but things can work if they have a larger register size (so long
> > > as the BAR advertises enough space).
> > > 
> > 
> > I don't think things can work above 1MB, either, though.  Section 
> > 8.2.8.4.5 specifies 20 bits to define the payload length, if this is 
> > larger than cxlm->mbox.payload_size it would venture into the reserved 
> > bits of the command register.
> > 
> > So is the idea to allow cxl_mem_setup_mailbox() to succeed with a payload 
> > size > 1MB and then only check 20 bits for the command register?
> 
> So it's probably a spec bug, but actually the payload size is 21 bits... I'll
> check if that was a mistake.

Well I guess they wanted to be able to specify 1M exactly... Spec should
probably say you can't go over 1M
David Rientjes Feb. 1, 2021, 10:45 p.m. UTC | #12
On Mon, 1 Feb 2021, Ben Widawsky wrote:

> > > > > > > > +static int cxl_mem_setup_mailbox(struct cxl_mem *cxlm)
> > > > > > > > +{
> > > > > > > > +	const int cap = cxl_read_mbox_reg32(cxlm, CXLDEV_MB_CAPS_OFFSET);
> > > > > > > > +
> > > > > > > > +	cxlm->mbox.payload_size =
> > > > > > > > +		1 << CXL_GET_FIELD(cap, CXLDEV_MB_CAP_PAYLOAD_SIZE);
> > > > > > > > +
> > > > > > > > +	/* 8.2.8.4.3 */
> > > > > > > > +	if (cxlm->mbox.payload_size < 256) {
> > > > > > > > +		dev_err(&cxlm->pdev->dev, "Mailbox is too small (%zub)",
> > > > > > > > +			cxlm->mbox.payload_size);
> > > > > > > > +		return -ENXIO;
> > > > > > > > +	}
> > > > > > > 
> > > > > > > Any reason not to check cxlm->mbox.payload_size > (1 << 20) as well and 
> > > > > > > return ENXIO if true?
> > > > > > 
> > > > > > If some crazy vendor wanted to ship a mailbox larger than 1M, why should the
> > > > > > driver not allow it?
> > > > > > 
> > > > > 
> > > > > Because the spec disallows it :)
> > > > 
> > > > I don't see it being the driver's responsibility to enforce spec correctness
> > > > though. In certain cases, I need to use the spec, like I have to pick /some/
> > > > mailbox timeout. For other cases... 
> > > > 
> > > > I'm not too familiar with what other similar drivers may or may not do in
> > > > situations like this. The current 256 limit is mostly a reflection of that being
> > > > too small to even support advertised mandatory commands. So things can't work in
> > > > that scenario, but things can work if they have a larger register size (so long
> > > > as the BAR advertises enough space).
> > > > 
> > > 
> > > I don't think things can work above 1MB, either, though.  Section 
> > > 8.2.8.4.5 specifies 20 bits to define the payload length, if this is 
> > > larger than cxlm->mbox.payload_size it would venture into the reserved 
> > > bits of the command register.
> > > 
> > > So is the idea to allow cxl_mem_setup_mailbox() to succeed with a payload 
> > > size > 1MB and then only check 20 bits for the command register?
> > 
> > So it's probably a spec bug, but actually the payload size is 21 bits... I'll
> > check if that was a mistake.
> 
> Well I guess they wanted to be able to specify 1M exactly... Spec should
> probably say you can't go over 1M
> 

I think that's what 8.2.8.4.3 says, no?  And then 8.2.8.4.5 says you 
can use up to Payload Size.  That's why my recommendation was to enforce 
this in cxl_mem_setup_mailbox() up front.
Ben Widawsky Feb. 1, 2021, 10:50 p.m. UTC | #13
On 21-02-01 14:45:00, David Rientjes wrote:
> On Mon, 1 Feb 2021, Ben Widawsky wrote:
> 
> > > > > > > > > +static int cxl_mem_setup_mailbox(struct cxl_mem *cxlm)
> > > > > > > > > +{
> > > > > > > > > +	const int cap = cxl_read_mbox_reg32(cxlm, CXLDEV_MB_CAPS_OFFSET);
> > > > > > > > > +
> > > > > > > > > +	cxlm->mbox.payload_size =
> > > > > > > > > +		1 << CXL_GET_FIELD(cap, CXLDEV_MB_CAP_PAYLOAD_SIZE);
> > > > > > > > > +
> > > > > > > > > +	/* 8.2.8.4.3 */
> > > > > > > > > +	if (cxlm->mbox.payload_size < 256) {
> > > > > > > > > +		dev_err(&cxlm->pdev->dev, "Mailbox is too small (%zub)",
> > > > > > > > > +			cxlm->mbox.payload_size);
> > > > > > > > > +		return -ENXIO;
> > > > > > > > > +	}
> > > > > > > > 
> > > > > > > > Any reason not to check cxlm->mbox.payload_size > (1 << 20) as well and 
> > > > > > > > return ENXIO if true?
> > > > > > > 
> > > > > > > If some crazy vendor wanted to ship a mailbox larger than 1M, why should the
> > > > > > > driver not allow it?
> > > > > > > 
> > > > > > 
> > > > > > Because the spec disallows it :)
> > > > > 
> > > > > I don't see it being the driver's responsibility to enforce spec correctness
> > > > > though. In certain cases, I need to use the spec, like I have to pick /some/
> > > > > mailbox timeout. For other cases... 
> > > > > 
> > > > > I'm not too familiar with what other similar drivers may or may not do in
> > > > > situations like this. The current 256 limit is mostly a reflection of that being
> > > > > too small to even support advertised mandatory commands. So things can't work in
> > > > > that scenario, but things can work if they have a larger register size (so long
> > > > > as the BAR advertises enough space).
> > > > > 
> > > > 
> > > > I don't think things can work above 1MB, either, though.  Section 
> > > > 8.2.8.4.5 specifies 20 bits to define the payload length, if this is 
> > > > larger than cxlm->mbox.payload_size it would venture into the reserved 
> > > > bits of the command register.
> > > > 
> > > > So is the idea to allow cxl_mem_setup_mailbox() to succeed with a payload 
> > > > size > 1MB and then only check 20 bits for the command register?
> > > 
> > > So it's probably a spec bug, but actually the payload size is 21 bits... I'll
> > > check if that was a mistake.
> > 
> > Well I guess they wanted to be able to specify 1M exactly... Spec should
> > probably say you can't go over 1M
> > 
> 
> I think that's what 8.2.8.4.3 says, no?  And then 8.2.8.4.5 says you 
> can use up to Payload Size.  That's why my recommendation was to enforce 
> this in cxl_mem_setup_mailbox() up front.

Yeah. I asked our spec people to update 8.2.8.4.5 to make it clearer. I'd argue
the intent is how you describe it, but the implementation isn't.

My argument was silly anyway because if you specify greater than 1M as your
payload, you will get EINVAL at the ioctl.

The value of how it works today is the driver will at least bind and allow you
to interact with it.

How strongly do you feel about this?
David Rientjes Feb. 1, 2021, 11:09 p.m. UTC | #14
On Mon, 1 Feb 2021, Ben Widawsky wrote:

> > I think that's what 8.2.8.4.3 says, no?  And then 8.2.8.4.5 says you 
> > can use up to Payload Size.  That's why my recommendation was to enforce 
> > this in cxl_mem_setup_mailbox() up front.
> 
> Yeah. I asked our spec people to update 8.2.8.4.5 to make it clearer. I'd argue
> the intent is how you describe it, but the implementation isn't.
> 
> My argument was silly anyway because if you specify greater than 1M as your
> payload, you will get EINVAL at the ioctl.
> 
> The value of how it works today is the driver will at least bind and allow you
> to interact with it.
> 
> How strongly do you feel about this?
> 

I haven't seen the update to 8.2.8.4.5 to know yet :)

You make a good point of at least being able to interact with the driver.  
I think you could argue that if the driver binds, then the payload size is 
accepted, in which case it would be strange to get an EINVAL when using 
the ioctl with anything >1MB.

Concern was that if we mask off the reserved bits from the command 
register that we could be masking part of the payload size that is being 
passed if the accepted max is >1MB.  Idea was to avoid any possibility of 
this inconsistency.  If this is being checked for ioctl, seems like it's 
checking reserved bits.

But maybe I should just wait for the spec update.
Ben Widawsky Feb. 1, 2021, 11:17 p.m. UTC | #15
On 21-02-01 15:09:45, David Rientjes wrote:
> On Mon, 1 Feb 2021, Ben Widawsky wrote:
> 
> > > I think that's what 8.2.8.4.3 says, no?  And then 8.2.8.4.5 says you 
> > > can use up to Payload Size.  That's why my recommendation was to enforce 
> > > this in cxl_mem_setup_mailbox() up front.
> > 
> > Yeah. I asked our spec people to update 8.2.8.4.5 to make it clearer. I'd argue
> > the intent is how you describe it, but the implementation isn't.
> > 
> > My argument was silly anyway because if you specify greater than 1M as your
> > payload, you will get EINVAL at the ioctl.
> > 
> > The value of how it works today is the driver will at least bind and allow you
> > to interact with it.
> > 
> > How strongly do you feel about this?
> > 
> 
> I haven't seen the update to 8.2.8.4.5 to know yet :)
> 
> You make a good point of at least being able to interact with the driver.  
> I think you could argue that if the driver binds, then the payload size is 
> accepted, in which case it would be strange to get an EINVAL when using 
> the ioctl with anything >1MB.
> 
> Concern was that if we mask off the reserved bits from the command 
> register that we could be masking part of the payload size that is being 
> passed if the accepted max is >1MB.  Idea was to avoid any possibility of 
> this inconsistency.  If this is being checked for ioctl, seems like it's 
> checking reserved bits.
> 
> But maybe I should just wait for the spec update.

Well, I wouldn't hold your breath (it would be an errata in this case anyway).
My preference would be to just allow allow mailbox payload size to be 2^31 and
not deal with this.

My question was how strongly do you feel it's an error that should prevent
binding.
David Rientjes Feb. 1, 2021, 11:58 p.m. UTC | #16
On Mon, 1 Feb 2021, Ben Widawsky wrote:

> > I haven't seen the update to 8.2.8.4.5 to know yet :)
> > 
> > You make a good point of at least being able to interact with the driver.  
> > I think you could argue that if the driver binds, then the payload size is 
> > accepted, in which case it would be strange to get an EINVAL when using 
> > the ioctl with anything >1MB.
> > 
> > Concern was that if we mask off the reserved bits from the command 
> > register that we could be masking part of the payload size that is being 
> > passed if the accepted max is >1MB.  Idea was to avoid any possibility of 
> > this inconsistency.  If this is being checked for ioctl, seems like it's 
> > checking reserved bits.
> > 
> > But maybe I should just wait for the spec update.
> 
> Well, I wouldn't hold your breath (it would be an errata in this case anyway).
> My preference would be to just allow allow mailbox payload size to be 2^31 and
> not deal with this.
> 
> My question was how strongly do you feel it's an error that should prevent
> binding.
> 

I don't have an objection to binding, but doesn't this require that the 
check in cxl_validate_cmd_from_user() guarantees send_cmd->size_in cannot 
be greater than 1MB?
Ben Widawsky Feb. 2, 2021, 12:11 a.m. UTC | #17
On 21-02-01 15:58:09, David Rientjes wrote:
> On Mon, 1 Feb 2021, Ben Widawsky wrote:
> 
> > > I haven't seen the update to 8.2.8.4.5 to know yet :)
> > > 
> > > You make a good point of at least being able to interact with the driver.  
> > > I think you could argue that if the driver binds, then the payload size is 
> > > accepted, in which case it would be strange to get an EINVAL when using 
> > > the ioctl with anything >1MB.
> > > 
> > > Concern was that if we mask off the reserved bits from the command 
> > > register that we could be masking part of the payload size that is being 
> > > passed if the accepted max is >1MB.  Idea was to avoid any possibility of 
> > > this inconsistency.  If this is being checked for ioctl, seems like it's 
> > > checking reserved bits.
> > > 
> > > But maybe I should just wait for the spec update.
> > 
> > Well, I wouldn't hold your breath (it would be an errata in this case anyway).
> > My preference would be to just allow allow mailbox payload size to be 2^31 and
> > not deal with this.
> > 
> > My question was how strongly do you feel it's an error that should prevent
> > binding.
> > 
> 
> I don't have an objection to binding, but doesn't this require that the 
> check in cxl_validate_cmd_from_user() guarantees send_cmd->size_in cannot 
> be greater than 1MB?

You're correct. I'd need to add:
cxlm->mbox.payload_size =
    min_t(size_t, 1 << CXL_GET_FIELD(cap, CXLDEV_MB_CAP_PAYLOAD_SIZE), 1<<20)
Dan Williams Feb. 2, 2021, 12:14 a.m. UTC | #18
On Mon, Feb 1, 2021 at 4:11 PM Ben Widawsky <ben.widawsky@intel.com> wrote:
>
> On 21-02-01 15:58:09, David Rientjes wrote:
> > On Mon, 1 Feb 2021, Ben Widawsky wrote:
> >
> > > > I haven't seen the update to 8.2.8.4.5 to know yet :)
> > > >
> > > > You make a good point of at least being able to interact with the driver.
> > > > I think you could argue that if the driver binds, then the payload size is
> > > > accepted, in which case it would be strange to get an EINVAL when using
> > > > the ioctl with anything >1MB.
> > > >
> > > > Concern was that if we mask off the reserved bits from the command
> > > > register that we could be masking part of the payload size that is being
> > > > passed if the accepted max is >1MB.  Idea was to avoid any possibility of
> > > > this inconsistency.  If this is being checked for ioctl, seems like it's
> > > > checking reserved bits.
> > > >
> > > > But maybe I should just wait for the spec update.
> > >
> > > Well, I wouldn't hold your breath (it would be an errata in this case anyway).
> > > My preference would be to just allow allow mailbox payload size to be 2^31 and
> > > not deal with this.
> > >
> > > My question was how strongly do you feel it's an error that should prevent
> > > binding.
> > >
> >
> > I don't have an objection to binding, but doesn't this require that the
> > check in cxl_validate_cmd_from_user() guarantees send_cmd->size_in cannot
> > be greater than 1MB?
>
> You're correct. I'd need to add:
> cxlm->mbox.payload_size =
>     min_t(size_t, 1 << CXL_GET_FIELD(cap, CXLDEV_MB_CAP_PAYLOAD_SIZE), 1<<20)

nit, use the existing SZ_1M define.
David Rientjes Feb. 2, 2021, 1:09 a.m. UTC | #19
On Mon, 1 Feb 2021, Dan Williams wrote:

> > > I don't have an objection to binding, but doesn't this require that the
> > > check in cxl_validate_cmd_from_user() guarantees send_cmd->size_in cannot
> > > be greater than 1MB?
> >
> > You're correct. I'd need to add:
> > cxlm->mbox.payload_size =
> >     min_t(size_t, 1 << CXL_GET_FIELD(cap, CXLDEV_MB_CAP_PAYLOAD_SIZE), 1<<20)
> 
> nit, use the existing SZ_1M define.
> 

Sounds good, thanks both!  I'll assume you'll follow-up on this in the 
next revision for patch 7 ("cxl/mem: Add send command") and we can 
consider this resolved :)
Christoph Hellwig Feb. 2, 2021, 6:10 p.m. UTC | #20
On Fri, Jan 29, 2021 at 04:24:27PM -0800, Ben Widawsky wrote:
>  #ifndef __CXL_H__
>  #define __CXL_H__
>  
> +#include <linux/bitfield.h>
> +#include <linux/bitops.h>
> +#include <linux/io.h>
> +
> +#define CXL_SET_FIELD(value, field)                                            \
> +	({                                                                     \
> +		WARN_ON(!FIELD_FIT(field##_MASK, value));                      \
> +		FIELD_PREP(field##_MASK, value);                               \
> +	})
> +
> +#define CXL_GET_FIELD(word, field) FIELD_GET(field##_MASK, word)

This looks like some massive obsfucation.  What is the intent
here?

> +	/* Cap 0001h - CXL_CAP_CAP_ID_DEVICE_STATUS */
> +	struct {
> +		void __iomem *regs;
> +	} status;
> +
> +	/* Cap 0002h - CXL_CAP_CAP_ID_PRIMARY_MAILBOX */
> +	struct {
> +		void __iomem *regs;
> +		size_t payload_size;
> +	} mbox;
> +
> +	/* Cap 4000h - CXL_CAP_CAP_ID_MEMDEV */
> +	struct {
> +		void __iomem *regs;
> +	} mem;

This style looks massively obsfucated.  For one the comments look like
absolute gibberish, but also what is the point of all these anonymous
structures?

> +#define cxl_reg(type)                                                          \
> +	static inline void cxl_write_##type##_reg32(struct cxl_mem *cxlm,      \
> +						    u32 reg, u32 value)        \
> +	{                                                                      \
> +		void __iomem *reg_addr = cxlm->type.regs;                      \
> +		writel(value, reg_addr + reg);                                 \
> +	}                                                                      \
> +	static inline void cxl_write_##type##_reg64(struct cxl_mem *cxlm,      \
> +						    u32 reg, u64 value)        \
> +	{                                                                      \
> +		void __iomem *reg_addr = cxlm->type.regs;                      \
> +		writeq(value, reg_addr + reg);                                 \
> +	}                                                                      \

What is the value add of all this obsfucation over the trivial
calls to the write*/read* functions, possible with a locally
declarate "void __iomem *" variable in the callers like in all
normall drivers?  Except for making the life of the poor soul trying
to debug this code some time in the future really hard, of course.

> +	/* 8.2.8.4.3 */

????
Ben Widawsky Feb. 2, 2021, 6:24 p.m. UTC | #21
On 21-02-02 18:10:16, Christoph Hellwig wrote:
> On Fri, Jan 29, 2021 at 04:24:27PM -0800, Ben Widawsky wrote:
> >  #ifndef __CXL_H__
> >  #define __CXL_H__
> >  
> > +#include <linux/bitfield.h>
> > +#include <linux/bitops.h>
> > +#include <linux/io.h>
> > +
> > +#define CXL_SET_FIELD(value, field)                                            \
> > +	({                                                                     \
> > +		WARN_ON(!FIELD_FIT(field##_MASK, value));                      \
> > +		FIELD_PREP(field##_MASK, value);                               \
> > +	})
> > +
> > +#define CXL_GET_FIELD(word, field) FIELD_GET(field##_MASK, word)
> 
> This looks like some massive obsfucation.  What is the intent
> here?
> 

I will drop these. I don't recall why I did this to be honest.

> > +	/* Cap 0001h - CXL_CAP_CAP_ID_DEVICE_STATUS */
> > +	struct {
> > +		void __iomem *regs;
> > +	} status;
> > +
> > +	/* Cap 0002h - CXL_CAP_CAP_ID_PRIMARY_MAILBOX */
> > +	struct {
> > +		void __iomem *regs;
> > +		size_t payload_size;
> > +	} mbox;
> > +
> > +	/* Cap 4000h - CXL_CAP_CAP_ID_MEMDEV */
> > +	struct {
> > +		void __iomem *regs;
> > +	} mem;
> 
> This style looks massively obsfucated.  For one the comments look like
> absolute gibberish, but also what is the point of all these anonymous
> structures?

They're not anonymous, and their names are for the below register functions. The
comments are connected spec reference 'Cap XXXXh' to definitions in cxl.h. I can
articulate that if it helps.

> 
> > +#define cxl_reg(type)                                                          \
> > +	static inline void cxl_write_##type##_reg32(struct cxl_mem *cxlm,      \
> > +						    u32 reg, u32 value)        \
> > +	{                                                                      \
> > +		void __iomem *reg_addr = cxlm->type.regs;                      \
> > +		writel(value, reg_addr + reg);                                 \
> > +	}                                                                      \
> > +	static inline void cxl_write_##type##_reg64(struct cxl_mem *cxlm,      \
> > +						    u32 reg, u64 value)        \
> > +	{                                                                      \
> > +		void __iomem *reg_addr = cxlm->type.regs;                      \
> > +		writeq(value, reg_addr + reg);                                 \
> > +	}                                                                      \
> 
> What is the value add of all this obsfucation over the trivial
> calls to the write*/read* functions, possible with a locally
> declarate "void __iomem *" variable in the callers like in all
> normall drivers?  Except for making the life of the poor soul trying
> to debug this code some time in the future really hard, of course.
> 

The register space for CXL devices is a bit weird since it's all subdivided
under 1 BAR for now. To clearly distinguish over the different subregions, these
helpers exist. It's really easy to mess this up as the developer and I actually
would disagree that it makes debugging quite a bit easier. It also gets more
convoluted when you add the other 2 BARs which also each have their own
subregions.

For example. if my mailbox function does:
cxl_read_status_reg32(cxlm, CXLDEV_MB_CAPS_OFFSET);

instead of:
cxl_read_mbox_reg32(cxlm, CXLDEV_MB_CAPS_OFFSET);

It's easier to spot than:
readl(cxlm->regs + cxlm->status_offset + CXLDEV_MB_CAPS_OFFSET)


> > +	/* 8.2.8.4.3 */
> 
> ????
> 

I had been trying to be consistent with 'CXL2.0 - ' in front of all spec
reference. I obviously missed this one.
Christoph Hellwig Feb. 3, 2021, 5:15 p.m. UTC | #22
On Tue, Feb 02, 2021 at 10:24:18AM -0800, Ben Widawsky wrote:
> > > +	/* Cap 4000h - CXL_CAP_CAP_ID_MEMDEV */
> > > +	struct {
> > > +		void __iomem *regs;
> > > +	} mem;
> > 
> > This style looks massively obsfucated.  For one the comments look like
> > absolute gibberish, but also what is the point of all these anonymous
> > structures?
> 
> They're not anonymous, and their names are for the below register functions. The
> comments are connected spec reference 'Cap XXXXh' to definitions in cxl.h. I can
> articulate that if it helps.

But why no simply a

	void __iomem *mem_regs;

field vs the extra struct?

> The register space for CXL devices is a bit weird since it's all subdivided
> under 1 BAR for now. To clearly distinguish over the different subregions, these
> helpers exist. It's really easy to mess this up as the developer and I actually
> would disagree that it makes debugging quite a bit easier. It also gets more
> convoluted when you add the other 2 BARs which also each have their own
> subregions.
> 
> For example. if my mailbox function does:
> cxl_read_status_reg32(cxlm, CXLDEV_MB_CAPS_OFFSET);
> 
> instead of:
> cxl_read_mbox_reg32(cxlm, CXLDEV_MB_CAPS_OFFSET);
> 
> It's easier to spot than:
> readl(cxlm->regs + cxlm->status_offset + CXLDEV_MB_CAPS_OFFSET)

Well, what I think would be the most obvious is:

readl(cxlm->status_regs + CXLDEV_MB_CAPS_OFFSET);

> > > +	/* 8.2.8.4.3 */
> > 
> > ????
> > 
> 
> I had been trying to be consistent with 'CXL2.0 - ' in front of all spec
> reference. I obviously missed this one.

FYI, I generally find section names much easier to find than section
numbers.  Especially as the numbers change very frequently, some times
even for very minor updates to the spec.  E.g. in NVMe the numbers might
even change from NVMe 1.X to NVMe 1.Xa because an errata had to add
a clarification as its own section.
Ben Widawsky Feb. 3, 2021, 5:23 p.m. UTC | #23
On 21-02-03 17:15:34, Christoph Hellwig wrote:
> On Tue, Feb 02, 2021 at 10:24:18AM -0800, Ben Widawsky wrote:
> > > > +	/* Cap 4000h - CXL_CAP_CAP_ID_MEMDEV */
> > > > +	struct {
> > > > +		void __iomem *regs;
> > > > +	} mem;
> > > 
> > > This style looks massively obsfucated.  For one the comments look like
> > > absolute gibberish, but also what is the point of all these anonymous
> > > structures?
> > 
> > They're not anonymous, and their names are for the below register functions. The
> > comments are connected spec reference 'Cap XXXXh' to definitions in cxl.h. I can
> > articulate that if it helps.
> 
> But why no simply a
> 
> 	void __iomem *mem_regs;
> 
> field vs the extra struct?
> 
> > The register space for CXL devices is a bit weird since it's all subdivided
> > under 1 BAR for now. To clearly distinguish over the different subregions, these
> > helpers exist. It's really easy to mess this up as the developer and I actually
> > would disagree that it makes debugging quite a bit easier. It also gets more
> > convoluted when you add the other 2 BARs which also each have their own
> > subregions.
> > 
> > For example. if my mailbox function does:
> > cxl_read_status_reg32(cxlm, CXLDEV_MB_CAPS_OFFSET);
> > 
> > instead of:
> > cxl_read_mbox_reg32(cxlm, CXLDEV_MB_CAPS_OFFSET);
> > 
> > It's easier to spot than:
> > readl(cxlm->regs + cxlm->status_offset + CXLDEV_MB_CAPS_OFFSET)
> 
> Well, what I think would be the most obvious is:
> 
> readl(cxlm->status_regs + CXLDEV_MB_CAPS_OFFSET);
> 

Right, so you wrote the buggy version. Should be.
readl(cxlm->mbox_regs + CXLDEV_MB_CAPS_OFFSET);

Admittedly, "MB" for mailbox isn't super obvious. I think you've convinced me to
rename these register definitions
s/MB/MBOX.

I'd prefer to keep the helpers for now as I do find them helpful, and so far
nobody else who has touched the code has complained. If you feel strongly, I
will change it.

> > > > +	/* 8.2.8.4.3 */
> > > 
> > > ????
> > > 
> > 
> > I had been trying to be consistent with 'CXL2.0 - ' in front of all spec
> > reference. I obviously missed this one.
> 
> FYI, I generally find section names much easier to find than section
> numbers.  Especially as the numbers change very frequently, some times
> even for very minor updates to the spec.  E.g. in NVMe the numbers might
> even change from NVMe 1.X to NVMe 1.Xa because an errata had to add
> a clarification as its own section.

Why not both?

I ran into this in fact going from version 0.7 to 1.0 of the spec. I did call
out the spec version to address this, but you're right. Section names can change
too in theory.

/*
 * CXL 2.0 8.2.8.4.3
 * Mailbox Capabilities Register
 */

Too much?
Dan Williams Feb. 3, 2021, 9:23 p.m. UTC | #24
On Wed, Feb 3, 2021 at 9:23 AM Ben Widawsky <ben.widawsky@intel.com> wrote:
>
> On 21-02-03 17:15:34, Christoph Hellwig wrote:
> > On Tue, Feb 02, 2021 at 10:24:18AM -0800, Ben Widawsky wrote:
> > > > > +       /* Cap 4000h - CXL_CAP_CAP_ID_MEMDEV */
> > > > > +       struct {
> > > > > +               void __iomem *regs;
> > > > > +       } mem;
> > > >
> > > > This style looks massively obsfucated.  For one the comments look like
> > > > absolute gibberish, but also what is the point of all these anonymous
> > > > structures?
> > >
> > > They're not anonymous, and their names are for the below register functions. The
> > > comments are connected spec reference 'Cap XXXXh' to definitions in cxl.h. I can
> > > articulate that if it helps.
> >
> > But why no simply a
> >
> >       void __iomem *mem_regs;
> >
> > field vs the extra struct?
> >
> > > The register space for CXL devices is a bit weird since it's all subdivided
> > > under 1 BAR for now. To clearly distinguish over the different subregions, these
> > > helpers exist. It's really easy to mess this up as the developer and I actually
> > > would disagree that it makes debugging quite a bit easier. It also gets more
> > > convoluted when you add the other 2 BARs which also each have their own
> > > subregions.
> > >
> > > For example. if my mailbox function does:
> > > cxl_read_status_reg32(cxlm, CXLDEV_MB_CAPS_OFFSET);
> > >
> > > instead of:
> > > cxl_read_mbox_reg32(cxlm, CXLDEV_MB_CAPS_OFFSET);
> > >
> > > It's easier to spot than:
> > > readl(cxlm->regs + cxlm->status_offset + CXLDEV_MB_CAPS_OFFSET)
> >
> > Well, what I think would be the most obvious is:
> >
> > readl(cxlm->status_regs + CXLDEV_MB_CAPS_OFFSET);
> >
>
> Right, so you wrote the buggy version. Should be.
> readl(cxlm->mbox_regs + CXLDEV_MB_CAPS_OFFSET);
>
> Admittedly, "MB" for mailbox isn't super obvious. I think you've convinced me to
> rename these register definitions
> s/MB/MBOX.
>
> I'd prefer to keep the helpers for now as I do find them helpful, and so far
> nobody else who has touched the code has complained. If you feel strongly, I
> will change it.

After seeing the options, I think I'd prefer to not have to worry what
extra magic is happening with cxl_read_mbox_reg32()

cxl_read_mbox_reg32(cxlm, CXLDEV_MB_CAPS_OFFSET);

readl(cxlm->mbox_regs + CXLDEV_MB_CAPS_OFFSET);

The latter is both shorter and more idiomatic.


>
> > > > > +       /* 8.2.8.4.3 */
> > > >
> > > > ????
> > > >
> > >
> > > I had been trying to be consistent with 'CXL2.0 - ' in front of all spec
> > > reference. I obviously missed this one.
> >
> > FYI, I generally find section names much easier to find than section
> > numbers.  Especially as the numbers change very frequently, some times
> > even for very minor updates to the spec.  E.g. in NVMe the numbers might
> > even change from NVMe 1.X to NVMe 1.Xa because an errata had to add
> > a clarification as its own section.
>
> Why not both?
>
> I ran into this in fact going from version 0.7 to 1.0 of the spec. I did call
> out the spec version to address this, but you're right. Section names can change
> too in theory.
>
> /*
>  * CXL 2.0 8.2.8.4.3
>  * Mailbox Capabilities Register
>  */
>
> Too much?

That seems like too many lines.

/* CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register */

...this looks ok.
Christoph Hellwig Feb. 4, 2021, 7:16 a.m. UTC | #25
On Wed, Feb 03, 2021 at 01:23:31PM -0800, Dan Williams wrote:
> > I'd prefer to keep the helpers for now as I do find them helpful, and so far
> > nobody else who has touched the code has complained. If you feel strongly, I
> > will change it.
> 
> After seeing the options, I think I'd prefer to not have to worry what
> extra magic is happening with cxl_read_mbox_reg32()
> 
> cxl_read_mbox_reg32(cxlm, CXLDEV_MB_CAPS_OFFSET);
> 
> readl(cxlm->mbox_regs + CXLDEV_MB_CAPS_OFFSET);
> 
> The latter is both shorter and more idiomatic.

Same here.  That being said I know some driver maintainers like
wrappers, my real main irk was the macro magic to generate them.
Ben Widawsky Feb. 4, 2021, 3:29 p.m. UTC | #26
On 21-02-04 07:16:46, Christoph Hellwig wrote:
> On Wed, Feb 03, 2021 at 01:23:31PM -0800, Dan Williams wrote:
> > > I'd prefer to keep the helpers for now as I do find them helpful, and so far
> > > nobody else who has touched the code has complained. If you feel strongly, I
> > > will change it.
> > 
> > After seeing the options, I think I'd prefer to not have to worry what
> > extra magic is happening with cxl_read_mbox_reg32()
> > 
> > cxl_read_mbox_reg32(cxlm, CXLDEV_MB_CAPS_OFFSET);
> > 
> > readl(cxlm->mbox_regs + CXLDEV_MB_CAPS_OFFSET);
> > 
> > The latter is both shorter and more idiomatic.
> 
> Same here.  That being said I know some driver maintainers like
> wrappers, my real main irk was the macro magic to generate them.

I think the wrapper is often used as a means of trying to have cross OS
compatibility to some degree. Just to be clear, that was *not* the purpose here.

Stating I disagree for posterity, I'll begin reworking this code and it will be
changed for v2.

Thanks.
Ben
diff mbox series

Patch

diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index d81d0ba4617c..a3da7f8050c4 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -4,6 +4,37 @@ 
 #ifndef __CXL_H__
 #define __CXL_H__
 
+#include <linux/bitfield.h>
+#include <linux/bitops.h>
+#include <linux/io.h>
+
+#define CXL_SET_FIELD(value, field)                                            \
+	({                                                                     \
+		WARN_ON(!FIELD_FIT(field##_MASK, value));                      \
+		FIELD_PREP(field##_MASK, value);                               \
+	})
+
+#define CXL_GET_FIELD(word, field) FIELD_GET(field##_MASK, word)
+
+/* Device Capabilities (CXL 2.0 - 8.2.8.1) */
+#define CXLDEV_CAP_ARRAY_OFFSET 0x0
+#define   CXLDEV_CAP_ARRAY_CAP_ID 0
+#define   CXLDEV_CAP_ARRAY_ID_MASK GENMASK(15, 0)
+#define   CXLDEV_CAP_ARRAY_COUNT_MASK GENMASK(47, 32)
+/* (CXL 2.0 - 8.2.8.2.1) */
+#define CXLDEV_CAP_CAP_ID_DEVICE_STATUS 0x1
+#define CXLDEV_CAP_CAP_ID_PRIMARY_MAILBOX 0x2
+#define CXLDEV_CAP_CAP_ID_SECONDARY_MAILBOX 0x3
+#define CXLDEV_CAP_CAP_ID_MEMDEV 0x4000
+
+/* CXL Device Mailbox (CXL 2.0 - 8.2.8.4) */
+#define CXLDEV_MB_CAPS_OFFSET 0x00
+#define   CXLDEV_MB_CAP_PAYLOAD_SIZE_MASK GENMASK(4, 0)
+#define CXLDEV_MB_CTRL_OFFSET 0x04
+#define CXLDEV_MB_CMD_OFFSET 0x08
+#define CXLDEV_MB_STATUS_OFFSET 0x10
+#define CXLDEV_MB_BG_CMD_STATUS_OFFSET 0x18
+
 /**
  * struct cxl_mem - A CXL memory device
  * @pdev: The PCI device associated with this CXL device.
@@ -12,6 +43,51 @@ 
 struct cxl_mem {
 	struct pci_dev *pdev;
 	void __iomem *regs;
+
+	/* Cap 0001h - CXL_CAP_CAP_ID_DEVICE_STATUS */
+	struct {
+		void __iomem *regs;
+	} status;
+
+	/* Cap 0002h - CXL_CAP_CAP_ID_PRIMARY_MAILBOX */
+	struct {
+		void __iomem *regs;
+		size_t payload_size;
+	} mbox;
+
+	/* Cap 4000h - CXL_CAP_CAP_ID_MEMDEV */
+	struct {
+		void __iomem *regs;
+	} mem;
 };
 
-#endif
+#define cxl_reg(type)                                                          \
+	static inline void cxl_write_##type##_reg32(struct cxl_mem *cxlm,      \
+						    u32 reg, u32 value)        \
+	{                                                                      \
+		void __iomem *reg_addr = cxlm->type.regs;                      \
+		writel(value, reg_addr + reg);                                 \
+	}                                                                      \
+	static inline void cxl_write_##type##_reg64(struct cxl_mem *cxlm,      \
+						    u32 reg, u64 value)        \
+	{                                                                      \
+		void __iomem *reg_addr = cxlm->type.regs;                      \
+		writeq(value, reg_addr + reg);                                 \
+	}                                                                      \
+	static inline u32 cxl_read_##type##_reg32(struct cxl_mem *cxlm,        \
+						  u32 reg)                     \
+	{                                                                      \
+		void __iomem *reg_addr = cxlm->type.regs;                      \
+		return readl(reg_addr + reg);                                  \
+	}                                                                      \
+	static inline u64 cxl_read_##type##_reg64(struct cxl_mem *cxlm,        \
+						  u32 reg)                     \
+	{                                                                      \
+		void __iomem *reg_addr = cxlm->type.regs;                      \
+		return readq(reg_addr + reg);                                  \
+	}
+
+cxl_reg(status);
+cxl_reg(mbox);
+
+#endif /* __CXL_H__ */
diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
index a869c8dc24cc..fa14d51243ee 100644
--- a/drivers/cxl/mem.c
+++ b/drivers/cxl/mem.c
@@ -6,6 +6,99 @@ 
 #include "pci.h"
 #include "cxl.h"
 
+/**
+ * cxl_mem_setup_regs() - Setup necessary MMIO.
+ * @cxlm: The CXL memory device to communicate with.
+ *
+ * Return: 0 if all necessary registers mapped.
+ *
+ * A memory device is required by spec to implement a certain set of MMIO
+ * regions. The purpose of this function is to enumerate and map those
+ * registers.
+ *
+ * XXX: Register accessors need the mappings set up by this function, so
+ * any reads or writes must be read(b|w|l|q) or write(b|w|l|q)
+ */
+static int cxl_mem_setup_regs(struct cxl_mem *cxlm)
+{
+	struct device *dev = &cxlm->pdev->dev;
+	int cap, cap_count;
+	u64 cap_array;
+
+	cap_array = readq(cxlm->regs + CXLDEV_CAP_ARRAY_OFFSET);
+	if (CXL_GET_FIELD(cap_array, CXLDEV_CAP_ARRAY_ID) != CXLDEV_CAP_ARRAY_CAP_ID)
+		return -ENODEV;
+
+	cap_count = CXL_GET_FIELD(cap_array, CXLDEV_CAP_ARRAY_COUNT);
+
+	for (cap = 1; cap <= cap_count; cap++) {
+		void __iomem *register_block;
+		u32 offset;
+		u16 cap_id;
+
+		cap_id = readl(cxlm->regs + cap * 0x10) & 0xffff;
+		offset = readl(cxlm->regs + cap * 0x10 + 0x4);
+		register_block = cxlm->regs + offset;
+
+		switch (cap_id) {
+		case CXLDEV_CAP_CAP_ID_DEVICE_STATUS:
+			dev_dbg(dev, "found Status capability (0x%x)\n",
+				offset);
+			cxlm->status.regs = register_block;
+			break;
+		case CXLDEV_CAP_CAP_ID_PRIMARY_MAILBOX:
+			dev_dbg(dev, "found Mailbox capability (0x%x)\n",
+				offset);
+			cxlm->mbox.regs = register_block;
+			break;
+		case CXLDEV_CAP_CAP_ID_SECONDARY_MAILBOX:
+			dev_dbg(dev,
+				"found Secondary Mailbox capability (0x%x)\n",
+				offset);
+			break;
+		case CXLDEV_CAP_CAP_ID_MEMDEV:
+			dev_dbg(dev, "found Memory Device capability (0x%x)\n",
+				offset);
+			cxlm->mem.regs = register_block;
+			break;
+		default:
+			dev_warn(dev, "Unknown cap ID: %d (0x%x)\n", cap_id,
+				 offset);
+			break;
+		}
+	}
+
+	if (!cxlm->status.regs || !cxlm->mbox.regs || !cxlm->mem.regs) {
+		dev_err(dev, "registers not found: %s%s%s\n",
+			!cxlm->status.regs ? "status " : "",
+			!cxlm->mbox.regs ? "mbox " : "",
+			!cxlm->mem.regs ? "mem" : "");
+		return -ENXIO;
+	}
+
+	return 0;
+}
+
+static int cxl_mem_setup_mailbox(struct cxl_mem *cxlm)
+{
+	const int cap = cxl_read_mbox_reg32(cxlm, CXLDEV_MB_CAPS_OFFSET);
+
+	cxlm->mbox.payload_size =
+		1 << CXL_GET_FIELD(cap, CXLDEV_MB_CAP_PAYLOAD_SIZE);
+
+	/* 8.2.8.4.3 */
+	if (cxlm->mbox.payload_size < 256) {
+		dev_err(&cxlm->pdev->dev, "Mailbox is too small (%zub)",
+			cxlm->mbox.payload_size);
+		return -ENXIO;
+	}
+
+	dev_dbg(&cxlm->pdev->dev, "Mailbox payload sized %zu",
+		cxlm->mbox.payload_size);
+
+	return 0;
+}
+
 /**
  * cxl_mem_create() - Create a new &struct cxl_mem.
  * @pdev: The pci device associated with the new &struct cxl_mem.
@@ -119,7 +212,14 @@  static int cxl_mem_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		}
 	}
 
-	return rc;
+	if (rc)
+		return rc;
+
+	rc = cxl_mem_setup_regs(cxlm);
+	if (rc)
+		return rc;
+
+	return cxl_mem_setup_mailbox(cxlm);
 }
 
 static const struct pci_device_id cxl_mem_pci_tbl[] = {