Message ID | 20190705095800.43534-4-mika.westerberg@linux.intel.com (mailing list archive) |
---|---|
State | RFC, archived |
Headers | show |
Series | thunderbolt: Intel Ice Lake support | expand |
On Fri, Jul 5, 2019 at 12:58 PM Mika Westerberg <mika.westerberg@linux.intel.com> wrote: > > The register access should be using 32-bit reads/writes according to the > datasheet. With the previous generation hardware 16-bit writes have been > working but starting with ICL this is not the case anymore so fix > producer/consumer register update to use correct width register address. > > Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com> > --- > drivers/thunderbolt/nhi.c | 26 ++++++++++++++++++++++---- > 1 file changed, 22 insertions(+), 4 deletions(-) > > diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c > index 27fbe62c7ddd..09242653da67 100644 > --- a/drivers/thunderbolt/nhi.c > +++ b/drivers/thunderbolt/nhi.c > @@ -143,9 +143,24 @@ static void __iomem *ring_options_base(struct tb_ring *ring) > return io; > } > > -static void ring_iowrite16desc(struct tb_ring *ring, u32 value, u32 offset) > +static void ring_iowrite_prod(struct tb_ring *ring, u16 prod) > { > - iowrite16(value, ring_desc_base(ring) + offset); > + u32 val; > + > + val = ioread32(ring_desc_base(ring) + 8); > + val &= 0x0000ffff; > + val |= prod << 16; > + iowrite32(val, ring_desc_base(ring) + 8); > +} > + > +static void ring_iowrite_cons(struct tb_ring *ring, u16 cons) > +{ > + u32 val; > + > + val = ioread32(ring_desc_base(ring) + 8); > + val &= 0xffff0000; > + val |= cons; > + iowrite32(val, ring_desc_base(ring) + 8); > } > > static void ring_iowrite32desc(struct tb_ring *ring, u32 value, u32 offset) > @@ -197,7 +212,10 @@ static void ring_write_descriptors(struct tb_ring *ring) > descriptor->sof = frame->sof; > } > ring->head = (ring->head + 1) % ring->size; > - ring_iowrite16desc(ring, ring->head, ring->is_tx ? 10 : 8); > + if (ring->is_tx) > + ring_iowrite_prod(ring, ring->head); > + else > + ring_iowrite_cons(ring, ring->head); Really a matter of taste, but maybe you want to consider having a single function, with a 3rd parameter, bool is_tx. The calls here will be unified to: ring_iowrite(ring, ring->head, ring->is_tx); (No condition is needed here). The implementation uses the new parameter to decide which part of the register to mask, reducing the code duplication (in my eyes): val = ioread32(ring_desc_base(ring) + 8); if (is_tx) { val &= 0x0000ffff; val |= value << 16; } else { val &= 0xffff0000; val |= value; } iowrite32(val, ring_desc_base(ring) + 8); I'm not sure if it improves the readability or makes it worse. Your call.
On Fri, Jul 05, 2019 at 02:09:44PM +0300, Yehezkel Bernat wrote: > On Fri, Jul 5, 2019 at 12:58 PM Mika Westerberg > <mika.westerberg@linux.intel.com> wrote: > > > > The register access should be using 32-bit reads/writes according to the > > datasheet. With the previous generation hardware 16-bit writes have been > > working but starting with ICL this is not the case anymore so fix > > producer/consumer register update to use correct width register address. > > > > Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com> > > --- > > drivers/thunderbolt/nhi.c | 26 ++++++++++++++++++++++---- > > 1 file changed, 22 insertions(+), 4 deletions(-) > > > > diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c > > index 27fbe62c7ddd..09242653da67 100644 > > --- a/drivers/thunderbolt/nhi.c > > +++ b/drivers/thunderbolt/nhi.c > > @@ -143,9 +143,24 @@ static void __iomem *ring_options_base(struct tb_ring *ring) > > return io; > > } > > > > -static void ring_iowrite16desc(struct tb_ring *ring, u32 value, u32 offset) > > +static void ring_iowrite_prod(struct tb_ring *ring, u16 prod) > > { > > - iowrite16(value, ring_desc_base(ring) + offset); > > + u32 val; > > + > > + val = ioread32(ring_desc_base(ring) + 8); > > + val &= 0x0000ffff; > > + val |= prod << 16; > > + iowrite32(val, ring_desc_base(ring) + 8); > > +} > > + > > +static void ring_iowrite_cons(struct tb_ring *ring, u16 cons) > > +{ > > + u32 val; > > + > > + val = ioread32(ring_desc_base(ring) + 8); > > + val &= 0xffff0000; > > + val |= cons; > > + iowrite32(val, ring_desc_base(ring) + 8); > > } > > > > static void ring_iowrite32desc(struct tb_ring *ring, u32 value, u32 offset) > > @@ -197,7 +212,10 @@ static void ring_write_descriptors(struct tb_ring *ring) > > descriptor->sof = frame->sof; > > } > > ring->head = (ring->head + 1) % ring->size; > > - ring_iowrite16desc(ring, ring->head, ring->is_tx ? 10 : 8); > > + if (ring->is_tx) > > + ring_iowrite_prod(ring, ring->head); > > + else > > + ring_iowrite_cons(ring, ring->head); > > Really a matter of taste, but maybe you want to consider having a single > function, with a 3rd parameter, bool is_tx. > The calls here will be unified to: > ring_iowrite(ring, ring->head, ring->is_tx); > (No condition is needed here). I like the idea. We could even make it ring_iowrite_prod_cons(ring); and then use ring->head and ring->is_tx inside.
From: Yehezkel Bernat > Sent: 05 July 2019 12:10 > On Fri, Jul 5, 2019 at 12:58 PM Mika Westerberg > <mika.westerberg@linux.intel.com> wrote: > > > > The register access should be using 32-bit reads/writes according to the > > datasheet. With the previous generation hardware 16-bit writes have been > > working but starting with ICL this is not the case anymore so fix > > producer/consumer register update to use correct width register address. > > > > Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com> > > --- > > drivers/thunderbolt/nhi.c | 26 ++++++++++++++++++++++---- > > 1 file changed, 22 insertions(+), 4 deletions(-) > > > > diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c > > index 27fbe62c7ddd..09242653da67 100644 > > --- a/drivers/thunderbolt/nhi.c > > +++ b/drivers/thunderbolt/nhi.c > > @@ -143,9 +143,24 @@ static void __iomem *ring_options_base(struct tb_ring *ring) > > return io; > > } > > > > -static void ring_iowrite16desc(struct tb_ring *ring, u32 value, u32 offset) > > +static void ring_iowrite_prod(struct tb_ring *ring, u16 prod) > > { > > - iowrite16(value, ring_desc_base(ring) + offset); > > + u32 val; > > + > > + val = ioread32(ring_desc_base(ring) + 8); > > + val &= 0x0000ffff; > > + val |= prod << 16; > > + iowrite32(val, ring_desc_base(ring) + 8); > > +} > > + > > +static void ring_iowrite_cons(struct tb_ring *ring, u16 cons) > > +{ > > + u32 val; > > + > > + val = ioread32(ring_desc_base(ring) + 8); > > + val &= 0xffff0000; > > + val |= cons; > > + iowrite32(val, ring_desc_base(ring) + 8); > > } > > > > static void ring_iowrite32desc(struct tb_ring *ring, u32 value, u32 offset) > > @@ -197,7 +212,10 @@ static void ring_write_descriptors(struct tb_ring *ring) > > descriptor->sof = frame->sof; > > } > > ring->head = (ring->head + 1) % ring->size; > > - ring_iowrite16desc(ring, ring->head, ring->is_tx ? 10 : 8); > > + if (ring->is_tx) > > + ring_iowrite_prod(ring, ring->head); > > + else > > + ring_iowrite_cons(ring, ring->head); > > Really a matter of taste, but maybe you want to consider having a single > function, with a 3rd parameter, bool is_tx. > The calls here will be unified to: > ring_iowrite(ring, ring->head, ring->is_tx); > (No condition is needed here). > > The implementation uses the new parameter to decide which part of the register > to mask, reducing the code duplication (in my eyes): > > val = ioread32(ring_desc_base(ring) + 8); > if (is_tx) { > val &= 0x0000ffff; > val |= value << 16; > } else { > val &= 0xffff0000; > val |= value; > } > iowrite32(val, ring_desc_base(ring) + 8); > > I'm not sure if it improves the readability or makes it worse. Your call. Gah, that is all horrid beyond belief. If a 32bit write is valid then the hardware must not be updating the other 16 bits. In which case the driver knows what they should be. So it can do a single 32bit write of the required value. David - Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK Registration No: 1397386 (Wales)
On Fri, Jul 05, 2019 at 04:04:19PM +0000, David Laight wrote: > > Really a matter of taste, but maybe you want to consider having a single > > function, with a 3rd parameter, bool is_tx. > > The calls here will be unified to: > > ring_iowrite(ring, ring->head, ring->is_tx); > > (No condition is needed here). > > > > The implementation uses the new parameter to decide which part of the register > > to mask, reducing the code duplication (in my eyes): > > > > val = ioread32(ring_desc_base(ring) + 8); > > if (is_tx) { > > val &= 0x0000ffff; > > val |= value << 16; > > } else { > > val &= 0xffff0000; > > val |= value; > > } > > iowrite32(val, ring_desc_base(ring) + 8); > > > > I'm not sure if it improves the readability or makes it worse. Your call. > > Gah, that is all horrid beyond belief. > If a 32bit write is valid then the hardware must not be updating > the other 16 bits. > In which case the driver knows what they should be. > So it can do a single 32bit write of the required value. I'm not entirely sure I understand what you say above. Can you shed some light on this by a concrete example how it should look like? :-)
From: Mika Westerberg > Sent: 07 August 2019 17:14 > To: David Laight > > On Fri, Jul 05, 2019 at 04:04:19PM +0000, David Laight wrote: > > > Really a matter of taste, but maybe you want to consider having a single > > > function, with a 3rd parameter, bool is_tx. > > > The calls here will be unified to: > > > ring_iowrite(ring, ring->head, ring->is_tx); > > > (No condition is needed here). > > > > > > The implementation uses the new parameter to decide which part of the register > > > to mask, reducing the code duplication (in my eyes): > > > > > > val = ioread32(ring_desc_base(ring) + 8); > > > if (is_tx) { > > > val &= 0x0000ffff; > > > val |= value << 16; > > > } else { > > > val &= 0xffff0000; > > > val |= value; > > > } > > > iowrite32(val, ring_desc_base(ring) + 8); > > > > > > I'm not sure if it improves the readability or makes it worse. Your call. > > > > Gah, that is all horrid beyond belief. > > If a 32bit write is valid then the hardware must not be updating > > the other 16 bits. > > In which case the driver knows what they should be. > > So it can do a single 32bit write of the required value. > > I'm not entirely sure I understand what you say above. Can you shed some > light on this by a concrete example how it should look like? :-) The driver must know both the tx and rx ring values, so: iowrite32(tx_val << 16 | rx_val, ring_desc_base(ring) + 8); The ioread32() is likely to be very slow - you only want to do them if absolutely necessary. The speed of the iowrite32() doesn't matter (much) since it is almost certainly 'posted' and execution continues while the bus cycle is in progress. David - Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK Registration No: 1397386 (Wales)
On Wed, Aug 07, 2019 at 04:22:26PM +0000, David Laight wrote: > From: Mika Westerberg > > Sent: 07 August 2019 17:14 > > To: David Laight > > > > On Fri, Jul 05, 2019 at 04:04:19PM +0000, David Laight wrote: > > > > Really a matter of taste, but maybe you want to consider having a single > > > > function, with a 3rd parameter, bool is_tx. > > > > The calls here will be unified to: > > > > ring_iowrite(ring, ring->head, ring->is_tx); > > > > (No condition is needed here). > > > > > > > > The implementation uses the new parameter to decide which part of the register > > > > to mask, reducing the code duplication (in my eyes): > > > > > > > > val = ioread32(ring_desc_base(ring) + 8); > > > > if (is_tx) { > > > > val &= 0x0000ffff; > > > > val |= value << 16; > > > > } else { > > > > val &= 0xffff0000; > > > > val |= value; > > > > } > > > > iowrite32(val, ring_desc_base(ring) + 8); > > > > > > > > I'm not sure if it improves the readability or makes it worse. Your call. > > > > > > Gah, that is all horrid beyond belief. > > > If a 32bit write is valid then the hardware must not be updating > > > the other 16 bits. > > > In which case the driver knows what they should be. > > > So it can do a single 32bit write of the required value. > > > > I'm not entirely sure I understand what you say above. Can you shed some > > light on this by a concrete example how it should look like? :-) > > The driver must know both the tx and rx ring values, so: > iowrite32(tx_val << 16 | rx_val, ring_desc_base(ring) + 8); > I see. However, prod or cons side gets updated by the hardware as it processes buffers and other side is only updated by the driver. I'm not sure the above works here. > The ioread32() is likely to be very slow - you only want to do them > if absolutely necessary. > The speed of the iowrite32() doesn't matter (much) since it is almost > certainly 'posted' and execution continues while the bus cycle is > in progress. OK thanks for the explanation.
From: 'Mika Westerberg' [mailto:mika.westerberg@linux.intel.com] > Sent: 07 August 2019 17:36 > > On Wed, Aug 07, 2019 at 04:22:26PM +0000, David Laight wrote: > > From: Mika Westerberg > > > Sent: 07 August 2019 17:14 > > > To: David Laight > > > > > > On Fri, Jul 05, 2019 at 04:04:19PM +0000, David Laight wrote: > > > > > Really a matter of taste, but maybe you want to consider having a single > > > > > function, with a 3rd parameter, bool is_tx. > > > > > The calls here will be unified to: > > > > > ring_iowrite(ring, ring->head, ring->is_tx); > > > > > (No condition is needed here). > > > > > > > > > > The implementation uses the new parameter to decide which part of the register > > > > > to mask, reducing the code duplication (in my eyes): > > > > > > > > > > val = ioread32(ring_desc_base(ring) + 8); > > > > > if (is_tx) { > > > > > val &= 0x0000ffff; > > > > > val |= value << 16; > > > > > } else { > > > > > val &= 0xffff0000; > > > > > val |= value; > > > > > } > > > > > iowrite32(val, ring_desc_base(ring) + 8); > > > > > > > > > > I'm not sure if it improves the readability or makes it worse. Your call. > > > > > > > > Gah, that is all horrid beyond belief. > > > > If a 32bit write is valid then the hardware must not be updating > > > > the other 16 bits. > > > > In which case the driver knows what they should be. > > > > So it can do a single 32bit write of the required value. > > > > > > I'm not entirely sure I understand what you say above. Can you shed some > > > light on this by a concrete example how it should look like? :-) > > > > The driver must know both the tx and rx ring values, so: > > iowrite32(tx_val << 16 | rx_val, ring_desc_base(ring) + 8); > > > > I see. However, prod or cons side gets updated by the hardware as it > processes buffers and other side is only updated by the driver. I'm not > sure the above works here. If the hardware updates the other half of the 32bit word it doesn't ever work. In that case you must do 16bit writes. If the hardware is ignoring the byte-enables it is broken and unusable. David - Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK Registration No: 1397386 (Wales)
On Wed, Aug 07, 2019 at 04:41:30PM +0000, David Laight wrote: > From: 'Mika Westerberg' [mailto:mika.westerberg@linux.intel.com] > > Sent: 07 August 2019 17:36 > > > > On Wed, Aug 07, 2019 at 04:22:26PM +0000, David Laight wrote: > > > From: Mika Westerberg > > > > Sent: 07 August 2019 17:14 > > > > To: David Laight > > > > > > > > On Fri, Jul 05, 2019 at 04:04:19PM +0000, David Laight wrote: > > > > > > Really a matter of taste, but maybe you want to consider having a single > > > > > > function, with a 3rd parameter, bool is_tx. > > > > > > The calls here will be unified to: > > > > > > ring_iowrite(ring, ring->head, ring->is_tx); > > > > > > (No condition is needed here). > > > > > > > > > > > > The implementation uses the new parameter to decide which part of the register > > > > > > to mask, reducing the code duplication (in my eyes): > > > > > > > > > > > > val = ioread32(ring_desc_base(ring) + 8); > > > > > > if (is_tx) { > > > > > > val &= 0x0000ffff; > > > > > > val |= value << 16; > > > > > > } else { > > > > > > val &= 0xffff0000; > > > > > > val |= value; > > > > > > } > > > > > > iowrite32(val, ring_desc_base(ring) + 8); > > > > > > > > > > > > I'm not sure if it improves the readability or makes it worse. Your call. > > > > > > > > > > Gah, that is all horrid beyond belief. > > > > > If a 32bit write is valid then the hardware must not be updating > > > > > the other 16 bits. > > > > > In which case the driver knows what they should be. > > > > > So it can do a single 32bit write of the required value. > > > > > > > > I'm not entirely sure I understand what you say above. Can you shed some > > > > light on this by a concrete example how it should look like? :-) > > > > > > The driver must know both the tx and rx ring values, so: > > > iowrite32(tx_val << 16 | rx_val, ring_desc_base(ring) + 8); > > > > > > > I see. However, prod or cons side gets updated by the hardware as it > > processes buffers and other side is only updated by the driver. I'm not > > sure the above works here. > > If the hardware updates the other half of the 32bit word it doesn't ever work. > > In that case you must do 16bit writes. > If the hardware is ignoring the byte-enables it is broken and unusable. It is quite usable as I'm running this code on real hardware ;-) but 32-bit access is needed. The low or high 16-bits are read-only depending on the ring (Tx or Rx) and updated by the hardware. It ignores writes to that part so we could do this for Tx ring: iowrite32(prod << 16, ring_desc_base(ring) + 8); and this for Rx ring: iowrite32(cons, ring_desc_base(ring) + 8); Do you see any issues with this?
From: 'Mika Westerberg' > Sent: 08 August 2019 10:58 > On Wed, Aug 07, 2019 at 04:41:30PM +0000, David Laight wrote: > > From: 'Mika Westerberg' [mailto:mika.westerberg@linux.intel.com] > > > Sent: 07 August 2019 17:36 > > > > > > On Wed, Aug 07, 2019 at 04:22:26PM +0000, David Laight wrote: > > > > From: Mika Westerberg > > > > > Sent: 07 August 2019 17:14 > > > > > To: David Laight > > > > > > > > > > On Fri, Jul 05, 2019 at 04:04:19PM +0000, David Laight wrote: > > > > > > > Really a matter of taste, but maybe you want to consider having a single > > > > > > > function, with a 3rd parameter, bool is_tx. > > > > > > > The calls here will be unified to: > > > > > > > ring_iowrite(ring, ring->head, ring->is_tx); > > > > > > > (No condition is needed here). > > > > > > > > > > > > > > The implementation uses the new parameter to decide which part of the register > > > > > > > to mask, reducing the code duplication (in my eyes): > > > > > > > > > > > > > > val = ioread32(ring_desc_base(ring) + 8); > > > > > > > if (is_tx) { > > > > > > > val &= 0x0000ffff; > > > > > > > val |= value << 16; > > > > > > > } else { > > > > > > > val &= 0xffff0000; > > > > > > > val |= value; > > > > > > > } > > > > > > > iowrite32(val, ring_desc_base(ring) + 8); > > > > > > > > > > > > > > I'm not sure if it improves the readability or makes it worse. Your call. > > > > > > > > > > > > Gah, that is all horrid beyond belief. > > > > > > If a 32bit write is valid then the hardware must not be updating > > > > > > the other 16 bits. > > > > > > In which case the driver knows what they should be. > > > > > > So it can do a single 32bit write of the required value. > > > > > > > > > > I'm not entirely sure I understand what you say above. Can you shed some > > > > > light on this by a concrete example how it should look like? :-) > > > > > > > > The driver must know both the tx and rx ring values, so: > > > > iowrite32(tx_val << 16 | rx_val, ring_desc_base(ring) + 8); > > > > > > > > > > I see. However, prod or cons side gets updated by the hardware as it > > > processes buffers and other side is only updated by the driver. I'm not > > > sure the above works here. > > > > If the hardware updates the other half of the 32bit word it doesn't ever work. > > > > In that case you must do 16bit writes. > > If the hardware is ignoring the byte-enables it is broken and unusable. > > It is quite usable as I'm running this code on real hardware ;-) but > 32-bit access is needed. > > The low or high 16-bits are read-only depending on the ring (Tx or Rx) > and updated by the hardware. It ignores writes to that part so we could > do this for Tx ring: > > iowrite32(prod << 16, ring_desc_base(ring) + 8); > > and this for Rx ring: > > iowrite32(cons, ring_desc_base(ring) + 8); > > Do you see any issues with this? No, just comment that the hardware ignores the write to the other bytes. You probably want separate functions - to remove the mispredicted branch. David - Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK Registration No: 1397386 (Wales)
diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index 27fbe62c7ddd..09242653da67 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -143,9 +143,24 @@ static void __iomem *ring_options_base(struct tb_ring *ring) return io; } -static void ring_iowrite16desc(struct tb_ring *ring, u32 value, u32 offset) +static void ring_iowrite_prod(struct tb_ring *ring, u16 prod) { - iowrite16(value, ring_desc_base(ring) + offset); + u32 val; + + val = ioread32(ring_desc_base(ring) + 8); + val &= 0x0000ffff; + val |= prod << 16; + iowrite32(val, ring_desc_base(ring) + 8); +} + +static void ring_iowrite_cons(struct tb_ring *ring, u16 cons) +{ + u32 val; + + val = ioread32(ring_desc_base(ring) + 8); + val &= 0xffff0000; + val |= cons; + iowrite32(val, ring_desc_base(ring) + 8); } static void ring_iowrite32desc(struct tb_ring *ring, u32 value, u32 offset) @@ -197,7 +212,10 @@ static void ring_write_descriptors(struct tb_ring *ring) descriptor->sof = frame->sof; } ring->head = (ring->head + 1) % ring->size; - ring_iowrite16desc(ring, ring->head, ring->is_tx ? 10 : 8); + if (ring->is_tx) + ring_iowrite_prod(ring, ring->head); + else + ring_iowrite_cons(ring, ring->head); } } @@ -662,7 +680,7 @@ void tb_ring_stop(struct tb_ring *ring) ring_iowrite32options(ring, 0, 0); ring_iowrite64desc(ring, 0, 0); - ring_iowrite16desc(ring, 0, ring->is_tx ? 10 : 8); + ring_iowrite32desc(ring, 0, 8); ring_iowrite32desc(ring, 0, 12); ring->head = 0; ring->tail = 0;
The register access should be using 32-bit reads/writes according to the datasheet. With the previous generation hardware 16-bit writes have been working but starting with ICL this is not the case anymore so fix producer/consumer register update to use correct width register address. Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com> --- drivers/thunderbolt/nhi.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-)