diff mbox series

[3/8] thunderbolt: Use 32-bit writes when writing ring producer/consumer

Message ID 20190705095800.43534-4-mika.westerberg@linux.intel.com (mailing list archive)
State RFC, archived
Headers show
Series thunderbolt: Intel Ice Lake support | expand

Commit Message

Mika Westerberg July 5, 2019, 9:57 a.m. UTC
The register access should be using 32-bit reads/writes according to the
datasheet. With the previous generation hardware 16-bit writes have been
working but starting with ICL this is not the case anymore so fix
producer/consumer register update to use correct width register address.

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
---
 drivers/thunderbolt/nhi.c | 26 ++++++++++++++++++++++----
 1 file changed, 22 insertions(+), 4 deletions(-)

Comments

Yehezkel Bernat July 5, 2019, 11:09 a.m. UTC | #1
On Fri, Jul 5, 2019 at 12:58 PM Mika Westerberg
<mika.westerberg@linux.intel.com> wrote:
>
> The register access should be using 32-bit reads/writes according to the
> datasheet. With the previous generation hardware 16-bit writes have been
> working but starting with ICL this is not the case anymore so fix
> producer/consumer register update to use correct width register address.
>
> Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
> ---
>  drivers/thunderbolt/nhi.c | 26 ++++++++++++++++++++++----
>  1 file changed, 22 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c
> index 27fbe62c7ddd..09242653da67 100644
> --- a/drivers/thunderbolt/nhi.c
> +++ b/drivers/thunderbolt/nhi.c
> @@ -143,9 +143,24 @@ static void __iomem *ring_options_base(struct tb_ring *ring)
>         return io;
>  }
>
> -static void ring_iowrite16desc(struct tb_ring *ring, u32 value, u32 offset)
> +static void ring_iowrite_prod(struct tb_ring *ring, u16 prod)
>  {
> -       iowrite16(value, ring_desc_base(ring) + offset);
> +       u32 val;
> +
> +       val = ioread32(ring_desc_base(ring) + 8);
> +       val &= 0x0000ffff;
> +       val |= prod << 16;
> +       iowrite32(val, ring_desc_base(ring) + 8);
> +}
> +
> +static void ring_iowrite_cons(struct tb_ring *ring, u16 cons)
> +{
> +       u32 val;
> +
> +       val = ioread32(ring_desc_base(ring) + 8);
> +       val &= 0xffff0000;
> +       val |= cons;
> +       iowrite32(val, ring_desc_base(ring) + 8);
>  }
>
>  static void ring_iowrite32desc(struct tb_ring *ring, u32 value, u32 offset)
> @@ -197,7 +212,10 @@ static void ring_write_descriptors(struct tb_ring *ring)
>                         descriptor->sof = frame->sof;
>                 }
>                 ring->head = (ring->head + 1) % ring->size;
> -               ring_iowrite16desc(ring, ring->head, ring->is_tx ? 10 : 8);
> +               if (ring->is_tx)
> +                       ring_iowrite_prod(ring, ring->head);
> +               else
> +                       ring_iowrite_cons(ring, ring->head);

Really a matter of taste, but maybe you want to consider having a single
function, with a 3rd parameter, bool is_tx.
The calls here will be unified to:
        ring_iowrite(ring, ring->head, ring->is_tx);
(No condition is needed here).

The implementation uses the new parameter to decide which part of the register
to mask, reducing the code duplication (in my eyes):

        val = ioread32(ring_desc_base(ring) + 8);
        if (is_tx) {
                val &= 0x0000ffff;
                val |= value << 16;
        } else {
                val &= 0xffff0000;
                val |= value;
        }
        iowrite32(val, ring_desc_base(ring) + 8);

I'm not sure if it improves the readability or makes it worse. Your call.
Mika Westerberg July 5, 2019, 11:24 a.m. UTC | #2
On Fri, Jul 05, 2019 at 02:09:44PM +0300, Yehezkel Bernat wrote:
> On Fri, Jul 5, 2019 at 12:58 PM Mika Westerberg
> <mika.westerberg@linux.intel.com> wrote:
> >
> > The register access should be using 32-bit reads/writes according to the
> > datasheet. With the previous generation hardware 16-bit writes have been
> > working but starting with ICL this is not the case anymore so fix
> > producer/consumer register update to use correct width register address.
> >
> > Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
> > ---
> >  drivers/thunderbolt/nhi.c | 26 ++++++++++++++++++++++----
> >  1 file changed, 22 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c
> > index 27fbe62c7ddd..09242653da67 100644
> > --- a/drivers/thunderbolt/nhi.c
> > +++ b/drivers/thunderbolt/nhi.c
> > @@ -143,9 +143,24 @@ static void __iomem *ring_options_base(struct tb_ring *ring)
> >         return io;
> >  }
> >
> > -static void ring_iowrite16desc(struct tb_ring *ring, u32 value, u32 offset)
> > +static void ring_iowrite_prod(struct tb_ring *ring, u16 prod)
> >  {
> > -       iowrite16(value, ring_desc_base(ring) + offset);
> > +       u32 val;
> > +
> > +       val = ioread32(ring_desc_base(ring) + 8);
> > +       val &= 0x0000ffff;
> > +       val |= prod << 16;
> > +       iowrite32(val, ring_desc_base(ring) + 8);
> > +}
> > +
> > +static void ring_iowrite_cons(struct tb_ring *ring, u16 cons)
> > +{
> > +       u32 val;
> > +
> > +       val = ioread32(ring_desc_base(ring) + 8);
> > +       val &= 0xffff0000;
> > +       val |= cons;
> > +       iowrite32(val, ring_desc_base(ring) + 8);
> >  }
> >
> >  static void ring_iowrite32desc(struct tb_ring *ring, u32 value, u32 offset)
> > @@ -197,7 +212,10 @@ static void ring_write_descriptors(struct tb_ring *ring)
> >                         descriptor->sof = frame->sof;
> >                 }
> >                 ring->head = (ring->head + 1) % ring->size;
> > -               ring_iowrite16desc(ring, ring->head, ring->is_tx ? 10 : 8);
> > +               if (ring->is_tx)
> > +                       ring_iowrite_prod(ring, ring->head);
> > +               else
> > +                       ring_iowrite_cons(ring, ring->head);
> 
> Really a matter of taste, but maybe you want to consider having a single
> function, with a 3rd parameter, bool is_tx.
> The calls here will be unified to:
>         ring_iowrite(ring, ring->head, ring->is_tx);
> (No condition is needed here).

I like the idea. We could even make it

	ring_iowrite_prod_cons(ring);

and then use ring->head and ring->is_tx inside.
David Laight July 5, 2019, 4:04 p.m. UTC | #3
From: Yehezkel Bernat
> Sent: 05 July 2019 12:10
> On Fri, Jul 5, 2019 at 12:58 PM Mika Westerberg
> <mika.westerberg@linux.intel.com> wrote:
> >
> > The register access should be using 32-bit reads/writes according to the
> > datasheet. With the previous generation hardware 16-bit writes have been
> > working but starting with ICL this is not the case anymore so fix
> > producer/consumer register update to use correct width register address.
> >
> > Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
> > ---
> >  drivers/thunderbolt/nhi.c | 26 ++++++++++++++++++++++----
> >  1 file changed, 22 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c
> > index 27fbe62c7ddd..09242653da67 100644
> > --- a/drivers/thunderbolt/nhi.c
> > +++ b/drivers/thunderbolt/nhi.c
> > @@ -143,9 +143,24 @@ static void __iomem *ring_options_base(struct tb_ring *ring)
> >         return io;
> >  }
> >
> > -static void ring_iowrite16desc(struct tb_ring *ring, u32 value, u32 offset)
> > +static void ring_iowrite_prod(struct tb_ring *ring, u16 prod)
> >  {
> > -       iowrite16(value, ring_desc_base(ring) + offset);
> > +       u32 val;
> > +
> > +       val = ioread32(ring_desc_base(ring) + 8);
> > +       val &= 0x0000ffff;
> > +       val |= prod << 16;
> > +       iowrite32(val, ring_desc_base(ring) + 8);
> > +}
> > +
> > +static void ring_iowrite_cons(struct tb_ring *ring, u16 cons)
> > +{
> > +       u32 val;
> > +
> > +       val = ioread32(ring_desc_base(ring) + 8);
> > +       val &= 0xffff0000;
> > +       val |= cons;
> > +       iowrite32(val, ring_desc_base(ring) + 8);
> >  }
> >
> >  static void ring_iowrite32desc(struct tb_ring *ring, u32 value, u32 offset)
> > @@ -197,7 +212,10 @@ static void ring_write_descriptors(struct tb_ring *ring)
> >                         descriptor->sof = frame->sof;
> >                 }
> >                 ring->head = (ring->head + 1) % ring->size;
> > -               ring_iowrite16desc(ring, ring->head, ring->is_tx ? 10 : 8);
> > +               if (ring->is_tx)
> > +                       ring_iowrite_prod(ring, ring->head);
> > +               else
> > +                       ring_iowrite_cons(ring, ring->head);
> 
> Really a matter of taste, but maybe you want to consider having a single
> function, with a 3rd parameter, bool is_tx.
> The calls here will be unified to:
>         ring_iowrite(ring, ring->head, ring->is_tx);
> (No condition is needed here).
> 
> The implementation uses the new parameter to decide which part of the register
> to mask, reducing the code duplication (in my eyes):
> 
>         val = ioread32(ring_desc_base(ring) + 8);
>         if (is_tx) {
>                 val &= 0x0000ffff;
>                 val |= value << 16;
>         } else {
>                 val &= 0xffff0000;
>                 val |= value;
>         }
>         iowrite32(val, ring_desc_base(ring) + 8);
> 
> I'm not sure if it improves the readability or makes it worse. Your call.

Gah, that is all horrid beyond belief.
If a 32bit write is valid then the hardware must not be updating
the other 16 bits.
In which case the driver knows what they should be.
So it can do a single 32bit write of the required value.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
Mika Westerberg Aug. 7, 2019, 4:13 p.m. UTC | #4
On Fri, Jul 05, 2019 at 04:04:19PM +0000, David Laight wrote:
> > Really a matter of taste, but maybe you want to consider having a single
> > function, with a 3rd parameter, bool is_tx.
> > The calls here will be unified to:
> >         ring_iowrite(ring, ring->head, ring->is_tx);
> > (No condition is needed here).
> > 
> > The implementation uses the new parameter to decide which part of the register
> > to mask, reducing the code duplication (in my eyes):
> > 
> >         val = ioread32(ring_desc_base(ring) + 8);
> >         if (is_tx) {
> >                 val &= 0x0000ffff;
> >                 val |= value << 16;
> >         } else {
> >                 val &= 0xffff0000;
> >                 val |= value;
> >         }
> >         iowrite32(val, ring_desc_base(ring) + 8);
> > 
> > I'm not sure if it improves the readability or makes it worse. Your call.
> 
> Gah, that is all horrid beyond belief.
> If a 32bit write is valid then the hardware must not be updating
> the other 16 bits.
> In which case the driver knows what they should be.
> So it can do a single 32bit write of the required value.

I'm not entirely sure I understand what you say above. Can you shed some
light on this by a concrete example how it should look like? :-)
David Laight Aug. 7, 2019, 4:22 p.m. UTC | #5
From: Mika Westerberg
> Sent: 07 August 2019 17:14
> To: David Laight
> 
> On Fri, Jul 05, 2019 at 04:04:19PM +0000, David Laight wrote:
> > > Really a matter of taste, but maybe you want to consider having a single
> > > function, with a 3rd parameter, bool is_tx.
> > > The calls here will be unified to:
> > >         ring_iowrite(ring, ring->head, ring->is_tx);
> > > (No condition is needed here).
> > >
> > > The implementation uses the new parameter to decide which part of the register
> > > to mask, reducing the code duplication (in my eyes):
> > >
> > >         val = ioread32(ring_desc_base(ring) + 8);
> > >         if (is_tx) {
> > >                 val &= 0x0000ffff;
> > >                 val |= value << 16;
> > >         } else {
> > >                 val &= 0xffff0000;
> > >                 val |= value;
> > >         }
> > >         iowrite32(val, ring_desc_base(ring) + 8);
> > >
> > > I'm not sure if it improves the readability or makes it worse. Your call.
> >
> > Gah, that is all horrid beyond belief.
> > If a 32bit write is valid then the hardware must not be updating
> > the other 16 bits.
> > In which case the driver knows what they should be.
> > So it can do a single 32bit write of the required value.
> 
> I'm not entirely sure I understand what you say above. Can you shed some
> light on this by a concrete example how it should look like? :-)

The driver must know both the tx and rx ring values, so:
	iowrite32(tx_val << 16 | rx_val, ring_desc_base(ring) + 8);

The ioread32() is likely to be very slow - you only want to do them
if absolutely necessary.
The speed of the iowrite32() doesn't matter (much) since it is almost
certainly 'posted' and execution continues while the bus cycle is
in progress.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
Mika Westerberg Aug. 7, 2019, 4:36 p.m. UTC | #6
On Wed, Aug 07, 2019 at 04:22:26PM +0000, David Laight wrote:
> From: Mika Westerberg
> > Sent: 07 August 2019 17:14
> > To: David Laight
> > 
> > On Fri, Jul 05, 2019 at 04:04:19PM +0000, David Laight wrote:
> > > > Really a matter of taste, but maybe you want to consider having a single
> > > > function, with a 3rd parameter, bool is_tx.
> > > > The calls here will be unified to:
> > > >         ring_iowrite(ring, ring->head, ring->is_tx);
> > > > (No condition is needed here).
> > > >
> > > > The implementation uses the new parameter to decide which part of the register
> > > > to mask, reducing the code duplication (in my eyes):
> > > >
> > > >         val = ioread32(ring_desc_base(ring) + 8);
> > > >         if (is_tx) {
> > > >                 val &= 0x0000ffff;
> > > >                 val |= value << 16;
> > > >         } else {
> > > >                 val &= 0xffff0000;
> > > >                 val |= value;
> > > >         }
> > > >         iowrite32(val, ring_desc_base(ring) + 8);
> > > >
> > > > I'm not sure if it improves the readability or makes it worse. Your call.
> > >
> > > Gah, that is all horrid beyond belief.
> > > If a 32bit write is valid then the hardware must not be updating
> > > the other 16 bits.
> > > In which case the driver knows what they should be.
> > > So it can do a single 32bit write of the required value.
> > 
> > I'm not entirely sure I understand what you say above. Can you shed some
> > light on this by a concrete example how it should look like? :-)
> 
> The driver must know both the tx and rx ring values, so:
> 	iowrite32(tx_val << 16 | rx_val, ring_desc_base(ring) + 8);
>

I see. However, prod or cons side gets updated by the hardware as it
processes buffers and other side is only updated by the driver. I'm not
sure the above works here.

> The ioread32() is likely to be very slow - you only want to do them
> if absolutely necessary.
> The speed of the iowrite32() doesn't matter (much) since it is almost
> certainly 'posted' and execution continues while the bus cycle is
> in progress.

OK thanks for the explanation.
David Laight Aug. 7, 2019, 4:41 p.m. UTC | #7
From: 'Mika Westerberg' [mailto:mika.westerberg@linux.intel.com]
> Sent: 07 August 2019 17:36
> 
> On Wed, Aug 07, 2019 at 04:22:26PM +0000, David Laight wrote:
> > From: Mika Westerberg
> > > Sent: 07 August 2019 17:14
> > > To: David Laight
> > >
> > > On Fri, Jul 05, 2019 at 04:04:19PM +0000, David Laight wrote:
> > > > > Really a matter of taste, but maybe you want to consider having a single
> > > > > function, with a 3rd parameter, bool is_tx.
> > > > > The calls here will be unified to:
> > > > >         ring_iowrite(ring, ring->head, ring->is_tx);
> > > > > (No condition is needed here).
> > > > >
> > > > > The implementation uses the new parameter to decide which part of the register
> > > > > to mask, reducing the code duplication (in my eyes):
> > > > >
> > > > >         val = ioread32(ring_desc_base(ring) + 8);
> > > > >         if (is_tx) {
> > > > >                 val &= 0x0000ffff;
> > > > >                 val |= value << 16;
> > > > >         } else {
> > > > >                 val &= 0xffff0000;
> > > > >                 val |= value;
> > > > >         }
> > > > >         iowrite32(val, ring_desc_base(ring) + 8);
> > > > >
> > > > > I'm not sure if it improves the readability or makes it worse. Your call.
> > > >
> > > > Gah, that is all horrid beyond belief.
> > > > If a 32bit write is valid then the hardware must not be updating
> > > > the other 16 bits.
> > > > In which case the driver knows what they should be.
> > > > So it can do a single 32bit write of the required value.
> > >
> > > I'm not entirely sure I understand what you say above. Can you shed some
> > > light on this by a concrete example how it should look like? :-)
> >
> > The driver must know both the tx and rx ring values, so:
> > 	iowrite32(tx_val << 16 | rx_val, ring_desc_base(ring) + 8);
> >
> 
> I see. However, prod or cons side gets updated by the hardware as it
> processes buffers and other side is only updated by the driver. I'm not
> sure the above works here.

If the hardware updates the other half of the 32bit word it doesn't ever work.
In that case you must do 16bit writes.
If the hardware is ignoring the byte-enables it is broken and unusable.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
Mika Westerberg Aug. 8, 2019, 9:57 a.m. UTC | #8
On Wed, Aug 07, 2019 at 04:41:30PM +0000, David Laight wrote:
> From: 'Mika Westerberg' [mailto:mika.westerberg@linux.intel.com]
> > Sent: 07 August 2019 17:36
> > 
> > On Wed, Aug 07, 2019 at 04:22:26PM +0000, David Laight wrote:
> > > From: Mika Westerberg
> > > > Sent: 07 August 2019 17:14
> > > > To: David Laight
> > > >
> > > > On Fri, Jul 05, 2019 at 04:04:19PM +0000, David Laight wrote:
> > > > > > Really a matter of taste, but maybe you want to consider having a single
> > > > > > function, with a 3rd parameter, bool is_tx.
> > > > > > The calls here will be unified to:
> > > > > >         ring_iowrite(ring, ring->head, ring->is_tx);
> > > > > > (No condition is needed here).
> > > > > >
> > > > > > The implementation uses the new parameter to decide which part of the register
> > > > > > to mask, reducing the code duplication (in my eyes):
> > > > > >
> > > > > >         val = ioread32(ring_desc_base(ring) + 8);
> > > > > >         if (is_tx) {
> > > > > >                 val &= 0x0000ffff;
> > > > > >                 val |= value << 16;
> > > > > >         } else {
> > > > > >                 val &= 0xffff0000;
> > > > > >                 val |= value;
> > > > > >         }
> > > > > >         iowrite32(val, ring_desc_base(ring) + 8);
> > > > > >
> > > > > > I'm not sure if it improves the readability or makes it worse. Your call.
> > > > >
> > > > > Gah, that is all horrid beyond belief.
> > > > > If a 32bit write is valid then the hardware must not be updating
> > > > > the other 16 bits.
> > > > > In which case the driver knows what they should be.
> > > > > So it can do a single 32bit write of the required value.
> > > >
> > > > I'm not entirely sure I understand what you say above. Can you shed some
> > > > light on this by a concrete example how it should look like? :-)
> > >
> > > The driver must know both the tx and rx ring values, so:
> > > 	iowrite32(tx_val << 16 | rx_val, ring_desc_base(ring) + 8);
> > >
> > 
> > I see. However, prod or cons side gets updated by the hardware as it
> > processes buffers and other side is only updated by the driver. I'm not
> > sure the above works here.
> 
> If the hardware updates the other half of the 32bit word it doesn't ever work.
>
> In that case you must do 16bit writes.
> If the hardware is ignoring the byte-enables it is broken and unusable.

It is quite usable as I'm running this code on real hardware ;-) but
32-bit access is needed.

The low or high 16-bits are read-only depending on the ring (Tx or Rx)
and updated by the hardware. It ignores writes to that part so we could
do this for Tx ring:

	iowrite32(prod << 16, ring_desc_base(ring) + 8);

and this for Rx ring:

	iowrite32(cons, ring_desc_base(ring) + 8);

Do you see any issues with this?
David Laight Aug. 12, 2019, 9:01 a.m. UTC | #9
From: 'Mika Westerberg'
> Sent: 08 August 2019 10:58
> On Wed, Aug 07, 2019 at 04:41:30PM +0000, David Laight wrote:
> > From: 'Mika Westerberg' [mailto:mika.westerberg@linux.intel.com]
> > > Sent: 07 August 2019 17:36
> > >
> > > On Wed, Aug 07, 2019 at 04:22:26PM +0000, David Laight wrote:
> > > > From: Mika Westerberg
> > > > > Sent: 07 August 2019 17:14
> > > > > To: David Laight
> > > > >
> > > > > On Fri, Jul 05, 2019 at 04:04:19PM +0000, David Laight wrote:
> > > > > > > Really a matter of taste, but maybe you want to consider having a single
> > > > > > > function, with a 3rd parameter, bool is_tx.
> > > > > > > The calls here will be unified to:
> > > > > > >         ring_iowrite(ring, ring->head, ring->is_tx);
> > > > > > > (No condition is needed here).
> > > > > > >
> > > > > > > The implementation uses the new parameter to decide which part of the register
> > > > > > > to mask, reducing the code duplication (in my eyes):
> > > > > > >
> > > > > > >         val = ioread32(ring_desc_base(ring) + 8);
> > > > > > >         if (is_tx) {
> > > > > > >                 val &= 0x0000ffff;
> > > > > > >                 val |= value << 16;
> > > > > > >         } else {
> > > > > > >                 val &= 0xffff0000;
> > > > > > >                 val |= value;
> > > > > > >         }
> > > > > > >         iowrite32(val, ring_desc_base(ring) + 8);
> > > > > > >
> > > > > > > I'm not sure if it improves the readability or makes it worse. Your call.
> > > > > >
> > > > > > Gah, that is all horrid beyond belief.
> > > > > > If a 32bit write is valid then the hardware must not be updating
> > > > > > the other 16 bits.
> > > > > > In which case the driver knows what they should be.
> > > > > > So it can do a single 32bit write of the required value.
> > > > >
> > > > > I'm not entirely sure I understand what you say above. Can you shed some
> > > > > light on this by a concrete example how it should look like? :-)
> > > >
> > > > The driver must know both the tx and rx ring values, so:
> > > > 	iowrite32(tx_val << 16 | rx_val, ring_desc_base(ring) + 8);
> > > >
> > >
> > > I see. However, prod or cons side gets updated by the hardware as it
> > > processes buffers and other side is only updated by the driver. I'm not
> > > sure the above works here.
> >
> > If the hardware updates the other half of the 32bit word it doesn't ever work.
> >
> > In that case you must do 16bit writes.
> > If the hardware is ignoring the byte-enables it is broken and unusable.
> 
> It is quite usable as I'm running this code on real hardware ;-) but
> 32-bit access is needed.
> 
> The low or high 16-bits are read-only depending on the ring (Tx or Rx)
> and updated by the hardware. It ignores writes to that part so we could
> do this for Tx ring:
> 
> 	iowrite32(prod << 16, ring_desc_base(ring) + 8);
> 
> and this for Rx ring:
> 
> 	iowrite32(cons, ring_desc_base(ring) + 8);
> 
> Do you see any issues with this?

No, just comment that the hardware ignores the write to the other bytes.

You probably want separate functions - to remove the mispredicted branch.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
diff mbox series

Patch

diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c
index 27fbe62c7ddd..09242653da67 100644
--- a/drivers/thunderbolt/nhi.c
+++ b/drivers/thunderbolt/nhi.c
@@ -143,9 +143,24 @@  static void __iomem *ring_options_base(struct tb_ring *ring)
 	return io;
 }
 
-static void ring_iowrite16desc(struct tb_ring *ring, u32 value, u32 offset)
+static void ring_iowrite_prod(struct tb_ring *ring, u16 prod)
 {
-	iowrite16(value, ring_desc_base(ring) + offset);
+	u32 val;
+
+	val = ioread32(ring_desc_base(ring) + 8);
+	val &= 0x0000ffff;
+	val |= prod << 16;
+	iowrite32(val, ring_desc_base(ring) + 8);
+}
+
+static void ring_iowrite_cons(struct tb_ring *ring, u16 cons)
+{
+	u32 val;
+
+	val = ioread32(ring_desc_base(ring) + 8);
+	val &= 0xffff0000;
+	val |= cons;
+	iowrite32(val, ring_desc_base(ring) + 8);
 }
 
 static void ring_iowrite32desc(struct tb_ring *ring, u32 value, u32 offset)
@@ -197,7 +212,10 @@  static void ring_write_descriptors(struct tb_ring *ring)
 			descriptor->sof = frame->sof;
 		}
 		ring->head = (ring->head + 1) % ring->size;
-		ring_iowrite16desc(ring, ring->head, ring->is_tx ? 10 : 8);
+		if (ring->is_tx)
+			ring_iowrite_prod(ring, ring->head);
+		else
+			ring_iowrite_cons(ring, ring->head);
 	}
 }
 
@@ -662,7 +680,7 @@  void tb_ring_stop(struct tb_ring *ring)
 
 	ring_iowrite32options(ring, 0, 0);
 	ring_iowrite64desc(ring, 0, 0);
-	ring_iowrite16desc(ring, 0, ring->is_tx ? 10 : 8);
+	ring_iowrite32desc(ring, 0, 8);
 	ring_iowrite32desc(ring, 0, 12);
 	ring->head = 0;
 	ring->tail = 0;