Message ID | 0d76de0ecc0aa7cb01fd8b8863a8e567abd4410b.1651663123.git.quic_saipraka@quicinc.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | lib/rwmmio/arm64: Add support to trace register reads/writes | expand |
Hi, On 04/05/2022 12:28, Sai Prakash Ranjan wrote: > Per discussion in [1], it was decided to move to using architecture > independent/asm-generic IO memory barriers to have just one set of > them and deprecate use of arm64 specific IO memory barriers in driver > code. So replace current usage of __iormb()/__iowmb() in drivers to > __io_ar()/__io_bw(). > > [1] https://lore.kernel.org/lkml/CAK8P3a0L2tLeF1Q0+0ijUxhGNaw+Z0fyPC1oW6_ELQfn0=i4iw@mail.gmail.com/ > Looking at the disassembly, it looks like in effect they are slightly different for arm64. i.e., before this patch we had "dmb osh{ld/st}" and after the patch we have: "dsb {ld/st}" Is this really what we want? I don't think this is desirable. Suzuki > Cc: Mathieu Poirier <mathieu.poirier@linaro.org> > Cc: Suzuki K Poulose <suzuki.poulose@arm.com> > Signed-off-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com> > Reviewed-by: Arnd Bergmann <arnd@arndb.de> > --- > drivers/hwtracing/coresight/coresight-etm4x-core.c | 8 ++++---- > drivers/hwtracing/coresight/coresight-etm4x.h | 8 ++++---- > 2 files changed, 8 insertions(+), 8 deletions(-) > > diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c > index 7f416a12000e..81c0faf45b28 100644 > --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c > +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c > @@ -98,7 +98,7 @@ u64 etm4x_sysreg_read(u32 offset, bool _relaxed, bool _64bit) > } > > if (!_relaxed) > - __iormb(res); /* Imitate the !relaxed I/O helpers */ > + __io_ar(res); /* Imitate the !relaxed I/O helpers */ > > return res; > } > @@ -106,7 +106,7 @@ u64 etm4x_sysreg_read(u32 offset, bool _relaxed, bool _64bit) > void etm4x_sysreg_write(u64 val, u32 offset, bool _relaxed, bool _64bit) > { > if (!_relaxed) > - __iowmb(); /* Imitate the !relaxed I/O helpers */ > + __io_bw(); /* Imitate the !relaxed I/O helpers */ > if (!_64bit) > val &= GENMASK(31, 0); > > @@ -130,7 +130,7 @@ static u64 
ete_sysreg_read(u32 offset, bool _relaxed, bool _64bit) > } > > if (!_relaxed) > - __iormb(res); /* Imitate the !relaxed I/O helpers */ > + __io_ar(res); /* Imitate the !relaxed I/O helpers */ > > return res; > } > @@ -138,7 +138,7 @@ static u64 ete_sysreg_read(u32 offset, bool _relaxed, bool _64bit) > static void ete_sysreg_write(u64 val, u32 offset, bool _relaxed, bool _64bit) > { > if (!_relaxed) > - __iowmb(); /* Imitate the !relaxed I/O helpers */ > + __io_bw(); /* Imitate the !relaxed I/O helpers */ > if (!_64bit) > val &= GENMASK(31, 0); > > diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h > index 3c4d69b096ca..f54698731582 100644 > --- a/drivers/hwtracing/coresight/coresight-etm4x.h > +++ b/drivers/hwtracing/coresight/coresight-etm4x.h > @@ -448,14 +448,14 @@ > #define etm4x_read32(csa, offset) \ > ({ \ > u32 __val = etm4x_relaxed_read32((csa), (offset)); \ > - __iormb(__val); \ > + __io_ar(__val); \ > __val; \ > }) > > #define etm4x_read64(csa, offset) \ > ({ \ > u64 __val = etm4x_relaxed_read64((csa), (offset)); \ > - __iormb(__val); \ > + __io_ar(__val); \ > __val; \ > }) > > @@ -479,13 +479,13 @@ > > #define etm4x_write32(csa, val, offset) \ > do { \ > - __iowmb(); \ > + __io_bw(); \ > etm4x_relaxed_write32((csa), (val), (offset)); \ > } while (0) > > #define etm4x_write64(csa, val, offset) \ > do { \ > - __iowmb(); \ > + __io_bw(); \ > etm4x_relaxed_write64((csa), (val), (offset)); \ > } while (0) >
Hi Suzuki, On 5/6/2022 5:14 AM, Suzuki K Poulose wrote: > Hi, > > On 04/05/2022 12:28, Sai Prakash Ranjan wrote: >> Per discussion in [1], it was decided to move to using architecture >> independent/asm-generic IO memory barriers to have just one set of >> them and deprecate use of arm64 specific IO memory barriers in driver >> code. So replace current usage of __iormb()/__iowmb() in drivers to >> __io_ar()/__io_bw(). >> >> [1] https://lore.kernel.org/lkml/CAK8P3a0L2tLeF1Q0+0ijUxhGNaw+Z0fyPC1oW6_ELQfn0=i4iw@mail.gmail.com/ >> > > Looking at the dis-assembly it looks like in effect they are slightly > different for arm64. > > i.e., before this patch we had > > "dmb osh{ld/st}" > > and after the patch we have : > > "dsb {ld/st}" > > Is this really what we want ? I don't think this is desirable. > > Suzuki > No, this is not supposed to happen and I do not see how it could happen. __io_ar() is defined as __iormb() and __io_bw() is __iowmb(). I checked the disassembly in both cases with MMIO trace off/on with __etm4_cpu_save() as below and saw the same number of "dmb"s. aarch64-linux-gnu-gdb -batch -ex "disassemble/rs __etm4_cpu_save" vmlinux-without-mmio aarch64-linux-gnu-gdb -batch -ex "disassemble/rs __etm4_cpu_save" vmlinux-with-mmio Can you tell me how you are validating this, in case I am missing something? 
Thanks, Sai > >> Cc: Mathieu Poirier <mathieu.poirier@linaro.org> >> Cc: Suzuki K Poulose <suzuki.poulose@arm.com> >> Signed-off-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com> >> Reviewed-by: Arnd Bergmann <arnd@arndb.de> >> --- >> drivers/hwtracing/coresight/coresight-etm4x-core.c | 8 ++++---- >> drivers/hwtracing/coresight/coresight-etm4x.h | 8 ++++---- >> 2 files changed, 8 insertions(+), 8 deletions(-) >> >> diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c >> index 7f416a12000e..81c0faf45b28 100644 >> --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c >> +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c >> @@ -98,7 +98,7 @@ u64 etm4x_sysreg_read(u32 offset, bool _relaxed, bool _64bit) >> } >> if (!_relaxed) >> - __iormb(res); /* Imitate the !relaxed I/O helpers */ >> + __io_ar(res); /* Imitate the !relaxed I/O helpers */ >> return res; >> } >> @@ -106,7 +106,7 @@ u64 etm4x_sysreg_read(u32 offset, bool _relaxed, bool _64bit) >> void etm4x_sysreg_write(u64 val, u32 offset, bool _relaxed, bool _64bit) >> { >> if (!_relaxed) >> - __iowmb(); /* Imitate the !relaxed I/O helpers */ >> + __io_bw(); /* Imitate the !relaxed I/O helpers */ >> if (!_64bit) >> val &= GENMASK(31, 0); >> @@ -130,7 +130,7 @@ static u64 ete_sysreg_read(u32 offset, bool _relaxed, bool _64bit) >> } >> if (!_relaxed) >> - __iormb(res); /* Imitate the !relaxed I/O helpers */ >> + __io_ar(res); /* Imitate the !relaxed I/O helpers */ >> return res; >> } >> @@ -138,7 +138,7 @@ static u64 ete_sysreg_read(u32 offset, bool _relaxed, bool _64bit) >> static void ete_sysreg_write(u64 val, u32 offset, bool _relaxed, bool _64bit) >> { >> if (!_relaxed) >> - __iowmb(); /* Imitate the !relaxed I/O helpers */ >> + __io_bw(); /* Imitate the !relaxed I/O helpers */ >> if (!_64bit) >> val &= GENMASK(31, 0); >> diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h >> index 
3c4d69b096ca..f54698731582 100644 >> --- a/drivers/hwtracing/coresight/coresight-etm4x.h >> +++ b/drivers/hwtracing/coresight/coresight-etm4x.h >> @@ -448,14 +448,14 @@ >> #define etm4x_read32(csa, offset) \ >> ({ \ >> u32 __val = etm4x_relaxed_read32((csa), (offset)); \ >> - __iormb(__val); \ >> + __io_ar(__val); \ >> __val; \ >> }) >> #define etm4x_read64(csa, offset) \ >> ({ \ >> u64 __val = etm4x_relaxed_read64((csa), (offset)); \ >> - __iormb(__val); \ >> + __io_ar(__val); \ >> __val; \ >> }) >> @@ -479,13 +479,13 @@ >> #define etm4x_write32(csa, val, offset) \ >> do { \ >> - __iowmb(); \ >> + __io_bw(); \ >> etm4x_relaxed_write32((csa), (val), (offset)); \ >> } while (0) >> #define etm4x_write64(csa, val, offset) \ >> do { \ >> - __iowmb(); \ >> + __io_bw(); \ >> etm4x_relaxed_write64((csa), (val), (offset)); \ >> } while (0) >
On 06/05/2022 04:02, Sai Prakash Ranjan wrote: > Hi Suzuki, > > On 5/6/2022 5:14 AM, Suzuki K Poulose wrote: >> Hi, >> >> On 04/05/2022 12:28, Sai Prakash Ranjan wrote: >>> Per discussion in [1], it was decided to move to using architecture >>> independent/asm-generic IO memory barriers to have just one set of >>> them and deprecate use of arm64 specific IO memory barriers in driver >>> code. So replace current usage of __iormb()/__iowmb() in drivers to >>> __io_ar()/__io_bw(). >>> >>> [1] >>> https://lore.kernel.org/lkml/CAK8P3a0L2tLeF1Q0+0ijUxhGNaw+Z0fyPC1oW6_ELQfn0=i4iw@mail.gmail.com/ >>> >>> >> >> Looking at the dis-assembly it looks like in effect they are slightly >> different for arm64. >> >> i.e., before this patch we had >> >> "dmb osh{ld/st}" >> >> and after the patch we have : >> >> "dsb {ld/st}" >> >> Is this really what we want ? I don't think this is desirable. >> >> Suzuki >> > > No, this is not supposed to happen and I do not see how it could happen. > __io_ar() is defined as __iormb() and __io_bw() is __iowmb(). > > I checked the disassembly in both case with MMIO trace off/on with > __etm4_cpu_save() > as below and saw the same number of "dmb"s. > > aarch64-linux-gnu-gdb -batch -ex "disassemble/rs __etm4_cpu_save" > vmlinux-without-mmio > aarch64-linux-gnu-gdb -batch -ex "disassemble/rs __etm4_cpu_save" > vmlinux-with-mmio > > Can you tell me how are you validating if I am missing something? Apologies, I was missing the patch in this series that adds the arm64 definition for __io_ar/__io_bw. (hint: Please Cc the affected subsystem in the Cover letter for the series, so we know what the intention of the changes is). With patch 1 applied, yes, this makes sense to me. Without it, __io_ar() defaults to rmb(), which implies dsb ld. With that said, Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
On 5/6/2022 2:51 PM, Suzuki K Poulose wrote: > On 06/05/2022 04:02, Sai Prakash Ranjan wrote: >> Hi Suzuki, >> >> On 5/6/2022 5:14 AM, Suzuki K Poulose wrote: >>> Hi, >>> >>> On 04/05/2022 12:28, Sai Prakash Ranjan wrote: >>>> Per discussion in [1], it was decided to move to using architecture >>>> independent/asm-generic IO memory barriers to have just one set of >>>> them and deprecate use of arm64 specific IO memory barriers in driver >>>> code. So replace current usage of __io_rmb()/__iowmb() in drivers to >>>> __io_ar()/__io_bw(). >>>> >>>> [1] https://lore.kernel.org/lkml/CAK8P3a0L2tLeF1Q0+0ijUxhGNaw+Z0fyPC1oW6_ELQfn0=i4iw@mail.gmail.com/ >>>> >>> >>> Looking at the dis-assembly it looks like in effect they are slightly >>> different for arm64. >>> >>> i.e., before this patch we had >>> >>> "dmb osh{ld/st}" >>> >>> and after the patch we have : >>> >>> "dsb {ld/st}" >>> >>> Is this really what we want ? I don't think this is desirable. >>> >>> Suzuki >>> >> >> No, this is not supposed to happen and I do not see how it could happen. >> __io_ar() is defined as __iormb() and __io_bw() is __iowmb(). >> >> I checked the disassembly in both case with MMIO trace off/on with __etm4_cpu_save() >> as below and saw the same number of "dmb"s. >> >> aarch64-linux-gnu-gdb -batch -ex "disassemble/rs __etm4_cpu_save" vmlinux-without-mmio >> aarch64-linux-gnu-gdb -batch -ex "disassemble/rs __etm4_cpu_save" vmlinux-with-mmio >> >> Can you tell me how are you validating if I am missing something? > > Apologies, I was missing the patch in this series, which adds > the arm64 definition for __io_ar/__io_bw. (hint: Please Cc > the affected subsystem in the Cover letter for the series, so > we know what the intention of the changes are). > Sure, I will keep this in mind, apologies for the confusion. > With the patch1, yes this makes sense to me. Otherwise, __io_ar() > is default to rmb() which implies dsb ld. 
> > With that said, > > Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com> Thanks, Sai
diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c index 7f416a12000e..81c0faf45b28 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c @@ -98,7 +98,7 @@ u64 etm4x_sysreg_read(u32 offset, bool _relaxed, bool _64bit) } if (!_relaxed) - __iormb(res); /* Imitate the !relaxed I/O helpers */ + __io_ar(res); /* Imitate the !relaxed I/O helpers */ return res; } @@ -106,7 +106,7 @@ u64 etm4x_sysreg_read(u32 offset, bool _relaxed, bool _64bit) void etm4x_sysreg_write(u64 val, u32 offset, bool _relaxed, bool _64bit) { if (!_relaxed) - __iowmb(); /* Imitate the !relaxed I/O helpers */ + __io_bw(); /* Imitate the !relaxed I/O helpers */ if (!_64bit) val &= GENMASK(31, 0); @@ -130,7 +130,7 @@ static u64 ete_sysreg_read(u32 offset, bool _relaxed, bool _64bit) } if (!_relaxed) - __iormb(res); /* Imitate the !relaxed I/O helpers */ + __io_ar(res); /* Imitate the !relaxed I/O helpers */ return res; } @@ -138,7 +138,7 @@ static u64 ete_sysreg_read(u32 offset, bool _relaxed, bool _64bit) static void ete_sysreg_write(u64 val, u32 offset, bool _relaxed, bool _64bit) { if (!_relaxed) - __iowmb(); /* Imitate the !relaxed I/O helpers */ + __io_bw(); /* Imitate the !relaxed I/O helpers */ if (!_64bit) val &= GENMASK(31, 0); diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h index 3c4d69b096ca..f54698731582 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.h +++ b/drivers/hwtracing/coresight/coresight-etm4x.h @@ -448,14 +448,14 @@ #define etm4x_read32(csa, offset) \ ({ \ u32 __val = etm4x_relaxed_read32((csa), (offset)); \ - __iormb(__val); \ + __io_ar(__val); \ __val; \ }) #define etm4x_read64(csa, offset) \ ({ \ u64 __val = etm4x_relaxed_read64((csa), (offset)); \ - __iormb(__val); \ + __io_ar(__val); \ __val; \ }) @@ -479,13 +479,13 @@ #define etm4x_write32(csa, val, offset) \ do { \ - 
__iowmb(); \ + __io_bw(); \ etm4x_relaxed_write32((csa), (val), (offset)); \ } while (0) #define etm4x_write64(csa, val, offset) \ do { \ - __iowmb(); \ + __io_bw(); \ etm4x_relaxed_write64((csa), (val), (offset)); \ } while (0)