Message ID | 20230727141428.962286-10-alexghiti@rivosinc.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | riscv: Allow userspace to directly access perf counters | expand |
On Thu, Jul 27, 2023 at 7:28 AM Alexandre Ghiti <alexghiti@rivosinc.com> wrote: > > riscv now supports mmaping hardware counters so add what's needed to > take advantage of that in libperf. > > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com> > Reviewed-by: Andrew Jones <ajones@ventanamicro.com> > Reviewed-by: Atish Patra <atishp@rivosinc.com> > --- > tools/lib/perf/mmap.c | 65 +++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 65 insertions(+) > > diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c > index 0d1634cedf44..378a163f0554 100644 > --- a/tools/lib/perf/mmap.c > +++ b/tools/lib/perf/mmap.c > @@ -392,6 +392,71 @@ static u64 read_perf_counter(unsigned int counter) > > static u64 read_timestamp(void) { return read_sysreg(cntvct_el0); } > > +#elif __riscv_xlen == 64 This is something of an odd guard, perhaps: #elif defined(__riscv) && __riscv_xlen == 64 That way it is more intention revealing that this is riscv code. Could you add a comment relating to the __riscv_xlen ? > + > +/* TODO: implement rv32 support */ > + > +#define CSR_CYCLE 0xc00 > +#define CSR_TIME 0xc01 > + > +#define csr_read(csr) \ > +({ \ > + register unsigned long __v; \ > + __asm__ __volatile__ ("csrr %0, " #csr \ > + : "=r" (__v) : \ > + : "memory"); \ To avoid the macro pasting that could potentially go weird, could this be: __asm__ __volatile__ ("csrr %0, %1", : "=r"(__v) /* outputs */ : "i"(csr) /* inputs */ : "memory" /* clobbers */) Also, why is this clobbering memory? Worth adding a comment. 
Thanks, Ian > + __v; \ > +}) > + > +static unsigned long csr_read_num(int csr_num) > +{ > +#define switchcase_csr_read(__csr_num, __val) {\ > + case __csr_num: \ > + __val = csr_read(__csr_num); \ > + break; } > +#define switchcase_csr_read_2(__csr_num, __val) {\ > + switchcase_csr_read(__csr_num + 0, __val) \ > + switchcase_csr_read(__csr_num + 1, __val)} > +#define switchcase_csr_read_4(__csr_num, __val) {\ > + switchcase_csr_read_2(__csr_num + 0, __val) \ > + switchcase_csr_read_2(__csr_num + 2, __val)} > +#define switchcase_csr_read_8(__csr_num, __val) {\ > + switchcase_csr_read_4(__csr_num + 0, __val) \ > + switchcase_csr_read_4(__csr_num + 4, __val)} > +#define switchcase_csr_read_16(__csr_num, __val) {\ > + switchcase_csr_read_8(__csr_num + 0, __val) \ > + switchcase_csr_read_8(__csr_num + 8, __val)} > +#define switchcase_csr_read_32(__csr_num, __val) {\ > + switchcase_csr_read_16(__csr_num + 0, __val) \ > + switchcase_csr_read_16(__csr_num + 16, __val)} > + > + unsigned long ret = 0; > + > + switch (csr_num) { > + switchcase_csr_read_32(CSR_CYCLE, ret) > + default: > + break; > + } > + > + return ret; > +#undef switchcase_csr_read_32 > +#undef switchcase_csr_read_16 > +#undef switchcase_csr_read_8 > +#undef switchcase_csr_read_4 > +#undef switchcase_csr_read_2 > +#undef switchcase_csr_read > +} > + > +static u64 read_perf_counter(unsigned int counter) > +{ > + return csr_read_num(CSR_CYCLE + counter); > +} > + > +static u64 read_timestamp(void) > +{ > + return csr_read_num(CSR_TIME); > +} > + > #else > static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; } > static u64 read_timestamp(void) { return 0; } > -- > 2.39.2 >
Hi Ian, On Fri, Jul 28, 2023 at 7:53 PM Ian Rogers <irogers@google.com> wrote: > > On Thu, Jul 27, 2023 at 7:28 AM Alexandre Ghiti <alexghiti@rivosinc.com> wrote: > > > > riscv now supports mmaping hardware counters so add what's needed to > > take advantage of that in libperf. > > > > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com> > > Reviewed-by: Andrew Jones <ajones@ventanamicro.com> > > Reviewed-by: Atish Patra <atishp@rivosinc.com> > > --- > > tools/lib/perf/mmap.c | 65 +++++++++++++++++++++++++++++++++++++++++++ > > 1 file changed, 65 insertions(+) > > > > diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c > > index 0d1634cedf44..378a163f0554 100644 > > --- a/tools/lib/perf/mmap.c > > +++ b/tools/lib/perf/mmap.c > > @@ -392,6 +392,71 @@ static u64 read_perf_counter(unsigned int counter) > > > > static u64 read_timestamp(void) { return read_sysreg(cntvct_el0); } > > > > +#elif __riscv_xlen == 64 > > This is something of an odd guard, perhaps: > #elif defined(__riscv) && __riscv_xlen == 64 > > That way it is more intention revealing that this is riscv code. Could > you add a comment relating to the __riscv_xlen ? I guess Andrew answered that already. > > > + > > +/* TODO: implement rv32 support */ > > + > > +#define CSR_CYCLE 0xc00 > > +#define CSR_TIME 0xc01 > > + > > +#define csr_read(csr) \ > > +({ \ > > + register unsigned long __v; \ > > + __asm__ __volatile__ ("csrr %0, " #csr \ > > + : "=r" (__v) : \ > > + : "memory"); \ > > To avoid the macro pasting that could potentially go weird, could this be: > > __asm__ __volatile__ ("csrr %0, %1", > : "=r"(__v) /* outputs */ > : "i"(csr) /* inputs */ > : "memory" /* clobbers */) > > Also, why is this clobbering memory? Worth adding a comment. No idea, I see that it is also done this way in arch/riscv/include/asm/csr.h. @Atish Kumar Patra , @Palmer Dabbelt ? Thanks for your comments! 
Alex > > Thanks, > Ian > > > + __v; \ > > +}) > > + > > +static unsigned long csr_read_num(int csr_num) > > +{ > > +#define switchcase_csr_read(__csr_num, __val) {\ > > + case __csr_num: \ > > + __val = csr_read(__csr_num); \ > > + break; } > > +#define switchcase_csr_read_2(__csr_num, __val) {\ > > + switchcase_csr_read(__csr_num + 0, __val) \ > > + switchcase_csr_read(__csr_num + 1, __val)} > > +#define switchcase_csr_read_4(__csr_num, __val) {\ > > + switchcase_csr_read_2(__csr_num + 0, __val) \ > > + switchcase_csr_read_2(__csr_num + 2, __val)} > > +#define switchcase_csr_read_8(__csr_num, __val) {\ > > + switchcase_csr_read_4(__csr_num + 0, __val) \ > > + switchcase_csr_read_4(__csr_num + 4, __val)} > > +#define switchcase_csr_read_16(__csr_num, __val) {\ > > + switchcase_csr_read_8(__csr_num + 0, __val) \ > > + switchcase_csr_read_8(__csr_num + 8, __val)} > > +#define switchcase_csr_read_32(__csr_num, __val) {\ > > + switchcase_csr_read_16(__csr_num + 0, __val) \ > > + switchcase_csr_read_16(__csr_num + 16, __val)} > > + > > + unsigned long ret = 0; > > + > > + switch (csr_num) { > > + switchcase_csr_read_32(CSR_CYCLE, ret) > > + default: > > + break; > > + } > > + > > + return ret; > > +#undef switchcase_csr_read_32 > > +#undef switchcase_csr_read_16 > > +#undef switchcase_csr_read_8 > > +#undef switchcase_csr_read_4 > > +#undef switchcase_csr_read_2 > > +#undef switchcase_csr_read > > +} > > + > > +static u64 read_perf_counter(unsigned int counter) > > +{ > > + return csr_read_num(CSR_CYCLE + counter); > > +} > > + > > +static u64 read_timestamp(void) > > +{ > > + return csr_read_num(CSR_TIME); > > +} > > + > > #else > > static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; } > > static u64 read_timestamp(void) { return 0; } > > -- > > 2.39.2 > >
On Mon, Jul 31, 2023 at 12:15 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote: > > Hi Ian, > > On Fri, Jul 28, 2023 at 7:53 PM Ian Rogers <irogers@google.com> wrote: > > > > On Thu, Jul 27, 2023 at 7:28 AM Alexandre Ghiti <alexghiti@rivosinc.com> wrote: > > > > > > riscv now supports mmaping hardware counters so add what's needed to > > > take advantage of that in libperf. > > > > > > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com> > > > Reviewed-by: Andrew Jones <ajones@ventanamicro.com> > > > Reviewed-by: Atish Patra <atishp@rivosinc.com> > > > --- > > > tools/lib/perf/mmap.c | 65 +++++++++++++++++++++++++++++++++++++++++++ > > > 1 file changed, 65 insertions(+) > > > > > > diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c > > > index 0d1634cedf44..378a163f0554 100644 > > > --- a/tools/lib/perf/mmap.c > > > +++ b/tools/lib/perf/mmap.c > > > @@ -392,6 +392,71 @@ static u64 read_perf_counter(unsigned int counter) > > > > > > static u64 read_timestamp(void) { return read_sysreg(cntvct_el0); } > > > > > > +#elif __riscv_xlen == 64 > > > > This is something of an odd guard, perhaps: > > #elif defined(__riscv) && __riscv_xlen == 64 > > > > That way it is more intention revealing that this is riscv code. Could > > you add a comment relating to the __riscv_xlen ? > > I guess Andrew answered that already. 
> > > > > > + > > > +/* TODO: implement rv32 support */ > > > + > > > +#define CSR_CYCLE 0xc00 > > > +#define CSR_TIME 0xc01 > > > + > > > +#define csr_read(csr) \ > > > +({ \ > > > + register unsigned long __v; \ > > > + __asm__ __volatile__ ("csrr %0, " #csr \ > > > + : "=r" (__v) : \ > > > + : "memory"); \ > > > > To avoid the macro pasting that could potentially go weird, could this be: > > > > __asm__ __volatile__ ("csrr %0, %1", > > : "=r"(__v) /* outputs */ > > : "i"(csr) /* inputs */ > > : "memory" /* clobbers */) Forgot to answer this one: it compiles, but I have to admit that I don't understand the difference and if that's correct (all macros in arch/riscv/include/asm/csr.h use # to do this) and what benefits it brings. Can you elaborate more on things that could "go weird"? Thanks again, Alex > > > > Also, why is this clobbering memory? Worth adding a comment. > > No idea, I see that it is also done this way in > arch/riscv/include/asm/csr.h. @Atish Kumar Patra , @Palmer Dabbelt ? > > Thanks for your comments! 
> > Alex > > > > > Thanks, > > Ian > > > > > + __v; \ > > > +}) > > > + > > > +static unsigned long csr_read_num(int csr_num) > > > +{ > > > +#define switchcase_csr_read(__csr_num, __val) {\ > > > + case __csr_num: \ > > > + __val = csr_read(__csr_num); \ > > > + break; } > > > +#define switchcase_csr_read_2(__csr_num, __val) {\ > > > + switchcase_csr_read(__csr_num + 0, __val) \ > > > + switchcase_csr_read(__csr_num + 1, __val)} > > > +#define switchcase_csr_read_4(__csr_num, __val) {\ > > > + switchcase_csr_read_2(__csr_num + 0, __val) \ > > > + switchcase_csr_read_2(__csr_num + 2, __val)} > > > +#define switchcase_csr_read_8(__csr_num, __val) {\ > > > + switchcase_csr_read_4(__csr_num + 0, __val) \ > > > + switchcase_csr_read_4(__csr_num + 4, __val)} > > > +#define switchcase_csr_read_16(__csr_num, __val) {\ > > > + switchcase_csr_read_8(__csr_num + 0, __val) \ > > > + switchcase_csr_read_8(__csr_num + 8, __val)} > > > +#define switchcase_csr_read_32(__csr_num, __val) {\ > > > + switchcase_csr_read_16(__csr_num + 0, __val) \ > > > + switchcase_csr_read_16(__csr_num + 16, __val)} > > > + > > > + unsigned long ret = 0; > > > + > > > + switch (csr_num) { > > > + switchcase_csr_read_32(CSR_CYCLE, ret) > > > + default: > > > + break; > > > + } > > > + > > > + return ret; > > > +#undef switchcase_csr_read_32 > > > +#undef switchcase_csr_read_16 > > > +#undef switchcase_csr_read_8 > > > +#undef switchcase_csr_read_4 > > > +#undef switchcase_csr_read_2 > > > +#undef switchcase_csr_read > > > +} > > > + > > > +static u64 read_perf_counter(unsigned int counter) > > > +{ > > > + return csr_read_num(CSR_CYCLE + counter); > > > +} > > > + > > > +static u64 read_timestamp(void) > > > +{ > > > + return csr_read_num(CSR_TIME); > > > +} > > > + > > > #else > > > static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; } > > > static u64 read_timestamp(void) { return 0; } > > > -- > > > 2.39.2 > > >
On Mon, Jul 31, 2023 at 3:27 AM Alexandre Ghiti <alexghiti@rivosinc.com> wrote: > > On Mon, Jul 31, 2023 at 12:15 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote: > > > > Hi Ian, > > > > On Fri, Jul 28, 2023 at 7:53 PM Ian Rogers <irogers@google.com> wrote: > > > > > > On Thu, Jul 27, 2023 at 7:28 AM Alexandre Ghiti <alexghiti@rivosinc.com> wrote: > > > > > > > > riscv now supports mmaping hardware counters so add what's needed to > > > > take advantage of that in libperf. > > > > > > > > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com> > > > > Reviewed-by: Andrew Jones <ajones@ventanamicro.com> > > > > Reviewed-by: Atish Patra <atishp@rivosinc.com> > > > > --- > > > > tools/lib/perf/mmap.c | 65 +++++++++++++++++++++++++++++++++++++++++++ > > > > 1 file changed, 65 insertions(+) > > > > > > > > diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c > > > > index 0d1634cedf44..378a163f0554 100644 > > > > --- a/tools/lib/perf/mmap.c > > > > +++ b/tools/lib/perf/mmap.c > > > > @@ -392,6 +392,71 @@ static u64 read_perf_counter(unsigned int counter) > > > > > > > > static u64 read_timestamp(void) { return read_sysreg(cntvct_el0); } > > > > > > > > +#elif __riscv_xlen == 64 > > > > > > This is something of an odd guard, perhaps: > > > #elif defined(__riscv) && __riscv_xlen == 64 > > > > > > That way it is more intention revealing that this is riscv code. Could > > > you add a comment relating to the __riscv_xlen ? > > > > I guess Andrew answered that already. > > Not sure. I still think it looks weird: ... #if defined(__i386__) || defined(__x86_64__) ... #elif defined(__aarch64__) ... #elif __riscv_xlen == 64 ... 
#else static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; } static u64 read_timestamp(void) { return 0; } #endif The first two are clearly #ifdef-ing architecture specific assembly code, under what conditions I get RISC-V code ¯\(ツ)/¯ At least worth a comment like "csrr is only available when you have xlens of 64 because ..." > > > > > > > + > > > > +/* TODO: implement rv32 support */ > > > > + > > > > +#define CSR_CYCLE 0xc00 > > > > +#define CSR_TIME 0xc01 > > > > + > > > > +#define csr_read(csr) \ > > > > +({ \ > > > > + register unsigned long __v; \ > > > > + __asm__ __volatile__ ("csrr %0, " #csr \ > > > > + : "=r" (__v) : \ > > > > + : "memory"); \ > > > > > > To avoid the macro pasting that could potentially go weird, could this be: > > > > > > __asm__ __volatile__ ("csrr %0, %1", > > > : "=r"(__v) /* outputs */ > > > : "i"(csr) /* inputs */ > > > : "memory" /* clobbers */) > > Forgot to answer this one: it compiles, but I have to admit that I > don't understand the difference and if that's correct (all macros in > arch/riscv/include/asm/csr.h use # to do this) and what benefits it > brings. Can you elaborate more on things that could "go weird"? So rather than use an input constraint for the asm block you are using the C preprocessor to paste in the csr argument. If csr is something like "1" then it looks good and you'll get "csrr %0,1". If you pass something like "1 << 31" then that will be pasted as "csrr %0, 1 << 31" and that starts to get weird in the context of being in the assembler where it is unlikely the C operators work. Using the input constraint avoids this, causes the C compiler to check the type of the argument and you'll probably get more intelligible error messages as a consequence. > > Thanks again, > > Alex > > > > > > > Also, why is this clobbering memory? Worth adding a comment. > > > > No idea, I see that it is also done this way in > > arch/riscv/include/asm/csr.h. @Atish Kumar Patra , @Palmer Dabbelt ? 
It would seem to make sense then not to have a memory constraint until we know why we're doing it? Thanks, Ian > > > > Thanks for your comments! > > > > Alex > > > > > > > > Thanks, > > > Ian > > > > > > > + __v; \ > > > > +}) > > > > + > > > > +static unsigned long csr_read_num(int csr_num) > > > > +{ > > > > +#define switchcase_csr_read(__csr_num, __val) {\ > > > > + case __csr_num: \ > > > > + __val = csr_read(__csr_num); \ > > > > + break; } > > > > +#define switchcase_csr_read_2(__csr_num, __val) {\ > > > > + switchcase_csr_read(__csr_num + 0, __val) \ > > > > + switchcase_csr_read(__csr_num + 1, __val)} > > > > +#define switchcase_csr_read_4(__csr_num, __val) {\ > > > > + switchcase_csr_read_2(__csr_num + 0, __val) \ > > > > + switchcase_csr_read_2(__csr_num + 2, __val)} > > > > +#define switchcase_csr_read_8(__csr_num, __val) {\ > > > > + switchcase_csr_read_4(__csr_num + 0, __val) \ > > > > + switchcase_csr_read_4(__csr_num + 4, __val)} > > > > +#define switchcase_csr_read_16(__csr_num, __val) {\ > > > > + switchcase_csr_read_8(__csr_num + 0, __val) \ > > > > + switchcase_csr_read_8(__csr_num + 8, __val)} > > > > +#define switchcase_csr_read_32(__csr_num, __val) {\ > > > > + switchcase_csr_read_16(__csr_num + 0, __val) \ > > > > + switchcase_csr_read_16(__csr_num + 16, __val)} > > > > + > > > > + unsigned long ret = 0; > > > > + > > > > + switch (csr_num) { > > > > + switchcase_csr_read_32(CSR_CYCLE, ret) > > > > + default: > > > > + break; > > > > + } > > > > + > > > > + return ret; > > > > +#undef switchcase_csr_read_32 > > > > +#undef switchcase_csr_read_16 > > > > +#undef switchcase_csr_read_8 > > > > +#undef switchcase_csr_read_4 > > > > +#undef switchcase_csr_read_2 > > > > +#undef switchcase_csr_read > > > > +} > > > > + > > > > +static u64 read_perf_counter(unsigned int counter) > > > > +{ > > > > + return csr_read_num(CSR_CYCLE + counter); > > > > +} > > > > + > > > > +static u64 read_timestamp(void) > > > > +{ > > > > + return 
csr_read_num(CSR_TIME); > > > > +} > > > > + > > > > #else > > > > static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; } > > > > static u64 read_timestamp(void) { return 0; } > > > > -- > > > > 2.39.2 > > > >
On Mon, Jul 31, 2023 at 5:10 PM Ian Rogers <irogers@google.com> wrote: > > On Mon, Jul 31, 2023 at 3:27 AM Alexandre Ghiti <alexghiti@rivosinc.com> wrote: > > > > On Mon, Jul 31, 2023 at 12:15 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote: > > > > > > Hi Ian, > > > > > > On Fri, Jul 28, 2023 at 7:53 PM Ian Rogers <irogers@google.com> wrote: > > > > > > > > On Thu, Jul 27, 2023 at 7:28 AM Alexandre Ghiti <alexghiti@rivosinc.com> wrote: > > > > > > > > > > riscv now supports mmaping hardware counters so add what's needed to > > > > > take advantage of that in libperf. > > > > > > > > > > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com> > > > > > Reviewed-by: Andrew Jones <ajones@ventanamicro.com> > > > > > Reviewed-by: Atish Patra <atishp@rivosinc.com> > > > > > --- > > > > > tools/lib/perf/mmap.c | 65 +++++++++++++++++++++++++++++++++++++++++++ > > > > > 1 file changed, 65 insertions(+) > > > > > > > > > > diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c > > > > > index 0d1634cedf44..378a163f0554 100644 > > > > > --- a/tools/lib/perf/mmap.c > > > > > +++ b/tools/lib/perf/mmap.c > > > > > @@ -392,6 +392,71 @@ static u64 read_perf_counter(unsigned int counter) > > > > > > > > > > static u64 read_timestamp(void) { return read_sysreg(cntvct_el0); } > > > > > > > > > > +#elif __riscv_xlen == 64 > > > > > > > > This is something of an odd guard, perhaps: > > > > #elif defined(__riscv) && __riscv_xlen == 64 > > > > > > > > That way it is more intention revealing that this is riscv code. Could > > > > you add a comment relating to the __riscv_xlen ? > > > > > > I guess Andrew answered that already. > > > > > Not sure. I still think it looks weird: > ... > #if defined(__i386__) || defined(__x86_64__) > ... > #elif defined(__aarch64__) > ... > #elif __riscv_xlen == 64 > ... 
> #else > static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; } > static u64 read_timestamp(void) { return 0; } > #endif > > The first two are clearly #ifdef-ing architecture specific assembly > code, under what conditions I get RISC-V code ¯\(ツ)/¯ At least worth > a comment like "csrr is only available when you have xlens of 64 > because ..." __riscv_xlen indicates riscv64, which is the only target of this patchset. But if you prefer, I don't mind adding back the defined(__riscv) if I re-spin a new version. > > > > > > > > > > + > > > > > +/* TODO: implement rv32 support */ > > > > > + > > > > > +#define CSR_CYCLE 0xc00 > > > > > +#define CSR_TIME 0xc01 > > > > > + > > > > > +#define csr_read(csr) \ > > > > > +({ \ > > > > > + register unsigned long __v; \ > > > > > + __asm__ __volatile__ ("csrr %0, " #csr \ > > > > > + : "=r" (__v) : \ > > > > > + : "memory"); \ > > > > > > > > To avoid the macro pasting that could potentially go weird, could this be: > > > > > > > > __asm__ __volatile__ ("csrr %0, %1", > > > > : "=r"(__v) /* outputs */ > > > > : "i"(csr) /* inputs */ > > > > : "memory" /* clobbers */) > > > > Forgot to answer this one: it compiles, but I have to admit that I > > don't understand the difference and if that's correct (all macros in > > arch/riscv/include/asm/csr.h use # to do this) and what benefits it > > brings. Can you elaborate more on things that could "go weird"? > > So rather than use an input constraint for the asm block you are using > the C preprocessor to paste in the csr argument. If csr is something > like "1" then it looks good and you'll get "csrr %0,1". If you pass > something like "1 << 31" then that will be pasted as "csrr %0, 1 << > 31" and that starts to get weird in the context of being in the > assembler where it is unlikely the C operators work. 
Using the input > constraint avoids this, causes the C compiler to check the type of the > argument and you'll probably get more intelligible error messages as a > consequence. > Thanks. So if I'm not mistaken, in this exact context, given we only use csr_read() through the csr_read_num() function, it seems ok right? > > > > Thanks again, > > > > Alex > > > > > > > > > > Also, why is this clobbering memory? Worth adding a comment. > > > > > > No idea, I see that it is also done this way in > > > arch/riscv/include/asm/csr.h. @Atish Kumar Patra , @Palmer Dabbelt ? > > It would seem to make sense then not to have a memory constraint until > we know why we're doing it? > I have just had the answer internally (thanks to @Brendan Sweeney): csr modifications can alter how the memory is accessed (satp which changes the address space, sum which allows/disallows userspace access...), so we need the memory barrier to make sure the compiler does not reorder the memory accesses. Thanks, Alex > Thanks, > Ian > > > > > > > Thanks for your comments! 
> > > > > > Alex > > > > > > > > > > > Thanks, > > > > Ian > > > > > > > > > + __v; \ > > > > > +}) > > > > > + > > > > > +static unsigned long csr_read_num(int csr_num) > > > > > +{ > > > > > +#define switchcase_csr_read(__csr_num, __val) {\ > > > > > + case __csr_num: \ > > > > > + __val = csr_read(__csr_num); \ > > > > > + break; } > > > > > +#define switchcase_csr_read_2(__csr_num, __val) {\ > > > > > + switchcase_csr_read(__csr_num + 0, __val) \ > > > > > + switchcase_csr_read(__csr_num + 1, __val)} > > > > > +#define switchcase_csr_read_4(__csr_num, __val) {\ > > > > > + switchcase_csr_read_2(__csr_num + 0, __val) \ > > > > > + switchcase_csr_read_2(__csr_num + 2, __val)} > > > > > +#define switchcase_csr_read_8(__csr_num, __val) {\ > > > > > + switchcase_csr_read_4(__csr_num + 0, __val) \ > > > > > + switchcase_csr_read_4(__csr_num + 4, __val)} > > > > > +#define switchcase_csr_read_16(__csr_num, __val) {\ > > > > > + switchcase_csr_read_8(__csr_num + 0, __val) \ > > > > > + switchcase_csr_read_8(__csr_num + 8, __val)} > > > > > +#define switchcase_csr_read_32(__csr_num, __val) {\ > > > > > + switchcase_csr_read_16(__csr_num + 0, __val) \ > > > > > + switchcase_csr_read_16(__csr_num + 16, __val)} > > > > > + > > > > > + unsigned long ret = 0; > > > > > + > > > > > + switch (csr_num) { > > > > > + switchcase_csr_read_32(CSR_CYCLE, ret) > > > > > + default: > > > > > + break; > > > > > + } > > > > > + > > > > > + return ret; > > > > > +#undef switchcase_csr_read_32 > > > > > +#undef switchcase_csr_read_16 > > > > > +#undef switchcase_csr_read_8 > > > > > +#undef switchcase_csr_read_4 > > > > > +#undef switchcase_csr_read_2 > > > > > +#undef switchcase_csr_read > > > > > +} > > > > > + > > > > > +static u64 read_perf_counter(unsigned int counter) > > > > > +{ > > > > > + return csr_read_num(CSR_CYCLE + counter); > > > > > +} > > > > > + > > > > > +static u64 read_timestamp(void) > > > > > +{ > > > > > + return csr_read_num(CSR_TIME); > > > > > +} > > > > > + > 
> > > > #else > > > > > static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; } > > > > > static u64 read_timestamp(void) { return 0; } > > > > > -- > > > > > 2.39.2 > > > > >
On Mon, Jul 31, 2023 at 9:06 AM Alexandre Ghiti <alexghiti@rivosinc.com> wrote: > > On Mon, Jul 31, 2023 at 5:10 PM Ian Rogers <irogers@google.com> wrote: > > > > On Mon, Jul 31, 2023 at 3:27 AM Alexandre Ghiti <alexghiti@rivosinc.com> wrote: > > > > > > On Mon, Jul 31, 2023 at 12:15 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote: > > > > > > > > Hi Ian, > > > > > > > > On Fri, Jul 28, 2023 at 7:53 PM Ian Rogers <irogers@google.com> wrote: > > > > > > > > > > On Thu, Jul 27, 2023 at 7:28 AM Alexandre Ghiti <alexghiti@rivosinc.com> wrote: > > > > > > > > > > > > riscv now supports mmaping hardware counters so add what's needed to > > > > > > take advantage of that in libperf. > > > > > > > > > > > > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com> > > > > > > Reviewed-by: Andrew Jones <ajones@ventanamicro.com> > > > > > > Reviewed-by: Atish Patra <atishp@rivosinc.com> > > > > > > --- > > > > > > tools/lib/perf/mmap.c | 65 +++++++++++++++++++++++++++++++++++++++++++ > > > > > > 1 file changed, 65 insertions(+) > > > > > > > > > > > > diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c > > > > > > index 0d1634cedf44..378a163f0554 100644 > > > > > > --- a/tools/lib/perf/mmap.c > > > > > > +++ b/tools/lib/perf/mmap.c > > > > > > @@ -392,6 +392,71 @@ static u64 read_perf_counter(unsigned int counter) > > > > > > > > > > > > static u64 read_timestamp(void) { return read_sysreg(cntvct_el0); } > > > > > > > > > > > > +#elif __riscv_xlen == 64 > > > > > > > > > > This is something of an odd guard, perhaps: > > > > > #elif defined(__riscv) && __riscv_xlen == 64 > > > > > > > > > > That way it is more intention revealing that this is riscv code. Could > > > > > you add a comment relating to the __riscv_xlen ? > > > > > > > > I guess Andrew answered that already. > > > > > > > > Not sure. I still think it looks weird: > > ... > > #if defined(__i386__) || defined(__x86_64__) > > ... > > #elif defined(__aarch64__) > > ... > > #elif __riscv_xlen == 64 > > ... 
> > #else > > static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; } > > static u64 read_timestamp(void) { return 0; } > > #endif > > > > The first two are clearly #ifdef-ing architecture specific assembly > > code, under what conditions I get RISC-V code ¯\(ツ)/¯ At least worth > > a comment like "csrr is only available when you have xlens of 64 > > because ..." > > __riscv_xlen indicates riscv64, which is the only target of this > patchset. But if you prefer, I don't mind adding back the > defined(__riscv) if I re-spin a new version. This kind of begs the question as to why there is no __riscv64 ifdef. The issue with xlen is it isn't intention revealing so for regular people trying to understand the code it would be nice to document it. > > > > > > > > > > > > > + > > > > > > +/* TODO: implement rv32 support */ > > > > > > + > > > > > > +#define CSR_CYCLE 0xc00 > > > > > > +#define CSR_TIME 0xc01 > > > > > > + > > > > > > +#define csr_read(csr) \ > > > > > > +({ \ > > > > > > + register unsigned long __v; \ > > > > > > + __asm__ __volatile__ ("csrr %0, " #csr \ > > > > > > + : "=r" (__v) : \ > > > > > > + : "memory"); \ > > > > > > > > > > To avoid the macro pasting that could potentially go weird, could this be: > > > > > > > > > > __asm__ __volatile__ ("csrr %0, %1", > > > > > : "=r"(__v) /* outputs */ > > > > > : "i"(csr) /* inputs */ > > > > > : "memory" /* clobbers */) > > > > > > Forgot to answer this one: it compiles, but I have to admit that I > > > don't understand the difference and if that's correct (all macros in > > > arch/riscv/include/asm/csr.h use # to do this) and what benefits it > > > brings. Can you elaborate more on things that could "go weird"? > > > > So rather than use an input constraint for the asm block you are using > > the C preprocessor to paste in the csr argument. If csr is something > > like "1" then it looks good and you'll get "csrr %0,1". 
If you pass > > something like "1 << 31" then that will be pasted as "csrr %0, 1 << > > 31" and that starts to get weird in the context of being in the > > assembler where it is unlikely the C operators work. Using the input > > constraint avoids this, causes the C compiler to check the type of the > > argument and you'll probably get more intelligible error messages as a > > consequence. > > > > Thanks. So if I'm not mistaken, in this exact context, given we only > > use csr_read() through the csr_read_num() function, it seems ok right? So you've formed a cargo cult and the justification is not wanting to stop a copy-paste chain from somewhere else. This code itself will be copy-pasted and we go to some ends to encourage that by placing parts of it in include/uapi/linux/perf_event.h. It seems better to catch this issue early rather than propagate it. > > > > > > Thanks again, > > > > > > Alex > > > > > > > > > > > > > Also, why is this clobbering memory? Worth adding a comment. > > > > > > > > No idea, I see that it is also done this way in > > > > arch/riscv/include/asm/csr.h. @Atish Kumar Patra , @Palmer Dabbelt ? > > > > It would seem to make sense then not to have a memory constraint until > > we know why we're doing it? > > > > I have just had the answer internally (thanks to @Brendan Sweeney): > csr modifications can alter how the memory is accessed (satp which > changes the address space, sum which allows/disallows userspace > access...), so we need the memory barrier to make sure the compiler > does not reorder the memory accesses. The conditions you mention shouldn't apply here though? Even if you add a memory barrier for the compiler what is stopping the hardware reordering loads and stores? If it absolutely has to be there then a comment something like "There is a bug in riscv where the csrr instruction can clobber memory breaking future reads and somehow this constraint fixes it by ... ". 
Thanks, Ian > Thanks, > > Alex > > > Thanks, > > Ian > > > > > > > > > > Thanks for your comments! > > > > > > > > Alex > > > > > > > > > > > > > > Thanks, > > > > > Ian > > > > > > > > > > > + __v; \ > > > > > > +}) > > > > > > + > > > > > > +static unsigned long csr_read_num(int csr_num) > > > > > > +{ > > > > > > +#define switchcase_csr_read(__csr_num, __val) {\ > > > > > > + case __csr_num: \ > > > > > > + __val = csr_read(__csr_num); \ > > > > > > + break; } > > > > > > +#define switchcase_csr_read_2(__csr_num, __val) {\ > > > > > > + switchcase_csr_read(__csr_num + 0, __val) \ > > > > > > + switchcase_csr_read(__csr_num + 1, __val)} > > > > > > +#define switchcase_csr_read_4(__csr_num, __val) {\ > > > > > > + switchcase_csr_read_2(__csr_num + 0, __val) \ > > > > > > + switchcase_csr_read_2(__csr_num + 2, __val)} > > > > > > +#define switchcase_csr_read_8(__csr_num, __val) {\ > > > > > > + switchcase_csr_read_4(__csr_num + 0, __val) \ > > > > > > + switchcase_csr_read_4(__csr_num + 4, __val)} > > > > > > +#define switchcase_csr_read_16(__csr_num, __val) {\ > > > > > > + switchcase_csr_read_8(__csr_num + 0, __val) \ > > > > > > + switchcase_csr_read_8(__csr_num + 8, __val)} > > > > > > +#define switchcase_csr_read_32(__csr_num, __val) {\ > > > > > > + switchcase_csr_read_16(__csr_num + 0, __val) \ > > > > > > + switchcase_csr_read_16(__csr_num + 16, __val)} > > > > > > + > > > > > > + unsigned long ret = 0; > > > > > > + > > > > > > + switch (csr_num) { > > > > > > + switchcase_csr_read_32(CSR_CYCLE, ret) > > > > > > + default: > > > > > > + break; > > > > > > + } > > > > > > + > > > > > > + return ret; > > > > > > +#undef switchcase_csr_read_32 > > > > > > +#undef switchcase_csr_read_16 > > > > > > +#undef switchcase_csr_read_8 > > > > > > +#undef switchcase_csr_read_4 > > > > > > +#undef switchcase_csr_read_2 > > > > > > +#undef switchcase_csr_read > > > > > > +} > > > > > > + > > > > > > +static u64 read_perf_counter(unsigned int counter) > > > > > > +{ > > 
> > > > + return csr_read_num(CSR_CYCLE + counter); > > > > > > +} > > > > > > + > > > > > > +static u64 read_timestamp(void) > > > > > > +{ > > > > > > + return csr_read_num(CSR_TIME); > > > > > > +} > > > > > > + > > > > > > #else > > > > > > static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; } > > > > > > static u64 read_timestamp(void) { return 0; } > > > > > > -- > > > > > > 2.39.2 > > > > > >
On 31 Jul 2023, at 17:06, Alexandre Ghiti <alexghiti@rivosinc.com> wrote: > > On Mon, Jul 31, 2023 at 5:10 PM Ian Rogers <irogers@google.com> wrote: >> >> On Mon, Jul 31, 2023 at 3:27 AM Alexandre Ghiti <alexghiti@rivosinc.com> wrote: >>> >>> On Mon, Jul 31, 2023 at 12:15 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote: >>>> >>>> Hi Ian, >>>> >>>> On Fri, Jul 28, 2023 at 7:53 PM Ian Rogers <irogers@google.com> wrote: >>>>> >>>>> On Thu, Jul 27, 2023 at 7:28 AM Alexandre Ghiti <alexghiti@rivosinc.com> wrote: >>>>>> >>>>>> riscv now supports mmaping hardware counters so add what's needed to >>>>>> take advantage of that in libperf. >>>>>> >>>>>> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com> >>>>>> Reviewed-by: Andrew Jones <ajones@ventanamicro.com> >>>>>> Reviewed-by: Atish Patra <atishp@rivosinc.com> >>>>>> --- >>>>>> tools/lib/perf/mmap.c | 65 +++++++++++++++++++++++++++++++++++++++++++ >>>>>> 1 file changed, 65 insertions(+) >>>>>> >>>>>> diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c >>>>>> index 0d1634cedf44..378a163f0554 100644 >>>>>> --- a/tools/lib/perf/mmap.c >>>>>> +++ b/tools/lib/perf/mmap.c >>>>>> @@ -392,6 +392,71 @@ static u64 read_perf_counter(unsigned int counter) >>>>>> >>>>>> static u64 read_timestamp(void) { return read_sysreg(cntvct_el0); } >>>>>> >>>>>> +#elif __riscv_xlen == 64 >>>>> >>>>> This is something of an odd guard, perhaps: >>>>> #elif defined(__riscv) && __riscv_xlen == 64 >>>>> >>>>> That way it is more intention revealing that this is riscv code. Could >>>>> you add a comment relating to the __riscv_xlen ? >>>> >>>> I guess Andrew answered that already. >>>> >> >> Not sure. I still think it looks weird: >> ... >> #if defined(__i386__) || defined(__x86_64__) >> ... >> #elif defined(__aarch64__) >> ... >> #elif __riscv_xlen == 64 >> ... 
>> #else >> static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; } >> static u64 read_timestamp(void) { return 0; } >> #endif >> >> The first two are clearly #ifdef-ing architecture specific assembly >> code, under what conditions I get RISC-V code ¯\(ツ)/¯ At least worth >> a comment like "csrr is only available when you have xlens of 64 >> because ..." > > __riscv_xlen indicates riscv64, which is the only target of this > patchset. But if you prefer, I don't mind adding back the > defined(__riscv) if I re-spin a new version. I mean, -Wundef is a thing that will scream at you if you evaluate a macro that’s undefined and get an implicit 0, and parts of the linux build seem to enable it. So I would strongly recommend against (ab)using that feature of the preprocessor, especially when it aligns with greater clarity. Jess
On Mon, Jul 31, 2023 at 09:46:07AM -0700, Ian Rogers wrote: > On Mon, Jul 31, 2023 at 9:06 AM Alexandre Ghiti <alexghiti@rivosinc.com> wrote: > > I have just had the answer internally (thanks to @Brendan Sweeney): > > csr modifications can alter how the memory is accessed (satp which > > changes the address space, sum which allows/disallows userspace > > access...), so we need the memory barrier to make sure the compiler > > does not reorder the memory accesses. > > The conditions you mention shouldn't apply here though? Even if you > add a memory barrier for the compiler what is stopping the hardware > reordering loads and stores? If it absolutely has to be there then a > comment something like "There is a bug in riscv where the csrr > instruction can clobber memory breaking future reads and somehow this > constraint fixes it by ... ". If the hardware doesn't know that writing to a csr can change how memory accesses are done and reorders memory accesses around that csr write, you've got a really broken piece of hardware on your hands ... I know nothing about risc-v, and maybe the definition says that you need to put a memory barrier before and/or after it in the instruction stream, but on all hardware I'm familiar with, writing to a CSR is an implicitly serialising operation.
On 31 Jul 2023, at 20:47, Matthew Wilcox <willy@infradead.org> wrote: > > On Mon, Jul 31, 2023 at 09:46:07AM -0700, Ian Rogers wrote: >> On Mon, Jul 31, 2023 at 9:06 AM Alexandre Ghiti <alexghiti@rivosinc.com> wrote: >>> I have just had the answer internally (thanks to @Brendan Sweeney): >>> csr modifications can alter how the memory is accessed (satp which >>> changes the address space, sum which allows/disallows userspace >>> access...), so we need the memory barrier to make sure the compiler >>> does not reorder the memory accesses. >> >> The conditions you mention shouldn't apply here though? Even if you >> add a memory barrier for the compiler what is stopping the hardware >> reordering loads and stores? If it absolutely has to be there then a >> comment something like "There is a bug is riscv where the csrr >> instruction can clobber memory breaking future reads and some how this >> constraint fixes it by ... ". > > If the hardware doesn't know that writing to a csr can change how memory > accesses are done and reorders memory accesses around that csr write, > you've got a really broken piece of hardware on your hands ... > > I know nothing about risc-v, and maybe the definition says that you need > to put a memory barrier before and/or after it in the instruction stream, > but on all hardware I'm familiar with, writing to a CSR is an implicitly > serialising operation. satp has some special rules due to the interaction with TLBs. Enabling / disabling translation by toggling between Bare and non-Bare modes doesn’t require any kind of fence, nor does changing the ASID (if not recycling), but any other changes to satp (changing between Sv39/Sv48/Sv57 or changing the page table base address) do require fences (not really sure why to be honest, maybe something about having separate kernel and user page tables?..). 
§5.2.1 Supervisor Memory-Management Fence Instruction of the privileged spec (the one I’m looking at is version 20211203 but dated October 4, 2022) details this. Jess
On Mon, Jul 31, 2023 at 6:46 PM Ian Rogers <irogers@google.com> wrote: > > On Mon, Jul 31, 2023 at 9:06 AM Alexandre Ghiti <alexghiti@rivosinc.com> wrote: > > > > On Mon, Jul 31, 2023 at 5:10 PM Ian Rogers <irogers@google.com> wrote: > > > > > > On Mon, Jul 31, 2023 at 3:27 AM Alexandre Ghiti <alexghiti@rivosinc.com> wrote: > > > > > > > > On Mon, Jul 31, 2023 at 12:15 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote: > > > > > > > > > > Hi Ian, > > > > > > > > > > On Fri, Jul 28, 2023 at 7:53 PM Ian Rogers <irogers@google.com> wrote: > > > > > > > > > > > > On Thu, Jul 27, 2023 at 7:28 AM Alexandre Ghiti <alexghiti@rivosinc.com> wrote: > > > > > > > > > > > > > > riscv now supports mmaping hardware counters so add what's needed to > > > > > > > take advantage of that in libperf. > > > > > > > > > > > > > > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com> > > > > > > > Reviewed-by: Andrew Jones <ajones@ventanamicro.com> > > > > > > > Reviewed-by: Atish Patra <atishp@rivosinc.com> > > > > > > > --- > > > > > > > tools/lib/perf/mmap.c | 65 +++++++++++++++++++++++++++++++++++++++++++ > > > > > > > 1 file changed, 65 insertions(+) > > > > > > > > > > > > > > diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c > > > > > > > index 0d1634cedf44..378a163f0554 100644 > > > > > > > --- a/tools/lib/perf/mmap.c > > > > > > > +++ b/tools/lib/perf/mmap.c > > > > > > > @@ -392,6 +392,71 @@ static u64 read_perf_counter(unsigned int counter) > > > > > > > > > > > > > > static u64 read_timestamp(void) { return read_sysreg(cntvct_el0); } > > > > > > > > > > > > > > +#elif __riscv_xlen == 64 > > > > > > > > > > > > This is something of an odd guard, perhaps: > > > > > > #elif defined(__riscv) && __riscv_xlen == 64 > > > > > > > > > > > > That way it is more intention revealing that this is riscv code. Could > > > > > > you add a comment relating to the __riscv_xlen ? > > > > > > > > > > I guess Andrew answered that already. > > > > > > > > > > > Not sure. 
I still think it looks weird: > > > ... > > > #if defined(__i386__) || defined(__x86_64__) > > > ... > > > #elif defined(__aarch64__) > > > ... > > > #elif __riscv_xlen == 64 > > > ... > > > #else > > > static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; } > > > static u64 read_timestamp(void) { return 0; } > > > #endif > > > > > > The first two are clearly #ifdef-ing architecture specific assembly > > > code, under what conditions I get RISC-V code ¯\(ツ)/¯ At least worth > > > a comment like "csrr is only available when you have xlens of 64 > > > because ..." > > > > __riscv_xlen indicates riscv64, which is the only target of this > > patchset. But if you prefer, I don't mind adding back the > > defined(__riscv) if I re-spin a new version. > > This kind of begs the question as to why there is no __riscv64 ifdef. > The issue with xlen is it isn't intention revealing so for regular > people trying to understand the code it would be nice to document it. I understand, I'll add the defined(__riscv) and a comment to explain the __riscv_xlen. 
> > > > > > > > > > > > > > > > > + > > > > > > > +/* TODO: implement rv32 support */ > > > > > > > + > > > > > > > +#define CSR_CYCLE 0xc00 > > > > > > > +#define CSR_TIME 0xc01 > > > > > > > + > > > > > > > +#define csr_read(csr) \ > > > > > > > +({ \ > > > > > > > + register unsigned long __v; \ > > > > > > > + __asm__ __volatile__ ("csrr %0, " #csr \ > > > > > > > + : "=r" (__v) : \ > > > > > > > + : "memory"); \ > > > > > > > > > > > > To avoid the macro pasting that could potentially go weird, could this be: > > > > > > > > > > > > __asm__ __volatile__ ("csrr %0, %1", > > > > > > : "=r"(__v) /* outputs */ > > > > > > : "i"(csr) /* inputs */ > > > > > > : "memory" /* clobbers */) > > > > > > > > Forgot to answer this one: it compiles, but I have to admit that I > > > > don't understand the difference and if that's correct (all macros in > > > > arch/riscv/include/asm/csr.h use # to do this) and what benefits it > > > > brings. Can you elaborate more on things that could "go weird"? > > > > > > So rather than use an input constraint for the asm block you are using > > > the C preprocessor to paste in the csr argument. If csr is something > > > like "1" then it looks good and you'll get "csrr %0,1". If you pass > > > something like "1 << 31" then that will be pasted as "csrr %0, 1 << > > > 31" and that starts to get weird in the context of being in the > > > assembler where it is unlikely the C operators work. Using the input > > > constraint avoids this, causes the C compiler to check the type of the > > > argument and you'll probably get more intelligible error messages as a > > > consequence. > > > > > > > Thanks. So if I'm not mistaken, in this exact context, given we only > > use csr_read() through the csr_read_num() function, it seems ok right? > > So you've formed a cargo cult and the justification is not wanting to > stop a copy-paste chain from somewhere else. 
This code itself will be > copy-pasted and we go to some ends to encourage that by placing parts > of it in include/uapi/linux/perf_event.h. It seems better to catch > this issue early rather than propagate it. OK, nothing to argue here, I'll use your first proposition in the next version. > > > > > > > > > Thanks again, > > > > > > > > Alex > > > > > > > > > > > > > > > > Also, why is this clobbering memory? Worth adding a comment. > > > > > > > > > > No idea, I see that it is also done this way in > > > > > arch/riscv/include/asm/csr.h. @Atish Kumar Patra , @Palmer Dabbelt ? > > > > > > It would seem to make sense then not to have a memory constraint until > > > we know why we're doing it? > > > > > > > I have just had the answer internally (thanks to @Brendan Sweeney): > > csr modifications can alter how the memory is accessed (satp which > > changes the address space, sum which allows/disallows userspace > > access...), so we need the memory barrier to make sure the compiler > > does not reorder the memory accesses. > > The conditions you mention shouldn't apply here though? Even if you > add a memory barrier for the compiler what is stopping the hardware > reordering loads and stores? If it absolutely has to be there then a > comment something like "There is a bug in riscv where the csrr > instruction can clobber memory breaking future reads and somehow this > constraint fixes it by ... ". You're right, it does not apply here, so I can remove this memory barrier. 
Regarding the hardware that could speculate across a csr instruction, that's dealt with in the riscv spec: "In particular, a CSR access is performed after the execution of any prior instructions in program order whose behavior modifies or is modified by the CSR state and before the execution of any subsequent instructions in program order whose behavior modifies or is modified by the CSR state" Thanks for your comments, Alex > > Thanks, > Ian > > > Thanks, > > > > Alex > > > > > Thanks, > > > Ian > > > > > > > > > > > > > Thanks for your comments! > > > > > > > > > > Alex > > > > > > > > > > > > > > > > > Thanks, > > > > > > Ian > > > > > > > > > > > > > + __v; \ > > > > > > > +}) > > > > > > > + > > > > > > > +static unsigned long csr_read_num(int csr_num) > > > > > > > +{ > > > > > > > +#define switchcase_csr_read(__csr_num, __val) {\ > > > > > > > + case __csr_num: \ > > > > > > > + __val = csr_read(__csr_num); \ > > > > > > > + break; } > > > > > > > +#define switchcase_csr_read_2(__csr_num, __val) {\ > > > > > > > + switchcase_csr_read(__csr_num + 0, __val) \ > > > > > > > + switchcase_csr_read(__csr_num + 1, __val)} > > > > > > > +#define switchcase_csr_read_4(__csr_num, __val) {\ > > > > > > > + switchcase_csr_read_2(__csr_num + 0, __val) \ > > > > > > > + switchcase_csr_read_2(__csr_num + 2, __val)} > > > > > > > +#define switchcase_csr_read_8(__csr_num, __val) {\ > > > > > > > + switchcase_csr_read_4(__csr_num + 0, __val) \ > > > > > > > + switchcase_csr_read_4(__csr_num + 4, __val)} > > > > > > > +#define switchcase_csr_read_16(__csr_num, __val) {\ > > > > > > > + switchcase_csr_read_8(__csr_num + 0, __val) \ > > > > > > > + switchcase_csr_read_8(__csr_num + 8, __val)} > > > > > > > +#define switchcase_csr_read_32(__csr_num, __val) {\ > > > > > > > + switchcase_csr_read_16(__csr_num + 0, __val) \ > > > > > > > + switchcase_csr_read_16(__csr_num + 16, __val)} > > > > > > > + > > > > > > > + unsigned long ret = 0; > > > > > > > + > > > > > > > + switch 
(csr_num) { > > > > > > > + switchcase_csr_read_32(CSR_CYCLE, ret) > > > > > > > + default: > > > > > > > + break; > > > > > > > + } > > > > > > > + > > > > > > > + return ret; > > > > > > > +#undef switchcase_csr_read_32 > > > > > > > +#undef switchcase_csr_read_16 > > > > > > > +#undef switchcase_csr_read_8 > > > > > > > +#undef switchcase_csr_read_4 > > > > > > > +#undef switchcase_csr_read_2 > > > > > > > +#undef switchcase_csr_read > > > > > > > +} > > > > > > > + > > > > > > > +static u64 read_perf_counter(unsigned int counter) > > > > > > > +{ > > > > > > > + return csr_read_num(CSR_CYCLE + counter); > > > > > > > +} > > > > > > > + > > > > > > > +static u64 read_timestamp(void) > > > > > > > +{ > > > > > > > + return csr_read_num(CSR_TIME); > > > > > > > +} > > > > > > > + > > > > > > > #else > > > > > > > static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; } > > > > > > > static u64 read_timestamp(void) { return 0; } > > > > > > > -- > > > > > > > 2.39.2 > > > > > > >
diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c index 0d1634cedf44..378a163f0554 100644 --- a/tools/lib/perf/mmap.c +++ b/tools/lib/perf/mmap.c @@ -392,6 +392,71 @@ static u64 read_perf_counter(unsigned int counter) static u64 read_timestamp(void) { return read_sysreg(cntvct_el0); } +#elif __riscv_xlen == 64 + +/* TODO: implement rv32 support */ + +#define CSR_CYCLE 0xc00 +#define CSR_TIME 0xc01 + +#define csr_read(csr) \ +({ \ + register unsigned long __v; \ + __asm__ __volatile__ ("csrr %0, " #csr \ + : "=r" (__v) : \ + : "memory"); \ + __v; \ +}) + +static unsigned long csr_read_num(int csr_num) +{ +#define switchcase_csr_read(__csr_num, __val) {\ + case __csr_num: \ + __val = csr_read(__csr_num); \ + break; } +#define switchcase_csr_read_2(__csr_num, __val) {\ + switchcase_csr_read(__csr_num + 0, __val) \ + switchcase_csr_read(__csr_num + 1, __val)} +#define switchcase_csr_read_4(__csr_num, __val) {\ + switchcase_csr_read_2(__csr_num + 0, __val) \ + switchcase_csr_read_2(__csr_num + 2, __val)} +#define switchcase_csr_read_8(__csr_num, __val) {\ + switchcase_csr_read_4(__csr_num + 0, __val) \ + switchcase_csr_read_4(__csr_num + 4, __val)} +#define switchcase_csr_read_16(__csr_num, __val) {\ + switchcase_csr_read_8(__csr_num + 0, __val) \ + switchcase_csr_read_8(__csr_num + 8, __val)} +#define switchcase_csr_read_32(__csr_num, __val) {\ + switchcase_csr_read_16(__csr_num + 0, __val) \ + switchcase_csr_read_16(__csr_num + 16, __val)} + + unsigned long ret = 0; + + switch (csr_num) { + switchcase_csr_read_32(CSR_CYCLE, ret) + default: + break; + } + + return ret; +#undef switchcase_csr_read_32 +#undef switchcase_csr_read_16 +#undef switchcase_csr_read_8 +#undef switchcase_csr_read_4 +#undef switchcase_csr_read_2 +#undef switchcase_csr_read +} + +static u64 read_perf_counter(unsigned int counter) +{ + return csr_read_num(CSR_CYCLE + counter); +} + +static u64 read_timestamp(void) +{ + return csr_read_num(CSR_TIME); +} + #else static u64 
read_perf_counter(unsigned int counter __maybe_unused) { return 0; } static u64 read_timestamp(void) { return 0; }