diff mbox series

[v5,2/2] arch/riscv: Enable kprobes when CONFIG_MODULES=n

Message ID 20240325215502.660-2-jarkko@kernel.org (mailing list archive)
State Superseded
Headers show
Series [v5,1/2] kprobes: textmem API | expand

Checks

Context Check Description
conchuod/vmtest-for-next-PR fail PR summary
conchuod/patch-2-test-1 success .github/scripts/patches/tests/build_rv32_defconfig.sh
conchuod/patch-2-test-2 success .github/scripts/patches/tests/build_rv64_clang_allmodconfig.sh
conchuod/patch-2-test-3 success .github/scripts/patches/tests/build_rv64_gcc_allmodconfig.sh
conchuod/patch-2-test-4 success .github/scripts/patches/tests/build_rv64_nommu_k210_defconfig.sh
conchuod/patch-2-test-5 success .github/scripts/patches/tests/build_rv64_nommu_virt_defconfig.sh
conchuod/patch-2-test-6 warning .github/scripts/patches/tests/checkpatch.sh
conchuod/patch-2-test-7 success .github/scripts/patches/tests/dtb_warn_rv64.sh
conchuod/patch-2-test-8 success .github/scripts/patches/tests/header_inline.sh
conchuod/patch-2-test-9 success .github/scripts/patches/tests/kdoc.sh
conchuod/patch-2-test-10 success .github/scripts/patches/tests/module_param.sh
conchuod/patch-2-test-11 success .github/scripts/patches/tests/verify_fixes.sh
conchuod/patch-2-test-12 success .github/scripts/patches/tests/verify_signedoff.sh

Commit Message

Jarkko Sakkinen March 25, 2024, 9:55 p.m. UTC
Tacing with kprobes while running a monolithic kernel is currently
impossible due the kernel module allocator dependency.

Address the issue by implementing textmem API for RISC-V.

Link: https://www.sochub.fi # for power on testing new SoC's with a minimal stack
Link: https://lore.kernel.org/all/20220608000014.3054333-1-jarkko@profian.com/ # continuation
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
---
v5:
- No changes, expect removing alloc_execmem() call which should have
  been part of the previous patch.
v4:
- Include linux/execmem.h.
v3:
- Architecture independent parts have been split to separate patches.
- Do not change arch/riscv/kernel/module.c as it is out of scope for
  this patch set now.
v2:
- Better late than never right? :-)
- Focus only to RISC-V for now to make the patch more digestable. This
  is the arch where I use the patch on a daily basis to help with QA.
- Introduce HAVE_KPROBES_ALLOC flag to help with more gradual migration.
---
 arch/riscv/Kconfig          |  1 +
 arch/riscv/kernel/Makefile  |  3 +++
 arch/riscv/kernel/execmem.c | 22 ++++++++++++++++++++++
 3 files changed, 26 insertions(+)
 create mode 100644 arch/riscv/kernel/execmem.c

Comments

Alexandre Ghiti March 26, 2024, 1:57 p.m. UTC | #1
Hi Jarkko,

On 25/03/2024 22:55, Jarkko Sakkinen wrote:
> Tacing with kprobes while running a monolithic kernel is currently
> impossible due the kernel module allocator dependency.
>
> Address the issue by implementing textmem API for RISC-V.
>
> Link: https://www.sochub.fi # for power on testing new SoC's with a minimal stack
> Link: https://lore.kernel.org/all/20220608000014.3054333-1-jarkko@profian.com/ # continuation
> Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
> ---
> v5:
> - No changes, expect removing alloc_execmem() call which should have
>    been part of the previous patch.
> v4:
> - Include linux/execmem.h.
> v3:
> - Architecture independent parts have been split to separate patches.
> - Do not change arch/riscv/kernel/module.c as it is out of scope for
>    this patch set now.
> v2:
> - Better late than never right? :-)
> - Focus only to RISC-V for now to make the patch more digestable. This
>    is the arch where I use the patch on a daily basis to help with QA.
> - Introduce HAVE_KPROBES_ALLOC flag to help with more gradual migration.
> ---
>   arch/riscv/Kconfig          |  1 +
>   arch/riscv/kernel/Makefile  |  3 +++
>   arch/riscv/kernel/execmem.c | 22 ++++++++++++++++++++++
>   3 files changed, 26 insertions(+)
>   create mode 100644 arch/riscv/kernel/execmem.c
>
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index e3142ce531a0..499512fb17ff 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -132,6 +132,7 @@ config RISCV
>   	select HAVE_KPROBES if !XIP_KERNEL
>   	select HAVE_KPROBES_ON_FTRACE if !XIP_KERNEL
>   	select HAVE_KRETPROBES if !XIP_KERNEL
> +	select HAVE_ALLOC_EXECMEM if !XIP_KERNEL
>   	# https://github.com/ClangBuiltLinux/linux/issues/1881
>   	select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if !LD_IS_LLD
>   	select HAVE_MOVE_PMD
> diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
> index 604d6bf7e476..337797f10d3e 100644
> --- a/arch/riscv/kernel/Makefile
> +++ b/arch/riscv/kernel/Makefile
> @@ -73,6 +73,9 @@ obj-$(CONFIG_SMP)		+= cpu_ops.o
>   
>   obj-$(CONFIG_RISCV_BOOT_SPINWAIT) += cpu_ops_spinwait.o
>   obj-$(CONFIG_MODULES)		+= module.o
> +ifeq ($(CONFIG_ALLOC_EXECMEM),y)
> +obj-y				+= execmem.o
> +endif
>   obj-$(CONFIG_MODULE_SECTIONS)	+= module-sections.o
>   
>   obj-$(CONFIG_CPU_PM)		+= suspend_entry.o suspend.o
> diff --git a/arch/riscv/kernel/execmem.c b/arch/riscv/kernel/execmem.c
> new file mode 100644
> index 000000000000..3e52522ead32
> --- /dev/null
> +++ b/arch/riscv/kernel/execmem.c
> @@ -0,0 +1,22 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +
> +#include <linux/mm.h>
> +#include <linux/execmem.h>
> +#include <linux/vmalloc.h>
> +#include <asm/sections.h>
> +
> +void *alloc_execmem(unsigned long size, gfp_t /* gfp */)
> +{
> +	return __vmalloc_node_range(size, 1, MODULES_VADDR,
> +				    MODULES_END, GFP_KERNEL,
> +				    PAGE_KERNEL, 0, NUMA_NO_NODE,
> +				    __builtin_return_address(0));
> +}


The __vmalloc_node_range() line ^^ must be from an old kernel since we 
added VM_FLUSH_RESET_PERMS in 6.8, see 749b94b08005 ("riscv: Fix 
module_alloc() that did not reset the linear mapping permissions").

In addition, I guess module_alloc() should now use alloc_execmem() right?


> +
> +void free_execmem(void *region)
> +{
> +	if (in_interrupt())
> +		pr_warn("In interrupt context: vmalloc may not work.\n");
> +
> +	vfree(region);
> +}


I remember Mike Rapoport sent a patchset to introduce an API for 
executable memory allocation 
(https://lore.kernel.org/linux-mm/20230918072955.2507221-1-rppt@kernel.org/), 
how does this intersect with your work? I don't know the status of his 
patchset though.

Thanks,

Alex
Jarkko Sakkinen March 26, 2024, 4:49 p.m. UTC | #2
On Tue Mar 26, 2024 at 3:57 PM EET, Alexandre Ghiti wrote:
> Hi Jarkko,
>
> On 25/03/2024 22:55, Jarkko Sakkinen wrote:
> > Tacing with kprobes while running a monolithic kernel is currently
> > impossible due the kernel module allocator dependency.
> >
> > Address the issue by implementing textmem API for RISC-V.
> >
> > Link: https://www.sochub.fi # for power on testing new SoC's with a minimal stack
> > Link: https://lore.kernel.org/all/20220608000014.3054333-1-jarkko@profian.com/ # continuation
> > Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
> > ---
> > v5:
> > - No changes, expect removing alloc_execmem() call which should have
> >    been part of the previous patch.
> > v4:
> > - Include linux/execmem.h.
> > v3:
> > - Architecture independent parts have been split to separate patches.
> > - Do not change arch/riscv/kernel/module.c as it is out of scope for
> >    this patch set now.
> > v2:
> > - Better late than never right? :-)
> > - Focus only to RISC-V for now to make the patch more digestable. This
> >    is the arch where I use the patch on a daily basis to help with QA.
> > - Introduce HAVE_KPROBES_ALLOC flag to help with more gradual migration.
> > ---
> >   arch/riscv/Kconfig          |  1 +
> >   arch/riscv/kernel/Makefile  |  3 +++
> >   arch/riscv/kernel/execmem.c | 22 ++++++++++++++++++++++
> >   3 files changed, 26 insertions(+)
> >   create mode 100644 arch/riscv/kernel/execmem.c
> >
> > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > index e3142ce531a0..499512fb17ff 100644
> > --- a/arch/riscv/Kconfig
> > +++ b/arch/riscv/Kconfig
> > @@ -132,6 +132,7 @@ config RISCV
> >   	select HAVE_KPROBES if !XIP_KERNEL
> >   	select HAVE_KPROBES_ON_FTRACE if !XIP_KERNEL
> >   	select HAVE_KRETPROBES if !XIP_KERNEL
> > +	select HAVE_ALLOC_EXECMEM if !XIP_KERNEL
> >   	# https://github.com/ClangBuiltLinux/linux/issues/1881
> >   	select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if !LD_IS_LLD
> >   	select HAVE_MOVE_PMD
> > diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
> > index 604d6bf7e476..337797f10d3e 100644
> > --- a/arch/riscv/kernel/Makefile
> > +++ b/arch/riscv/kernel/Makefile
> > @@ -73,6 +73,9 @@ obj-$(CONFIG_SMP)		+= cpu_ops.o
> >   
> >   obj-$(CONFIG_RISCV_BOOT_SPINWAIT) += cpu_ops_spinwait.o
> >   obj-$(CONFIG_MODULES)		+= module.o
> > +ifeq ($(CONFIG_ALLOC_EXECMEM),y)
> > +obj-y				+= execmem.o
> > +endif
> >   obj-$(CONFIG_MODULE_SECTIONS)	+= module-sections.o
> >   
> >   obj-$(CONFIG_CPU_PM)		+= suspend_entry.o suspend.o
> > diff --git a/arch/riscv/kernel/execmem.c b/arch/riscv/kernel/execmem.c
> > new file mode 100644
> > index 000000000000..3e52522ead32
> > --- /dev/null
> > +++ b/arch/riscv/kernel/execmem.c
> > @@ -0,0 +1,22 @@
> > +// SPDX-License-Identifier: GPL-2.0-or-later
> > +
> > +#include <linux/mm.h>
> > +#include <linux/execmem.h>
> > +#include <linux/vmalloc.h>
> > +#include <asm/sections.h>
> > +
> > +void *alloc_execmem(unsigned long size, gfp_t /* gfp */)

Need to have the parameter name here. I guess this could just as well
pass through gfp to vmalloc from the caller as kprobes does call
module_alloc() with GFP_KERNEL set in RISC-V.

> > +{
> > +	return __vmalloc_node_range(size, 1, MODULES_VADDR,
> > +				    MODULES_END, GFP_KERNEL,
> > +				    PAGE_KERNEL, 0, NUMA_NO_NODE,
> > +				    __builtin_return_address(0));
> > +}
>
>
> The __vmalloc_node_range() line ^^ must be from an old kernel since we 
> added VM_FLUSH_RESET_PERMS in 6.8, see 749b94b08005 ("riscv: Fix 
> module_alloc() that did not reset the linear mapping permissions").
>
> In addition, I guess module_alloc() should now use alloc_execmem() right?

Ack for the first comment. For the 2nd it is up to arch/<arch> to choose
whether to have shared or separate allocators.

So if you want I can change it that way but did not want to make the
call myself.

>
>
> > +
> > +void free_execmem(void *region)
> > +{
> > +	if (in_interrupt())
> > +		pr_warn("In interrupt context: vmalloc may not work.\n");
> > +
> > +	vfree(region);
> > +}
>
>
> I remember Mike Rapoport sent a patchset to introduce an API for 
> executable memory allocation 
> (https://lore.kernel.org/linux-mm/20230918072955.2507221-1-rppt@kernel.org/), 
> how does this intersect with your work? I don't know the status of his 
> patchset though.
>
> Thanks,
>
> Alex

I have also made a patch set for kprobes in the 2022:

https://lore.kernel.org/all/20220608000014.3054333-1-jarkko@profian.com/

I think this Calvin's, Mike's and my early patch set have the same
problem: they try to choke all architectures at once. And further,
Calvin's and Mike's work also try to cover also tracing subsystems
at once.

I feel that my relatively small patch set which deals only with
trivial kprobe (which is more in the leaf than e.g. bpf which
is more like orchestrator tool) and implements one arch of which
dog food I actually eat is a better starting point.

Arch code is always something where you need to have genuine
understanding so full architecture coverage from day one is
just too risky for stability. Linux is better off if people who
work on a  specific arch proactively will "fill the holes".

So the way I see my patch set is "lowest common denominator"
in both architecture axis and tracing subsystem axist. It should
not interfere that much with the other work (like bpf).

BR, Jarkko
Jarkko Sakkinen March 26, 2024, 4:54 p.m. UTC | #3
On Tue Mar 26, 2024 at 6:49 PM EET, Jarkko Sakkinen wrote:
> On Tue Mar 26, 2024 at 3:57 PM EET, Alexandre Ghiti wrote:
> > Hi Jarkko,
> >
> > On 25/03/2024 22:55, Jarkko Sakkinen wrote:
> > > Tacing with kprobes while running a monolithic kernel is currently
> > > impossible due the kernel module allocator dependency.
> > >
> > > Address the issue by implementing textmem API for RISC-V.
> > >
> > > Link: https://www.sochub.fi # for power on testing new SoC's with a minimal stack
> > > Link: https://lore.kernel.org/all/20220608000014.3054333-1-jarkko@profian.com/ # continuation
> > > Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
> > > ---
> > > v5:
> > > - No changes, expect removing alloc_execmem() call which should have
> > >    been part of the previous patch.
> > > v4:
> > > - Include linux/execmem.h.
> > > v3:
> > > - Architecture independent parts have been split to separate patches.
> > > - Do not change arch/riscv/kernel/module.c as it is out of scope for
> > >    this patch set now.
> > > v2:
> > > - Better late than never right? :-)
> > > - Focus only to RISC-V for now to make the patch more digestable. This
> > >    is the arch where I use the patch on a daily basis to help with QA.
> > > - Introduce HAVE_KPROBES_ALLOC flag to help with more gradual migration.
> > > ---
> > >   arch/riscv/Kconfig          |  1 +
> > >   arch/riscv/kernel/Makefile  |  3 +++
> > >   arch/riscv/kernel/execmem.c | 22 ++++++++++++++++++++++
> > >   3 files changed, 26 insertions(+)
> > >   create mode 100644 arch/riscv/kernel/execmem.c
> > >
> > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > > index e3142ce531a0..499512fb17ff 100644
> > > --- a/arch/riscv/Kconfig
> > > +++ b/arch/riscv/Kconfig
> > > @@ -132,6 +132,7 @@ config RISCV
> > >   	select HAVE_KPROBES if !XIP_KERNEL
> > >   	select HAVE_KPROBES_ON_FTRACE if !XIP_KERNEL
> > >   	select HAVE_KRETPROBES if !XIP_KERNEL
> > > +	select HAVE_ALLOC_EXECMEM if !XIP_KERNEL
> > >   	# https://github.com/ClangBuiltLinux/linux/issues/1881
> > >   	select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if !LD_IS_LLD
> > >   	select HAVE_MOVE_PMD
> > > diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
> > > index 604d6bf7e476..337797f10d3e 100644
> > > --- a/arch/riscv/kernel/Makefile
> > > +++ b/arch/riscv/kernel/Makefile
> > > @@ -73,6 +73,9 @@ obj-$(CONFIG_SMP)		+= cpu_ops.o
> > >   
> > >   obj-$(CONFIG_RISCV_BOOT_SPINWAIT) += cpu_ops_spinwait.o
> > >   obj-$(CONFIG_MODULES)		+= module.o
> > > +ifeq ($(CONFIG_ALLOC_EXECMEM),y)
> > > +obj-y				+= execmem.o
> > > +endif
> > >   obj-$(CONFIG_MODULE_SECTIONS)	+= module-sections.o
> > >   
> > >   obj-$(CONFIG_CPU_PM)		+= suspend_entry.o suspend.o
> > > diff --git a/arch/riscv/kernel/execmem.c b/arch/riscv/kernel/execmem.c
> > > new file mode 100644
> > > index 000000000000..3e52522ead32
> > > --- /dev/null
> > > +++ b/arch/riscv/kernel/execmem.c
> > > @@ -0,0 +1,22 @@
> > > +// SPDX-License-Identifier: GPL-2.0-or-later
> > > +
> > > +#include <linux/mm.h>
> > > +#include <linux/execmem.h>
> > > +#include <linux/vmalloc.h>
> > > +#include <asm/sections.h>
> > > +
> > > +void *alloc_execmem(unsigned long size, gfp_t /* gfp */)
>
> Need to have the parameter name here. I guess this could just as well
> pass through gfp to vmalloc from the caller as kprobes does call
> module_alloc() with GFP_KERNEL set in RISC-V.
>
> > > +{
> > > +	return __vmalloc_node_range(size, 1, MODULES_VADDR,
> > > +				    MODULES_END, GFP_KERNEL,
> > > +				    PAGE_KERNEL, 0, NUMA_NO_NODE,
> > > +				    __builtin_return_address(0));
> > > +}
> >
> >
> > The __vmalloc_node_range() line ^^ must be from an old kernel since we 
> > added VM_FLUSH_RESET_PERMS in 6.8, see 749b94b08005 ("riscv: Fix 
> > module_alloc() that did not reset the linear mapping permissions").
> >
> > In addition, I guess module_alloc() should now use alloc_execmem() right?
>
> Ack for the first comment. For the 2nd it is up to arch/<arch> to choose
> whether to have shared or separate allocators.
>
> So if you want I can change it that way but did not want to make the
> call myself.
>
> >
> >
> > > +
> > > +void free_execmem(void *region)
> > > +{
> > > +	if (in_interrupt())
> > > +		pr_warn("In interrupt context: vmalloc may not work.\n");
> > > +
> > > +	vfree(region);
> > > +}
> >
> >
> > I remember Mike Rapoport sent a patchset to introduce an API for 
> > executable memory allocation 
> > (https://lore.kernel.org/linux-mm/20230918072955.2507221-1-rppt@kernel.org/), 
> > how does this intersect with your work? I don't know the status of his 
> > patchset though.
> >
> > Thanks,
> >
> > Alex
>
> I have also made a patch set for kprobes in the 2022:
>
> https://lore.kernel.org/all/20220608000014.3054333-1-jarkko@profian.com/
>
> I think this Calvin's, Mike's and my early patch set have the same
> problem: they try to choke all architectures at once. And further,
> Calvin's and Mike's work also try to cover also tracing subsystems
> at once.
>
> I feel that my relatively small patch set which deals only with
> trivial kprobe (which is more in the leaf than e.g. bpf which
> is more like orchestrator tool) and implements one arch of which
> dog food I actually eat is a better starting point.
>
> Arch code is always something where you need to have genuine
> understanding so full architecture coverage from day one is
> just too risky for stability. Linux is better off if people who
> work on a  specific arch proactively will "fill the holes".
>
> So the way I see my patch set is "lowest common denominator"
> in both architecture axis and tracing subsystem axist. It should
> not interfere that much with the other work (like bpf).

Here also there is a lot of kconfig flag logic changes. I've verified
them but still I think we are better off if this stuff is put in the
wild first in small scale rather than spraying kernel with code changes
in the first run.

BR, Jarkko
Alexandre Ghiti March 26, 2024, 7:03 p.m. UTC | #4
On 26/03/2024 17:49, Jarkko Sakkinen wrote:
> On Tue Mar 26, 2024 at 3:57 PM EET, Alexandre Ghiti wrote:
>> Hi Jarkko,
>>
>> On 25/03/2024 22:55, Jarkko Sakkinen wrote:
>>> Tacing with kprobes while running a monolithic kernel is currently
>>> impossible due the kernel module allocator dependency.
>>>
>>> Address the issue by implementing textmem API for RISC-V.
>>>
>>> Link: https://www.sochub.fi # for power on testing new SoC's with a minimal stack
>>> Link: https://lore.kernel.org/all/20220608000014.3054333-1-jarkko@profian.com/ # continuation
>>> Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
>>> ---
>>> v5:
>>> - No changes, expect removing alloc_execmem() call which should have
>>>     been part of the previous patch.
>>> v4:
>>> - Include linux/execmem.h.
>>> v3:
>>> - Architecture independent parts have been split to separate patches.
>>> - Do not change arch/riscv/kernel/module.c as it is out of scope for
>>>     this patch set now.
>>> v2:
>>> - Better late than never right? :-)
>>> - Focus only to RISC-V for now to make the patch more digestable. This
>>>     is the arch where I use the patch on a daily basis to help with QA.
>>> - Introduce HAVE_KPROBES_ALLOC flag to help with more gradual migration.
>>> ---
>>>    arch/riscv/Kconfig          |  1 +
>>>    arch/riscv/kernel/Makefile  |  3 +++
>>>    arch/riscv/kernel/execmem.c | 22 ++++++++++++++++++++++
>>>    3 files changed, 26 insertions(+)
>>>    create mode 100644 arch/riscv/kernel/execmem.c
>>>
>>> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
>>> index e3142ce531a0..499512fb17ff 100644
>>> --- a/arch/riscv/Kconfig
>>> +++ b/arch/riscv/Kconfig
>>> @@ -132,6 +132,7 @@ config RISCV
>>>    	select HAVE_KPROBES if !XIP_KERNEL
>>>    	select HAVE_KPROBES_ON_FTRACE if !XIP_KERNEL
>>>    	select HAVE_KRETPROBES if !XIP_KERNEL
>>> +	select HAVE_ALLOC_EXECMEM if !XIP_KERNEL
>>>    	# https://github.com/ClangBuiltLinux/linux/issues/1881
>>>    	select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if !LD_IS_LLD
>>>    	select HAVE_MOVE_PMD
>>> diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
>>> index 604d6bf7e476..337797f10d3e 100644
>>> --- a/arch/riscv/kernel/Makefile
>>> +++ b/arch/riscv/kernel/Makefile
>>> @@ -73,6 +73,9 @@ obj-$(CONFIG_SMP)		+= cpu_ops.o
>>>    
>>>    obj-$(CONFIG_RISCV_BOOT_SPINWAIT) += cpu_ops_spinwait.o
>>>    obj-$(CONFIG_MODULES)		+= module.o
>>> +ifeq ($(CONFIG_ALLOC_EXECMEM),y)
>>> +obj-y				+= execmem.o
>>> +endif
>>>    obj-$(CONFIG_MODULE_SECTIONS)	+= module-sections.o
>>>    
>>>    obj-$(CONFIG_CPU_PM)		+= suspend_entry.o suspend.o
>>> diff --git a/arch/riscv/kernel/execmem.c b/arch/riscv/kernel/execmem.c
>>> new file mode 100644
>>> index 000000000000..3e52522ead32
>>> --- /dev/null
>>> +++ b/arch/riscv/kernel/execmem.c
>>> @@ -0,0 +1,22 @@
>>> +// SPDX-License-Identifier: GPL-2.0-or-later
>>> +
>>> +#include <linux/mm.h>
>>> +#include <linux/execmem.h>
>>> +#include <linux/vmalloc.h>
>>> +#include <asm/sections.h>
>>> +
>>> +void *alloc_execmem(unsigned long size, gfp_t /* gfp */)
> Need to have the parameter name here. I guess this could just as well
> pass through gfp to vmalloc from the caller as kprobes does call
> module_alloc() with GFP_KERNEL set in RISC-V.
>
>>> +{
>>> +	return __vmalloc_node_range(size, 1, MODULES_VADDR,
>>> +				    MODULES_END, GFP_KERNEL,
>>> +				    PAGE_KERNEL, 0, NUMA_NO_NODE,
>>> +				    __builtin_return_address(0));
>>> +}
>>
>> The __vmalloc_node_range() line ^^ must be from an old kernel since we
>> added VM_FLUSH_RESET_PERMS in 6.8, see 749b94b08005 ("riscv: Fix
>> module_alloc() that did not reset the linear mapping permissions").
>>
>> In addition, I guess module_alloc() should now use alloc_execmem() right?
> Ack for the first comment. For the 2nd it is up to arch/<arch> to choose
> whether to have shared or separate allocators.
>
> So if you want I can change it that way but did not want to make the
> call myself.


I'd say module_alloc() should use alloc_execmem() then since there are 
no differences for now.


>>
>>> +
>>> +void free_execmem(void *region)
>>> +{
>>> +	if (in_interrupt())
>>> +		pr_warn("In interrupt context: vmalloc may not work.\n");
>>> +
>>> +	vfree(region);
>>> +}
>>
>> I remember Mike Rapoport sent a patchset to introduce an API for
>> executable memory allocation
>> (https://lore.kernel.org/linux-mm/20230918072955.2507221-1-rppt@kernel.org/),
>> how does this intersect with your work? I don't know the status of his
>> patchset though.
>>
>> Thanks,
>>
>> Alex
> I have also made a patch set for kprobes in the 2022:
>
> https://lore.kernel.org/all/20220608000014.3054333-1-jarkko@profian.com/
>
> I think this Calvin's, Mike's and my early patch set have the same
> problem: they try to choke all architectures at once. And further,
> Calvin's and Mike's work also try to cover also tracing subsystems
> at once.
>
> I feel that my relatively small patch set which deals only with
> trivial kprobe (which is more in the leaf than e.g. bpf which
> is more like orchestrator tool) and implements one arch of which
> dog food I actually eat is a better starting point.
>
> Arch code is always something where you need to have genuine
> understanding so full architecture coverage from day one is
> just too risky for stability. Linux is better off if people who
> work on a  specific arch proactively will "fill the holes".
>
> So the way I see my patch set is "lowest common denominator"
> in both architecture axis and tracing subsystem axist. It should
> not interfere that much with the other work (like bpf).


I understand your point. But is there any consensus among you on how to 
deal with this issue? I'm not opposed at all to your patch, and it's 
small enough that we can revert it later on if that's not the right 
solution anyway.


>
> BR, Jarkko
>
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv
diff mbox series

Patch

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index e3142ce531a0..499512fb17ff 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -132,6 +132,7 @@  config RISCV
 	select HAVE_KPROBES if !XIP_KERNEL
 	select HAVE_KPROBES_ON_FTRACE if !XIP_KERNEL
 	select HAVE_KRETPROBES if !XIP_KERNEL
+	select HAVE_ALLOC_EXECMEM if !XIP_KERNEL
 	# https://github.com/ClangBuiltLinux/linux/issues/1881
 	select HAVE_LD_DEAD_CODE_DATA_ELIMINATION if !LD_IS_LLD
 	select HAVE_MOVE_PMD
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index 604d6bf7e476..337797f10d3e 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -73,6 +73,9 @@  obj-$(CONFIG_SMP)		+= cpu_ops.o
 
 obj-$(CONFIG_RISCV_BOOT_SPINWAIT) += cpu_ops_spinwait.o
 obj-$(CONFIG_MODULES)		+= module.o
+ifeq ($(CONFIG_ALLOC_EXECMEM),y)
+obj-y				+= execmem.o
+endif
 obj-$(CONFIG_MODULE_SECTIONS)	+= module-sections.o
 
 obj-$(CONFIG_CPU_PM)		+= suspend_entry.o suspend.o
diff --git a/arch/riscv/kernel/execmem.c b/arch/riscv/kernel/execmem.c
new file mode 100644
index 000000000000..3e52522ead32
--- /dev/null
+++ b/arch/riscv/kernel/execmem.c
@@ -0,0 +1,22 @@ 
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/mm.h>
+#include <linux/execmem.h>
+#include <linux/vmalloc.h>
+#include <asm/sections.h>
+
+void *alloc_execmem(unsigned long size, gfp_t /* gfp */)
+{
+	return __vmalloc_node_range(size, 1, MODULES_VADDR,
+				    MODULES_END, GFP_KERNEL,
+				    PAGE_KERNEL, 0, NUMA_NO_NODE,
+				    __builtin_return_address(0));
+}
+
+void free_execmem(void *region)
+{
+	if (in_interrupt())
+		pr_warn("In interrupt context: vmalloc may not work.\n");
+
+	vfree(region);
+}