Message ID | d27d9e68491e1df67dbee6c22df6a72ff95bab18.1583772574.git.zong.li@sifive.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Support strict kernel memory permissions for security | expand |
Hi, On Tue, 10 Mar 2020 00:55:43 +0800 Zong Li <zong.li@sifive.com> wrote: > On strict kernel memory permission, we couldn't patch code without > writable permission. Preserve two holes in fixmap area, so we can map > the kernel code temporarily to fixmap area, then patch the instructions. > > We need two pages here because we support the compressed instruction, so > the instruction might be align to 2 bytes. When patching the 32-bit > length instruction which is 2 bytes alignment, it will across two pages. > > Introduce two interfaces to patch kernel code: > riscv_patch_text_nosync: > - patch code without synchronization, it's caller's responsibility to > synchronize all CPUs if needed. > riscv_patch_text: > - patch code and always synchronize with stop_machine() > > Signed-off-by: Zong Li <zong.li@sifive.com> > --- > arch/riscv/include/asm/fixmap.h | 2 + > arch/riscv/include/asm/patch.h | 12 ++++ > arch/riscv/kernel/Makefile | 4 +- > arch/riscv/kernel/patch.c | 120 ++++++++++++++++++++++++++++++++ > 4 files changed, 137 insertions(+), 1 deletion(-) > create mode 100644 arch/riscv/include/asm/patch.h > create mode 100644 arch/riscv/kernel/patch.c > > diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h > index 42d2c42f3cc9..2368d49eb4ef 100644 > --- a/arch/riscv/include/asm/fixmap.h > +++ b/arch/riscv/include/asm/fixmap.h > @@ -27,6 +27,8 @@ enum fixed_addresses { > FIX_FDT = FIX_FDT_END + FIX_FDT_SIZE / PAGE_SIZE - 1, > FIX_PTE, > FIX_PMD, > + FIX_TEXT_POKE1, > + FIX_TEXT_POKE0, > FIX_EARLYCON_MEM_BASE, > __end_of_fixed_addresses > }; > diff --git a/arch/riscv/include/asm/patch.h b/arch/riscv/include/asm/patch.h > new file mode 100644 > index 000000000000..b5918a6e0615 > --- /dev/null > +++ b/arch/riscv/include/asm/patch.h > @@ -0,0 +1,12 @@ > +/* SPDX-License-Identifier: GPL-2.0-only */ > +/* > + * Copyright (C) 2020 SiFive > + */ > + > +#ifndef _ASM_RISCV_PATCH_H > +#define _ASM_RISCV_PATCH_H > + > +int riscv_patch_text_nosync(void *addr, const void *insns, size_t len); > +int riscv_patch_text(void *addr, u32 insn); > + > +#endif /* _ASM_RISCV_PATCH_H */ > diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile > index f40205cb9a22..d189bd3d8501 100644 > --- a/arch/riscv/kernel/Makefile > +++ b/arch/riscv/kernel/Makefile > @@ -4,7 +4,8 @@ > # > > ifdef CONFIG_FTRACE > -CFLAGS_REMOVE_ftrace.o = -pg > +CFLAGS_REMOVE_ftrace.o = -pg > +CFLAGS_REMOVE_patch.o = -pg > endif > > extra-y += head.o > @@ -26,6 +27,7 @@ obj-y += traps.o > obj-y += riscv_ksyms.o > obj-y += stacktrace.o > obj-y += cacheinfo.o > +obj-y += patch.o > obj-$(CONFIG_MMU) += vdso.o vdso/ > > obj-$(CONFIG_RISCV_M_MODE) += clint.o > diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c > new file mode 100644 > index 000000000000..8a4fc65ee022 > --- /dev/null > +++ b/arch/riscv/kernel/patch.c > @@ -0,0 +1,120 @@ > +// SPDX-License-Identifier: GPL-2.0-only > +/* > + * Copyright (C) 2020 SiFive > + */ > + > +#include <linux/spinlock.h> > +#include <linux/mm.h> > +#include <linux/uaccess.h> > +#include <linux/stop_machine.h> > +#include <asm/kprobes.h> > +#include <asm/cacheflush.h> > +#include <asm/fixmap.h> > + > +struct riscv_insn_patch { > + void *addr; > + u32 insn; > + atomic_t cpu_count; > +}; > + > +#ifdef CONFIG_MMU > +static DEFINE_RAW_SPINLOCK(patch_lock); > + > +static void __kprobes *patch_map(void *addr, int fixmap) Please use NOKPROBE_SYMBOL() instead of __kprobes. __kprobes is old style. > +{ > + uintptr_t uintaddr = (uintptr_t) addr; > + struct page *page; > + > + if (core_kernel_text(uintaddr)) > + page = phys_to_page(__pa_symbol(addr)); > + else if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) > + page = vmalloc_to_page(addr); > + else > + return addr; > + > + BUG_ON(!page); > + > + return (void *)set_fixmap_offset(fixmap, page_to_phys(page) + > + (uintaddr & ~PAGE_MASK)); > +} > + > +static void __kprobes patch_unmap(int fixmap) > +{ > + clear_fixmap(fixmap); > +} > + > +static int __kprobes riscv_insn_write(void *addr, const void *insn, size_t len) Why would you add "riscv_" prefix for those functions? It seems a bit odd. > +{ > + void *waddr = addr; > + bool across_pages = (((uintptr_t) addr & ~PAGE_MASK) + len) > PAGE_SIZE; > + unsigned long flags = 0; > + int ret; > + > + raw_spin_lock_irqsave(&patch_lock, flags); This looks a bit odd since stop_machine() is protected by its own mutex, and also the irq is already disabled here. Thank you, > + > + if (across_pages) > + patch_map(addr + len, FIX_TEXT_POKE1); > + > + waddr = patch_map(addr, FIX_TEXT_POKE0); > + > + ret = probe_kernel_write(waddr, insn, len); > + > + patch_unmap(FIX_TEXT_POKE0); > + > + if (across_pages) > + patch_unmap(FIX_TEXT_POKE1); > + > + raw_spin_unlock_irqrestore(&patch_lock, flags); > + > + return ret; > +} > +#else > +static int __kprobes riscv_insn_write(void *addr, const void *insn, size_t len) > +{ > + return probe_kernel_write(addr, insn, len); > +} > +#endif /* CONFIG_MMU */ > + > +int __kprobes riscv_patch_text_nosync(void *addr, const void *insns, size_t len) > +{ > + u32 *tp = addr; > + int ret; > + > + ret = riscv_insn_write(tp, insns, len); > + > + if (!ret) > + flush_icache_range((uintptr_t) tp, (uintptr_t) tp + len); > + > + return ret; > +} > + > +static int __kprobes riscv_patch_text_cb(void *data) > +{ > + struct riscv_insn_patch *patch = data; > + int ret = 0; > + > + if (atomic_inc_return(&patch->cpu_count) == 1) { > + ret = > + riscv_patch_text_nosync(patch->addr, &patch->insn, > + GET_INSN_LENGTH(patch->insn)); > + atomic_inc(&patch->cpu_count); > + } else { > + while (atomic_read(&patch->cpu_count) <= num_online_cpus()) > + cpu_relax(); > + smp_mb(); > + } > + > + return ret; > +} > + > +int __kprobes riscv_patch_text(void *addr, u32 insn) > +{ > + struct riscv_insn_patch patch = { > + .addr = addr, > + .insn = insn, > + .cpu_count = ATOMIC_INIT(0), > + }; > + > + return stop_machine_cpuslocked(riscv_patch_text_cb, > + &patch, cpu_online_mask); > +} > -- > 2.25.1 >
On Tue, Mar 31, 2020 at 11:32 PM Masami Hiramatsu <mhiramat@kernel.org> wrote: > > Hi, > > On Tue, 10 Mar 2020 00:55:43 +0800 > Zong Li <zong.li@sifive.com> wrote: > > > On strict kernel memory permission, we couldn't patch code without > > writable permission. Preserve two holes in fixmap area, so we can map > > the kernel code temporarily to fixmap area, then patch the instructions. > > > > We need two pages here because we support the compressed instruction, so > > the instruction might be align to 2 bytes. When patching the 32-bit > > length instruction which is 2 bytes alignment, it will across two pages. > > > > Introduce two interfaces to patch kernel code: > > riscv_patch_text_nosync: > > - patch code without synchronization, it's caller's responsibility to > > synchronize all CPUs if needed. > > riscv_patch_text: > > - patch code and always synchronize with stop_machine() > > > > Signed-off-by: Zong Li <zong.li@sifive.com> > > --- > > arch/riscv/include/asm/fixmap.h | 2 + > > arch/riscv/include/asm/patch.h | 12 ++++ > > arch/riscv/kernel/Makefile | 4 +- > > arch/riscv/kernel/patch.c | 120 ++++++++++++++++++++++++++++++++ > > 4 files changed, 137 insertions(+), 1 deletion(-) > > create mode 100644 arch/riscv/include/asm/patch.h > > create mode 100644 arch/riscv/kernel/patch.c > > > > diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h > > index 42d2c42f3cc9..2368d49eb4ef 100644 > > --- a/arch/riscv/include/asm/fixmap.h > > +++ b/arch/riscv/include/asm/fixmap.h > > @@ -27,6 +27,8 @@ enum fixed_addresses { > > FIX_FDT = FIX_FDT_END + FIX_FDT_SIZE / PAGE_SIZE - 1, > > FIX_PTE, > > FIX_PMD, > > + FIX_TEXT_POKE1, > > + FIX_TEXT_POKE0, > > FIX_EARLYCON_MEM_BASE, > > __end_of_fixed_addresses > > }; > > diff --git a/arch/riscv/include/asm/patch.h b/arch/riscv/include/asm/patch.h > > new file mode 100644 > > index 000000000000..b5918a6e0615 > > --- /dev/null > > +++ b/arch/riscv/include/asm/patch.h > > @@ -0,0 +1,12 @@ > > +/* SPDX-License-Identifier: GPL-2.0-only */ > > +/* > > + * Copyright (C) 2020 SiFive > > + */ > > + > > +#ifndef _ASM_RISCV_PATCH_H > > +#define _ASM_RISCV_PATCH_H > > + > > +int riscv_patch_text_nosync(void *addr, const void *insns, size_t len); > > +int riscv_patch_text(void *addr, u32 insn); > > + > > +#endif /* _ASM_RISCV_PATCH_H */ > > diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile > > index f40205cb9a22..d189bd3d8501 100644 > > --- a/arch/riscv/kernel/Makefile > > +++ b/arch/riscv/kernel/Makefile > > @@ -4,7 +4,8 @@ > > # > > > > ifdef CONFIG_FTRACE > > -CFLAGS_REMOVE_ftrace.o = -pg > > +CFLAGS_REMOVE_ftrace.o = -pg > > +CFLAGS_REMOVE_patch.o = -pg > > endif > > > > extra-y += head.o > > @@ -26,6 +27,7 @@ obj-y += traps.o > > obj-y += riscv_ksyms.o > > obj-y += stacktrace.o > > obj-y += cacheinfo.o > > +obj-y += patch.o > > obj-$(CONFIG_MMU) += vdso.o vdso/ > > > > obj-$(CONFIG_RISCV_M_MODE) += clint.o > > diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c > > new file mode 100644 > > index 000000000000..8a4fc65ee022 > > --- /dev/null > > +++ b/arch/riscv/kernel/patch.c > > @@ -0,0 +1,120 @@ > > +// SPDX-License-Identifier: GPL-2.0-only > > +/* > > + * Copyright (C) 2020 SiFive > > + */ > > + > > +#include <linux/spinlock.h> > > +#include <linux/mm.h> > > +#include <linux/uaccess.h> > > +#include <linux/stop_machine.h> > > +#include <asm/kprobes.h> > > +#include <asm/cacheflush.h> > > +#include <asm/fixmap.h> > > + > > +struct riscv_insn_patch { > > + void *addr; > > + u32 insn; > > + atomic_t cpu_count; > > +}; > > + > > +#ifdef CONFIG_MMU > > +static DEFINE_RAW_SPINLOCK(patch_lock); > > + > > +static void __kprobes *patch_map(void *addr, int fixmap) > > Please use NOKPROBE_SYMBOL() instead of __kprobes. __kprobes is old style. > > > +{ > > + uintptr_t uintaddr = (uintptr_t) addr; > > + struct page *page; > > + > > + if (core_kernel_text(uintaddr)) > > + page = phys_to_page(__pa_symbol(addr)); > > + else if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) > > + page = vmalloc_to_page(addr); > > + else > > + return addr; > > + > > + BUG_ON(!page); > > + > > + return (void *)set_fixmap_offset(fixmap, page_to_phys(page) + > > + (uintaddr & ~PAGE_MASK)); > > +} > > + > > +static void __kprobes patch_unmap(int fixmap) > > +{ > > + clear_fixmap(fixmap); > > +} > > + > > +static int __kprobes riscv_insn_write(void *addr, const void *insn, size_t len) > > Why would you add "riscv_" prefix for those functions? It seems a bit odd. There is no particular reason, I just was used to adding a prefix for arch-related stuff. I have no preference here, it's OK to me to remove the prefix of these functions, do you think we need to remove them? > > > +{ > > + void *waddr = addr; > > + bool across_pages = (((uintptr_t) addr & ~PAGE_MASK) + len) > PAGE_SIZE; > > + unsigned long flags = 0; > > + int ret; > > + > > + raw_spin_lock_irqsave(&patch_lock, flags); > > This looks a bit odd since stop_machine() is protected by its own mutex, > and also the irq is already disabled here. We need it because we don't always enter the riscv_patch_text_nosync() through stop_machine mechanism. If we call the riscv_patch_text_nosync() directly, we need a lock to protect the page. > > Thank you, > > > + > > + if (across_pages) > > + patch_map(addr + len, FIX_TEXT_POKE1); > > + > > + waddr = patch_map(addr, FIX_TEXT_POKE0); > > + > > + ret = probe_kernel_write(waddr, insn, len); > > + > > + patch_unmap(FIX_TEXT_POKE0); > > + > > + if (across_pages) > > + patch_unmap(FIX_TEXT_POKE1); > > + > > + raw_spin_unlock_irqrestore(&patch_lock, flags); > > + > > + return ret; > > +} > > +#else > > +static int __kprobes riscv_insn_write(void *addr, const void *insn, size_t len) > > +{ > > + return probe_kernel_write(addr, insn, len); > > +} > > +#endif /* CONFIG_MMU */ > > + > > +int __kprobes riscv_patch_text_nosync(void *addr, const void *insns, size_t len) > > +{ > > + u32 *tp = addr; > > + int ret; > > + > > + ret = riscv_insn_write(tp, insns, len); > > + > > + if (!ret) > > + flush_icache_range((uintptr_t) tp, (uintptr_t) tp + len); > > + > > + return ret; > > +} > > + > > +static int __kprobes riscv_patch_text_cb(void *data) > > +{ > > + struct riscv_insn_patch *patch = data; > > + int ret = 0; > > + > > + if (atomic_inc_return(&patch->cpu_count) == 1) { > > + ret = > > + riscv_patch_text_nosync(patch->addr, &patch->insn, > > + GET_INSN_LENGTH(patch->insn)); > > + atomic_inc(&patch->cpu_count); > > + } else { > > + while (atomic_read(&patch->cpu_count) <= num_online_cpus()) > > + cpu_relax(); > > + smp_mb(); > > + } > > + > > + return ret; > > +} > > + > > +int __kprobes riscv_patch_text(void *addr, u32 insn) > > +{ > > + struct riscv_insn_patch patch = { > > + .addr = addr, > > + .insn = insn, > > + .cpu_count = ATOMIC_INIT(0), > > + }; > > + > > + return stop_machine_cpuslocked(riscv_patch_text_cb, > > + &patch, cpu_online_mask); > > +} > > -- > > 2.25.1 > > > > > -- > Masami Hiramatsu <mhiramat@kernel.org>
Hi, On Wed, 1 Apr 2020 15:42:30 +0800 Zong Li <zong.li@sifive.com> wrote: > > > + > > > +static int __kprobes riscv_insn_write(void *addr, const void *insn, size_t len) > > > > Why would you add "riscv_" prefix for those functions? It seems a bit odd. > > There is no particular reason, I just was used to adding a prefix for > arch-related stuff. I have no preference here, it's OK to me to remove > the prefix of these functions, do you think we need to remove them? Yeah, it will be better, unless it can mixed up with arch-independent functions. > > > +{ > > > + void *waddr = addr; > > > + bool across_pages = (((uintptr_t) addr & ~PAGE_MASK) + len) > PAGE_SIZE; > > > + unsigned long flags = 0; > > > + int ret; > > > + > > > + raw_spin_lock_irqsave(&patch_lock, flags); > > > > This looks a bit odd since stop_machine() is protected by its own mutex, > > and also the irq is already disabled here. > > We need it because we don't always enter the riscv_patch_text_nosync() > through stop_machine mechanism. If we call the > riscv_patch_text_nosync() directly, we need a lock to protect the > page. Oh, OK, but it leads another question. Is that safe to patch the text without sync? Would you use it for UP system? I think it is better to clarify "in what case user can call _nosync()" and add a comment on it. Thank you, > > > > > Thank you, > > > > > + > > > + if (across_pages) > > > + patch_map(addr + len, FIX_TEXT_POKE1); > > > + > > > + waddr = patch_map(addr, FIX_TEXT_POKE0); > > > + > > > + ret = probe_kernel_write(waddr, insn, len); > > > + > > > + patch_unmap(FIX_TEXT_POKE0); > > > + > > > + if (across_pages) > > > + patch_unmap(FIX_TEXT_POKE1); > > > + > > > + raw_spin_unlock_irqrestore(&patch_lock, flags); > > > + > > > + return ret; > > > +} > > > +#else > > > +static int __kprobes riscv_insn_write(void *addr, const void *insn, size_t len) > > > +{ > > > + return probe_kernel_write(addr, insn, len); > > > +} > > > +#endif /* CONFIG_MMU */ > > > + > > > +int __kprobes riscv_patch_text_nosync(void *addr, const void *insns, size_t len) > > > +{ > > > + u32 *tp = addr; > > > + int ret; > > > + > > > + ret = riscv_insn_write(tp, insns, len); > > > + > > > + if (!ret) > > > + flush_icache_range((uintptr_t) tp, (uintptr_t) tp + len); > > > + > > > + return ret; > > > +} > > > + > > > +static int __kprobes riscv_patch_text_cb(void *data) > > > +{ > > > + struct riscv_insn_patch *patch = data; > > > + int ret = 0; > > > + > > > + if (atomic_inc_return(&patch->cpu_count) == 1) { > > > + ret = > > > + riscv_patch_text_nosync(patch->addr, &patch->insn, > > > + GET_INSN_LENGTH(patch->insn)); > > > + atomic_inc(&patch->cpu_count); > > > + } else { > > > + while (atomic_read(&patch->cpu_count) <= num_online_cpus()) > > > + cpu_relax(); > > > + smp_mb(); > > > + } > > > + > > > + return ret; > > > +} > > > + > > > +int __kprobes riscv_patch_text(void *addr, u32 insn) > > > +{ > > > + struct riscv_insn_patch patch = { > > > + .addr = addr, > > > + .insn = insn, > > > + .cpu_count = ATOMIC_INIT(0), > > > + }; > > > + > > > + return stop_machine_cpuslocked(riscv_patch_text_cb, > > > + &patch, cpu_online_mask); > > > +} > > > -- > > > 2.25.1 > > > > > > > > > -- > > Masami Hiramatsu <mhiramat@kernel.org>
On Thu, Apr 2, 2020 at 9:17 AM Masami Hiramatsu <mhiramat@kernel.org> wrote: > > Hi, > > On Wed, 1 Apr 2020 15:42:30 +0800 > Zong Li <zong.li@sifive.com> wrote: > > > > > + > > > > +static int __kprobes riscv_insn_write(void *addr, const void *insn, size_t len) > > > > > > Why would you add "riscv_" prefix for those functions? It seems a bit odd. > > > > There is no particular reason, I just was used to adding a prefix for > > arch-related stuff. I have no preference here, it's OK to me to remove > > the prefix of these functions, do you think we need to remove them? > > Yeah, it will be better, unless it can mixed up with arch-independent > functions. OK. I'll remove it and use NOKPROBE_SYMBOL() instead of __kprobes annotation. > > > > > +{ > > > > + void *waddr = addr; > > > > + bool across_pages = (((uintptr_t) addr & ~PAGE_MASK) + len) > PAGE_SIZE; > > > > + unsigned long flags = 0; > > > > + int ret; > > > > + > > > > + raw_spin_lock_irqsave(&patch_lock, flags); > > > > > > This looks a bit odd since stop_machine() is protected by its own mutex, > > > and also the irq is already disabled here. > > > > We need it because we don't always enter the riscv_patch_text_nosync() > > through stop_machine mechanism. If we call the > > riscv_patch_text_nosync() directly, we need a lock to protect the > > page. > > Oh, OK, but it leads another question. Is that safe to patch the > text without sync? Would you use it for UP system? > I think it is better to clarify "in what case user can call _nosync()" > and add a comment on it. The ftrace is one of the cases, as documentation of ftrace said, when dynamic ftrace is initialized, it calls kstop_machine to make the machine act like a uniprocessor so that it can freely modify code without worrying about other processors executing that same code. So the ftrace called the _nosync interface here directly. > > Thank you, > > > > > > > > > Thank you, > > > > > > > + > > > > + if (across_pages) > > > > + patch_map(addr + len, FIX_TEXT_POKE1); > > > > + > > > > + waddr = patch_map(addr, FIX_TEXT_POKE0); > > > > + > > > > + ret = probe_kernel_write(waddr, insn, len); > > > > + > > > > + patch_unmap(FIX_TEXT_POKE0); > > > > + > > > > + if (across_pages) > > > > + patch_unmap(FIX_TEXT_POKE1); > > > > + > > > > + raw_spin_unlock_irqrestore(&patch_lock, flags); > > > > + > > > > + return ret; > > > > +} > > > > +#else > > > > +static int __kprobes riscv_insn_write(void *addr, const void *insn, size_t len) > > > > +{ > > > > + return probe_kernel_write(addr, insn, len); > > > > +} > > > > +#endif /* CONFIG_MMU */ > > > > + > > > > +int __kprobes riscv_patch_text_nosync(void *addr, const void *insns, size_t len) > > > > +{ > > > > + u32 *tp = addr; > > > > + int ret; > > > > + > > > > + ret = riscv_insn_write(tp, insns, len); > > > > + > > > > + if (!ret) > > > > + flush_icache_range((uintptr_t) tp, (uintptr_t) tp + len); > > > > + > > > > + return ret; > > > > +} > > > > + > > > > +static int __kprobes riscv_patch_text_cb(void *data) > > > > +{ > > > > + struct riscv_insn_patch *patch = data; > > > > + int ret = 0; > > > > + > > > > + if (atomic_inc_return(&patch->cpu_count) == 1) { > > > > + ret = > > > > + riscv_patch_text_nosync(patch->addr, &patch->insn, > > > > + GET_INSN_LENGTH(patch->insn)); > > > > + atomic_inc(&patch->cpu_count); > > > > + } else { > > > > + while (atomic_read(&patch->cpu_count) <= num_online_cpus()) > > > > + cpu_relax(); > > > > + smp_mb(); > > > > + } > > > > + > > > > + return ret; > > > > +} > > > > + > > > > +int __kprobes riscv_patch_text(void *addr, u32 insn) > > > > +{ > > > > + struct riscv_insn_patch patch = { > > > > + .addr = addr, > > > > + .insn = insn, > > > > + .cpu_count = ATOMIC_INIT(0), > > > > + }; > > > > + > > > > + return stop_machine_cpuslocked(riscv_patch_text_cb, > > > > + &patch, cpu_online_mask); > > > > +} > > > > -- > > > > 2.25.1 > > > > > > > > > > > > > -- > > > Masami Hiramatsu <mhiramat@kernel.org> > > > -- > Masami Hiramatsu <mhiramat@kernel.org>
Hi Zong, On Fri, 3 Apr 2020 17:04:51 +0800 Zong Li <zong.li@sifive.com> wrote: > > > > > +{ > > > > > + void *waddr = addr; > > > > > + bool across_pages = (((uintptr_t) addr & ~PAGE_MASK) + len) > PAGE_SIZE; > > > > > + unsigned long flags = 0; > > > > > + int ret; > > > > > + > > > > > + raw_spin_lock_irqsave(&patch_lock, flags); > > > > > > > > This looks a bit odd since stop_machine() is protected by its own mutex, > > > > and also the irq is already disabled here. > > > > > > We need it because we don't always enter the riscv_patch_text_nosync() > > > through stop_machine mechanism. If we call the > > > riscv_patch_text_nosync() directly, we need a lock to protect the > > > page. > > > > Oh, OK, but it leads another question. Is that safe to patch the > > text without sync? Would you use it for UP system? > > I think it is better to clarify "in what case user can call _nosync()" > > and add a comment on it. > > The ftrace is one of the cases, as documentation of ftrace said, when > dynamic ftrace is initialized, it calls kstop_machine to make the > machine act like a uniprocessor so that it can freely modify code > without worrying about other processors executing that same code. So > the ftrace called the _nosync interface here directly. Hmm, even though, since it already running under kstop_machine(), no other thread will run. Could you consider to use text_mutex instead of that? The text_mutex is already widely used in x86 and kernel/kprobes.c etc. (Hmm, it seems except for x86, alternative code don't care about racing...) Thank you,
On Sat, Apr 4, 2020 at 11:14 AM Masami Hiramatsu <mhiramat@kernel.org> wrote: > > Hi Zong, > > On Fri, 3 Apr 2020 17:04:51 +0800 > Zong Li <zong.li@sifive.com> wrote: > > > > > > > +{ > > > > > > + void *waddr = addr; > > > > > > + bool across_pages = (((uintptr_t) addr & ~PAGE_MASK) + len) > PAGE_SIZE; > > > > > > + unsigned long flags = 0; > > > > > > + int ret; > > > > > > + > > > > > > + raw_spin_lock_irqsave(&patch_lock, flags); > > > > > > > > > > This looks a bit odd since stop_machine() is protected by its own mutex, > > > > > and also the irq is already disabled here. > > > > > > > > We need it because we don't always enter the riscv_patch_text_nosync() > > > > through stop_machine mechanism. If we call the > > > > riscv_patch_text_nosync() directly, we need a lock to protect the > > > > page. > > > > > > Oh, OK, but it leads another question. Is that safe to patch the > > > text without sync? Would you use it for UP system? > > > I think it is better to clarify "in what case user can call _nosync()" > > > and add a comment on it. > > > > The ftrace is one of the cases, as documentation of ftrace said, when > > dynamic ftrace is initialized, it calls kstop_machine to make the > > machine act like a uniprocessor so that it can freely modify code > > without worrying about other processors executing that same code. So > > the ftrace called the _nosync interface here directly. > > Hmm, even though, since it already running under kstop_machine(), no > other thread will run. > Could you consider to use text_mutex instead of that? The text_mutex > is already widely used in x86 and kernel/kprobes.c etc. > > (Hmm, it seems except for x86, alternative code don't care about > racing...) > Yes, text_mutex seems to be great. I'll change to use text_mutex in the next version if it works fine after testing. Thanks. > Thank you, > -- > Masami Hiramatsu <mhiramat@kernel.org>
On Sat, Apr 4, 2020 at 8:12 PM Zong Li <zong.li@sifive.com> wrote: > > On Sat, Apr 4, 2020 at 11:14 AM Masami Hiramatsu <mhiramat@kernel.org> wrote: > > > > Hi Zong, > > > > On Fri, 3 Apr 2020 17:04:51 +0800 > > Zong Li <zong.li@sifive.com> wrote: > > > > > > > > > +{ > > > > > > > + void *waddr = addr; > > > > > > > + bool across_pages = (((uintptr_t) addr & ~PAGE_MASK) + len) > PAGE_SIZE; > > > > > > > + unsigned long flags = 0; > > > > > > > + int ret; > > > > > > > + > > > > > > > + raw_spin_lock_irqsave(&patch_lock, flags); > > > > > > > > > > > > This looks a bit odd since stop_machine() is protected by its own mutex, > > > > > > and also the irq is already disabled here. > > > > > > > > > > We need it because we don't always enter the riscv_patch_text_nosync() > > > > > through stop_machine mechanism. If we call the > > > > > riscv_patch_text_nosync() directly, we need a lock to protect the > > > > > page. > > > > > > > > Oh, OK, but it leads another question. Is that safe to patch the > > > > text without sync? Would you use it for UP system? > > > > I think it is better to clarify "in what case user can call _nosync()" > > > > and add a comment on it. > > > > > > The ftrace is one of the cases, as documentation of ftrace said, when > > > dynamic ftrace is initialized, it calls kstop_machine to make the > > > machine act like a uniprocessor so that it can freely modify code > > > without worrying about other processors executing that same code. So > > > the ftrace called the _nosync interface here directly. > > > > Hmm, even though, since it already running under kstop_machine(), no > > other thread will run. > > Could you consider to use text_mutex instead of that? The text_mutex > > is already widely used in x86 and kernel/kprobes.c etc. > > > > (Hmm, it seems except for x86, alternative code don't care about > > racing...) > > The mutex_lock doesn't seem to work in ftrace context, I think it might be the reason why other architectures didn't use text_mutex in somewhere. # echo function > current_tracer [ 28.198070] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:281 [ 28.198663] in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 11, name: migration/0 [ 28.199491] CPU: 0 PID: 11 Comm: migration/0 Not tainted 5.6.0-00012-gd6f56a7a4be2-dirty #10 [ 28.200330] Call Trace: [ 28.200798] [<ffffffe00060319a>] walk_stackframe+0x0/0xcc [ 28.201395] [<ffffffe000603442>] show_stack+0x3c/0x46 [ 28.200798] [<ffffffe00060319a>] walk_stackframe+0x0/0xcc [ 28.201395] [<ffffffe000603442>] show_stack+0x3c/0x46 [ 28.201898] [<ffffffe000d498b0>] dump_stack+0x76/0x90 [ 28.202329] [<ffffffe00062c3f0>] ___might_sleep+0x100/0x10e [ 28.202720] [<ffffffe00062c448>] __might_sleep+0x4a/0x78 [ 28.203033] [<ffffffe000d61622>] mutex_lock+0x2c/0x54 [ 28.203397] [<ffffffe00060393e>] patch_insn_write+0x32/0xd8 [ 28.203780] [<ffffffe000603a94>] patch_text_nosync+0x10/0x32 [ 28.204139] [<ffffffe0006051b0>] __ftrace_modify_call+0x5c/0x6c [ 28.204497] [<ffffffe0006052c6>] ftrace_update_ftrace_func+0x20/0x4a [ 28.204919] [<ffffffe000697742>] ftrace_modify_all_code+0xa0/0x148 [ 28.205378] [<ffffffe0006977fc>] __ftrace_modify_code+0x12/0x1c [ 28.205793] [<ffffffe0006924b6>] multi_cpu_stop+0xa2/0x158 [ 28.206147] [<ffffffe0006921b0>] cpu_stopper_thread+0xa4/0x13a [ 28.206510] [<ffffffe000629f38>] smpboot_thread_fn+0xf8/0x1da [ 28.206868] [<ffffffe000625f36>] kthread+0xfa/0x12a [ 28.207201] [<ffffffe0006017e2>] ret_from_exception+0x0/0xc > > Yes, text_mutex seems to be great. I'll change to use text_mutex in > the next version if it works fine after testing. Thanks. > > > Thank you, > > -- > > Masami Hiramatsu <mhiramat@kernel.org>
On Mon, 6 Apr 2020 18:36:42 +0800 Zong Li <zong.li@sifive.com> wrote: > On Sat, Apr 4, 2020 at 8:12 PM Zong Li <zong.li@sifive.com> wrote: > > > > On Sat, Apr 4, 2020 at 11:14 AM Masami Hiramatsu <mhiramat@kernel.org> wrote: > > > > > > Hi Zong, > > > > > > On Fri, 3 Apr 2020 17:04:51 +0800 > > > Zong Li <zong.li@sifive.com> wrote: > > > > > > > > > > > +{ > > > > > > > > + void *waddr = addr; > > > > > > > > + bool across_pages = (((uintptr_t) addr & ~PAGE_MASK) + len) > PAGE_SIZE; > > > > > > > > + unsigned long flags = 0; > > > > > > > > + int ret; > > > > > > > > + > > > > > > > > + raw_spin_lock_irqsave(&patch_lock, flags); > > > > > > > > > > > > > > This looks a bit odd since stop_machine() is protected by its own mutex, > > > > > > > and also the irq is already disabled here. > > > > > > > > > > > > We need it because we don't always enter the riscv_patch_text_nosync() > > > > > > through stop_machine mechanism. If we call the > > > > > > riscv_patch_text_nosync() directly, we need a lock to protect the > > > > > > page. > > > > > > > > > > Oh, OK, but it leads another question. Is that safe to patch the > > > > > text without sync? Would you use it for UP system? > > > > > I think it is better to clarify "in what case user can call _nosync()" > > > > > and add a comment on it. > > > > > > > > The ftrace is one of the cases, as documentation of ftrace said, when > > > > dynamic ftrace is initialized, it calls kstop_machine to make the > > > > machine act like a uniprocessor so that it can freely modify code > > > > without worrying about other processors executing that same code. So > > > > the ftrace called the _nosync interface here directly. > > > > > > Hmm, even though, since it already running under kstop_machine(), no > > > other thread will run. > > > Could you consider to use text_mutex instead of that? The text_mutex > > > is already widely used in x86 and kernel/kprobes.c etc. > > > > > > (Hmm, it seems except for x86, alternative code don't care about > > > racing...) > > > > > The mutex_lock doesn't seem to work in ftrace context, I think it > might be the reason why other architectures didn't use text_mutex in > somewhere. Yes, you need to implement ftrace_arch_code_modify_prepare() and ftrace_arch_code_modify_post_process() in arch/riscv/kernel/ftrace.c. Please see arch/x86/kernel/ftrace.c. Thank you, > > # echo function > current_tracer > [ 28.198070] BUG: sleeping function called from invalid context at > kernel/locking/mutex.c:281 > [ 28.198663] in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: > 11, name: migration/0 > [ 28.199491] CPU: 0 PID: 11 Comm: migration/0 Not tainted > 5.6.0-00012-gd6f56a7a4be2-dirty #10 > [ 28.200330] Call Trace: > [ 28.200798] [<ffffffe00060319a>] walk_stackframe+0x0/0xcc > [ 28.201395] [<ffffffe000603442>] show_stack+0x3c/0x46 > [ 28.200798] [<ffffffe00060319a>] walk_stackframe+0x0/0xcc > [ 28.201395] [<ffffffe000603442>] show_stack+0x3c/0x46 > [ 28.201898] [<ffffffe000d498b0>] dump_stack+0x76/0x90 > [ 28.202329] [<ffffffe00062c3f0>] ___might_sleep+0x100/0x10e > [ 28.202720] [<ffffffe00062c448>] __might_sleep+0x4a/0x78 > [ 28.203033] [<ffffffe000d61622>] mutex_lock+0x2c/0x54 > [ 28.203397] [<ffffffe00060393e>] patch_insn_write+0x32/0xd8 > [ 28.203780] [<ffffffe000603a94>] patch_text_nosync+0x10/0x32 > [ 28.204139] [<ffffffe0006051b0>] __ftrace_modify_call+0x5c/0x6c > [ 28.204497] [<ffffffe0006052c6>] ftrace_update_ftrace_func+0x20/0x4a > [ 28.204919] [<ffffffe000697742>] ftrace_modify_all_code+0xa0/0x148 > [ 28.205378] [<ffffffe0006977fc>] __ftrace_modify_code+0x12/0x1c > [ 28.205793] [<ffffffe0006924b6>] multi_cpu_stop+0xa2/0x158 > [ 28.206147] [<ffffffe0006921b0>] cpu_stopper_thread+0xa4/0x13a > [ 28.206510] [<ffffffe000629f38>] smpboot_thread_fn+0xf8/0x1da > [ 28.206868] [<ffffffe000625f36>] kthread+0xfa/0x12a > [ 28.207201] [<ffffffe0006017e2>] ret_from_exception+0x0/0xc > > > > > Yes, text_mutex seems to be great. I'll change to use text_mutex in > > the next version if it works fine after testing. Thanks. > > > > > Thank you, > > > -- > > > Masami Hiramatsu <mhiramat@kernel.org>
On Tue, Apr 7, 2020 at 8:29 PM Masami Hiramatsu <mhiramat@kernel.org> wrote: > > On Mon, 6 Apr 2020 18:36:42 +0800 > Zong Li <zong.li@sifive.com> wrote: > > > On Sat, Apr 4, 2020 at 8:12 PM Zong Li <zong.li@sifive.com> wrote: > > > > > > On Sat, Apr 4, 2020 at 11:14 AM Masami Hiramatsu <mhiramat@kernel.org> wrote: > > > > > > > > Hi Zong, > > > > > > > > On Fri, 3 Apr 2020 17:04:51 +0800 > > > > Zong Li <zong.li@sifive.com> wrote: > > > > > > > > > > > > > +{ > > > > > > > > > + void *waddr = addr; > > > > > > > > > + bool across_pages = (((uintptr_t) addr & ~PAGE_MASK) + len) > PAGE_SIZE; > > > > > > > > > + unsigned long flags = 0; > > > > > > > > > + int ret; > > > > > > > > > + > > > > > > > > > + raw_spin_lock_irqsave(&patch_lock, flags); > > > > > > > > > > > > > > > > This looks a bit odd since stop_machine() is protected by its own mutex, > > > > > > > > and also the irq is already disabled here. > > > > > > > > > > > > > > We need it because we don't always enter the riscv_patch_text_nosync() > > > > > > > through stop_machine mechanism. If we call the > > > > > > > riscv_patch_text_nosync() directly, we need a lock to protect the > > > > > > > page. > > > > > > > > > > > > Oh, OK, but it leads another question. Is that safe to patch the > > > > > > text without sync? Would you use it for UP system? > > > > > > I think it is better to clarify "in what case user can call _nosync()" > > > > > > and add a comment on it. > > > > > > > > > > The ftrace is one of the cases, as documentation of ftrace said, when > > > > > dynamic ftrace is initialized, it calls kstop_machine to make the > > > > > machine act like a uniprocessor so that it can freely modify code > > > > > without worrying about other processors executing that same code. So > > > > > the ftrace called the _nosync interface here directly. > > > > > > > > Hmm, even though, since it already running under kstop_machine(), no > > > > other thread will run. > > > > Could you consider to use text_mutex instead of that? The text_mutex > > > > is already widely used in x86 and kernel/kprobes.c etc. > > > > > > > > (Hmm, it seems except for x86, alternative code don't care about > > > > racing...) > > > > > > > > The mutex_lock doesn't seem to work in ftrace context, I think it > > might be the reason why other architectures didn't use text_mutex in > > somewhere. > > Yes, you need to implement ftrace_arch_code_modify_prepare() and > ftrace_arch_code_modify_post_process() in arch/riscv/kernel/ftrace.c. > Please see arch/x86/kernel/ftrace.c. > Oh ok, I misunderstood it before, I just use text_mutex instead of patch_lock in patch.c. Thanks. > Thank you, > > > > > # echo function > current_tracer > > [ 28.198070] BUG: sleeping function called from invalid context at > > kernel/locking/mutex.c:281 > > [ 28.198663] in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: > > 11, name: migration/0 > > [ 28.199491] CPU: 0 PID: 11 Comm: migration/0 Not tainted > > 5.6.0-00012-gd6f56a7a4be2-dirty #10 > > [ 28.200330] Call Trace: > > [ 28.200798] [<ffffffe00060319a>] walk_stackframe+0x0/0xcc > > [ 28.201395] [<ffffffe000603442>] show_stack+0x3c/0x46 > > [ 28.200798] [<ffffffe00060319a>] walk_stackframe+0x0/0xcc > > [ 28.201395] [<ffffffe000603442>] show_stack+0x3c/0x46 > > [ 28.201898] [<ffffffe000d498b0>] dump_stack+0x76/0x90 > > [ 28.202329] [<ffffffe00062c3f0>] ___might_sleep+0x100/0x10e > > [ 28.202720] [<ffffffe00062c448>] __might_sleep+0x4a/0x78 > > [ 28.203033] [<ffffffe000d61622>] mutex_lock+0x2c/0x54 > > [ 28.203397] [<ffffffe00060393e>] patch_insn_write+0x32/0xd8 > > [ 28.203780] [<ffffffe000603a94>] patch_text_nosync+0x10/0x32 > > [ 28.204139] [<ffffffe0006051b0>] __ftrace_modify_call+0x5c/0x6c > > [ 28.204497] [<ffffffe0006052c6>] ftrace_update_ftrace_func+0x20/0x4a > > [ 28.204919] [<ffffffe000697742>] ftrace_modify_all_code+0xa0/0x148 > > [ 28.205378] [<ffffffe0006977fc>] __ftrace_modify_code+0x12/0x1c > > [ 28.205793] [<ffffffe0006924b6>] multi_cpu_stop+0xa2/0x158 > > [ 28.206147] [<ffffffe0006921b0>] cpu_stopper_thread+0xa4/0x13a > > [ 28.206510] [<ffffffe000629f38>] smpboot_thread_fn+0xf8/0x1da > > [ 28.206868] [<ffffffe000625f36>] kthread+0xfa/0x12a > > [ 28.207201] [<ffffffe0006017e2>] ret_from_exception+0x0/0xc > > > > > > > > Yes, text_mutex seems to be great. I'll change to use text_mutex in > > > the next version if it works fine after testing. Thanks. > > > > > > > Thank you, > > > > -- > > > > Masami Hiramatsu <mhiramat@kernel.org> > > > -- > Masami Hiramatsu <mhiramat@kernel.org>
diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h index 42d2c42f3cc9..2368d49eb4ef 100644 --- a/arch/riscv/include/asm/fixmap.h +++ b/arch/riscv/include/asm/fixmap.h @@ -27,6 +27,8 @@ enum fixed_addresses { FIX_FDT = FIX_FDT_END + FIX_FDT_SIZE / PAGE_SIZE - 1, FIX_PTE, FIX_PMD, + FIX_TEXT_POKE1, + FIX_TEXT_POKE0, FIX_EARLYCON_MEM_BASE, __end_of_fixed_addresses }; diff --git a/arch/riscv/include/asm/patch.h b/arch/riscv/include/asm/patch.h new file mode 100644 index 000000000000..b5918a6e0615 --- /dev/null +++ b/arch/riscv/include/asm/patch.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020 SiFive + */ + +#ifndef _ASM_RISCV_PATCH_H +#define _ASM_RISCV_PATCH_H + +int riscv_patch_text_nosync(void *addr, const void *insns, size_t len); +int riscv_patch_text(void *addr, u32 insn); + +#endif /* _ASM_RISCV_PATCH_H */ diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index f40205cb9a22..d189bd3d8501 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -4,7 +4,8 @@ # ifdef CONFIG_FTRACE -CFLAGS_REMOVE_ftrace.o = -pg +CFLAGS_REMOVE_ftrace.o = -pg +CFLAGS_REMOVE_patch.o = -pg endif extra-y += head.o @@ -26,6 +27,7 @@ obj-y += traps.o obj-y += riscv_ksyms.o obj-y += stacktrace.o obj-y += cacheinfo.o +obj-y += patch.o obj-$(CONFIG_MMU) += vdso.o vdso/ obj-$(CONFIG_RISCV_M_MODE) += clint.o diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c new file mode 100644 index 000000000000..8a4fc65ee022 --- /dev/null +++ b/arch/riscv/kernel/patch.c @@ -0,0 +1,120 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 SiFive + */ + +#include <linux/spinlock.h> +#include <linux/mm.h> +#include <linux/uaccess.h> +#include <linux/stop_machine.h> +#include <asm/kprobes.h> +#include <asm/cacheflush.h> +#include <asm/fixmap.h> + +struct riscv_insn_patch { + void *addr; + u32 insn; + atomic_t cpu_count; +}; + +#ifdef CONFIG_MMU +static DEFINE_RAW_SPINLOCK(patch_lock); + +static void __kprobes *patch_map(void *addr, int fixmap) +{ + uintptr_t uintaddr = (uintptr_t) addr; + struct page *page; + + if (core_kernel_text(uintaddr)) + page = phys_to_page(__pa_symbol(addr)); + else if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) + page = vmalloc_to_page(addr); + else + return addr; + + BUG_ON(!page); + + return (void *)set_fixmap_offset(fixmap, page_to_phys(page) + + (uintaddr & ~PAGE_MASK)); +} + +static void __kprobes patch_unmap(int fixmap) +{ + clear_fixmap(fixmap); +} + +static int __kprobes riscv_insn_write(void *addr, const void *insn, size_t len) +{ + void *waddr = addr; + bool across_pages = (((uintptr_t) addr & ~PAGE_MASK) + len) > PAGE_SIZE; + unsigned long flags = 0; + int ret; + + raw_spin_lock_irqsave(&patch_lock, flags); + + if (across_pages) + patch_map(addr + len, FIX_TEXT_POKE1); + + waddr = patch_map(addr, FIX_TEXT_POKE0); + + ret = probe_kernel_write(waddr, insn, len); + + patch_unmap(FIX_TEXT_POKE0); + + if (across_pages) + patch_unmap(FIX_TEXT_POKE1); + + raw_spin_unlock_irqrestore(&patch_lock, flags); + + return ret; +} +#else +static int __kprobes riscv_insn_write(void *addr, const void *insn, size_t len) +{ + return probe_kernel_write(addr, insn, len); +} +#endif /* CONFIG_MMU */ + +int __kprobes riscv_patch_text_nosync(void *addr, const void *insns, size_t len) +{ + u32 *tp = addr; + int ret; + + ret = riscv_insn_write(tp, insns, len); + + if (!ret) + flush_icache_range((uintptr_t) tp, (uintptr_t) tp + len); + + return ret; +} + +static int __kprobes riscv_patch_text_cb(void *data) +{ + struct riscv_insn_patch *patch = data; + int ret = 0; + + if (atomic_inc_return(&patch->cpu_count) == 1) { + ret = + riscv_patch_text_nosync(patch->addr, &patch->insn, + GET_INSN_LENGTH(patch->insn)); + atomic_inc(&patch->cpu_count); + } else { + while (atomic_read(&patch->cpu_count) <= num_online_cpus()) + cpu_relax(); + smp_mb(); + } + + return ret; +} + +int __kprobes riscv_patch_text(void *addr, u32 insn) +{ + struct riscv_insn_patch patch = { + .addr = addr, + .insn = insn, + .cpu_count = ATOMIC_INIT(0), + }; + + return stop_machine_cpuslocked(riscv_patch_text_cb, + &patch, cpu_online_mask); +}
On strict kernel memory permission, we couldn't patch code without writable permission. Preserve two holes in fixmap area, so we can map the kernel code temporarily to fixmap area, then patch the instructions. We need two pages here because we support the compressed instruction, so the instruction might be align to 2 bytes. When patching the 32-bit length instruction which is 2 bytes alignment, it will across two pages. Introduce two interfaces to patch kernel code: riscv_patch_text_nosync: - patch code without synchronization, it's caller's responsibility to synchronize all CPUs if needed. riscv_patch_text: - patch code and always synchronize with stop_machine() Signed-off-by: Zong Li <zong.li@sifive.com> --- arch/riscv/include/asm/fixmap.h | 2 + arch/riscv/include/asm/patch.h | 12 ++++ arch/riscv/kernel/Makefile | 4 +- arch/riscv/kernel/patch.c | 120 ++++++++++++++++++++++++++++++++ 4 files changed, 137 insertions(+), 1 deletion(-) create mode 100644 arch/riscv/include/asm/patch.h create mode 100644 arch/riscv/kernel/patch.c