Message ID | 178970b0-1539-8aac-76fd-972c6c46ec17@google.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | arch: allow pte_offset_map[_lock]() to fail | expand |
Hi Hugh, On Thu, Jun 08, 2023 at 12:17:24PM -0700, Hugh Dickins wrote: > Don't make update_mmu_cache() a wrapper around __update_tlb(): call it > directly, and use the ptep (or pmdp) provided by the caller, instead of > re-calling pte_offset_map() - which would raise a question of whether a > pte_unmap() is needed to balance it. > > Check whether the "ptep" provided by the caller is actually the pmdp, > instead of testing pmd_huge(): or test pmd_huge() too and warn if it > disagrees? This is "hazardous" territory: needs review and testing. > > Signed-off-by: Hugh Dickins <hughd@google.com> > --- > arch/mips/include/asm/pgtable.h | 15 +++------------ > arch/mips/mm/tlb-r3k.c | 5 +++-- > arch/mips/mm/tlb-r4k.c | 9 +++------ > 3 files changed, 9 insertions(+), 20 deletions(-) > > diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h > index 574fa14ac8b2..9175dfab08d5 100644 > --- a/arch/mips/include/asm/pgtable.h > +++ b/arch/mips/include/asm/pgtable.h > @@ -565,15 +565,8 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) > } > #endif > > -extern void __update_tlb(struct vm_area_struct *vma, unsigned long address, > - pte_t pte); > - > -static inline void update_mmu_cache(struct vm_area_struct *vma, > - unsigned long address, pte_t *ptep) > -{ > - pte_t pte = *ptep; > - __update_tlb(vma, address, pte); > -} > +extern void update_mmu_cache(struct vm_area_struct *vma, > + unsigned long address, pte_t *ptep); > > #define __HAVE_ARCH_UPDATE_MMU_TLB > #define update_mmu_tlb update_mmu_cache > @@ -581,9 +574,7 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, > static inline void update_mmu_cache_pmd(struct vm_area_struct *vma, > unsigned long address, pmd_t *pmdp) > { > - pte_t pte = *(pte_t *)pmdp; > - > - __update_tlb(vma, address, pte); > + update_mmu_cache(vma, address, (pte_t *)pmdp); > } > > /* > diff --git a/arch/mips/mm/tlb-r3k.c b/arch/mips/mm/tlb-r3k.c > index 53dfa2b9316b..e5722cd8dd6d 100644 > --- a/arch/mips/mm/tlb-r3k.c > +++ b/arch/mips/mm/tlb-r3k.c > @@ -176,7 +176,8 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) > } > } > > -void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte) > +void update_mmu_cache(struct vm_area_struct *vma, > + unsigned long address, pte_t *ptep) > { > unsigned long asid_mask = cpu_asid_mask(¤t_cpu_data); > unsigned long flags; > @@ -203,7 +204,7 @@ void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte) > BARRIER; > tlb_probe(); > idx = read_c0_index(); > - write_c0_entrylo0(pte_val(pte)); > + write_c0_entrylo0(pte_val(*ptep)); > write_c0_entryhi(address | pid); > if (idx < 0) { /* BARRIER */ > tlb_write_random(); > diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c > index 1b939abbe4ca..c96725d17cab 100644 > --- a/arch/mips/mm/tlb-r4k.c > +++ b/arch/mips/mm/tlb-r4k.c > @@ -290,14 +290,14 @@ void local_flush_tlb_one(unsigned long page) > * updates the TLB with the new pte(s), and another which also checks > * for the R4k "end of page" hardware bug and does the needy. > */ > -void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte) > +void update_mmu_cache(struct vm_area_struct *vma, > + unsigned long address, pte_t *ptep) > { > unsigned long flags; > pgd_t *pgdp; > p4d_t *p4dp; > pud_t *pudp; > pmd_t *pmdp; > - pte_t *ptep; > int idx, pid; > > /* > @@ -326,10 +326,9 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte) > idx = read_c0_index(); > #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT > /* this could be a huge page */ > - if (pmd_huge(*pmdp)) { > + if (ptep == (pte_t *)pmdp) { > unsigned long lo; > write_c0_pagemask(PM_HUGE_MASK); > - ptep = (pte_t *)pmdp; > lo = pte_to_entrylo(pte_val(*ptep)); > write_c0_entrylo0(lo); > write_c0_entrylo1(lo + (HPAGE_SIZE >> 7)); > @@ -344,8 +343,6 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte) > } else > #endif > { > - ptep = pte_offset_map(pmdp, address); > - > #if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32) > #ifdef CONFIG_XPA > write_c0_entrylo0(pte_to_entrylo(ptep->pte_high)); > -- > 2.35.3 > I just bisected a crash while powering down a MIPS machine in QEMU to this change as commit 8044511d3893 ("mips: update_mmu_cache() can replace __update_tlb()") in linux-next. Unfortunately, I can still reproduce it with the existing fix you have for this change on the mailing list, which is present in next-20230614. I can reproduce it with the GCC 13.1.0 on kernel.org [1]. $ make -skj"$(nproc)" ARCH=mips CROSS_COMPILE=mips-linux- mrproper malta_defconfig vmlinux $ qemu-system-mipsel \ -display none \ -nodefaults \ -cpu 24Kf \ -machine malta \ -kernel vmlinux \ -initrd rootfs.cpio \ -m 512m \ -serial mon:stdio ... Linux version 6.4.0-rc6-next-20230614 (nathan@dev-arch.thelio-3990X) (mips-linux-gcc (GCC) 13.1.0, GNU ld (GNU Binutils) 2.40) #1 SMP Wed Jun 14 16:13:02 MST 2023 ... Run /init as init process process '/bin/busybox' started with executable stack do_page_fault(): sending SIGSEGV to init for invalid read access from 0000003c epc = 77b893dc in ld-uClibc-1.0.39.so[77b84000+8000] ra = 77b8930c in ld-uClibc-1.0.39.so[77b84000+8000] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]--- The rootfs is available at [2] if it is needed. I am more than happy to provide additional information or test patches if necessary. [1]: https://mirrors.edge.kernel.org/pub/tools/crosstool/ [2]: https://github.com/ClangBuiltLinux/boot-utils/releases/download/20230609-194440/mipsel-rootfs.cpio.zst Cheers, Nathan
On Wed, 14 Jun 2023, Nathan Chancellor wrote: > Hi Hugh, > > On Thu, Jun 08, 2023 at 12:17:24PM -0700, Hugh Dickins wrote: > > Don't make update_mmu_cache() a wrapper around __update_tlb(): call it > > directly, and use the ptep (or pmdp) provided by the caller, instead of > > re-calling pte_offset_map() - which would raise a question of whether a > > pte_unmap() is needed to balance it. > > > > Check whether the "ptep" provided by the caller is actually the pmdp, > > instead of testing pmd_huge(): or test pmd_huge() too and warn if it > > disagrees? This is "hazardous" territory: needs review and testing. > > > > Signed-off-by: Hugh Dickins <hughd@google.com> > > --- > > arch/mips/include/asm/pgtable.h | 15 +++------------ > > arch/mips/mm/tlb-r3k.c | 5 +++-- > > arch/mips/mm/tlb-r4k.c | 9 +++------ > > 3 files changed, 9 insertions(+), 20 deletions(-) > > > > diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h > > index 574fa14ac8b2..9175dfab08d5 100644 > > --- a/arch/mips/include/asm/pgtable.h > > +++ b/arch/mips/include/asm/pgtable.h > > @@ -565,15 +565,8 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) > > } > > #endif > > > > -extern void __update_tlb(struct vm_area_struct *vma, unsigned long address, > > - pte_t pte); > > - > > -static inline void update_mmu_cache(struct vm_area_struct *vma, > > - unsigned long address, pte_t *ptep) > > -{ > > - pte_t pte = *ptep; > > - __update_tlb(vma, address, pte); > > -} > > +extern void update_mmu_cache(struct vm_area_struct *vma, > > + unsigned long address, pte_t *ptep); > > > > #define __HAVE_ARCH_UPDATE_MMU_TLB > > #define update_mmu_tlb update_mmu_cache > > @@ -581,9 +574,7 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, > > static inline void update_mmu_cache_pmd(struct vm_area_struct *vma, > > unsigned long address, pmd_t *pmdp) > > { > > - pte_t pte = *(pte_t *)pmdp; > > - > > - __update_tlb(vma, address, pte); > > + update_mmu_cache(vma, address, (pte_t *)pmdp); > > } > > > > /* > > diff --git a/arch/mips/mm/tlb-r3k.c b/arch/mips/mm/tlb-r3k.c > > index 53dfa2b9316b..e5722cd8dd6d 100644 > > --- a/arch/mips/mm/tlb-r3k.c > > +++ b/arch/mips/mm/tlb-r3k.c > > @@ -176,7 +176,8 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) > > } > > } > > > > -void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte) > > +void update_mmu_cache(struct vm_area_struct *vma, > > + unsigned long address, pte_t *ptep) > > { > > unsigned long asid_mask = cpu_asid_mask(¤t_cpu_data); > > unsigned long flags; > > @@ -203,7 +204,7 @@ void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte) > > BARRIER; > > tlb_probe(); > > idx = read_c0_index(); > > - write_c0_entrylo0(pte_val(pte)); > > + write_c0_entrylo0(pte_val(*ptep)); > > write_c0_entryhi(address | pid); > > if (idx < 0) { /* BARRIER */ > > tlb_write_random(); > > diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c > > index 1b939abbe4ca..c96725d17cab 100644 > > --- a/arch/mips/mm/tlb-r4k.c > > +++ b/arch/mips/mm/tlb-r4k.c > > @@ -290,14 +290,14 @@ void local_flush_tlb_one(unsigned long page) > > * updates the TLB with the new pte(s), and another which also checks > > * for the R4k "end of page" hardware bug and does the needy. > > */ > > -void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte) > > +void update_mmu_cache(struct vm_area_struct *vma, > > + unsigned long address, pte_t *ptep) > > { > > unsigned long flags; > > pgd_t *pgdp; > > p4d_t *p4dp; > > pud_t *pudp; > > pmd_t *pmdp; > > - pte_t *ptep; > > int idx, pid; > > > > /* > > @@ -326,10 +326,9 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte) > > idx = read_c0_index(); > > #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT > > /* this could be a huge page */ > > - if (pmd_huge(*pmdp)) { > > + if (ptep == (pte_t *)pmdp) { > > unsigned long lo; > > write_c0_pagemask(PM_HUGE_MASK); > > - ptep = (pte_t *)pmdp; > > lo = pte_to_entrylo(pte_val(*ptep)); > > write_c0_entrylo0(lo); > > write_c0_entrylo1(lo + (HPAGE_SIZE >> 7)); > > @@ -344,8 +343,6 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte) > > } else > > #endif > > { > > - ptep = pte_offset_map(pmdp, address); > > - > > #if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32) > > #ifdef CONFIG_XPA > > write_c0_entrylo0(pte_to_entrylo(ptep->pte_high)); > > -- > > 2.35.3 > > > > I just bisected a crash while powering down a MIPS machine in QEMU to > this change as commit 8044511d3893 ("mips: update_mmu_cache() can > replace __update_tlb()") in linux-next. Thank you, Nathan, that's very helpful indeed. This patch certainly knew that it wanted testing, and I'm glad to hear that it is now seeing some. While powering down? The messages below look like it was just coming up, but no doubt that's because you were bisecting (or because I'm unfamiliar with what messages to expect there). It's probably irrelevant information, but I wonder whether the (V)machine worked well enough for a while before you first powered down and spotted the problem, or whether it's never got much further than trying to run init (busybox)? I'm trying to get a feel for whether the problem occurs under common or uncommon conditions. > Unfortunately, I can still > reproduce it with the existing fix you have for this change on the > mailing list, which is present in next-20230614. Right, that later fix was only for a build warning, nothing functional (or at least I hoped that it wasn't making any functional difference). Thanks a lot for the detailed instructions below: unfortunately, those would draw me into a realm of testing I've never needed to enter before, so a lot of time spent on setup and learning. Usually, I just stare at the source. What this probably says is that I should revert most my cleanup there, and keep as close to the existing code as possible. But some change is needed, and I may need to understand (or have a good guess at) what was going wrong, to decide what kind of retreat will be successful. Back to the source for a while: I hope I'll find examples in nearby MIPS kernel source (and git history), which will hint at the right way forward. Then send you a patch against next-20230614 to try, when I'm reasonably confident that it's enough to satisfy my purpose, but likely not to waste your time. Thanks, until later, Hugh > > I can reproduce it with the GCC 13.1.0 on kernel.org [1]. > > $ make -skj"$(nproc)" ARCH=mips CROSS_COMPILE=mips-linux- mrproper malta_defconfig vmlinux > > $ qemu-system-mipsel \ > -display none \ > -nodefaults \ > -cpu 24Kf \ > -machine malta \ > -kernel vmlinux \ > -initrd rootfs.cpio \ > -m 512m \ > -serial mon:stdio > ... > Linux version 6.4.0-rc6-next-20230614 (nathan@dev-arch.thelio-3990X) (mips-linux-gcc (GCC) 13.1.0, GNU ld (GNU Binutils) 2.40) #1 SMP Wed Jun 14 16:13:02 MST 2023 > ... > Run /init as init process > process '/bin/busybox' started with executable stack > do_page_fault(): sending SIGSEGV to init for invalid read access from 0000003c > epc = 77b893dc in ld-uClibc-1.0.39.so[77b84000+8000] > ra = 77b8930c in ld-uClibc-1.0.39.so[77b84000+8000] > Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b > ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]--- > > The rootfs is available at [2] if it is needed. I am more than happy to > provide additional information or test patches if necessary. > > [1]: https://mirrors.edge.kernel.org/pub/tools/crosstool/ > [2]: https://github.com/ClangBuiltLinux/boot-utils/releases/download/20230609-194440/mipsel-rootfs.cpio.zst > > Cheers, > Nathan
On Wed, 14 Jun 2023, Hugh Dickins wrote: > On Wed, 14 Jun 2023, Nathan Chancellor wrote: > > > > I just bisected a crash while powering down a MIPS machine in QEMU to > > this change as commit 8044511d3893 ("mips: update_mmu_cache() can > > replace __update_tlb()") in linux-next. > > Thank you, Nathan, that's very helpful indeed. This patch certainly knew > that it wanted testing, and I'm glad to hear that it is now seeing some. > > While powering down? The messages below look like it was just coming up, > but no doubt that's because you were bisecting (or because I'm unfamiliar > with what messages to expect there). It's probably irrelevant information, > but I wonder whether the (V)machine worked well enough for a while before > you first powered down and spotted the problem, or whether it's never got > much further than trying to run init (busybox)? I'm trying to get a feel > for whether the problem occurs under common or uncommon conditions. > > > Unfortunately, I can still > > reproduce it with the existing fix you have for this change on the > > mailing list, which is present in next-20230614. > > Right, that later fix was only for a build warning, nothing functional > (or at least I hoped that it wasn't making any functional difference). > > Thanks a lot for the detailed instructions below: unfortunately, those > would draw me into a realm of testing I've never needed to enter before, > so a lot of time spent on setup and learning. Usually, I just stare at > the source. > > What this probably says is that I should revert most my cleanup there, > and keep as close to the existing code as possible. But some change is > needed, and I may need to understand (or have a good guess at) what was > going wrong, to decide what kind of retreat will be successful. > > Back to the source for a while: I hope I'll find examples in nearby MIPS > kernel source (and git history), which will hint at the right way forward. > Then send you a patch against next-20230614 to try, when I'm reasonably > confident that it's enough to satisfy my purpose, but likely not to waste > your time. I'm going to take advantage of your good nature by attaching two alternative patches, either to go on top of next-20230614. mips1.patch, arch/mips/mm/tlb-r4k.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) is by far my favourite. I couldn't see anything wrong with what's already there for mips, but it seems possible that (though I didn't find it) somewhere calls update_mmu_cache_pmd() on a page table. So mips1.patch restores the pmd_huge() check, and cleans up further by removing the silly pgdp, p4dp, pudp, pmdp stuff: the pointer has now been passed in by the caller, why walk the tree again? I should have done it this way before. But if that doesn't work, then I'm afraid it will have to be mips2.patch, arch/mips/include/asm/pgtable.h | 15 ++++++++++++--- arch/mips/mm/tlb-r3k.c | 5 ++--- arch/mips/mm/tlb-r4k.c | 27 ++++++++++++++++++--------- 3 files changed, 32 insertions(+), 15 deletions(-) which reverts all of the original patch and its build warning fix, and does a pte_unmap() to balance the silly pte_offset_map() there; with an apologetic comment for this being about the only place in the tree where I have no idea what to do if ptep were NULL. I do hope that you find the first fixes the breakage; but if not, then I even more fervently hope that the second will, despite my hating it. Touch wood for the first, fingers crossed for the second, thanks, Hugh
On Wed, Jun 14, 2023 at 10:43:30PM -0700, Hugh Dickins wrote: > On Wed, 14 Jun 2023, Hugh Dickins wrote: > > On Wed, 14 Jun 2023, Nathan Chancellor wrote: > > > > > > I just bisected a crash while powering down a MIPS machine in QEMU to > > > this change as commit 8044511d3893 ("mips: update_mmu_cache() can > > > replace __update_tlb()") in linux-next. > > > > Thank you, Nathan, that's very helpful indeed. This patch certainly knew > > that it wanted testing, and I'm glad to hear that it is now seeing some. > > > > While powering down? The messages below look like it was just coming up, > > but no doubt that's because you were bisecting (or because I'm unfamiliar > > with what messages to expect there). It's probably irrelevant information, > > but I wonder whether the (V)machine worked well enough for a while before > > you first powered down and spotted the problem, or whether it's never got > > much further than trying to run init (busybox)? I'm trying to get a feel > > for whether the problem occurs under common or uncommon conditions. Ugh sorry, I have been looking into too many bugs lately and got my wires crossed :) this is indeed a problem when running init (which is busybox, this is a simple Buildroot file system). > > > Unfortunately, I can still > > > reproduce it with the existing fix you have for this change on the > > > mailing list, which is present in next-20230614. > > > > Right, that later fix was only for a build warning, nothing functional > > (or at least I hoped that it wasn't making any functional difference). > > > > Thanks a lot for the detailed instructions below: unfortunately, those > > would draw me into a realm of testing I've never needed to enter before, > > so a lot of time spent on setup and learning. Usually, I just stare at > > the source. > > > > What this probably says is that I should revert most my cleanup there, > > and keep as close to the existing code as possible. But some change is > > needed, and I may need to understand (or have a good guess at) what was > > going wrong, to decide what kind of retreat will be successful. > > > > Back to the source for a while: I hope I'll find examples in nearby MIPS > > kernel source (and git history), which will hint at the right way forward. > > Then send you a patch against next-20230614 to try, when I'm reasonably > > confident that it's enough to satisfy my purpose, but likely not to waste > > your time. > > I'm going to take advantage of your good nature by attaching > two alternative patches, either to go on top of next-20230614. > > mips1.patch, > arch/mips/mm/tlb-r4k.c | 12 +----------- > 1 file changed, 1 insertion(+), 11 deletions(-) > > is by far my favourite. I couldn't see anything wrong with what's > already there for mips, but it seems possible that (though I didn't > find it) somewhere calls update_mmu_cache_pmd() on a page table. So > mips1.patch restores the pmd_huge() check, and cleans up further by > removing the silly pgdp, p4dp, pudp, pmdp stuff: the pointer has now > been passed in by the caller, why walk the tree again? I should have > done it this way before. > > But if that doesn't work, then I'm afraid it will have to be > mips2.patch, > arch/mips/include/asm/pgtable.h | 15 ++++++++++++--- > arch/mips/mm/tlb-r3k.c | 5 ++--- > arch/mips/mm/tlb-r4k.c | 27 ++++++++++++++++++--------- > 3 files changed, 32 insertions(+), 15 deletions(-) > > which reverts all of the original patch and its build warning fix, > and does a pte_unmap() to balance the silly pte_offset_map() there; > with an apologetic comment for this being about the only place in > the tree where I have no idea what to do if ptep were NULL. > > I do hope that you find the first fixes the breakage; but if not, then I hate to be the bearer of bad news but the first patch did not fix the breakage, I see the same issue. > I even more fervently hope that the second will, despite my hating it. > Touch wood for the first, fingers crossed for the second, thanks, Thankfully, the second one does. Thanks for the quick and thoughtful responses! Cheers, Nathan
On Thu, 15 Jun 2023, Nathan Chancellor wrote: > On Wed, Jun 14, 2023 at 10:43:30PM -0700, Hugh Dickins wrote: > > > > I do hope that you find the first fixes the breakage; but if not, then > > I hate to be the bearer of bad news but the first patch did not fix the > breakage, I see the same issue. Boo! > > > I even more fervently hope that the second will, despite my hating it. > > Touch wood for the first, fingers crossed for the second, thanks, > > Thankfully, the second one does. Thanks for the quick and thoughtful > responses! Hurrah! Thanks a lot, Nathan. I'll set aside my disappointment and curiosity, clearly I'm not going to have much of a future as a MIPS programmer. I must take a break, then rush Andrew the second patch, well, not exactly that second patch, since most of that is revert: I'll just send the few lines of replacement patch (with a new Subject line, as update_mmu_cache() goes back to being separate from __update_tlb()). Unless you object, I'll include a Tested-by: you. I realize that your testing is limited to seeing it running; but that's true of most of the testing at this stage - it gets to be more interesting when the patch that adds the rcu_read_lock() and rcu_read_unlock() is added on top later. Thanks again, Hugh
On Wed, Jun 14, 2023 at 04:17:58PM -0700, Nathan Chancellor wrote: > Hi Hugh, > > On Thu, Jun 08, 2023 at 12:17:24PM -0700, Hugh Dickins wrote: > > Don't make update_mmu_cache() a wrapper around __update_tlb(): call it > > directly, and use the ptep (or pmdp) provided by the caller, instead of > > re-calling pte_offset_map() - which would raise a question of whether a > > pte_unmap() is needed to balance it. > > > > Check whether the "ptep" provided by the caller is actually the pmdp, > > instead of testing pmd_huge(): or test pmd_huge() too and warn if it > > disagrees? This is "hazardous" territory: needs review and testing. > > > > Signed-off-by: Hugh Dickins <hughd@google.com> > > --- > > arch/mips/include/asm/pgtable.h | 15 +++------------ > > arch/mips/mm/tlb-r3k.c | 5 +++-- > > arch/mips/mm/tlb-r4k.c | 9 +++------ > > 3 files changed, 9 insertions(+), 20 deletions(-) > > > > I just bisected a crash while powering down a MIPS machine in QEMU to > this change as commit 8044511d3893 ("mips: update_mmu_cache() can > replace __update_tlb()") in linux-next. Unfortunately, I can still > reproduce it with the existing fix you have for this change on the > mailing list, which is present in next-20230614. > > I can reproduce it with the GCC 13.1.0 on kernel.org [1]. > > $ make -skj"$(nproc)" ARCH=mips CROSS_COMPILE=mips-linux- mrproper malta_defconfig vmlinux > > $ qemu-system-mipsel \ > -display none \ > -nodefaults \ > -cpu 24Kf \ > -machine malta \ > -kernel vmlinux \ > -initrd rootfs.cpio \ > -m 512m \ > -serial mon:stdio > ... > Linux version 6.4.0-rc6-next-20230614 (nathan@dev-arch.thelio-3990X) (mips-linux-gcc (GCC) 13.1.0, GNU ld (GNU Binutils) 2.40) #1 SMP Wed Jun 14 16:13:02 MST 2023 > ... > Run /init as init process > process '/bin/busybox' started with executable stack > do_page_fault(): sending SIGSEGV to init for invalid read access from 0000003c > epc = 77b893dc in ld-uClibc-1.0.39.so[77b84000+8000] > ra = 77b8930c in ld-uClibc-1.0.39.so[77b84000+8000] > Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b > ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]--- > > The rootfs is available at [2] if it is needed. I am more than happy to > provide additional information or test patches if necessary. > > [1]: https://mirrors.edge.kernel.org/pub/tools/crosstool/ > [2]: https://github.com/ClangBuiltLinux/boot-utils/releases/download/20230609-194440/mipsel-rootfs.cpio.zst Seeing this on real h/w as well (just to confirm). Linux version 6.4.0-rc4-00437-g4bab5c42a698 (root@yuzhao.bld.corp.google.com) (mips64el-linux-gnuabi64-gcc (Debian 12.2.0-14) 12.2.0, GNU ld (GNU Binutils for Debian) 2.40) #3 SMP PREEMPT Thu Jun 15 01:05:20 MDT 2023 Skipping L2 locking due to reduced L2 cache size CVMSEG size: 2 cache lines (256 bytes) printk: bootconsole [early0] enabled CPU0 revision is: 000d9602 (Cavium Octeon III) FPU revision is: 00739600 Kernel sections are not in the memory maps Wasting 243712 bytes for tracking 4352 unused pages Initrd not found or empty - disabling initrd Using passed Device Tree. software IO TLB: SWIOTLB bounce buffer size adjusted to 0MB software IO TLB: area num 1. software IO TLB: mapped [mem 0x000000000370d000-0x000000000374d000] (0MB) Primary instruction cache 78kB, virtually tagged, 39 way, 16 sets, linesize 128 bytes. Primary data cache 32kB, 32-way, 8 sets, linesize 128 bytes. Zone ranges: DMA32 [mem 0x0000000001100000-0x00000000efffffff] Normal empty Movable zone start for each node Early memory node ranges node 0: [mem 0x0000000001100000-0x0000000003646fff] node 0: [mem 0x0000000003700000-0x000000000fafffff] node 0: [mem 0x0000000020000000-0x000000004ebfffff] Initmem setup node 0 [mem 0x0000000001100000-0x000000004ebfffff] On node 0, zone DMA32: 4352 pages in unavailable ranges On node 0, zone DMA32: 185 pages in unavailable ranges On node 0, zone DMA32: 1280 pages in unavailable ranges On node 0, zone DMA32: 5120 pages in unavailable ranges percpu: Embedded 15 pages/cpu s24368 r8192 d28880 u61440 pcpu-alloc: s24368 r8192 d28880 u61440 alloc=15*4096 pcpu-alloc: [0] 0 [0] 1 [0] 2 [0] 3 Kernel command line: loglevel=8 console=ttyS0,115200 printk: log_buf_len individual max cpu contribution: 4096 bytes printk: log_buf_len total cpu_extra contributions: 12288 bytes printk: log_buf_len min size: 16384 bytes printk: log_buf_len: 32768 bytes printk: early log buf free: 14184(86%) Dentry cache hash table entries: 131072 (order: 8, 1048576 bytes, linear) Inode-cache hash table entries: 65536 (order: 7, 524288 bytes, linear) Built 1 zonelists, mobility grouping on. Total pages: 247772 mem auto-init: stack:all(zero), heap alloc:off, heap free:off Memory: 950032K/1004828K available (8058K kernel code, 575K rwdata, 1880K rodata, 27488K init, 158K bss, 54796K reserved, 0K cma-reserved) rcu: Preemptible hierarchical RCU implementation. rcu: RCU event tracing is enabled. rcu: RCU restricting CPUs from NR_CPUS=32 to nr_cpu_ids=4. rcu: RCU calculated value of scheduler-enlistment delay is 10 jiffies. rcu: Adjusting geometry for rcu_fanout_leaf=16, nr_cpu_ids=4 NR_IRQS: 512 CIB interrupt controller probed: 800107000000e000 23 CIB interrupt controller probed: 800107000000e200 12 CIB interrupt controller probed: 800107000000e400 6 CIB interrupt controller probed: 800107000000ec00 15 CIB interrupt controller probed: 800107000000e600 4 CIB interrupt controller probed: 800107000000e800 11 CIB interrupt controller probed: 800107000000e900 11 rcu: srcu_init: Setting srcu_struct sizes based on contention. clocksource: OCTEON_CVMCOUNT: mask: 0xffffffffffffffff max_cycles: 0x1cd42e4dffb, max_idle_ns: 881590591483 ns Calibrating delay loop (skipped) preset value.. 2000.00 BogoMIPS (lpj=10000000) pid_max: default: 32768 minimum: 301 LSM: initializing lsm=capability,integrity Mount-cache hash table entries: 2048 (order: 2, 16384 bytes, linear) Mountpoint-cache hash table entries: 2048 (order: 2, 16384 bytes, linear) rcu: Hierarchical SRCU implementation. rcu: Max phase no-delay instances is 1000. smp: Bringing up secondary CPUs ... SMP: Booting CPU01 (CoreId 1)... CPU1 revision is: 000d9602 (Cavium Octeon III) FPU revision is: 00739600 SMP: Booting CPU02 (CoreId 2)... CPU2 revision is: 000d9602 (Cavium Octeon III) FPU revision is: 00739600 SMP: Booting CPU03 (CoreId 3)... CPU3 revision is: 000d9602 (Cavium Octeon III) FPU revision is: 00739600 smp: Brought up 1 node, 4 CPUs devtmpfs: initialized clocksource: jiffies: mask: 0xffffffff max_cycles: 0xffffffff, max_idle_ns: 19112604462750000 ns futex hash table entries: 1024 (order: 5, 131072 bytes, linear) NET: Registered PF_NETLINK/PF_ROUTE protocol family PCIe: Initializing port 0 PCIe: BIST2 FAILED for port 0 (0x0000000000000003) PCIe: Link timeout on port 0, probably the slot is empty PCIe: Initializing port 1 PCIe: BIST FAILED for port 1 (0xffffffffffffffff) PCIe: Link timeout on port 1, probably the slot is empty HugeTLB: registered 2.00 MiB page size, pre-allocated 0 pages HugeTLB: 0 KiB vmemmap can be freed for a 2.00 MiB page SCSI subsystem initialized libata version 3.00 loaded. usbcore: registered new interface driver usbfs usbcore: registered new interface driver hub usbcore: registered new device driver usb pps_core: LinuxPPS API ver. 1 registered pps_core: Software ver. 5.3.6 - Copyright 2005-2007 Rodolfo Giometti <giometti@linux.it> PTP clock support registered EDAC MC: Ver: 3.0.0 PCI host bridge to bus 0000:00 pci_bus 0000:00: root bus resource [mem 0x1000000000000] pci_bus 0000:00: root bus resource [io 0x0000] pci_bus 0000:00: No busn resource found for root bus, will use [bus 00-ff] pci_bus 0000:00: busn_res: [bus 00-ff] end is updated to 00 vgaarb: loaded clocksource: Switched to clocksource OCTEON_CVMCOUNT NET: Registered PF_INET protocol family IP idents hash table entries: 16384 (order: 5, 131072 bytes, linear) tcp_listen_portaddr_hash hash table entries: 512 (order: 1, 8192 bytes, linear) Table-perturb hash table entries: 65536 (order: 6, 262144 bytes, linear) TCP established hash table entries: 8192 (order: 4, 65536 bytes, linear) TCP bind hash table entries: 8192 (order: 6, 262144 bytes, linear) TCP: Hash tables configured (established 8192 bind 8192) UDP hash table entries: 512 (order: 2, 16384 bytes, linear) UDP-Lite hash table entries: 512 (order: 2, 16384 bytes, linear) NET: Registered PF_UNIX/PF_LOCAL protocol family RPC: Registered named UNIX socket transport module. RPC: Registered udp transport module. RPC: Registered tcp transport module. RPC: Registered tcp NFSv4.1 backchannel transport module. PCI: CLS 0 bytes, default 128 platform 1180068000000.uctl: clocks initialized. platform 1180069000000.uctl: clocks initialized. Starting KVM with MIPS VZ extensions workingset: timestamp_bits=62 max_order=18 bucket_order=0 NFS: Registering the id_resolver key type Key type id_resolver registered Key type id_legacy registered nfs4filelayout_init: NFSv4 File Layout Driver Registering... nfs4flexfilelayout_init: NFSv4 Flexfile Layout Driver Registering... io scheduler mq-deadline registered io scheduler kyber registered io scheduler bfq registered gpio gpiochip0: Static allocation of GPIO base is deprecated, use dynamic allocation. octeon_gpio 1070000000800.gpio-controller: OCTEON GPIO driver probed. Serial: 8250/16550 driver, 2 ports, IRQ sharing disabled printk: console [ttyS0] disabled 1180000000800.serial: ttyS0 at MMIO 0x1180000000800 (irq = 34, base_baud = 25000000) is a OCTEON printk: console [ttyS0] enabled printk: console [ttyS0] enabled printk: bootconsole [early0] disabled printk: bootconsole [early0] disabled 1180000000c00.serial: ttyS1 at MMIO 0x1180000000c00 (irq = 35, base_baud = 25000000) is a OCTEON loop: module loaded Driver 'pata_octeon_cf' needs updating - please use bus_type methods slram: not enough parameters. spi-octeon 1070000001000.spi: OCTEON SPI bus driver process '/bin/kmod' started with executable stack do_page_fault(): sending SIGSEGV to modprobe for invalid read access from 0000000000000298 epc = 000000fff3346470 in ld.so.1[fff3328000+2e000] ra = 000000fff33456d0 in ld.so.1[fff3328000+2e000] do_page_fault(): sending SIGSEGV to modprobe for invalid read access from 0000000000000298 epc = 000000fff3c78470 in ld.so.1[fff3c5a000+2e000] ra = 000000fff3c776d0 in ld.so.1[fff3c5a000+2e000] do_page_fault(): sending SIGSEGV to modprobe for invalid read access from 0000000000021da8 epc = 000000fff35aa2c0 in ld.so.1[fff358d000+2e000] ra = 000000fff35aa688 in ld.so.1[fff358d000+2e000] do_page_fault(): sending SIGSEGV to modprobe for invalid read access from 0000000000000298 epc = 000000fff34cc470 in ld.so.1[fff34ae000+2e000] ra = 000000fff34cb6d0 in ld.so.1[fff34ae000+2e000] mdio_octeon 1180000001800.mdio: Probed mdio_octeon 1180000001900.mdio: Probed dwc3 1680000000000.xhci: Configuration mismatch. dr_mode forced to host dwc3 1690000000000.xhci: Configuration mismatch. dr_mode forced to host xhci-hcd xhci-hcd.0.auto: xHCI Host Controller xhci-hcd xhci-hcd.0.auto: new USB bus registered, assigned bus number 1 xhci-hcd xhci-hcd.0.auto: hcc params 0x0220f06d hci version 0x100 quirks 0x0000000002010010 xhci-hcd xhci-hcd.0.auto: irq 25, io mem 0x1680000000000 dwc3 1680000000000.xhci: xhci_plat_probe get usb3phy fail (ret=-6) xhci-hcd xhci-hcd.0.auto: xHCI Host Controller xhci-hcd xhci-hcd.0.auto: new USB bus registered, assigned bus number 2 xhci-hcd xhci-hcd.0.auto: Host supports USB 3.0 SuperSpeed hub 1-0:1.0: USB hub found hub 1-0:1.0: 1 port detected usb usb2: We don't know the algorithms for LPM for this host, disabling LPM. hub 2-0:1.0: USB hub found hub 2-0:1.0: 1 port detected xhci-hcd xhci-hcd.1.auto: xHCI Host Controller xhci-hcd xhci-hcd.1.auto: new USB bus registered, assigned bus number 3 xhci-hcd xhci-hcd.1.auto: hcc params 0x0220f06d hci version 0x100 quirks 0x0000000002010010 xhci-hcd xhci-hcd.1.auto: irq 26, io mem 0x1690000000000 dwc3 1690000000000.xhci: xhci_plat_probe get usb3phy fail (ret=-6) xhci-hcd xhci-hcd.1.auto: xHCI Host Controller xhci-hcd xhci-hcd.1.auto: new USB bus registered, assigned bus number 4 xhci-hcd xhci-hcd.1.auto: Host supports USB 3.0 SuperSpeed hub 3-0:1.0: USB hub found hub 3-0:1.0: 1 port detected usb usb4: We don't know the algorithms for LPM for this host, disabling LPM. hub 4-0:1.0: USB hub found hub 4-0:1.0: 1 port detected usbcore: registered new interface driver usb-storage i2c-octeon 1180000001000.i2c: probed i2c-octeon 1180000001200.i2c: probed octeon_wdt: Initial granularity 5 Sec EDAC DEVICE0: Giving out device to module octeon-cpu controller cache: DEV octeon_pc_edac (INTERRUPT) EDAC DEVICE1: Giving out device to module octeon-l2c controller octeon_l2c_err: DEV octeon_l2c_edac (POLLED) octeon_lmc_edac octeon_lmc_edac.0: Disabled (ECC not enabled) Interface 0 has 4 ports (SGMII) Interface 1 has 4 ports (SGMII) Interface 3 has 4 ports (LOOP) NET: Registered PF_INET6 protocol family Segment Routing with IPv6 In-situ OAM (IOAM) with IPv6 sit: IPv6, IPv4 and MPLS over IPv4 tunneling driver NET: Registered PF_PACKET protocol family Key type dns_resolver registered OF: fdt: not creating '/sys/firmware/fdt': CRC check failed Freeing unused kernel image (initmem) memory: 27488K This architecture does not have kernel memory protection. Run /init as init process with arguments: /init with environment: HOME=/ TERM=linux do_page_fault(): sending SIGSEGV to init for invalid read access from 0000000000021da8 epc = 000000fff3a542c0 in ld.so.1[fff3a37000+2e000] ra = 000000fff3a54688 in ld.so.1[fff3a37000+2e000] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b ]---
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index 574fa14ac8b2..9175dfab08d5 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -565,15 +565,8 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) } #endif -extern void __update_tlb(struct vm_area_struct *vma, unsigned long address, - pte_t pte); - -static inline void update_mmu_cache(struct vm_area_struct *vma, - unsigned long address, pte_t *ptep) -{ - pte_t pte = *ptep; - __update_tlb(vma, address, pte); -} +extern void update_mmu_cache(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep); #define __HAVE_ARCH_UPDATE_MMU_TLB #define update_mmu_tlb update_mmu_cache @@ -581,9 +574,7 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, static inline void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { - pte_t pte = *(pte_t *)pmdp; - - __update_tlb(vma, address, pte); + update_mmu_cache(vma, address, (pte_t *)pmdp); } /* diff --git a/arch/mips/mm/tlb-r3k.c b/arch/mips/mm/tlb-r3k.c index 53dfa2b9316b..e5722cd8dd6d 100644 --- a/arch/mips/mm/tlb-r3k.c +++ b/arch/mips/mm/tlb-r3k.c @@ -176,7 +176,8 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) } } -void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte) +void update_mmu_cache(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep) { unsigned long asid_mask = cpu_asid_mask(¤t_cpu_data); unsigned long flags; @@ -203,7 +204,7 @@ void __update_tlb(struct vm_area_struct *vma, unsigned long address, pte_t pte) BARRIER; tlb_probe(); idx = read_c0_index(); - write_c0_entrylo0(pte_val(pte)); + write_c0_entrylo0(pte_val(*ptep)); write_c0_entryhi(address | pid); if (idx < 0) { /* BARRIER */ tlb_write_random(); diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c index 1b939abbe4ca..c96725d17cab 100644 --- a/arch/mips/mm/tlb-r4k.c +++ b/arch/mips/mm/tlb-r4k.c @@ -290,14 +290,14 @@ void local_flush_tlb_one(unsigned long page) * updates the TLB with the new pte(s), and another which also checks * for the R4k "end of page" hardware bug and does the needy. */ -void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte) +void update_mmu_cache(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep) { unsigned long flags; pgd_t *pgdp; p4d_t *p4dp; pud_t *pudp; pmd_t *pmdp; - pte_t *ptep; int idx, pid; /* @@ -326,10 +326,9 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte) idx = read_c0_index(); #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT /* this could be a huge page */ - if (pmd_huge(*pmdp)) { + if (ptep == (pte_t *)pmdp) { unsigned long lo; write_c0_pagemask(PM_HUGE_MASK); - ptep = (pte_t *)pmdp; lo = pte_to_entrylo(pte_val(*ptep)); write_c0_entrylo0(lo); write_c0_entrylo1(lo + (HPAGE_SIZE >> 7)); @@ -344,8 +343,6 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte) } else #endif { - ptep = pte_offset_map(pmdp, address); - #if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_CPU_MIPS32) #ifdef CONFIG_XPA write_c0_entrylo0(pte_to_entrylo(ptep->pte_high));
Don't make update_mmu_cache() a wrapper around __update_tlb(): call it directly, and use the ptep (or pmdp) provided by the caller, instead of re-calling pte_offset_map() - which would raise a question of whether a pte_unmap() is needed to balance it. Check whether the "ptep" provided by the caller is actually the pmdp, instead of testing pmd_huge(): or test pmd_huge() too and warn if it disagrees? This is "hazardous" territory: needs review and testing. Signed-off-by: Hugh Dickins <hughd@google.com> --- arch/mips/include/asm/pgtable.h | 15 +++------------ arch/mips/mm/tlb-r3k.c | 5 +++-- arch/mips/mm/tlb-r4k.c | 9 +++------ 3 files changed, 9 insertions(+), 20 deletions(-)