diff mbox series

arm64: mm: Pass physical address explicitly in map_range

Message ID 20240314125331.24082-1-piliu@redhat.com (mailing list archive)
State New, archived
Headers show
Series arm64: mm: Pass physical address explicitly in map_range | expand

Commit Message

Pingfan Liu March 14, 2024, 12:53 p.m. UTC
This patch is not a fix, just a improvement in code reading.

At the present, the deduction of a symbol's physical address hides in
the compiler's trick. Introduce a function paddr(), which make the
process explicitly at the sacrifice of one sub and one add instruction.

Signed-off-by: Pingfan Liu <piliu@redhat.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
To: linux-arm-kernel@lists.infradead.org
---
 arch/arm64/kernel/pi/map_kernel.c | 30 +++++++++++++++---------------
 arch/arm64/kernel/pi/map_range.c  | 21 +++++++++++++++++++--
 arch/arm64/kernel/pi/pi.h         |  1 +
 3 files changed, 35 insertions(+), 17 deletions(-)

Comments

Ard Biesheuvel March 15, 2024, 11:38 a.m. UTC | #1
On Thu, 14 Mar 2024 at 13:53, Pingfan Liu <piliu@redhat.com> wrote:
>
> This patch is not a fix, just a improvement in code reading.
>
> At the present, the deduction of a symbol's physical address hides in
> the compiler's trick. Introduce a function paddr(), which make the
> process explicitly at the sacrifice of one sub and one add instruction.
>
> Signed-off-by: Pingfan Liu <piliu@redhat.com>
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Will Deacon <will@kernel.org>
> Cc: Ard Biesheuvel <ardb@kernel.org>
> Cc: Kees Cook <keescook@chromium.org>
> To: linux-arm-kernel@lists.infradead.org
> ---
>  arch/arm64/kernel/pi/map_kernel.c | 30 +++++++++++++++---------------
>  arch/arm64/kernel/pi/map_range.c  | 21 +++++++++++++++++++--
>  arch/arm64/kernel/pi/pi.h         |  1 +
>  3 files changed, 35 insertions(+), 17 deletions(-)
>
> diff --git a/arch/arm64/kernel/pi/map_kernel.c b/arch/arm64/kernel/pi/map_kernel.c
> index cac1e1f63c44..f0bad9ff3cce 100644
> --- a/arch/arm64/kernel/pi/map_kernel.c
> +++ b/arch/arm64/kernel/pi/map_kernel.c
> @@ -78,16 +78,16 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
>         twopass |= enable_scs;
>         prot = twopass ? data_prot : text_prot;
>
> -       map_segment(init_pg_dir, &pgdp, va_offset, _stext, _etext, prot,
> +       map_segment(init_pg_dir, &pgdp, va_offset, paddr(_stext), paddr(_etext), prot,
>                     !twopass, root_level);
> -       map_segment(init_pg_dir, &pgdp, va_offset, __start_rodata,
> -                   __inittext_begin, data_prot, false, root_level);
> -       map_segment(init_pg_dir, &pgdp, va_offset, __inittext_begin,
> -                   __inittext_end, prot, false, root_level);
> -       map_segment(init_pg_dir, &pgdp, va_offset, __initdata_begin,
> -                   __initdata_end, data_prot, false, root_level);
> -       map_segment(init_pg_dir, &pgdp, va_offset, _data, _end, data_prot,
> -                   true, root_level);
> +       map_segment(init_pg_dir, &pgdp, va_offset, paddr(__start_rodata),
> +                   paddr(__inittext_begin), data_prot, false, root_level);
> +       map_segment(init_pg_dir, &pgdp, va_offset, paddr(__inittext_begin),
> +                   paddr(__inittext_end), prot, false, root_level);
> +       map_segment(init_pg_dir, &pgdp, va_offset, paddr(__initdata_begin),
> +                   paddr(__initdata_end), data_prot, false, root_level);
> +       map_segment(init_pg_dir, &pgdp, va_offset, paddr(_data), paddr(_end),
> +                   data_prot, true, root_level);
>         dsb(ishst);
>
>         idmap_cpu_replace_ttbr1(init_pg_dir);
> @@ -109,7 +109,7 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
>                  * potential TLB conflicts when creating the contiguous
>                  * descriptors.
>                  */
> -               unmap_segment(init_pg_dir, va_offset, _stext, _etext,
> +               unmap_segment(init_pg_dir, va_offset, paddr(_stext), paddr(_etext),
>                               root_level);
>                 dsb(ishst);
>                 isb();
> @@ -120,10 +120,10 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
>                  * Remap these segments with different permissions
>                  * No new page table allocations should be needed
>                  */
> -               map_segment(init_pg_dir, NULL, va_offset, _stext, _etext,
> -                           text_prot, true, root_level);
> -               map_segment(init_pg_dir, NULL, va_offset, __inittext_begin,
> -                           __inittext_end, text_prot, false, root_level);
> +               map_segment(init_pg_dir, NULL, va_offset, paddr(_stext),
> +                           paddr(_etext), text_prot, true, root_level);
> +               map_segment(init_pg_dir, NULL, va_offset, paddr(__inittext_begin),
> +                           paddr(__inittext_end), text_prot, false, root_level);
>         }
>
>         /* Copy the root page table to its final location */
> @@ -223,7 +223,7 @@ static void __init map_fdt(u64 fdt)
>  asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt)
>  {
>         static char const chosen_str[] __initconst = "/chosen";
> -       u64 va_base, pa_base = (u64)&_text;
> +       u64 va_base, pa_base = paddr(_text);
>         u64 kaslr_offset = pa_base % MIN_KIMG_ALIGN;
>         int root_level = 4 - CONFIG_PGTABLE_LEVELS;
>         int va_bits = VA_BITS;
> diff --git a/arch/arm64/kernel/pi/map_range.c b/arch/arm64/kernel/pi/map_range.c
> index 5410b2cac590..b5a68dcf3cf5 100644
> --- a/arch/arm64/kernel/pi/map_range.c
> +++ b/arch/arm64/kernel/pi/map_range.c
> @@ -87,6 +87,23 @@ void __init map_range(u64 *pte, u64 start, u64 end, u64 pa, pgprot_t prot,
>         }
>  }
>
> +u64 __init paddr(char *symbol)
> +{
> +       u64 _text_paddr;
> +       u64 delta;
> +
> +       asm volatile(
> +               "adrp %0, _text;"
> +               "add %0, %0, #:lo12:_text;"
> +               : "=r" (_text_paddr)
> +               :
> +               :
> +       );
> +
> +       delta = symbol - _text;
> +       return _text_paddr + delta;
> +}
> +
>  asmlinkage u64 __init create_init_idmap(pgd_t *pg_dir, pteval_t clrmask)
>  {
>         u64 ptep = (u64)pg_dir + PAGE_SIZE;
> @@ -96,9 +113,9 @@ asmlinkage u64 __init create_init_idmap(pgd_t *pg_dir, pteval_t clrmask)
>         pgprot_val(text_prot) &= ~clrmask;
>         pgprot_val(data_prot) &= ~clrmask;
>
> -       map_range(&ptep, (u64)_stext, (u64)__initdata_begin, (u64)_stext,
> +       map_range(&ptep, paddr(_stext), paddr(__initdata_begin), paddr(_stext),
>                   text_prot, IDMAP_ROOT_LEVEL, (pte_t *)pg_dir, false, 0);
> -       map_range(&ptep, (u64)__initdata_begin, (u64)_end, (u64)__initdata_begin,
> +       map_range(&ptep, paddr(__initdata_begin), paddr(_end), paddr(__initdata_begin),
>                   data_prot, IDMAP_ROOT_LEVEL, (pte_t *)pg_dir, false, 0);
>
>         return ptep;
> diff --git a/arch/arm64/kernel/pi/pi.h b/arch/arm64/kernel/pi/pi.h
> index c91e5e965cd3..7ffba712da06 100644
> --- a/arch/arm64/kernel/pi/pi.h
> +++ b/arch/arm64/kernel/pi/pi.h
> @@ -28,6 +28,7 @@ u64 kaslr_early_init(void *fdt, int chosen);
>  void relocate_kernel(u64 offset);
>  int scs_patch(const u8 eh_frame[], int size);
>
> +u64 paddr(char *symbol);
>  void map_range(u64 *pgd, u64 start, u64 end, u64 pa, pgprot_t prot,
>                int level, pte_t *tbl, bool may_use_cont, u64 va_offset);
>

Hello Pingfan,

I struggle to see the point of this patch. create_init_idmap() and
map_kernel() are only called from the early startup code, where the
code is mapped 1:1. Those routines are essentially position dependent,
just not in the conventional way.

You are adding a function that has the same properties - your paddr()
function is position dependent, and will only return the correct PA of
its argument if it is called from a 1:1 mapping of the code.

Both existing routines call map_range(), which is position
independent, and can (and is) used from other contexts as well. All
code built under the pi/ subdirectory is built with instrumentation
disabled and without absolute symbol references, making it safe for
this particular routine to be called from elsewhere. This is also what
guarantees that the references to _text are using the correct
translation, as only PC-relative references are permitted.

If there is confusion about this, feel free to propose improved
documentation. But these code changes only make things worse imo.
Pingfan Liu March 18, 2024, 11:12 a.m. UTC | #2
On Fri, Mar 15, 2024 at 7:39 PM Ard Biesheuvel <ardb@kernel.org> wrote:
>
> On Thu, 14 Mar 2024 at 13:53, Pingfan Liu <piliu@redhat.com> wrote:
> >
> > This patch is not a fix, just a improvement in code reading.
> >
> > At the present, the deduction of a symbol's physical address hides in
> > the compiler's trick. Introduce a function paddr(), which make the
> > process explicitly at the sacrifice of one sub and one add instruction.
> >
> > Signed-off-by: Pingfan Liu <piliu@redhat.com>
> > Cc: Catalin Marinas <catalin.marinas@arm.com>
> > Cc: Will Deacon <will@kernel.org>
> > Cc: Ard Biesheuvel <ardb@kernel.org>
> > Cc: Kees Cook <keescook@chromium.org>
> > To: linux-arm-kernel@lists.infradead.org
> > ---
> >  arch/arm64/kernel/pi/map_kernel.c | 30 +++++++++++++++---------------
> >  arch/arm64/kernel/pi/map_range.c  | 21 +++++++++++++++++++--
> >  arch/arm64/kernel/pi/pi.h         |  1 +
> >  3 files changed, 35 insertions(+), 17 deletions(-)
> >
> > diff --git a/arch/arm64/kernel/pi/map_kernel.c b/arch/arm64/kernel/pi/map_kernel.c
> > index cac1e1f63c44..f0bad9ff3cce 100644
> > --- a/arch/arm64/kernel/pi/map_kernel.c
> > +++ b/arch/arm64/kernel/pi/map_kernel.c
> > @@ -78,16 +78,16 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
> >         twopass |= enable_scs;
> >         prot = twopass ? data_prot : text_prot;
> >
> > -       map_segment(init_pg_dir, &pgdp, va_offset, _stext, _etext, prot,
> > +       map_segment(init_pg_dir, &pgdp, va_offset, paddr(_stext), paddr(_etext), prot,
> >                     !twopass, root_level);
> > -       map_segment(init_pg_dir, &pgdp, va_offset, __start_rodata,
> > -                   __inittext_begin, data_prot, false, root_level);
> > -       map_segment(init_pg_dir, &pgdp, va_offset, __inittext_begin,
> > -                   __inittext_end, prot, false, root_level);
> > -       map_segment(init_pg_dir, &pgdp, va_offset, __initdata_begin,
> > -                   __initdata_end, data_prot, false, root_level);
> > -       map_segment(init_pg_dir, &pgdp, va_offset, _data, _end, data_prot,
> > -                   true, root_level);
> > +       map_segment(init_pg_dir, &pgdp, va_offset, paddr(__start_rodata),
> > +                   paddr(__inittext_begin), data_prot, false, root_level);
> > +       map_segment(init_pg_dir, &pgdp, va_offset, paddr(__inittext_begin),
> > +                   paddr(__inittext_end), prot, false, root_level);
> > +       map_segment(init_pg_dir, &pgdp, va_offset, paddr(__initdata_begin),
> > +                   paddr(__initdata_end), data_prot, false, root_level);
> > +       map_segment(init_pg_dir, &pgdp, va_offset, paddr(_data), paddr(_end),
> > +                   data_prot, true, root_level);
> >         dsb(ishst);
> >
> >         idmap_cpu_replace_ttbr1(init_pg_dir);
> > @@ -109,7 +109,7 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
> >                  * potential TLB conflicts when creating the contiguous
> >                  * descriptors.
> >                  */
> > -               unmap_segment(init_pg_dir, va_offset, _stext, _etext,
> > +               unmap_segment(init_pg_dir, va_offset, paddr(_stext), paddr(_etext),
> >                               root_level);
> >                 dsb(ishst);
> >                 isb();
> > @@ -120,10 +120,10 @@ static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
> >                  * Remap these segments with different permissions
> >                  * No new page table allocations should be needed
> >                  */
> > -               map_segment(init_pg_dir, NULL, va_offset, _stext, _etext,
> > -                           text_prot, true, root_level);
> > -               map_segment(init_pg_dir, NULL, va_offset, __inittext_begin,
> > -                           __inittext_end, text_prot, false, root_level);
> > +               map_segment(init_pg_dir, NULL, va_offset, paddr(_stext),
> > +                           paddr(_etext), text_prot, true, root_level);
> > +               map_segment(init_pg_dir, NULL, va_offset, paddr(__inittext_begin),
> > +                           paddr(__inittext_end), text_prot, false, root_level);
> >         }
> >
> >         /* Copy the root page table to its final location */
> > @@ -223,7 +223,7 @@ static void __init map_fdt(u64 fdt)
> >  asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt)
> >  {
> >         static char const chosen_str[] __initconst = "/chosen";
> > -       u64 va_base, pa_base = (u64)&_text;
> > +       u64 va_base, pa_base = paddr(_text);
> >         u64 kaslr_offset = pa_base % MIN_KIMG_ALIGN;
> >         int root_level = 4 - CONFIG_PGTABLE_LEVELS;
> >         int va_bits = VA_BITS;
> > diff --git a/arch/arm64/kernel/pi/map_range.c b/arch/arm64/kernel/pi/map_range.c
> > index 5410b2cac590..b5a68dcf3cf5 100644
> > --- a/arch/arm64/kernel/pi/map_range.c
> > +++ b/arch/arm64/kernel/pi/map_range.c
> > @@ -87,6 +87,23 @@ void __init map_range(u64 *pte, u64 start, u64 end, u64 pa, pgprot_t prot,
> >         }
> >  }
> >
> > +u64 __init paddr(char *symbol)
> > +{
> > +       u64 _text_paddr;
> > +       u64 delta;
> > +
> > +       asm volatile(
> > +               "adrp %0, _text;"
> > +               "add %0, %0, #:lo12:_text;"
> > +               : "=r" (_text_paddr)
> > +               :
> > +               :
> > +       );
> > +
> > +       delta = symbol - _text;
> > +       return _text_paddr + delta;
> > +}
> > +
> >  asmlinkage u64 __init create_init_idmap(pgd_t *pg_dir, pteval_t clrmask)
> >  {
> >         u64 ptep = (u64)pg_dir + PAGE_SIZE;
> > @@ -96,9 +113,9 @@ asmlinkage u64 __init create_init_idmap(pgd_t *pg_dir, pteval_t clrmask)
> >         pgprot_val(text_prot) &= ~clrmask;
> >         pgprot_val(data_prot) &= ~clrmask;
> >
> > -       map_range(&ptep, (u64)_stext, (u64)__initdata_begin, (u64)_stext,
> > +       map_range(&ptep, paddr(_stext), paddr(__initdata_begin), paddr(_stext),
> >                   text_prot, IDMAP_ROOT_LEVEL, (pte_t *)pg_dir, false, 0);
> > -       map_range(&ptep, (u64)__initdata_begin, (u64)_end, (u64)__initdata_begin,
> > +       map_range(&ptep, paddr(__initdata_begin), paddr(_end), paddr(__initdata_begin),
> >                   data_prot, IDMAP_ROOT_LEVEL, (pte_t *)pg_dir, false, 0);
> >
> >         return ptep;
> > diff --git a/arch/arm64/kernel/pi/pi.h b/arch/arm64/kernel/pi/pi.h
> > index c91e5e965cd3..7ffba712da06 100644
> > --- a/arch/arm64/kernel/pi/pi.h
> > +++ b/arch/arm64/kernel/pi/pi.h
> > @@ -28,6 +28,7 @@ u64 kaslr_early_init(void *fdt, int chosen);
> >  void relocate_kernel(u64 offset);
> >  int scs_patch(const u8 eh_frame[], int size);
> >
> > +u64 paddr(char *symbol);
> >  void map_range(u64 *pgd, u64 start, u64 end, u64 pa, pgprot_t prot,
> >                int level, pte_t *tbl, bool may_use_cont, u64 va_offset);
> >
>
> Hello Pingfan,
>
> I struggle to see the point of this patch. create_init_idmap() and
> map_kernel() are only called from the early startup code, where the
> code is mapped 1:1. Those routines are essentially position dependent,
> just not in the conventional way.
>

From what angle, can it be considered as position dependent code?
Could you enlighten me so I can have a refreshed understanding of this
stuff.

> You are adding a function that has the same properties - your paddr()
> function is position dependent, and will only return the correct PA of
> its argument if it is called from a 1:1 mapping of the code.
>

Is the 1:1 mapping the angle?

> Both existing routines call map_range(), which is position
> independent, and can (and is) used from other contexts as well. All
> code built under the pi/ subdirectory is built with instrumentation
> disabled and without absolute symbol references, making it safe for
> this particular routine to be called from elsewhere. This is also what
> guarantees that the references to _text are using the correct
> translation, as only PC-relative references are permitted.
>

Exactly, it ensures only PC-relative references.

> If there is confusion about this, feel free to propose improved
> documentation. But these code changes only make things worse imo.
>

Does this look so straightforward for senior developers? Or I can send
out some documents for it.

Thanks,

Pingfan
Ard Biesheuvel March 18, 2024, 12:31 p.m. UTC | #3
On Mon, 18 Mar 2024 at 12:12, Pingfan Liu <piliu@redhat.com> wrote:
>
> On Fri, Mar 15, 2024 at 7:39 PM Ard Biesheuvel <ardb@kernel.org> wrote:
> >
> > On Thu, 14 Mar 2024 at 13:53, Pingfan Liu <piliu@redhat.com> wrote:
> > >
> > > This patch is not a fix, just a improvement in code reading.
> > >
> > > At the present, the deduction of a symbol's physical address hides in
> > > the compiler's trick. Introduce a function paddr(), which make the
> > > process explicitly at the sacrifice of one sub and one add instruction.
> > >
> > > Signed-off-by: Pingfan Liu <piliu@redhat.com>
> > > Cc: Catalin Marinas <catalin.marinas@arm.com>
> > > Cc: Will Deacon <will@kernel.org>
> > > Cc: Ard Biesheuvel <ardb@kernel.org>
> > > Cc: Kees Cook <keescook@chromium.org>
> > > To: linux-arm-kernel@lists.infradead.org
> > > ---
> > >  arch/arm64/kernel/pi/map_kernel.c | 30 +++++++++++++++---------------
> > >  arch/arm64/kernel/pi/map_range.c  | 21 +++++++++++++++++++--
> > >  arch/arm64/kernel/pi/pi.h         |  1 +
> > >  3 files changed, 35 insertions(+), 17 deletions(-)
> > >
...
> >
> > Hello Pingfan,
> >
> > I struggle to see the point of this patch. create_init_idmap() and
> > map_kernel() are only called from the early startup code, where the
> > code is mapped 1:1. Those routines are essentially position dependent,
> > just not in the conventional way.
> >
>
> From what angle, can it be considered as position dependent code?
> Could you enlighten me so I can have a refreshed understanding of this
> stuff.
>

There are basically two kinds of position independent code:
a) relocatable code, which contains statically initialized pointer
variables (which are absolute) but with metadata that allows a loader
to fix up those references to be correct for the actual placement of
the code;
b) true position independent code, which does not use any absolute
references at all, and can therefore execute at any offset

Toolchains always generate a), even with -fPIC/-fPIE etc, which mostly
control optimizations/codegen restrictions that allow shared libraries
and PIE executables to be placed at arbitrary offsets in memory
without the need for all code and r/o data sections to be updatable
(for relocation fixups).

In the kernel startup code, we use b), i.e., we generate code using
PC-relative symbol references, and explicitly forbid the use of
R_AARCH64_ABS64 relocations so that fixups are never needed. Since
toolchains generate a), we need to use a special tool (relacheck) for
this. The resulting code can execute anywhere in memory, and variable
reads and writes will work as expected, given that they are
PC-relative.

This code is in charge of creating the initial 1:1 mapping of memory,
and therefore, it needs the 1:1 mapped addresses of _text and _end,
etc. Given that we only permit PC-relative references in this code,
the only way it can produce the correct 1:1 address of those symbols
is if the code itself is mapped 1:1 too. In other words, calling the
same code again from the kernel's normal virtual mapping would produce
a bogus mapping. This does not matter, though: this code has a
specific purpose at early boot, and shouldn't be used afterwards.


> > You are adding a function that has the same properties - your paddr()
> > function is position dependent, and will only return the correct PA of
> > its argument if it is called from a 1:1 mapping of the code.
> >
>
> Is the 1:1 mapping the angle?
>

I don't understand this question.

> > Both existing routines call map_range(), which is position
> > independent, and can (and is) used from other contexts as well. All
> > code built under the pi/ subdirectory is built with instrumentation
> > disabled and without absolute symbol references, making it safe for
> > this particular routine to be called from elsewhere. This is also what
> > guarantees that the references to _text are using the correct
> > translation, as only PC-relative references are permitted.
> >
>
> Exactly, it ensures only PC-relative references.
>

Yes, but paddr() suggests that the result is the physical address. But
this is only the case if paddr() itself runs from a 1:1 mapping of
memory, otherwise it will be based on whatever other translation is
being used.

> > If there is confusion about this, feel free to propose improved
> > documentation. But these code changes only make things worse imo.
> >
>
> Does this look so straightforward for senior developers? Or I can send
> out some documents for it.
>

I cannot speak for other developers. I will note that this code is
highly bespoke for the early execution context where the startup code
lives. Good documentation is always better, but that doesn't mean this
code could or should be used more widely.
Pingfan Liu March 20, 2024, 10:59 a.m. UTC | #4
On Mon, Mar 18, 2024 at 8:31 PM Ard Biesheuvel <ardb@kernel.org> wrote:
>
> On Mon, 18 Mar 2024 at 12:12, Pingfan Liu <piliu@redhat.com> wrote:
> >
> > On Fri, Mar 15, 2024 at 7:39 PM Ard Biesheuvel <ardb@kernel.org> wrote:
> > >
> > > On Thu, 14 Mar 2024 at 13:53, Pingfan Liu <piliu@redhat.com> wrote:
> > > >
> > > > This patch is not a fix, just a improvement in code reading.
> > > >
> > > > At the present, the deduction of a symbol's physical address hides in
> > > > the compiler's trick. Introduce a function paddr(), which make the
> > > > process explicitly at the sacrifice of one sub and one add instruction.
> > > >
> > > > Signed-off-by: Pingfan Liu <piliu@redhat.com>
> > > > Cc: Catalin Marinas <catalin.marinas@arm.com>
> > > > Cc: Will Deacon <will@kernel.org>
> > > > Cc: Ard Biesheuvel <ardb@kernel.org>
> > > > Cc: Kees Cook <keescook@chromium.org>
> > > > To: linux-arm-kernel@lists.infradead.org
> > > > ---
> > > >  arch/arm64/kernel/pi/map_kernel.c | 30 +++++++++++++++---------------
> > > >  arch/arm64/kernel/pi/map_range.c  | 21 +++++++++++++++++++--
> > > >  arch/arm64/kernel/pi/pi.h         |  1 +
> > > >  3 files changed, 35 insertions(+), 17 deletions(-)
> > > >
> ...
> > >
> > > Hello Pingfan,
> > >
> > > I struggle to see the point of this patch. create_init_idmap() and
> > > map_kernel() are only called from the early startup code, where the
> > > code is mapped 1:1. Those routines are essentially position dependent,
> > > just not in the conventional way.
> > >
> >
> > From what angle, can it be considered as position dependent code?
> > Could you enlighten me so I can have a refreshed understanding of this
> > stuff.
> >
>
> There are basically two kinds of position independent code:
> a) relocatable code, which contains statically initialized pointer
> variables (which are absolute) but with metadata that allows a loader
> to fix up those references to be correct for the actual placement of
> the code;
> b) true position independent code, which does not use any absolute
> references at all, and can therefore execute at any offset
>
> Toolchains always generate a), even with -fPIC/-fPIE etc, which mostly

OK, here I get your point. Appreciate your patient explanation so that
I can have a fresh viewpoint of the whole stuff.

> control optimizations/codegen restrictions that allow shared libraries
> and PIE executables to be placed at arbitrary offsets in memory
> without the need for all code and r/o data sections to be updatable
> (for relocation fixups).
>
> In the kernel startup code, we use b), i.e., we generate code using
> PC-relative symbol references, and explicitly forbid the use of
> R_AARCH64_ABS64 relocations so that fixups are never needed. Since
> toolchains generate a), we need to use a special tool (relacheck) for
> this. The resulting code can execute anywhere in memory, and variable
> reads and writes will work as expected, given that they are
> PC-relative.
>
> This code is in charge of creating the initial 1:1 mapping of memory,
> and therefore, it needs the 1:1 mapped addresses of _text and _end,
> etc. Given that we only permit PC-relative references in this code,
> the only way it can produce the correct 1:1 address of those symbols
> is if the code itself is mapped 1:1 too. In other words, calling the
> same code again from the kernel's normal virtual mapping would produce
> a bogus mapping. This does not matter, though: this code has a
> specific purpose at early boot, and shouldn't be used afterwards.
>

A convincing viewpoint. I agree.

>
> > > You are adding a function that has the same properties - your paddr()
> > > function is position dependent, and will only return the correct PA of
> > > its argument if it is called from a 1:1 mapping of the code.
> > >
> >
> > Is the 1:1 mapping the angle?
> >
>
> I don't understand this question.
>

I tried to guess your comment "Those routines are essentially position
dependent, just not in the conventional way." between lines. And now,
you have explained it completely. I have total understand it.

> > > Both existing routines call map_range(), which is position
> > > independent, and can (and is) used from other contexts as well. All
> > > code built under the pi/ subdirectory is built with instrumentation
> > > disabled and without absolute symbol references, making it safe for
> > > this particular routine to be called from elsewhere. This is also what
> > > guarantees that the references to _text are using the correct
> > > translation, as only PC-relative references are permitted.
> > >
> >
> > Exactly, it ensures only PC-relative references.
> >
>
> Yes, but paddr() suggests that the result is the physical address. But
> this is only the case if paddr() itself runs from a 1:1 mapping of
> memory, otherwise it will be based on whatever other translation is
> being used.
>
> > > If there is confusion about this, feel free to propose improved
> > > documentation. But these code changes only make things worse imo.
> > >
> >
> > Does this look so straightforward for senior developers? Or I can send
> > out some documents for it.
> >
>
> I cannot speak for other developers. I will note that this code is
> highly bespoke for the early execution context where the startup code
> lives. Good documentation is always better, but that doesn't mean this
> code could or should be used more widely.
>

OK, thanks. I will send some documents about it if there is anybody
puzzled by this code later.

Best regards,

Pingfan
diff mbox series

Patch

diff --git a/arch/arm64/kernel/pi/map_kernel.c b/arch/arm64/kernel/pi/map_kernel.c
index cac1e1f63c44..f0bad9ff3cce 100644
--- a/arch/arm64/kernel/pi/map_kernel.c
+++ b/arch/arm64/kernel/pi/map_kernel.c
@@ -78,16 +78,16 @@  static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
 	twopass |= enable_scs;
 	prot = twopass ? data_prot : text_prot;
 
-	map_segment(init_pg_dir, &pgdp, va_offset, _stext, _etext, prot,
+	map_segment(init_pg_dir, &pgdp, va_offset, paddr(_stext), paddr(_etext), prot,
 		    !twopass, root_level);
-	map_segment(init_pg_dir, &pgdp, va_offset, __start_rodata,
-		    __inittext_begin, data_prot, false, root_level);
-	map_segment(init_pg_dir, &pgdp, va_offset, __inittext_begin,
-		    __inittext_end, prot, false, root_level);
-	map_segment(init_pg_dir, &pgdp, va_offset, __initdata_begin,
-		    __initdata_end, data_prot, false, root_level);
-	map_segment(init_pg_dir, &pgdp, va_offset, _data, _end, data_prot,
-		    true, root_level);
+	map_segment(init_pg_dir, &pgdp, va_offset, paddr(__start_rodata),
+		    paddr(__inittext_begin), data_prot, false, root_level);
+	map_segment(init_pg_dir, &pgdp, va_offset, paddr(__inittext_begin),
+		    paddr(__inittext_end), prot, false, root_level);
+	map_segment(init_pg_dir, &pgdp, va_offset, paddr(__initdata_begin),
+		    paddr(__initdata_end), data_prot, false, root_level);
+	map_segment(init_pg_dir, &pgdp, va_offset, paddr(_data), paddr(_end),
+		    data_prot, true, root_level);
 	dsb(ishst);
 
 	idmap_cpu_replace_ttbr1(init_pg_dir);
@@ -109,7 +109,7 @@  static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
 		 * potential TLB conflicts when creating the contiguous
 		 * descriptors.
 		 */
-		unmap_segment(init_pg_dir, va_offset, _stext, _etext,
+		unmap_segment(init_pg_dir, va_offset, paddr(_stext), paddr(_etext),
 			      root_level);
 		dsb(ishst);
 		isb();
@@ -120,10 +120,10 @@  static void __init map_kernel(u64 kaslr_offset, u64 va_offset, int root_level)
 		 * Remap these segments with different permissions
 		 * No new page table allocations should be needed
 		 */
-		map_segment(init_pg_dir, NULL, va_offset, _stext, _etext,
-			    text_prot, true, root_level);
-		map_segment(init_pg_dir, NULL, va_offset, __inittext_begin,
-			    __inittext_end, text_prot, false, root_level);
+		map_segment(init_pg_dir, NULL, va_offset, paddr(_stext),
+			    paddr(_etext), text_prot, true, root_level);
+		map_segment(init_pg_dir, NULL, va_offset, paddr(__inittext_begin),
+			    paddr(__inittext_end), text_prot, false, root_level);
 	}
 
 	/* Copy the root page table to its final location */
@@ -223,7 +223,7 @@  static void __init map_fdt(u64 fdt)
 asmlinkage void __init early_map_kernel(u64 boot_status, void *fdt)
 {
 	static char const chosen_str[] __initconst = "/chosen";
-	u64 va_base, pa_base = (u64)&_text;
+	u64 va_base, pa_base = paddr(_text);
 	u64 kaslr_offset = pa_base % MIN_KIMG_ALIGN;
 	int root_level = 4 - CONFIG_PGTABLE_LEVELS;
 	int va_bits = VA_BITS;
diff --git a/arch/arm64/kernel/pi/map_range.c b/arch/arm64/kernel/pi/map_range.c
index 5410b2cac590..b5a68dcf3cf5 100644
--- a/arch/arm64/kernel/pi/map_range.c
+++ b/arch/arm64/kernel/pi/map_range.c
@@ -87,6 +87,23 @@  void __init map_range(u64 *pte, u64 start, u64 end, u64 pa, pgprot_t prot,
 	}
 }
 
+u64 __init paddr(char *symbol)
+{
+	u64 _text_paddr;
+	u64 delta;
+
+	asm volatile(
+		"adrp %0, _text;"
+		"add %0, %0, #:lo12:_text;"
+		: "=r" (_text_paddr)
+		:
+		:
+	);
+
+	delta = symbol - _text;
+	return _text_paddr + delta;
+}
+
 asmlinkage u64 __init create_init_idmap(pgd_t *pg_dir, pteval_t clrmask)
 {
 	u64 ptep = (u64)pg_dir + PAGE_SIZE;
@@ -96,9 +113,9 @@  asmlinkage u64 __init create_init_idmap(pgd_t *pg_dir, pteval_t clrmask)
 	pgprot_val(text_prot) &= ~clrmask;
 	pgprot_val(data_prot) &= ~clrmask;
 
-	map_range(&ptep, (u64)_stext, (u64)__initdata_begin, (u64)_stext,
+	map_range(&ptep, paddr(_stext), paddr(__initdata_begin), paddr(_stext),
 		  text_prot, IDMAP_ROOT_LEVEL, (pte_t *)pg_dir, false, 0);
-	map_range(&ptep, (u64)__initdata_begin, (u64)_end, (u64)__initdata_begin,
+	map_range(&ptep, paddr(__initdata_begin), paddr(_end), paddr(__initdata_begin),
 		  data_prot, IDMAP_ROOT_LEVEL, (pte_t *)pg_dir, false, 0);
 
 	return ptep;
diff --git a/arch/arm64/kernel/pi/pi.h b/arch/arm64/kernel/pi/pi.h
index c91e5e965cd3..7ffba712da06 100644
--- a/arch/arm64/kernel/pi/pi.h
+++ b/arch/arm64/kernel/pi/pi.h
@@ -28,6 +28,7 @@  u64 kaslr_early_init(void *fdt, int chosen);
 void relocate_kernel(u64 offset);
 int scs_patch(const u8 eh_frame[], int size);
 
+u64 paddr(char *symbol);
 void map_range(u64 *pgd, u64 start, u64 end, u64 pa, pgprot_t prot,
 	       int level, pte_t *tbl, bool may_use_cont, u64 va_offset);