diff mbox series

[03/10] x86 setup: change bootstrap map to accept new boot module structures

Message ID 20230701071835.41599-4-christopher.w.clark@gmail.com (mailing list archive)
State New, archived
Headers show
Series v3: Boot modules for Hyperlaunch | expand

Commit Message

Christopher Clark July 1, 2023, 7:18 a.m. UTC
To convert the x86 boot logic from multiboot to boot module structures,
change the bootstrap map function to accept a boot module parameter.

To allow incremental change from multiboot to boot modules across all
x86 setup logic, provide a temporary inline wrapper that still accepts a
multiboot module parameter and use it where necessary. The wrapper is
placed in a new arch/x86 header <asm/boot.h> to avoid putting a static
inline function into an existing header that has no such functions
already. This new header will be expanded with additional functions in
subsequent patches in this series.

No functional change intended.

Signed-off-by: Christopher Clark <christopher.w.clark@gmail.com>
Signed-off-by: Daniel P. Smith <dpsmith@apertussolutions.com>

---
A new patch so v1 series patch 3 can be implemented incrementally
instead.

 xen/arch/x86/cpu/microcode/core.c |  7 ++++---
 xen/arch/x86/hvm/dom0_build.c     |  4 +++-
 xen/arch/x86/include/asm/boot.h   | 32 +++++++++++++++++++++++++++++++
 xen/arch/x86/include/asm/setup.h  |  3 ++-
 xen/arch/x86/pv/dom0_build.c      |  3 ++-
 xen/arch/x86/setup.c              | 18 +++++++++--------
 xen/include/xen/bootinfo.h        |  3 +++
 xen/xsm/xsm_policy.c              |  3 ++-
 8 files changed, 58 insertions(+), 15 deletions(-)
 create mode 100644 xen/arch/x86/include/asm/boot.h

Comments

Stefano Stabellini July 8, 2023, 6:47 p.m. UTC | #1
On Sat, 1 Jul 2023, Christopher Clark wrote:
> To convert the x86 boot logic from multiboot to boot module structures,
> change the bootstrap map function to accept a boot module parameter.
> 
> To allow incremental change from multiboot to boot modules across all
> x86 setup logic, provide a temporary inline wrapper that still accepts a
> multiboot module parameter and use it where necessary. The wrapper is
> placed in a new arch/x86 header <asm/boot.h> to avoid putting a static
> inline function into an existing header that has no such functions
> already. This new header will be expanded with additional functions in
> subsequent patches in this series.
> 
> No functional change intended.
> 
> Signed-off-by: Christopher Clark <christopher.w.clark@gmail.com>
> Signed-off-by: Daniel P. Smith <dpsmith@apertussolutions.com>
> 

[...]

> diff --git a/xen/include/xen/bootinfo.h b/xen/include/xen/bootinfo.h
> index b72ae31a66..eb93cc3439 100644
> --- a/xen/include/xen/bootinfo.h
> +++ b/xen/include/xen/bootinfo.h
> @@ -10,6 +10,9 @@
>  #endif
>  
>  struct boot_module {
> +    paddr_t start;
> +    size_t size;

I think size should be paddr_t (instead of size_t) to make sure it is
the right size on both 64-bit and 32-bit architectures that support
64-bit addresses.


>      struct arch_bootmodule *arch;
>  };
Christopher Clark July 14, 2023, 6:51 a.m. UTC | #2
On Sat, Jul 8, 2023 at 11:47 AM Stefano Stabellini <sstabellini@kernel.org>
wrote:

> On Sat, 1 Jul 2023, Christopher Clark wrote:
> > To convert the x86 boot logic from multiboot to boot module structures,
> > change the bootstrap map function to accept a boot module parameter.
> >
> > To allow incremental change from multiboot to boot modules across all
> > x86 setup logic, provide a temporary inline wrapper that still accepts a
> > multiboot module parameter and use it where necessary. The wrapper is
> > placed in a new arch/x86 header <asm/boot.h> to avoid putting a static
> > inline function into an existing header that has no such functions
> > already. This new header will be expanded with additional functions in
> > subsequent patches in this series.
> >
> > No functional change intended.
> >
> > Signed-off-by: Christopher Clark <christopher.w.clark@gmail.com>
> > Signed-off-by: Daniel P. Smith <dpsmith@apertussolutions.com>
> >
>
> [...]
>
> > diff --git a/xen/include/xen/bootinfo.h b/xen/include/xen/bootinfo.h
> > index b72ae31a66..eb93cc3439 100644
> > --- a/xen/include/xen/bootinfo.h
> > +++ b/xen/include/xen/bootinfo.h
> > @@ -10,6 +10,9 @@
> >  #endif
> >
> >  struct boot_module {
> > +    paddr_t start;
> > +    size_t size;
>
> I think size should be paddr_t (instead of size_t) to make sure it is
> the right size on both 64-bit and 32-bit architectures that support
> 64-bit addresses.
>

Thanks, that explanation does make sense - ack.

Christopher


>
>
> >      struct arch_bootmodule *arch;
> >  };
>
Christopher Clark July 20, 2023, 10:12 p.m. UTC | #3
On Thu, Jul 13, 2023 at 11:51 PM Christopher Clark <
christopher.w.clark@gmail.com> wrote:

>
>
> On Sat, Jul 8, 2023 at 11:47 AM Stefano Stabellini <sstabellini@kernel.org>
> wrote:
>
>> On Sat, 1 Jul 2023, Christopher Clark wrote:
>> > To convert the x86 boot logic from multiboot to boot module structures,
>> > change the bootstrap map function to accept a boot module parameter.
>> >
>> > To allow incremental change from multiboot to boot modules across all
>> > x86 setup logic, provide a temporary inline wrapper that still accepts a
>> > multiboot module parameter and use it where necessary. The wrapper is
>> > placed in a new arch/x86 header <asm/boot.h> to avoid putting a static
>> > inline function into an existing header that has no such functions
>> > already. This new header will be expanded with additional functions in
>> > subsequent patches in this series.
>> >
>> > No functional change intended.
>> >
>> > Signed-off-by: Christopher Clark <christopher.w.clark@gmail.com>
>> > Signed-off-by: Daniel P. Smith <dpsmith@apertussolutions.com>
>> >
>>
>> [...]
>>
>> > diff --git a/xen/include/xen/bootinfo.h b/xen/include/xen/bootinfo.h
>> > index b72ae31a66..eb93cc3439 100644
>> > --- a/xen/include/xen/bootinfo.h
>> > +++ b/xen/include/xen/bootinfo.h
>> > @@ -10,6 +10,9 @@
>> >  #endif
>> >
>> >  struct boot_module {
>> > +    paddr_t start;
>> > +    size_t size;
>>
>> I think size should be paddr_t (instead of size_t) to make sure it is
>> the right size on both 64-bit and 32-bit architectures that support
>> 64-bit addresses.
>>
>
> Thanks, that explanation does make sense - ack.
>

I've come back to reconsider this as it doesn't seem right to me to store a
non-address value (which this will always be) in a type explicitly defined
to hold an address: addresses may have architectural alignment requirements
whereas a size value is just a number of bytes so will not. The point of a
size_t value is that size_t is defined to be large enough to hold the size
of any valid object in memory, so I think this was right as-is.

Christopher



>
> Christopher
>
>
>>
>>
>> >      struct arch_bootmodule *arch;
>> >  };
>>
>
Jan Beulich July 21, 2023, 6:14 a.m. UTC | #4
On 21.07.2023 00:12, Christopher Clark wrote:
> On Thu, Jul 13, 2023 at 11:51 PM Christopher Clark <
> christopher.w.clark@gmail.com> wrote:
> 
>>
>>
>> On Sat, Jul 8, 2023 at 11:47 AM Stefano Stabellini <sstabellini@kernel.org>
>> wrote:
>>
>>> On Sat, 1 Jul 2023, Christopher Clark wrote:
>>>> To convert the x86 boot logic from multiboot to boot module structures,
>>>> change the bootstrap map function to accept a boot module parameter.
>>>>
>>>> To allow incremental change from multiboot to boot modules across all
>>>> x86 setup logic, provide a temporary inline wrapper that still accepts a
>>>> multiboot module parameter and use it where necessary. The wrapper is
>>>> placed in a new arch/x86 header <asm/boot.h> to avoid putting a static
>>>> inline function into an existing header that has no such functions
>>>> already. This new header will be expanded with additional functions in
>>>> subsequent patches in this series.
>>>>
>>>> No functional change intended.
>>>>
>>>> Signed-off-by: Christopher Clark <christopher.w.clark@gmail.com>
>>>> Signed-off-by: Daniel P. Smith <dpsmith@apertussolutions.com>
>>>>
>>>
>>> [...]
>>>
>>>> diff --git a/xen/include/xen/bootinfo.h b/xen/include/xen/bootinfo.h
>>>> index b72ae31a66..eb93cc3439 100644
>>>> --- a/xen/include/xen/bootinfo.h
>>>> +++ b/xen/include/xen/bootinfo.h
>>>> @@ -10,6 +10,9 @@
>>>>  #endif
>>>>
>>>>  struct boot_module {
>>>> +    paddr_t start;
>>>> +    size_t size;
>>>
>>> I think size should be paddr_t (instead of size_t) to make sure it is
>>> the right size on both 64-bit and 32-bit architectures that support
>>> 64-bit addresses.
>>>
>>
>> Thanks, that explanation does make sense - ack.
>>
> 
> I've come back to reconsider this as it doesn't seem right to me to store a
> non-address value (which this will always be) in a type explicitly defined
> to hold an address: addresses may have architectural alignment requirements
> whereas a size value is just a number of bytes so will not. The point of a
> size_t value is that size_t is defined to be large enough to hold the size
> of any valid object in memory, so I think this was right as-is.

"Any object in memory" implies virtual addresses (or more generally addresses
which can be used for accessing objects). This isn't the case when considering
physical addresses - there may be far more memory in a system than can be made
accessible all in one go.

Jan
Stefano Stabellini July 21, 2023, 10:08 p.m. UTC | #5
On Fri, 21 Jul 2023, Jan Beulich wrote:
> On 21.07.2023 00:12, Christopher Clark wrote:
> > On Thu, Jul 13, 2023 at 11:51 PM Christopher Clark <
> > christopher.w.clark@gmail.com> wrote:
> > 
> >>
> >>
> >> On Sat, Jul 8, 2023 at 11:47 AM Stefano Stabellini <sstabellini@kernel.org>
> >> wrote:
> >>
> >>> On Sat, 1 Jul 2023, Christopher Clark wrote:
> >>>> To convert the x86 boot logic from multiboot to boot module structures,
> >>>> change the bootstrap map function to accept a boot module parameter.
> >>>>
> >>>> To allow incremental change from multiboot to boot modules across all
> >>>> x86 setup logic, provide a temporary inline wrapper that still accepts a
> >>>> multiboot module parameter and use it where necessary. The wrapper is
> >>>> placed in a new arch/x86 header <asm/boot.h> to avoid putting a static
> >>>> inline function into an existing header that has no such functions
> >>>> already. This new header will be expanded with additional functions in
> >>>> subsequent patches in this series.
> >>>>
> >>>> No functional change intended.
> >>>>
> >>>> Signed-off-by: Christopher Clark <christopher.w.clark@gmail.com>
> >>>> Signed-off-by: Daniel P. Smith <dpsmith@apertussolutions.com>
> >>>>
> >>>
> >>> [...]
> >>>
> >>>> diff --git a/xen/include/xen/bootinfo.h b/xen/include/xen/bootinfo.h
> >>>> index b72ae31a66..eb93cc3439 100644
> >>>> --- a/xen/include/xen/bootinfo.h
> >>>> +++ b/xen/include/xen/bootinfo.h
> >>>> @@ -10,6 +10,9 @@
> >>>>  #endif
> >>>>
> >>>>  struct boot_module {
> >>>> +    paddr_t start;
> >>>> +    size_t size;
> >>>
> >>> I think size should be paddr_t (instead of size_t) to make sure it is
> >>> the right size on both 64-bit and 32-bit architectures that support
> >>> 64-bit addresses.
> >>>
> >>
> >> Thanks, that explanation does make sense - ack.
> >>
> > 
> > I've come back to reconsider this as it doesn't seem right to me to store a
> > non-address value (which this will always be) in a type explicitly defined
> > to hold an address: addresses may have architectural alignment requirements
> > whereas a size value is just a number of bytes so will not. The point of a
> > size_t value is that size_t is defined to be large enough to hold the size
> > of any valid object in memory, so I think this was right as-is.
> 
> "Any object in memory" implies virtual addresses (or more generally addresses
> which can be used for accessing objects). This isn't the case when considering
> physical addresses - there may be far more memory in a system than can be made
> accessible all in one go.

Right. And I think size_t is defined as 32-bit in Xen which is a
problem.
Daniel P. Smith July 27, 2023, 11:46 a.m. UTC | #6
On 7/21/23 02:14, Jan Beulich wrote:
> On 21.07.2023 00:12, Christopher Clark wrote:
>> On Thu, Jul 13, 2023 at 11:51 PM Christopher Clark <
>> christopher.w.clark@gmail.com> wrote:
>>
>>>
>>>
>>> On Sat, Jul 8, 2023 at 11:47 AM Stefano Stabellini <sstabellini@kernel.org>
>>> wrote:
>>>
>>>> On Sat, 1 Jul 2023, Christopher Clark wrote:
>>>>> To convert the x86 boot logic from multiboot to boot module structures,
>>>>> change the bootstrap map function to accept a boot module parameter.
>>>>>
>>>>> To allow incremental change from multiboot to boot modules across all
>>>>> x86 setup logic, provide a temporary inline wrapper that still accepts a
>>>>> multiboot module parameter and use it where necessary. The wrapper is
>>>>> placed in a new arch/x86 header <asm/boot.h> to avoid putting a static
>>>>> inline function into an existing header that has no such functions
>>>>> already. This new header will be expanded with additional functions in
>>>>> subsequent patches in this series.
>>>>>
>>>>> No functional change intended.
>>>>>
>>>>> Signed-off-by: Christopher Clark <christopher.w.clark@gmail.com>
>>>>> Signed-off-by: Daniel P. Smith <dpsmith@apertussolutions.com>
>>>>>
>>>>
>>>> [...]
>>>>
>>>>> diff --git a/xen/include/xen/bootinfo.h b/xen/include/xen/bootinfo.h
>>>>> index b72ae31a66..eb93cc3439 100644
>>>>> --- a/xen/include/xen/bootinfo.h
>>>>> +++ b/xen/include/xen/bootinfo.h
>>>>> @@ -10,6 +10,9 @@
>>>>>   #endif
>>>>>
>>>>>   struct boot_module {
>>>>> +    paddr_t start;
>>>>> +    size_t size;
>>>>
>>>> I think size should be paddr_t (instead of size_t) to make sure it is
>>>> the right size on both 64-bit and 32-bit architectures that support
>>>> 64-bit addresses.
>>>>
>>>
>>> Thanks, that explanation does make sense - ack.
>>>
>>
>> I've come back to reconsider this as it doesn't seem right to me to store a
>> non-address value (which this will always be) in a type explicitly defined
>> to hold an address: addresses may have architectural alignment requirements
>> whereas a size value is just a number of bytes so will not. The point of a
>> size_t value is that size_t is defined to be large enough to hold the size
>> of any valid object in memory, so I think this was right as-is.
> 
> "Any object in memory" implies virtual addresses (or more generally addresses
> which can be used for accessing objects). This isn't the case when considering
> physical addresses - there may be far more memory in a system than can be made
> accessible all in one go.

That is not my understanding of it, but I could be wrong. My 
understanding based on all the debates I have read online around this 
topic is that the intent in the spec is that size_t has to be able to 
hold a value that represents the largest object the CPU can manipulate 
with general purpose operations. From which I understand to mean as 
large as the largest register a CPU instruction may use for a size 
argument to a general purpose instruction. On x86_64, that is a 64bit 
register, as I don't believe the SSE/AVX registers are counted even 
though the are used by compiler/libc implementations to optimize some 
memory operations.

 From what I have seen for Xen, this is currently reflected in the x86 
code base, as size_t is 32bits for the early 32bit code and 64bits for 
Xen proper.

That aside, another objection I have to the use of paddr_t is that it is 
type abuse. Types are meant to convey context to the intended use of the 
variable and enable the ability to enforce proper usage of the variable, 
otherwise we might as well just use u64/uint64_t and be done. The 
field's purpose is to convey a size of an object, and labeling it a type 
that is intended for physical address objects violates both intents 
behind declaring a type, it asserts an invalid context and enables 
violations of type checking.

V/r,
Daniel P. Smith
Daniel P. Smith July 27, 2023, 11:49 a.m. UTC | #7
On 7/21/23 18:08, Stefano Stabellini wrote:
> On Fri, 21 Jul 2023, Jan Beulich wrote:
>> On 21.07.2023 00:12, Christopher Clark wrote:
>>> On Thu, Jul 13, 2023 at 11:51 PM Christopher Clark <
>>> christopher.w.clark@gmail.com> wrote:
>>>
>>>>
>>>>
>>>> On Sat, Jul 8, 2023 at 11:47 AM Stefano Stabellini <sstabellini@kernel.org>
>>>> wrote:
>>>>
>>>>> On Sat, 1 Jul 2023, Christopher Clark wrote:
>>>>>> To convert the x86 boot logic from multiboot to boot module structures,
>>>>>> change the bootstrap map function to accept a boot module parameter.
>>>>>>
>>>>>> To allow incremental change from multiboot to boot modules across all
>>>>>> x86 setup logic, provide a temporary inline wrapper that still accepts a
>>>>>> multiboot module parameter and use it where necessary. The wrapper is
>>>>>> placed in a new arch/x86 header <asm/boot.h> to avoid putting a static
>>>>>> inline function into an existing header that has no such functions
>>>>>> already. This new header will be expanded with additional functions in
>>>>>> subsequent patches in this series.
>>>>>>
>>>>>> No functional change intended.
>>>>>>
>>>>>> Signed-off-by: Christopher Clark <christopher.w.clark@gmail.com>
>>>>>> Signed-off-by: Daniel P. Smith <dpsmith@apertussolutions.com>
>>>>>>
>>>>>
>>>>> [...]
>>>>>
>>>>>> diff --git a/xen/include/xen/bootinfo.h b/xen/include/xen/bootinfo.h
>>>>>> index b72ae31a66..eb93cc3439 100644
>>>>>> --- a/xen/include/xen/bootinfo.h
>>>>>> +++ b/xen/include/xen/bootinfo.h
>>>>>> @@ -10,6 +10,9 @@
>>>>>>   #endif
>>>>>>
>>>>>>   struct boot_module {
>>>>>> +    paddr_t start;
>>>>>> +    size_t size;
>>>>>
>>>>> I think size should be paddr_t (instead of size_t) to make sure it is
>>>>> the right size on both 64-bit and 32-bit architectures that support
>>>>> 64-bit addresses.
>>>>>
>>>>
>>>> Thanks, that explanation does make sense - ack.
>>>>
>>>
>>> I've come back to reconsider this as it doesn't seem right to me to store a
>>> non-address value (which this will always be) in a type explicitly defined
>>> to hold an address: addresses may have architectural alignment requirements
>>> whereas a size value is just a number of bytes so will not. The point of a
>>> size_t value is that size_t is defined to be large enough to hold the size
>>> of any valid object in memory, so I think this was right as-is.
>>
>> "Any object in memory" implies virtual addresses (or more generally addresses
>> which can be used for accessing objects). This isn't the case when considering
>> physical addresses - there may be far more memory in a system than can be made
>> accessible all in one go.
> 
> Right. And I think size_t is defined as 32-bit in Xen which is a
> problem.

In x86 32bit early boot code it is 32bits, but in Xen proper it is 
64bits. That is why in the 32bit HL code, a second set of structures was 
used with macros to ensure the structures used 64bit values for field 
types that are not 64bits in 32bit mode code.

v/r,
dps
Jan Beulich July 27, 2023, 11:58 a.m. UTC | #8
On 27.07.2023 13:46, Daniel P. Smith wrote:
> 
> 
> On 7/21/23 02:14, Jan Beulich wrote:
>> On 21.07.2023 00:12, Christopher Clark wrote:
>>> On Thu, Jul 13, 2023 at 11:51 PM Christopher Clark <
>>> christopher.w.clark@gmail.com> wrote:
>>>
>>>>
>>>>
>>>> On Sat, Jul 8, 2023 at 11:47 AM Stefano Stabellini <sstabellini@kernel.org>
>>>> wrote:
>>>>
>>>>> On Sat, 1 Jul 2023, Christopher Clark wrote:
>>>>>> To convert the x86 boot logic from multiboot to boot module structures,
>>>>>> change the bootstrap map function to accept a boot module parameter.
>>>>>>
>>>>>> To allow incremental change from multiboot to boot modules across all
>>>>>> x86 setup logic, provide a temporary inline wrapper that still accepts a
>>>>>> multiboot module parameter and use it where necessary. The wrapper is
>>>>>> placed in a new arch/x86 header <asm/boot.h> to avoid putting a static
>>>>>> inline function into an existing header that has no such functions
>>>>>> already. This new header will be expanded with additional functions in
>>>>>> subsequent patches in this series.
>>>>>>
>>>>>> No functional change intended.
>>>>>>
>>>>>> Signed-off-by: Christopher Clark <christopher.w.clark@gmail.com>
>>>>>> Signed-off-by: Daniel P. Smith <dpsmith@apertussolutions.com>
>>>>>>
>>>>>
>>>>> [...]
>>>>>
>>>>>> diff --git a/xen/include/xen/bootinfo.h b/xen/include/xen/bootinfo.h
>>>>>> index b72ae31a66..eb93cc3439 100644
>>>>>> --- a/xen/include/xen/bootinfo.h
>>>>>> +++ b/xen/include/xen/bootinfo.h
>>>>>> @@ -10,6 +10,9 @@
>>>>>>   #endif
>>>>>>
>>>>>>   struct boot_module {
>>>>>> +    paddr_t start;
>>>>>> +    size_t size;
>>>>>
>>>>> I think size should be paddr_t (instead of size_t) to make sure it is
>>>>> the right size on both 64-bit and 32-bit architectures that support
>>>>> 64-bit addresses.
>>>>>
>>>>
>>>> Thanks, that explanation does make sense - ack.
>>>>
>>>
>>> I've come back to reconsider this as it doesn't seem right to me to store a
>>> non-address value (which this will always be) in a type explicitly defined
>>> to hold an address: addresses may have architectural alignment requirements
>>> whereas a size value is just a number of bytes so will not. The point of a
>>> size_t value is that size_t is defined to be large enough to hold the size
>>> of any valid object in memory, so I think this was right as-is.
>>
>> "Any object in memory" implies virtual addresses (or more generally addresses
>> which can be used for accessing objects). This isn't the case when considering
>> physical addresses - there may be far more memory in a system than can be made
>> accessible all in one go.
> 
> That is not my understanding of it, but I could be wrong. My 
> understanding based on all the debates I have read online around this 
> topic is that the intent in the spec is that size_t has to be able to 
> hold a value that represents the largest object the CPU can manipulate 
> with general purpose operations. From which I understand to mean as 
> large as the largest register a CPU instruction may use for a size 
> argument to a general purpose instruction. On x86_64, that is a 64bit 
> register, as I don't believe the SSE/AVX registers are counted even 
> though the are used by compiler/libc implementations to optimize some 
> memory operations.

I can't see how this relates to my earlier remark.

>  From what I have seen for Xen, this is currently reflected in the x86 
> code base, as size_t is 32bits for the early 32bit code and 64bits for 
> Xen proper.
> 
> That aside, another objection I have to the use of paddr_t is that it is 
> type abuse. Types are meant to convey context to the intended use of the 
> variable and enable the ability to enforce proper usage of the variable, 
> otherwise we might as well just use u64/uint64_t and be done. The 
> field's purpose is to convey a size of an object,

You use "object" here again, when in physical address space (with paging
enabled) this isn't an appropriate term.

> and labeling it a type 
> that is intended for physical address objects violates both intents 
> behind declaring a type, it asserts an invalid context and enables 
> violations of type checking.

It is type abuse to a certain extent, yes, but what do you do? We could
invent psize_t, but that would (afaics) always match paddr_t. uint64_t
otoh may be too larger for 32-bit platforms which only know a 32-bit
wide physical address space.

Jan
Daniel P. Smith July 27, 2023, 12:48 p.m. UTC | #9
On 7/27/23 07:58, Jan Beulich wrote:
> On 27.07.2023 13:46, Daniel P. Smith wrote:
>>
>>
>> On 7/21/23 02:14, Jan Beulich wrote:
>>> On 21.07.2023 00:12, Christopher Clark wrote:
>>>> On Thu, Jul 13, 2023 at 11:51 PM Christopher Clark <
>>>> christopher.w.clark@gmail.com> wrote:
>>>>
>>>>>
>>>>>
>>>>> On Sat, Jul 8, 2023 at 11:47 AM Stefano Stabellini <sstabellini@kernel.org>
>>>>> wrote:
>>>>>
>>>>>> On Sat, 1 Jul 2023, Christopher Clark wrote:
>>>>>>> To convert the x86 boot logic from multiboot to boot module structures,
>>>>>>> change the bootstrap map function to accept a boot module parameter.
>>>>>>>
>>>>>>> To allow incremental change from multiboot to boot modules across all
>>>>>>> x86 setup logic, provide a temporary inline wrapper that still accepts a
>>>>>>> multiboot module parameter and use it where necessary. The wrapper is
>>>>>>> placed in a new arch/x86 header <asm/boot.h> to avoid putting a static
>>>>>>> inline function into an existing header that has no such functions
>>>>>>> already. This new header will be expanded with additional functions in
>>>>>>> subsequent patches in this series.
>>>>>>>
>>>>>>> No functional change intended.
>>>>>>>
>>>>>>> Signed-off-by: Christopher Clark <christopher.w.clark@gmail.com>
>>>>>>> Signed-off-by: Daniel P. Smith <dpsmith@apertussolutions.com>
>>>>>>>
>>>>>>
>>>>>> [...]
>>>>>>
>>>>>>> diff --git a/xen/include/xen/bootinfo.h b/xen/include/xen/bootinfo.h
>>>>>>> index b72ae31a66..eb93cc3439 100644
>>>>>>> --- a/xen/include/xen/bootinfo.h
>>>>>>> +++ b/xen/include/xen/bootinfo.h
>>>>>>> @@ -10,6 +10,9 @@
>>>>>>>    #endif
>>>>>>>
>>>>>>>    struct boot_module {
>>>>>>> +    paddr_t start;
>>>>>>> +    size_t size;
>>>>>>
>>>>>> I think size should be paddr_t (instead of size_t) to make sure it is
>>>>>> the right size on both 64-bit and 32-bit architectures that support
>>>>>> 64-bit addresses.
>>>>>>
>>>>>
>>>>> Thanks, that explanation does make sense - ack.
>>>>>
>>>>
>>>> I've come back to reconsider this as it doesn't seem right to me to store a
>>>> non-address value (which this will always be) in a type explicitly defined
>>>> to hold an address: addresses may have architectural alignment requirements
>>>> whereas a size value is just a number of bytes so will not. The point of a
>>>> size_t value is that size_t is defined to be large enough to hold the size
>>>> of any valid object in memory, so I think this was right as-is.
>>>
>>> "Any object in memory" implies virtual addresses (or more generally addresses
>>> which can be used for accessing objects). This isn't the case when considering
>>> physical addresses - there may be far more memory in a system than can be made
>>> accessible all in one go.
>>
>> That is not my understanding of it, but I could be wrong. My
>> understanding based on all the debates I have read online around this
>> topic is that the intent in the spec is that size_t has to be able to
>> hold a value that represents the largest object the CPU can manipulate
>> with general purpose operations. From which I understand to mean as
>> large as the largest register a CPU instruction may use for a size
>> argument to a general purpose instruction. On x86_64, that is a 64bit
>> register, as I don't believe the SSE/AVX registers are counted even
>> though the are used by compiler/libc implementations to optimize some
>> memory operations.
> 
> I can't see how this relates to my earlier remark.

Perhaps I misunderstood what your point was then. I thought you were 
taking the position that size_t could not be used to represent the 
largest object in memory addressable by a single CPU operation.

>>   From what I have seen for Xen, this is currently reflected in the x86
>> code base, as size_t is 32bits for the early 32bit code and 64bits for
>> Xen proper.
>>
>> That aside, another objection I have to the use of paddr_t is that it is
>> type abuse. Types are meant to convey context to the intended use of the
>> variable and enable the ability to enforce proper usage of the variable,
>> otherwise we might as well just use u64/uint64_t and be done. The
>> field's purpose is to convey a size of an object,
> 
> You use "object" here again, when in physical address space (with paging
> enabled) this isn't an appropriate term.

Because that is the language used in the C spec to refer to instances in 
memory,

"Object: region of data storage in the execution environment, the 
contents of which can represent values"

ISO/IEC 9899:1999(E) - 3.14: 
https://www.dii.uchile.cl/~daespino/files/Iso_C_1999_definition.pdf



With the following two interpretations of the spec for size_t to mean 
(any emphasis being mine),


"size_t is an unsigned integer type used to represent the size of any 
**object** (including arrays) in the particular implementation."

Wikipedia - size_t: https://en.wikipedia.org/wiki/C_data_types#stddef.h


"size_t can store the maximum size of a theoretically possible 
**object** of any type (including array)."

CPP Ref - size_t: (https://en.cppreference.com/w/c/types/size_t)


>> and labeling it a type
>> that is intended for physical address objects violates both intents
>> behind declaring a type, it asserts an invalid context and enables
>> violations of type checking.
> 
> It is type abuse to a certain extent, yes, but what do you do? We could
> invent psize_t, but that would (afaics) always match paddr_t. uint64_t
> otoh may be too larger for 32-bit platforms which only know a 32-bit
> wide physical address space.

Why invent a new type? That is the purpose of `size_t`, and it should be 
of the correct size, otherwise Xen's implementation is incorrect (which 
it is not).

dps
Jan Beulich July 27, 2023, 12:54 p.m. UTC | #10
On 27.07.2023 14:48, Daniel P. Smith wrote:
> On 7/27/23 07:58, Jan Beulich wrote:
>> On 27.07.2023 13:46, Daniel P. Smith wrote:
>>> On 7/21/23 02:14, Jan Beulich wrote:
>>>> On 21.07.2023 00:12, Christopher Clark wrote:
>>>>> On Thu, Jul 13, 2023 at 11:51 PM Christopher Clark <
>>>>> christopher.w.clark@gmail.com> wrote:
>>>>>> On Sat, Jul 8, 2023 at 11:47 AM Stefano Stabellini <sstabellini@kernel.org>
>>>>>> wrote:
>>>>>>> On Sat, 1 Jul 2023, Christopher Clark wrote:
>>>>>>>> To convert the x86 boot logic from multiboot to boot module structures,
>>>>>>>> change the bootstrap map function to accept a boot module parameter.
>>>>>>>>
>>>>>>>> To allow incremental change from multiboot to boot modules across all
>>>>>>>> x86 setup logic, provide a temporary inline wrapper that still accepts a
>>>>>>>> multiboot module parameter and use it where necessary. The wrapper is
>>>>>>>> placed in a new arch/x86 header <asm/boot.h> to avoid putting a static
>>>>>>>> inline function into an existing header that has no such functions
>>>>>>>> already. This new header will be expanded with additional functions in
>>>>>>>> subsequent patches in this series.
>>>>>>>>
>>>>>>>> No functional change intended.
>>>>>>>>
>>>>>>>> Signed-off-by: Christopher Clark <christopher.w.clark@gmail.com>
>>>>>>>> Signed-off-by: Daniel P. Smith <dpsmith@apertussolutions.com>
>>>>>>>>
>>>>>>>
>>>>>>> [...]
>>>>>>>
>>>>>>>> diff --git a/xen/include/xen/bootinfo.h b/xen/include/xen/bootinfo.h
>>>>>>>> index b72ae31a66..eb93cc3439 100644
>>>>>>>> --- a/xen/include/xen/bootinfo.h
>>>>>>>> +++ b/xen/include/xen/bootinfo.h
>>>>>>>> @@ -10,6 +10,9 @@
>>>>>>>>    #endif
>>>>>>>>
>>>>>>>>    struct boot_module {
>>>>>>>> +    paddr_t start;
>>>>>>>> +    size_t size;
>>>>>>>
>>>>>>> I think size should be paddr_t (instead of size_t) to make sure it is
>>>>>>> the right size on both 64-bit and 32-bit architectures that support
>>>>>>> 64-bit addresses.
>>>>>>>
>>>>>>
>>>>>> Thanks, that explanation does make sense - ack.
>>>>>>
>>>>>
>>>>> I've come back to reconsider this as it doesn't seem right to me to store a
>>>>> non-address value (which this will always be) in a type explicitly defined
>>>>> to hold an address: addresses may have architectural alignment requirements
>>>>> whereas a size value is just a number of bytes so will not. The point of a
>>>>> size_t value is that size_t is defined to be large enough to hold the size
>>>>> of any valid object in memory, so I think this was right as-is.
>>>>
>>>> "Any object in memory" implies virtual addresses (or more generally addresses
>>>> which can be used for accessing objects). This isn't the case when considering
>>>> physical addresses - there may be far more memory in a system than can be made
>>>> accessible all in one go.
>>>
>>> That is not my understanding of it, but I could be wrong. My
>>> understanding based on all the debates I have read online around this
>>> topic is that the intent in the spec is that size_t has to be able to
>>> hold a value that represents the largest object the CPU can manipulate
>>> with general purpose operations. From which I understand to mean as
>>> large as the largest register a CPU instruction may use for a size
>>> argument to a general purpose instruction. On x86_64, that is a 64bit
>>> register, as I don't believe the SSE/AVX registers are counted even
>>> though the are used by compiler/libc implementations to optimize some
>>> memory operations.
>>
>> I can't see how this relates to my earlier remark.
> 
> Perhaps I misunderstood what your point was then. I thought you were 
> taking the position that size_t could not be used to represent the 
> largest object in memory addressable by a single CPU operation.

No. I was trying to clarify that we're talking about physical addresses
here. Which you still seem to have trouble with, ...

>>>   From what I have seen for Xen, this is currently reflected in the x86
>>> code base, as size_t is 32bits for the early 32bit code and 64bits for
>>> Xen proper.
>>>
>>> That aside, another objection I have to the use of paddr_t is that it is
>>> type abuse. Types are meant to convey context to the intended use of the
>>> variable and enable the ability to enforce proper usage of the variable,
>>> otherwise we might as well just use u64/uint64_t and be done. The
>>> field's purpose is to convey a size of an object,
>>
>> You use "object" here again, when in physical address space (with paging
>> enabled) this isn't an appropriate term.
> 
> Because that is the language used in the C spec to refer to instances in 
> memory,
> 
> "Object: region of data storage in the execution environment, the 
> contents of which can represent values"
> 
> ISO/IEC 9899:1999(E) - 3.14: 
> https://www.dii.uchile.cl/~daespino/files/Iso_C_1999_definition.pdf
> 
> 
> 
> With the following two interpretations of the spec for size_t to mean 
> (any emphasis being mine),
> 
> 
> "size_t is an unsigned integer type used to represent the size of any 
> **object** (including arrays) in the particular implementation."
> 
> Wikipedia - size_t: https://en.wikipedia.org/wiki/C_data_types#stddef.h
> 
> 
> "size_t can store the maximum size of a theoretically possible 
> **object** of any type (including array)."
> 
> CPP Ref - size_t: (https://en.cppreference.com/w/c/types/size_t)

... according to all of this and ...

>>> and labeling it a type
>>> that is intended for physical address objects violates both intents
>>> behind declaring a type, it asserts an invalid context and enables
>>> violations of type checking.
>>
>> It is type abuse to a certain extent, yes, but what do you do? We could
>> invent psize_t, but that would (afaics) always match paddr_t. uint64_t
>> otoh may be too larger for 32-bit platforms which only know a 32-bit
>> wide physical address space.
> 
> Why invent a new type? That is the purpose of `size_t`, and it should be 
> of the correct size, otherwise Xen's implementation is incorrect (which 
> it is not).

... this. What C talks about is what the CPU can address (within a single
address space, i.e. normally virtual addresses). With 32-bit addresses
you can address at most 4G, when the system you're running on may have
much more memory. Yet in an OS or hypervisor you need to deal with this
larger amount of memory, no matter that you can't address all of it in
one go.

Jan
Daniel P. Smith July 27, 2023, 1:26 p.m. UTC | #11
On 7/27/23 08:54, Jan Beulich wrote:
> On 27.07.2023 14:48, Daniel P. Smith wrote:
>> On 7/27/23 07:58, Jan Beulich wrote:
>>> On 27.07.2023 13:46, Daniel P. Smith wrote:
>>>> On 7/21/23 02:14, Jan Beulich wrote:
>>>>> On 21.07.2023 00:12, Christopher Clark wrote:
>>>>>> On Thu, Jul 13, 2023 at 11:51 PM Christopher Clark <
>>>>>> christopher.w.clark@gmail.com> wrote:
>>>>>>> On Sat, Jul 8, 2023 at 11:47 AM Stefano Stabellini <sstabellini@kernel.org>
>>>>>>> wrote:
>>>>>>>> On Sat, 1 Jul 2023, Christopher Clark wrote:
>>>>>>>>> To convert the x86 boot logic from multiboot to boot module structures,
>>>>>>>>> change the bootstrap map function to accept a boot module parameter.
>>>>>>>>>
>>>>>>>>> To allow incremental change from multiboot to boot modules across all
>>>>>>>>> x86 setup logic, provide a temporary inline wrapper that still accepts a
>>>>>>>>> multiboot module parameter and use it where necessary. The wrapper is
>>>>>>>>> placed in a new arch/x86 header <asm/boot.h> to avoid putting a static
>>>>>>>>> inline function into an existing header that has no such functions
>>>>>>>>> already. This new header will be expanded with additional functions in
>>>>>>>>> subsequent patches in this series.
>>>>>>>>>
>>>>>>>>> No functional change intended.
>>>>>>>>>
>>>>>>>>> Signed-off-by: Christopher Clark <christopher.w.clark@gmail.com>
>>>>>>>>> Signed-off-by: Daniel P. Smith <dpsmith@apertussolutions.com>
>>>>>>>>>
>>>>>>>>
>>>>>>>> [...]
>>>>>>>>
>>>>>>>>> diff --git a/xen/include/xen/bootinfo.h b/xen/include/xen/bootinfo.h
>>>>>>>>> index b72ae31a66..eb93cc3439 100644
>>>>>>>>> --- a/xen/include/xen/bootinfo.h
>>>>>>>>> +++ b/xen/include/xen/bootinfo.h
>>>>>>>>> @@ -10,6 +10,9 @@
>>>>>>>>>     #endif
>>>>>>>>>
>>>>>>>>>     struct boot_module {
>>>>>>>>> +    paddr_t start;
>>>>>>>>> +    size_t size;
>>>>>>>>
>>>>>>>> I think size should be paddr_t (instead of size_t) to make sure it is
>>>>>>>> the right size on both 64-bit and 32-bit architectures that support
>>>>>>>> 64-bit addresses.
>>>>>>>>
>>>>>>>
>>>>>>> Thanks, that explanation does make sense - ack.
>>>>>>>
>>>>>>
>>>>>> I've come back to reconsider this as it doesn't seem right to me to store a
>>>>>> non-address value (which this will always be) in a type explicitly defined
>>>>>> to hold an address: addresses may have architectural alignment requirements
>>>>>> whereas a size value is just a number of bytes so will not. The point of a
>>>>>> size_t value is that size_t is defined to be large enough to hold the size
>>>>>> of any valid object in memory, so I think this was right as-is.
>>>>>
>>>>> "Any object in memory" implies virtual addresses (or more generally addresses
>>>>> which can be used for accessing objects). This isn't the case when considering
>>>>> physical addresses - there may be far more memory in a system than can be made
>>>>> accessible all in one go.
>>>>
>>>> That is not my understanding of it, but I could be wrong. My
>>>> understanding based on all the debates I have read online around this
>>>> topic is that the intent in the spec is that size_t has to be able to
>>>> hold a value that represents the largest object the CPU can manipulate
>>>> with general purpose operations. From which I understand to mean as
>>>> large as the largest register a CPU instruction may use for a size
>>>> argument to a general purpose instruction. On x86_64, that is a 64bit
>>>> register, as I don't believe the SSE/AVX registers are counted even
>>>> though the are used by compiler/libc implementations to optimize some
>>>> memory operations.
>>>
>>> I can't see how this relates to my earlier remark.
>>
>> Perhaps I misunderstood what your point was then. I thought you were
>> taking the position that size_t could not be used to represent the
>> largest object in memory addressable by a single CPU operation.
> 
> No. I was trying to clarify that we're talking about physical addresses
> here. Which you still seem to have trouble with, ...

No, I perfectly understand what you are saying and am not having 
difficulties with.

>>>>    From what I have seen for Xen, this is currently reflected in the x86
>>>> code base, as size_t is 32bits for the early 32bit code and 64bits for
>>>> Xen proper.
>>>>
>>>> That aside, another objection I have to the use of paddr_t is that it is
>>>> type abuse. Types are meant to convey context to the intended use of the
>>>> variable and enable the ability to enforce proper usage of the variable,
>>>> otherwise we might as well just use u64/uint64_t and be done. The
>>>> field's purpose is to convey a size of an object,
>>>
>>> You use "object" here again, when in physical address space (with paging
>>> enabled) this isn't an appropriate term.
>>
>> Because that is the language used in the C spec to refer to instances in
>> memory,
>>
>> "Object: region of data storage in the execution environment, the
>> contents of which can represent values"
>>
>> ISO/IEC 9899:1999(E) - 3.14:
>> https://www.dii.uchile.cl/~daespino/files/Iso_C_1999_definition.pdf
>>
>>
>>
>> With the following two interpretations of the spec for size_t to mean
>> (any emphasis being mine),
>>
>>
>> "size_t is an unsigned integer type used to represent the size of any
>> **object** (including arrays) in the particular implementation."
>>
>> Wikipedia - size_t: https://en.wikipedia.org/wiki/C_data_types#stddef.h
>>
>>
>> "size_t can store the maximum size of a theoretically possible
>> **object** of any type (including array)."
>>
>> CPP Ref - size_t: (https://en.cppreference.com/w/c/types/size_t)
> 
> ... according to all of this and ...
> 
>>>> and labeling it a type
>>>> that is intended for physical address objects violates both intents
>>>> behind declaring a type, it asserts an invalid context and enables
>>>> violations of type checking.
>>>
>>> It is type abuse to a certain extent, yes, but what do you do? We could
>>> invent psize_t, but that would (afaics) always match paddr_t. uint64_t
>>> otoh may be too larger for 32-bit platforms which only know a 32-bit
>>> wide physical address space.
>>
>> Why invent a new type? That is the purpose of `size_t`, and it should be
>> of the correct size, otherwise Xen's implementation is incorrect (which
>> it is not).
> 
> ... this. What C talks about is what the CPU can address (within a single
> address space, i.e. normally virtual addresses). With 32-bit addresses
> you can address at most 4G, when the system you're running on may have
> much more memory. Yet in an OS or hypervisor you need to deal with this
> larger amount of memory, no matter that you can't address all of it in
> one go.

Nothing I said disagrees with your statement.

Let's bring this back to the actual implementation instead of the 
theoretical. Your position is that Xen's paddr_t is desired because it 
can store larger values than that of size_t. Now if you look in Xen 
proper (main 64bit code on x86), paddr_t is a typedef for a 64bit 
unsigned integer. And if you look at size_t, it is also a typedef to a 
64bit unsigned integer, they are literally a couple of lines apart in 
types.h. Thus they are the same size and can only represent the same 
maximum size. The only area of issue for x86 is during the short bit of 
code that runs in 32bit mode during startup. In this series, we address 
this by using a set of macros in the 32bit code to provide 64bit clean 
definition of the structures. This approach is acceptable because as far 
as I am aware, x86 is the only platform where the hypervisor has to 
transition from one bit size to another, e.g. Arm just starts in 64bit 
mode when on a 64bit device.

At the end of the day, size_t is the same size as paddr_t for the end 
execution environments and I would levy a guess that should x86 suddenly 
find itself having a 128bit mode which would likely drive paddr_t to 
128bits, so would follow size_t.

v/r,
dps
Jan Beulich July 27, 2023, 2:42 p.m. UTC | #12
On 27.07.2023 15:26, Daniel P. Smith wrote:
> Let's bring this back to the actual implementation instead of the 
> theoretical. Your position is that Xen's paddr_t is desired because it 
> can store larger values than that of size_t. Now if you look in Xen 
> proper (main 64bit code on x86), paddr_t is a typedef for a 64bit 
> unsigned integer. And if you look at size_t, it is also a typedef to a 
> 64bit unsigned integer, they are literally a couple of lines apart in 
> types.h. Thus they are the same size and can only represent the same 
> maximum size.

What about 32-bit Arm, or any other 32-bit architecture that we might
see Xen ported to, with wider than 32-bit physical address space?

> The only area of issue for x86 is during the short bit of 
> code that runs in 32bit mode during startup. In this series, we address 
> this by using a set of macros in the 32bit code to provide 64bit clean 
> definition of the structures. This approach is acceptable because as far 
> as I am aware, x86 is the only platform where the hypervisor has to 
> transition from one bit size to another, e.g. Arm just starts in 64bit 
> mode when on a 64bit device.
> 
> At the end of the day, size_t is the same size as paddr_t for the end 
> execution environments and I would levy a guess that should x86 suddenly 
> find itself having a 128bit mode which would likely drive paddr_t to 
> 128bits, so would follow size_t.

Likely. Yet when we still had ix86 (x86-32) support, paddr_t also
wasn't the same as size_t. Even on x86-64 it's possible we'd see
physical address width go beyond 64 bits before virtual address width
would, just like had happened for ix86 (which initially only allowed
for 32-bit physical addresses, until PSE36 arrived).

In your implementation you want to cover the general case, not a subset
of special ones.

Jan
George Dunlap July 27, 2023, 3:15 p.m. UTC | #13
On Thu, Jul 27, 2023 at 3:42 PM Jan Beulich <jbeulich@suse.com> wrote:

> On 27.07.2023 15:26, Daniel P. Smith wrote:
> > Let's bring this back to the actual implementation instead of the
> > theoretical. Your position is that Xen's paddr_t is desired because it
> > can store larger values than that of size_t. Now if you look in Xen
> > proper (main 64bit code on x86), paddr_t is a typedef for a 64bit
> > unsigned integer. And if you look at size_t, it is also a typedef to a
> > 64bit unsigned integer, they are literally a couple of lines apart in
> > types.h. Thus they are the same size and can only represent the same
> > maximum size.
>
> What about 32-bit Arm, or any other 32-bit architecture that we might
> see Xen ported to, with wider than 32-bit physical address space?
>

To be more concrete here:

Suppose that you had a machine with 32-bit virtual address spaces (i.e.,
going up to 4GiB), and 36-bit physical address spaces (i.e., going up to
64GiB).  And suppose you had a system with 16GiB of physical ram.  And you
wanted to use Hyperlaunch to create VMs using some sort of memory image
that was 5GiB (presumably of some kind of static data, not, say, a kernel
or initramfs).  You wouldn't be able to do it if the "size" parameter of
the boot modules was limited to 4GiB (without some kind of hack where you
string multiple boot modules together).

I guess part of the question is whether we think that's an important use
case; on the whole if you're populating 5GiB of RAM, it seems like it would
be better to have the VM load it itself from disk.

I do see the logic behind wanting to avoid "paddr_t" for a size; I'm sure
Jan that you would nack any patch that used "size_t" as a memory address
(instead of, say, uintptr_t).  In that case, "psize_t" is the obvious
solution.

 -George
Stefano Stabellini July 27, 2023, 6:36 p.m. UTC | #14
On Thu, 27 Jul 2023, George Dunlap wrote:
> On Thu, Jul 27, 2023 at 3:42 PM Jan Beulich <jbeulich@suse.com> wrote:
>       On 27.07.2023 15:26, Daniel P. Smith wrote:
>       > Let's bring this back to the actual implementation instead of the
>       > theoretical. Your position is that Xen's paddr_t is desired because it
>       > can store larger values than that of size_t. Now if you look in Xen
>       > proper (main 64bit code on x86), paddr_t is a typedef for a 64bit
>       > unsigned integer. And if you look at size_t, it is also a typedef to a
>       > 64bit unsigned integer, they are literally a couple of lines apart in
>       > types.h. Thus they are the same size and can only represent the same
>       > maximum size.
> 
>       What about 32-bit Arm, or any other 32-bit architecture that we might
>       see Xen ported to, with wider than 32-bit physical address space?
> 
> 
> To be more concrete here:
> 
> Suppose that you had a machine with 32-bit virtual address spaces (i.e., going up to 4GiB), and 36-bit physical address spaces (i.e., going
> up to 64GiB).  And suppose you had a system with 16GiB of physical ram.  And you wanted to use Hyperlaunch to create VMs using some sort of
> memory image that was 5GiB (presumably of some kind of static data, not, say, a kernel or initramfs).  You wouldn't be able to do it if the
> "size" parameter of the boot modules was limited to 4GiB (without some kind of hack where you string multiple boot modules together).

Yes exactly.

I would like this code to be common across ARM and x86. On arm32 size_t
wouldn't work, with size_t as it is defined today. One option is to use
paddr_t on all arches, or at least on arm32. Another option is to change
the definition of size_t on arm32.

My suggestion it to move the existing equivalent dom0less interface
defined here:
xen/arch/arm/include/asm/setup.c:struct bootmodule
to common code and base this work on top of it. struct bootmodule uses
paddr_t for both start and size to solve the arm32 issue today.
Daniel P. Smith July 27, 2023, 7:29 p.m. UTC | #15
On 7/27/23 11:15, George Dunlap wrote:
> 
> 
> On Thu, Jul 27, 2023 at 3:42 PM Jan Beulich <jbeulich@suse.com 
> <mailto:jbeulich@suse.com>> wrote:
> 
>     On 27.07.2023 15:26, Daniel P. Smith wrote:
>      > Let's bring this back to the actual implementation instead of the
>      > theoretical. Your position is that Xen's paddr_t is desired
>     because it
>      > can store larger values than that of size_t. Now if you look in Xen
>      > proper (main 64bit code on x86), paddr_t is a typedef for a 64bit
>      > unsigned integer. And if you look at size_t, it is also a typedef
>     to a
>      > 64bit unsigned integer, they are literally a couple of lines
>     apart in
>      > types.h. Thus they are the same size and can only represent the same
>      > maximum size.
> 
>     What about 32-bit Arm, or any other 32-bit architecture that we might
>     see Xen ported to, with wider than 32-bit physical address space?
> 
> 
> To be more concrete here:
> 
> Suppose that you had a machine with 32-bit virtual address spaces (i.e., 
> going up to 4GiB), and 36-bit physical address spaces (i.e., going up to 
> 64GiB).  And suppose you had a system with 16GiB of physical ram.  And 
> you wanted to use Hyperlaunch to create VMs using some sort of memory 
> image that was 5GiB (presumably of some kind of static data, not, say, a 
> kernel or initramfs).  You wouldn't be able to do it if the "size" 
> parameter of the boot modules was limited to 4GiB (without some kind of 
> hack where you string multiple boot modules together).

The reality to your scenario here is that it would never work on x86. 
Regardless of which entry point you come in through, 32bit BIOS entry, 
UEFI, or PVH, all of these expect the boot material to be passed in 
using the MB1 or MB2 protocol. Under those protocols, a module 
definition has a start and end addresses which are u32. While the start 
address could be okay, the end address will be well beyond 4GiB which at 
best would overflow the address, but more likely be rejected by the 
bootloader.

> I guess part of the question is whether we think that's an important use 
> case; on the whole if you're populating 5GiB of RAM, it seems like it 
> would be better to have the VM load it itself from disk.

The point of hyperlaunch is to enable advanced use-cases and domain 
resume on boot is one I think someone might find useful.

> I do see the logic behind wanting to avoid "paddr_t" for a size; I'm 
> sure Jan that you would nack any patch that used "size_t" as a memory 
> address (instead of, say, uintptr_t).  In that case, "psize_t" is the 
> obvious solution.

I would be amenable with declaring a psize_t that lived in all the same 
places paddr_t are defined to signal them as a pair, of sorts. I would 
have to see if adding paddr_t and this new psize_t addresses to the 
32bit defs.h would result in making the structures 64bit clean in the 
x86 32bit code. If so, then it would remove the need for a separate 
declaration of the structures there.

v/r,
dps
Jan Beulich July 28, 2023, 7:01 a.m. UTC | #16
On 27.07.2023 20:36, Stefano Stabellini wrote:
> On Thu, 27 Jul 2023, George Dunlap wrote:
>> On Thu, Jul 27, 2023 at 3:42 PM Jan Beulich <jbeulich@suse.com> wrote:
>>       On 27.07.2023 15:26, Daniel P. Smith wrote:
>>       > Let's bring this back to the actual implementation instead of the
>>       > theoretical. Your position is that Xen's paddr_t is desired because it
>>       > can store larger values than that of size_t. Now if you look in Xen
>>       > proper (main 64bit code on x86), paddr_t is a typedef for a 64bit
>>       > unsigned integer. And if you look at size_t, it is also a typedef to a
>>       > 64bit unsigned integer, they are literally a couple of lines apart in
>>       > types.h. Thus they are the same size and can only represent the same
>>       > maximum size.
>>
>>       What about 32-bit Arm, or any other 32-bit architecture that we might
>>       see Xen ported to, with wider than 32-bit physical address space?
>>
>>
>> To be more concrete here:
>>
>> Suppose that you had a machine with 32-bit virtual address spaces (i.e., going up to 4GiB), and 36-bit physical address spaces (i.e., going
>> up to 64GiB).  And suppose you had a system with 16GiB of physical ram.  And you wanted to use Hyperlaunch to create VMs using some sort of
>> memory image that was 5GiB (presumably of some kind of static data, not, say, a kernel or initramfs).  You wouldn't be able to do it if the
>> "size" parameter of the boot modules was limited to 4GiB (without some kind of hack where you string multiple boot modules together).
> 
> Yes exactly.
> 
> I would like this code to be common across ARM and x86. On arm32 size_t
> wouldn't work, with size_t as it is defined today. One option is to use
> paddr_t on all arches, or at least on arm32. Another option is to change
> the definition of size_t on arm32.

How can changing size_t be an option? This is determined by the psABI,
and going out of sync with what the compiler assumes size_t is will
bring you all sorts of trouble.

Jan
Jan Beulich Sept. 19, 2023, 1:51 p.m. UTC | #17
On 01.07.2023 09:18, Christopher Clark wrote:
> To convert the x86 boot logic from multiboot to boot module structures,
> change the bootstrap map function to accept a boot module parameter.
> 
> To allow incremental change from multiboot to boot modules across all
> x86 setup logic, provide a temporary inline wrapper that still accepts a
> multiboot module parameter and use it where necessary.

And all uses of the original function are converted to the new wrapper,
except when passing NULL - am I getting this right? Plus down the road
you'll change all of them back? Too much code churn for my taste, to be
honest, not the least because this undermines easy use of "git blame".

If the above observation is right, and since passing NULL to the wrapper
is fine too, why don't you deal with this by using a macro wrapper
instead, without needing to touch all the call sites:

#define bootstrap_map(m) bootstrap_map_multiboot(m)

Misra won't like that, but as you say it's temporary.

Jan
diff mbox series

Patch

diff --git a/xen/arch/x86/cpu/microcode/core.c b/xen/arch/x86/cpu/microcode/core.c
index c3fee62906..ce3c8042a2 100644
--- a/xen/arch/x86/cpu/microcode/core.c
+++ b/xen/arch/x86/cpu/microcode/core.c
@@ -34,6 +34,7 @@ 
 #include <xen/watchdog.h>
 
 #include <asm/apic.h>
+#include <asm/boot.h>
 #include <asm/cpu-policy.h>
 #include <asm/delay.h>
 #include <asm/nmi.h>
@@ -180,7 +181,7 @@  void __init microcode_scan_module(
         if ( !test_bit(i, module_map) )
             continue;
 
-        _blob_start = bootstrap_map(&mod[i]);
+        _blob_start = bootstrap_map_multiboot(&mod[i]);
         _blob_size = mod[i].mod_end;
         if ( !_blob_start )
         {
@@ -798,7 +799,7 @@  int __init microcode_init_cache(unsigned long *module_map,
         microcode_scan_module(module_map, mbi);
 
     if ( ucode_mod.mod_end )
-        rc = early_update_cache(bootstrap_map(&ucode_mod),
+        rc = early_update_cache(bootstrap_map_multiboot(&ucode_mod),
                                 ucode_mod.mod_end);
     else if ( ucode_blob.size )
         rc = early_update_cache(ucode_blob.data, ucode_blob.size);
@@ -821,7 +822,7 @@  static int __init early_microcode_update_cpu(void)
     else if ( ucode_mod.mod_end )
     {
         len = ucode_mod.mod_end;
-        data = bootstrap_map(&ucode_mod);
+        data = bootstrap_map_multiboot(&ucode_mod);
     }
 
     if ( !data )
diff --git a/xen/arch/x86/hvm/dom0_build.c b/xen/arch/x86/hvm/dom0_build.c
index bf08998862..56fe89632b 100644
--- a/xen/arch/x86/hvm/dom0_build.c
+++ b/xen/arch/x86/hvm/dom0_build.c
@@ -16,6 +16,7 @@ 
 
 #include <acpi/actables.h>
 
+#include <asm/boot.h>
 #include <asm/bzimage.h>
 #include <asm/dom0_build.h>
 #include <asm/hvm/support.h>
@@ -1208,7 +1209,8 @@  int __init dom0_construct_pvh(struct domain *d, const module_t *image,
         return rc;
     }
 
-    rc = pvh_load_kernel(d, image, image_headroom, initrd, bootstrap_map(image),
+    rc = pvh_load_kernel(d, image, image_headroom, initrd,
+                         bootstrap_map_multiboot(image),
                          cmdline, &entry, &start_info);
     if ( rc )
     {
diff --git a/xen/arch/x86/include/asm/boot.h b/xen/arch/x86/include/asm/boot.h
new file mode 100644
index 0000000000..10b17f12b2
--- /dev/null
+++ b/xen/arch/x86/include/asm/boot.h
@@ -0,0 +1,32 @@ 
+#ifndef __ASM_X86_BOOT_H__
+#define __ASM_X86_BOOT_H__
+
+#include <xen/bootinfo.h>
+#include <xen/multiboot.h>
+
+#include <asm/setup.h>
+
+static inline void *bootstrap_map_multiboot(const module_t *mod)
+{
+    struct boot_module bm;
+
+    if ( !mod )
+        return bootstrap_map(NULL);
+
+    bm.start = mod->mod_start << PAGE_SHIFT;
+    bm.size = mod->mod_end;
+
+    return bootstrap_map(&bm);
+}
+
+#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/x86/include/asm/setup.h b/xen/arch/x86/include/asm/setup.h
index ae0dd3915a..80d0bb2a21 100644
--- a/xen/arch/x86/include/asm/setup.h
+++ b/xen/arch/x86/include/asm/setup.h
@@ -1,6 +1,7 @@ 
 #ifndef __X86_SETUP_H_
 #define __X86_SETUP_H_
 
+#include <xen/bootinfo.h>
 #include <xen/multiboot.h>
 #include <asm/numa.h>
 
@@ -40,7 +41,7 @@  void setup_io_bitmap(struct domain *d);
 
 unsigned long initial_images_nrpages(nodeid_t node);
 void discard_initial_images(void);
-void *bootstrap_map(const module_t *mod);
+void *bootstrap_map(const struct boot_module *mod);
 
 int xen_in_range(unsigned long mfn);
 
diff --git a/xen/arch/x86/pv/dom0_build.c b/xen/arch/x86/pv/dom0_build.c
index 6ed9f8bbed..4fe806b60d 100644
--- a/xen/arch/x86/pv/dom0_build.c
+++ b/xen/arch/x86/pv/dom0_build.c
@@ -14,6 +14,7 @@ 
 #include <xen/sched.h>
 #include <xen/softirq.h>
 
+#include <asm/boot.h>
 #include <asm/bzimage.h>
 #include <asm/dom0_build.h>
 #include <asm/guest.h>
@@ -374,7 +375,7 @@  int __init dom0_construct_pv(struct domain *d,
     unsigned int flush_flags = 0;
     start_info_t *si;
     struct vcpu *v = d->vcpu[0];
-    void *image_base = bootstrap_map(image);
+    void *image_base = bootstrap_map_multiboot(image);
     unsigned long image_len = image->mod_end;
     void *image_start = image_base + image_headroom;
     unsigned long initrd_len = initrd ? initrd->mod_end : 0;
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index c0e8fc6ab7..3b623a4149 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -32,6 +32,7 @@ 
 #include <compat/xen.h>
 #endif
 #include <xen/bitops.h>
+#include <asm/boot.h>
 #include <asm/smp.h>
 #include <asm/processor.h>
 #include <asm/mpspec.h>
@@ -405,14 +406,14 @@  static void __init normalise_cpu_order(void)
  * Ensure a given physical memory range is present in the bootstrap mappings.
  * Use superpage mappings to ensure that pagetable memory needn't be allocated.
  */
-void *__init bootstrap_map(const module_t *mod)
+void *__init bootstrap_map(const struct boot_module *mod)
 {
     static unsigned long __initdata map_cur = BOOTSTRAP_MAP_BASE;
     uint64_t start, end, mask = (1L << L2_PAGETABLE_SHIFT) - 1;
     void *ret;
 
     if ( system_state != SYS_STATE_early_boot )
-        return mod ? mfn_to_virt(mod->mod_start) : NULL;
+        return mod ? maddr_to_virt(mod->start) : NULL;
 
     if ( !mod )
     {
@@ -421,8 +422,8 @@  void *__init bootstrap_map(const module_t *mod)
         return NULL;
     }
 
-    start = (uint64_t)mod->mod_start << PAGE_SHIFT;
-    end = start + mod->mod_end;
+    start = (uint64_t)mod->start;
+    end = start + mod->size;
     if ( start >= end )
         return NULL;
 
@@ -460,7 +461,7 @@  static void __init move_memory(
         if ( mod.mod_end > blksz )
             mod.mod_end = blksz;
         sz = mod.mod_end - soffs;
-        s = bootstrap_map(&mod);
+        s = bootstrap_map_multiboot(&mod);
 
         mod.mod_start = (dst - doffs) >> PAGE_SHIFT;
         mod.mod_end = doffs + size;
@@ -468,7 +469,7 @@  static void __init move_memory(
             mod.mod_end = blksz;
         if ( sz > mod.mod_end - doffs )
             sz = mod.mod_end - doffs;
-        d = bootstrap_map(&mod);
+        d = bootstrap_map_multiboot(&mod);
 
         memmove(d + doffs, s + soffs, sz);
 
@@ -1360,8 +1361,9 @@  void __init noreturn __start_xen(unsigned long mbi_p)
         mod[boot_info->nr_mods].mod_end = __2M_rwdata_end - _stext;
     }
 
-    boot_info->mods[0].arch->headroom = bzimage_headroom(bootstrap_map(mod),
-                                                         mod->mod_end);
+    boot_info->mods[0].arch->headroom =
+        bzimage_headroom(bootstrap_map_multiboot(mod), mod->mod_end);
+
     bootstrap_map(NULL);
 
 #ifndef highmem_start
diff --git a/xen/include/xen/bootinfo.h b/xen/include/xen/bootinfo.h
index b72ae31a66..eb93cc3439 100644
--- a/xen/include/xen/bootinfo.h
+++ b/xen/include/xen/bootinfo.h
@@ -10,6 +10,9 @@ 
 #endif
 
 struct boot_module {
+    paddr_t start;
+    size_t size;
+
     struct arch_bootmodule *arch;
 };
 
diff --git a/xen/xsm/xsm_policy.c b/xen/xsm/xsm_policy.c
index 8dafbc9381..c6df8c6e06 100644
--- a/xen/xsm/xsm_policy.c
+++ b/xen/xsm/xsm_policy.c
@@ -20,6 +20,7 @@ 
 
 #include <xsm/xsm.h>
 #ifdef CONFIG_MULTIBOOT
+#include <asm/boot.h>
 #include <xen/multiboot.h>
 #include <asm/setup.h>
 #endif
@@ -49,7 +50,7 @@  int __init xsm_multiboot_policy_init(
         if ( !test_bit(i, module_map) )
             continue;
 
-        _policy_start = bootstrap_map(mod + i);
+        _policy_start = bootstrap_map_multiboot(mod + i);
         _policy_len   = mod[i].mod_end;
 
         if ( (xsm_magic_t)(*_policy_start) == XSM_MAGIC )