diff mbox series

[v2] xen/arm: add warning if memory modules overlap

Message ID 1571342860-15838-1-git-send-email-brian.woods@xilinx.com (mailing list archive)
State New, archived
Headers show
Series [v2] xen/arm: add warning if memory modules overlap | expand

Commit Message

Brian Woods Oct. 17, 2019, 8:07 p.m. UTC
It's possible for a misconfigured device tree to cause Xen to crash when
there are overlapping addresses in the memory modules.  Add a warning
when printing the addresses to let the user know there's a possible
issue.

Signed-off-by: Brian Woods <brian.woods@xilinx.com>
---
v1 -> v2
	- removed nested loop and placed check in add_boot_module()

Sample output:
...
(XEN) MODULE[0]: 0000000001400000 - 0000000001542121 Xen         
(XEN) MODULE[1]: 0000000003846000 - 0000000003850080 Device Tree 
(XEN) MODULE[2]: 0000000003853000 - 0000000007fff676 Ramdisk     
(XEN) MODULE[3]: 0000000000080000 - 0000000003180000 Kernel      
(XEN)  RESVD[0]: 0000000003846000 - 0000000003850000
(XEN)  RESVD[1]: 0000000003853000 - 0000000007fff676
(XEN) 
(XEN) WARNING: overlap detected in the memory module addresses
(XEN) 
(XEN) Command line: console=dtuart dtuart=serial0 dom0_mem=1G bootscrub=0 maxcpus=1 timer_slop=0
...

 xen/arch/arm/bootfdt.c      | 4 ++++
 xen/arch/arm/setup.c        | 6 ++++++
 xen/include/asm-arm/setup.h | 1 +
 3 files changed, 11 insertions(+)

Comments

Julien Grall Oct. 17, 2019, 8:34 p.m. UTC | #1
Hi,

On Thu, 17 Oct 2019 at 21:08, Brian Woods <brian.woods@xilinx.com> wrote:
>
> It's possible for a misconfigured device tree to cause Xen to crash when
> there are overlapping addresses in the memory modules.  Add a warning
> when printing the addresses to let the user know there's a possible
> issue.
>
> Signed-off-by: Brian Woods <brian.woods@xilinx.com>
> ---
> v1 -> v2
>         - removed nested loop and placed check in add_boot_module()
>
> Sample output:
> ...
> (XEN) MODULE[0]: 0000000001400000 - 0000000001542121 Xen
> (XEN) MODULE[1]: 0000000003846000 - 0000000003850080 Device Tree
> (XEN) MODULE[2]: 0000000003853000 - 0000000007fff676 Ramdisk
> (XEN) MODULE[3]: 0000000000080000 - 0000000003180000 Kernel
> (XEN)  RESVD[0]: 0000000003846000 - 0000000003850000
> (XEN)  RESVD[1]: 0000000003853000 - 0000000007fff676
> (XEN)
> (XEN) WARNING: overlap detected in the memory module addresses
> (XEN)
> (XEN) Command line: console=dtuart dtuart=serial0 dom0_mem=1G bootscrub=0 maxcpus=1 timer_slop=0
> ...
>
>  xen/arch/arm/bootfdt.c      | 4 ++++
>  xen/arch/arm/setup.c        | 6 ++++++
>  xen/include/asm-arm/setup.h | 1 +
>  3 files changed, 11 insertions(+)
>
> diff --git a/xen/arch/arm/bootfdt.c b/xen/arch/arm/bootfdt.c
> index 08fb59f..f8b34d4 100644
> --- a/xen/arch/arm/bootfdt.c
> +++ b/xen/arch/arm/bootfdt.c
> @@ -387,6 +387,10 @@ static void __init early_print_info(void)
>                 mem_resv->bank[j].start + mem_resv->bank[j].size - 1);
>      }
>      printk("\n");
> +
> +    if ( mem_module_overlap )
> +        printk("WARNING: overlap detected in the memory module addresses.\n");

As a user, such a message would likely put me off. You tell me there is
an overlap, but you don't provide more information even though you likely
have the information in place. However...

> +
>      for ( i = 0 ; i < cmds->nr_mods; i++ )
>          printk("CMDLINE[%"PRIpaddr"]:%s %s\n", cmds->cmdline[i].start,
>                 cmds->cmdline[i].dt_name,
> diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c
> index 705a917..315a131 100644
> --- a/xen/arch/arm/setup.c
> +++ b/xen/arch/arm/setup.c
> @@ -69,6 +69,8 @@ integer_param("xenheap_megabytes", opt_xenheap_megabytes);
>
>  domid_t __read_mostly max_init_domid;
>
> +bool __initdata mem_module_overlap;
> +
>  static __used void init_done(void)
>  {
>      /* Must be done past setting system_state. */
> @@ -254,6 +256,10 @@ struct bootmodule __init *add_boot_module(bootmodule_kind kind,
>                  mod->domU = false;
>              return mod;
>          }
> +
> +        if ( ((mod->start >= start) && (mod->start < start + size)) ||
> +             ((start >= mod->start) && (start < mod->start + mod->size)) )
> +            mem_module_overlap = true;

... What's wrong with just dumping the information here directly?

Cheers,
Brian Woods Oct. 17, 2019, 9:20 p.m. UTC | #2
On Thu, Oct 17, 2019 at 09:34:51PM +0100, Julien Grall wrote:
> Hi,
> 
> On Thu, 17 Oct 2019 at 21:08, Brian Woods <brian.woods@xilinx.com> wrote:
> >
> > It's possible for a misconfigured device tree to cause Xen to crash when
> > there are overlapping addresses in the memory modules.  Add a warning
> > when printing the addresses to let the user know there's a possible
> > issue.
> >
> > Signed-off-by: Brian Woods <brian.woods@xilinx.com>
> > ---
> > v1 -> v2
> >         - removed nested loop and placed check in add_boot_module()
> >
> > Sample output:
> > ...
> > (XEN) MODULE[0]: 0000000001400000 - 0000000001542121 Xen
> > (XEN) MODULE[1]: 0000000003846000 - 0000000003850080 Device Tree
> > (XEN) MODULE[2]: 0000000003853000 - 0000000007fff676 Ramdisk
> > (XEN) MODULE[3]: 0000000000080000 - 0000000003180000 Kernel
> > (XEN)  RESVD[0]: 0000000003846000 - 0000000003850000
> > (XEN)  RESVD[1]: 0000000003853000 - 0000000007fff676
> > (XEN)
> > (XEN) WARNING: overlap detected in the memory module addresses
> > (XEN)
> > (XEN) Command line: console=dtuart dtuart=serial0 dom0_mem=1G bootscrub=0 maxcpus=1 timer_slop=0
> > ...
> >
> >  xen/arch/arm/bootfdt.c      | 4 ++++
> >  xen/arch/arm/setup.c        | 6 ++++++
> >  xen/include/asm-arm/setup.h | 1 +
> >  3 files changed, 11 insertions(+)
> >
> > diff --git a/xen/arch/arm/bootfdt.c b/xen/arch/arm/bootfdt.c
> > index 08fb59f..f8b34d4 100644
> > --- a/xen/arch/arm/bootfdt.c
> > +++ b/xen/arch/arm/bootfdt.c
> > @@ -387,6 +387,10 @@ static void __init early_print_info(void)
> >                 mem_resv->bank[j].start + mem_resv->bank[j].size - 1);
> >      }
> >      printk("\n");
> > +
> > +    if ( mem_module_overlap )
> > +        printk("WARNING: overlap detected in the memory module addresses.\n");
> 
> As a user such message would likely put me off. You tell me there are
> an overlap, but you don't provide more information even if you likely
> have the information in place. However...

Well, I suppose the message could be changed to something like:
"WARNING: overlap detected in the above memory module addresses."
or something to more directly guide the users to the section.  Maybe
move the 'printk("\n");' after the warning so it's grouped tighter with
the module information.

> > +
> >      for ( i = 0 ; i < cmds->nr_mods; i++ )
> >          printk("CMDLINE[%"PRIpaddr"]:%s %s\n", cmds->cmdline[i].start,
> >                 cmds->cmdline[i].dt_name,
> > diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c
> > index 705a917..315a131 100644
> > --- a/xen/arch/arm/setup.c
> > +++ b/xen/arch/arm/setup.c
> > @@ -69,6 +69,8 @@ integer_param("xenheap_megabytes", opt_xenheap_megabytes);
> >
> >  domid_t __read_mostly max_init_domid;
> >
> > +bool __initdata mem_module_overlap;
> > +
> >  static __used void init_done(void)
> >  {
> >      /* Must be done past setting system_state. */
> > @@ -254,6 +256,10 @@ struct bootmodule __init *add_boot_module(bootmodule_kind kind,
> >                  mod->domU = false;
> >              return mod;
> >          }
> > +
> > +        if ( ((mod->start >= start) && (mod->start < start + size)) ||
> > +             ((start >= mod->start) && (start < mod->start + mod->size)) )
> > +            mem_module_overlap = true;
> 
> ... What's wrong with just dumping the information here directly?

IMO, it is better to have all the information printed in one spot.
There is less to go through and easier to find out what is happening.
There is also the fact that we do not have to print things twice (2 sets
of names, starting addresses and ending addresses per overlap) when it
is going to be printed in the near future anyway.  The cost of this is
just one initdata bool, which while I am not thrilled about, does not
seem that expensive (compared to a nested loop or printing out at least
(16*2 + 12) * 2 characters per overlap(at least on Arm64)).

I do think the message could use some polish, but this approach makes
the most sense to me.

Brian
Julien Grall Oct. 17, 2019, 9:49 p.m. UTC | #3
On Thu, 17 Oct 2019 at 22:20, Brian Woods <brian.woods@xilinx.com> wrote:
>
> On Thu, Oct 17, 2019 at 09:34:51PM +0100, Julien Grall wrote:
> > Hi,
> >
> > On Thu, 17 Oct 2019 at 21:08, Brian Woods <brian.woods@xilinx.com> wrote:
> > >
> > > It's possible for a misconfigured device tree to cause Xen to crash when
> > > there are overlapping addresses in the memory modules.  Add a warning
> > > when printing the addresses to let the user know there's a possible
> > > issue.
> > >
> > > Signed-off-by: Brian Woods <brian.woods@xilinx.com>
> > > ---
> > > v1 -> v2
> > >         - removed nested loop and placed check in add_boot_module()
> > >
> > > Sample output:
> > > ...
> > > (XEN) MODULE[0]: 0000000001400000 - 0000000001542121 Xen
> > > (XEN) MODULE[1]: 0000000003846000 - 0000000003850080 Device Tree
> > > (XEN) MODULE[2]: 0000000003853000 - 0000000007fff676 Ramdisk
> > > (XEN) MODULE[3]: 0000000000080000 - 0000000003180000 Kernel
> > > (XEN)  RESVD[0]: 0000000003846000 - 0000000003850000
> > > (XEN)  RESVD[1]: 0000000003853000 - 0000000007fff676
> > > (XEN)
> > > (XEN) WARNING: overlap detected in the memory module addresses
> > > (XEN)
> > > (XEN) Command line: console=dtuart dtuart=serial0 dom0_mem=1G bootscrub=0 maxcpus=1 timer_slop=0
> > > ...
> > >
> > >  xen/arch/arm/bootfdt.c      | 4 ++++
> > >  xen/arch/arm/setup.c        | 6 ++++++
> > >  xen/include/asm-arm/setup.h | 1 +
> > >  3 files changed, 11 insertions(+)
> > >
> > > diff --git a/xen/arch/arm/bootfdt.c b/xen/arch/arm/bootfdt.c
> > > index 08fb59f..f8b34d4 100644
> > > --- a/xen/arch/arm/bootfdt.c
> > > +++ b/xen/arch/arm/bootfdt.c
> > > @@ -387,6 +387,10 @@ static void __init early_print_info(void)
> > >                 mem_resv->bank[j].start + mem_resv->bank[j].size - 1);
> > >      }
> > >      printk("\n");
> > > +
> > > +    if ( mem_module_overlap )
> > > +        printk("WARNING: overlap detected in the memory module addresses.\n");
> >
> > As a user such message would likely put me off. You tell me there are
> > an overlap, but you don't provide more information even if you likely
> > have the information in place. However...
>
> Well, I suppose the message could be changed to something like:
> "WARNING: overlap detected in the above memory module addresses."
> or something to more directly guide the users to the section.  Maybe
> move the 'printk("\n");' after the warning so it's grouped tighter with
> the module information.

My point stands even for this sort of message. You know the exact
overlap, so why would you hide it from the users?

>
> > > +
> > >      for ( i = 0 ; i < cmds->nr_mods; i++ )
> > >          printk("CMDLINE[%"PRIpaddr"]:%s %s\n", cmds->cmdline[i].start,
> > >                 cmds->cmdline[i].dt_name,
> > > diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c
> > > index 705a917..315a131 100644
> > > --- a/xen/arch/arm/setup.c
> > > +++ b/xen/arch/arm/setup.c
> > > @@ -69,6 +69,8 @@ integer_param("xenheap_megabytes", opt_xenheap_megabytes);
> > >
> > >  domid_t __read_mostly max_init_domid;
> > >
> > > +bool __initdata mem_module_overlap;
> > > +
> > >  static __used void init_done(void)
> > >  {
> > >      /* Must be done past setting system_state. */
> > > @@ -254,6 +256,10 @@ struct bootmodule __init *add_boot_module(bootmodule_kind kind,
> > >                  mod->domU = false;
> > >              return mod;
> > >          }
> > > +
> > > +        if ( ((mod->start >= start) && (mod->start < start + size)) ||
> > > +             ((start >= mod->start) && (start < mod->start + mod->size)) )
> > > +            mem_module_overlap = true;
> >
> > ... What's wrong with just dumping the information here directly?
>
> IMO, it is better to have all the information printed in one spot.
> There is less to go through and easier to find out what is happening.
> There is also the fact that we do not have to print things twice (2 sets
> of names, starting addresses and ending addresses per overlap) when it
> is going to be printed in the near future anyway.  The cost of this is
> just one initdata bool, which while I am not thrilled about, does not
> seem that expensive (compared to a nested loop or printing out at least
> (16*2 + 12) * 2 characters per overlap(at least on Arm64)).

Again, this is boot code and not a path that is going to be called
hundreds of times. So performance is the last thing I care about in this
patch.

If we try to help the users by telling them there is an overlap
between modules, then we should do it properly and tell them the exact
overlap. Otherwise this is nearly as pointless as a crash later on in
the boot process.

I also don't want a double for loop or any additional global variable
when it can be done by simply adding a check in add_boot_module().

Cheers,
Brian Woods Oct. 17, 2019, 10:34 p.m. UTC | #4
On Thu, Oct 17, 2019 at 10:49:15PM +0100, Julien Grall wrote:
> On Thu, 17 Oct 2019 at 22:20, Brian Woods <brian.woods@xilinx.com> wrote:
> >
> > On Thu, Oct 17, 2019 at 09:34:51PM +0100, Julien Grall wrote:
> > > Hi,
> > >
> > > As a user such message would likely put me off. You tell me there are
> > > an overlap, but you don't provide more information even if you likely
> > > have the information in place. However...
> >
> > Well, I suppose the message could be changed to something like:
> > "WARNING: overlap detected in the above memory module addresses."
> > or something to more directly guide the users to the section.  Maybe
> > move the 'printk("\n");' after the warning so it's grouped tighter with
> > the module information.
> 
> My point stands even for this sort of message. You know the exact
> overlap, so why would you hide it from the users?

We're not hiding it.  You're not cluttering up the log with the same
data multiple times.  See below.

> > >
> > > ... What's wrong with just dumping the information here directly?
> >
> > IMO, it is better to have all the information printed in one spot.
> > There is less to go through and easier to find out what is happening.
> > There is also the fact that we do not have to print things twice (2 sets
> > of names, starting addresses and ending addresses per overlap) when it
> > is going to be printed in the near future anyway.  The cost of this is
> > just one initdata bool, which while I am not thrilled about, does not
> > seem that expensive (compared to a nested loop or printing out at least
> > (16*2 + 12) * 2 characters per overlap(at least on Arm64)).
> 
> Again, this is boot code and not a path that is going to be called
> hundreds of time. So performance is the last thing I care in this
> patch.
> 
> If we try to help the users by telling them there is an overlap
> between modules, then we should do it properly and tell them the exact
> overlap. Otherwise this is nearly as pointless as a crash later on in
> the boot process.
> 
> I also don't want a double for loop or any additional global variable
> when it can be done by simply adding a check in add_boot_module().

This isn't about performance (other than the nested for), this is about
providing a relatively clean and sane log to read.  It's not that
difficult to go through the addresses and see conflicts.  This also
keeps it all in one part of the log and shorter without losing
information.  Shorter and well-structured logs (without losing info)
are easier to read.  Making logs easier to read helps everyone.

Showing the addresses and module name itself will take 2 lines assuming
you stay within 80 chars.  (16*2 + 12) * 2 = 88, that's without spaces,
'0x's or any sort of message explaining what's actually going wrong.
The module names and addresses will be printed out anyway in the near
future, so why not group them together?

The purpose of the warning is to tell the user something is wrong, both
messages do that and provide the information to determine what's wrong.

Brian
Julien Grall Oct. 18, 2019, 3:41 p.m. UTC | #5
Hi Brian,

On 17/10/2019 23:34, Brian Woods wrote:
> On Thu, Oct 17, 2019 at 10:49:15PM +0100, Julien Grall wrote:
>> On Thu, 17 Oct 2019 at 22:20, Brian Woods <brian.woods@xilinx.com> wrote:
>>>
>>> On Thu, Oct 17, 2019 at 09:34:51PM +0100, Julien Grall wrote:
>>>> Hi,
>>>>
>>>> As a user such message would likely put me off. You tell me there are
>>>> an overlap, but you don't provide more information even if you likely
>>>> have the information in place. However...
>>>
>>> Well, I suppose the message could be changed to something like:
>>> "WARNING: overlap detected in the above memory module addresses."
>>> or something to more directly guide the users to the section.  Maybe
>>> move the 'printk("\n");' after the warning so it's grouped tighter with
>>> the module information.
>>
>> My point stands even for this sort of message. You know the exact
>> overlap, so why would you hide it from the users?
> 
> We're not hiding it.  You're not cluttering up the log with the same
> data multiple times.  See below.

While the values are the same, the data is printed in a different way to help 
the users.

> 
>>>>
>>>> ... What's wrong with just dumping the information here directly?
>>>
>>> IMO, it is better to have all the information printed in one spot.
>>> There is less to go through and easier to find out what is happening.
>>> There is also the fact that we do not have to print things twice (2 sets
>>> of names, starting addresses and ending addresses per overlap) when it
>>> is going to be printed in the near future anyway.  The cost of this is
>>> just one initdata bool, which while I am not thrilled about, does not
>>> seem that expensive (compared to a nested loop or printing out at least
>>> (16*2 + 12) * 2 characters per overlap(at least on Arm64)).
>>
>> Again, this is boot code and not a path that is going to be called
>> hundreds of time. So performance is the last thing I care in this
>> patch.
>>
>> If we try to help the users by telling them there is an overlap
>> between modules, then we should do it properly and tell them the exact
>> overlap. Otherwise this is nearly as pointless as a crash later on in
>> the boot process.
>>
>> I also don't want a double for loop or any additional global variable
>> when it can be done by simply adding a check in add_boot_module().
> 
> This isn't about performance (other than the nested for), this is about
> providing a relatively clean and sane log to read.  It's not that
> difficult to go through the addresses and see conflicts.  This also
> keeps it all in one part of the log and shorter without losing
> information.  Shorter and well structured logs (without losing info)
> makes it easier to read.  Making logs easier to read helps everyone.
> 
> Showing the addresses and module name itself will take 2 lines assuming
> you stay within 80 chars.  (16*2 + 12) * 2 = 88, that's without spaces,
> '0x's or any sort of message explaining what's actually going wrong.
> The module names and addresses will be printed out anyway in the near
> future, so why not group them together?

Here again you argue about performance and a smaller message... This is a
warning (so not printed in the normal course of events) and Xen is likely to
break afterwards. So what you want here is a big fat warning, not a small one
that is easy to miss.

> 
> The purpose of the warning is to tell the user something is wrong, both
> messages do that and provide the information to determine what's wrong
We can both probably figure it out with some effort. If you have 2-3
modules that's fine. But if you have 10, then it becomes more complex.
My time is quite valuable and therefore I want the hypervisor to help me find
the overlap.

To make an analogy, would you like it if your compiler told you "There is a bug in
file X" but did not tell you the exact line? I personally wouldn't, even if the
file is fairly small.

So while I am happy to see a way to check the modules, I dislike this approach. So:

Nacked-by: Julien Grall <julien.grall@arm.com>

Cheers,
diff mbox series

Patch

diff --git a/xen/arch/arm/bootfdt.c b/xen/arch/arm/bootfdt.c
index 08fb59f..f8b34d4 100644
--- a/xen/arch/arm/bootfdt.c
+++ b/xen/arch/arm/bootfdt.c
@@ -387,6 +387,10 @@  static void __init early_print_info(void)
                mem_resv->bank[j].start + mem_resv->bank[j].size - 1);
     }
     printk("\n");
+
+    if ( mem_module_overlap )
+        printk("WARNING: overlap detected in the memory module addresses.\n");
+
     for ( i = 0 ; i < cmds->nr_mods; i++ )
         printk("CMDLINE[%"PRIpaddr"]:%s %s\n", cmds->cmdline[i].start,
                cmds->cmdline[i].dt_name,
diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c
index 705a917..315a131 100644
--- a/xen/arch/arm/setup.c
+++ b/xen/arch/arm/setup.c
@@ -69,6 +69,8 @@  integer_param("xenheap_megabytes", opt_xenheap_megabytes);
 
 domid_t __read_mostly max_init_domid;
 
+bool __initdata mem_module_overlap;
+
 static __used void init_done(void)
 {
     /* Must be done past setting system_state. */
@@ -254,6 +256,10 @@  struct bootmodule __init *add_boot_module(bootmodule_kind kind,
                 mod->domU = false;
             return mod;
         }
+
+        if ( ((mod->start >= start) && (mod->start < start + size)) ||
+             ((start >= mod->start) && (start < mod->start + mod->size)) )
+            mem_module_overlap = true;
     }
 
     mod = &mods->module[mods->nr_mods++];
diff --git a/xen/include/asm-arm/setup.h b/xen/include/asm-arm/setup.h
index 2f8f24e..4bb1ba1 100644
--- a/xen/include/asm-arm/setup.h
+++ b/xen/include/asm-arm/setup.h
@@ -122,6 +122,7 @@  void device_tree_get_reg(const __be32 **cell, u32 address_cells,
 u32 device_tree_get_u32(const void *fdt, int node,
                         const char *prop_name, u32 dflt);
 
+extern bool mem_module_overlap;
 #endif
 /*
  * Local variables: