diff mbox

[v7,45/50] drivers/of: Avoid recursively calling unflatten_dt_node()

Message ID 1446642770-4681-46-git-send-email-gwshan@linux.vnet.ibm.com (mailing list archive)
State New, archived
Delegated to: Bjorn Helgaas
Headers show

Commit Message

Gavin Shan Nov. 4, 2015, 1:12 p.m. UTC
In current implementation, unflatten_dt_node() is called recursively
to unflatten device nodes in FDT blob. It's stress to limited stack
capacity.

This avoids calling the function recursively, meaning the device
nodes are unflattened in one call on unflatten_dt_node(): two arrays
are introduced to track the parent path size and the device node of
current level of depth, which will be used by the device node on next
level of depth to be unflattened. Also, the parameter "poffset" and
"fpsize" are unused and dropped.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 drivers/of/fdt.c | 94 +++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 56 insertions(+), 38 deletions(-)

Comments

Rob Herring Nov. 4, 2015, 4:07 p.m. UTC | #1
On Wed, Nov 4, 2015 at 7:12 AM, Gavin Shan <gwshan@linux.vnet.ibm.com> wrote:
> In current implementation, unflatten_dt_node() is called recursively
> to unflatten device nodes in FDT blob. It's stress to limited stack
> capacity.

Did you actually hit a problem?

Now we have a max depth of 64. Seems like that should be plenty... Any
idea how this compares to when we run out of stack space?

> This avoids calling the function recursively, meaning the device
> nodes are unflattened in one call on unflatten_dt_node(): two arrays
> are introduced to track the parent path size and the device node of
> current level of depth, which will be used by the device node on next
> level of depth to be unflattened. Also, the parameter "poffset" and
> "fpsize" are unused and dropped.

Yay. I'm happy to see parameters removed instead of added to this function.

>
> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
> ---
>  drivers/of/fdt.c | 94 +++++++++++++++++++++++++++++++++-----------------------
>  1 file changed, 56 insertions(+), 38 deletions(-)
>
> diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
> index 173b036..f4793d0 100644
> --- a/drivers/of/fdt.c
> +++ b/drivers/of/fdt.c
> @@ -355,61 +355,82 @@ static unsigned long populate_node(const void *blob,
>         return fpsize;
>  }
>
> +static void reverse_nodes(struct device_node *parent)
> +{
> +       struct device_node *child, *next;
> +
> +       /* In-depth first */
> +       child = parent->child;
> +       while (child) {
> +               reverse_nodes(child);
> +
> +               child = child->sibling;
> +       }
> +
> +       /* Reverse the nodes in the child list */
> +       child = parent->child;
> +       parent->child = NULL;
> +       while (child) {
> +               next = child->sibling;
> +
> +               child->sibling = parent->child;
> +               parent->child = child;
> +               child = next;
> +       }
> +}
> +
>  /**
>   * unflatten_dt_node - Alloc and populate a device_node from the flat tree
>   * @blob: The parent device tree blob
>   * @mem: Memory chunk to use for allocating device nodes and properties
> - * @poffset: pointer to node in flat tree
>   * @dad: Parent struct device_node
>   * @nodepp: The device_node tree created by the call
> - * @fpsize: Size of the node path up at the current depth.
>   * @dryrun: If true, do not allocate device nodes but still calculate needed
>   * memory size
>   */
>  static void *unflatten_dt_node(const void *blob,
>                                void *mem,
> -                              int *poffset,
>                                struct device_node *dad,
>                                struct device_node **nodepp,
> -                              unsigned long fpsize,
>                                bool dryrun)

We can probably further simplify things by returning an int with
negative being errors and positive being the size. Also, dryrun can be
dropped and implied by mem and/or nodepp being NULL.

>  {
> -       struct device_node *np;
> -       static int depth;
> -       int old_depth;
> -
> -       fpsize = populate_node(blob, *poffset, &mem, dad, fpsize, &np, dryrun);
> -       if (!fpsize)
> -               return mem;
> +       struct device_node *root;
> +       int offset = 0, depth = 0;
> +       unsigned long fpsizes[64];
> +       struct device_node *nps[64];

Use a define here.

>
> -       old_depth = depth;
> -       *poffset = fdt_next_node(blob, *poffset, &depth);
> -       if (depth < 0)
> -               depth = 0;
> -       while (*poffset > 0 && depth > old_depth)
> -               mem = unflatten_dt_node(blob, mem, poffset, np, NULL,
> -                                       fpsize, dryrun);
> +       if (nodepp)
> +               *nodepp = NULL;
> +
> +       root = dad;
> +       fpsizes[depth] = dad ? strlen(of_node_full_name(dad)) : 0;
> +       nps[depth++] = dad;
> +       while (offset >= 0 && depth < 64) {
> +               fpsizes[depth] = populate_node(blob, offset, &mem,
> +                                              nps[depth - 1],
> +                                              fpsizes[depth - 1],
> +                                              &nps[depth], dryrun);
> +               if (!fpsizes[depth])
> +                       return mem;
> +
> +               if (!dryrun && nodepp && !*nodepp)
> +                       *nodepp = nps[depth];
> +               if (!dryrun && !root)
> +                       root = nps[depth];
> +
> +               offset = fdt_next_node(blob, offset, &depth);
> +       }
>
> -       if (*poffset < 0 && *poffset != -FDT_ERR_NOTFOUND)
> -               pr_err("unflatten: error %d processing FDT\n", *poffset);
> +       if (offset < 0 && offset != -FDT_ERR_NOTFOUND)
> +               pr_err("%s: Error %d processing FDT\n",
> +                      __func__, offset);

What about depth == 64 case? I think the behavior should be a WARN and
ignore those nodes so we at least can continue to boot and see the
error. Of course, if there is a phandle pointing to ignored nodes, we
have to handle that too.

>
>         /*
>          * Reverse the child list. Some drivers assumes node order matches .dts
>          * node order
>          */
> -       if (!dryrun && np->child) {
> -               struct device_node *child = np->child;
> -               np->child = NULL;
> -               while (child) {
> -                       struct device_node *next = child->sibling;
> -                       child->sibling = np->child;
> -                       np->child = child;
> -                       child = next;
> -               }
> -       }
> -
> -       if (nodepp)
> -               *nodepp = np;
> +       if (!dryrun)
> +               reverse_nodes(root);
>
>         return mem;
>  }
> @@ -431,7 +452,6 @@ static void __unflatten_device_tree(const void *blob,
>                              void * (*dt_alloc)(u64 size, u64 align))
>  {
>         unsigned long size;
> -       int start;
>         void *mem;
>
>         pr_debug(" -> unflatten_device_tree()\n");
> @@ -452,8 +472,7 @@ static void __unflatten_device_tree(const void *blob,
>         }
>
>         /* First pass, scan for size */
> -       start = 0;
> -       size = (unsigned long)unflatten_dt_node(blob, NULL, &start, NULL, NULL, 0, true);
> +       size = (unsigned long)unflatten_dt_node(blob, NULL, NULL, NULL, true);
>         size = ALIGN(size, 4);
>
>         pr_debug("  size is %lx, allocating...\n", size);
> @@ -467,8 +486,7 @@ static void __unflatten_device_tree(const void *blob,
>         pr_debug("  unflattening %p...\n", mem);
>
>         /* Second pass, do actual unflattening */
> -       start = 0;
> -       unflatten_dt_node(blob, mem, &start, NULL, mynodes, 0, false);
> +       unflatten_dt_node(blob, mem, NULL, mynodes, false);
>         if (be32_to_cpup(mem + size) != 0xdeadbeef)
>                 pr_warning("End of tree marker overwritten: %08x\n",
>                            be32_to_cpup(mem + size));
> --
> 2.1.0
>
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gavin Shan Nov. 4, 2015, 11:23 p.m. UTC | #2
On Wed, Nov 04, 2015 at 10:07:50AM -0600, Rob Herring wrote:
>On Wed, Nov 4, 2015 at 7:12 AM, Gavin Shan <gwshan@linux.vnet.ibm.com> wrote:
>> In current implementation, unflatten_dt_node() is called recursively
>> to unflatten device nodes in FDT blob. It's stress to limited stack
>> capacity.
>
>Did you actually hit a problem?
>
>Now we have a max depth of 64. Seems like that should be plenty... Any
>idea how this compares to when we run out of stack space?
>

When I rebased last revision (v6), particular below patch, to 4.3.rc6,
the kernel won't boot in P7 and P8 boxes. On P7 boxes, the stack overruns
according to the printed kernel messages. On P8 boxes, the /bin/init in
initramfs image can't be loaded/executed properly and it's potentially
caused by memory corruption. That's why I reworked it to avoid recursive
calling to unflatten_dt_node().

The max depth "64" wasn't selected based on the stack usage. I was thinking
the device tree is converted to friendly *.dts format and it's using TAB
as the prefix for each line. If the device tree has 64 depth, Each line
in *.dts for leaf nodes have to be wrapped and spanning multiple lines.
That's why I choosed 64, maybe 32 is enough. Did you see a device-tree
that has more than 16 depth in field? :-)

>> This avoids calling the function recursively, meaning the device
>> nodes are unflattened in one call on unflatten_dt_node(): two arrays
>> are introduced to track the parent path size and the device node of
>> current level of depth, which will be used by the device node on next
>> level of depth to be unflattened. Also, the parameter "poffset" and
>> "fpsize" are unused and dropped.
>
>Yay. I'm happy to see parameters removed instead of added to this function.
>
>>
>> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
>> ---
>>  drivers/of/fdt.c | 94 +++++++++++++++++++++++++++++++++-----------------------
>>  1 file changed, 56 insertions(+), 38 deletions(-)
>>
>> diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
>> index 173b036..f4793d0 100644
>> --- a/drivers/of/fdt.c
>> +++ b/drivers/of/fdt.c
>> @@ -355,61 +355,82 @@ static unsigned long populate_node(const void *blob,
>>         return fpsize;
>>  }
>>
>> +static void reverse_nodes(struct device_node *parent)
>> +{
>> +       struct device_node *child, *next;
>> +
>> +       /* In-depth first */
>> +       child = parent->child;
>> +       while (child) {
>> +               reverse_nodes(child);
>> +
>> +               child = child->sibling;
>> +       }
>> +
>> +       /* Reverse the nodes in the child list */
>> +       child = parent->child;
>> +       parent->child = NULL;
>> +       while (child) {
>> +               next = child->sibling;
>> +
>> +               child->sibling = parent->child;
>> +               parent->child = child;
>> +               child = next;
>> +       }
>> +}
>> +
>>  /**
>>   * unflatten_dt_node - Alloc and populate a device_node from the flat tree
>>   * @blob: The parent device tree blob
>>   * @mem: Memory chunk to use for allocating device nodes and properties
>> - * @poffset: pointer to node in flat tree
>>   * @dad: Parent struct device_node
>>   * @nodepp: The device_node tree created by the call
>> - * @fpsize: Size of the node path up at the current depth.
>>   * @dryrun: If true, do not allocate device nodes but still calculate needed
>>   * memory size
>>   */
>>  static void *unflatten_dt_node(const void *blob,
>>                                void *mem,
>> -                              int *poffset,
>>                                struct device_node *dad,
>>                                struct device_node **nodepp,
>> -                              unsigned long fpsize,
>>                                bool dryrun)
>
>We can probably further simplify things by returning an int with
>negative being errors and positive being the size. Also, dryrun can be
>dropped and implied by mem and/or nodepp being NULL.
>

Yeah, I think it's reasonable to return "size" from this function. "dryrun"
can be dropped and implied by NULL @mem. @nodepp can't be NULL. I perhaps
have separate patch to address it in next revision.

>>  {
>> -       struct device_node *np;
>> -       static int depth;
>> -       int old_depth;
>> -
>> -       fpsize = populate_node(blob, *poffset, &mem, dad, fpsize, &np, dryrun);
>> -       if (!fpsize)
>> -               return mem;
>> +       struct device_node *root;
>> +       int offset = 0, depth = 0;
>> +       unsigned long fpsizes[64];
>> +       struct device_node *nps[64];
>
>Use a define here.
>

Fair enough, will do in next revision. I'm not good at naming. Would
"FDT_MAX_DEPTH" is a good one?

>>
>> -       old_depth = depth;
>> -       *poffset = fdt_next_node(blob, *poffset, &depth);
>> -       if (depth < 0)
>> -               depth = 0;
>> -       while (*poffset > 0 && depth > old_depth)
>> -               mem = unflatten_dt_node(blob, mem, poffset, np, NULL,
>> -                                       fpsize, dryrun);
>> +       if (nodepp)
>> +               *nodepp = NULL;
>> +
>> +       root = dad;
>> +       fpsizes[depth] = dad ? strlen(of_node_full_name(dad)) : 0;
>> +       nps[depth++] = dad;
>> +       while (offset >= 0 && depth < 64) {
>> +               fpsizes[depth] = populate_node(blob, offset, &mem,
>> +                                              nps[depth - 1],
>> +                                              fpsizes[depth - 1],
>> +                                              &nps[depth], dryrun);
>> +               if (!fpsizes[depth])
>> +                       return mem;
>> +
>> +               if (!dryrun && nodepp && !*nodepp)
>> +                       *nodepp = nps[depth];
>> +               if (!dryrun && !root)
>> +                       root = nps[depth];
>> +
>> +               offset = fdt_next_node(blob, offset, &depth);
>> +       }
>>
>> -       if (*poffset < 0 && *poffset != -FDT_ERR_NOTFOUND)
>> -               pr_err("unflatten: error %d processing FDT\n", *poffset);
>> +       if (offset < 0 && offset != -FDT_ERR_NOTFOUND)
>> +               pr_err("%s: Error %d processing FDT\n",
>> +                      __func__, offset);
>
>What about depth == 64 case? I think the behavior should be a WARN and
>ignore those nodes so we at least can continue to boot and see the
>error. Of course, if there is a phandle pointing to ignored nodes, we
>have to handle that too.
>

Yeah, I'll have a WARN_ON(depth >= 64) in next revision. Sorry, I didn't
get the 2nd part of your comments: When depth > 64, the system won't work.
It might boot up. Why the phandle pointing to the ignored node has to be
dropped? 

>>
>>         /*
>>          * Reverse the child list. Some drivers assumes node order matches .dts
>>          * node order
>>          */
>> -       if (!dryrun && np->child) {
>> -               struct device_node *child = np->child;
>> -               np->child = NULL;
>> -               while (child) {
>> -                       struct device_node *next = child->sibling;
>> -                       child->sibling = np->child;
>> -                       np->child = child;
>> -                       child = next;
>> -               }
>> -       }
>> -
>> -       if (nodepp)
>> -               *nodepp = np;
>> +       if (!dryrun)
>> +               reverse_nodes(root);
>>
>>         return mem;
>>  }
>> @@ -431,7 +452,6 @@ static void __unflatten_device_tree(const void *blob,
>>                              void * (*dt_alloc)(u64 size, u64 align))
>>  {
>>         unsigned long size;
>> -       int start;
>>         void *mem;
>>
>>         pr_debug(" -> unflatten_device_tree()\n");
>> @@ -452,8 +472,7 @@ static void __unflatten_device_tree(const void *blob,
>>         }
>>
>>         /* First pass, scan for size */
>> -       start = 0;
>> -       size = (unsigned long)unflatten_dt_node(blob, NULL, &start, NULL, NULL, 0, true);
>> +       size = (unsigned long)unflatten_dt_node(blob, NULL, NULL, NULL, true);
>>         size = ALIGN(size, 4);
>>
>>         pr_debug("  size is %lx, allocating...\n", size);
>> @@ -467,8 +486,7 @@ static void __unflatten_device_tree(const void *blob,
>>         pr_debug("  unflattening %p...\n", mem);
>>
>>         /* Second pass, do actual unflattening */
>> -       start = 0;
>> -       unflatten_dt_node(blob, mem, &start, NULL, mynodes, 0, false);
>> +       unflatten_dt_node(blob, mem, NULL, mynodes, false);
>>         if (be32_to_cpup(mem + size) != 0xdeadbeef)
>>                 pr_warning("End of tree marker overwritten: %08x\n",
>>                            be32_to_cpup(mem + size));

Thanks,
Gavin

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gavin Shan Nov. 4, 2015, 11:26 p.m. UTC | #3
On Thu, Nov 05, 2015 at 10:23:15AM +1100, Gavin Shan wrote:
>On Wed, Nov 04, 2015 at 10:07:50AM -0600, Rob Herring wrote:
>>On Wed, Nov 4, 2015 at 7:12 AM, Gavin Shan <gwshan@linux.vnet.ibm.com> wrote:
>>> In current implementation, unflatten_dt_node() is called recursively
>>> to unflatten device nodes in FDT blob. It's stress to limited stack
>>> capacity.
>>
>>Did you actually hit a problem?
>>
>>Now we have a max depth of 64. Seems like that should be plenty... Any
>>idea how this compares to when we run out of stack space?
>>
>
>When I rebased last revision (v6), particular below patch, to 4.3.rc6,
>the kernel won't boot in P7 and P8 boxes. On P7 boxes, the stack overruns
>according to the printed kernel messages. On P8 boxes, the /bin/init in
>initramfs image can't be loaded/executed properly and it's potentially
>caused by memory corruption. That's why I reworked it to avoid recursive
>calling to unflatten_dt_node().
>

Missed the link to the patch here:

https://patchwork.ozlabs.org/patch/504512/

>The max depth "64" wasn't selected based on the stack usage. I was thinking
>the device tree is converted to friendly *.dts format and it's using TAB
>as the prefix for each line. If the device tree has 64 depth, Each line
>in *.dts for leaf nodes have to be wrapped and spanning multiple lines.
>That's why I choosed 64, maybe 32 is enough. Did you see a device-tree
>that has more than 16 depth in field? :-)
>
>>> This avoids calling the function recursively, meaning the device
>>> nodes are unflattened in one call on unflatten_dt_node(): two arrays
>>> are introduced to track the parent path size and the device node of
>>> current level of depth, which will be used by the device node on next
>>> level of depth to be unflattened. Also, the parameter "poffset" and
>>> "fpsize" are unused and dropped.
>>
>>Yay. I'm happy to see parameters removed instead of added to this function.
>>
>>>
>>> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
>>> ---
>>>  drivers/of/fdt.c | 94 +++++++++++++++++++++++++++++++++-----------------------
>>>  1 file changed, 56 insertions(+), 38 deletions(-)
>>>
>>> diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
>>> index 173b036..f4793d0 100644
>>> --- a/drivers/of/fdt.c
>>> +++ b/drivers/of/fdt.c
>>> @@ -355,61 +355,82 @@ static unsigned long populate_node(const void *blob,
>>>         return fpsize;
>>>  }
>>>
>>> +static void reverse_nodes(struct device_node *parent)
>>> +{
>>> +       struct device_node *child, *next;
>>> +
>>> +       /* In-depth first */
>>> +       child = parent->child;
>>> +       while (child) {
>>> +               reverse_nodes(child);
>>> +
>>> +               child = child->sibling;
>>> +       }
>>> +
>>> +       /* Reverse the nodes in the child list */
>>> +       child = parent->child;
>>> +       parent->child = NULL;
>>> +       while (child) {
>>> +               next = child->sibling;
>>> +
>>> +               child->sibling = parent->child;
>>> +               parent->child = child;
>>> +               child = next;
>>> +       }
>>> +}
>>> +
>>>  /**
>>>   * unflatten_dt_node - Alloc and populate a device_node from the flat tree
>>>   * @blob: The parent device tree blob
>>>   * @mem: Memory chunk to use for allocating device nodes and properties
>>> - * @poffset: pointer to node in flat tree
>>>   * @dad: Parent struct device_node
>>>   * @nodepp: The device_node tree created by the call
>>> - * @fpsize: Size of the node path up at the current depth.
>>>   * @dryrun: If true, do not allocate device nodes but still calculate needed
>>>   * memory size
>>>   */
>>>  static void *unflatten_dt_node(const void *blob,
>>>                                void *mem,
>>> -                              int *poffset,
>>>                                struct device_node *dad,
>>>                                struct device_node **nodepp,
>>> -                              unsigned long fpsize,
>>>                                bool dryrun)
>>
>>We can probably further simplify things by returning an int with
>>negative being errors and positive being the size. Also, dryrun can be
>>dropped and implied by mem and/or nodepp being NULL.
>>
>
>Yeah, I think it's reasonable to return "size" from this function. "dryrun"
>can be dropped and implied by NULL @mem. @nodepp can't be NULL. I perhaps
>have separate patch to address it in next revision.
>
>>>  {
>>> -       struct device_node *np;
>>> -       static int depth;
>>> -       int old_depth;
>>> -
>>> -       fpsize = populate_node(blob, *poffset, &mem, dad, fpsize, &np, dryrun);
>>> -       if (!fpsize)
>>> -               return mem;
>>> +       struct device_node *root;
>>> +       int offset = 0, depth = 0;
>>> +       unsigned long fpsizes[64];
>>> +       struct device_node *nps[64];
>>
>>Use a define here.
>>
>
>Fair enough, will do in next revision. I'm not good at naming. Would
>"FDT_MAX_DEPTH" is a good one?
>
>>>
>>> -       old_depth = depth;
>>> -       *poffset = fdt_next_node(blob, *poffset, &depth);
>>> -       if (depth < 0)
>>> -               depth = 0;
>>> -       while (*poffset > 0 && depth > old_depth)
>>> -               mem = unflatten_dt_node(blob, mem, poffset, np, NULL,
>>> -                                       fpsize, dryrun);
>>> +       if (nodepp)
>>> +               *nodepp = NULL;
>>> +
>>> +       root = dad;
>>> +       fpsizes[depth] = dad ? strlen(of_node_full_name(dad)) : 0;
>>> +       nps[depth++] = dad;
>>> +       while (offset >= 0 && depth < 64) {
>>> +               fpsizes[depth] = populate_node(blob, offset, &mem,
>>> +                                              nps[depth - 1],
>>> +                                              fpsizes[depth - 1],
>>> +                                              &nps[depth], dryrun);
>>> +               if (!fpsizes[depth])
>>> +                       return mem;
>>> +
>>> +               if (!dryrun && nodepp && !*nodepp)
>>> +                       *nodepp = nps[depth];
>>> +               if (!dryrun && !root)
>>> +                       root = nps[depth];
>>> +
>>> +               offset = fdt_next_node(blob, offset, &depth);
>>> +       }
>>>
>>> -       if (*poffset < 0 && *poffset != -FDT_ERR_NOTFOUND)
>>> -               pr_err("unflatten: error %d processing FDT\n", *poffset);
>>> +       if (offset < 0 && offset != -FDT_ERR_NOTFOUND)
>>> +               pr_err("%s: Error %d processing FDT\n",
>>> +                      __func__, offset);
>>
>>What about depth == 64 case? I think the behavior should be a WARN and
>>ignore those nodes so we at least can continue to boot and see the
>>error. Of course, if there is a phandle pointing to ignored nodes, we
>>have to handle that too.
>>
>
>Yeah, I'll have a WARN_ON(depth >= 64) in next revision. Sorry, I didn't
>get the 2nd part of your comments: When depth > 64, the system won't work.
>It might boot up. Why the phandle pointing to the ignored node has to be
>dropped? 
>
>>>
>>>         /*
>>>          * Reverse the child list. Some drivers assumes node order matches .dts
>>>          * node order
>>>          */
>>> -       if (!dryrun && np->child) {
>>> -               struct device_node *child = np->child;
>>> -               np->child = NULL;
>>> -               while (child) {
>>> -                       struct device_node *next = child->sibling;
>>> -                       child->sibling = np->child;
>>> -                       np->child = child;
>>> -                       child = next;
>>> -               }
>>> -       }
>>> -
>>> -       if (nodepp)
>>> -               *nodepp = np;
>>> +       if (!dryrun)
>>> +               reverse_nodes(root);
>>>
>>>         return mem;
>>>  }
>>> @@ -431,7 +452,6 @@ static void __unflatten_device_tree(const void *blob,
>>>                              void * (*dt_alloc)(u64 size, u64 align))
>>>  {
>>>         unsigned long size;
>>> -       int start;
>>>         void *mem;
>>>
>>>         pr_debug(" -> unflatten_device_tree()\n");
>>> @@ -452,8 +472,7 @@ static void __unflatten_device_tree(const void *blob,
>>>         }
>>>
>>>         /* First pass, scan for size */
>>> -       start = 0;
>>> -       size = (unsigned long)unflatten_dt_node(blob, NULL, &start, NULL, NULL, 0, true);
>>> +       size = (unsigned long)unflatten_dt_node(blob, NULL, NULL, NULL, true);
>>>         size = ALIGN(size, 4);
>>>
>>>         pr_debug("  size is %lx, allocating...\n", size);
>>> @@ -467,8 +486,7 @@ static void __unflatten_device_tree(const void *blob,
>>>         pr_debug("  unflattening %p...\n", mem);
>>>
>>>         /* Second pass, do actual unflattening */
>>> -       start = 0;
>>> -       unflatten_dt_node(blob, mem, &start, NULL, mynodes, 0, false);
>>> +       unflatten_dt_node(blob, mem, NULL, mynodes, false);
>>>         if (be32_to_cpup(mem + size) != 0xdeadbeef)
>>>                 pr_warning("End of tree marker overwritten: %08x\n",
>>>                            be32_to_cpup(mem + size));
>
>Thanks,
>Gavin

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Rob Herring Dec. 6, 2015, 8:28 p.m. UTC | #4
+Guenter

On Wed, Nov 4, 2015 at 7:12 AM, Gavin Shan <gwshan@linux.vnet.ibm.com> wrote:
> In current implementation, unflatten_dt_node() is called recursively
> to unflatten device nodes in FDT blob. It's stress to limited stack
> capacity.
>
> This avoids calling the function recursively, meaning the device
> nodes are unflattened in one call on unflatten_dt_node(): two arrays
> are introduced to track the parent path size and the device node of
> current level of depth, which will be used by the device node on next
> level of depth to be unflattened. Also, the parameter "poffset" and
> "fpsize" are unused and dropped.

Do you plan to respin the OF parts at least soon? There's another
problem Guenter found that of_fdt_unflatten_tree is not re-entrant due
to "depth" being static and this series fixes that. So I'd rather
apply this and avoid adding a mutex if possible.

Rob

>
> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
> ---
>  drivers/of/fdt.c | 94 +++++++++++++++++++++++++++++++++-----------------------
>  1 file changed, 56 insertions(+), 38 deletions(-)
>
> diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
> index 173b036..f4793d0 100644
> --- a/drivers/of/fdt.c
> +++ b/drivers/of/fdt.c
> @@ -355,61 +355,82 @@ static unsigned long populate_node(const void *blob,
>         return fpsize;
>  }
>
> +static void reverse_nodes(struct device_node *parent)
> +{
> +       struct device_node *child, *next;
> +
> +       /* In-depth first */
> +       child = parent->child;
> +       while (child) {
> +               reverse_nodes(child);
> +
> +               child = child->sibling;
> +       }
> +
> +       /* Reverse the nodes in the child list */
> +       child = parent->child;
> +       parent->child = NULL;
> +       while (child) {
> +               next = child->sibling;
> +
> +               child->sibling = parent->child;
> +               parent->child = child;
> +               child = next;
> +       }
> +}
> +
>  /**
>   * unflatten_dt_node - Alloc and populate a device_node from the flat tree
>   * @blob: The parent device tree blob
>   * @mem: Memory chunk to use for allocating device nodes and properties
> - * @poffset: pointer to node in flat tree
>   * @dad: Parent struct device_node
>   * @nodepp: The device_node tree created by the call
> - * @fpsize: Size of the node path up at the current depth.
>   * @dryrun: If true, do not allocate device nodes but still calculate needed
>   * memory size
>   */
>  static void *unflatten_dt_node(const void *blob,
>                                void *mem,
> -                              int *poffset,
>                                struct device_node *dad,
>                                struct device_node **nodepp,
> -                              unsigned long fpsize,
>                                bool dryrun)
>  {
> -       struct device_node *np;
> -       static int depth;
> -       int old_depth;
> -
> -       fpsize = populate_node(blob, *poffset, &mem, dad, fpsize, &np, dryrun);
> -       if (!fpsize)
> -               return mem;
> +       struct device_node *root;
> +       int offset = 0, depth = 0;
> +       unsigned long fpsizes[64];
> +       struct device_node *nps[64];
>
> -       old_depth = depth;
> -       *poffset = fdt_next_node(blob, *poffset, &depth);
> -       if (depth < 0)
> -               depth = 0;
> -       while (*poffset > 0 && depth > old_depth)
> -               mem = unflatten_dt_node(blob, mem, poffset, np, NULL,
> -                                       fpsize, dryrun);
> +       if (nodepp)
> +               *nodepp = NULL;
> +
> +       root = dad;
> +       fpsizes[depth] = dad ? strlen(of_node_full_name(dad)) : 0;
> +       nps[depth++] = dad;
> +       while (offset >= 0 && depth < 64) {
> +               fpsizes[depth] = populate_node(blob, offset, &mem,
> +                                              nps[depth - 1],
> +                                              fpsizes[depth - 1],
> +                                              &nps[depth], dryrun);
> +               if (!fpsizes[depth])
> +                       return mem;
> +
> +               if (!dryrun && nodepp && !*nodepp)
> +                       *nodepp = nps[depth];
> +               if (!dryrun && !root)
> +                       root = nps[depth];
> +
> +               offset = fdt_next_node(blob, offset, &depth);
> +       }
>
> -       if (*poffset < 0 && *poffset != -FDT_ERR_NOTFOUND)
> -               pr_err("unflatten: error %d processing FDT\n", *poffset);
> +       if (offset < 0 && offset != -FDT_ERR_NOTFOUND)
> +               pr_err("%s: Error %d processing FDT\n",
> +                      __func__, offset);
>
>         /*
>          * Reverse the child list. Some drivers assumes node order matches .dts
>          * node order
>          */
> -       if (!dryrun && np->child) {
> -               struct device_node *child = np->child;
> -               np->child = NULL;
> -               while (child) {
> -                       struct device_node *next = child->sibling;
> -                       child->sibling = np->child;
> -                       np->child = child;
> -                       child = next;
> -               }
> -       }
> -
> -       if (nodepp)
> -               *nodepp = np;
> +       if (!dryrun)
> +               reverse_nodes(root);
>
>         return mem;
>  }
> @@ -431,7 +452,6 @@ static void __unflatten_device_tree(const void *blob,
>                              void * (*dt_alloc)(u64 size, u64 align))
>  {
>         unsigned long size;
> -       int start;
>         void *mem;
>
>         pr_debug(" -> unflatten_device_tree()\n");
> @@ -452,8 +472,7 @@ static void __unflatten_device_tree(const void *blob,
>         }
>
>         /* First pass, scan for size */
> -       start = 0;
> -       size = (unsigned long)unflatten_dt_node(blob, NULL, &start, NULL, NULL, 0, true);
> +       size = (unsigned long)unflatten_dt_node(blob, NULL, NULL, NULL, true);
>         size = ALIGN(size, 4);
>
>         pr_debug("  size is %lx, allocating...\n", size);
> @@ -467,8 +486,7 @@ static void __unflatten_device_tree(const void *blob,
>         pr_debug("  unflattening %p...\n", mem);
>
>         /* Second pass, do actual unflattening */
> -       start = 0;
> -       unflatten_dt_node(blob, mem, &start, NULL, mynodes, 0, false);
> +       unflatten_dt_node(blob, mem, NULL, mynodes, false);
>         if (be32_to_cpup(mem + size) != 0xdeadbeef)
>                 pr_warning("End of tree marker overwritten: %08x\n",
>                            be32_to_cpup(mem + size));
> --
> 2.1.0
>
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Guenter Roeck Dec. 6, 2015, 9:49 p.m. UTC | #5
On 12/06/2015 12:28 PM, Rob Herring wrote:
> +Guenter
>
> On Wed, Nov 4, 2015 at 7:12 AM, Gavin Shan <gwshan@linux.vnet.ibm.com> wrote:
>> In current implementation, unflatten_dt_node() is called recursively
>> to unflatten device nodes in FDT blob. It's stress to limited stack
>> capacity.
>>
>> This avoids calling the function recursively, meaning the device
>> nodes are unflattened in one call on unflatten_dt_node(): two arrays
>> are introduced to track the parent path size and the device node of
>> current level of depth, which will be used by the device node on next
>> level of depth to be unflattened. Also, the parameter "poffset" and
>> "fpsize" are unused and dropped.
>
> Do you plan to respin the OF parts at least soon? There's another
> problem Guenter found that of_fdt_unflatten_tree is not re-entrant due
> to "depth" being static and this series fixes that. So I'd rather
> apply this and avoid adding a mutex if possible.
>

Hi Rob,

We see this problem in 4.1, so whatever patch you accept should be
back-ported to at least that release.

Any idea when this patch will be accepted ? We actively see the problem
in our kernel, so I'll need a solution soon. Otherwise I'll have to apply
my patch to our kernel and revert it as soon as the 'real' patch has been
back-ported.

Thanks,
Guenter

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Benjamin Herrenschmidt Dec. 6, 2015, 11:54 p.m. UTC | #6
On Sun, 2015-12-06 at 14:28 -0600, Rob Herring wrote:
> 
> Do you plan to respin the OF parts at least soon? There's another
> problem Guenter found that of_fdt_unflatten_tree is not re-entrant due
> to "depth" being static and this series fixes that. So I'd rather
> apply this and avoid adding a mutex if possible.

Gavin is on vacation until next year.

Cheers,
Ben.

> Rob
> 
> > 
> > Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
> > ---
> >  drivers/of/fdt.c | 94 +++++++++++++++++++++++++++++++++-----------------------
> >  1 file changed, 56 insertions(+), 38 deletions(-)
> > 
> > diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
> > index 173b036..f4793d0 100644
> > --- a/drivers/of/fdt.c
> > +++ b/drivers/of/fdt.c
> > @@ -355,61 +355,82 @@ static unsigned long populate_node(const void *blob,
> >         return fpsize;
> >  }
> > 
> > +static void reverse_nodes(struct device_node *parent)
> > +{
> > +       struct device_node *child, *next;
> > +
> > +       /* In-depth first */
> > +       child = parent->child;
> > +       while (child) {
> > +               reverse_nodes(child);
> > +
> > +               child = child->sibling;
> > +       }
> > +
> > +       /* Reverse the nodes in the child list */
> > +       child = parent->child;
> > +       parent->child = NULL;
> > +       while (child) {
> > +               next = child->sibling;
> > +
> > +               child->sibling = parent->child;
> > +               parent->child = child;
> > +               child = next;
> > +       }
> > +}
> > +
> >  /**
> >   * unflatten_dt_node - Alloc and populate a device_node from the flat tree
> >   * @blob: The parent device tree blob
> >   * @mem: Memory chunk to use for allocating device nodes and properties
> > - * @poffset: pointer to node in flat tree
> >   * @dad: Parent struct device_node
> >   * @nodepp: The device_node tree created by the call
> > - * @fpsize: Size of the node path up at the current depth.
> >   * @dryrun: If true, do not allocate device nodes but still calculate needed
> >   * memory size
> >   */
> >  static void *unflatten_dt_node(const void *blob,
> >                                void *mem,
> > -                              int *poffset,
> >                                struct device_node *dad,
> >                                struct device_node **nodepp,
> > -                              unsigned long fpsize,
> >                                bool dryrun)
> >  {
> > -       struct device_node *np;
> > -       static int depth;
> > -       int old_depth;
> > -
> > -       fpsize = populate_node(blob, *poffset, &mem, dad, fpsize, &np, dryrun);
> > -       if (!fpsize)
> > -               return mem;
> > +       struct device_node *root;
> > +       int offset = 0, depth = 0;
> > +       unsigned long fpsizes[64];
> > +       struct device_node *nps[64];
> > 
> > -       old_depth = depth;
> > -       *poffset = fdt_next_node(blob, *poffset, &depth);
> > -       if (depth < 0)
> > -               depth = 0;
> > -       while (*poffset > 0 && depth > old_depth)
> > -               mem = unflatten_dt_node(blob, mem, poffset, np, NULL,
> > -                                       fpsize, dryrun);
> > +       if (nodepp)
> > +               *nodepp = NULL;
> > +
> > +       root = dad;
> > +       fpsizes[depth] = dad ? strlen(of_node_full_name(dad)) : 0;
> > +       nps[depth++] = dad;
> > +       while (offset >= 0 && depth < 64) {
> > +               fpsizes[depth] = populate_node(blob, offset, &mem,
> > +                                              nps[depth - 1],
> > +                                              fpsizes[depth - 1],
> > +                                              &nps[depth], dryrun);
> > +               if (!fpsizes[depth])
> > +                       return mem;
> > +
> > +               if (!dryrun && nodepp && !*nodepp)
> > +                       *nodepp = nps[depth];
> > +               if (!dryrun && !root)
> > +                       root = nps[depth];
> > +
> > +               offset = fdt_next_node(blob, offset, &depth);
> > +       }
> > 
> > -       if (*poffset < 0 && *poffset != -FDT_ERR_NOTFOUND)
> > -               pr_err("unflatten: error %d processing FDT\n", *poffset);
> > +       if (offset < 0 && offset != -FDT_ERR_NOTFOUND)
> > +               pr_err("%s: Error %d processing FDT\n",
> > +                      __func__, offset);
> > 
> >         /*
> >          * Reverse the child list. Some drivers assumes node order matches .dts
> >          * node order
> >          */
> > -       if (!dryrun && np->child) {
> > -               struct device_node *child = np->child;
> > -               np->child = NULL;
> > -               while (child) {
> > -                       struct device_node *next = child->sibling;
> > -                       child->sibling = np->child;
> > -                       np->child = child;
> > -                       child = next;
> > -               }
> > -       }
> > -
> > -       if (nodepp)
> > -               *nodepp = np;
> > +       if (!dryrun)
> > +               reverse_nodes(root);
> > 
> >         return mem;
> >  }
> > @@ -431,7 +452,6 @@ static void __unflatten_device_tree(const void *blob,
> >                              void * (*dt_alloc)(u64 size, u64 align))
> >  {
> >         unsigned long size;
> > -       int start;
> >         void *mem;
> > 
> >         pr_debug(" -> unflatten_device_tree()\n");
> > @@ -452,8 +472,7 @@ static void __unflatten_device_tree(const void *blob,
> >         }
> > 
> >         /* First pass, scan for size */
> > -       start = 0;
> > -       size = (unsigned long)unflatten_dt_node(blob, NULL, &start, NULL, NULL, 0, true);
> > +       size = (unsigned long)unflatten_dt_node(blob, NULL, NULL, NULL, true);
> >         size = ALIGN(size, 4);
> > 
> >         pr_debug("  size is %lx, allocating...\n", size);
> > @@ -467,8 +486,7 @@ static void __unflatten_device_tree(const void *blob,
> >         pr_debug("  unflattening %p...\n", mem);
> > 
> >         /* Second pass, do actual unflattening */
> > -       start = 0;
> > -       unflatten_dt_node(blob, mem, &start, NULL, mynodes, 0, false);
> > +       unflatten_dt_node(blob, mem, NULL, mynodes, false);
> >         if (be32_to_cpup(mem + size) != 0xdeadbeef)
> >                 pr_warning("End of tree marker overwritten: %08x\n",
> >                            be32_to_cpup(mem + size));
> > --
> > 2.1.0
> > 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pci" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Guenter Roeck Dec. 7, 2015, 2:21 a.m. UTC | #7
On 12/06/2015 03:54 PM, Benjamin Herrenschmidt wrote:
> On Sun, 2015-12-06 at 14:28 -0600, Rob Herring wrote:
>>
>> Do you plan to respin the OF parts at least soon? There's another
>> problem Guenter found that of_fdt_unflatten_tree is not re-entrant due
>> to "depth" being static and this series fixes that. So I'd rather
>> apply this and avoid adding a mutex if possible.
>
> Gavin is on vacation until next year.
>

That is a bit more than the timeline I am looking for.

Rob, any chance to accept my patch for now ? After all, it can be
reverted after the rework is complete, and it would be easier
to apply to earlier kernels.

Thanks,
Guenter

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Rob Herring Dec. 7, 2015, 2:33 a.m. UTC | #8
On Sun, Dec 6, 2015 at 8:21 PM, Guenter Roeck <linux@roeck-us.net> wrote:
> On 12/06/2015 03:54 PM, Benjamin Herrenschmidt wrote:
>>
>> On Sun, 2015-12-06 at 14:28 -0600, Rob Herring wrote:
>>>
>>>
>>> Do you plan to respin the OF parts at least soon? There's another
>>> problem Guenter found that of_fdt_unflatten_tree is not re-entrant due
>>> to "depth" being static and this series fixes that. So I'd rather
>>> apply this and avoid adding a mutex if possible.
>>
>>
>> Gavin is on vacation until next year.
>>
>
> That is a bit more than the timeline I am looking for.
>
> Rob, any chance to accept my patch for now ? After all, it can be
> reverted after the rework is complete, and it would be easier
> to apply to earlier kernels.

Yes, will do. It's only 4.1 and later that it should be marked for stable?

Rob
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Guenter Roeck Dec. 7, 2015, 3:40 a.m. UTC | #9
On 12/06/2015 06:33 PM, Rob Herring wrote:
> On Sun, Dec 6, 2015 at 8:21 PM, Guenter Roeck <linux@roeck-us.net> wrote:
>> On 12/06/2015 03:54 PM, Benjamin Herrenschmidt wrote:
>>>
>>> On Sun, 2015-12-06 at 14:28 -0600, Rob Herring wrote:
>>>>
>>>>
>>>> Do you plan to respin the OF parts at least soon? There's another
>>>> problem Guenter found that of_fdt_unflatten_tree is not re-entrant due
>>>> to "depth" being static and this series fixes that. So I'd rather
>>>> apply this and avoid adding a mutex if possible.
>>>
>>>
>>> Gavin is on vacation until next year.
>>>
>>
>> That is a bit more than the timeline I am looking for.
>>
>> Rob, any chance to accept my patch for now ? After all, it can be
>> reverted after the rework is complete, and it would be easier
>> to apply to earlier kernels.
>
> Yes, will do. It's only 4.1 and later that it should be marked for stable?
>

Yes, I think so. Earlier kernels would need a manual backport.

Thanks,
Guenter

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Geert Uytterhoeven May 13, 2016, 7:16 a.m. UTC | #10
On Wed, Nov 4, 2015 at 5:07 PM, Rob Herring <robherring2@gmail.com> wrote:
> On Wed, Nov 4, 2015 at 7:12 AM, Gavin Shan <gwshan@linux.vnet.ibm.com> wrote:
>> In current implementation, unflatten_dt_node() is called recursively
>> to unflatten device nodes in FDT blob. It's stress to limited stack
>> capacity.
>
> Did you actually hit a problem?
>
> Now we have a max depth of 64. Seems like that should be plenty... Any
> idea how this compares to when we run out of stack space?

FWIW, on arm64:

drivers/of/fdt.c:443:1: warning: the frame size of 1136 bytes is
larger than 1024 bytes [-Wframe-larger-than=]

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 173b036..f4793d0 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -355,61 +355,82 @@  static unsigned long populate_node(const void *blob,
 	return fpsize;
 }
 
+static void reverse_nodes(struct device_node *parent)
+{
+	struct device_node *child, *next;
+
+	/* In-depth first */
+	child = parent->child;
+	while (child) {
+		reverse_nodes(child);
+
+		child = child->sibling;
+	}
+
+	/* Reverse the nodes in the child list */
+	child = parent->child;
+	parent->child = NULL;
+	while (child) {
+		next = child->sibling;
+
+		child->sibling = parent->child;
+		parent->child = child;
+		child = next;
+	}
+}
+
 /**
  * unflatten_dt_node - Alloc and populate a device_node from the flat tree
  * @blob: The parent device tree blob
  * @mem: Memory chunk to use for allocating device nodes and properties
- * @poffset: pointer to node in flat tree
  * @dad: Parent struct device_node
  * @nodepp: The device_node tree created by the call
- * @fpsize: Size of the node path up at the current depth.
  * @dryrun: If true, do not allocate device nodes but still calculate needed
  * memory size
  */
 static void *unflatten_dt_node(const void *blob,
 			       void *mem,
-			       int *poffset,
 			       struct device_node *dad,
 			       struct device_node **nodepp,
-			       unsigned long fpsize,
 			       bool dryrun)
 {
-	struct device_node *np;
-	static int depth;
-	int old_depth;
-
-	fpsize = populate_node(blob, *poffset, &mem, dad, fpsize, &np, dryrun);
-	if (!fpsize)
-		return mem;
+	struct device_node *root;
+	int offset = 0, depth = 0;
+	unsigned long fpsizes[64];
+	struct device_node *nps[64];
 
-	old_depth = depth;
-	*poffset = fdt_next_node(blob, *poffset, &depth);
-	if (depth < 0)
-		depth = 0;
-	while (*poffset > 0 && depth > old_depth)
-		mem = unflatten_dt_node(blob, mem, poffset, np, NULL,
-					fpsize, dryrun);
+	if (nodepp)
+		*nodepp = NULL;
+
+	root = dad;
+	fpsizes[depth] = dad ? strlen(of_node_full_name(dad)) : 0;
+	nps[depth++] = dad;
+	while (offset >= 0 && depth < 64) {
+		fpsizes[depth] = populate_node(blob, offset, &mem,
+					       nps[depth - 1],
+					       fpsizes[depth - 1],
+					       &nps[depth], dryrun);
+		if (!fpsizes[depth])
+			return mem;
+
+		if (!dryrun && nodepp && !*nodepp)
+			*nodepp = nps[depth];
+		if (!dryrun && !root)
+			root = nps[depth];
+
+		offset = fdt_next_node(blob, offset, &depth);
+	}
 
-	if (*poffset < 0 && *poffset != -FDT_ERR_NOTFOUND)
-		pr_err("unflatten: error %d processing FDT\n", *poffset);
+	if (offset < 0 && offset != -FDT_ERR_NOTFOUND)
+		pr_err("%s: Error %d processing FDT\n",
+		       __func__, offset);
 
 	/*
 	 * Reverse the child list. Some drivers assumes node order matches .dts
 	 * node order
 	 */
-	if (!dryrun && np->child) {
-		struct device_node *child = np->child;
-		np->child = NULL;
-		while (child) {
-			struct device_node *next = child->sibling;
-			child->sibling = np->child;
-			np->child = child;
-			child = next;
-		}
-	}
-
-	if (nodepp)
-		*nodepp = np;
+	if (!dryrun)
+		reverse_nodes(root);
 
 	return mem;
 }
@@ -431,7 +452,6 @@  static void __unflatten_device_tree(const void *blob,
 			     void * (*dt_alloc)(u64 size, u64 align))
 {
 	unsigned long size;
-	int start;
 	void *mem;
 
 	pr_debug(" -> unflatten_device_tree()\n");
@@ -452,8 +472,7 @@  static void __unflatten_device_tree(const void *blob,
 	}
 
 	/* First pass, scan for size */
-	start = 0;
-	size = (unsigned long)unflatten_dt_node(blob, NULL, &start, NULL, NULL, 0, true);
+	size = (unsigned long)unflatten_dt_node(blob, NULL, NULL, NULL, true);
 	size = ALIGN(size, 4);
 
 	pr_debug("  size is %lx, allocating...\n", size);
@@ -467,8 +486,7 @@  static void __unflatten_device_tree(const void *blob,
 	pr_debug("  unflattening %p...\n", mem);
 
 	/* Second pass, do actual unflattening */
-	start = 0;
-	unflatten_dt_node(blob, mem, &start, NULL, mynodes, 0, false);
+	unflatten_dt_node(blob, mem, NULL, mynodes, false);
 	if (be32_to_cpup(mem + size) != 0xdeadbeef)
 		pr_warning("End of tree marker overwritten: %08x\n",
 			   be32_to_cpup(mem + size));