Message ID | 1430460188-31343-20-git-send-email-gwshan@linux.vnet.ibm.com (mailing list archive) |
---|---|
State | New, archived |
Delegated to: | Bjorn Helgaas |
Headers | show |
+dt list On Fri, May 1, 2015 at 1:03 AM, Gavin Shan <gwshan@linux.vnet.ibm.com> wrote: > The requirement is raised when developing the PCI hotplug feature > for PowerPC PowerNV platform, which runs on top of skiboot firmware. > When plugging PCI adapter to one PCI slot, the firmware rescans the > slot and build FDT (Flat Device Tree) blob, which is sent to the > PowerNV PCI hotplug driver for processing. The new constructed device > nodes from the FDT blob are expected to be attached to the device > node of the PCI slot. Unfortunately, it seems we don't have a API > to support the scenario. The patch intends to support it by newly > introduced function of_fdt_add_subtree(), the design behind it is > shown as below: > > * When the sub-tree FDT blob, which is owned by firmware, is > received by kernel. It's copied over to the blob, which is > dynamically allocated. Since then, the FDT blob owned by > firmware isn't touched. > * Rework unflatten_dt_node() so that the device nodes in current > and deeper depth have been constructed from the FDT blob. All > device nodes are marked with flag OF_DYNAMIC_HYBIRD, which is Perhaps you meant HYBRID? > similar to OF_DYNAMIC. However, device node with the flag set > can be free'd, but in the way other than that for OF_DYNAMIC > device nodes. The difference seems to be whether you allocate space or just point to the FDT for various strings/data. Is that right? > * of_fdt_add_subtree() is the introduced API to do the work. Have you looked at overlays and if so why do they not work for your purposes? Why do you need to do this with the flattened tree? Rob > > Cc: Grant Likely <grant.likely@linaro.org> > Cc: Rob Herring <robh+dt@kernel.org> > Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com> > --- > drivers/of/dynamic.c | 19 +++++-- > drivers/of/fdt.c | 133 ++++++++++++++++++++++++++++++++++++++++--------- > include/linux/of.h | 2 + > include/linux/of_fdt.h | 1 + > 4 files changed, 127 insertions(+), 28 deletions(-) > > diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c > index 3351ef4..f562080 100644 > --- a/drivers/of/dynamic.c > +++ b/drivers/of/dynamic.c > @@ -330,13 +330,22 @@ void of_node_release(struct kobject *kobj) > return; > } > > - if (!of_node_check_flag(node, OF_DYNAMIC)) > + /* Release the subtree */ > + if (node->subtree) { > + kfree(node->subtree); > + node->subtree = NULL; > + } > + > + if (!of_node_check_flag(node, OF_DYNAMIC) && > + !of_node_check_flag(node, OF_DYNAMIC_HYBIRD)) > return; > > while (prop) { > struct property *next = prop->next; > - kfree(prop->name); > - kfree(prop->value); > + if (of_node_check_flag(node, OF_DYNAMIC)) { > + kfree(prop->name); > + kfree(prop->value); > + } > kfree(prop); > prop = next; > > @@ -345,7 +354,9 @@ void of_node_release(struct kobject *kobj) > node->deadprops = NULL; > } > } > - kfree(node->full_name); > + > + if (of_node_check_flag(node, OF_DYNAMIC)) > + kfree(node->full_name); > kfree(node->data); > kfree(node); > } > diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c > index cde35c5d01..7659560 100644 > --- a/drivers/of/fdt.c > +++ b/drivers/of/fdt.c > @@ -28,6 +28,10 @@ > #include <asm/setup.h> /* for COMMAND_LINE_SIZE */ > #include <asm/page.h> > > +#include "of_private.h" > + > +static int cur_node_depth; > + > /* > * of_fdt_limit_memory - limit the number of regions in the /memory node > * @limit: maximum entries > @@ -168,20 +172,20 @@ static void *unflatten_dt_alloc(void **mem, unsigned long size, > * @dad: Parent struct device_node > * @fpsize: Size of the node path up at the current depth. > */ > -static void * unflatten_dt_node(void *blob, > - void *mem, > - int *poffset, > - struct device_node *dad, > - struct device_node **nodepp, > - unsigned long fpsize, > - bool dryrun) > +static void *unflatten_dt_node(void *blob, > + void *mem, > + int *poffset, > + struct device_node *dad, > + struct device_node **nodepp, > + unsigned long fpsize, > + bool dryrun, > + bool dynamic) > { > const __be32 *p; > struct device_node *np; > struct property *pp, **prev_pp = NULL; > const char *pathp; > unsigned int l, allocl; > - static int depth = 0; > int old_depth; > int offset; > int has_name = 0; > @@ -219,12 +223,18 @@ static void * unflatten_dt_node(void *blob, > } > } > > - np = unflatten_dt_alloc(&mem, sizeof(struct device_node) + allocl, > + if (dynamic) > + np = kzalloc(sizeof(struct device_node) + allocl, GFP_KERNEL); > + else > + np = unflatten_dt_alloc(&mem, > + sizeof(struct device_node) + allocl, > __alignof__(struct device_node)); > if (!dryrun) { > char *fn; > of_node_init(np); > np->full_name = fn = ((char *)np) + sizeof(*np); > + if (dynamic) > + of_node_set_flag(np, OF_DYNAMIC_HYBIRD); > if (new_format) { > /* rebuild full path for new format */ > if (dad && dad->parent) { > @@ -267,8 +277,12 @@ static void * unflatten_dt_node(void *blob, > } > if (strcmp(pname, "name") == 0) > has_name = 1; > - pp = unflatten_dt_alloc(&mem, sizeof(struct property), > - __alignof__(struct property)); > + > + if (dynamic) > + pp = kzalloc(sizeof(struct property), GFP_KERNEL); > + else > + pp = unflatten_dt_alloc(&mem, sizeof(struct property), > + __alignof__(struct property)); > if (!dryrun) { > /* We accept flattened tree phandles either in > * ePAPR-style "phandle" properties, or the > @@ -309,8 +323,13 @@ static void * unflatten_dt_node(void *blob, > if (pa < ps) > pa = p1; > sz = (pa - ps) + 1; > - pp = unflatten_dt_alloc(&mem, sizeof(struct property) + sz, > - __alignof__(struct property)); > + > + if (dynamic) > + pp = kzalloc(sizeof(struct property) + sz, GFP_KERNEL); > + else > + pp = unflatten_dt_alloc(&mem, > + sizeof(struct property) + sz, > + __alignof__(struct property)); > if (!dryrun) { > pp->name = "name"; > pp->length = sz; > @@ -334,13 +353,21 @@ static void * unflatten_dt_node(void *blob, > np->type = "<NULL>"; > } > > - old_depth = depth; > - *poffset = fdt_next_node(blob, *poffset, &depth); > - if (depth < 0) > - depth = 0; > - while (*poffset > 0 && depth > old_depth) > - mem = unflatten_dt_node(blob, mem, poffset, np, NULL, > - fpsize, dryrun); > + old_depth = cur_node_depth; > + *poffset = fdt_next_node(blob, *poffset, &cur_node_depth); > + while (*poffset > 0) { > + if (cur_node_depth < old_depth) > + break; > + > + if (cur_node_depth == old_depth) > + mem = unflatten_dt_node(blob, mem, poffset, > + dad, NULL, fpsize, > + dryrun, dynamic); > + else if (cur_node_depth > old_depth) > + mem = unflatten_dt_node(blob, mem, poffset, > + np, NULL, fpsize, > + dryrun, dynamic); > + } > > if (*poffset < 0 && *poffset != -FDT_ERR_NOTFOUND) > pr_err("unflatten: error %d processing FDT\n", *poffset); > @@ -379,8 +406,8 @@ static void * unflatten_dt_node(void *blob, > * for the resulting tree > */ > static void __unflatten_device_tree(void *blob, > - struct device_node **mynodes, > - void * (*dt_alloc)(u64 size, u64 align)) > + struct device_node **mynodes, > + void * (*dt_alloc)(u64 size, u64 align)) > { > unsigned long size; > int start; > @@ -405,7 +432,9 @@ static void __unflatten_device_tree(void *blob, > > /* First pass, scan for size */ > start = 0; > - size = (unsigned long)unflatten_dt_node(blob, NULL, &start, NULL, NULL, 0, true); > + cur_node_depth = 1; > + size = (unsigned long)unflatten_dt_node(blob, NULL, &start, NULL, > + NULL, 0, true, false); > size = ALIGN(size, 4); > > pr_debug(" size is %lx, allocating...\n", size); > @@ -420,7 +449,8 @@ static void __unflatten_device_tree(void *blob, > > /* Second pass, do actual unflattening */ > start = 0; > - unflatten_dt_node(blob, mem, &start, NULL, mynodes, 0, false); > + cur_node_depth = 1; > + unflatten_dt_node(blob, mem, &start, NULL, mynodes, 0, false, false); > if (be32_to_cpup(mem + size) != 0xdeadbeef) > pr_warning("End of tree marker overwritten: %08x\n", > be32_to_cpup(mem + size)); > @@ -448,6 +478,61 @@ void of_fdt_unflatten_tree(unsigned long *blob, > } > EXPORT_SYMBOL_GPL(of_fdt_unflatten_tree); > > +static void populate_sysfs_for_child_nodes(struct device_node *parent) > +{ > + struct device_node *child; > + > + for_each_child_of_node(parent, child) { > + __of_attach_node_sysfs(child); > + populate_sysfs_for_child_nodes(child); > + } > +} > + > +/** > + * of_fdt_add_substree - Create sub-tree of device nodes > + * @parent: parent device node to which the sub-tree will attach > + * @blob: flat device tree blob representing the sub-tree > + * > + * Copy over the FDT blob, which passed from firmware, and then > + * unflatten the sub-tree. > + */ > +void of_fdt_add_subtree(struct device_node *parent, void *blob) > +{ > + int start = 0; > + > + /* Validate the header */ > + if (!blob || fdt_check_header(blob)) { > + pr_err("%s: Invalid device-tree blob header at 0x%p\n", > + __func__, blob); > + return; > + } > + > + /* Free the flat blob for last time lazily */ > + if (parent->subtree) { > + kfree(parent->subtree); > + parent->subtree = NULL; > + } > + > + /* Copy over the flat blob */ > + parent->subtree = kzalloc(fdt_totalsize(blob), GFP_KERNEL); > + if (!parent->subtree) { > + pr_err("%s: Cannot copy over device-tree blob\n", > + __func__); > + return; > + } > + > + memcpy(parent->subtree, blob, fdt_totalsize(blob)); > + > + /* Unflatten it */ > + mutex_lock(&of_mutex); > + cur_node_depth = 1; > + unflatten_dt_node(parent->subtree, NULL, &start, parent, NULL, > + strlen(parent->full_name), false, true); > + populate_sysfs_for_child_nodes(parent); > + mutex_unlock(&of_mutex); > +} > +EXPORT_SYMBOL(of_fdt_add_subtree); > + > /* Everything below here references initial_boot_params directly. */ > int __initdata dt_root_addr_cells; > int __initdata dt_root_size_cells; > diff --git a/include/linux/of.h b/include/linux/of.h > index ddeaae6..ac50b02 100644 > --- a/include/linux/of.h > +++ b/include/linux/of.h > @@ -60,6 +60,7 @@ struct device_node { > struct device_node *sibling; > struct kobject kobj; > unsigned long _flags; > + void *subtree; > void *data; > #if defined(CONFIG_SPARC) > const char *path_component_name; > @@ -222,6 +223,7 @@ static inline unsigned long of_read_ulong(const __be32 *cell, int size) > #define OF_DETACHED 2 /* node has been detached from the device tree */ > #define OF_POPULATED 3 /* device already created for the node */ > #define OF_POPULATED_BUS 4 /* of_platform_populate recursed to children of this node */ > +#define OF_DYNAMIC_HYBIRD 5 /* similar to OF_DYNAMIC, but partially */ > > #define OF_IS_DYNAMIC(x) test_bit(OF_DYNAMIC, &x->_flags) > #define OF_MARK_DYNAMIC(x) set_bit(OF_DYNAMIC, &x->_flags) > diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h > index 587ee50..1fb47d7 100644 > --- a/include/linux/of_fdt.h > +++ b/include/linux/of_fdt.h > @@ -39,6 +39,7 @@ extern int of_fdt_match(const void *blob, unsigned long node, > const char *const *compat); > extern void of_fdt_unflatten_tree(unsigned long *blob, > struct device_node **mynodes); > +extern void of_fdt_add_subtree(struct device_node *parent, void *blob); > > /* TBD: Temporary export of fdt globals - remove when code fully merged */ > extern int __initdata dt_root_addr_cells; > -- > 2.1.0 > -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, 2015-05-01 at 07:54 -0500, Rob Herring wrote: > The difference seems to be whether you allocate space or just point to > the FDT for various strings/data. Is that right? > > > * of_fdt_add_subtree() is the introduced API to do the work. > > Have you looked at overlays and if so why do they not work for your purposes? > > Why do you need to do this with the flattened tree? The basic idea I asked Gavin to implement is that since the FW needs to provide a bunch of DT updates to Linux at runtime in the form of new nodes below an existing one, rather than doing it via some new/custom format, instead, have it send a bit of FDT blob to expand under an existing node. As for the details of Gavin implementation, I haven't looked at it in details yet so there might be issues there, however I don't know what you mean by "overlays", any pointer ? Cheers, Ben. -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, May 1, 2015 at 10:22 AM, Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote: > On Fri, 2015-05-01 at 07:54 -0500, Rob Herring wrote: > >> The difference seems to be whether you allocate space or just point to >> the FDT for various strings/data. Is that right? >> >> > * of_fdt_add_subtree() is the introduced API to do the work. >> >> Have you looked at overlays and if so why do they not work for your purposes? >> >> Why do you need to do this with the flattened tree? > > The basic idea I asked Gavin to implement is that since the FW needs to > provide a bunch of DT updates to Linux at runtime in the form of new > nodes below an existing one, rather than doing it via some new/custom > format, instead, have it send a bit of FDT blob to expand under an > existing node. Overlay = an FDT blob to graft into a live running system. Sounds like the same thing. > As for the details of Gavin implementation, I haven't looked at it in > details yet so there might be issues there, however I don't know what > you mean by "overlays", any pointer ? CONFIG_OF_OVERLAY http://events.linuxfoundation.org/sites/events/files/slides/dynamic-dt-keynote-v3.pdf Rob -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, 2015-05-01 at 13:46 -0500, Rob Herring wrote: > On Fri, May 1, 2015 at 10:22 AM, Benjamin Herrenschmidt > <benh@kernel.crashing.org> wrote: > > On Fri, 2015-05-01 at 07:54 -0500, Rob Herring wrote: > > > >> The difference seems to be whether you allocate space or just point to > >> the FDT for various strings/data. Is that right? > >> > >> > * of_fdt_add_subtree() is the introduced API to do the work. > >> > >> Have you looked at overlays and if so why do they not work for your purposes? > >> > >> Why do you need to do this with the flattened tree? > > > > The basic idea I asked Gavin to implement is that since the FW needs to > > provide a bunch of DT updates to Linux at runtime in the form of new > > nodes below an existing one, rather than doing it via some new/custom > > format, instead, have it send a bit of FDT blob to expand under an > > existing node. > > Overlay = an FDT blob to graft into a live running system. Sounds like > the same thing. > > > As for the details of Gavin implementation, I haven't looked at it in > > details yet so there might be issues there, however I don't know what > > you mean by "overlays", any pointer ? > > CONFIG_OF_OVERLAY > > http://events.linuxfoundation.org/sites/events/files/slides/dynamic-dt-keynote-v3.pdf Well, that looks horrendously complicated, poorly documented and totally unused in-tree outside of the unittest stuff, yay ! It has all sort of "features" that I don't really care about. I still don't see what it buys me other than making my FW a lot more complex having to generate all that additional fixup etc... crap that I don't totally get yet. What's wrong with just unflattening the nodes in place ? The DT comes from the FW in the first place so all the phandles are already good in the new added blob. Internally, the FW created new nodes in its internal representation and flattened the subtree and sends that subtree to Linux. I don't plan to play "revert" either, if you unplug, I do need to remove what's under the slot but that's true of boot time devices, not just "new" ones, so the overlay stuff won't do the trick and I certainly don't want to keep track... Ben. -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sat, 2015-05-02 at 08:57 +1000, Benjamin Herrenschmidt wrote: > > Overlay = an FDT blob to graft into a live running system. Sounds like > > the same thing. > > > > > As for the details of Gavin implementation, I haven't looked at it in > > > details yet so there might be issues there, however I don't know what > > > you mean by "overlays", any pointer ? > > > > CONFIG_OF_OVERLAY > > > > http://events.linuxfoundation.org/sites/events/files/slides/dynamic-dt-keynote-v3.pdf > > Well, that looks horrendously complicated, poorly documented and totally > unused in-tree outside of the unittest stuff, yay ! It has all sort of > "features" that I don't really care about. Looking a bit more at it, I don't quite see how I can attach a subtree using that stuff. Instead, each node in the overlay seems to need extra nodes and properties to refer to the original. So the FW would essentially have to create something a lot more complex than just reflattening a bit of its internal tree. For each internal node, it will need to add all those __overlay__ nodes and properties. That is not going to fly for me at all. It's order of magnitudes more complex than the solution we are pursuing. So I think for our use case, we should continue in the direction of having a helper to unflatten a piece of FDT underneath an existing node. I don't like the "HYBRID" stuff though, we should not refer to the original FDT, we should just make them normal dynamic nodes. Ben. -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sat, 2015-05-02 at 09:29 +1000, Benjamin Herrenschmidt wrote: > Looking a bit more at it, I don't quite see how I can attach a subtree > using that stuff. > > Instead, each node in the overlay seems to need extra nodes and > properties to refer to the original. > > So the FW would essentially have to create something a lot more complex > than just reflattening a bit of its internal tree. For each internal > node, it will need to add all those __overlay__ nodes and properties. > > That is not going to fly for me at all. It's order of magnitudes more > complex than the solution we are pursuing. > > So I think for our use case, we should continue in the direction of > having a helper to unflatten a piece of FDT underneath an existing > node. I don't like the "HYBRID" stuff though, we should not refer to > the original FDT, we should just make them normal dynamic nodes. A bit more thought... if we were to use the overlay stuff, Gavin, what we *could* do is add to OPAL FW internal representation a generation count to every node and property. That way we could essentially know whenever something's changed from what we flattened originally for the kernel. We can then create a generic (not PCI specific) call that generates an overlay tree for every node and property that has a generation count that is newer than what was flattened (or passed by the OS). It's still a LOT more complex than what we need though... Cheers, Ben. -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, May 01, 2015 at 07:54:03AM -0500, Rob Herring wrote: >+dt list > >On Fri, May 1, 2015 at 1:03 AM, Gavin Shan <gwshan@linux.vnet.ibm.com> wrote: >> The requirement is raised when developing the PCI hotplug feature >> for PowerPC PowerNV platform, which runs on top of skiboot firmware. >> When plugging PCI adapter to one PCI slot, the firmware rescans the >> slot and build FDT (Flat Device Tree) blob, which is sent to the >> PowerNV PCI hotplug driver for processing. The new constructed device >> nodes from the FDT blob are expected to be attached to the device >> node of the PCI slot. Unfortunately, it seems we don't have a API >> to support the scenario. The patch intends to support it by newly >> introduced function of_fdt_add_subtree(), the design behind it is >> shown as below: >> >> * When the sub-tree FDT blob, which is owned by firmware, is >> received by kernel. It's copied over to the blob, which is >> dynamically allocated. Since then, the FDT blob owned by >> firmware isn't touched. >> * Rework unflatten_dt_node() so that the device nodes in current >> and deeper depth have been constructed from the FDT blob. All >> device nodes are marked with flag OF_DYNAMIC_HYBIRD, which is > >Perhaps you meant HYBRID? > Yeah, It should be "HYBRID". >> similar to OF_DYNAMIC. However, device node with the flag set >> can be free'd, but in the way other than that for OF_DYNAMIC >> device nodes. > >The difference seems to be whether you allocate space or just point to >the FDT for various strings/data. Is that right? > It's correct. The FDT blob passed from firmware is copied by kernel to the memory chunk, which is allocated from slab. That means the FDT blob managed by firmware can be released in time. In kernel, the instances of "struct device_node" and "struct property" are allocated from slab dynamically, but some of their fields are points to the (copied) FDT blob. It indicates the (copied) FDT can only be released when the sub-tree is cut off completely. >> * of_fdt_add_subtree() is the introduced API to do the work. > >Have you looked at overlays and if so why do they not work for your purposes? > >Why do you need to do this with the flattened tree? > It seems that Ben already helped answering the questions. I'll reply in other threads if necessary. Rob, thanks for review. Thanks, Gavin >Rob > >> >> Cc: Grant Likely <grant.likely@linaro.org> >> Cc: Rob Herring <robh+dt@kernel.org> >> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com> >> --- >> drivers/of/dynamic.c | 19 +++++-- >> drivers/of/fdt.c | 133 ++++++++++++++++++++++++++++++++++++++++--------- >> include/linux/of.h | 2 + >> include/linux/of_fdt.h | 1 + >> 4 files changed, 127 insertions(+), 28 deletions(-) >> >> diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c >> index 3351ef4..f562080 100644 >> --- a/drivers/of/dynamic.c >> +++ b/drivers/of/dynamic.c >> @@ -330,13 +330,22 @@ void of_node_release(struct kobject *kobj) >> return; >> } >> >> - if (!of_node_check_flag(node, OF_DYNAMIC)) >> + /* Release the subtree */ >> + if (node->subtree) { >> + kfree(node->subtree); >> + node->subtree = NULL; >> + } >> + >> + if (!of_node_check_flag(node, OF_DYNAMIC) && >> + !of_node_check_flag(node, OF_DYNAMIC_HYBIRD)) >> return; >> >> while (prop) { >> struct property *next = prop->next; >> - kfree(prop->name); >> - kfree(prop->value); >> + if (of_node_check_flag(node, OF_DYNAMIC)) { >> + kfree(prop->name); >> + kfree(prop->value); >> + } >> kfree(prop); >> prop = next; >> >> @@ -345,7 +354,9 @@ void of_node_release(struct kobject *kobj) >> node->deadprops = NULL; >> } >> } >> - kfree(node->full_name); >> + >> + if (of_node_check_flag(node, OF_DYNAMIC)) >> + kfree(node->full_name); >> kfree(node->data); >> kfree(node); >> } >> diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c >> index cde35c5d01..7659560 100644 >> --- a/drivers/of/fdt.c >> +++ b/drivers/of/fdt.c >> @@ -28,6 +28,10 @@ >> #include <asm/setup.h> /* for COMMAND_LINE_SIZE */ >> #include <asm/page.h> >> >> +#include "of_private.h" >> + >> +static int cur_node_depth; >> + >> /* >> * of_fdt_limit_memory - limit the number of regions in the /memory node >> * @limit: maximum entries >> @@ -168,20 +172,20 @@ static void *unflatten_dt_alloc(void **mem, unsigned long size, >> * @dad: Parent struct device_node >> * @fpsize: Size of the node path up at the current depth. >> */ >> -static void * unflatten_dt_node(void *blob, >> - void *mem, >> - int *poffset, >> - struct device_node *dad, >> - struct device_node **nodepp, >> - unsigned long fpsize, >> - bool dryrun) >> +static void *unflatten_dt_node(void *blob, >> + void *mem, >> + int *poffset, >> + struct device_node *dad, >> + struct device_node **nodepp, >> + unsigned long fpsize, >> + bool dryrun, >> + bool dynamic) >> { >> const __be32 *p; >> struct device_node *np; >> struct property *pp, **prev_pp = NULL; >> const char *pathp; >> unsigned int l, allocl; >> - static int depth = 0; >> int old_depth; >> int offset; >> int has_name = 0; >> @@ -219,12 +223,18 @@ static void * unflatten_dt_node(void *blob, >> } >> } >> >> - np = unflatten_dt_alloc(&mem, sizeof(struct device_node) + allocl, >> + if (dynamic) >> + np = kzalloc(sizeof(struct device_node) + allocl, GFP_KERNEL); >> + else >> + np = unflatten_dt_alloc(&mem, >> + sizeof(struct device_node) + allocl, >> __alignof__(struct device_node)); >> if (!dryrun) { >> char *fn; >> of_node_init(np); >> np->full_name = fn = ((char *)np) + sizeof(*np); >> + if (dynamic) >> + of_node_set_flag(np, OF_DYNAMIC_HYBIRD); >> if (new_format) { >> /* rebuild full path for new format */ >> if (dad && dad->parent) { >> @@ -267,8 +277,12 @@ static void * unflatten_dt_node(void *blob, >> } >> if (strcmp(pname, "name") == 0) >> has_name = 1; >> - pp = unflatten_dt_alloc(&mem, sizeof(struct property), >> - __alignof__(struct property)); >> + >> + if (dynamic) >> + pp = kzalloc(sizeof(struct property), GFP_KERNEL); >> + else >> + pp = unflatten_dt_alloc(&mem, sizeof(struct property), >> + __alignof__(struct property)); >> if (!dryrun) { >> /* We accept flattened tree phandles either in >> * ePAPR-style "phandle" properties, or the >> @@ -309,8 +323,13 @@ static void * unflatten_dt_node(void *blob, >> if (pa < ps) >> pa = p1; >> sz = (pa - ps) + 1; >> - pp = unflatten_dt_alloc(&mem, sizeof(struct property) + sz, >> - __alignof__(struct property)); >> + >> + if (dynamic) >> + pp = kzalloc(sizeof(struct property) + sz, GFP_KERNEL); >> + else >> + pp = unflatten_dt_alloc(&mem, >> + sizeof(struct property) + sz, >> + __alignof__(struct property)); >> if (!dryrun) { >> pp->name = "name"; >> pp->length = sz; >> @@ -334,13 +353,21 @@ static void * unflatten_dt_node(void *blob, >> np->type = "<NULL>"; >> } >> >> - old_depth = depth; >> - *poffset = fdt_next_node(blob, *poffset, &depth); >> - if (depth < 0) >> - depth = 0; >> - while (*poffset > 0 && depth > old_depth) >> - mem = unflatten_dt_node(blob, mem, poffset, np, NULL, >> - fpsize, dryrun); >> + old_depth = cur_node_depth; >> + *poffset = fdt_next_node(blob, *poffset, &cur_node_depth); >> + while (*poffset > 0) { >> + if (cur_node_depth < old_depth) >> + break; >> + >> + if (cur_node_depth == old_depth) >> + mem = unflatten_dt_node(blob, mem, poffset, >> + dad, NULL, fpsize, >> + dryrun, dynamic); >> + else if (cur_node_depth > old_depth) >> + mem = unflatten_dt_node(blob, mem, poffset, >> + np, NULL, fpsize, >> + dryrun, dynamic); >> + } >> >> if (*poffset < 0 && *poffset != -FDT_ERR_NOTFOUND) >> pr_err("unflatten: error %d processing FDT\n", *poffset); >> @@ -379,8 +406,8 @@ static void * unflatten_dt_node(void *blob, >> * for the resulting tree >> */ >> static void __unflatten_device_tree(void *blob, >> - struct device_node **mynodes, >> - void * (*dt_alloc)(u64 size, u64 align)) >> + struct device_node **mynodes, >> + void * (*dt_alloc)(u64 size, u64 align)) >> { >> unsigned long size; >> int start; >> @@ -405,7 +432,9 @@ static void __unflatten_device_tree(void *blob, >> >> /* First pass, scan for size */ >> start = 0; >> - size = (unsigned long)unflatten_dt_node(blob, NULL, &start, NULL, NULL, 0, true); >> + cur_node_depth = 1; >> + size = (unsigned long)unflatten_dt_node(blob, NULL, &start, NULL, >> + NULL, 0, true, false); >> size = ALIGN(size, 4); >> >> pr_debug(" size is %lx, allocating...\n", size); >> @@ -420,7 +449,8 @@ static void __unflatten_device_tree(void *blob, >> >> /* Second pass, do actual unflattening */ >> start = 0; >> - unflatten_dt_node(blob, mem, &start, NULL, mynodes, 0, false); >> + cur_node_depth = 1; >> + unflatten_dt_node(blob, mem, &start, NULL, mynodes, 0, false, false); >> if (be32_to_cpup(mem + size) != 0xdeadbeef) >> pr_warning("End of tree marker overwritten: %08x\n", >> be32_to_cpup(mem + size)); >> @@ -448,6 +478,61 @@ void of_fdt_unflatten_tree(unsigned long *blob, >> } >> EXPORT_SYMBOL_GPL(of_fdt_unflatten_tree); >> >> +static void populate_sysfs_for_child_nodes(struct device_node *parent) >> +{ >> + struct device_node *child; >> + >> + for_each_child_of_node(parent, child) { >> + __of_attach_node_sysfs(child); >> + populate_sysfs_for_child_nodes(child); >> + } >> +} >> + >> +/** >> + * of_fdt_add_substree - Create sub-tree of device nodes >> + * @parent: parent device node to which the sub-tree will attach >> + * @blob: flat device tree blob representing the sub-tree >> + * >> + * Copy over the FDT blob, which passed from firmware, and then >> + * unflatten the sub-tree. >> + */ >> +void of_fdt_add_subtree(struct device_node *parent, void *blob) >> +{ >> + int start = 0; >> + >> + /* Validate the header */ >> + if (!blob || fdt_check_header(blob)) { >> + pr_err("%s: Invalid device-tree blob header at 0x%p\n", >> + __func__, blob); >> + return; >> + } >> + >> + /* Free the flat blob for last time lazily */ >> + if (parent->subtree) { >> + kfree(parent->subtree); >> + parent->subtree = NULL; >> + } >> + >> + /* Copy over the flat blob */ >> + parent->subtree = kzalloc(fdt_totalsize(blob), GFP_KERNEL); >> + if (!parent->subtree) { >> + pr_err("%s: Cannot copy over device-tree blob\n", >> + __func__); >> + return; >> + } >> + >> + memcpy(parent->subtree, blob, fdt_totalsize(blob)); >> + >> + /* Unflatten it */ >> + mutex_lock(&of_mutex); >> + cur_node_depth = 1; >> + unflatten_dt_node(parent->subtree, NULL, &start, parent, NULL, >> + strlen(parent->full_name), false, true); >> + populate_sysfs_for_child_nodes(parent); >> + mutex_unlock(&of_mutex); >> +} >> +EXPORT_SYMBOL(of_fdt_add_subtree); >> + >> /* Everything below here references initial_boot_params directly. */ >> int __initdata dt_root_addr_cells; >> int __initdata dt_root_size_cells; >> diff --git a/include/linux/of.h b/include/linux/of.h >> index ddeaae6..ac50b02 100644 >> --- a/include/linux/of.h >> +++ b/include/linux/of.h >> @@ -60,6 +60,7 @@ struct device_node { >> struct device_node *sibling; >> struct kobject kobj; >> unsigned long _flags; >> + void *subtree; >> void *data; >> #if defined(CONFIG_SPARC) >> const char *path_component_name; >> @@ -222,6 +223,7 @@ static inline unsigned long of_read_ulong(const __be32 *cell, int size) >> #define OF_DETACHED 2 /* node has been detached from the device tree */ >> #define OF_POPULATED 3 /* device already created for the node */ >> #define OF_POPULATED_BUS 4 /* of_platform_populate recursed to children of this node */ >> +#define OF_DYNAMIC_HYBIRD 5 /* similar to OF_DYNAMIC, but partially */ >> >> #define OF_IS_DYNAMIC(x) test_bit(OF_DYNAMIC, &x->_flags) >> #define OF_MARK_DYNAMIC(x) set_bit(OF_DYNAMIC, &x->_flags) >> diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h >> index 587ee50..1fb47d7 100644 >> --- a/include/linux/of_fdt.h >> +++ b/include/linux/of_fdt.h >> @@ -39,6 +39,7 @@ extern int of_fdt_match(const void *blob, unsigned long node, >> const char *const *compat); >> extern void of_fdt_unflatten_tree(unsigned long *blob, >> struct device_node **mynodes); >> +extern void of_fdt_add_subtree(struct device_node *parent, void *blob); >> >> /* TBD: Temporary export of fdt globals - remove when code fully merged */ >> extern int __initdata dt_root_addr_cells; >> -- >> 2.1.0 >> > -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sat, May 02, 2015 at 09:29:36AM +1000, Benjamin Herrenschmidt wrote: >On Sat, 2015-05-02 at 08:57 +1000, Benjamin Herrenschmidt wrote: > >> > Overlay = an FDT blob to graft into a live running system. Sounds like >> > the same thing. >> > >> > > As for the details of Gavin implementation, I haven't looked at it in >> > > details yet so there might be issues there, however I don't know what >> > > you mean by "overlays", any pointer ? >> > >> > CONFIG_OF_OVERLAY >> > >> > http://events.linuxfoundation.org/sites/events/files/slides/dynamic-dt-keynote-v3.pdf >> >> Well, that looks horrendously complicated, poorly documented and totally >> unused in-tree outside of the unittest stuff, yay ! It has all sort of >> "features" that I don't really care about. > >Looking a bit more at it, I don't quite see how I can attach a subtree >using that stuff. > >Instead, each node in the overlay seems to need extra nodes and >properties to refer to the original. > >So the FW would essentially have to create something a lot more complex >than just reflattening a bit of its internal tree. For each internal >node, it will need to add all those __overlay__ nodes and properties. > >That is not going to fly for me at all. It's order of magnitudes more >complex than the solution we are pursuing. > >So I think for our use case, we should continue in the direction of >having a helper to unflatten a piece of FDT underneath an existing >node. I don't like the "HYBRID" stuff though, we should not refer to >the original FDT, we should just make them normal dynamic nodes. > The original FDT from firmware is copied over to the memory chunk allocated from slab by kernel. So we refer to the copy of the FDT, not original one. Yeah, "HYBRID" wouldn't be a good idea. If we want make all device nodes and properties of the sub-tree "DYNAMIC", the FDT hasn't to be copied over from skiboot to kernel, indicating those dynamic device nodes and properties in the subtree can be figured out directly from the FDT blob, which is owned by firmware. Also, it just need small changes to code what we have. Not too much changes needed. Thanks, Gavin -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sat, May 02, 2015 at 12:48:26PM +1000, Benjamin Herrenschmidt wrote: >On Sat, 2015-05-02 at 09:29 +1000, Benjamin Herrenschmidt wrote: > >> Looking a bit more at it, I don't quite see how I can attach a subtree >> using that stuff. >> >> Instead, each node in the overlay seems to need extra nodes and >> properties to refer to the original. >> >> So the FW would essentially have to create something a lot more complex >> than just reflattening a bit of its internal tree. For each internal >> node, it will need to add all those __overlay__ nodes and properties. >> >> That is not going to fly for me at all. It's order of magnitudes more >> complex than the solution we are pursuing. >> >> So I think for our use case, we should continue in the direction of >> having a helper to unflatten a piece of FDT underneath an existing >> node. I don't like the "HYBRID" stuff though, we should not refer to >> the original FDT, we should just make them normal dynamic nodes. Just took a close look on the overlay code. Hopefully I understand how it works completely. Yeah, there is one questions according to my understanding. The "overlay" device node should have been in child list of the device node, who also has the indicator to "target" node. That means some one else has to create "overlay" node and figure out the "target" node in advance, then invokes overlay module to apply the changes. From this perspective, the mechanism is something used to apply the changes to device-tree, not parsing and create device nodes from input. It does gurantee all the changes will be applied or none of them. So I agree on what Ben suggested: to continue the direction of having a helper to unflatten FDT blobk underneath the existing node, and "HYBRID" should be replaced with "OF_DYNAMIC". > >A bit more thought... if we were to use the overlay stuff, Gavin, what >we *could* do is add to OPAL FW internal representation a generation >count to every node and property. > >That way we could essentially know whenever something's changed from >what we flattened originally for the kernel. > >We can then create a generic (not PCI specific) call that generates >an overlay tree for every node and property that has a generation >count that is newer than what was flattened (or passed by the OS). > >It's still a LOT more complex than what we need though... > Thanks, Ben. If we really need utilize overlay to support our case, we need some one to parse the input (device-tree changes) from firmware and create "overlay" device node and "target" node as I mentioned above. It's not simpler than the way we had to support our case. I'm not sure if we really need utilize overlay for our case. Thanks, Gavin -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, 2015-05-04 at 11:30 +1000, Gavin Shan wrote: > Thanks, Ben. If we really need utilize overlay to support our case, > we need some one to parse the input (device-tree changes) from > firmware > and create "overlay" device node and "target" node as I mentioned > above. > It's not simpler than the way we had to support our case. I'm not sure > if we really need utilize overlay for our case. No, if we decide to go down that path, then the FW needs to create the overlay. This could be done by having some kind of versioning to all nodes and properties using a global generation count. Ie, if we "know" what we passed to Linux, we can generate an overlay that contains everything that changed since then using the version numbers. However, we should probably encode the version in the tree itself and have specific APIs to retrieve "from" a given version to properly deal with kexec'ing a kernel since in that case, the new kernel will have something that isn't version 0 but version N where N is the latest applied overlay. Also I don't know how removing nodes works with overlay. IE the overlay system is designed around the idea of removing the overlay to retrieve the original tree. In our case, our overlays are meant to be fully committed, and I don't know whether there's a way to keep track. On unplug, we will just remove all the nodes below the slot. Cheers, Ben. -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi Ben, > On May 2, 2015, at 01:57 , Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote: > > On Fri, 2015-05-01 at 13:46 -0500, Rob Herring wrote: >> On Fri, May 1, 2015 at 10:22 AM, Benjamin Herrenschmidt >> <benh@kernel.crashing.org> wrote: >>> On Fri, 2015-05-01 at 07:54 -0500, Rob Herring wrote: >>> >>>> The difference seems to be whether you allocate space or just point to >>>> the FDT for various strings/data. Is that right? >>>> >>>>> * of_fdt_add_subtree() is the introduced API to do the work. >>>> >>>> Have you looked at overlays and if so why do they not work for your purposes? >>>> >>>> Why do you need to do this with the flattened tree? >>> >>> The basic idea I asked Gavin to implement is that since the FW needs to >>> provide a bunch of DT updates to Linux at runtime in the form of new >>> nodes below an existing one, rather than doing it via some new/custom >>> format, instead, have it send a bit of FDT blob to expand under an >>> existing node. >> >> Overlay = an FDT blob to graft into a live running system. Sounds like >> the same thing. >> >>> As for the details of Gavin implementation, I haven't looked at it in >>> details yet so there might be issues there, however I don't know what >>> you mean by "overlays", any pointer ? >> >> CONFIG_OF_OVERLAY >> >> http://events.linuxfoundation.org/sites/events/files/slides/dynamic-dt-keynote-v3.pdf > > Well, that looks horrendously complicated, poorly documented and totally > unused in-tree outside of the unittest stuff, yay ! It has all sort of > "features" that I don't really care about. > If it was easy to get stuff in, it would get more of the real-use drivers in. > I still don't see what it buys me other than making my FW a lot more > complex having to generate all that additional fixup etc... crap that I > don't totally get yet. > You don’t generate any additional fixups. You just compile with the option that generates all the fixups for you. > What's wrong with just unflattening the nodes in place ? The DT comes > from the FW in the first place so all the phandles are already good in > the new added blob. Internally, the FW created new nodes in its internal > representation and flattened the subtree and sends that subtree to > Linux. > > I don't plan to play "revert" either, if you unplug, I do need to remove > what's under the slot but that's true of boot time devices, not just > "new" ones, so the overlay stuff won't do the trick and I certainly > don't want to keep track… > You get all of the corner cases handled for free. Perhaps it works for your case too. Perhaps you can educate me on what you need supported and we can make sure it’s included. > Ben. > > Regards — Pantelis -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, 2015-05-04 at 19:41 +0300, Pantelis Antoniou wrote: > > You get all of the corner cases handled for free. Perhaps it works for > your case too. > > Perhaps you can educate me on what you need supported and we can make > sure it’s included. Which corner cases ? IE, what I want is simply "update" the device-tree below a PCIe slot on PCI hotplug. The DT isn't "compiled" from a dts (it's amazing how many people seem to believe this is the only way you get fdt's nowadays). It's dynamically (ie programatically) generated by firmware at boot time and contains whatever PCIe devices happen to be plugged during boot. When doing PCIe hotplug operations, the kernel does various FW calls (among others to control slot power), and during these, the FW re-probes underneath the slot and refreshes its internal representation. So the phandles remain fully consistent, there is no fixup needed. We want the kernel to also update his copy as wee in order to avoid keeping stale nodes that don't match what's there anymore. Also, when plugging specific kind of IO drawers, the FW can provide additional node and properties that will be used to control slots inside the drawers. So what we need is: - On PCIe unplug, remove all old nodes below the slot - On PCIe plug, get all the new nodes from FW Note that there is no need to do anything like platform device probing etc... the PCI layer takes care of that, we will remove the old nodes after the pci_dev are gone and create the new ones before Linux re-probes the PCIe bus subtree. So what we need is very simple: The removal can be handled without FW help, and the plug case is a matter of just transferring all those new nodes to Linux to re-expand. Since the phandle etc... are all consistent with the original tree, there is no fixups required. So the "trivial" way to do it (and the way we have implemented the FW side so far) is to have the FW simply "flatten" the subtree below the slot and pass it to Linux, with the intent of expanding it back below the slot node. This is what Gavin proposed patches do. The overlay mechanism adds all sorts of features that we don't seen to need and would make the above more complex. Cheers, Ben. -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Tue, 2015-05-05 at 07:14 +1000, Benjamin Herrenschmidt wrote: > So the "trivial" way to do it (and the way we have implemented the FW > side so far) is to have the FW simply "flatten" the subtree below the > slot and pass it to Linux, with the intent of expanding it back below > the slot node. > > This is what Gavin proposed patches do. > > The overlay mechanism adds all sorts of features that we don't seen to > need and would make the above more complex. Guys, I never got a final answer from you on this. Are we ok with adding the way to just expand a subtree or are you insistent we need to use the overlap mechanism ? Cheers, Ben. -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, May 13, 2015 at 6:35 PM, Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote: > On Tue, 2015-05-05 at 07:14 +1000, Benjamin Herrenschmidt wrote: >> So the "trivial" way to do it (and the way we have implemented the FW >> side so far) is to have the FW simply "flatten" the subtree below the >> slot and pass it to Linux, with the intent of expanding it back below >> the slot node. >> >> This is what Gavin proposed patches do. >> >> The overlay mechanism adds all sorts of features that we don't seen to >> need and would make the above more complex. > > Guys, I never got a final answer from you on this. Are we ok with adding > the way to just expand a subtree or are you insistent we need to use the > overlap mechanism ? I haven't decided really. The main thing with the current patch is I don't really like the added complexity to unflatten_dt_node. It is already a fairly complex function. Perhaps removing of "hybrid" as discussed will help? If there are things we can do to make overlays easier to use in your use case, I'd like to hear ideas. I don't really buy that being more complex than needed is an obstacle. That is very often the case to have common, scale-able solutions. I want to see a simple case be simple to support. Rob -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Wed, 2015-05-13 at 19:18 -0500, Rob Herring wrote: > I haven't decided really. > > The main thing with the current patch is I don't really like the added > complexity to unflatten_dt_node. It is already a fairly complex > function. Perhaps removing of "hybrid" as discussed will help? I agree, we should be able to make that much simpler, I was planning on sorting that out with Gavin. > If there are things we can do to make overlays easier to use in your > use case, I'd like to hear ideas. I don't really buy that being more > complex than needed is an obstacle. That is very often the case to > have common, scale-able solutions. I want to see a simple case be > simple to support. Well, it's a LOT more complex from the FW perspective for a bunch of features we don't really need, in a way because the DT update in our case is just purely informational to avoid keeping wrong/outdated DT bits, it has little functional impact (it might have a bit for interrupt routing through bridges though). However, I am also pursuing an approach on FW side using a generation count in our nodes and properties which we could use to generate arbitrary overlays if we know what generation linux has. There might actual be a usage scenario for a generic way for our firwmare to convey DT updates to Linux for other reasons. A few things that I don't find in the overlay code (but maybe I haven't looked at it hard enough): - Can it remove nodes/properties ? - Can it "commit" a changeset so it's permanently part of the main DT ? We will never have a concept of "revertable" changesets, if we need a subsequent update, we will get a new overlay from FW that will remove what needs to be removed and add what needs to be added. IE, our current mechanism without overlay is fairly simple: - On PCI unplug, we remove all nodes below the slot (from linux), the FW does the equivalent internally. - On PCI re-plug, the FW internally builds new nodes and sends a new subtree as an FDT that we can expand/attach. Now we could consider that subtree as a changeset that can be undone, but that wouldn't work for boot time. And subsequent updates wouldn't have that concept of "undoing" anyway. IE. conceptually, what overlays do today is quite rooted around the idea of having a fixed "base" DT and some pre-compiled DTB overlays that get added/removed. The design completely ignore the idea of a FW that maintains a "live" tree which we want to keep in sync, which is what we want to do here, or what we could do with a "live" open firmware implementation. Now we might be able to reconcile them, but it feels to me that the overlay/changeset stuff is too rooted in the first concept... Ben. -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi Ben, Sorry for taking this long to respond, but I am working on the same problem right now. I thought I might have something to show, but not yet :) My PCI overlay case is different. In my case there is no firmware and there is the blob is provided as an overlay. The idea is that for a given PCI bus, when a PCI device with a matching device id, vendor id is probed a matching overlay should be applied. The trickiness lies in the way that the way that the target is different each time and how to handle generational issues (i.e. what happens if the pci device is removed before the application of the overlay occurs, what happens when multiple applications should happen in parallel, etc.) > On May 14, 2015, at 03:54 , Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote: > > On Wed, 2015-05-13 at 19:18 -0500, Rob Herring wrote: > >> I haven't decided really. >> >> The main thing with the current patch is I don't really like the added >> complexity to unflatten_dt_node. It is already a fairly complex >> function. Perhaps removing of "hybrid" as discussed will help? > > I agree, we should be able to make that much simpler, I was planning on > sorting that out with Gavin. > I think using overlays should cover your case without any issues. I don’t like messing with the unflatten method TBH. >> If there are things we can do to make overlays easier to use in your >> use case, I'd like to hear ideas. I don't really buy that being more >> complex than needed is an obstacle. That is very often the case to >> have common, scale-able solutions. I want to see a simple case be >> simple to support. > > Well, it's a LOT more complex from the FW perspective for a bunch of > features we don't really need, in a way because the DT update in our > case is just purely informational to avoid keeping wrong/outdated DT > bits, it has little functional impact (it might have a bit for interrupt > routing through bridges though). > > However, I am also pursuing an approach on FW side using a generation > count in our nodes and properties which we could use to generate > arbitrary overlays if we know what generation linux has. > > There might actual be a usage scenario for a generic way for our > firwmare to convey DT updates to Linux for other reasons. > > A few things that I don't find in the overlay code (but maybe I haven't > looked at it hard enough): > > - Can it remove nodes/properties ? > Yes. > - Can it "commit" a changeset so it's permanently part of the main DT ? > We will never have a concept of "revertable" changesets, if we need a > subsequent update, we will get a new overlay from FW that will remove > what needs to be removed and add what needs to be added. > The overlay when applied is a part of the kernel DT tree. It is trivial to add a mechanism that simply commits everything and tosses away the revert information. Note that in that case you have to make provisions for the unflatten blob to not be freed or for the device tree nodes/properties to be dynamically allocated. > IE, our current mechanism without overlay is fairly simple: > > - On PCI unplug, we remove all nodes below the slot (from linux), > the FW does the equivalent internally. > If you use an overlay, you just revert it and everything would be as it was before, without anything hanging below the slot node. Note that the ‘remove all nodes below the slot’ does not work for my case. That is because there are devices being instantiated under the slot (i2c busses, i2c devices, FPGAs etc) that need to be removed from the system. > - On PCI re-plug, the FW internally builds new nodes and sends a > new subtree as an FDT that we can expand/attach. > You can easily send a DT blob containing an overlay from firmware. It can be even easy, since you might not have to recreate the full blob each time, but instead using flat device tree methods to populate the few properties that change each time. > Now we could consider that subtree as a changeset that can be undone, > but that wouldn't work for boot time. And subsequent updates wouldn't > have that concept of "undoing" anyway. > I have posted another patch that does boot-time DT quirk which are non-revertable. https://lkml.org/lkml/2015/2/18/258 > IE. conceptually, what overlays do today is quite rooted around the idea > of having a fixed "base" DT and some pre-compiled DTB overlays that > get added/removed. The design completely ignore the idea of a FW that > maintains a "live" tree which we want to keep in sync, which is what we > want to do here, or what we could do with a "live" open firmware > implementation. > > Now we might be able to reconcile them, but it feels to me that the > overlay/changeset stuff is too rooted in the first concept… > The first DT overlays use case (beaglebone capes) is what got the concept started. Right now is a generic mechanism to apply modifications to the kernel live tree, with the possibility to revert them. > Ben. > > Regards — Pantelis -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, 2015-05-14 at 09:23 +0300, Pantelis Antoniou wrote: > > A few things that I don't find in the overlay code (but maybe I haven't > > looked at it hard enough): > > > > - Can it remove nodes/properties ? > > > > Yes. Ok, I've missed that when looking at the overlay code then, I'll have to give it a closer look. > > - Can it "commit" a changeset so it's permanently part of the main DT ? > > We will never have a concept of "revertable" changesets, if we need a > > subsequent update, we will get a new overlay from FW that will remove > > what needs to be removed and add what needs to be added. > > > > The overlay when applied is a part of the kernel DT tree. > It is trivial to add a mechanism that simply commits everything and > tosses away the revert information. > > Note that in that case you have to make provisions for the unflatten > blob to not be freed or for the device tree nodes/properties to be > dynamically allocated. I think it makes sense to do the dynamic thing anyway... > > IE, our current mechanism without overlay is fairly simple: > > > > - On PCI unplug, we remove all nodes below the slot (from linux), > > the FW does the equivalent internally. > > > > If you use an overlay, you just revert it and everything would > be as it was before, without anything hanging below the slot node. Except that doesn't work for the boot time content which we get from the firmware as part of the initial FDT (and we can't change that without breaking backward compatibility). > Note that the ‘remove all nodes below the slot’ does not work for my case. > > That is because there are devices being instantiated under the slot > (i2c busses, i2c devices, FPGAs etc) that need to be removed from the > system. Right while in my case, there isn't, it's just the standard OF PCI representation generated by FW, the main thing is that it might have some enriched properties for some known cable cards of external drawers that are good to have. > > - On PCI re-plug, the FW internally builds new nodes and sends a > > new subtree as an FDT that we can expand/attach. > > > > You can easily send a DT blob containing an overlay from firmware. Sending one is easy. Building it is not :-) > It can be even easy, since you might not have to recreate the full blob > each time, but instead using flat device tree methods to populate the > few properties that change each time. No, we basically have our internal tree in the firmware in a format similar to Linux, ie, a pointer based tree. We can "flatten" it of course, but generating an overlay is trickier. We can, it's just more work and we are running out of time (I basically have to cut that FW in the next few days, then we'll be stuck with whatever interfaces we created, I have a big of time to fix bugs after that but that's about it). > > Now we could consider that subtree as a changeset that can be undone, > > but that wouldn't work for boot time. And subsequent updates wouldn't > > have that concept of "undoing" anyway. > > > > I have posted another patch that does boot-time DT quirk which are > non-revertable. > > https://lkml.org/lkml/2015/2/18/258 Not sure how that applies in my case ... I can't change the representation of the PCI subtree, this is standard OFW representation, I can't change the FW to make it an overlay-like thing at boot time, that would break existing kernels. > > IE. conceptually, what overlays do today is quite rooted around the idea > > of having a fixed "base" DT and some pre-compiled DTB overlays that > > get added/removed. The design completely ignore the idea of a FW that > > maintains a "live" tree which we want to keep in sync, which is what we > > want to do here, or what we could do with a "live" open firmware > > implementation. > > > > Now we might be able to reconcile them, but it feels to me that the > > overlay/changeset stuff is too rooted in the first concept… > > > > The first DT overlays use case (beaglebone capes) is what got the concept > started. > > Right now is a generic mechanism to apply modifications to the kernel > live tree, with the possibility to revert them. Yes but as I said it's not really thought in term of keeping the kernel tree in sync with an external dynamically generated tree. Maybe we can fix it, but it's more complex... Ben. > > Ben. > > > > > > Regards > > — Pantelis -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi Ben, > On May 14, 2015, at 09:46 , Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote: > > On Thu, 2015-05-14 at 09:23 +0300, Pantelis Antoniou wrote: > >>> A few things that I don't find in the overlay code (but maybe I haven't >>> looked at it hard enough): >>> >>> - Can it remove nodes/properties ? >>> >> >> Yes. > > Ok, I've missed that when looking at the overlay code then, I'll have to > give it a closer look. > Ok, let me be more specific. It used to be able to do it ;) The problem was the format used (a ‘-‘ prefix to the name). Since I didn’t have clear use for it, I was asked to drop it by Grant. The removal capability is of-course there for the revert case. >>> - Can it "commit" a changeset so it's permanently part of the main DT ? >>> We will never have a concept of "revertable" changesets, if we need a >>> subsequent update, we will get a new overlay from FW that will remove >>> what needs to be removed and add what needs to be added. >>> >> >> The overlay when applied is a part of the kernel DT tree. >> It is trivial to add a mechanism that simply commits everything and >> tosses away the revert information. >> >> Note that in that case you have to make provisions for the unflatten >> blob to not be freed or for the device tree nodes/properties to be >> dynamically allocated. > > I think it makes sense to do the dynamic thing anyway... > >>> IE, our current mechanism without overlay is fairly simple: >>> >>> - On PCI unplug, we remove all nodes below the slot (from linux), >>> the FW does the equivalent internally. >>> >> >> If you use an overlay, you just revert it and everything would >> be as it was before, without anything hanging below the slot node. > > Except that doesn't work for the boot time content which we get > from the firmware as part of the initial FDT (and we can't change that > without breaking backward compatibility). > OK >> Note that the ‘remove all nodes below the slot’ does not work for my case. >> >> That is because there are devices being instantiated under the slot >> (i2c busses, i2c devices, FPGAs etc) that need to be removed from the >> system. > > Right while in my case, there isn't, it's just the standard OF PCI > representation generated by FW, the main thing is that it might have > some enriched properties for some known cable cards of external drawers > that are good to have. > I see. >>> - On PCI re-plug, the FW internally builds new nodes and sends a >>> new subtree as an FDT that we can expand/attach. >>> >> >> You can easily send a DT blob containing an overlay from firmware. > > Sending one is easy. Building it is not :-) > Heh, true ;) >> It can be even easy, since you might not have to recreate the full blob >> each time, but instead using flat device tree methods to populate the >> few properties that change each time. > > No, we basically have our internal tree in the firmware in a format > similar to Linux, ie, a pointer based tree. We can "flatten" it of > course, but generating an overlay is trickier. We can, it's just more > work and we are running out of time (I basically have to cut that FW in > the next few days, then we'll be stuck with whatever interfaces we > created, I have a big of time to fix bugs after that but that's about > it). > Hmm, since you just want to transmit a whole subtree things are a bit simpler. You don’t need any of the fixups, and your target node is known. So your overlay is simply: / { fragment@0 { target-path = “/foo”; __overlay__ { /* contents of the slot */ }; }; }; I think it’s possible to just bit-mangle a blob (in pseudo code). const u8 template_overlay_blob[] = { <compiled blob of the above> }; flatten_slot(slot_blob); overlay_blob = allocate_new_blob(template_overlay_blob, slot_blob); overlay_node = find_node(overlay_blob, “/fragment@0/__overlay__); target_prop = find_prop(overlay_blob, “/fragment@0/target-path”); inject_slot_blob(overlay_blob, overlay_node, slot_blob); modify_slot_target(overlay_blob, target_prop, slot_target); I don’t think you need to re-flatten anything, shuffling bits around with memmove should work. >>> Now we could consider that subtree as a changeset that can be undone, >>> but that wouldn't work for boot time. And subsequent updates wouldn't >>> have that concept of "undoing" anyway. >>> >> >> I have posted another patch that does boot-time DT quirk which are >> non-revertable. >> >> https://lkml.org/lkml/2015/2/18/258 > > Not sure how that applies in my case ... I can't change the > representation of the PCI subtree, this is standard OFW representation, > I can't change the FW to make it an overlay-like thing at boot time, > that would break existing kernels. > The idea is to append the ‘quirk’ to the already booting device tree blob. Another idea floating around was to simple concatenate the booting blob with any overlay blobs you want applied at boot time. >>> IE. conceptually, what overlays do today is quite rooted around the idea >>> of having a fixed "base" DT and some pre-compiled DTB overlays that >>> get added/removed. The design completely ignore the idea of a FW that >>> maintains a "live" tree which we want to keep in sync, which is what we >>> want to do here, or what we could do with a "live" open firmware >>> implementation. >>> >>> Now we might be able to reconcile them, but it feels to me that the >>> overlay/changeset stuff is too rooted in the first concept… >>> >> >> The first DT overlays use case (beaglebone capes) is what got the concept >> started. >> >> Right now is a generic mechanism to apply modifications to the kernel >> live tree, with the possibility to revert them. > > Yes but as I said it's not really thought in term of keeping the kernel > tree in sync with an external dynamically generated tree. Maybe we can > fix it, but it's more complex… > Yes it is, unfortunately. > Ben. > >>> Ben. >>> >>> >> >> Regards >> >> — Pantelis > > Regards — Pantelis -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, 2015-05-14 at 10:04 +0300, Pantelis Antoniou wrote: > Hmm, since you just want to transmit a whole subtree things are a bit > simpler. > > You don’t need any of the fixups, and your target node is known. > > So your overlay is simply: > > / { > fragment@0 { > target-path = “/foo”; > __overlay__ { > /* contents of the slot */ > }; > }; > }; > > I think it’s possible to just bit-mangle a blob (in pseudo code). > > const u8 template_overlay_blob[] = { <compiled blob of the above> }; > > flatten_slot(slot_blob); > > overlay_blob = allocate_new_blob(template_overlay_blob, slot_blob); > > overlay_node = find_node(overlay_blob, “/fragment@0/__overlay__); > target_prop = find_prop(overlay_blob, “/fragment@0/target-path”); > > inject_slot_blob(overlay_blob, overlay_node, slot_blob); > modify_slot_target(overlay_blob, target_prop, slot_target); > > I don’t think you need to re-flatten anything, shuffling bits around with > memmove should work. Fairly gross :-) But yeah generating the overlay doesn't necessarily scare me, I can generate a temp tree that is the overlay in which I "copy" the subtree (or in my internal ptr-based representation I could have a concept of alias which I follow while flattening). That leaves me with these problems: - No support for removing of nodes, so that needs to be added back to the format and to Linux unless I continue removing by hand in the PCI hotplug code itself - No support for "committing" the overlay which needs to be added as well. > >>> Now we could consider that subtree as a changeset that can be undone, > >>> but that wouldn't work for boot time. And subsequent updates wouldn't > >>> have that concept of "undoing" anyway. > >>> > >> > >> I have posted another patch that does boot-time DT quirk which are > >> non-revertable. > >> > >> https://lkml.org/lkml/2015/2/18/258 > > > > Not sure how that applies in my case ... I can't change the > > representation of the PCI subtree, this is standard OFW representation, > > I can't change the FW to make it an overlay-like thing at boot time, > > that would break existing kernels. > > > > The idea is to append the ‘quirk’ to the already booting device tree blob. I know but that's not how things work for me. At boot time the FW passes me one tree that contains all the PCI stuff it has probed. > Another idea floating around was to simple concatenate the booting blob with > any overlay blobs you want applied at boot time. Sure but I don't get overlay blobs at boot time. > >>> IE. conceptually, what overlays do today is quite rooted around the idea > >>> of having a fixed "base" DT and some pre-compiled DTB overlays that > >>> get added/removed. The design completely ignore the idea of a FW that > >>> maintains a "live" tree which we want to keep in sync, which is what we > >>> want to do here, or what we could do with a "live" open firmware > >>> implementation. > >>> > >>> Now we might be able to reconcile them, but it feels to me that the > >>> overlay/changeset stuff is too rooted in the first concept… > >>> > >> > >> The first DT overlays use case (beaglebone capes) is what got the concept > >> started. > >> > >> Right now is a generic mechanism to apply modifications to the kernel > >> live tree, with the possibility to revert them. > > > > Yes but as I said it's not really thought in term of keeping the kernel > > tree in sync with an external dynamically generated tree. Maybe we can > > fix it, but it's more complex… > > > > Yes it is, unfortunately. Right. Which makes the solution of just passing my bit of tree as a blob which I expand in Linux where I want it rather than an overlay tempting if we can make Gavin patch more palatable (removing the hybrid stuff etc...). Cheers, Ben. > > Ben. > > > >>> Ben. > >>> > >>> > >> > >> Regards > >> > >> — Pantelis > > > > > > Regards > > — Pantelis -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi Ben, > On May 14, 2015, at 10:14 , Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote: > > On Thu, 2015-05-14 at 10:04 +0300, Pantelis Antoniou wrote: > >> Hmm, since you just want to transmit a whole subtree things are a bit >> simpler. >> >> You don’t need any of the fixups, and your target node is known. >> >> So your overlay is simply: >> >> / { >> fragment@0 { >> target-path = “/foo”; >> __overlay__ { >> /* contents of the slot */ >> }; >> }; >> }; >> >> I think it’s possible to just bit-mangle a blob (in pseudo code). >> >> const u8 template_overlay_blob[] = { <compiled blob of the above> }; >> >> flatten_slot(slot_blob); >> >> overlay_blob = allocate_new_blob(template_overlay_blob, slot_blob); >> >> overlay_node = find_node(overlay_blob, “/fragment@0/__overlay__); >> target_prop = find_prop(overlay_blob, “/fragment@0/target-path”); >> >> inject_slot_blob(overlay_blob, overlay_node, slot_blob); >> modify_slot_target(overlay_blob, target_prop, slot_target); >> >> I don’t think you need to re-flatten anything, shuffling bits around with >> memmove should work. > > Fairly gross :-) > You don’t want to know how sausages are made, but they are delicious :) > But yeah generating the overlay doesn't necessarily scare me, I can > generate a temp tree that is the overlay in which I "copy" the subtree > (or in my internal ptr-based representation I could have a concept of > alias which I follow while flattening). > > That leaves me with these problems: > > - No support for removing of nodes, so that needs to be added back to > the format and to Linux unless I continue removing by hand in the PCI > hotplug code itself > What kind of nodes/properties you need to remove at _application_ time? What you describe is inserting a bunch of properties and nodes under a slot’s device node. Reverting the overlay removes them all just fine. > - No support for "committing" the overlay which needs to be added as > well. > That’s the easiest part. >>>>> Now we could consider that subtree as a changeset that can be undone, >>>>> but that wouldn't work for boot time. And subsequent updates wouldn't >>>>> have that concept of "undoing" anyway. >>>>> >>>> >>>> I have posted another patch that does boot-time DT quirk which are >>>> non-revertable. >>>> >>>> https://lkml.org/lkml/2015/2/18/258 >>> >>> Not sure how that applies in my case ... I can't change the >>> representation of the PCI subtree, this is standard OFW representation, >>> I can't change the FW to make it an overlay-like thing at boot time, >>> that would break existing kernels. >>> >> >> The idea is to append the ‘quirk’ to the already booting device tree blob. > > I know but that's not how things work for me. At boot time the FW passes > me one tree that contains all the PCI stuff it has probed. > >> Another idea floating around was to simple concatenate the booting blob with >> any overlay blobs you want applied at boot time. > > Sure but I don't get overlay blobs at boot time. > >>>>> IE. conceptually, what overlays do today is quite rooted around the idea >>>>> of having a fixed "base" DT and some pre-compiled DTB overlays that >>>>> get added/removed. The design completely ignore the idea of a FW that >>>>> maintains a "live" tree which we want to keep in sync, which is what we >>>>> want to do here, or what we could do with a "live" open firmware >>>>> implementation. >>>>> >>>>> Now we might be able to reconcile them, but it feels to me that the >>>>> overlay/changeset stuff is too rooted in the first concept… >>>>> >>>> >>>> The first DT overlays use case (beaglebone capes) is what got the concept >>>> started. >>>> >>>> Right now is a generic mechanism to apply modifications to the kernel >>>> live tree, with the possibility to revert them. >>> >>> Yes but as I said it's not really thought in term of keeping the kernel >>> tree in sync with an external dynamically generated tree. Maybe we can >>> fix it, but it's more complex… >>> >> >> Yes it is, unfortunately. > > Right. Which makes the solution of just passing my bit of tree as a blob > which I expand in Linux where I want it rather than an overlay tempting > if we can make Gavin patch more palatable (removing the hybrid stuff > etc…) > . > I see. Well, how about this? Who said you have to do the whole blob dance in the firmware? You can just as easily pass the blob as it is to the linux kernel and the kernel there can convert it to an overlay and apply it. > Cheers, > Ben. > >>> Ben. >>> >>>>> Ben. >>>>> >>>>> >>>> >>>> Regards >>>> >>>> — Pantelis >>> >>> >> >> Regards >> >> — Pantelis > > Regards — Pantelis -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, 2015-05-14 at 10:19 +0300, Pantelis Antoniou wrote: > > You don’t want to know how sausages are made, but they are delicious :) ... most of the time :) > > But yeah generating the overlay doesn't necessarily scare me, I can > > generate a temp tree that is the overlay in which I "copy" the subtree > > (or in my internal ptr-based representation I could have a concept of > > alias which I follow while flattening). > > > > That leaves me with these problems: > > > > - No support for removing of nodes, so that needs to be added back to > > the format and to Linux unless I continue removing by hand in the PCI > > hotplug code itself > > > > What kind of nodes/properties you need to remove at _application_ time? Well, if we stick to removing by hand in Linux for the unplug case, then none. > What you describe is inserting a bunch of properties and nodes under > a slot’s device node. Reverting the overlay removes them all just fine. Except that still doesn't work for boot time :-) So I would have to do a special case on unplug: if (slot->dt_is_overlay) /* set to false at boot */ remove_subtree_myself(); else undo_overlay(slot->overlay); > > - No support for "committing" the overlay which needs to be added as > > well. > > > > That’s the easiest part. Yeah, I will need to get my head around the code a bit more but it doesn't seem too scary. > I see. Well, how about this? > > Who said you have to do the whole blob dance in the firmware? > > You can just as easily pass the blob as it is to the linux kernel and > the kernel there can convert it to an overlay and apply it. That's not that pretty but we can do that too which solve the problem of fixing the FW interface. There is however an argument to be made in having the FW be able to generate arbitrary overlays. If we ever want to pass more "property" updates or node updates to Linux at runtime. A few cases have crept up on the radar, like updating the pstate tables or VPD informations ... If we go down that path, then I would implement a concept of generation count in the firmware, so I can generate an overlay that include all the changes since the last "generation" given to Linux. However that requires supporting removal of nodes/properties. So I'm tempted to keep that feature on the back burner and go with an ad-hoc interface for PCI for now. Ben. -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> So I would have to do a special case on unplug: > > if (slot->dt_is_overlay) /* set to false at boot */ > remove_subtree_myself(); > else > undo_overlay(slot->overlay); Of course I just inverted the polarity of the if () in the example :-) But you get the idea... -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi Ben, > On May 14, 2015, at 10:25 , Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote: > > On Thu, 2015-05-14 at 10:19 +0300, Pantelis Antoniou wrote: > >> >> You don’t want to know how sausages are made, but they are delicious :) > > ... most of the time :) > >>> But yeah generating the overlay doesn't necessarily scare me, I can >>> generate a temp tree that is the overlay in which I "copy" the subtree >>> (or in my internal ptr-based representation I could have a concept of >>> alias which I follow while flattening). >>> >>> That leaves me with these problems: >>> >>> - No support for removing of nodes, so that needs to be added back to >>> the format and to Linux unless I continue removing by hand in the PCI >>> hotplug code itself >>> >> >> What kind of nodes/properties you need to remove at _application_ time? > > Well, if we stick to removing by hand in Linux for the unplug case, then > none. > OK >> What you describe is inserting a bunch of properties and nodes under >> a slot’s device node. Reverting the overlay removes them all just fine. > > Except that still doesn't work for boot time :-) > > So I would have to do a special case on unplug: > > if (slot->dt_is_overlay) /* set to false at boot */ > remove_subtree_myself(); > else > undo_overlay(slot->overlay); > OK, in that case you do require removal. But in any case it’s the ‘negative’ of an already applied one, either at boot time or not. Modifying the overlay code to apply a ‘negative’ property should do the trick. Is that correct? >>> - No support for "committing" the overlay which needs to be added as >>> well. >>> >> >> That’s the easiest part. > > Yeah, I will need to get my head around the code a bit more but it > doesn't seem too scary. > >> I see. Well, how about this? >> >> Who said you have to do the whole blob dance in the firmware? >> >> You can just as easily pass the blob as it is to the linux kernel and >> the kernel there can convert it to an overlay and apply it. > > That's not that pretty but we can do that too which solve the problem of > fixing the FW interface. > > There is however an argument to be made in having the FW be able to > generate arbitrary overlays. If we ever want to pass more "property" > updates or node updates to Linux at runtime. > > A few cases have crept up on the radar, like updating the pstate tables > or VPD informations ... > > If we go down that path, then I would implement a concept of generation > count in the firmware, so I can generate an overlay that include all the > changes since the last "generation" given to Linux. > I will probably need that generation count myself for my PCI use case. > However that requires supporting removal of nodes/properties. So I'm > tempted to keep that feature on the back burner and go with an ad-hoc > interface for PCI for now. > I see. Bonne chance :) > Ben. > > Regards — Pantelis -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, 2015-05-14 at 10:34 +0300, Pantelis Antoniou wrote: > >> What you describe is inserting a bunch of properties and nodes under > >> a slot’s device node. Reverting the overlay removes them all just fine. > > > > Except that still doesn't work for boot time :-) > > > > So I would have to do a special case on unplug: > > > > if (slot->dt_is_overlay) /* set to false at boot */ > > remove_subtree_myself(); > > else > > undo_overlay(slot->overlay); > > > > OK, in that case you do require removal. But in any case it’s the ‘negative’ > of an already applied one, either at boot time or not. Sort-of, unless we have a way in the overlay to simply specify node removal statements so we don't have to explicitly remove all properties (or even all children). > Modifying the overlay code to apply a ‘negative’ property should do the trick. > > Is that correct? I would do negatives node and let Linux imply the properties (or even children). But yes, that would probably do. .../... > I will probably need that generation count myself for my PCI use case. > > > However that requires supporting removal of nodes/properties. So I'm > > tempted to keep that feature on the back burner and go with an ad-hoc > > interface for PCI for now. > > > > I see. Bonne chance :) Merci :) Cheers, Ben. -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Hi Ben, > On May 14, 2015, at 10:47 , Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote: > [snip] So I spend some time thinking about your use case and I think it boils down to this: I have a live tree in the firmware, I have made changes and I need to reflect those changes to the live tree in the kernel. Sounds like ‘how do I generate a patch for getting those two in sync'. No? I can see where this might be useful for others as all. I think we really need to create a liblivedt like we have libfdt since we have a number of projects going about using/manipulating DT at runtime. 1. The linux kernel, with it’s own live tree implementation. 2. The device tree compiler (it has a live tree) custom implemented. 3. Your weird and wonderful (or wacky) firmware. 4. u-boot does use DT now, but it does with libfdt. I believe this is suboptimal. 5. barebox does DT as well. Most of what we want to do with DT can be abstracted in a library I think that all of those projects can use. What are your thoughts? Regards — Pantelis -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, 2015-05-14 at 14:02 +0300, Pantelis Antoniou wrote: > Hi Ben, > > > On May 14, 2015, at 10:47 , Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote: > > > > [snip] > > So I spend some time thinking about your use case and I think it boils down > to this: > > I have a live tree in the firmware, I have made changes and I need to reflect > those changes to the live tree in the kernel. > > Sounds like ‘how do I generate a patch for getting those two in sync'. No? More or less. > I can see where this might be useful for others as all. > > I think we really need to create a liblivedt like we have libfdt since > we have a number of projects going about using/manipulating DT at runtime. > > 1. The linux kernel, with it’s own live tree implementation. > 2. The device tree compiler (it has a live tree) custom implemented. > 3. Your weird and wonderful (or wacky) firmware. > 4. u-boot does use DT now, but it does with libfdt. I believe this is suboptimal. > 5. barebox does DT as well. > > Most of what we want to do with DT can be abstracted in a library I think that > all of those projects can use. > > What are your thoughts? Well, we have at least two implementations, the kernel one and the one in our OPAL firmware: https://github.com/open-power/skiboot/blob/master/include/device.h https://github.com/open-power/skiboot/blob/master/core/device.c The latter uses some nice Rusty tricks (tm) for multiple argument functions. It would make sense to do a library somewhere yes. However, I need to cut my firmware API pretty much today so I think for now I'll stick to something Ad-Hoc for the PCI hotplug code that just passes the bit of FDT with the new devices and leave the "grand project" of live sync of the tree for later. There are other implementations of live DT in various Open Firmware variants out there, most are in Forth which I suggest you don't bother with unless you enjoy pain, but I think at least one of these is actually in C. Cheers, Ben. -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, May 01, 2015 at 04:03:06PM +1000, Gavin Shan wrote: >The requirement is raised when developing the PCI hotplug feature >for PowerPC PowerNV platform, which runs on top of skiboot firmware. >When plugging PCI adapter to one PCI slot, the firmware rescans the >slot and build FDT (Flat Device Tree) blob, which is sent to the >PowerNV PCI hotplug driver for processing. The new constructed device >nodes from the FDT blob are expected to be attached to the device >node of the PCI slot. Unfortunately, it seems we don't have a API >to support the scenario. The patch intends to support it by newly >introduced function of_fdt_add_subtree(), the design behind it is >shown as below: > > * When the sub-tree FDT blob, which is owned by firmware, is > received by kernel. It's copied over to the blob, which is > dynamically allocated. Since then, the FDT blob owned by > firmware isn't touched. > * Rework unflatten_dt_node() so that the device nodes in current > and deeper depth have been constructed from the FDT blob. All > device nodes are marked with flag OF_DYNAMIC_HYBIRD, which is > similar to OF_DYNAMIC. However, device node with the flag set > can be free'd, but in the way other than that for OF_DYNAMIC > device nodes. > * of_fdt_add_subtree() is the introduced API to do the work. > There are already lots of discussion on how to reuse overlay for my case. Thanks to all for your time on this. I spend some time thinking about it last night and this morning. I would like to summarize it as below. It's for sure almost all ideas coming from your guys and I'm just documenting it. If there are obvious problems in the following summary, please let me know so that I can fix them as early as possible to save more time. ================ SKIBOOT & KERNEL ================ The idea came from Ben and I'm following to implement it as follows: - One counter is mantained: cur_counter = 0; - PCI hot plugging happens happens as: * Kernel gets skiboot's cur_counter with OPAL API, which is (x). * Skiboot does hot plugging and rescans the slot, then populate the device-tree nodes with (++cur_counter), which means the counter is turned to (x+1). * Kernel retrieves the FDT overlay blob on the device-tree changes since last time/token ((x)). Kernel unflattens it and applies the changes by overlay. The slot simply records the overlay (IDR) ID for the device-tree change. - PCI hot unplugging happens as: * Revert the changes simply if the slot had valid IDR ID. Otherwise, the device nodes are flatten during bootup time, we just remove them as we're doing now. Note that device nodes can't be free'd because the memory chunks consumed by them are allocated from memblock or reserved by skiboot. - Some questions/problems: * I don't understand how kexec can figure out the device-tree with applied changes from overlay. I assume kexec is simply using the FDT blob from skiboot as seen by the first kernel during frest boot. Kernel APIs: * id = of_overlay_create(); of_overlay_destroy(id) Skiboot API: * int64_t opal_get_overlay_dt(uint64_t *counter, void *blob, uint32_t len) blob == NULL, get current counter. blob != NULL, get overlay blob since (*counter). Skiboot also returns the last counter. The memory chunk for "blob" is always owned by kernel, which doesn't know the memory size to hold the overlay FDT blob. So we have to try with discret 64KB, which is PAGE_SIZE. After the overlay blob is unflatten and applied, the memory chunk can be free. We don't have to keep it for reverting the changes introduced by the overlay. ============================= OVERLAY FDT BLOB FROM SKIBOOT ============================= ROOT { A { target=<target node's phandle> __overlay__ { } } B { target=<target node's phandle> __overlay__ { } } } Thanks, Gavin >Cc: Grant Likely <grant.likely@linaro.org> >Cc: Rob Herring <robh+dt@kernel.org> >Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com> >--- > drivers/of/dynamic.c | 19 +++++-- > drivers/of/fdt.c | 133 ++++++++++++++++++++++++++++++++++++++++--------- > include/linux/of.h | 2 + > include/linux/of_fdt.h | 1 + > 4 files changed, 127 insertions(+), 28 deletions(-) > >diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c >index 3351ef4..f562080 100644 >--- a/drivers/of/dynamic.c >+++ b/drivers/of/dynamic.c >@@ -330,13 +330,22 @@ void of_node_release(struct kobject *kobj) > return; > } > >- if (!of_node_check_flag(node, OF_DYNAMIC)) >+ /* Release the subtree */ >+ if (node->subtree) { >+ kfree(node->subtree); >+ node->subtree = NULL; >+ } >+ >+ if (!of_node_check_flag(node, OF_DYNAMIC) && >+ !of_node_check_flag(node, OF_DYNAMIC_HYBIRD)) > return; > > while (prop) { > struct property *next = prop->next; >- kfree(prop->name); >- kfree(prop->value); >+ if (of_node_check_flag(node, OF_DYNAMIC)) { >+ kfree(prop->name); >+ kfree(prop->value); >+ } > kfree(prop); > prop = next; > >@@ -345,7 +354,9 @@ void of_node_release(struct kobject *kobj) > node->deadprops = NULL; > } > } >- kfree(node->full_name); >+ >+ if (of_node_check_flag(node, OF_DYNAMIC)) >+ kfree(node->full_name); > kfree(node->data); > kfree(node); > } >diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c >index cde35c5d01..7659560 100644 >--- a/drivers/of/fdt.c >+++ b/drivers/of/fdt.c >@@ -28,6 +28,10 @@ > #include <asm/setup.h> /* for COMMAND_LINE_SIZE */ > #include <asm/page.h> > >+#include "of_private.h" >+ >+static int cur_node_depth; >+ > /* > * of_fdt_limit_memory - limit the number of regions in the /memory node > * @limit: maximum entries >@@ -168,20 +172,20 @@ static void *unflatten_dt_alloc(void **mem, unsigned long size, > * @dad: Parent struct device_node > * @fpsize: Size of the node path up at the current depth. > */ >-static void * unflatten_dt_node(void *blob, >- void *mem, >- int *poffset, >- struct device_node *dad, >- struct device_node **nodepp, >- unsigned long fpsize, >- bool dryrun) >+static void *unflatten_dt_node(void *blob, >+ void *mem, >+ int *poffset, >+ struct device_node *dad, >+ struct device_node **nodepp, >+ unsigned long fpsize, >+ bool dryrun, >+ bool dynamic) > { > const __be32 *p; > struct device_node *np; > struct property *pp, **prev_pp = NULL; > const char *pathp; > unsigned int l, allocl; >- static int depth = 0; > int old_depth; > int offset; > int has_name = 0; >@@ -219,12 +223,18 @@ static void * unflatten_dt_node(void *blob, > } > } > >- np = unflatten_dt_alloc(&mem, sizeof(struct device_node) + allocl, >+ if (dynamic) >+ np = kzalloc(sizeof(struct device_node) + allocl, GFP_KERNEL); >+ else >+ np = unflatten_dt_alloc(&mem, >+ sizeof(struct device_node) + allocl, > __alignof__(struct device_node)); > if (!dryrun) { > char *fn; > of_node_init(np); > np->full_name = fn = ((char *)np) + sizeof(*np); >+ if (dynamic) >+ of_node_set_flag(np, OF_DYNAMIC_HYBIRD); > if (new_format) { > /* rebuild full path for new format */ > if (dad && dad->parent) { >@@ -267,8 +277,12 @@ static void * unflatten_dt_node(void *blob, > } > if (strcmp(pname, "name") == 0) > has_name = 1; >- pp = unflatten_dt_alloc(&mem, sizeof(struct property), >- __alignof__(struct property)); >+ >+ if (dynamic) >+ pp = kzalloc(sizeof(struct property), GFP_KERNEL); >+ else >+ pp = unflatten_dt_alloc(&mem, sizeof(struct property), >+ __alignof__(struct property)); > if (!dryrun) { > /* We accept flattened tree phandles either in > * ePAPR-style "phandle" properties, or the >@@ -309,8 +323,13 @@ static void * unflatten_dt_node(void *blob, > if (pa < ps) > pa = p1; > sz = (pa - ps) + 1; >- pp = unflatten_dt_alloc(&mem, sizeof(struct property) + sz, >- __alignof__(struct property)); >+ >+ if (dynamic) >+ pp = kzalloc(sizeof(struct property) + sz, GFP_KERNEL); >+ else >+ pp = unflatten_dt_alloc(&mem, >+ sizeof(struct property) + sz, >+ __alignof__(struct property)); > if (!dryrun) { > pp->name = "name"; > pp->length = sz; >@@ -334,13 +353,21 @@ static void * unflatten_dt_node(void *blob, > np->type = "<NULL>"; > } > >- old_depth = depth; >- *poffset = fdt_next_node(blob, *poffset, &depth); >- if (depth < 0) >- depth = 0; >- while (*poffset > 0 && depth > old_depth) >- mem = unflatten_dt_node(blob, mem, poffset, np, NULL, >- fpsize, dryrun); >+ old_depth = cur_node_depth; >+ *poffset = fdt_next_node(blob, *poffset, &cur_node_depth); >+ while (*poffset > 0) { >+ if (cur_node_depth < old_depth) >+ break; >+ >+ if (cur_node_depth == old_depth) >+ mem = unflatten_dt_node(blob, mem, poffset, >+ dad, NULL, fpsize, >+ dryrun, dynamic); >+ else if (cur_node_depth > old_depth) >+ mem = unflatten_dt_node(blob, mem, poffset, >+ np, NULL, fpsize, >+ dryrun, dynamic); >+ } > > if (*poffset < 0 && *poffset != -FDT_ERR_NOTFOUND) > pr_err("unflatten: error %d processing FDT\n", *poffset); >@@ -379,8 +406,8 @@ static void * unflatten_dt_node(void *blob, > * for the resulting tree > */ > static void __unflatten_device_tree(void *blob, >- struct device_node **mynodes, >- void * (*dt_alloc)(u64 size, u64 align)) >+ struct device_node **mynodes, >+ void * (*dt_alloc)(u64 size, u64 align)) > { > unsigned long size; > int start; >@@ -405,7 +432,9 @@ static void __unflatten_device_tree(void *blob, > > /* First pass, scan for size */ > start = 0; >- size = (unsigned long)unflatten_dt_node(blob, NULL, &start, NULL, NULL, 0, true); >+ cur_node_depth = 1; >+ size = (unsigned long)unflatten_dt_node(blob, NULL, &start, NULL, >+ NULL, 0, true, false); > size = ALIGN(size, 4); > > pr_debug(" size is %lx, allocating...\n", size); >@@ -420,7 +449,8 @@ static void __unflatten_device_tree(void *blob, > > /* Second pass, do actual unflattening */ > start = 0; >- unflatten_dt_node(blob, mem, &start, NULL, mynodes, 0, false); >+ cur_node_depth = 1; >+ unflatten_dt_node(blob, mem, &start, NULL, mynodes, 0, false, false); > if (be32_to_cpup(mem + size) != 0xdeadbeef) > pr_warning("End of tree marker overwritten: %08x\n", > be32_to_cpup(mem + size)); >@@ -448,6 +478,61 @@ void of_fdt_unflatten_tree(unsigned long *blob, > } > EXPORT_SYMBOL_GPL(of_fdt_unflatten_tree); > >+static void populate_sysfs_for_child_nodes(struct device_node *parent) >+{ >+ struct device_node *child; >+ >+ for_each_child_of_node(parent, child) { >+ __of_attach_node_sysfs(child); >+ populate_sysfs_for_child_nodes(child); >+ } >+} >+ >+/** >+ * of_fdt_add_substree - Create sub-tree of device nodes >+ * @parent: parent device node to which the sub-tree will attach >+ * @blob: flat device tree blob representing the sub-tree >+ * >+ * Copy over the FDT blob, which passed from firmware, and then >+ * unflatten the sub-tree. >+ */ >+void of_fdt_add_subtree(struct device_node *parent, void *blob) >+{ >+ int start = 0; >+ >+ /* Validate the header */ >+ if (!blob || fdt_check_header(blob)) { >+ pr_err("%s: Invalid device-tree blob header at 0x%p\n", >+ __func__, blob); >+ return; >+ } >+ >+ /* Free the flat blob for last time lazily */ >+ if (parent->subtree) { >+ kfree(parent->subtree); >+ parent->subtree = NULL; >+ } >+ >+ /* Copy over the flat blob */ >+ parent->subtree = kzalloc(fdt_totalsize(blob), GFP_KERNEL); >+ if (!parent->subtree) { >+ pr_err("%s: Cannot copy over device-tree blob\n", >+ __func__); >+ return; >+ } >+ >+ memcpy(parent->subtree, blob, fdt_totalsize(blob)); >+ >+ /* Unflatten it */ >+ mutex_lock(&of_mutex); >+ cur_node_depth = 1; >+ unflatten_dt_node(parent->subtree, NULL, &start, parent, NULL, >+ strlen(parent->full_name), false, true); >+ populate_sysfs_for_child_nodes(parent); >+ mutex_unlock(&of_mutex); >+} >+EXPORT_SYMBOL(of_fdt_add_subtree); >+ > /* Everything below here references initial_boot_params directly. */ > int __initdata dt_root_addr_cells; > int __initdata dt_root_size_cells; >diff --git a/include/linux/of.h b/include/linux/of.h >index ddeaae6..ac50b02 100644 >--- a/include/linux/of.h >+++ b/include/linux/of.h >@@ -60,6 +60,7 @@ struct device_node { > struct device_node *sibling; > struct kobject kobj; > unsigned long _flags; >+ void *subtree; > void *data; > #if defined(CONFIG_SPARC) > const char *path_component_name; >@@ -222,6 +223,7 @@ static inline unsigned long of_read_ulong(const __be32 *cell, int size) > #define OF_DETACHED 2 /* node has been detached from the device tree */ > #define OF_POPULATED 3 /* device already created for the node */ > #define OF_POPULATED_BUS 4 /* of_platform_populate recursed to children of this node */ >+#define OF_DYNAMIC_HYBIRD 5 /* similar to OF_DYNAMIC, but partially */ > > #define OF_IS_DYNAMIC(x) test_bit(OF_DYNAMIC, &x->_flags) > #define OF_MARK_DYNAMIC(x) set_bit(OF_DYNAMIC, &x->_flags) >diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h >index 587ee50..1fb47d7 100644 >--- a/include/linux/of_fdt.h >+++ b/include/linux/of_fdt.h >@@ -39,6 +39,7 @@ extern int of_fdt_match(const void *blob, unsigned long node, > const char *const *compat); > extern void of_fdt_unflatten_tree(unsigned long *blob, > struct device_node **mynodes); >+extern void of_fdt_add_subtree(struct device_node *parent, void *blob); > > /* TBD: Temporary export of fdt globals - remove when code fully merged */ > extern int __initdata dt_root_addr_cells; >-- >2.1.0 > -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Sorry for not weighing in earlier, I've had other work keeping me away. My short answer: don't use overlays. They're not what you need. Generic CONFIG_OF_DYNAMIC should be all that is required to make changes in your use case. Overlays are a specific api for being able to apply a set of changes in a revertable way, but as you say, it is a lot more complicated. However, overlays are built on top of the of_changeset API which is a lot simpler. It doesn't do any phandle resolution, and it doesn't do any tracking. It takes a set of changes (attach node, detach node, add property, remove property), an applies them to the live tree. At that point the changes are permenant*. It is documented in Documentation/devicetree/changesets.txt Ideally, I want all DT changes to go through the changeset API so that the lifecycle issues are delt with in one place. It also defers firing notifiers until after the entire changeset is applied. With of_attach_node/of_detach_node the notifiers are sent immediately after each change when the tree may be in an inconsistent state. For example, a driver expecting child nodes, but the child nodes haven't been added yet. Comments below... * There is an API for reverting a changeset, which simply applies the changes backwards and in reverse. The overlay code uses it, but you won't need it. On Thu, 14 May 2015 10:54:31 +1000 , Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote: > On Wed, 2015-05-13 at 19:18 -0500, Rob Herring wrote: > > > I haven't decided really. > > > > The main thing with the current patch is I don't really like the added > > complexity to unflatten_dt_node. It is already a fairly complex > > function. Perhaps removing of "hybrid" as discussed will help? > > I agree, we should be able to make that much simpler, I was planning on > sorting that out with Gavin. Ditto here. I don't want to have any new kinds of nodes created either. They are either OF_DYNAMIC, and therefore freeable, or they are not and cannot be freed. > > If there are things we can do to make overlays easier to use in your > > use case, I'd like to hear ideas. I don't really buy that being more > > complex than needed is an obstacle. That is very often the case to > > have common, scale-able solutions. I want to see a simple case be > > simple to support. > > Well, it's a LOT more complex from the FW perspective for a bunch of > features we don't really need, in a way because the DT update in our > case is just purely informational to avoid keeping wrong/outdated DT > bits, it has little functional impact (it might have a bit for interrupt > routing through bridges though). > > However, I am also pursuing an approach on FW side using a generation > count in our nodes and properties which we could use to generate > arbitrary overlays if we know what generation linux has. > > There might actual be a usage scenario for a generic way for our > firwmare to convey DT updates to Linux for other reasons. > > A few things that I don't find in the overlay code (but maybe I haven't > looked at it hard enough): > > - Can it remove nodes/properties ? Overlays: No, because I asked Pantelis to drop them. Changeset: yes, absolutely > - Can it "commit" a changeset so it's permanently part of the main DT ? > We will never have a concept of "revertable" changesets, if we need a > subsequent update, we will get a new overlay from FW that will remove > what needs to be removed and add what needs to be added. Yes > IE, our current mechanism without overlay is fairly simple: > > - On PCI unplug, we remove all nodes below the slot (from linux), > the FW does the equivalent internally. > > - On PCI re-plug, the FW internally builds new nodes and sends a > new subtree as an FDT that we can expand/attach. > > Now we could consider that subtree as a changeset that can be undone, > but that wouldn't work for boot time. And subsequent updates wouldn't > have that concept of "undoing" anyway. > > IE. conceptually, what overlays do today is quite rooted around the idea > of having a fixed "base" DT and some pre-compiled DTB overlays that > get added/removed. The design completely ignore the idea of a FW that > maintains a "live" tree which we want to keep in sync, which is what we > want to do here, or what we could do with a "live" open firmware > implementation. Right, which is exactly the reason for the changeset/overlay split. Overlays assume a fixed base, and that overlays are kind of like plug-in modules. changeset makes no such assumption. g. -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sun, 2015-06-07 at 08:54 +0100, Grant Likely wrote: > > IE. conceptually, what overlays do today is quite rooted around the idea > > of having a fixed "base" DT and some pre-compiled DTB overlays that > > get added/removed. The design completely ignore the idea of a FW that > > maintains a "live" tree which we want to keep in sync, which is what we > > want to do here, or what we could do with a "live" open firmware > > implementation. > > Right, which is exactly the reason for the changeset/overlay split. > Overlays assume a fixed base, and that overlays are kind of like plug-in > modules. changeset makes no such assumption. So you suggest we create a function that takes an fdt and an "anchor" as input, and expands that FDT below that anchor, but does so by using the changeset API under the hood ? Even that looks somewhat tricky (turn that bit of FDT into a pile of changeset actions), however, I can see how we could create a new function inside changeset to attach a subtree. Ie. of_attach_subtree() (which could have it's own reconfig action but we don't care that much yet), which takes an expanded subtree and an anchor, and calls of_attach_node() in effect for all nodes in there. We could then have a two pass mechanism in our hotplug code: - Expand the bit of fdt into a separate tree - Use of_attach_subtree to "add" that subtree to the main tree What do you think ? Ben. -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Jun 8, 2015 at 9:57 PM, Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote: > On Sun, 2015-06-07 at 08:54 +0100, Grant Likely wrote: >> > IE. conceptually, what overlays do today is quite rooted around the idea >> > of having a fixed "base" DT and some pre-compiled DTB overlays that >> > get added/removed. The design completely ignore the idea of a FW that >> > maintains a "live" tree which we want to keep in sync, which is what we >> > want to do here, or what we could do with a "live" open firmware >> > implementation. >> >> Right, which is exactly the reason for the changeset/overlay split. >> Overlays assume a fixed base, and that overlays are kind of like plug-in >> modules. changeset makes no such assumption. > > So you suggest we create a function that takes an fdt and an "anchor" as input, > and expands that FDT below that anchor, but does so by using the changeset API > under the hood ? > > Even that looks somewhat tricky (turn that bit of FDT into a pile of changeset > actions), however, I can see how we could create a new function inside changeset > to attach a subtree. > > Ie. of_attach_subtree() (which could have it's own reconfig action but we > don't care that much yet), which takes an expanded subtree and an anchor, and > calls of_attach_node() in effect for all nodes in there. > > We could then have a two pass mechanism in our hotplug code: > > - Expand the bit of fdt into a separate tree > - Use of_attach_subtree to "add" that subtree to the main tree > > What do you think ? I like that. g. -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Jun 08, 2015 at 10:34:13PM +0100, Grant Likely wrote: >On Mon, Jun 8, 2015 at 9:57 PM, Benjamin Herrenschmidt ><benh@kernel.crashing.org> wrote: >> On Sun, 2015-06-07 at 08:54 +0100, Grant Likely wrote: >>> > IE. conceptually, what overlays do today is quite rooted around the idea >>> > of having a fixed "base" DT and some pre-compiled DTB overlays that >>> > get added/removed. The design completely ignore the idea of a FW that >>> > maintains a "live" tree which we want to keep in sync, which is what we >>> > want to do here, or what we could do with a "live" open firmware >>> > implementation. >>> >>> Right, which is exactly the reason for the changeset/overlay split. >>> Overlays assume a fixed base, and that overlays are kind of like plug-in >>> modules. changeset makes no such assumption. >> >> So you suggest we create a function that takes an fdt and an "anchor" as input, >> and expands that FDT below that anchor, but does so by using the changeset API >> under the hood ? >> >> Even that looks somewhat tricky (turn that bit of FDT into a pile of changeset >> actions), however, I can see how we could create a new function inside changeset >> to attach a subtree. >> >> Ie. of_attach_subtree() (which could have it's own reconfig action but we >> don't care that much yet), which takes an expanded subtree and an anchor, and >> calls of_attach_node() in effect for all nodes in there. >> >> We could then have a two pass mechanism in our hotplug code: >> >> - Expand the bit of fdt into a separate tree >> - Use of_attach_subtree to "add" that subtree to the main tree >> >> What do you think ? > >I like that. > Thanks, Grant and Ben. Currently, I'm collecting more feedbacks for v5. So it's something I will address in v6. Thanks, Gavin >g. > -- To unsubscribe from this list: send the line "unsubscribe linux-pci" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c index 3351ef4..f562080 100644 --- a/drivers/of/dynamic.c +++ b/drivers/of/dynamic.c @@ -330,13 +330,22 @@ void of_node_release(struct kobject *kobj) return; } - if (!of_node_check_flag(node, OF_DYNAMIC)) + /* Release the subtree */ + if (node->subtree) { + kfree(node->subtree); + node->subtree = NULL; + } + + if (!of_node_check_flag(node, OF_DYNAMIC) && + !of_node_check_flag(node, OF_DYNAMIC_HYBIRD)) return; while (prop) { struct property *next = prop->next; - kfree(prop->name); - kfree(prop->value); + if (of_node_check_flag(node, OF_DYNAMIC)) { + kfree(prop->name); + kfree(prop->value); + } kfree(prop); prop = next; @@ -345,7 +354,9 @@ void of_node_release(struct kobject *kobj) node->deadprops = NULL; } } - kfree(node->full_name); + + if (of_node_check_flag(node, OF_DYNAMIC)) + kfree(node->full_name); kfree(node->data); kfree(node); } diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index cde35c5d01..7659560 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -28,6 +28,10 @@ #include <asm/setup.h> /* for COMMAND_LINE_SIZE */ #include <asm/page.h> +#include "of_private.h" + +static int cur_node_depth; + /* * of_fdt_limit_memory - limit the number of regions in the /memory node * @limit: maximum entries @@ -168,20 +172,20 @@ static void *unflatten_dt_alloc(void **mem, unsigned long size, * @dad: Parent struct device_node * @fpsize: Size of the node path up at the current depth. */ -static void * unflatten_dt_node(void *blob, - void *mem, - int *poffset, - struct device_node *dad, - struct device_node **nodepp, - unsigned long fpsize, - bool dryrun) +static void *unflatten_dt_node(void *blob, + void *mem, + int *poffset, + struct device_node *dad, + struct device_node **nodepp, + unsigned long fpsize, + bool dryrun, + bool dynamic) { const __be32 *p; struct device_node *np; struct property *pp, **prev_pp = NULL; const char *pathp; unsigned int l, allocl; - static int depth = 0; int old_depth; int offset; int has_name = 0; @@ -219,12 +223,18 @@ static void * unflatten_dt_node(void *blob, } } - np = unflatten_dt_alloc(&mem, sizeof(struct device_node) + allocl, + if (dynamic) + np = kzalloc(sizeof(struct device_node) + allocl, GFP_KERNEL); + else + np = unflatten_dt_alloc(&mem, + sizeof(struct device_node) + allocl, __alignof__(struct device_node)); if (!dryrun) { char *fn; of_node_init(np); np->full_name = fn = ((char *)np) + sizeof(*np); + if (dynamic) + of_node_set_flag(np, OF_DYNAMIC_HYBIRD); if (new_format) { /* rebuild full path for new format */ if (dad && dad->parent) { @@ -267,8 +277,12 @@ static void * unflatten_dt_node(void *blob, } if (strcmp(pname, "name") == 0) has_name = 1; - pp = unflatten_dt_alloc(&mem, sizeof(struct property), - __alignof__(struct property)); + + if (dynamic) + pp = kzalloc(sizeof(struct property), GFP_KERNEL); + else + pp = unflatten_dt_alloc(&mem, sizeof(struct property), + __alignof__(struct property)); if (!dryrun) { /* We accept flattened tree phandles either in * ePAPR-style "phandle" properties, or the @@ -309,8 +323,13 @@ static void * unflatten_dt_node(void *blob, if (pa < ps) pa = p1; sz = (pa - ps) + 1; - pp = unflatten_dt_alloc(&mem, sizeof(struct property) + sz, - __alignof__(struct property)); + + if (dynamic) + pp = kzalloc(sizeof(struct property) + sz, GFP_KERNEL); + else + pp = unflatten_dt_alloc(&mem, + sizeof(struct property) + sz, + __alignof__(struct property)); if (!dryrun) { pp->name = "name"; pp->length = sz; @@ -334,13 +353,21 @@ static void * unflatten_dt_node(void *blob, np->type = "<NULL>"; } - old_depth = depth; - *poffset = fdt_next_node(blob, *poffset, &depth); - if (depth < 0) - depth = 0; - while (*poffset > 0 && depth > old_depth) - mem = unflatten_dt_node(blob, mem, poffset, np, NULL, - fpsize, dryrun); + old_depth = cur_node_depth; + *poffset = fdt_next_node(blob, *poffset, &cur_node_depth); + while (*poffset > 0) { + if (cur_node_depth < old_depth) + break; + + if (cur_node_depth == old_depth) + mem = unflatten_dt_node(blob, mem, poffset, + dad, NULL, fpsize, + dryrun, dynamic); + else if (cur_node_depth > old_depth) + mem = unflatten_dt_node(blob, mem, poffset, + np, NULL, fpsize, + dryrun, dynamic); + } if (*poffset < 0 && *poffset != -FDT_ERR_NOTFOUND) pr_err("unflatten: error %d processing FDT\n", *poffset); @@ -379,8 +406,8 @@ static void * unflatten_dt_node(void *blob, * for the resulting tree */ static void __unflatten_device_tree(void *blob, - struct device_node **mynodes, - void * (*dt_alloc)(u64 size, u64 align)) + struct device_node **mynodes, + void * (*dt_alloc)(u64 size, u64 align)) { unsigned long size; int start; @@ -405,7 +432,9 @@ static void __unflatten_device_tree(void *blob, /* First pass, scan for size */ start = 0; - size = (unsigned long)unflatten_dt_node(blob, NULL, &start, NULL, NULL, 0, true); + cur_node_depth = 1; + size = (unsigned long)unflatten_dt_node(blob, NULL, &start, NULL, + NULL, 0, true, false); size = ALIGN(size, 4); pr_debug(" size is %lx, allocating...\n", size); @@ -420,7 +449,8 @@ static void __unflatten_device_tree(void *blob, /* Second pass, do actual unflattening */ start = 0; - unflatten_dt_node(blob, mem, &start, NULL, mynodes, 0, false); + cur_node_depth = 1; + unflatten_dt_node(blob, mem, &start, NULL, mynodes, 0, false, false); if (be32_to_cpup(mem + size) != 0xdeadbeef) pr_warning("End of tree marker overwritten: %08x\n", be32_to_cpup(mem + size)); @@ -448,6 +478,61 @@ void of_fdt_unflatten_tree(unsigned long *blob, } EXPORT_SYMBOL_GPL(of_fdt_unflatten_tree); +static void populate_sysfs_for_child_nodes(struct device_node *parent) +{ + struct device_node *child; + + for_each_child_of_node(parent, child) { + __of_attach_node_sysfs(child); + populate_sysfs_for_child_nodes(child); + } +} + +/** + * of_fdt_add_substree - Create sub-tree of device nodes + * @parent: parent device node to which the sub-tree will attach + * @blob: flat device tree blob representing the sub-tree + * + * Copy over the FDT blob, which passed from firmware, and then + * unflatten the sub-tree. + */ +void of_fdt_add_subtree(struct device_node *parent, void *blob) +{ + int start = 0; + + /* Validate the header */ + if (!blob || fdt_check_header(blob)) { + pr_err("%s: Invalid device-tree blob header at 0x%p\n", + __func__, blob); + return; + } + + /* Free the flat blob for last time lazily */ + if (parent->subtree) { + kfree(parent->subtree); + parent->subtree = NULL; + } + + /* Copy over the flat blob */ + parent->subtree = kzalloc(fdt_totalsize(blob), GFP_KERNEL); + if (!parent->subtree) { + pr_err("%s: Cannot copy over device-tree blob\n", + __func__); + return; + } + + memcpy(parent->subtree, blob, fdt_totalsize(blob)); + + /* Unflatten it */ + mutex_lock(&of_mutex); + cur_node_depth = 1; + unflatten_dt_node(parent->subtree, NULL, &start, parent, NULL, + strlen(parent->full_name), false, true); + populate_sysfs_for_child_nodes(parent); + mutex_unlock(&of_mutex); +} +EXPORT_SYMBOL(of_fdt_add_subtree); + /* Everything below here references initial_boot_params directly. */ int __initdata dt_root_addr_cells; int __initdata dt_root_size_cells; diff --git a/include/linux/of.h b/include/linux/of.h index ddeaae6..ac50b02 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -60,6 +60,7 @@ struct device_node { struct device_node *sibling; struct kobject kobj; unsigned long _flags; + void *subtree; void *data; #if defined(CONFIG_SPARC) const char *path_component_name; @@ -222,6 +223,7 @@ static inline unsigned long of_read_ulong(const __be32 *cell, int size) #define OF_DETACHED 2 /* node has been detached from the device tree */ #define OF_POPULATED 3 /* device already created for the node */ #define OF_POPULATED_BUS 4 /* of_platform_populate recursed to children of this node */ +#define OF_DYNAMIC_HYBIRD 5 /* similar to OF_DYNAMIC, but partially */ #define OF_IS_DYNAMIC(x) test_bit(OF_DYNAMIC, &x->_flags) #define OF_MARK_DYNAMIC(x) set_bit(OF_DYNAMIC, &x->_flags) diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index 587ee50..1fb47d7 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -39,6 +39,7 @@ extern int of_fdt_match(const void *blob, unsigned long node, const char *const *compat); extern void of_fdt_unflatten_tree(unsigned long *blob, struct device_node **mynodes); +extern void of_fdt_add_subtree(struct device_node *parent, void *blob); /* TBD: Temporary export of fdt globals - remove when code fully merged */ extern int __initdata dt_root_addr_cells;
The requirement is raised when developing the PCI hotplug feature for PowerPC PowerNV platform, which runs on top of skiboot firmware. When plugging PCI adapter to one PCI slot, the firmware rescans the slot and build FDT (Flat Device Tree) blob, which is sent to the PowerNV PCI hotplug driver for processing. The new constructed device nodes from the FDT blob are expected to be attached to the device node of the PCI slot. Unfortunately, it seems we don't have a API to support the scenario. The patch intends to support it by newly introduced function of_fdt_add_subtree(), the design behind it is shown as below: * When the sub-tree FDT blob, which is owned by firmware, is received by kernel. It's copied over to the blob, which is dynamically allocated. Since then, the FDT blob owned by firmware isn't touched. * Rework unflatten_dt_node() so that the device nodes in current and deeper depth have been constructed from the FDT blob. All device nodes are marked with flag OF_DYNAMIC_HYBIRD, which is similar to OF_DYNAMIC. However, device node with the flag set can be free'd, but in the way other than that for OF_DYNAMIC device nodes. * of_fdt_add_subtree() is the introduced API to do the work. Cc: Grant Likely <grant.likely@linaro.org> Cc: Rob Herring <robh+dt@kernel.org> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com> --- drivers/of/dynamic.c | 19 +++++-- drivers/of/fdt.c | 133 ++++++++++++++++++++++++++++++++++++++++--------- include/linux/of.h | 2 + include/linux/of_fdt.h | 1 + 4 files changed, 127 insertions(+), 28 deletions(-)