Message ID: 1516875247-19599-1-git-send-email-cpandya@codeaurora.org (mailing list archive)
State: Not Applicable, archived
Delegated to: Andy Gross
Headers: show
On Thu, Jan 25, 2018 at 4:14 AM, Chintan Pandya <cpandya@codeaurora.org> wrote: > of_find_node_by_phandle() takes a lot of time finding Got some numbers for what is "a lot of time"? > right node when your intended device is too right-side > in the fdt. Reason is, we search each device serially > from the fdt, starting from left-most to right-most. By right side, you mean a deep path? > > Implement, device-phandle relation in hash-table so > that look up can be faster. > > Change-Id: I4a2bc7eff6de142e4f91a7bf474893a45e61c128 Run checkpatch.pl > Signed-off-by: Chintan Pandya <cpandya@codeaurora.org> > --- > drivers/of/base.c | 9 +++++++-- > drivers/of/fdt.c | 18 ++++++++++++++++++ > include/linux/of.h | 6 ++++++ > 3 files changed, 31 insertions(+), 2 deletions(-) [...] > diff --git a/include/linux/of.h b/include/linux/of.h > index 299aeb1..5b3f4f1 100644 > --- a/include/linux/of.h > +++ b/include/linux/of.h > @@ -25,6 +25,7 @@ > #include <linux/notifier.h> > #include <linux/property.h> > #include <linux/list.h> > +#include <linux/hashtable.h> > > #include <asm/byteorder.h> > #include <asm/errno.h> > @@ -61,6 +62,7 @@ struct device_node { > struct kobject kobj; > unsigned long _flags; > void *data; > + struct hlist_node hash; Always base your patches on the latest -rc at least. This won't apply. This grows struct device_node for every single node which we recently worked on to shrink (which is why this won't apply). So I'm now sensitive to anything that grows it. I'd really prefer something out of band. I'd guess that there's really only a few phandle lookups that occur over and over. The clock controller, interrupt controller, etc. What if you just had a simple array of previously found nodes for a cache and of_find_node_by_phandle can check that array first. Probably 8-16 entries would be enough. If that still has too much trashing, you could also have a lookup count for each entry and expel the least used first. Or maybe the list_lru would work here. 
Rob -- To unsubscribe from this list: send the line "unsubscribe linux-arm-msm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 01/25/18 02:14, Chintan Pandya wrote: > of_find_node_by_phandle() takes a lot of time finding > right node when your intended device is too right-side > in the fdt. Reason is, we search each device serially > from the fdt, starting from left-most to right-most. Please give me a pointer to the code that is doing this search. -Frank > > Implement, device-phandle relation in hash-table so > that look up can be faster. > > Change-Id: I4a2bc7eff6de142e4f91a7bf474893a45e61c128 > Signed-off-by: Chintan Pandya <cpandya@codeaurora.org> > --- > drivers/of/base.c | 9 +++++++-- > drivers/of/fdt.c | 18 ++++++++++++++++++ > include/linux/of.h | 6 ++++++ > 3 files changed, 31 insertions(+), 2 deletions(-) > > diff --git a/drivers/of/base.c b/drivers/of/base.c > index a0bccb5..3e06316 100644 > --- a/drivers/of/base.c > +++ b/drivers/of/base.c > @@ -25,6 +25,7 @@ > #include <linux/cpu.h> > #include <linux/module.h> > #include <linux/of.h> > +#include <linux/hashtable.h> > #include <linux/of_graph.h> > #include <linux/spinlock.h> > #include <linux/slab.h> > @@ -1099,10 +1100,14 @@ struct device_node *of_find_node_by_phandle(phandle handle) > if (!handle) > return NULL; > > - raw_spin_lock_irqsave(&devtree_lock, flags); > - for_each_of_allnodes(np) > + spin_lock(&dt_hash_spinlock); > + hash_for_each_possible(dt_hash_table, np, hash, handle) > if (np->phandle == handle) > break; > + > + spin_unlock(&dt_hash_spinlock); > + > + raw_spin_lock_irqsave(&devtree_lock, flags); > of_node_get(np); > raw_spin_unlock_irqrestore(&devtree_lock, flags); > return np; > diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c > index c0914fb..f0e78a7 100644 > --- a/drivers/of/fdt.c > +++ b/drivers/of/fdt.c > @@ -31,6 +31,10 @@ > #include <asm/setup.h> /* for COMMAND_LINE_SIZE */ > #include <asm/page.h> > > +static bool dt_hash_needs_init = true; > +DECLARE_HASHTABLE(dt_hash_table, DT_HASH_BITS); > +DEFINE_SPINLOCK(dt_hash_spinlock); > + > /* > * of_fdt_limit_memory - limit the number of regions in the 
/memory node > * @limit: maximum entries > @@ -227,6 +231,20 @@ static void populate_properties(const void *blob, > pprev = &pp->next; > } > > + /* > + * In 'dryrun = true' cases, np is some non-NULL junk. So, protect > + * against those cases. > + */ > + if (!dryrun && np->phandle) { > + spin_lock(&dt_hash_spinlock); > + if (dt_hash_needs_init) { > + dt_hash_needs_init = false; > + hash_init(dt_hash_table); > + } > + hash_add(dt_hash_table, &np->hash, np->phandle); > + spin_unlock(&dt_hash_spinlock); > + } > + > /* With version 0x10 we may not have the name property, > * recreate it here from the unit name if absent > */ > diff --git a/include/linux/of.h b/include/linux/of.h > index 299aeb1..5b3f4f1 100644 > --- a/include/linux/of.h > +++ b/include/linux/of.h > @@ -25,6 +25,7 @@ > #include <linux/notifier.h> > #include <linux/property.h> > #include <linux/list.h> > +#include <linux/hashtable.h> > > #include <asm/byteorder.h> > #include <asm/errno.h> > @@ -61,6 +62,7 @@ struct device_node { > struct kobject kobj; > unsigned long _flags; > void *data; > + struct hlist_node hash; > #if defined(CONFIG_SPARC) > const char *path_component_name; > unsigned int unique_id; > @@ -68,6 +70,10 @@ struct device_node { > #endif > }; > > +#define DT_HASH_BITS 6 > +extern DECLARE_HASHTABLE(dt_hash_table, DT_HASH_BITS); > +extern spinlock_t dt_hash_spinlock; > + > #define MAX_PHANDLE_ARGS 16 > struct of_phandle_args { > struct device_node *np; > -- To unsubscribe from this list: send the line "unsubscribe linux-arm-msm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 1/25/2018 8:20 PM, Rob Herring wrote: > On Thu, Jan 25, 2018 at 4:14 AM, Chintan Pandya <cpandya@codeaurora.org> wrote: >> of_find_node_by_phandle() takes a lot of time finding > Got some numbers for what is "a lot of time"? On my SDM device, I see total saving of 400ms during boot time. For some clients whose node is quite deeper, they see 1ms time taken by this API. > >> right node when your intended device is too right-side >> in the fdt. Reason is, we search each device serially >> from the fdt, starting from left-most to right-most. > By right side, you mean a deep path? Yes, will correct this if original is confusing. > >> Implement, device-phandle relation in hash-table so >> that look up can be faster. >> >> Change-Id: I4a2bc7eff6de142e4f91a7bf474893a45e61c128 > Run checkpatch.pl Sure. My bad. > >> @@ -61,6 +62,7 @@ struct device_node { >> struct kobject kobj; >> unsigned long _flags; >> void *data; >> + struct hlist_node hash; > Always base your patches on the latest -rc at least. This won't apply. Ok, sure. > > This grows struct device_node for every single node which we recently > worked on to shrink (which is why this won't apply). So I'm now > sensitive to anything that grows it. I'd really prefer something out > of band. > > I'd guess that there's really only a few phandle lookups that occur > over and over. On my system, there are ~6.7k calls of this API during boot. > The clock controller, interrupt controller, etc. What > if you just had a simple array of previously found nodes for a cache > and of_find_node_by_phandle can check that array first. Probably 8-16 > entries would be enough. I clearly see repeat calling with same phandle. But I have few hundreds of nodes. I see hashing as generic optimization which applies equally good to all sized DT. Using ~4KB more size to save 400 ms is a good trade-off, I believe. Chintan Pandya
On 1/26/2018 1:24 AM, Frank Rowand wrote: > On 01/25/18 02:14, Chintan Pandya wrote: >> of_find_node_by_phandle() takes a lot of time finding >> right node when your intended device is too right-side >> in the fdt. Reason is, we search each device serially >> from the fdt, starting from left-most to right-most. > Please give me a pointer to the code that is doing > this search. > > -Frank You can refer include/linux/of.h #define for_each_of_allnodes_from(from, dn) \ for (dn = __of_find_all_nodes(from); dn; dn = __of_find_all_nodes(dn)) #define for_each_of_allnodes(dn) for_each_of_allnodes_from(NULL, dn) where __of_find_all_nodes() does struct device_node *__of_find_all_nodes(struct device_node *prev) { struct device_node *np; if (!prev) { np = of_root; } else if (prev->child) { np = prev->child; } else { /* Walk back up looking for a sibling, or the end of the structure */ np = prev; while (np->parent && !np->sibling) np = np->parent; np = np->sibling; /* Might be null at the end of the tree */ } return np; }
On Fri, Jan 26, 2018 at 1:22 AM, Chintan Pandya <cpandya@codeaurora.org> wrote: > On 1/25/2018 8:20 PM, Rob Herring wrote: >> >> On Thu, Jan 25, 2018 at 4:14 AM, Chintan Pandya <cpandya@codeaurora.org> >> wrote: >>> [...] >> I'd guess that there's really only a few phandle lookups that occur >> over and over. > > On my system, there are ~6.7k calls of this API during boot. And after boot it will be near 0 yet we carry the memory usage forever. >> The clock controller, interrupt controller, etc. What >> if you just had a simple array of previously found nodes for a cache >> and of_find_node_by_phandle can check that array first. Probably 8-16 >> entries would be enough. > > I clearly see repeat calling with same phandle. But I have few hundreds of > nodes. > I see hashing as generic optimization which applies equally good to all > sized DT. > Using ~4KB more size to save 400 ms is a good trade-off, I believe. But if you can use 200 bytes and save 350 ms, that would be a better trade off IMO. But we don't know because we have no data. Rob -- To unsubscribe from this list: send the line "unsubscribe linux-arm-msm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 01/26/18 00:22, Chintan Pandya wrote: > > > On 1/26/2018 1:24 AM, Frank Rowand wrote: >> On 01/25/18 02:14, Chintan Pandya wrote: >>> of_find_node_by_phandle() takes a lot of time finding >>> right node when your intended device is too right-side >>> in the fdt. Reason is, we search each device serially >>> from the fdt, starting from left-most to right-most. >> Please give me a pointer to the code that is doing >> this search. >> >> -Frank > You can refer include/linux/of.h > > #define for_each_of_allnodes_from(from, dn) \ > for (dn = __of_find_all_nodes(from); dn; dn = __of_find_all_nodes(dn)) > #define for_each_of_allnodes(dn) for_each_of_allnodes_from(NULL, dn) > > where __of_find_all_nodes() does > > struct device_node *__of_find_all_nodes(struct device_node *prev) > { > struct device_node *np; > if (!prev) { > np = of_root; > } else if (prev->child) { > np = prev->child; > } else { > /* Walk back up looking for a sibling, or the end of the structure */ > np = prev; > while (np->parent && !np->sibling) > np = np->parent; > np = np->sibling; /* Might be null at the end of the tree */ > } > return np; > } > Let me restate my question. Can you point me to the driver code that is invoking the search? -Frank -- To unsubscribe from this list: send the line "unsubscribe linux-arm-msm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 01/26/18 13:27, Frank Rowand wrote: > On 01/26/18 00:22, Chintan Pandya wrote: >> >> >> On 1/26/2018 1:24 AM, Frank Rowand wrote: >>> On 01/25/18 02:14, Chintan Pandya wrote: >>>> of_find_node_by_phandle() takes a lot of time finding >>>> right node when your intended device is too right-side >>>> in the fdt. Reason is, we search each device serially >>>> from the fdt, starting from left-most to right-most. >>> Please give me a pointer to the code that is doing >>> this search. >>> >>> -Frank >> You can refer include/linux/of.h >> >> #define for_each_of_allnodes_from(from, dn) \ >> for (dn = __of_find_all_nodes(from); dn; dn = __of_find_all_nodes(dn)) >> #define for_each_of_allnodes(dn) for_each_of_allnodes_from(NULL, dn) >> >> where __of_find_all_nodes() does >> >> struct device_node *__of_find_all_nodes(struct device_node *prev) >> { >> struct device_node *np; >> if (!prev) { >> np = of_root; >> } else if (prev->child) { >> np = prev->child; >> } else { >> /* Walk back up looking for a sibling, or the end of the structure */ >> np = prev; >> while (np->parent && !np->sibling) >> np = np->parent; >> np = np->sibling; /* Might be null at the end of the tree */ >> } >> return np; >> } >> > > Let me restate my question. > > Can you point me to the driver code that is invoking > the search? > > -Frank > And also the .dts devicetree source file that you are seeing large overhead with. -- To unsubscribe from this list: send the line "unsubscribe linux-arm-msm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On 01/26/18 13:29, Frank Rowand wrote: > On 01/26/18 13:27, Frank Rowand wrote: >> On 01/26/18 00:22, Chintan Pandya wrote: >>> >>> >>> On 1/26/2018 1:24 AM, Frank Rowand wrote: >>>> On 01/25/18 02:14, Chintan Pandya wrote: >>>>> of_find_node_by_phandle() takes a lot of time finding >>>>> right node when your intended device is too right-side >>>>> in the fdt. Reason is, we search each device serially >>>>> from the fdt, starting from left-most to right-most. >>>> Please give me a pointer to the code that is doing >>>> this search. >>>> >>>> -Frank >>> You can refer include/linux/of.h >>> >>> #define for_each_of_allnodes_from(from, dn) \ >>> for (dn = __of_find_all_nodes(from); dn; dn = __of_find_all_nodes(dn)) >>> #define for_each_of_allnodes(dn) for_each_of_allnodes_from(NULL, dn) >>> >>> where __of_find_all_nodes() does >>> >>> struct device_node *__of_find_all_nodes(struct device_node *prev) >>> { >>> struct device_node *np; >>> if (!prev) { >>> np = of_root; >>> } else if (prev->child) { >>> np = prev->child; >>> } else { >>> /* Walk back up looking for a sibling, or the end of the structure */ >>> np = prev; >>> while (np->parent && !np->sibling) >>> np = np->parent; >>> np = np->sibling; /* Might be null at the end of the tree */ >>> } >>> return np; >>> } >>> >> >> Let me restate my question. >> >> Can you point me to the driver code that is invoking >> the search? >> >> -Frank >> > > And also the .dts devicetree source file that you are seeing > large overhead with. > Sorry about dribbling out questions instead of all at once.... What is the hardware you are testing this on? Processor? Cache size? Memory size? Processor frequency? Any other attribute of the system that will help me understand the boot performance you are seeing? Thanks, Frank -- To unsubscribe from this list: send the line "unsubscribe linux-arm-msm" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/drivers/of/base.c b/drivers/of/base.c index a0bccb5..3e06316 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -25,6 +25,7 @@ #include <linux/cpu.h> #include <linux/module.h> #include <linux/of.h> +#include <linux/hashtable.h> #include <linux/of_graph.h> #include <linux/spinlock.h> #include <linux/slab.h> @@ -1099,10 +1100,14 @@ struct device_node *of_find_node_by_phandle(phandle handle) if (!handle) return NULL; - raw_spin_lock_irqsave(&devtree_lock, flags); - for_each_of_allnodes(np) + spin_lock(&dt_hash_spinlock); + hash_for_each_possible(dt_hash_table, np, hash, handle) if (np->phandle == handle) break; + + spin_unlock(&dt_hash_spinlock); + + raw_spin_lock_irqsave(&devtree_lock, flags); of_node_get(np); raw_spin_unlock_irqrestore(&devtree_lock, flags); return np; diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index c0914fb..f0e78a7 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -31,6 +31,10 @@ #include <asm/setup.h> /* for COMMAND_LINE_SIZE */ #include <asm/page.h> +static bool dt_hash_needs_init = true; +DECLARE_HASHTABLE(dt_hash_table, DT_HASH_BITS); +DEFINE_SPINLOCK(dt_hash_spinlock); + /* * of_fdt_limit_memory - limit the number of regions in the /memory node * @limit: maximum entries @@ -227,6 +231,20 @@ static void populate_properties(const void *blob, pprev = &pp->next; } + /* + * In 'dryrun = true' cases, np is some non-NULL junk. So, protect + * against those cases. 
+ */ + if (!dryrun && np->phandle) { + spin_lock(&dt_hash_spinlock); + if (dt_hash_needs_init) { + dt_hash_needs_init = false; + hash_init(dt_hash_table); + } + hash_add(dt_hash_table, &np->hash, np->phandle); + spin_unlock(&dt_hash_spinlock); + } + /* With version 0x10 we may not have the name property, * recreate it here from the unit name if absent */ diff --git a/include/linux/of.h b/include/linux/of.h index 299aeb1..5b3f4f1 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -25,6 +25,7 @@ #include <linux/notifier.h> #include <linux/property.h> #include <linux/list.h> +#include <linux/hashtable.h> #include <asm/byteorder.h> #include <asm/errno.h> @@ -61,6 +62,7 @@ struct device_node { struct kobject kobj; unsigned long _flags; void *data; + struct hlist_node hash; #if defined(CONFIG_SPARC) const char *path_component_name; unsigned int unique_id; @@ -68,6 +70,10 @@ struct device_node { #endif }; +#define DT_HASH_BITS 6 +extern DECLARE_HASHTABLE(dt_hash_table, DT_HASH_BITS); +extern spinlock_t dt_hash_spinlock; + #define MAX_PHANDLE_ARGS 16 struct of_phandle_args { struct device_node *np;
of_find_node_by_phandle() takes a lot of time finding the right node when your intended device is far to the right side in the fdt. The reason is, we search each device serially from the fdt, starting from left-most to right-most. Implement a device-phandle relation in a hash-table so that lookup can be faster. Change-Id: I4a2bc7eff6de142e4f91a7bf474893a45e61c128 Signed-off-by: Chintan Pandya <cpandya@codeaurora.org> --- drivers/of/base.c | 9 +++++++-- drivers/of/fdt.c | 18 ++++++++++++++++++ include/linux/of.h | 6 ++++++ 3 files changed, 31 insertions(+), 2 deletions(-)