Message ID | 20220107003756.806582-10-ben.widawsky@intel.com |
---|---|
State | Superseded |
Headers | show |
Series | CXL Region driver | expand |
On Thu, 6 Jan 2022 16:37:52 -0800 Ben Widawsky <ben.widawsky@intel.com> wrote: > Cross host bridge verification primarily determines if the requested > interleave ordering can be achieved by the root decoder, which isn't as > programmable as other decoders. > > The algorithm implemented here is based on the CXL Type 3 Memory Device > Software Guide, chapter 2.13.14 > > Signed-off-by: Ben Widawsky <ben.widawsky@intel.com> Hi Ben, A few things I'm carrying 'fixes' for in here. Jonathan > --- > .clang-format | 2 + > drivers/cxl/region.c | 89 +++++++++++++++++++++++++++++++++++++++++++- > drivers/cxl/trace.h | 3 ++ > 3 files changed, 93 insertions(+), 1 deletion(-) > > diff --git a/.clang-format b/.clang-format > index 15d4eaabc6b5..55f628f21722 100644 > --- a/.clang-format > +++ b/.clang-format > @@ -169,6 +169,8 @@ ForEachMacros: > - 'for_each_cpu_and' > - 'for_each_cpu_not' > - 'for_each_cpu_wrap' > + - 'for_each_cxl_decoder_target' > + - 'for_each_cxl_endpoint' > - 'for_each_dapm_widgets' > - 'for_each_dev_addr' > - 'for_each_dev_scope' > diff --git a/drivers/cxl/region.c b/drivers/cxl/region.c > index c8e3c48dfbb9..ca559a4b5347 100644 > --- a/drivers/cxl/region.c > +++ b/drivers/cxl/region.c > @@ -28,6 +28,17 @@ > */ > > #define region_ways(region) ((region)->config.eniw) > +#define region_ig(region) (ilog2((region)->config.ig)) > + > +#define for_each_cxl_endpoint(ep, region, idx) \ > + for (idx = 0, ep = (region)->config.targets[idx]; \ > + idx < region_ways(region); \ > + idx++, ep = (region)->config.targets[idx]) > + > +#define for_each_cxl_decoder_target(target, decoder, idx) \ > + for (idx = 0, target = (decoder)->target[idx]; \ > + idx < (decoder)->nr_targets; \ > + idx++, target++) > > static struct cxl_decoder *rootd_from_region(struct cxl_region *r) > { > @@ -175,6 +186,30 @@ static bool qtg_match(const struct cxl_decoder *rootd, > return true; > } > > +static int get_unique_hostbridges(const struct cxl_region *region, > + struct cxl_port **hbs) > +{ > + struct cxl_memdev *ep; > + int i, hb_count = 0; > + > + for_each_cxl_endpoint(ep, region, i) { > + struct cxl_port *hb = get_hostbridge(ep); > + bool found = false; > + int j; > + > + BUG_ON(!hb); > + > + for (j = 0; j < hb_count; j++) { > + if (hbs[j] == hb) > + found = true; > + } > + if (!found) > + hbs[hb_count++] = hb; > + } > + > + return hb_count; > +} > + > /** > * region_xhb_config_valid() - determine cross host bridge validity > * @rootd: The root decoder to check against > @@ -188,7 +223,59 @@ static bool qtg_match(const struct cxl_decoder *rootd, > static bool region_xhb_config_valid(const struct cxl_region *region, > const struct cxl_decoder *rootd) > { > - /* TODO: */ > + struct cxl_port *hbs[CXL_DECODER_MAX_INTERLEAVE]; > + int rootd_ig, i; > + struct cxl_dport *target; > + > + /* Are all devices in this region on the same CXL host bridge */ > + if (get_unique_hostbridges(region, hbs) == 1) > + return true; > + > + rootd_ig = rootd->interleave_granularity; > + > + /* CFMWS.HBIG >= Device.Label.IG */ > + if (rootd_ig < region_ig(region)) { > + trace_xhb_valid(region, > + "granularity does not support the region interleave granularity\n"); > + return false; > + } > + > + /* ((2^(CFMWS.HBIG - Device.RLabel.IG) * (2^CFMWS.ENIW)) > Device.RLabel.NLabel) */ > + if (1 << (rootd_ig - region_ig(region)) * (1 << rootd->interleave_ways) > This maths isn't what the comment says it is. ((1 << (rootd_ig - region_ig(region))) * rootd->interleaveways) so brackets needed to avoid 2^( all the rest) and rootd->interleave_ways seems to the actual number of ways not the log2 of it. That feeds through below. > + region_ways(region)) { > + trace_xhb_valid(region, > + "granularity to device granularity ratio requires a larger number of devices than currently configured"); > + return false; > + } > + > + /* Check that endpoints are hooked up in the correct order */ > + for_each_cxl_decoder_target(target, rootd, i) { > + struct cxl_memdev *endpoint = region->config.targets[i]; > + > + if (get_hostbridge(endpoint) != target->port) { I think this should be get_hostbridge(endpoint)->uport != target->dport As it stands you are comparing the host bridge with the root object. > + trace_xhb_valid(region, "device ordering bad\n"); > + return false; > + } > + } > + > + /* > + * CFMWS.InterleaveTargetList[n] must contain all devices, x where: > + * (Device[x],RegionLabel.Position >> (CFMWS.HBIG - > + * Device[x].RegionLabel.InterleaveGranularity)) & > + * ((2^CFMWS.ENIW) - 1) = n > + * > + * Linux notes: All devices are known to have the same interleave > + * granularity at this point. > + */ > + for_each_cxl_decoder_target(target, rootd, i) { > + if (((i >> (rootd_ig - region_ig(region)))) & > + (((1 << rootd->interleave_ways) - 1) != target->port_id)) { > + trace_xhb_valid(region, > + "One or more devices are not connected to the correct hostbridge."); > + return false; > + } > + } > + > return true; > } > > diff --git a/drivers/cxl/trace.h b/drivers/cxl/trace.h > index a53f00ba5d0e..4de47d1111ac 100644 > --- a/drivers/cxl/trace.h > +++ b/drivers/cxl/trace.h > @@ -38,6 +38,9 @@ DEFINE_EVENT(cxl_region_template, sanitize_failed, > DEFINE_EVENT(cxl_region_template, allocation_failed, > TP_PROTO(const struct cxl_region *region, char *status), > TP_ARGS(region, status)); > +DEFINE_EVENT(cxl_region_template, xhb_valid, > + TP_PROTO(const struct cxl_region *region, char *status), > + TP_ARGS(region, status)); > > #endif /* if !defined (__CXL_TRACE_H__) || defined(TRACE_HEADER_MULTI_READ) */ >
On Thu, 6 Jan 2022 16:37:52 -0800 Ben Widawsky <ben.widawsky@intel.com> wrote: > Cross host bridge verification primarily determines if the requested > interleave ordering can be achieved by the root decoder, which isn't as > programmable as other decoders. > > The algorithm implemented here is based on the CXL Type 3 Memory Device > Software Guide, chapter 2.13.14 > > Signed-off-by: Ben Widawsky <ben.widawsky@intel.com> Trivial thing inline. > diff --git a/drivers/cxl/region.c b/drivers/cxl/region.c > index c8e3c48dfbb9..ca559a4b5347 100644 > --- a/drivers/cxl/region.c > +++ b/drivers/cxl/region.c > @@ -28,6 +28,17 @@ > */ > > #define region_ways(region) ((region)->config.eniw) > +#define region_ig(region) (ilog2((region)->config.ig)) > + > +#define for_each_cxl_endpoint(ep, region, idx) \ > + for (idx = 0, ep = (region)->config.targets[idx]; \ > + idx < region_ways(region); \ > + idx++, ep = (region)->config.targets[idx]) > + > +#define for_each_cxl_decoder_target(target, decoder, idx) \ > + for (idx = 0, target = (decoder)->target[idx]; \ As target is used too often in here, you'll replace it in ->target[idx] as well. It happens to work today because the parameter always happens to be target > + idx < (decoder)->nr_targets; \ > + idx++, target++) >
On Fri, 7 Jan 2022 10:30:52 +0000 Jonathan Cameron <Jonathan.Cameron@huawei.com> wrote: > On Thu, 6 Jan 2022 16:37:52 -0800 > Ben Widawsky <ben.widawsky@intel.com> wrote: > > > Cross host bridge verification primarily determines if the requested > > interleave ordering can be achieved by the root decoder, which isn't as > > programmable as other decoders. > > > > The algorithm implemented here is based on the CXL Type 3 Memory Device > > Software Guide, chapter 2.13.14 > > > > Signed-off-by: Ben Widawsky <ben.widawsky@intel.com> > > Trivial thing inline. > > > diff --git a/drivers/cxl/region.c b/drivers/cxl/region.c > > index c8e3c48dfbb9..ca559a4b5347 100644 > > --- a/drivers/cxl/region.c > > +++ b/drivers/cxl/region.c > > @@ -28,6 +28,17 @@ > > */ > > > > #define region_ways(region) ((region)->config.eniw) > > +#define region_ig(region) (ilog2((region)->config.ig)) > > + > > +#define for_each_cxl_endpoint(ep, region, idx) \ > > + for (idx = 0, ep = (region)->config.targets[idx]; \ > > + idx < region_ways(region); \ > > + idx++, ep = (region)->config.targets[idx]) > > + > > +#define for_each_cxl_decoder_target(target, decoder, idx) \ > > + for (idx = 0, target = (decoder)->target[idx]; \ > > As target is used too often in here, you'll replace it in ->target[idx] as well. > It happens to work today because the parameter always happens to be target > > > + idx < (decoder)->nr_targets; \ > > + idx++, target++) I should have read the next few lines :) target++ doesn't get (decoder)->target[idx] which is what we want - it indexes off the end of a particular instance rather than through the array. I'm guessing this was from my unclear comment yesterday. I should have spent a little more time being explicit there. Jonathan > >
On Fri, 7 Jan 2022 10:07:14 +0000 Jonathan Cameron <Jonathan.Cameron@Huawei.com> wrote: > On Thu, 6 Jan 2022 16:37:52 -0800 > Ben Widawsky <ben.widawsky@intel.com> wrote: > > > Cross host bridge verification primarily determines if the requested > > interleave ordering can be achieved by the root decoder, which isn't as > > programmable as other decoders. > > > > The algorithm implemented here is based on the CXL Type 3 Memory Device > > Software Guide, chapter 2.13.14 > > > > Signed-off-by: Ben Widawsky <ben.widawsky@intel.com> > > Hi Ben, > > A few things I'm carrying 'fixes' for in here. > > Jonathan > > > --- > > .clang-format | 2 + > > drivers/cxl/region.c | 89 +++++++++++++++++++++++++++++++++++++++++++- > > drivers/cxl/trace.h | 3 ++ > > 3 files changed, 93 insertions(+), 1 deletion(-) > > > > diff --git a/.clang-format b/.clang-format > > index 15d4eaabc6b5..55f628f21722 100644 > > --- a/.clang-format > > +++ b/.clang-format > > @@ -169,6 +169,8 @@ ForEachMacros: > > - 'for_each_cpu_and' > > - 'for_each_cpu_not' > > - 'for_each_cpu_wrap' > > + - 'for_each_cxl_decoder_target' > > + - 'for_each_cxl_endpoint' > > - 'for_each_dapm_widgets' > > - 'for_each_dev_addr' > > - 'for_each_dev_scope' > > diff --git a/drivers/cxl/region.c b/drivers/cxl/region.c > > index c8e3c48dfbb9..ca559a4b5347 100644 > > --- a/drivers/cxl/region.c > > +++ b/drivers/cxl/region.c > > @@ -28,6 +28,17 @@ > > */ > > > > #define region_ways(region) ((region)->config.eniw) > > +#define region_ig(region) (ilog2((region)->config.ig)) > > + > > +#define for_each_cxl_endpoint(ep, region, idx) \ > > + for (idx = 0, ep = (region)->config.targets[idx]; \ > > + idx < region_ways(region); \ > > + idx++, ep = (region)->config.targets[idx]) > > + > > +#define for_each_cxl_decoder_target(target, decoder, idx) \ > > + for (idx = 0, target = (decoder)->target[idx]; \ > > + idx < (decoder)->nr_targets; \ > > + idx++, target++) > > > > static struct cxl_decoder *rootd_from_region(struct cxl_region *r) > > { > > @@ -175,6 +186,30 @@ static bool qtg_match(const struct cxl_decoder *rootd, > > return true; > > } > > > > +static int get_unique_hostbridges(const struct cxl_region *region, > > + struct cxl_port **hbs) > > +{ > > + struct cxl_memdev *ep; > > + int i, hb_count = 0; > > + > > + for_each_cxl_endpoint(ep, region, i) { > > + struct cxl_port *hb = get_hostbridge(ep); > > + bool found = false; > > + int j; > > + > > + BUG_ON(!hb); > > + > > + for (j = 0; j < hb_count; j++) { > > + if (hbs[j] == hb) > > + found = true; > > + } > > + if (!found) > > + hbs[hb_count++] = hb; > > + } > > + > > + return hb_count; > > +} > > + > > /** > > * region_xhb_config_valid() - determine cross host bridge validity > > * @rootd: The root decoder to check against > > @@ -188,7 +223,59 @@ static bool qtg_match(const struct cxl_decoder *rootd, > > static bool region_xhb_config_valid(const struct cxl_region *region, > > const struct cxl_decoder *rootd) > > { > > - /* TODO: */ > > + struct cxl_port *hbs[CXL_DECODER_MAX_INTERLEAVE]; > > + int rootd_ig, i; > > + struct cxl_dport *target; > > + > > + /* Are all devices in this region on the same CXL host bridge */ > > + if (get_unique_hostbridges(region, hbs) == 1) > > + return true; > > + > > + rootd_ig = rootd->interleave_granularity; > > + > > + /* CFMWS.HBIG >= Device.Label.IG */ > > + if (rootd_ig < region_ig(region)) { > > + trace_xhb_valid(region, > > + "granularity does not support the region interleave granularity\n"); > > + return false; > > + } > > + > > + /* ((2^(CFMWS.HBIG - Device.RLabel.IG) * (2^CFMWS.ENIW)) > Device.RLabel.NLabel) */ > > + if (1 << (rootd_ig - region_ig(region)) * (1 << rootd->interleave_ways) > > > This maths isn't what the comment says it is. > ((1 << (rootd_ig - region_ig(region))) * rootd->interleaveways) > so brackets needed to avoid 2^( all the rest) and rootd->interleave_ways seems to the > actual number of ways not the log2 of it. > > That feeds through below. > > > > + region_ways(region)) { > > + trace_xhb_valid(region, > > + "granularity to device granularity ratio requires a larger number of devices than currently configured"); > > + return false; > > + } > > + > > + /* Check that endpoints are hooked up in the correct order */ > > + for_each_cxl_decoder_target(target, rootd, i) { > > + struct cxl_memdev *endpoint = region->config.targets[i]; > > + > > + if (get_hostbridge(endpoint) != target->port) { > > I think this should be > get_hostbridge(endpoint)->uport != target->dport > > As it stands you are comparing the host bridge with the root object. On closer inspection this code doesn't do what it is meant to do at all if there are multiple EP below a given root bridge. You'd expect multiple endpoints to match to each target->port. Something along the lines of this should work: { struct cxl_memdev **epgroupstart = region->config.targets; struct cxl_memdev **endpoint; for_each_cxl_decoder_target(target, rootd, i) { /* Find start of next endpoint group */ endpoint = epgroupstart; if (*endpoint == NULL) { printk("No endpoints under decoder target\n"); return false; } while (*epgroupstart && get_hostbridge(*endpoint) == get_hostbridge(*epgroupstart)) epgroupstart++; } if (*epgroupstart) { printk("still some entries left. boom\n"); return false; } } Only lightly tested with correct inputs... Next up is figuring out why the EP HDM decoder won't commit. :) Jonathan > > > + trace_xhb_valid(region, "device ordering bad\n"); > > + return false; > > + } > > + } > > + > > + /* > > + * CFMWS.InterleaveTargetList[n] must contain all devices, x where: > > + * (Device[x],RegionLabel.Position >> (CFMWS.HBIG - > > + * Device[x].RegionLabel.InterleaveGranularity)) & > > + * ((2^CFMWS.ENIW) - 1) = n > > + * > > + * Linux notes: All devices are known to have the same interleave > > + * granularity at this point. > > + */ > > + for_each_cxl_decoder_target(target, rootd, i) { > > + if (((i >> (rootd_ig - region_ig(region)))) & > > + (((1 << rootd->interleave_ways) - 1) != target->port_id)) { > > + trace_xhb_valid(region, > > + "One or more devices are not connected to the correct hostbridge."); > > + return false; > > + } > > + } > > + > > return true; > > } > > > > diff --git a/drivers/cxl/trace.h b/drivers/cxl/trace.h > > index a53f00ba5d0e..4de47d1111ac 100644 > > --- a/drivers/cxl/trace.h > > +++ b/drivers/cxl/trace.h > > @@ -38,6 +38,9 @@ DEFINE_EVENT(cxl_region_template, sanitize_failed, > > DEFINE_EVENT(cxl_region_template, allocation_failed, > > TP_PROTO(const struct cxl_region *region, char *status), > > TP_ARGS(region, status)); > > +DEFINE_EVENT(cxl_region_template, xhb_valid, > > + TP_PROTO(const struct cxl_region *region, char *status), > > + TP_ARGS(region, status)); > > > > #endif /* if !defined (__CXL_TRACE_H__) || defined(TRACE_HEADER_MULTI_READ) */ > > >
On 22-01-07 10:38:27, Jonathan Cameron wrote: > On Fri, 7 Jan 2022 10:30:52 +0000 > Jonathan Cameron <Jonathan.Cameron@huawei.com> wrote: > > > On Thu, 6 Jan 2022 16:37:52 -0800 > > Ben Widawsky <ben.widawsky@intel.com> wrote: > > > > > Cross host bridge verification primarily determines if the requested > > > interleave ordering can be achieved by the root decoder, which isn't as > > > programmable as other decoders. > > > > > > The algorithm implemented here is based on the CXL Type 3 Memory Device > > > Software Guide, chapter 2.13.14 > > > > > > Signed-off-by: Ben Widawsky <ben.widawsky@intel.com> > > > > Trivial thing inline. > > > > > diff --git a/drivers/cxl/region.c b/drivers/cxl/region.c > > > index c8e3c48dfbb9..ca559a4b5347 100644 > > > --- a/drivers/cxl/region.c > > > +++ b/drivers/cxl/region.c > > > @@ -28,6 +28,17 @@ > > > */ > > > > > > #define region_ways(region) ((region)->config.eniw) > > > +#define region_ig(region) (ilog2((region)->config.ig)) > > > + > > > +#define for_each_cxl_endpoint(ep, region, idx) \ > > > + for (idx = 0, ep = (region)->config.targets[idx]; \ > > > + idx < region_ways(region); \ > > > + idx++, ep = (region)->config.targets[idx]) > > > + > > > +#define for_each_cxl_decoder_target(target, decoder, idx) \ > > > + for (idx = 0, target = (decoder)->target[idx]; \ > > > > As target is used too often in here, you'll replace it in ->target[idx] as well. > > It happens to work today because the parameter always happens to be target > > > > > + idx < (decoder)->nr_targets; \ > > > + idx++, target++) > I should have read the next few lines :) > > target++ doesn't get (decoder)->target[idx] which is what we want - it indexes > off the end of a particular instance rather than through the array. > > I'm guessing this was from my unclear comment yesterday. I should have spent > a little more time being explicit there. > > Jonathan Yeah. I was working quickly because I ended up losing childcare for this week and wanted to get this out ASAP. I'll fix it up on the next round. > > > > >
On 22-01-07 10:38:27, Jonathan Cameron wrote: > On Fri, 7 Jan 2022 10:30:52 +0000 > Jonathan Cameron <Jonathan.Cameron@huawei.com> wrote: > > > On Thu, 6 Jan 2022 16:37:52 -0800 > > Ben Widawsky <ben.widawsky@intel.com> wrote: > > > > > Cross host bridge verification primarily determines if the requested > > > interleave ordering can be achieved by the root decoder, which isn't as > > > programmable as other decoders. > > > > > > The algorithm implemented here is based on the CXL Type 3 Memory Device > > > Software Guide, chapter 2.13.14 > > > > > > Signed-off-by: Ben Widawsky <ben.widawsky@intel.com> > > > > Trivial thing inline. > > > > > diff --git a/drivers/cxl/region.c b/drivers/cxl/region.c > > > index c8e3c48dfbb9..ca559a4b5347 100644 > > > --- a/drivers/cxl/region.c > > > +++ b/drivers/cxl/region.c > > > @@ -28,6 +28,17 @@ > > > */ > > > > > > #define region_ways(region) ((region)->config.eniw) > > > +#define region_ig(region) (ilog2((region)->config.ig)) > > > + > > > +#define for_each_cxl_endpoint(ep, region, idx) \ > > > + for (idx = 0, ep = (region)->config.targets[idx]; \ > > > + idx < region_ways(region); \ > > > + idx++, ep = (region)->config.targets[idx]) > > > + > > > +#define for_each_cxl_decoder_target(target, decoder, idx) \ > > > + for (idx = 0, target = (decoder)->target[idx]; \ > > > > As target is used too often in here, you'll replace it in ->target[idx] as well. > > It happens to work today because the parameter always happens to be target > > > > > + idx < (decoder)->nr_targets; \ > > > + idx++, target++) > I should have read the next few lines :) > > target++ doesn't get (decoder)->target[idx] which is what we want - it indexes > off the end of a particular instance rather than through the array. > > I'm guessing this was from my unclear comment yesterday. I should have spent > a little more time being explicit there. > > Jonathan > > > > > Gotcha. I combined the idx increment as well, what do you think about this (just typed, not tested): #define for_each_cxl_decoder_target(dport, decoder, idx) \ for (idx = 0, dport = (decoder)->target[idx]; \ idx < (decoder)->nr_targets; \ dport = (decoder)->target[++idx])
On 22-01-07 11:55:24, Jonathan Cameron wrote: > On Fri, 7 Jan 2022 10:07:14 +0000 > Jonathan Cameron <Jonathan.Cameron@Huawei.com> wrote: > > > On Thu, 6 Jan 2022 16:37:52 -0800 > > Ben Widawsky <ben.widawsky@intel.com> wrote: > > > > > Cross host bridge verification primarily determines if the requested > > > interleave ordering can be achieved by the root decoder, which isn't as > > > programmable as other decoders. > > > > > > The algorithm implemented here is based on the CXL Type 3 Memory Device > > > Software Guide, chapter 2.13.14 > > > > > > Signed-off-by: Ben Widawsky <ben.widawsky@intel.com> > > > > Hi Ben, > > > > A few things I'm carrying 'fixes' for in here. > > > > Jonathan > > > > > --- > > > .clang-format | 2 + > > > drivers/cxl/region.c | 89 +++++++++++++++++++++++++++++++++++++++++++- > > > drivers/cxl/trace.h | 3 ++ > > > 3 files changed, 93 insertions(+), 1 deletion(-) > > > > > > diff --git a/.clang-format b/.clang-format > > > index 15d4eaabc6b5..55f628f21722 100644 > > > --- a/.clang-format > > > +++ b/.clang-format > > > @@ -169,6 +169,8 @@ ForEachMacros: > > > - 'for_each_cpu_and' > > > - 'for_each_cpu_not' > > > - 'for_each_cpu_wrap' > > > + - 'for_each_cxl_decoder_target' > > > + - 'for_each_cxl_endpoint' > > > - 'for_each_dapm_widgets' > > > - 'for_each_dev_addr' > > > - 'for_each_dev_scope' > > > diff --git a/drivers/cxl/region.c b/drivers/cxl/region.c > > > index c8e3c48dfbb9..ca559a4b5347 100644 > > > --- a/drivers/cxl/region.c > > > +++ b/drivers/cxl/region.c > > > @@ -28,6 +28,17 @@ > > > */ > > > > > > #define region_ways(region) ((region)->config.eniw) > > > +#define region_ig(region) (ilog2((region)->config.ig)) > > > + > > > +#define for_each_cxl_endpoint(ep, region, idx) \ > > > + for (idx = 0, ep = (region)->config.targets[idx]; \ > > > + idx < region_ways(region); \ > > > + idx++, ep = (region)->config.targets[idx]) > > > + > > > +#define for_each_cxl_decoder_target(target, decoder, idx) \ > > > + for (idx = 0, target = (decoder)->target[idx]; \ > > > + idx < (decoder)->nr_targets; \ > > > + idx++, target++) > > > > > > static struct cxl_decoder *rootd_from_region(struct cxl_region *r) > > > { > > > @@ -175,6 +186,30 @@ static bool qtg_match(const struct cxl_decoder *rootd, > > > return true; > > > } > > > > > > +static int get_unique_hostbridges(const struct cxl_region *region, > > > + struct cxl_port **hbs) > > > +{ > > > + struct cxl_memdev *ep; > > > + int i, hb_count = 0; > > > + > > > + for_each_cxl_endpoint(ep, region, i) { > > > + struct cxl_port *hb = get_hostbridge(ep); > > > + bool found = false; > > > + int j; > > > + > > > + BUG_ON(!hb); > > > + > > > + for (j = 0; j < hb_count; j++) { > > > + if (hbs[j] == hb) > > > + found = true; > > > + } > > > + if (!found) > > > + hbs[hb_count++] = hb; > > > + } > > > + > > > + return hb_count; > > > +} > > > + > > > /** > > > * region_xhb_config_valid() - determine cross host bridge validity > > > * @rootd: The root decoder to check against > > > @@ -188,7 +223,59 @@ static bool qtg_match(const struct cxl_decoder *rootd, > > > static bool region_xhb_config_valid(const struct cxl_region *region, > > > const struct cxl_decoder *rootd) > > > { > > > - /* TODO: */ > > > + struct cxl_port *hbs[CXL_DECODER_MAX_INTERLEAVE]; > > > + int rootd_ig, i; > > > + struct cxl_dport *target; > > > + > > > + /* Are all devices in this region on the same CXL host bridge */ > > > + if (get_unique_hostbridges(region, hbs) == 1) > > > + return true; > > > + > > > + rootd_ig = rootd->interleave_granularity; > > > + > > > + /* CFMWS.HBIG >= Device.Label.IG */ > > > + if (rootd_ig < region_ig(region)) { > > > + trace_xhb_valid(region, > > > + "granularity does not support the region interleave granularity\n"); > > > + return false; > > > + } > > > + > > > + /* ((2^(CFMWS.HBIG - Device.RLabel.IG) * (2^CFMWS.ENIW)) > Device.RLabel.NLabel) */ > > > + if (1 << (rootd_ig - region_ig(region)) * (1 << rootd->interleave_ways) > > > > > This maths isn't what the comment says it is. > > ((1 << (rootd_ig - region_ig(region))) * rootd->interleaveways) > > so brackets needed to avoid 2^( all the rest) and rootd->interleave_ways seems to the > > actual number of ways not the log2 of it. > > > > That feeds through below. You're correct on both counts. Nice catch. > > > > > > > + region_ways(region)) { > > > + trace_xhb_valid(region, > > > + "granularity to device granularity ratio requires a larger number of devices than currently configured"); > > > + return false; > > > + } > > > + > > > + /* Check that endpoints are hooked up in the correct order */ > > > + for_each_cxl_decoder_target(target, rootd, i) { > > > + struct cxl_memdev *endpoint = region->config.targets[i]; > > > + > > > + if (get_hostbridge(endpoint) != target->port) { > > > > I think this should be > > get_hostbridge(endpoint)->uport != target->dport > > > > As it stands you are comparing the host bridge with the root object. > > On closer inspection this code doesn't do what it is meant to do at all > if there are multiple EP below a given root bridge. > > You'd expect multiple endpoints to match to each target->port. > Something along the lines of this should work: > > { > struct cxl_memdev **epgroupstart = region->config.targets; > struct cxl_memdev **endpoint; > > for_each_cxl_decoder_target(target, rootd, i) { > /* Find start of next endpoint group */ > endpoint = epgroupstart; > if (*endpoint == NULL) { > printk("No endpoints under decoder target\n"); > return false; > } > while (*epgroupstart && > get_hostbridge(*endpoint) == get_hostbridge(*epgroupstart)) > epgroupstart++; > } > if (*epgroupstart) { > printk("still some entries left. boom\n"); > return false; > } > } > > Only lightly tested with correct inputs... > > Next up is figuring out why the EP HDM decoder won't commit. :) > > Jonathan > You're correct that what I have isn't correct. However, I think this was just bogus leftover from an aborted attempt to try to implement this. See below... > > > > > > + trace_xhb_valid(region, "device ordering bad\n"); > > > + return false; > > > + } > > > + } > > > + > > > + /* > > > + * CFMWS.InterleaveTargetList[n] must contain all devices, x where: > > > + * (Device[x],RegionLabel.Position >> (CFMWS.HBIG - > > > + * Device[x].RegionLabel.InterleaveGranularity)) & > > > + * ((2^CFMWS.ENIW) - 1) = n > > > + * > > > + * Linux notes: All devices are known to have the same interleave > > > + * granularity at this point. > > > + */ > > > + for_each_cxl_decoder_target(target, rootd, i) { > > > + if (((i >> (rootd_ig - region_ig(region)))) & > > > + (((1 << rootd->interleave_ways) - 1) != target->port_id)) { > > > + trace_xhb_valid(region, > > > + "One or more devices are not connected to the correct hostbridge."); > > > + return false; > > > + } > > > + } > > > + I think this does the correct XHB calculation. So I think I can just remove the top hunk and we're good. Do you agree? > > > return true; > > > } > > > > > > diff --git a/drivers/cxl/trace.h b/drivers/cxl/trace.h > > > index a53f00ba5d0e..4de47d1111ac 100644 > > > --- a/drivers/cxl/trace.h > > > +++ b/drivers/cxl/trace.h > > > @@ -38,6 +38,9 @@ DEFINE_EVENT(cxl_region_template, sanitize_failed, > > > DEFINE_EVENT(cxl_region_template, allocation_failed, > > > TP_PROTO(const struct cxl_region *region, char *status), > > > TP_ARGS(region, status)); > > > +DEFINE_EVENT(cxl_region_template, xhb_valid, > > > + TP_PROTO(const struct cxl_region *region, char *status), > > > + TP_ARGS(region, status)); > > > > > > #endif /* if !defined (__CXL_TRACE_H__) || defined(TRACE_HEADER_MULTI_READ) */ > > > > > >
diff --git a/.clang-format b/.clang-format index 15d4eaabc6b5..55f628f21722 100644 --- a/.clang-format +++ b/.clang-format @@ -169,6 +169,8 @@ ForEachMacros: - 'for_each_cpu_and' - 'for_each_cpu_not' - 'for_each_cpu_wrap' + - 'for_each_cxl_decoder_target' + - 'for_each_cxl_endpoint' - 'for_each_dapm_widgets' - 'for_each_dev_addr' - 'for_each_dev_scope' diff --git a/drivers/cxl/region.c b/drivers/cxl/region.c index c8e3c48dfbb9..ca559a4b5347 100644 --- a/drivers/cxl/region.c +++ b/drivers/cxl/region.c @@ -28,6 +28,17 @@ */ #define region_ways(region) ((region)->config.eniw) +#define region_ig(region) (ilog2((region)->config.ig)) + +#define for_each_cxl_endpoint(ep, region, idx) \ + for (idx = 0, ep = (region)->config.targets[idx]; \ + idx < region_ways(region); \ + idx++, ep = (region)->config.targets[idx]) + +#define for_each_cxl_decoder_target(target, decoder, idx) \ + for (idx = 0, target = (decoder)->target[idx]; \ + idx < (decoder)->nr_targets; \ + idx++, target++) static struct cxl_decoder *rootd_from_region(struct cxl_region *r) { @@ -175,6 +186,30 @@ static bool qtg_match(const struct cxl_decoder *rootd, return true; } +static int get_unique_hostbridges(const struct cxl_region *region, + struct cxl_port **hbs) +{ + struct cxl_memdev *ep; + int i, hb_count = 0; + + for_each_cxl_endpoint(ep, region, i) { + struct cxl_port *hb = get_hostbridge(ep); + bool found = false; + int j; + + BUG_ON(!hb); + + for (j = 0; j < hb_count; j++) { + if (hbs[j] == hb) + found = true; + } + if (!found) + hbs[hb_count++] = hb; + } + + return hb_count; +} + /** * region_xhb_config_valid() - determine cross host bridge validity * @rootd: The root decoder to check against @@ -188,7 +223,59 @@ static bool qtg_match(const struct cxl_decoder *rootd, static bool region_xhb_config_valid(const struct cxl_region *region, const struct cxl_decoder *rootd) { - /* TODO: */ + struct cxl_port *hbs[CXL_DECODER_MAX_INTERLEAVE]; + int rootd_ig, i; + struct cxl_dport *target; + + /* Are all devices in this region on the same CXL host bridge */ + if (get_unique_hostbridges(region, hbs) == 1) + return true; + + rootd_ig = rootd->interleave_granularity; + + /* CFMWS.HBIG >= Device.Label.IG */ + if (rootd_ig < region_ig(region)) { + trace_xhb_valid(region, + "granularity does not support the region interleave granularity\n"); + return false; + } + + /* ((2^(CFMWS.HBIG - Device.RLabel.IG) * (2^CFMWS.ENIW)) > Device.RLabel.NLabel) */ + if (1 << (rootd_ig - region_ig(region)) * (1 << rootd->interleave_ways) > + region_ways(region)) { + trace_xhb_valid(region, + "granularity to device granularity ratio requires a larger number of devices than currently configured"); + return false; + } + + /* Check that endpoints are hooked up in the correct order */ + for_each_cxl_decoder_target(target, rootd, i) { + struct cxl_memdev *endpoint = region->config.targets[i]; + + if (get_hostbridge(endpoint) != target->port) { + trace_xhb_valid(region, "device ordering bad\n"); + return false; + } + } + + /* + * CFMWS.InterleaveTargetList[n] must contain all devices, x where: + * (Device[x],RegionLabel.Position >> (CFMWS.HBIG - + * Device[x].RegionLabel.InterleaveGranularity)) & + * ((2^CFMWS.ENIW) - 1) = n + * + * Linux notes: All devices are known to have the same interleave + * granularity at this point. + */ + for_each_cxl_decoder_target(target, rootd, i) { + if (((i >> (rootd_ig - region_ig(region)))) & + (((1 << rootd->interleave_ways) - 1) != target->port_id)) { + trace_xhb_valid(region, + "One or more devices are not connected to the correct hostbridge."); + return false; + } + } + return true; } diff --git a/drivers/cxl/trace.h b/drivers/cxl/trace.h index a53f00ba5d0e..4de47d1111ac 100644 --- a/drivers/cxl/trace.h +++ b/drivers/cxl/trace.h @@ -38,6 +38,9 @@ DEFINE_EVENT(cxl_region_template, sanitize_failed, DEFINE_EVENT(cxl_region_template, allocation_failed, TP_PROTO(const struct cxl_region *region, char *status), TP_ARGS(region, status)); +DEFINE_EVENT(cxl_region_template, xhb_valid, + TP_PROTO(const struct cxl_region *region, char *status), + TP_ARGS(region, status)); #endif /* if !defined (__CXL_TRACE_H__) || defined(TRACE_HEADER_MULTI_READ) */
Cross host bridge verification primarily determines if the requested interleave ordering can be achieved by the root decoder, which isn't as programmable as other decoders. The algorithm implemented here is based on the CXL Type 3 Memory Device Software Guide, chapter 2.13.14 Signed-off-by: Ben Widawsky <ben.widawsky@intel.com> --- .clang-format | 2 + drivers/cxl/region.c | 89 +++++++++++++++++++++++++++++++++++++++++++- drivers/cxl/trace.h | 3 ++ 3 files changed, 93 insertions(+), 1 deletion(-)