Message ID | 5-v7-cb149db3a320+3b5-smmuv3_newapi_p2_jgg@nvidia.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Make the SMMUv3 CD logic match the new STE design (part 2a/3) | expand |
On Tue, Apr 16, 2024 at 04:28:16PM -0300, Jason Gunthorpe wrote: > Only the attach callers can perform an allocation for the CD table entry, > the other callers must not do so, they do not have the correct locking and > they cannot sleep. Split up the functions so this is clear. > > arm_smmu_get_cd_ptr() will return pointer to a CD table entry without > doing any kind of allocation. > > arm_smmu_alloc_cd_ptr() will allocate the table and any required > leaf. > > A following patch will add lockdep assertions to arm_smmu_alloc_cd_ptr() > once the restructuring is completed and arm_smmu_alloc_cd_ptr() is never > called in the wrong context. > > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
Hi Jason, On Tue, Apr 16, 2024 at 04:28:16PM -0300, Jason Gunthorpe wrote: > Only the attach callers can perform an allocation for the CD table entry, > the other callers must not do so, they do not have the correct locking and > they cannot sleep. Split up the functions so this is clear. > > arm_smmu_get_cd_ptr() will return pointer to a CD table entry without > doing any kind of allocation. > > arm_smmu_alloc_cd_ptr() will allocate the table and any required > leaf. > > A following patch will add lockdep assertions to arm_smmu_alloc_cd_ptr() > once the restructuring is completed and arm_smmu_alloc_cd_ptr() is never > called in the wrong context. > > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> > --- > drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 61 +++++++++++++-------- > 1 file changed, 39 insertions(+), 22 deletions(-) > > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c > index f3df1ec8d258dc..a0d1237272936f 100644 > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c > @@ -98,6 +98,7 @@ static struct arm_smmu_option_prop arm_smmu_options[] = { > > static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain, > struct arm_smmu_device *smmu); > +static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master); > > static void parse_driver_options(struct arm_smmu_device *smmu) > { > @@ -1207,29 +1208,51 @@ static void arm_smmu_write_cd_l1_desc(__le64 *dst, > struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, > u32 ssid) > { > - __le64 *l1ptr; > - unsigned int idx; > struct arm_smmu_l1_ctx_desc *l1_desc; > - struct arm_smmu_device *smmu = master->smmu; > struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; > > + if (!cd_table->cdtab) > + return NULL; > + > if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_LINEAR) > return (struct arm_smmu_cd *)(cd_table->cdtab + > ssid * CTXDESC_CD_DWORDS); > > - idx = ssid >> 
CTXDESC_SPLIT; > - l1_desc = &cd_table->l1_desc[idx]; > - if (!l1_desc->l2ptr) { > - if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc)) > - return NULL; > + l1_desc = &cd_table->l1_desc[ssid / CTXDESC_L2_ENTRIES]; These operations used to be shift and bit masking which made sense as it does what hardware does, is there any reason you changed it to division and modulo? I checked the disassembly and gcc does the right thing as constants are power of 2, but I am just curious. > + if (!l1_desc->l2ptr) > + return NULL; > + return &l1_desc->l2ptr[ssid % CTXDESC_L2_ENTRIES]; > +} > > - l1ptr = cd_table->cdtab + idx * CTXDESC_L1_DESC_DWORDS; > - arm_smmu_write_cd_l1_desc(l1ptr, l1_desc); > - /* An invalid L1CD can be cached */ > - arm_smmu_sync_cd(master, ssid, false); > +static struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master, > + u32 ssid) > +{ > + struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; > + struct arm_smmu_device *smmu = master->smmu; > + > + if (!cd_table->cdtab) { > + if (arm_smmu_alloc_cd_tables(master)) > + return NULL; > } > - idx = ssid & (CTXDESC_L2_ENTRIES - 1); > - return &l1_desc->l2ptr[idx]; > + > + if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_64K_L2) { > + unsigned int idx = ssid >> CTXDESC_SPLIT; Ok, now it’s a shift, I think we should be consistent with how we calculate the index. 
> + struct arm_smmu_l1_ctx_desc *l1_desc; > + > + l1_desc = &cd_table->l1_desc[idx]; > + if (!l1_desc->l2ptr) { > + __le64 *l1ptr; > + > + if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc)) > + return NULL; > + > + l1ptr = cd_table->cdtab + idx * CTXDESC_L1_DESC_DWORDS; > + arm_smmu_write_cd_l1_desc(l1ptr, l1_desc); > + /* An invalid L1CD can be cached */ > + arm_smmu_sync_cd(master, ssid, false); > + } > + } > + return arm_smmu_get_cd_ptr(master, ssid); > } > > struct arm_smmu_cd_writer { > @@ -1357,7 +1380,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid, > if (WARN_ON(ssid >= (1 << cd_table->s1cdmax))) > return -E2BIG; > > - cd_table_entry = arm_smmu_get_cd_ptr(master, ssid); > + cd_table_entry = arm_smmu_alloc_cd_ptr(master, ssid); The only path allocates the main table is “arm_smmu_attach_dev”, I guess it would be more robust to leave that as is and have 2 versions of get_cd, one that allocates leaf and one that is not allocating, what do you think? Thanks, Mostafa > if (!cd_table_entry) > return -ENOMEM; > > @@ -2687,13 +2710,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) > struct arm_smmu_cd target_cd; > struct arm_smmu_cd *cdptr; > > - if (!master->cd_table.cdtab) { > - ret = arm_smmu_alloc_cd_tables(master); > - if (ret) > - goto out_list_del; > - } > - > - cdptr = arm_smmu_get_cd_ptr(master, IOMMU_NO_PASID); > + cdptr = arm_smmu_alloc_cd_ptr(master, IOMMU_NO_PASID); > if (!cdptr) { > ret = -ENOMEM; > goto out_list_del; > -- > 2.43.2 >
On Fri, Apr 19, 2024 at 09:14:21PM +0000, Mostafa Saleh wrote: > Hi Jason, > > On Tue, Apr 16, 2024 at 04:28:16PM -0300, Jason Gunthorpe wrote: > > Only the attach callers can perform an allocation for the CD table entry, > > the other callers must not do so, they do not have the correct locking and > > they cannot sleep. Split up the functions so this is clear. > > > > arm_smmu_get_cd_ptr() will return pointer to a CD table entry without > > doing any kind of allocation. > > > > arm_smmu_alloc_cd_ptr() will allocate the table and any required > > leaf. > > > > A following patch will add lockdep assertions to arm_smmu_alloc_cd_ptr() > > once the restructuring is completed and arm_smmu_alloc_cd_ptr() is never > > called in the wrong context. > > > > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> > > --- > > drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 61 +++++++++++++-------- > > 1 file changed, 39 insertions(+), 22 deletions(-) > > > > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c > > index f3df1ec8d258dc..a0d1237272936f 100644 > > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c > > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c > > @@ -98,6 +98,7 @@ static struct arm_smmu_option_prop arm_smmu_options[] = { > > > > static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain, > > struct arm_smmu_device *smmu); > > +static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master); > > > > static void parse_driver_options(struct arm_smmu_device *smmu) > > { > > @@ -1207,29 +1208,51 @@ static void arm_smmu_write_cd_l1_desc(__le64 *dst, > > struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, > > u32 ssid) > > { > > - __le64 *l1ptr; > > - unsigned int idx; > > struct arm_smmu_l1_ctx_desc *l1_desc; > > - struct arm_smmu_device *smmu = master->smmu; > > struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; > > > > + if (!cd_table->cdtab) > > + return NULL; > > + > > if 
(cd_table->s1fmt == STRTAB_STE_0_S1FMT_LINEAR) > > return (struct arm_smmu_cd *)(cd_table->cdtab + > > ssid * CTXDESC_CD_DWORDS); > > > > - idx = ssid >> CTXDESC_SPLIT; > > - l1_desc = &cd_table->l1_desc[idx]; > > - if (!l1_desc->l2ptr) { > > - if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc)) > > - return NULL; > > + l1_desc = &cd_table->l1_desc[ssid / CTXDESC_L2_ENTRIES]; > > These operations used to be shift and bit masking which made sense as it does > what hardware does, is there any reason you changed it to division and modulo? > I checked the disassembly and gcc does the right thing as constants are power > of 2, but I am just curious. I generally prefer the clarity and succinctness of / and % instead of hacking up bit operations that the compiler will generate automatically anyhow. If bit extractions should be used it is better to wrap it in FIELD_GET() than open code it.. > > +static struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master, > > + u32 ssid) > > +{ > > + struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; > > + struct arm_smmu_device *smmu = master->smmu; > > + > > + if (!cd_table->cdtab) { > > + if (arm_smmu_alloc_cd_tables(master)) > > + return NULL; > > } > > - idx = ssid & (CTXDESC_L2_ENTRIES - 1); > > - return &l1_desc->l2ptr[idx]; > > + > > + if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_64K_L2) { > > + unsigned int idx = ssid >> CTXDESC_SPLIT; > > Ok, now it’s a shift, I think we should be consistent with how we > calculate the index. Sure. 
Changing that to / will make CTXDESC_SPLIT unused except in computing CTXDESC_L2_ENTRIES, so that can be simplified too: -#define CTXDESC_SPLIT 10 -#define CTXDESC_L2_ENTRIES (1 << CTXDESC_SPLIT) +#define CTXDESC_L2_ENTRIES 1024 > > @@ -1357,7 +1380,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid, > > if (WARN_ON(ssid >= (1 << cd_table->s1cdmax))) > > return -E2BIG; > > > > - cd_table_entry = arm_smmu_get_cd_ptr(master, ssid); > > + cd_table_entry = arm_smmu_alloc_cd_ptr(master, ssid); > > The only path allocates the main table is “arm_smmu_attach_dev”, There are two places that allocate the leaf, arm_smmu_attach_dev() (for the RID) and arm_smmu_sva_set_dev_pasid() (for a PASID). At this moment all the paths are relying on the above to allocate the leaf. The next patch makes arm_smmu_attach_dev() allocate the leaf itself. A few more patches also make the PASID path allocate the leaf itself, when the above is removed. > I guess it would be more robust to leave that as is and have 2 > versions of get_cd, one that allocates leaf and one that is not > allocating, what do you think? I'm not sure what you are asking? We have two versions. One is called alloc and one is called get. They have different locking requirements on the caller, so they have different names. I would not call them both get? Thanks, Jason
On Mon, Apr 22, 2024 at 11:20:53AM -0300, Jason Gunthorpe wrote: > On Fri, Apr 19, 2024 at 09:14:21PM +0000, Mostafa Saleh wrote: > > Hi Jason, > > > > On Tue, Apr 16, 2024 at 04:28:16PM -0300, Jason Gunthorpe wrote: > > > Only the attach callers can perform an allocation for the CD table entry, > > > the other callers must not do so, they do not have the correct locking and > > > they cannot sleep. Split up the functions so this is clear. > > > > > > arm_smmu_get_cd_ptr() will return pointer to a CD table entry without > > > doing any kind of allocation. > > > > > > arm_smmu_alloc_cd_ptr() will allocate the table and any required > > > leaf. > > > > > > A following patch will add lockdep assertions to arm_smmu_alloc_cd_ptr() > > > once the restructuring is completed and arm_smmu_alloc_cd_ptr() is never > > > called in the wrong context. > > > > > > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> > > > --- > > > drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 61 +++++++++++++-------- > > > 1 file changed, 39 insertions(+), 22 deletions(-) > > > > > > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c > > > index f3df1ec8d258dc..a0d1237272936f 100644 > > > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c > > > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c > > > @@ -98,6 +98,7 @@ static struct arm_smmu_option_prop arm_smmu_options[] = { > > > > > > static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain, > > > struct arm_smmu_device *smmu); > > > +static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master); > > > > > > static void parse_driver_options(struct arm_smmu_device *smmu) > > > { > > > @@ -1207,29 +1208,51 @@ static void arm_smmu_write_cd_l1_desc(__le64 *dst, > > > struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, > > > u32 ssid) > > > { > > > - __le64 *l1ptr; > > > - unsigned int idx; > > > struct arm_smmu_l1_ctx_desc *l1_desc; > > > - struct arm_smmu_device 
*smmu = master->smmu; > > > struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; > > > > > > + if (!cd_table->cdtab) > > > + return NULL; > > > + > > > if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_LINEAR) > > > return (struct arm_smmu_cd *)(cd_table->cdtab + > > > ssid * CTXDESC_CD_DWORDS); > > > > > > - idx = ssid >> CTXDESC_SPLIT; > > > - l1_desc = &cd_table->l1_desc[idx]; > > > - if (!l1_desc->l2ptr) { > > > - if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc)) > > > - return NULL; > > > + l1_desc = &cd_table->l1_desc[ssid / CTXDESC_L2_ENTRIES]; > > > > These operations used to be shift and bit masking which made sense as it does > > what hardware does, is there any reason you changed it to division and modulo? > > I checked the disassembly and gcc does the right thing as constants are power > > of 2, but I am just curious. > > I generally prefer the clarity and succinctness of / and % instead of > hacking up bit operations that the compiler will generate > automatically anyhow. > > If bit extractions should be used it is better to wrap it in > FIELD_GET() than open code it.. > > > > +static struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master, > > > + u32 ssid) > > > +{ > > > + struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; > > > + struct arm_smmu_device *smmu = master->smmu; > > > + > > > + if (!cd_table->cdtab) { > > > + if (arm_smmu_alloc_cd_tables(master)) > > > + return NULL; > > > } > > > - idx = ssid & (CTXDESC_L2_ENTRIES - 1); > > > - return &l1_desc->l2ptr[idx]; > > > + > > > + if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_64K_L2) { > > > + unsigned int idx = ssid >> CTXDESC_SPLIT; > > > > Ok, now it’s a shift, I think we should be consistent with how we > > calculate the index. > > Sure. 
Change that to / will make CTXDESC_SPLIT unused except in > computing CTXDESC_L2_ENTRIES so that can be simplified too: > > -#define CTXDESC_SPLIT 10 > -#define CTXDESC_L2_ENTRIES (1 << CTXDESC_SPLIT) > +#define CTXDESC_L2_ENTRIES 1024 > Sounds good, I don’t think it matters much as long as its consistent, but anyway the split is defined by the spec to be either 6, 8 or 10. So split size has to be a power of 2. > > > > @@ -1357,7 +1380,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid, > > > if (WARN_ON(ssid >= (1 << cd_table->s1cdmax))) > > > return -E2BIG; > > > > > > - cd_table_entry = arm_smmu_get_cd_ptr(master, ssid); > > > + cd_table_entry = arm_smmu_alloc_cd_ptr(master, ssid); > > > > The only path allocates the main table is “arm_smmu_attach_dev”, > > There are two places that allocate the leaf, arm_smmu_attach_dev() > (for the RID) and arm_smmu_sva_set_dev_pasid() (for a PASID) > > At this moment all the paths are relying on the above to allocate the > leaf. The next patch makes arm_smmu_attach_dev() allocate the leaf > itself. A few more patches also makes the PASID path allocate the leaf > itself, when the above is removed. > > > I guess it would be more robust to leave that as is and have 2 > > versions of get_cd, one that allocates leaf and one that is not > > allocating, what do you think? > > I'm not sure what you are asking? We have two versions. One is called > alloc and one is called get. That have different locking requirements > on the caller so they have different names. I would not call them both > get? > My point is that arm_smmu_alloc_cd_ptr() doesn’t only allocate the leaf, but also the L1 through arm_smmu_alloc_cd_tables() IMO, arm_smmu_alloc_cd_ptr() should only allocate leafs. And inside arm_smmu_attach_dev() it calls arm_smmu_alloc_cd_tables(). This makes it clear which path is expected to allocate the L1 table. And arm_smmu_get_cd_ptr() will remain as is. Thanks, Mostafa > Thanks, > Jason
On Sat, Apr 27, 2024 at 10:19:37PM +0000, Mostafa Saleh wrote: > > I'm not sure what you are asking? We have two versions. One is called > > alloc and one is called get. That have different locking requirements > > on the caller so they have different names. I would not call them both > > get? > > > > My point is that arm_smmu_alloc_cd_ptr() doesn’t only allocate the leaf, > but also the L1 through arm_smmu_alloc_cd_tables() Sure, it is called alloc, it allocs everything to make the CD table entry usable. > IMO, arm_smmu_alloc_cd_ptr() should only allocate leafs. And inside > arm_smmu_attach_dev() it calls arm_smmu_alloc_cd_tables(). > This makes it clear which path is expected to allocate the L1 table. The PASID path sometimes has to allocate the L1 table too, why duplicate the allocation code? What is different about the L1 vs L2 that it should be open coded? Jason
On Mon, Apr 29, 2024 at 11:01:37AM -0300, Jason Gunthorpe wrote: > On Sat, Apr 27, 2024 at 10:19:37PM +0000, Mostafa Saleh wrote: > > > > I'm not sure what you are asking? We have two versions. One is called > > > alloc and one is called get. That have different locking requirements > > > on the caller so they have different names. I would not call them both > > > get? > > > > > > > My point is that arm_smmu_alloc_cd_ptr() doesn’t only allocate the leaf, > > but also the L1 through arm_smmu_alloc_cd_tables() > > Sure, it is called alloc, it allocs everything to make the CD table > entry usable. Maybe if it’s called alloc_leaf, it only allocates leafs :) > > > IMO, arm_smmu_alloc_cd_ptr() should only allocate leafs. And inside > > arm_smmu_attach_dev() it calls arm_smmu_alloc_cd_tables(). > > This makes it clear which path is expected to allocate the L1 table. > > The PASID path sometimes has to allocate the L1 table too, why > duplicate the allocation code? > > What is different about the L1 vs L2 that it should be open coded? > I don’t think it is a big problem, but my main concern is robustness. For example, a small erroneous code change might trigger allocation of the L1 table from a path that shouldn’t allocate it, and that might go unnoticed as this function will allow it, leading to memory leaks or other issues that might be harder to triage later. Limiting which path allocates which level would instead return NULL in that case and fail immediately. Thanks, Mostafa > Jason
On Mon, Apr 29, 2024 at 02:47:26PM +0000, Mostafa Saleh wrote: > > > IMO, arm_smmu_alloc_cd_ptr() should only allocate leafs. And inside > > > arm_smmu_attach_dev() it calls arm_smmu_alloc_cd_tables(). > > > This makes it clear which path is expected to allocate the L1 table. > > > > The PASID path sometimes has to allocate the L1 table too, why > > duplicate the allocation code? > > > > What is different about the L1 vs L2 that it should be open coded? > > I don’t think it is a big problem, but my main concern is robustness, > for example a small erroneous code change might trigger allocation for > L1 table from a path that shouldn’t, A few patches later we add a lockdep assertion, so a wrongly placed allocation is *very* likely to hit the lockdep assertion. If the lockdep assertion is satisfied then it is not going to cause a functional problem. > and that might go unnoticed as > this function will allow it, leading to memory leaks, Any cd table memory allocated by arm_smmu_alloc_cd_ptr() is reliably freed in arm_smmu_release_device(). > or other issues that might be harder to triage later, instead with > limiting which path allocates which level, would return a NULL in > that case and fail immediately. All cases that need to allocate a leaf need to allocate the L1 too, so it is artificial to make a distinction between them. Jason
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index f3df1ec8d258dc..a0d1237272936f 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -98,6 +98,7 @@ static struct arm_smmu_option_prop arm_smmu_options[] = { static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain, struct arm_smmu_device *smmu); +static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master); static void parse_driver_options(struct arm_smmu_device *smmu) { @@ -1207,29 +1208,51 @@ static void arm_smmu_write_cd_l1_desc(__le64 *dst, struct arm_smmu_cd *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, u32 ssid) { - __le64 *l1ptr; - unsigned int idx; struct arm_smmu_l1_ctx_desc *l1_desc; - struct arm_smmu_device *smmu = master->smmu; struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; + if (!cd_table->cdtab) + return NULL; + if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_LINEAR) return (struct arm_smmu_cd *)(cd_table->cdtab + ssid * CTXDESC_CD_DWORDS); - idx = ssid >> CTXDESC_SPLIT; - l1_desc = &cd_table->l1_desc[idx]; - if (!l1_desc->l2ptr) { - if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc)) - return NULL; + l1_desc = &cd_table->l1_desc[ssid / CTXDESC_L2_ENTRIES]; + if (!l1_desc->l2ptr) + return NULL; + return &l1_desc->l2ptr[ssid % CTXDESC_L2_ENTRIES]; +} - l1ptr = cd_table->cdtab + idx * CTXDESC_L1_DESC_DWORDS; - arm_smmu_write_cd_l1_desc(l1ptr, l1_desc); - /* An invalid L1CD can be cached */ - arm_smmu_sync_cd(master, ssid, false); +static struct arm_smmu_cd *arm_smmu_alloc_cd_ptr(struct arm_smmu_master *master, + u32 ssid) +{ + struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; + struct arm_smmu_device *smmu = master->smmu; + + if (!cd_table->cdtab) { + if (arm_smmu_alloc_cd_tables(master)) + return NULL; } - idx = ssid & (CTXDESC_L2_ENTRIES - 1); - return &l1_desc->l2ptr[idx]; + + if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_64K_L2) { + unsigned int 
idx = ssid >> CTXDESC_SPLIT; + struct arm_smmu_l1_ctx_desc *l1_desc; + + l1_desc = &cd_table->l1_desc[idx]; + if (!l1_desc->l2ptr) { + __le64 *l1ptr; + + if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc)) + return NULL; + + l1ptr = cd_table->cdtab + idx * CTXDESC_L1_DESC_DWORDS; + arm_smmu_write_cd_l1_desc(l1ptr, l1_desc); + /* An invalid L1CD can be cached */ + arm_smmu_sync_cd(master, ssid, false); + } + } + return arm_smmu_get_cd_ptr(master, ssid); } struct arm_smmu_cd_writer { @@ -1357,7 +1380,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid, if (WARN_ON(ssid >= (1 << cd_table->s1cdmax))) return -E2BIG; - cd_table_entry = arm_smmu_get_cd_ptr(master, ssid); + cd_table_entry = arm_smmu_alloc_cd_ptr(master, ssid); if (!cd_table_entry) return -ENOMEM; @@ -2687,13 +2710,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) struct arm_smmu_cd target_cd; struct arm_smmu_cd *cdptr; - if (!master->cd_table.cdtab) { - ret = arm_smmu_alloc_cd_tables(master); - if (ret) - goto out_list_del; - } - - cdptr = arm_smmu_get_cd_ptr(master, IOMMU_NO_PASID); + cdptr = arm_smmu_alloc_cd_ptr(master, IOMMU_NO_PASID); if (!cdptr) { ret = -ENOMEM; goto out_list_del;
Only the attach callers can perform an allocation for the CD table entry; the other callers must not do so, as they do not have the correct locking and they cannot sleep. Split up the functions so this is clear. arm_smmu_get_cd_ptr() will return a pointer to a CD table entry without doing any kind of allocation. arm_smmu_alloc_cd_ptr() will allocate the table and any required leaf. A following patch will add lockdep assertions to arm_smmu_alloc_cd_ptr() once the restructuring is completed and arm_smmu_alloc_cd_ptr() is never called in the wrong context. Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 61 +++++++++++++-------- 1 file changed, 39 insertions(+), 22 deletions(-)