Message ID | 1-v7-cb149db3a320+3b5-smmuv3_newapi_p2_jgg@nvidia.com (mailing list archive) |
---|---|
State | New, archived |
Series | Make the SMMUv3 CD logic match the new STE design (part 2a/3) |
On Tue, Apr 16, 2024 at 04:28:12PM -0300, Jason Gunthorpe wrote:
> Prepare to put the CD code into the same mechanism. Add an ops indirection
> around all the STE specific code and make the worker functions independent
> of the entry content being processed.
>
> get_used and sync ops are provided to hook the correct code.
>
> Signed-off-by: Michael Shavit <mshavit@google.com>
> Reviewed-by: Michael Shavit <mshavit@google.com>
> Tested-by: Nicolin Chen <nicolinc@nvidia.com>
> Tested-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>

Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
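[Editor's note] For readers following the design rather than the full diff below, here is a minimal standalone sketch of the ops indirection the commit message describes: a generic writer carries only an ops pointer, and the STE-specific writer embeds it and recovers itself in its sync callback. This is an illustration under simplified assumptions, not the driver code; uint64_t stands in for __le64, the names are shortened, and a printf stands in for issuing CMDQ_OP_CFGI_STE on the command queue.

```c
/* Standalone illustration only: __le64 approximated by uint64_t, the
 * command-queue sync replaced by a printf. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define NUM_ENTRY_QWORDS 8

struct entry_writer;

struct entry_writer_ops {
	/* Report which bits of the entry the hardware currently depends on */
	void (*get_used)(const uint64_t *entry, uint64_t *used);
	/* Make qwords just written visible to the SMMU (CFGI + CMD_SYNC) */
	void (*sync)(struct entry_writer *writer);
};

struct entry_writer {
	const struct entry_writer_ops *ops;
};

/* STE flavour: carries the StreamID its sync op needs */
struct ste_writer {
	struct entry_writer writer;
	uint32_t sid;
};

static void ste_sync(struct entry_writer *writer)
{
	struct ste_writer *sw =
		(struct ste_writer *)((char *)writer -
				      offsetof(struct ste_writer, writer));

	/* The real op issues CMDQ_OP_CFGI_STE for sw->sid here */
	printf("sync STE for sid %u\n", sw->sid);
}

static void ste_get_used(const uint64_t *entry, uint64_t *used)
{
	used[0] = 0x1;			/* valid bit is always significant */
	if (entry[0] & 0x1)
		used[0] |= 0xe;		/* pretend-CFG field when valid */
}

static const struct entry_writer_ops ste_writer_ops = {
	.get_used = ste_get_used,
	.sync = ste_sync,
};

int main(void)
{
	struct ste_writer sw = {
		.writer = { .ops = &ste_writer_ops },
		.sid = 42,
	};
	uint64_t entry[NUM_ENTRY_QWORDS] = { 0x1 };
	uint64_t used[NUM_ENTRY_QWORDS] = { 0 };

	/* Generic code only ever sees the embedded entry_writer */
	sw.writer.ops->get_used(entry, used);
	sw.writer.ops->sync(&sw.writer);
	return 0;
}
```

The generic update path in the patch (arm_smmu_write_entry) only dereferences the embedded writer, which is what lets a CD flavour later supply its own get_used/sync without touching the update algorithm.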
Hi Jason, On Tue, Apr 16, 2024 at 04:28:12PM -0300, Jason Gunthorpe wrote: > Prepare to put the CD code into the same mechanism. Add an ops indirection > around all the STE specific code and make the worker functions independent > of the entry content being processed. > > get_used and sync ops are provided to hook the correct code. > > Signed-off-by: Michael Shavit <mshavit@google.com> > Reviewed-by: Michael Shavit <mshavit@google.com> > Tested-by: Nicolin Chen <nicolinc@nvidia.com> > Tested-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com> > Signed-off-by: Jason Gunthorpe <jgg@nvidia.com> > --- > drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 178 ++++++++++++-------- > 1 file changed, 106 insertions(+), 72 deletions(-) > > diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c > index 79c18e95dd293e..bf105e914d38b1 100644 > --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c > +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c > @@ -42,8 +42,20 @@ enum arm_smmu_msi_index { > ARM_SMMU_MAX_MSIS, > }; > > -static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, > - ioasid_t sid); > +struct arm_smmu_entry_writer_ops; > +struct arm_smmu_entry_writer { > + const struct arm_smmu_entry_writer_ops *ops; > + struct arm_smmu_master *master; > +}; > + > +struct arm_smmu_entry_writer_ops { > + __le64 v_bit; > + void (*get_used)(const __le64 *entry, __le64 *used); > + void (*sync)(struct arm_smmu_entry_writer *writer); > +}; > + > +#define NUM_ENTRY_QWORDS 8 > +static_assert(sizeof(struct arm_smmu_ste) == NUM_ENTRY_QWORDS * sizeof(u64)); > > static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = { > [EVTQ_MSI_INDEX] = { > @@ -972,43 +984,42 @@ void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid) > * would be nice if this was complete according to the spec, but minimally it > * has to capture the bits this driver uses. 
> */ > -static void arm_smmu_get_ste_used(const struct arm_smmu_ste *ent, > - struct arm_smmu_ste *used_bits) > +static void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits) > { > - unsigned int cfg = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(ent->data[0])); > + unsigned int cfg = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(ent[0])); > > - used_bits->data[0] = cpu_to_le64(STRTAB_STE_0_V); > - if (!(ent->data[0] & cpu_to_le64(STRTAB_STE_0_V))) > + used_bits[0] = cpu_to_le64(STRTAB_STE_0_V); > + if (!(ent[0] & cpu_to_le64(STRTAB_STE_0_V))) > return; > > - used_bits->data[0] |= cpu_to_le64(STRTAB_STE_0_CFG); > + used_bits[0] |= cpu_to_le64(STRTAB_STE_0_CFG); > > /* S1 translates */ > if (cfg & BIT(0)) { > - used_bits->data[0] |= cpu_to_le64(STRTAB_STE_0_S1FMT | > - STRTAB_STE_0_S1CTXPTR_MASK | > - STRTAB_STE_0_S1CDMAX); > - used_bits->data[1] |= > + used_bits[0] |= cpu_to_le64(STRTAB_STE_0_S1FMT | > + STRTAB_STE_0_S1CTXPTR_MASK | > + STRTAB_STE_0_S1CDMAX); > + used_bits[1] |= > cpu_to_le64(STRTAB_STE_1_S1DSS | STRTAB_STE_1_S1CIR | > STRTAB_STE_1_S1COR | STRTAB_STE_1_S1CSH | > STRTAB_STE_1_S1STALLD | STRTAB_STE_1_STRW | > STRTAB_STE_1_EATS); > - used_bits->data[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID); > + used_bits[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID); > } > > /* S2 translates */ > if (cfg & BIT(1)) { > - used_bits->data[1] |= > + used_bits[1] |= > cpu_to_le64(STRTAB_STE_1_EATS | STRTAB_STE_1_SHCFG); > - used_bits->data[2] |= > + used_bits[2] |= > cpu_to_le64(STRTAB_STE_2_S2VMID | STRTAB_STE_2_VTCR | > STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2ENDI | > STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2R); > - used_bits->data[3] |= cpu_to_le64(STRTAB_STE_3_S2TTB_MASK); > + used_bits[3] |= cpu_to_le64(STRTAB_STE_3_S2TTB_MASK); > } > > if (cfg == STRTAB_STE_0_CFG_BYPASS) > - used_bits->data[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG); > + used_bits[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG); > } > > /* > @@ -1017,57 +1028,55 @@ static void arm_smmu_get_ste_used(const struct arm_smmu_ste *ent, > * unused_update is an intermediate value of entry that has unused bits set to > * their new values. > */ > -static u8 arm_smmu_entry_qword_diff(const struct arm_smmu_ste *entry, > - const struct arm_smmu_ste *target, > - struct arm_smmu_ste *unused_update) > +static u8 arm_smmu_entry_qword_diff(struct arm_smmu_entry_writer *writer, > + const __le64 *entry, const __le64 *target, > + __le64 *unused_update) > { > - struct arm_smmu_ste target_used = {}; > - struct arm_smmu_ste cur_used = {}; > + __le64 target_used[NUM_ENTRY_QWORDS] = {}; > + __le64 cur_used[NUM_ENTRY_QWORDS] = {}; > u8 used_qword_diff = 0; > unsigned int i; > > - arm_smmu_get_ste_used(entry, &cur_used); > - arm_smmu_get_ste_used(target, &target_used); > + writer->ops->get_used(entry, cur_used); > + writer->ops->get_used(target, target_used); > > - for (i = 0; i != ARRAY_SIZE(target_used.data); i++) { > + for (i = 0; i != NUM_ENTRY_QWORDS; i++) { > /* > * Check that masks are up to date, the make functions are not > * allowed to set a bit to 1 if the used function doesn't say it > * is used. > */ > - WARN_ON_ONCE(target->data[i] & ~target_used.data[i]); > + WARN_ON_ONCE(target[i] & ~target_used[i]); > > /* Bits can change because they are not currently being used */ > - unused_update->data[i] = (entry->data[i] & cur_used.data[i]) | > - (target->data[i] & ~cur_used.data[i]); > + unused_update[i] = (entry[i] & cur_used[i]) | > + (target[i] & ~cur_used[i]); > /* > * Each bit indicates that a used bit in a qword needs to be > * changed after unused_update is applied. 
> */ > - if ((unused_update->data[i] & target_used.data[i]) != > - target->data[i]) > + if ((unused_update[i] & target_used[i]) != target[i]) > used_qword_diff |= 1 << i; > } > return used_qword_diff; > } > > -static bool entry_set(struct arm_smmu_device *smmu, ioasid_t sid, > - struct arm_smmu_ste *entry, > - const struct arm_smmu_ste *target, unsigned int start, > +static bool entry_set(struct arm_smmu_entry_writer *writer, __le64 *entry, > + const __le64 *target, unsigned int start, > unsigned int len) > { > bool changed = false; > unsigned int i; > > for (i = start; len != 0; len--, i++) { > - if (entry->data[i] != target->data[i]) { > - WRITE_ONCE(entry->data[i], target->data[i]); > + if (entry[i] != target[i]) { > + WRITE_ONCE(entry[i], target[i]); > changed = true; > } > } > > if (changed) > - arm_smmu_sync_ste_for_sid(smmu, sid); > + writer->ops->sync(writer); > return changed; > } > > @@ -1097,24 +1106,21 @@ static bool entry_set(struct arm_smmu_device *smmu, ioasid_t sid, > * V=0 process. This relies on the IGNORED behavior described in the > * specification. > */ > -static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid, > - struct arm_smmu_ste *entry, > - const struct arm_smmu_ste *target) > +static void arm_smmu_write_entry(struct arm_smmu_entry_writer *writer, > + __le64 *entry, const __le64 *target) > { > - unsigned int num_entry_qwords = ARRAY_SIZE(target->data); > - struct arm_smmu_device *smmu = master->smmu; > - struct arm_smmu_ste unused_update; > + __le64 unused_update[NUM_ENTRY_QWORDS]; > u8 used_qword_diff; > > used_qword_diff = > - arm_smmu_entry_qword_diff(entry, target, &unused_update); > + arm_smmu_entry_qword_diff(writer, entry, target, unused_update); > if (hweight8(used_qword_diff) == 1) { > /* > * Only one qword needs its used bits to be changed. This is a > - * hitless update, update all bits the current STE is ignoring > - * to their new values, then update a single "critical qword" to > - * change the STE and finally 0 out any bits that are now unused > - * in the target configuration. > + * hitless update, update all bits the current STE/CD is > + * ignoring to their new values, then update a single "critical > + * qword" to change the STE/CD and finally 0 out any bits that > + * are now unused in the target configuration. > */ > unsigned int critical_qword_index = ffs(used_qword_diff) - 1; > > @@ -1123,22 +1129,21 @@ static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid, > * writing it in the next step anyways. This can save a sync > * when the only change is in that qword. > */ > - unused_update.data[critical_qword_index] = > - entry->data[critical_qword_index]; > - entry_set(smmu, sid, entry, &unused_update, 0, num_entry_qwords); > - entry_set(smmu, sid, entry, target, critical_qword_index, 1); > - entry_set(smmu, sid, entry, target, 0, num_entry_qwords); > + unused_update[critical_qword_index] = > + entry[critical_qword_index]; > + entry_set(writer, entry, unused_update, 0, NUM_ENTRY_QWORDS); > + entry_set(writer, entry, target, critical_qword_index, 1); > + entry_set(writer, entry, target, 0, NUM_ENTRY_QWORDS); > } else if (used_qword_diff) { > /* > * At least two qwords need their inuse bits to be changed. 
This > * requires a breaking update, zero the V bit, write all qwords > * but 0, then set qword 0 > */ > - unused_update.data[0] = entry->data[0] & > - cpu_to_le64(~STRTAB_STE_0_V); > - entry_set(smmu, sid, entry, &unused_update, 0, 1); > - entry_set(smmu, sid, entry, target, 1, num_entry_qwords - 1); > - entry_set(smmu, sid, entry, target, 0, 1); > + unused_update[0] = entry[0] & (~writer->ops->v_bit); arm_smmu_write_entry() assumes that v_bit is in entry[0] and that “1” means valid (which is true for both STE and CD) so why do we care about it, if we break the STE/CD anyway, why not just do: unused_update[0] = 0; entry_set(writer, entry, unused_update, 0, 1); entry_set(writer, entry, target, 1, NUM_ENTRY_QWORDS - 1) entry_set(writer, entry, target, 0, 1); That makes the code simpler by avoiding having the v_bit in arm_smmu_entry_writer_ops. Thanks, Mostafa > + entry_set(writer, entry, unused_update, 0, 1); > + entry_set(writer, entry, target, 1, NUM_ENTRY_QWORDS - 1); > + entry_set(writer, entry, target, 0, 1); > } else { > /* > * No inuse bit changed. Sanity check that all unused bits are 0 > @@ -1146,18 +1151,7 @@ static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid, > * compute_qword_diff(). > */ > WARN_ON_ONCE( > - entry_set(smmu, sid, entry, target, 0, num_entry_qwords)); > - } > - > - /* It's likely that we'll want to use the new STE soon */ > - if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH)) { > - struct arm_smmu_cmdq_ent > - prefetch_cmd = { .opcode = CMDQ_OP_PREFETCH_CFG, > - .prefetch = { > - .sid = sid, > - } }; > - > - arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd); > + entry_set(writer, entry, target, 0, NUM_ENTRY_QWORDS)); > } > } > > @@ -1430,17 +1424,57 @@ arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc) > WRITE_ONCE(*dst, cpu_to_le64(val)); > } > > -static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid) > +struct arm_smmu_ste_writer { > + struct arm_smmu_entry_writer writer; > + u32 sid; > +}; > + > +static void arm_smmu_ste_writer_sync_entry(struct arm_smmu_entry_writer *writer) > { > + struct arm_smmu_ste_writer *ste_writer = > + container_of(writer, struct arm_smmu_ste_writer, writer); > struct arm_smmu_cmdq_ent cmd = { > .opcode = CMDQ_OP_CFGI_STE, > .cfgi = { > - .sid = sid, > + .sid = ste_writer->sid, > .leaf = true, > }, > }; > > - arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); > + arm_smmu_cmdq_issue_cmd_with_sync(writer->master->smmu, &cmd); > +} > + > +static const struct arm_smmu_entry_writer_ops arm_smmu_ste_writer_ops = { > + .sync = arm_smmu_ste_writer_sync_entry, > + .get_used = arm_smmu_get_ste_used, > + .v_bit = cpu_to_le64(STRTAB_STE_0_V), > +}; > + > +static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid, > + struct arm_smmu_ste *ste, > + const struct arm_smmu_ste *target) > +{ > + struct arm_smmu_device *smmu = master->smmu; > + struct arm_smmu_ste_writer ste_writer = { > + .writer = { > + .ops = &arm_smmu_ste_writer_ops, > + .master = master, > + }, > + .sid = sid, > + }; > + > + arm_smmu_write_entry(&ste_writer.writer, ste->data, target->data); > + > + /* It's likely that we'll want to use the new STE soon */ > + if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH)) { > + struct arm_smmu_cmdq_ent > + prefetch_cmd = { .opcode = CMDQ_OP_PREFETCH_CFG, > + .prefetch = { > + .sid = sid, > + } }; > + > + arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd); > + } > } > > static void arm_smmu_make_abort_ste(struct arm_smmu_ste *target) > -- > 2.43.2 >
On Fri, Apr 19, 2024 at 09:02:32PM +0000, Mostafa Saleh wrote:
> >         } else if (used_qword_diff) {
> >                 /*
> >                  * At least two qwords need their inuse bits to be changed. This
> >                  * requires a breaking update, zero the V bit, write all qwords
> >                  * but 0, then set qword 0
> >                  */
> > -               unused_update.data[0] = entry->data[0] &
> > -                                       cpu_to_le64(~STRTAB_STE_0_V);
> > -               entry_set(smmu, sid, entry, &unused_update, 0, 1);
> > -               entry_set(smmu, sid, entry, target, 1, num_entry_qwords - 1);
> > -               entry_set(smmu, sid, entry, target, 0, 1);
> > +               unused_update[0] = entry[0] & (~writer->ops->v_bit);
>
> arm_smmu_write_entry() assumes that v_bit is in entry[0] and that “1” means valid
> (which is true for both STE and CD) so why do we care about it, if we break the
> STE/CD anyway, why not just do:
>
> unused_update[0] = 0;
> entry_set(writer, entry, unused_update, 0, 1);
> entry_set(writer, entry, target, 1, NUM_ENTRY_QWORDS - 1)
> entry_set(writer, entry, target, 0, 1);
>
> That makes the code simpler by avoiding having the v_bit in
> arm_smmu_entry_writer_ops.

Sure, done

Jason
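[Editor's note] A compact standalone sketch of what the breaking-update path might look like once v_bit is dropped, per Mostafa's suggestion that Jason accepted above. This is an illustration under that assumption, not the actual follow-up revision of the patch: uint64_t stands in for __le64, entry_set() is reproduced in simplified form (no WRITE_ONCE), and the sync op is a stub.

```c
/* Illustration only: not driver code, just the shape Mostafa proposed. */
#include <stdbool.h>
#include <stdint.h>

#define NUM_ENTRY_QWORDS 8

struct entry_writer {
	/* no v_bit op needed: zeroing qword 0 clears V for both STE and CD */
	void (*sync)(struct entry_writer *writer);
};

static bool entry_set(struct entry_writer *writer, uint64_t *entry,
		      const uint64_t *target, unsigned int start,
		      unsigned int len)
{
	bool changed = false;
	unsigned int i;

	for (i = start; len != 0; len--, i++) {
		if (entry[i] != target[i]) {
			entry[i] = target[i];	/* WRITE_ONCE() in the driver */
			changed = true;
		}
	}
	if (changed)
		writer->sync(writer);
	return changed;
}

static void breaking_update(struct entry_writer *writer, uint64_t *entry,
			    const uint64_t *target)
{
	const uint64_t zero[NUM_ENTRY_QWORDS] = { 0 };

	/* 1. Zero qword 0: V=0, so the remaining qwords become IGNORED */
	entry_set(writer, entry, zero, 0, 1);
	/* 2. Fill in qwords 1..7 while the entry is invalid */
	entry_set(writer, entry, target, 1, NUM_ENTRY_QWORDS - 1);
	/* 3. Write qword 0 last; the entry becomes valid in one step */
	entry_set(writer, entry, target, 0, 1);
}

static void noop_sync(struct entry_writer *writer) { (void)writer; }

int main(void)
{
	struct entry_writer w = { .sync = noop_sync };
	uint64_t cur[NUM_ENTRY_QWORDS] = { 0x1, 0xaa };
	uint64_t tgt[NUM_ENTRY_QWORDS] = { 0x5, 0xbb, 0xcc };

	breaking_update(&w, cur, tgt);
	return 0;
}
```

Zeroing the whole of qword 0, rather than masking out only the V bit, is safe because once V=0 the SMMU treats the rest of the entry as IGNORED, which is the same property the break-before-make sequence already relies on; the only cost is that any qword-0 bits that differ are rewritten in the final step anyway.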
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 79c18e95dd293e..bf105e914d38b1 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -42,8 +42,20 @@ enum arm_smmu_msi_index { ARM_SMMU_MAX_MSIS, }; -static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, - ioasid_t sid); +struct arm_smmu_entry_writer_ops; +struct arm_smmu_entry_writer { + const struct arm_smmu_entry_writer_ops *ops; + struct arm_smmu_master *master; +}; + +struct arm_smmu_entry_writer_ops { + __le64 v_bit; + void (*get_used)(const __le64 *entry, __le64 *used); + void (*sync)(struct arm_smmu_entry_writer *writer); +}; + +#define NUM_ENTRY_QWORDS 8 +static_assert(sizeof(struct arm_smmu_ste) == NUM_ENTRY_QWORDS * sizeof(u64)); static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = { [EVTQ_MSI_INDEX] = { @@ -972,43 +984,42 @@ void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid) * would be nice if this was complete according to the spec, but minimally it * has to capture the bits this driver uses. */ -static void arm_smmu_get_ste_used(const struct arm_smmu_ste *ent, - struct arm_smmu_ste *used_bits) +static void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits) { - unsigned int cfg = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(ent->data[0])); + unsigned int cfg = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(ent[0])); - used_bits->data[0] = cpu_to_le64(STRTAB_STE_0_V); - if (!(ent->data[0] & cpu_to_le64(STRTAB_STE_0_V))) + used_bits[0] = cpu_to_le64(STRTAB_STE_0_V); + if (!(ent[0] & cpu_to_le64(STRTAB_STE_0_V))) return; - used_bits->data[0] |= cpu_to_le64(STRTAB_STE_0_CFG); + used_bits[0] |= cpu_to_le64(STRTAB_STE_0_CFG); /* S1 translates */ if (cfg & BIT(0)) { - used_bits->data[0] |= cpu_to_le64(STRTAB_STE_0_S1FMT | - STRTAB_STE_0_S1CTXPTR_MASK | - STRTAB_STE_0_S1CDMAX); - used_bits->data[1] |= + used_bits[0] |= cpu_to_le64(STRTAB_STE_0_S1FMT | + STRTAB_STE_0_S1CTXPTR_MASK | + STRTAB_STE_0_S1CDMAX); + used_bits[1] |= cpu_to_le64(STRTAB_STE_1_S1DSS | STRTAB_STE_1_S1CIR | STRTAB_STE_1_S1COR | STRTAB_STE_1_S1CSH | STRTAB_STE_1_S1STALLD | STRTAB_STE_1_STRW | STRTAB_STE_1_EATS); - used_bits->data[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID); + used_bits[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID); } /* S2 translates */ if (cfg & BIT(1)) { - used_bits->data[1] |= + used_bits[1] |= cpu_to_le64(STRTAB_STE_1_EATS | STRTAB_STE_1_SHCFG); - used_bits->data[2] |= + used_bits[2] |= cpu_to_le64(STRTAB_STE_2_S2VMID | STRTAB_STE_2_VTCR | STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2ENDI | STRTAB_STE_2_S2PTW | STRTAB_STE_2_S2R); - used_bits->data[3] |= cpu_to_le64(STRTAB_STE_3_S2TTB_MASK); + used_bits[3] |= cpu_to_le64(STRTAB_STE_3_S2TTB_MASK); } if (cfg == STRTAB_STE_0_CFG_BYPASS) - used_bits->data[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG); + used_bits[1] |= cpu_to_le64(STRTAB_STE_1_SHCFG); } /* @@ -1017,57 +1028,55 @@ static void arm_smmu_get_ste_used(const struct arm_smmu_ste *ent, * unused_update is an intermediate value of entry that has unused bits set to * their new values. 
*/ -static u8 arm_smmu_entry_qword_diff(const struct arm_smmu_ste *entry, - const struct arm_smmu_ste *target, - struct arm_smmu_ste *unused_update) +static u8 arm_smmu_entry_qword_diff(struct arm_smmu_entry_writer *writer, + const __le64 *entry, const __le64 *target, + __le64 *unused_update) { - struct arm_smmu_ste target_used = {}; - struct arm_smmu_ste cur_used = {}; + __le64 target_used[NUM_ENTRY_QWORDS] = {}; + __le64 cur_used[NUM_ENTRY_QWORDS] = {}; u8 used_qword_diff = 0; unsigned int i; - arm_smmu_get_ste_used(entry, &cur_used); - arm_smmu_get_ste_used(target, &target_used); + writer->ops->get_used(entry, cur_used); + writer->ops->get_used(target, target_used); - for (i = 0; i != ARRAY_SIZE(target_used.data); i++) { + for (i = 0; i != NUM_ENTRY_QWORDS; i++) { /* * Check that masks are up to date, the make functions are not * allowed to set a bit to 1 if the used function doesn't say it * is used. */ - WARN_ON_ONCE(target->data[i] & ~target_used.data[i]); + WARN_ON_ONCE(target[i] & ~target_used[i]); /* Bits can change because they are not currently being used */ - unused_update->data[i] = (entry->data[i] & cur_used.data[i]) | - (target->data[i] & ~cur_used.data[i]); + unused_update[i] = (entry[i] & cur_used[i]) | + (target[i] & ~cur_used[i]); /* * Each bit indicates that a used bit in a qword needs to be * changed after unused_update is applied. */ - if ((unused_update->data[i] & target_used.data[i]) != - target->data[i]) + if ((unused_update[i] & target_used[i]) != target[i]) used_qword_diff |= 1 << i; } return used_qword_diff; } -static bool entry_set(struct arm_smmu_device *smmu, ioasid_t sid, - struct arm_smmu_ste *entry, - const struct arm_smmu_ste *target, unsigned int start, +static bool entry_set(struct arm_smmu_entry_writer *writer, __le64 *entry, + const __le64 *target, unsigned int start, unsigned int len) { bool changed = false; unsigned int i; for (i = start; len != 0; len--, i++) { - if (entry->data[i] != target->data[i]) { - WRITE_ONCE(entry->data[i], target->data[i]); + if (entry[i] != target[i]) { + WRITE_ONCE(entry[i], target[i]); changed = true; } } if (changed) - arm_smmu_sync_ste_for_sid(smmu, sid); + writer->ops->sync(writer); return changed; } @@ -1097,24 +1106,21 @@ static bool entry_set(struct arm_smmu_device *smmu, ioasid_t sid, * V=0 process. This relies on the IGNORED behavior described in the * specification. */ -static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid, - struct arm_smmu_ste *entry, - const struct arm_smmu_ste *target) +static void arm_smmu_write_entry(struct arm_smmu_entry_writer *writer, + __le64 *entry, const __le64 *target) { - unsigned int num_entry_qwords = ARRAY_SIZE(target->data); - struct arm_smmu_device *smmu = master->smmu; - struct arm_smmu_ste unused_update; + __le64 unused_update[NUM_ENTRY_QWORDS]; u8 used_qword_diff; used_qword_diff = - arm_smmu_entry_qword_diff(entry, target, &unused_update); + arm_smmu_entry_qword_diff(writer, entry, target, unused_update); if (hweight8(used_qword_diff) == 1) { /* * Only one qword needs its used bits to be changed. This is a - * hitless update, update all bits the current STE is ignoring - * to their new values, then update a single "critical qword" to - * change the STE and finally 0 out any bits that are now unused - * in the target configuration. 
+ * hitless update, update all bits the current STE/CD is + * ignoring to their new values, then update a single "critical + * qword" to change the STE/CD and finally 0 out any bits that + * are now unused in the target configuration. */ unsigned int critical_qword_index = ffs(used_qword_diff) - 1; @@ -1123,22 +1129,21 @@ static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid, * writing it in the next step anyways. This can save a sync * when the only change is in that qword. */ - unused_update.data[critical_qword_index] = - entry->data[critical_qword_index]; - entry_set(smmu, sid, entry, &unused_update, 0, num_entry_qwords); - entry_set(smmu, sid, entry, target, critical_qword_index, 1); - entry_set(smmu, sid, entry, target, 0, num_entry_qwords); + unused_update[critical_qword_index] = + entry[critical_qword_index]; + entry_set(writer, entry, unused_update, 0, NUM_ENTRY_QWORDS); + entry_set(writer, entry, target, critical_qword_index, 1); + entry_set(writer, entry, target, 0, NUM_ENTRY_QWORDS); } else if (used_qword_diff) { /* * At least two qwords need their inuse bits to be changed. This * requires a breaking update, zero the V bit, write all qwords * but 0, then set qword 0 */ - unused_update.data[0] = entry->data[0] & - cpu_to_le64(~STRTAB_STE_0_V); - entry_set(smmu, sid, entry, &unused_update, 0, 1); - entry_set(smmu, sid, entry, target, 1, num_entry_qwords - 1); - entry_set(smmu, sid, entry, target, 0, 1); + unused_update[0] = entry[0] & (~writer->ops->v_bit); + entry_set(writer, entry, unused_update, 0, 1); + entry_set(writer, entry, target, 1, NUM_ENTRY_QWORDS - 1); + entry_set(writer, entry, target, 0, 1); } else { /* * No inuse bit changed. Sanity check that all unused bits are 0 @@ -1146,18 +1151,7 @@ static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid, * compute_qword_diff(). 
*/ WARN_ON_ONCE( - entry_set(smmu, sid, entry, target, 0, num_entry_qwords)); - } - - /* It's likely that we'll want to use the new STE soon */ - if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH)) { - struct arm_smmu_cmdq_ent - prefetch_cmd = { .opcode = CMDQ_OP_PREFETCH_CFG, - .prefetch = { - .sid = sid, - } }; - - arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd); + entry_set(writer, entry, target, 0, NUM_ENTRY_QWORDS)); } } @@ -1430,17 +1424,57 @@ arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc) WRITE_ONCE(*dst, cpu_to_le64(val)); } -static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid) +struct arm_smmu_ste_writer { + struct arm_smmu_entry_writer writer; + u32 sid; +}; + +static void arm_smmu_ste_writer_sync_entry(struct arm_smmu_entry_writer *writer) { + struct arm_smmu_ste_writer *ste_writer = + container_of(writer, struct arm_smmu_ste_writer, writer); struct arm_smmu_cmdq_ent cmd = { .opcode = CMDQ_OP_CFGI_STE, .cfgi = { - .sid = sid, + .sid = ste_writer->sid, .leaf = true, }, }; - arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); + arm_smmu_cmdq_issue_cmd_with_sync(writer->master->smmu, &cmd); +} + +static const struct arm_smmu_entry_writer_ops arm_smmu_ste_writer_ops = { + .sync = arm_smmu_ste_writer_sync_entry, + .get_used = arm_smmu_get_ste_used, + .v_bit = cpu_to_le64(STRTAB_STE_0_V), +}; + +static void arm_smmu_write_ste(struct arm_smmu_master *master, u32 sid, + struct arm_smmu_ste *ste, + const struct arm_smmu_ste *target) +{ + struct arm_smmu_device *smmu = master->smmu; + struct arm_smmu_ste_writer ste_writer = { + .writer = { + .ops = &arm_smmu_ste_writer_ops, + .master = master, + }, + .sid = sid, + }; + + arm_smmu_write_entry(&ste_writer.writer, ste->data, target->data); + + /* It's likely that we'll want to use the new STE soon */ + if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH)) { + struct arm_smmu_cmdq_ent + prefetch_cmd = { .opcode = CMDQ_OP_PREFETCH_CFG, + .prefetch = { + .sid = sid, + } }; + + arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd); + } } static void arm_smmu_make_abort_ste(struct arm_smmu_ste *target)
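[Editor's note] To show where this refactor is headed, the series title and commit message say the CD code will be moved onto the same mechanism, here is a hypothetical sketch of a second writer flavour reusing the same generic ops. The CD-related names (cd_writer, the ssid field) and the CFGI_CD sync are assumptions about the follow-up patches, not code from this one; as before, uint64_t stands in for __le64 and a printf stands in for the command queue.

```c
/* Hypothetical illustration of the end goal: a CD flavour supplying its own
 * get_used/sync to the same generic writer. Names are guesses, not this patch. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct entry_writer;

struct entry_writer_ops {
	void (*get_used)(const uint64_t *entry, uint64_t *used);
	void (*sync)(struct entry_writer *writer);
};

struct entry_writer {
	const struct entry_writer_ops *ops;
};

struct cd_writer {
	struct entry_writer writer;
	uint32_t ssid;		/* a CD is indexed by SSID, not StreamID */
};

static void cd_get_used(const uint64_t *entry, uint64_t *used)
{
	used[0] = 0x1;		/* CDs also keep their valid bit in qword 0 */
	if (entry[0] & 0x1)
		used[0] |= 0x6;	/* pretend some more fields matter when valid */
}

static void cd_sync(struct entry_writer *writer)
{
	struct cd_writer *cw =
		(struct cd_writer *)((char *)writer -
				     offsetof(struct cd_writer, writer));

	/* The real op would issue CMDQ_OP_CFGI_CD for cw->ssid */
	printf("sync CD for ssid %u\n", cw->ssid);
}

static const struct entry_writer_ops cd_writer_ops = {
	.get_used = cd_get_used,
	.sync = cd_sync,
};

int main(void)
{
	struct cd_writer cw = { .writer = { .ops = &cd_writer_ops }, .ssid = 3 };
	uint64_t cd[8] = { 0x1 }, used[8] = { 0 };

	/* The generic update algorithm only ever dereferences writer->ops */
	cw.writer.ops->get_used(cd, used);
	cw.writer.ops->sync(&cw.writer);
	return 0;
}
```

The point of the indirection is exactly this: the hitless/breaking update logic stays in one place, and each table format only has to say which bits it uses and how to flush its own entries.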