Message ID | 20240522062313.453317-7-zhenzhong.duan@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | intel_iommu: Enable stage-1 translation for emulated device | expand |
Hi Zhenzhong, I already sent you my comments about this patch earlier (question about checking pgtt) but here is a style review On 22/05/2024 08:23, Zhenzhong Duan wrote: > Caution: External email. Do not open attachments or click links, unless this email comes from a known sender and you know the content is safe. > > > From: Yi Liu <yi.l.liu@intel.com> > > This adds stage-1 page table walking to support stage-1 only > transltion in scalable modern mode. > > Signed-off-by: Yi Liu <yi.l.liu@intel.com> > Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com> > Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com> > --- > hw/i386/intel_iommu_internal.h | 17 +++++ > hw/i386/intel_iommu.c | 128 +++++++++++++++++++++++++++++++-- > 2 files changed, 141 insertions(+), 4 deletions(-) > > diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h > index 0e240d6d54..abfdbd5f65 100644 > --- a/hw/i386/intel_iommu_internal.h > +++ b/hw/i386/intel_iommu_internal.h > @@ -534,6 +534,23 @@ typedef struct VTDRootEntry VTDRootEntry; > #define VTD_SM_PASID_ENTRY_AW 7ULL /* Adjusted guest-address-width */ > #define VTD_SM_PASID_ENTRY_DID(val) ((val) & VTD_DOMAIN_ID_MASK) > > +#define VTD_SM_PASID_ENTRY_FLPM 3ULL > +#define VTD_SM_PASID_ENTRY_FLPTPTR (~0xfffULL) > + > +/* Paging Structure common */ > +#define VTD_FL_PT_PAGE_SIZE_MASK (1ULL << 7) > +/* Bits to decide the offset for each level */ > +#define VTD_FL_LEVEL_BITS 9 > + > +/* First Level Paging Structure */ > +#define VTD_FL_PT_LEVEL 1 > +#define VTD_FL_PT_ENTRY_NR 512 > + > +/* Masks for First Level Paging Entry */ > +#define VTD_FL_RW_MASK (1ULL << 1) > +#define VTD_FL_PT_BASE_ADDR_MASK(aw) (~(VTD_PAGE_SIZE - 1) & VTD_HAW_MASK(aw)) > +#define VTD_PASID_ENTRY_FPD (1ULL << 1) /* Fault Processing Disable */ > + > /* Second Level Page Translation Pointer*/ > #define VTD_SM_PASID_ENTRY_SLPTPTR (~0xfffULL) > > diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c > index 544e8f0e40..cf29809bc1 100644 > --- a/hw/i386/intel_iommu.c > +++ b/hw/i386/intel_iommu.c > @@ -50,6 +50,8 @@ > /* pe operations */ > #define VTD_PE_GET_TYPE(pe) ((pe)->val[0] & VTD_SM_PASID_ENTRY_PGTT) > #define VTD_PE_GET_LEVEL(pe) (2 + (((pe)->val[0] >> 2) & VTD_SM_PASID_ENTRY_AW)) > +#define VTD_PE_GET_FLPT_LEVEL(pe) \ > + (4 + (((pe)->val[2] >> 2) & VTD_SM_PASID_ENTRY_FLPM)) > > /* > * PCI bus number (or SID) is not reliable since the device is usaully > @@ -823,6 +825,11 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s, > return -VTD_FR_PASID_TABLE_ENTRY_INV; > } > > + if (pgtt == VTD_SM_PASID_ENTRY_FLT && > + VTD_PE_GET_FLPT_LEVEL(pe) != 4) { Maybe you could add a function to check if the level is supported. And it would also be nice to rename vtd_is_level_supported (used just above these lines) to make it clear that it's only relevant for second level translations and avoid mistakes > + return -VTD_FR_PASID_TABLE_ENTRY_INV; > + } > + > return 0; > } > > @@ -958,7 +965,11 @@ static uint32_t vtd_get_iova_level(IntelIOMMUState *s, > > if (s->root_scalable) { > vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid); > - return VTD_PE_GET_LEVEL(&pe); > + if (s->scalable_modern) { > + return VTD_PE_GET_FLPT_LEVEL(&pe); > + } else { > + return VTD_PE_GET_LEVEL(&pe); same, could be renamed > + } > } > > return vtd_ce_get_level(ce); > @@ -1045,7 +1056,11 @@ static dma_addr_t vtd_get_iova_pgtbl_base(IntelIOMMUState *s, > > if (s->root_scalable) { > vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid); > - return pe.val[0] & VTD_SM_PASID_ENTRY_SLPTPTR; > + if (s->scalable_modern) { > + return pe.val[2] & VTD_SM_PASID_ENTRY_FLPTPTR; > + } else { > + return pe.val[0] & VTD_SM_PASID_ENTRY_SLPTPTR; > + } > } > > return vtd_ce_get_slpt_base(ce); > @@ -1847,6 +1862,106 @@ out: > trace_vtd_pt_enable_fast_path(source_id, success); > } > > +/* The shift of an addr for a certain level of paging structure */ > +static inline uint32_t vtd_flpt_level_shift(uint32_t level) > +{ > + assert(level != 0); > + return VTD_PAGE_SHIFT_4K + (level - 1) * VTD_FL_LEVEL_BITS; > +} > + > +/* > + * Given an iova and the level of paging structure, return the offset > + * of current level. > + */ > +static inline uint32_t vtd_iova_fl_level_offset(uint64_t iova, uint32_t level) > +{ > + return (iova >> vtd_flpt_level_shift(level)) & > + ((1ULL << VTD_FL_LEVEL_BITS) - 1); > +} > + > +/* Get the content of a flpte located in @base_addr[@index] */ > +static uint64_t vtd_get_flpte(dma_addr_t base_addr, uint32_t index) > +{ > + uint64_t flpte; > + > + assert(index < VTD_FL_PT_ENTRY_NR); > + > + if (dma_memory_read(&address_space_memory, > + base_addr + index * sizeof(flpte), &flpte, > + sizeof(flpte), MEMTXATTRS_UNSPECIFIED)) { > + flpte = (uint64_t)-1; > + return flpte; > + } > + flpte = le64_to_cpu(flpte); > + return flpte; > +} > + > +static inline bool vtd_flpte_present(uint64_t flpte) > +{ > + return !!(flpte & 0x1); Shouldn't we use a #define instead of hardcoding the mask? > +} > + > +/* Whether the pte indicates the address of the page frame */ > +static inline bool vtd_is_last_flpte(uint64_t flpte, uint32_t level) > +{ > + return level == VTD_FL_PT_LEVEL || (flpte & VTD_FL_PT_PAGE_SIZE_MASK); > +} > + > +static inline uint64_t vtd_get_flpte_addr(uint64_t flpte, uint8_t aw) > +{ > + return flpte & VTD_FL_PT_BASE_ADDR_MASK(aw); > +} > + > +/* > + * Given the @iova, get relevant @flptep. @flpte_level will be the last level > + * of the translation, can be used for deciding the size of large page. > + */ > +static int vtd_iova_to_flpte(IntelIOMMUState *s, VTDContextEntry *ce, > + uint64_t iova, bool is_write, > + uint64_t *flptep, uint32_t *flpte_level, > + bool *reads, bool *writes, uint8_t aw_bits, > + uint32_t pasid) > +{ > + dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce, pasid); > + uint32_t level = vtd_get_iova_level(s, ce, pasid); > + uint32_t offset; > + uint64_t flpte; > + > + while (true) { > + offset = vtd_iova_fl_level_offset(iova, level); > + flpte = vtd_get_flpte(addr, offset); > + if (flpte == (uint64_t)-1) { > + if (level == vtd_get_iova_level(s, ce, pasid)) { > + /* Invalid programming of context-entry */ > + return -VTD_FR_CONTEXT_ENTRY_INV; > + } else { > + return -VTD_FR_PAGING_ENTRY_INV; > + } > + } > + > + if (!vtd_flpte_present(flpte)) { > + *reads = false; > + *writes = false; > + return -VTD_FR_PAGING_ENTRY_INV; > + } > + > + *reads = true; > + *writes = (*writes) && (flpte & VTD_FL_RW_MASK); > + if (is_write && !(flpte & VTD_FL_RW_MASK)) { > + return -VTD_FR_WRITE; > + } > + > + if (vtd_is_last_flpte(flpte, level)) { > + *flptep = flpte; > + *flpte_level = level; > + return 0; > + } > + > + addr = vtd_get_flpte_addr(flpte, aw_bits); > + level--; > + } > +} > + > static void vtd_report_fault(IntelIOMMUState *s, > int err, bool is_fpd_set, > uint16_t source_id, > @@ -1995,8 +2110,13 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, > } > } > > - ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &pte, &level, > - &reads, &writes, s->aw_bits, pasid); > + if (s->scalable_modern) { > + ret_fr = vtd_iova_to_flpte(s, &ce, addr, is_write, &pte, &level, > + &reads, &writes, s->aw_bits, pasid); > + } else { > + ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &pte, &level, > + &reads, &writes, s->aw_bits, pasid); > + } > > if (ret_fr) { > vtd_report_fault(s, -ret_fr, is_fpd_set, source_id, > -- > 2.34.1 > #cmd
>-----Original Message----- >From: CLEMENT MATHIEU--DRIF <clement.mathieu--drif@eviden.com> >Subject: Re: [PATCH rfcv2 06/17] intel_iommu: Implement stage-1 >translation > >Hi Zhenzhong, > >I already sent you my comments about this patch earlier (question about >checking pgtt) but here is a style review > >On 22/05/2024 08:23, Zhenzhong Duan wrote: >> Caution: External email. Do not open attachments or click links, unless this >email comes from a known sender and you know the content is safe. >> >> >> From: Yi Liu <yi.l.liu@intel.com> >> >> This adds stage-1 page table walking to support stage-1 only >> transltion in scalable modern mode. >> >> Signed-off-by: Yi Liu <yi.l.liu@intel.com> >> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com> >> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com> >> --- >> hw/i386/intel_iommu_internal.h | 17 +++++ >> hw/i386/intel_iommu.c | 128 >+++++++++++++++++++++++++++++++-- >> 2 files changed, 141 insertions(+), 4 deletions(-) >> >> diff --git a/hw/i386/intel_iommu_internal.h >b/hw/i386/intel_iommu_internal.h >> index 0e240d6d54..abfdbd5f65 100644 >> --- a/hw/i386/intel_iommu_internal.h >> +++ b/hw/i386/intel_iommu_internal.h >> @@ -534,6 +534,23 @@ typedef struct VTDRootEntry VTDRootEntry; >> #define VTD_SM_PASID_ENTRY_AW 7ULL /* Adjusted guest- >address-width */ >> #define VTD_SM_PASID_ENTRY_DID(val) ((val) & >VTD_DOMAIN_ID_MASK) >> >> +#define VTD_SM_PASID_ENTRY_FLPM 3ULL >> +#define VTD_SM_PASID_ENTRY_FLPTPTR (~0xfffULL) >> + >> +/* Paging Structure common */ >> +#define VTD_FL_PT_PAGE_SIZE_MASK (1ULL << 7) >> +/* Bits to decide the offset for each level */ >> +#define VTD_FL_LEVEL_BITS 9 >> + >> +/* First Level Paging Structure */ >> +#define VTD_FL_PT_LEVEL 1 >> +#define VTD_FL_PT_ENTRY_NR 512 >> + >> +/* Masks for First Level Paging Entry */ >> +#define VTD_FL_RW_MASK (1ULL << 1) >> +#define VTD_FL_PT_BASE_ADDR_MASK(aw) (~(VTD_PAGE_SIZE - 1) & >VTD_HAW_MASK(aw)) >> +#define VTD_PASID_ENTRY_FPD (1ULL << 1) /* Fault Processing >Disable */ >> + >> /* Second Level Page Translation Pointer*/ >> #define VTD_SM_PASID_ENTRY_SLPTPTR (~0xfffULL) >> >> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c >> index 544e8f0e40..cf29809bc1 100644 >> --- a/hw/i386/intel_iommu.c >> +++ b/hw/i386/intel_iommu.c >> @@ -50,6 +50,8 @@ >> /* pe operations */ >> #define VTD_PE_GET_TYPE(pe) ((pe)->val[0] & >VTD_SM_PASID_ENTRY_PGTT) >> #define VTD_PE_GET_LEVEL(pe) (2 + (((pe)->val[0] >> 2) & >VTD_SM_PASID_ENTRY_AW)) >> +#define VTD_PE_GET_FLPT_LEVEL(pe) \ >> + (4 + (((pe)->val[2] >> 2) & VTD_SM_PASID_ENTRY_FLPM)) >> >> /* >> * PCI bus number (or SID) is not reliable since the device is usaully >> @@ -823,6 +825,11 @@ static int >vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s, >> return -VTD_FR_PASID_TABLE_ENTRY_INV; >> } >> >> + if (pgtt == VTD_SM_PASID_ENTRY_FLT && >> + VTD_PE_GET_FLPT_LEVEL(pe) != 4) { >Maybe you could add a function to check if the level is supported. >And it would also be nice to rename vtd_is_level_supported (used just >above these lines) to make it clear that it's only relevant for second >level translations and avoid mistakes Sure, will do. Thanks Zhenzhong >> + return -VTD_FR_PASID_TABLE_ENTRY_INV; >> + } >> + >> return 0; >> } >> >> @@ -958,7 +965,11 @@ static uint32_t >vtd_get_iova_level(IntelIOMMUState *s, >> >> if (s->root_scalable) { >> vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid); >> - return VTD_PE_GET_LEVEL(&pe); >> + if (s->scalable_modern) { >> + return VTD_PE_GET_FLPT_LEVEL(&pe); >> + } else { >> + return VTD_PE_GET_LEVEL(&pe); >same, could be renamed >> + } >> } >> >> return vtd_ce_get_level(ce); >> @@ -1045,7 +1056,11 @@ static dma_addr_t >vtd_get_iova_pgtbl_base(IntelIOMMUState *s, >> >> if (s->root_scalable) { >> vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid); >> - return pe.val[0] & VTD_SM_PASID_ENTRY_SLPTPTR; >> + if (s->scalable_modern) { >> + return pe.val[2] & VTD_SM_PASID_ENTRY_FLPTPTR; >> + } else { >> + return pe.val[0] & VTD_SM_PASID_ENTRY_SLPTPTR; >> + } >> } >> >> return vtd_ce_get_slpt_base(ce); >> @@ -1847,6 +1862,106 @@ out: >> trace_vtd_pt_enable_fast_path(source_id, success); >> } >> >> +/* The shift of an addr for a certain level of paging structure */ >> +static inline uint32_t vtd_flpt_level_shift(uint32_t level) >> +{ >> + assert(level != 0); >> + return VTD_PAGE_SHIFT_4K + (level - 1) * VTD_FL_LEVEL_BITS; >> +} >> + >> +/* >> + * Given an iova and the level of paging structure, return the offset >> + * of current level. >> + */ >> +static inline uint32_t vtd_iova_fl_level_offset(uint64_t iova, uint32_t >level) >> +{ >> + return (iova >> vtd_flpt_level_shift(level)) & >> + ((1ULL << VTD_FL_LEVEL_BITS) - 1); >> +} >> + >> +/* Get the content of a flpte located in @base_addr[@index] */ >> +static uint64_t vtd_get_flpte(dma_addr_t base_addr, uint32_t index) >> +{ >> + uint64_t flpte; >> + >> + assert(index < VTD_FL_PT_ENTRY_NR); >> + >> + if (dma_memory_read(&address_space_memory, >> + base_addr + index * sizeof(flpte), &flpte, >> + sizeof(flpte), MEMTXATTRS_UNSPECIFIED)) { >> + flpte = (uint64_t)-1; >> + return flpte; >> + } >> + flpte = le64_to_cpu(flpte); >> + return flpte; >> +} >> + >> +static inline bool vtd_flpte_present(uint64_t flpte) >> +{ >> + return !!(flpte & 0x1); >Shouldn't we use a #define instead of hardcoding the mask? >> +} >> + >> +/* Whether the pte indicates the address of the page frame */ >> +static inline bool vtd_is_last_flpte(uint64_t flpte, uint32_t level) >> +{ >> + return level == VTD_FL_PT_LEVEL || (flpte & >VTD_FL_PT_PAGE_SIZE_MASK); >> +} >> + >> +static inline uint64_t vtd_get_flpte_addr(uint64_t flpte, uint8_t aw) >> +{ >> + return flpte & VTD_FL_PT_BASE_ADDR_MASK(aw); >> +} >> + >> +/* >> + * Given the @iova, get relevant @flptep. @flpte_level will be the last >level >> + * of the translation, can be used for deciding the size of large page. >> + */ >> +static int vtd_iova_to_flpte(IntelIOMMUState *s, VTDContextEntry *ce, >> + uint64_t iova, bool is_write, >> + uint64_t *flptep, uint32_t *flpte_level, >> + bool *reads, bool *writes, uint8_t aw_bits, >> + uint32_t pasid) >> +{ >> + dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce, pasid); >> + uint32_t level = vtd_get_iova_level(s, ce, pasid); >> + uint32_t offset; >> + uint64_t flpte; >> + >> + while (true) { >> + offset = vtd_iova_fl_level_offset(iova, level); >> + flpte = vtd_get_flpte(addr, offset); >> + if (flpte == (uint64_t)-1) { >> + if (level == vtd_get_iova_level(s, ce, pasid)) { >> + /* Invalid programming of context-entry */ >> + return -VTD_FR_CONTEXT_ENTRY_INV; >> + } else { >> + return -VTD_FR_PAGING_ENTRY_INV; >> + } >> + } >> + >> + if (!vtd_flpte_present(flpte)) { >> + *reads = false; >> + *writes = false; >> + return -VTD_FR_PAGING_ENTRY_INV; >> + } >> + >> + *reads = true; >> + *writes = (*writes) && (flpte & VTD_FL_RW_MASK); >> + if (is_write && !(flpte & VTD_FL_RW_MASK)) { >> + return -VTD_FR_WRITE; >> + } >> + >> + if (vtd_is_last_flpte(flpte, level)) { >> + *flptep = flpte; >> + *flpte_level = level; >> + return 0; >> + } >> + >> + addr = vtd_get_flpte_addr(flpte, aw_bits); >> + level--; >> + } >> +} >> + >> static void vtd_report_fault(IntelIOMMUState *s, >> int err, bool is_fpd_set, >> uint16_t source_id, >> @@ -1995,8 +2110,13 @@ static bool >vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, >> } >> } >> >> - ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &pte, &level, >> - &reads, &writes, s->aw_bits, pasid); >> + if (s->scalable_modern) { >> + ret_fr = vtd_iova_to_flpte(s, &ce, addr, is_write, &pte, &level, >> + &reads, &writes, s->aw_bits, pasid); >> + } else { >> + ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &pte, &level, >> + &reads, &writes, s->aw_bits, pasid); >> + } >> >> if (ret_fr) { >> vtd_report_fault(s, -ret_fr, is_fpd_set, source_id, >> -- >> 2.34.1 >> >#cmd
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index 0e240d6d54..abfdbd5f65 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -534,6 +534,23 @@ typedef struct VTDRootEntry VTDRootEntry; #define VTD_SM_PASID_ENTRY_AW 7ULL /* Adjusted guest-address-width */ #define VTD_SM_PASID_ENTRY_DID(val) ((val) & VTD_DOMAIN_ID_MASK) +#define VTD_SM_PASID_ENTRY_FLPM 3ULL +#define VTD_SM_PASID_ENTRY_FLPTPTR (~0xfffULL) + +/* Paging Structure common */ +#define VTD_FL_PT_PAGE_SIZE_MASK (1ULL << 7) +/* Bits to decide the offset for each level */ +#define VTD_FL_LEVEL_BITS 9 + +/* First Level Paging Structure */ +#define VTD_FL_PT_LEVEL 1 +#define VTD_FL_PT_ENTRY_NR 512 + +/* Masks for First Level Paging Entry */ +#define VTD_FL_RW_MASK (1ULL << 1) +#define VTD_FL_PT_BASE_ADDR_MASK(aw) (~(VTD_PAGE_SIZE - 1) & VTD_HAW_MASK(aw)) +#define VTD_PASID_ENTRY_FPD (1ULL << 1) /* Fault Processing Disable */ + /* Second Level Page Translation Pointer*/ #define VTD_SM_PASID_ENTRY_SLPTPTR (~0xfffULL) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 544e8f0e40..cf29809bc1 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -50,6 +50,8 @@ /* pe operations */ #define VTD_PE_GET_TYPE(pe) ((pe)->val[0] & VTD_SM_PASID_ENTRY_PGTT) #define VTD_PE_GET_LEVEL(pe) (2 + (((pe)->val[0] >> 2) & VTD_SM_PASID_ENTRY_AW)) +#define VTD_PE_GET_FLPT_LEVEL(pe) \ + (4 + (((pe)->val[2] >> 2) & VTD_SM_PASID_ENTRY_FLPM)) /* * PCI bus number (or SID) is not reliable since the device is usaully @@ -823,6 +825,11 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s, return -VTD_FR_PASID_TABLE_ENTRY_INV; } + if (pgtt == VTD_SM_PASID_ENTRY_FLT && + VTD_PE_GET_FLPT_LEVEL(pe) != 4) { + return -VTD_FR_PASID_TABLE_ENTRY_INV; + } + return 0; } @@ -958,7 +965,11 @@ static uint32_t vtd_get_iova_level(IntelIOMMUState *s, if (s->root_scalable) { vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid); - return VTD_PE_GET_LEVEL(&pe); + if (s->scalable_modern) { + return VTD_PE_GET_FLPT_LEVEL(&pe); + } else { + return VTD_PE_GET_LEVEL(&pe); + } } return vtd_ce_get_level(ce); @@ -1045,7 +1056,11 @@ static dma_addr_t vtd_get_iova_pgtbl_base(IntelIOMMUState *s, if (s->root_scalable) { vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid); - return pe.val[0] & VTD_SM_PASID_ENTRY_SLPTPTR; + if (s->scalable_modern) { + return pe.val[2] & VTD_SM_PASID_ENTRY_FLPTPTR; + } else { + return pe.val[0] & VTD_SM_PASID_ENTRY_SLPTPTR; + } } return vtd_ce_get_slpt_base(ce); @@ -1847,6 +1862,106 @@ out: trace_vtd_pt_enable_fast_path(source_id, success); } +/* The shift of an addr for a certain level of paging structure */ +static inline uint32_t vtd_flpt_level_shift(uint32_t level) +{ + assert(level != 0); + return VTD_PAGE_SHIFT_4K + (level - 1) * VTD_FL_LEVEL_BITS; +} + +/* + * Given an iova and the level of paging structure, return the offset + * of current level. + */ +static inline uint32_t vtd_iova_fl_level_offset(uint64_t iova, uint32_t level) +{ + return (iova >> vtd_flpt_level_shift(level)) & + ((1ULL << VTD_FL_LEVEL_BITS) - 1); +} + +/* Get the content of a flpte located in @base_addr[@index] */ +static uint64_t vtd_get_flpte(dma_addr_t base_addr, uint32_t index) +{ + uint64_t flpte; + + assert(index < VTD_FL_PT_ENTRY_NR); + + if (dma_memory_read(&address_space_memory, + base_addr + index * sizeof(flpte), &flpte, + sizeof(flpte), MEMTXATTRS_UNSPECIFIED)) { + flpte = (uint64_t)-1; + return flpte; + } + flpte = le64_to_cpu(flpte); + return flpte; +} + +static inline bool vtd_flpte_present(uint64_t flpte) +{ + return !!(flpte & 0x1); +} + +/* Whether the pte indicates the address of the page frame */ +static inline bool vtd_is_last_flpte(uint64_t flpte, uint32_t level) +{ + return level == VTD_FL_PT_LEVEL || (flpte & VTD_FL_PT_PAGE_SIZE_MASK); +} + +static inline uint64_t vtd_get_flpte_addr(uint64_t flpte, uint8_t aw) +{ + return flpte & VTD_FL_PT_BASE_ADDR_MASK(aw); +} + +/* + * Given the @iova, get relevant @flptep. @flpte_level will be the last level + * of the translation, can be used for deciding the size of large page. + */ +static int vtd_iova_to_flpte(IntelIOMMUState *s, VTDContextEntry *ce, + uint64_t iova, bool is_write, + uint64_t *flptep, uint32_t *flpte_level, + bool *reads, bool *writes, uint8_t aw_bits, + uint32_t pasid) +{ + dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce, pasid); + uint32_t level = vtd_get_iova_level(s, ce, pasid); + uint32_t offset; + uint64_t flpte; + + while (true) { + offset = vtd_iova_fl_level_offset(iova, level); + flpte = vtd_get_flpte(addr, offset); + if (flpte == (uint64_t)-1) { + if (level == vtd_get_iova_level(s, ce, pasid)) { + /* Invalid programming of context-entry */ + return -VTD_FR_CONTEXT_ENTRY_INV; + } else { + return -VTD_FR_PAGING_ENTRY_INV; + } + } + + if (!vtd_flpte_present(flpte)) { + *reads = false; + *writes = false; + return -VTD_FR_PAGING_ENTRY_INV; + } + + *reads = true; + *writes = (*writes) && (flpte & VTD_FL_RW_MASK); + if (is_write && !(flpte & VTD_FL_RW_MASK)) { + return -VTD_FR_WRITE; + } + + if (vtd_is_last_flpte(flpte, level)) { + *flptep = flpte; + *flpte_level = level; + return 0; + } + + addr = vtd_get_flpte_addr(flpte, aw_bits); + level--; + } +} + static void vtd_report_fault(IntelIOMMUState *s, int err, bool is_fpd_set, uint16_t source_id, @@ -1995,8 +2110,13 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, } } - ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &pte, &level, - &reads, &writes, s->aw_bits, pasid); + if (s->scalable_modern) { + ret_fr = vtd_iova_to_flpte(s, &ce, addr, is_write, &pte, &level, + &reads, &writes, s->aw_bits, pasid); + } else { + ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &pte, &level, + &reads, &writes, s->aw_bits, pasid); + } if (ret_fr) { vtd_report_fault(s, -ret_fr, is_fpd_set, source_id,