Message ID | 20220502163417.2635462-4-matthew.d.roper@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | i915: Introduce Ponte Vecchio | expand |
On Mon, May 02, 2022 at 09:34:09AM -0700, Matt Roper wrote: > From: Ayaz A Siddiqui <ayaz.siddiqui@intel.com> > > Bspec: 45101, 72161 > Signed-off-by: Ayaz A Siddiqui <ayaz.siddiqui@intel.com> > Signed-off-by: Fei Yang <fei.yang@intel.com> > Signed-off-by: Matt Roper <matthew.d.roper@intel.com> > --- > drivers/gpu/drm/i915/gt/intel_gt_types.h | 1 + > drivers/gpu/drm/i915/gt/intel_mocs.c | 24 ++++++++++++++++++++- > drivers/gpu/drm/i915/gt/intel_workarounds.c | 13 ++++++++--- > drivers/gpu/drm/i915/i915_drv.h | 2 ++ > drivers/gpu/drm/i915/i915_pci.c | 3 ++- > drivers/gpu/drm/i915/intel_device_info.h | 1 + > 6 files changed, 39 insertions(+), 5 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h > index b06611c1d4ad..7853ea194ea6 100644 > --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h > +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h > @@ -221,6 +221,7 @@ struct intel_gt { > > struct { > u8 uc_index; > + u8 wb_index; /* Only for platforms listed in Bspec: 72161 */ > } mocs; > > struct intel_pxp pxp; > diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c > index c4c37585ae8c..265812589f87 100644 > --- a/drivers/gpu/drm/i915/gt/intel_mocs.c > +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c > @@ -23,6 +23,7 @@ struct drm_i915_mocs_table { > unsigned int n_entries; > const struct drm_i915_mocs_entry *table; > u8 uc_index; > + u8 wb_index; /* Only for platforms listed in Bspec: 72161 */ > u8 unused_entries_index; > }; > > @@ -47,6 +48,7 @@ struct drm_i915_mocs_table { > > /* Helper defines */ > #define GEN9_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. */ > +#define PVC_NUM_MOCS_ENTRIES 3 Should this be 4? The value here should reflect the number of entries that can defined in hardware rather than the size of the table we're asked to program. 
Since there are two registers (each with a high and a low entry), that would imply we should set 4 here to ensure that the fourth entry is initialized according to unused_entries_index rather than left at whatever the hardware defaults might be. Matt > > /* (e)LLC caching options */ > /* > @@ -394,6 +396,17 @@ static const struct drm_i915_mocs_entry dg2_mocs_table_g10_ax[] = { > MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)), > }; > > +static const struct drm_i915_mocs_entry pvc_mocs_table[] = { > + /* Error */ > + MOCS_ENTRY(0, 0, L3_3_WB), > + > + /* UC */ > + MOCS_ENTRY(1, 0, L3_1_UC), > + > + /* WB */ > + MOCS_ENTRY(2, 0, L3_3_WB), > +}; > + > enum { > HAS_GLOBAL_MOCS = BIT(0), > HAS_ENGINE_MOCS = BIT(1), > @@ -423,7 +436,14 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, > memset(table, 0, sizeof(struct drm_i915_mocs_table)); > > table->unused_entries_index = I915_MOCS_PTE; > - if (IS_DG2(i915)) { > + if (IS_PONTEVECCHIO(i915)) { > + table->size = ARRAY_SIZE(pvc_mocs_table); > + table->table = pvc_mocs_table; > + table->n_entries = PVC_NUM_MOCS_ENTRIES; > + table->uc_index = 1; > + table->wb_index = 2; > + table->unused_entries_index = 2; > + } else if (IS_DG2(i915)) { > if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) { > table->size = ARRAY_SIZE(dg2_mocs_table_g10_ax); > table->table = dg2_mocs_table_g10_ax; > @@ -622,6 +642,8 @@ void intel_set_mocs_index(struct intel_gt *gt) > > get_mocs_settings(gt->i915, &table); > gt->mocs.uc_index = table.uc_index; > + if (HAS_L3_CCS_READ(gt->i915)) > + gt->mocs.wb_index = table.wb_index; > } > > void intel_mocs_init(struct intel_gt *gt) > diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c > index a05c4b99b3fb..a656d9c2ca2b 100644 > --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c > +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c > @@ -1994,7 +1994,7 @@ void intel_engine_apply_whitelist(struct intel_engine_cs *engine) > static void > 
engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) > { > - u8 mocs; > + u8 mocs_w, mocs_r; > > /* > * RING_CMD_CCTL are need to be programed to un-cached > @@ -2002,11 +2002,18 @@ engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) > * Streamers on Gen12 onward platforms. > */ > if (GRAPHICS_VER(engine->i915) >= 12) { > - mocs = engine->gt->mocs.uc_index; > + if (HAS_L3_CCS_READ(engine->i915) && > + engine->class == COMPUTE_CLASS) > + mocs_r = engine->gt->mocs.wb_index; > + else > + mocs_r = engine->gt->mocs.uc_index; > + > + mocs_w = engine->gt->mocs.uc_index; > + > wa_masked_field_set(wal, > RING_CMD_CCTL(engine->mmio_base), > CMD_CCTL_MOCS_MASK, > - CMD_CCTL_MOCS_OVERRIDE(mocs, mocs)); > + CMD_CCTL_MOCS_OVERRIDE(mocs_w, mocs_r)); > } > } > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 2dddc27a1b0e..8c8e7308502b 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1369,6 +1369,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, > > #define HAS_LSPCON(dev_priv) (IS_DISPLAY_VER(dev_priv, 9, 10)) > > +#define HAS_L3_CCS_READ(i915) (INTEL_INFO(i915)->has_l3_ccs_read) > + > /* DPF == dynamic parity feature */ > #define HAS_L3_DPF(dev_priv) (INTEL_INFO(dev_priv)->has_l3_dpf) > #define NUM_L3_SLICES(dev_priv) (IS_HSW_GT3(dev_priv) ? 
\ > diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c > index 498708b33924..07722cdf63ac 100644 > --- a/drivers/gpu/drm/i915/i915_pci.c > +++ b/drivers/gpu/drm/i915/i915_pci.c > @@ -1076,7 +1076,8 @@ static const struct intel_device_info ats_m_info = { > > #define XE_HPC_FEATURES \ > XE_HP_FEATURES, \ > - .dma_mask_size = 52 > + .dma_mask_size = 52, \ > + .has_l3_ccs_read = 1 > > __maybe_unused > static const struct intel_device_info pvc_info = { > diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h > index e7d2cf7d65c8..09e33296157a 100644 > --- a/drivers/gpu/drm/i915/intel_device_info.h > +++ b/drivers/gpu/drm/i915/intel_device_info.h > @@ -150,6 +150,7 @@ enum intel_ppgtt_type { > func(has_heci_pxp); \ > func(has_heci_gscfi); \ > func(has_guc_deprivilege); \ > + func(has_l3_ccs_read); \ > func(has_l3_dpf); \ > func(has_llc); \ > func(has_logical_ring_contexts); \ > -- > 2.35.1 >
On Mon, May 02, 2022 at 09:50:23AM -0700, Matt Roper wrote: >On Mon, May 02, 2022 at 09:34:09AM -0700, Matt Roper wrote: >> From: Ayaz A Siddiqui <ayaz.siddiqui@intel.com> >> >> Bspec: 45101, 72161 >> Signed-off-by: Ayaz A Siddiqui <ayaz.siddiqui@intel.com> >> Signed-off-by: Fei Yang <fei.yang@intel.com> >> Signed-off-by: Matt Roper <matthew.d.roper@intel.com> >> --- >> drivers/gpu/drm/i915/gt/intel_gt_types.h | 1 + >> drivers/gpu/drm/i915/gt/intel_mocs.c | 24 ++++++++++++++++++++- >> drivers/gpu/drm/i915/gt/intel_workarounds.c | 13 ++++++++--- >> drivers/gpu/drm/i915/i915_drv.h | 2 ++ >> drivers/gpu/drm/i915/i915_pci.c | 3 ++- >> drivers/gpu/drm/i915/intel_device_info.h | 1 + >> 6 files changed, 39 insertions(+), 5 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h >> index b06611c1d4ad..7853ea194ea6 100644 >> --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h >> +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h >> @@ -221,6 +221,7 @@ struct intel_gt { >> >> struct { >> u8 uc_index; >> + u8 wb_index; /* Only for platforms listed in Bspec: 72161 */ >> } mocs; >> >> struct intel_pxp pxp; >> diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c >> index c4c37585ae8c..265812589f87 100644 >> --- a/drivers/gpu/drm/i915/gt/intel_mocs.c >> +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c >> @@ -23,6 +23,7 @@ struct drm_i915_mocs_table { >> unsigned int n_entries; >> const struct drm_i915_mocs_entry *table; >> u8 uc_index; >> + u8 wb_index; /* Only for platforms listed in Bspec: 72161 */ >> u8 unused_entries_index; >> }; >> >> @@ -47,6 +48,7 @@ struct drm_i915_mocs_table { >> >> /* Helper defines */ >> #define GEN9_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. */ >> +#define PVC_NUM_MOCS_ENTRIES 3 > >Should this be 4? The value here should reflect the number of entries >that can defined in hardware rather than the size of the table we're >asked to program. 
Since there are two registers (each with a high and a >low entry), that would imply we should set 4 here to ensure that the >fourth entry is initialized according to unused_entries_index rather >than left at whatever the hardware defaults might be. not sure I understand what you mean here. The n_entries specifies, as you said, the number of entries we can have. Bspec 45101 shows entries for indexes 0, 1 and 2. As does the pvc_mocs_table below. Also, from bspec 44509: "For PVC, only 3 MOCS states are supported. The allowed index values are in range [0, 2]..." So, I don't think we want to program any fourth entry. Lucas De Marchi
On Mon, May 02, 2022 at 11:39:48AM -0700, Lucas De Marchi wrote: > On Mon, May 02, 2022 at 09:50:23AM -0700, Matt Roper wrote: > > On Mon, May 02, 2022 at 09:34:09AM -0700, Matt Roper wrote: > > > From: Ayaz A Siddiqui <ayaz.siddiqui@intel.com> > > > > > > Bspec: 45101, 72161 > > > Signed-off-by: Ayaz A Siddiqui <ayaz.siddiqui@intel.com> > > > Signed-off-by: Fei Yang <fei.yang@intel.com> > > > Signed-off-by: Matt Roper <matthew.d.roper@intel.com> > > > --- > > > drivers/gpu/drm/i915/gt/intel_gt_types.h | 1 + > > > drivers/gpu/drm/i915/gt/intel_mocs.c | 24 ++++++++++++++++++++- > > > drivers/gpu/drm/i915/gt/intel_workarounds.c | 13 ++++++++--- > > > drivers/gpu/drm/i915/i915_drv.h | 2 ++ > > > drivers/gpu/drm/i915/i915_pci.c | 3 ++- > > > drivers/gpu/drm/i915/intel_device_info.h | 1 + > > > 6 files changed, 39 insertions(+), 5 deletions(-) > > > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h > > > index b06611c1d4ad..7853ea194ea6 100644 > > > --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h > > > +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h > > > @@ -221,6 +221,7 @@ struct intel_gt { > > > > > > struct { > > > u8 uc_index; > > > + u8 wb_index; /* Only for platforms listed in Bspec: 72161 */ > > > } mocs; > > > > > > struct intel_pxp pxp; > > > diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c > > > index c4c37585ae8c..265812589f87 100644 > > > --- a/drivers/gpu/drm/i915/gt/intel_mocs.c > > > +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c > > > @@ -23,6 +23,7 @@ struct drm_i915_mocs_table { > > > unsigned int n_entries; > > > const struct drm_i915_mocs_entry *table; > > > u8 uc_index; > > > + u8 wb_index; /* Only for platforms listed in Bspec: 72161 */ > > > u8 unused_entries_index; > > > }; > > > > > > @@ -47,6 +48,7 @@ struct drm_i915_mocs_table { > > > > > > /* Helper defines */ > > > #define GEN9_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. 
*/ > > > +#define PVC_NUM_MOCS_ENTRIES 3 > > > > Should this be 4? The value here should reflect the number of entries > > that can defined in hardware rather than the size of the table we're > > asked to program. Since there are two registers (each with a high and a > > low entry), that would imply we should set 4 here to ensure that the > > fourth entry is initialized according to unused_entries_index rather > > than left at whatever the hardware defaults might be. > > not sure I understand what you mean here. The n_entries specifies, as > you said, the number of entries we can have. Bspec 45101 shows entries > for indexes 0, 1 and 2. As does the pvc_mocs_table below. > > Also, from bspec 44509: > "For PVC, only 3 MOCS states are supported. The allowed index values are > in range [0, 2]..." > > So, I don't think we want to program any fourth entry. We don't have a choice; the fourth entry lives in the same register as the third entry, so no matter what we're writing _something_ to those bits. The question is whether we should write all 0's or whether we should treat it like other platforms and ensure it's initialized to the unused entry values. Entry #4 isn't supposed to be used, but if buggy userspace tries to use it, we probably still want well-defined behavior, just like when an invalid entry gets used on any other platform. Matt > > Lucas De Marchi
On Mon, May 02, 2022 at 11:50:22AM -0700, Matt Roper wrote: >On Mon, May 02, 2022 at 11:39:48AM -0700, Lucas De Marchi wrote: >> On Mon, May 02, 2022 at 09:50:23AM -0700, Matt Roper wrote: >> > On Mon, May 02, 2022 at 09:34:09AM -0700, Matt Roper wrote: >> > > From: Ayaz A Siddiqui <ayaz.siddiqui@intel.com> >> > > >> > > Bspec: 45101, 72161 >> > > Signed-off-by: Ayaz A Siddiqui <ayaz.siddiqui@intel.com> >> > > Signed-off-by: Fei Yang <fei.yang@intel.com> >> > > Signed-off-by: Matt Roper <matthew.d.roper@intel.com> >> > > --- >> > > drivers/gpu/drm/i915/gt/intel_gt_types.h | 1 + >> > > drivers/gpu/drm/i915/gt/intel_mocs.c | 24 ++++++++++++++++++++- >> > > drivers/gpu/drm/i915/gt/intel_workarounds.c | 13 ++++++++--- >> > > drivers/gpu/drm/i915/i915_drv.h | 2 ++ >> > > drivers/gpu/drm/i915/i915_pci.c | 3 ++- >> > > drivers/gpu/drm/i915/intel_device_info.h | 1 + >> > > 6 files changed, 39 insertions(+), 5 deletions(-) >> > > >> > > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h >> > > index b06611c1d4ad..7853ea194ea6 100644 >> > > --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h >> > > +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h >> > > @@ -221,6 +221,7 @@ struct intel_gt { >> > > >> > > struct { >> > > u8 uc_index; >> > > + u8 wb_index; /* Only for platforms listed in Bspec: 72161 */ >> > > } mocs; >> > > >> > > struct intel_pxp pxp; >> > > diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c >> > > index c4c37585ae8c..265812589f87 100644 >> > > --- a/drivers/gpu/drm/i915/gt/intel_mocs.c >> > > +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c >> > > @@ -23,6 +23,7 @@ struct drm_i915_mocs_table { >> > > unsigned int n_entries; >> > > const struct drm_i915_mocs_entry *table; >> > > u8 uc_index; >> > > + u8 wb_index; /* Only for platforms listed in Bspec: 72161 */ >> > > u8 unused_entries_index; >> > > }; >> > > >> > > @@ -47,6 +48,7 @@ struct drm_i915_mocs_table { >> > > >> > > /* Helper 
defines */ >> > > #define GEN9_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. */ >> > > +#define PVC_NUM_MOCS_ENTRIES 3 >> > >> > Should this be 4? The value here should reflect the number of entries >> > that can defined in hardware rather than the size of the table we're >> > asked to program. Since there are two registers (each with a high and a >> > low entry), that would imply we should set 4 here to ensure that the >> > fourth entry is initialized according to unused_entries_index rather >> > than left at whatever the hardware defaults might be. >> >> not sure I understand what you mean here. The n_entries specifies, as >> you said, the number of entries we can have. Bspec 45101 shows entries >> for indexes 0, 1 and 2. As does the pvc_mocs_table below. >> >> Also, from bspec 44509: >> "For PVC, only 3 MOCS states are supported. The allowed index values are >> in range [0, 2]..." >> >> So, I don't think we want to program any fourth entry. > >We don't have a choice; the fourth entry lives in the same register as >the third entry, so no matter what we're writing _something_ to those >bits. The question is whether we should write all 0's or whether we >should treat it like other platforms and ensure it's initialized to the >unused entry values. Entry #4 isn't supposed to be used, but if buggy >userspace tries to use it, we probably still want well-defined behavior, >just like it an invalid entry gets used on any other platform. Now I understand what you were talking about: each register houses 2 entries. For PVC we have LNCFCMOCS0 and LNCFCMOCS1. Hmm... looking at for_each_l3cc(), that is actually handled and the rest of the register is initialized with the value pointed to by unused_entries_index. Such a situation would only happen for the last entry, which implies the handling for an odd size works for this as well. 
Lucas De Marchi > > >Matt > >> >> Lucas De Marchi > >-- >Matt Roper >Graphics Software Engineer >VTT-OSGC Platform Enablement >Intel Corporation >(916) 356-2795
On Mon, May 02, 2022 at 12:27:29PM -0700, Lucas De Marchi wrote: > On Mon, May 02, 2022 at 11:50:22AM -0700, Matt Roper wrote: > > On Mon, May 02, 2022 at 11:39:48AM -0700, Lucas De Marchi wrote: > > > On Mon, May 02, 2022 at 09:50:23AM -0700, Matt Roper wrote: > > > > On Mon, May 02, 2022 at 09:34:09AM -0700, Matt Roper wrote: > > > > > From: Ayaz A Siddiqui <ayaz.siddiqui@intel.com> > > > > > > > > > > Bspec: 45101, 72161 > > > > > Signed-off-by: Ayaz A Siddiqui <ayaz.siddiqui@intel.com> > > > > > Signed-off-by: Fei Yang <fei.yang@intel.com> > > > > > Signed-off-by: Matt Roper <matthew.d.roper@intel.com> > > > > > --- > > > > > drivers/gpu/drm/i915/gt/intel_gt_types.h | 1 + > > > > > drivers/gpu/drm/i915/gt/intel_mocs.c | 24 ++++++++++++++++++++- > > > > > drivers/gpu/drm/i915/gt/intel_workarounds.c | 13 ++++++++--- > > > > > drivers/gpu/drm/i915/i915_drv.h | 2 ++ > > > > > drivers/gpu/drm/i915/i915_pci.c | 3 ++- > > > > > drivers/gpu/drm/i915/intel_device_info.h | 1 + > > > > > 6 files changed, 39 insertions(+), 5 deletions(-) > > > > > > > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h > > > > > index b06611c1d4ad..7853ea194ea6 100644 > > > > > --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h > > > > > +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h > > > > > @@ -221,6 +221,7 @@ struct intel_gt { > > > > > > > > > > struct { > > > > > u8 uc_index; > > > > > + u8 wb_index; /* Only for platforms listed in Bspec: 72161 */ > > > > > } mocs; > > > > > > > > > > struct intel_pxp pxp; > > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c > > > > > index c4c37585ae8c..265812589f87 100644 > > > > > --- a/drivers/gpu/drm/i915/gt/intel_mocs.c > > > > > +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c > > > > > @@ -23,6 +23,7 @@ struct drm_i915_mocs_table { > > > > > unsigned int n_entries; > > > > > const struct drm_i915_mocs_entry *table; > > > > > u8 uc_index; > > > > > + u8 
wb_index; /* Only for platforms listed in Bspec: 72161 */ > > > > > u8 unused_entries_index; > > > > > }; > > > > > > > > > > @@ -47,6 +48,7 @@ struct drm_i915_mocs_table { > > > > > > > > > > /* Helper defines */ > > > > > #define GEN9_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. */ > > > > > +#define PVC_NUM_MOCS_ENTRIES 3 > > > > > > > > Should this be 4? The value here should reflect the number of entries > > > > that can defined in hardware rather than the size of the table we're > > > > asked to program. Since there are two registers (each with a high and a > > > > low entry), that would imply we should set 4 here to ensure that the > > > > fourth entry is initialized according to unused_entries_index rather > > > > than left at whatever the hardware defaults might be. > > > > > > not sure I understand what you mean here. The n_entries specifies, as > > > you said, the number of entries we can have. Bspec 45101 shows entries > > > for indexes 0, 1 and 2. As does the pvc_mocs_table below. > > > > > > Also, from bspec 44509: > > > "For PVC, only 3 MOCS states are supported. The allowed index values are > > > in range [0, 2]..." > > > > > > So, I don't think we want to program any fourth entry. > > > > We don't have a choice; the fourth entry lives in the same register as > > the third entry, so no matter what we're writing _something_ to those > > bits. The question is whether we should write all 0's or whether we > > should treat it like other platforms and ensure it's initialized to the > > unused entry values. Entry #4 isn't supposed to be used, but if buggy > > userspace tries to use it, we probably still want well-defined behavior, > > just like it an invalid entry gets used on any other platform. > > Now I understand what you were talking about: each register houses 2 > entries. For PVC we have LNCFCMOCS0 and LNCFCMOCS1. Humn... 
looking at > for_each_l3cc(), that is actually handled and the rest of the register > is initialized with the value pointed by unused_entries_index. Yep, you're right. It looks like we still do a get_entry_l3cc() for the upper entry of the final register, and that will return the unused_entry value if it's out of bounds. In that case I don't have any concerns here. Matt > > Such situation would only happen for the last entry, which implies the > handling for odd size works for this as well. > > Lucas De Marchi > > > > > > > Matt > > > > > > > > Lucas De Marchi > > > > -- > > Matt Roper > > Graphics Software Engineer > > VTT-OSGC Platform Enablement > > Intel Corporation > > (916) 356-2795
On Mon, May 02, 2022 at 09:34:09AM -0700, Matt Roper wrote: >From: Ayaz A Siddiqui <ayaz.siddiqui@intel.com> > >Bspec: 45101, 72161 >Signed-off-by: Ayaz A Siddiqui <ayaz.siddiqui@intel.com> >Signed-off-by: Fei Yang <fei.yang@intel.com> >Signed-off-by: Matt Roper <matthew.d.roper@intel.com> >--- > drivers/gpu/drm/i915/gt/intel_gt_types.h | 1 + > drivers/gpu/drm/i915/gt/intel_mocs.c | 24 ++++++++++++++++++++- > drivers/gpu/drm/i915/gt/intel_workarounds.c | 13 ++++++++--- > drivers/gpu/drm/i915/i915_drv.h | 2 ++ > drivers/gpu/drm/i915/i915_pci.c | 3 ++- > drivers/gpu/drm/i915/intel_device_info.h | 1 + > 6 files changed, 39 insertions(+), 5 deletions(-) > >diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h >index b06611c1d4ad..7853ea194ea6 100644 >--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h >+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h >@@ -221,6 +221,7 @@ struct intel_gt { > > struct { > u8 uc_index; >+ u8 wb_index; /* Only for platforms listed in Bspec: 72161 */ I don't much like writing the bspec number in code like this. For a commit message it's acceptable/desired, but for code I think it's not great as 1) it's not something generally available and 2) it will likely get outdated so one would have to rely on git log/blame to see when this was actually valid. > } mocs; > > struct intel_pxp pxp; >diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c >index c4c37585ae8c..265812589f87 100644 >--- a/drivers/gpu/drm/i915/gt/intel_mocs.c >+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c >@@ -23,6 +23,7 @@ struct drm_i915_mocs_table { > unsigned int n_entries; > const struct drm_i915_mocs_entry *table; > u8 uc_index; >+ u8 wb_index; /* Only for platforms listed in Bspec: 72161 */ > u8 unused_entries_index; > }; > >@@ -47,6 +48,7 @@ struct drm_i915_mocs_table { > > /* Helper defines */ > #define GEN9_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. 
*/ >+#define PVC_NUM_MOCS_ENTRIES 3 > > /* (e)LLC caching options */ > /* >@@ -394,6 +396,17 @@ static const struct drm_i915_mocs_entry dg2_mocs_table_g10_ax[] = { > MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)), > }; > >+static const struct drm_i915_mocs_entry pvc_mocs_table[] = { >+ /* Error */ >+ MOCS_ENTRY(0, 0, L3_3_WB), >+ >+ /* UC */ >+ MOCS_ENTRY(1, 0, L3_1_UC), >+ >+ /* WB */ >+ MOCS_ENTRY(2, 0, L3_3_WB), >+}; >+ > enum { > HAS_GLOBAL_MOCS = BIT(0), > HAS_ENGINE_MOCS = BIT(1), >@@ -423,7 +436,14 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, > memset(table, 0, sizeof(struct drm_i915_mocs_table)); > > table->unused_entries_index = I915_MOCS_PTE; >- if (IS_DG2(i915)) { >+ if (IS_PONTEVECCHIO(i915)) { >+ table->size = ARRAY_SIZE(pvc_mocs_table); >+ table->table = pvc_mocs_table; >+ table->n_entries = PVC_NUM_MOCS_ENTRIES; >+ table->uc_index = 1; >+ table->wb_index = 2; >+ table->unused_entries_index = 2; >+ } else if (IS_DG2(i915)) { > if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) { > table->size = ARRAY_SIZE(dg2_mocs_table_g10_ax); > table->table = dg2_mocs_table_g10_ax; >@@ -622,6 +642,8 @@ void intel_set_mocs_index(struct intel_gt *gt) > > get_mocs_settings(gt->i915, &table); > gt->mocs.uc_index = table.uc_index; >+ if (HAS_L3_CCS_READ(gt->i915)) >+ gt->mocs.wb_index = table.wb_index; > } > > void intel_mocs_init(struct intel_gt *gt) >diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c >index a05c4b99b3fb..a656d9c2ca2b 100644 >--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c >+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c >@@ -1994,7 +1994,7 @@ void intel_engine_apply_whitelist(struct intel_engine_cs *engine) > static void > engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) > { >- u8 mocs; >+ u8 mocs_w, mocs_r; > > /* > * RING_CMD_CCTL are need to be programed to un-cached >@@ -2002,11 +2002,18 @@ engine_fake_wa_init(struct intel_engine_cs 
*engine, struct i915_wa_list *wal) > * Streamers on Gen12 onward platforms. > */ > if (GRAPHICS_VER(engine->i915) >= 12) { >- mocs = engine->gt->mocs.uc_index; >+ if (HAS_L3_CCS_READ(engine->i915) && >+ engine->class == COMPUTE_CLASS) >+ mocs_r = engine->gt->mocs.wb_index; >+ else >+ mocs_r = engine->gt->mocs.uc_index; shouldn't we add a warning in get_mocs_settings() if HAS_L3_CCS_READ(engine->i915) and mocs.wb_index is 0 (since index 0 shouldn't really be used in latest platforms)? Lucas De Marchi >+ >+ mocs_w = engine->gt->mocs.uc_index; >+ > wa_masked_field_set(wal, > RING_CMD_CCTL(engine->mmio_base), > CMD_CCTL_MOCS_MASK, >- CMD_CCTL_MOCS_OVERRIDE(mocs, mocs)); >+ CMD_CCTL_MOCS_OVERRIDE(mocs_w, mocs_r)); > } > } > >diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h >index 2dddc27a1b0e..8c8e7308502b 100644 >--- a/drivers/gpu/drm/i915/i915_drv.h >+++ b/drivers/gpu/drm/i915/i915_drv.h >@@ -1369,6 +1369,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, > > #define HAS_LSPCON(dev_priv) (IS_DISPLAY_VER(dev_priv, 9, 10)) > >+#define HAS_L3_CCS_READ(i915) (INTEL_INFO(i915)->has_l3_ccs_read) >+ > /* DPF == dynamic parity feature */ > #define HAS_L3_DPF(dev_priv) (INTEL_INFO(dev_priv)->has_l3_dpf) > #define NUM_L3_SLICES(dev_priv) (IS_HSW_GT3(dev_priv) ? 
\ >diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c >index 498708b33924..07722cdf63ac 100644 >--- a/drivers/gpu/drm/i915/i915_pci.c >+++ b/drivers/gpu/drm/i915/i915_pci.c >@@ -1076,7 +1076,8 @@ static const struct intel_device_info ats_m_info = { > > #define XE_HPC_FEATURES \ > XE_HP_FEATURES, \ >- .dma_mask_size = 52 >+ .dma_mask_size = 52, \ >+ .has_l3_ccs_read = 1 > > __maybe_unused > static const struct intel_device_info pvc_info = { >diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h >index e7d2cf7d65c8..09e33296157a 100644 >--- a/drivers/gpu/drm/i915/intel_device_info.h >+++ b/drivers/gpu/drm/i915/intel_device_info.h >@@ -150,6 +150,7 @@ enum intel_ppgtt_type { > func(has_heci_pxp); \ > func(has_heci_gscfi); \ > func(has_guc_deprivilege); \ >+ func(has_l3_ccs_read); \ > func(has_l3_dpf); \ > func(has_llc); \ > func(has_logical_ring_contexts); \ >-- >2.35.1 >
On Mon, May 02, 2022 at 02:03:28PM -0700, Lucas De Marchi wrote: > On Mon, May 02, 2022 at 09:34:09AM -0700, Matt Roper wrote: > > From: Ayaz A Siddiqui <ayaz.siddiqui@intel.com> ... > > @@ -2002,11 +2002,18 @@ engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) > > * Streamers on Gen12 onward platforms. > > */ > > if (GRAPHICS_VER(engine->i915) >= 12) { > > - mocs = engine->gt->mocs.uc_index; > > + if (HAS_L3_CCS_READ(engine->i915) && > > + engine->class == COMPUTE_CLASS) > > + mocs_r = engine->gt->mocs.wb_index; > > + else > > + mocs_r = engine->gt->mocs.uc_index; > > shouldn't we add a warning in get_mocs_settings() if HAS_L3_CCS_READ(engine->i915) > and mocs.wb_index is 0 (since index 0 shouldn't really be used in latest > platforms)? We should be careful about that assumption... index 0 is valid on DG2 today, although HAS_L3_CCS_READ() doesn't apply there. And a couple of platforms down the road, index 0 will also be valid on a platform where HAS_L3_CCS_READ() is true (bspec 71582). Index 0 would still be the wrong entry to pick for WB behavior there, but it is a legitimate entry in general. Matt
On Mon, May 02, 2022 at 02:14:02PM -0700, Matt Roper wrote: >On Mon, May 02, 2022 at 02:03:28PM -0700, Lucas De Marchi wrote: >> On Mon, May 02, 2022 at 09:34:09AM -0700, Matt Roper wrote: >> > From: Ayaz A Siddiqui <ayaz.siddiqui@intel.com> >... >> > @@ -2002,11 +2002,18 @@ engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) >> > * Streamers on Gen12 onward platforms. >> > */ >> > if (GRAPHICS_VER(engine->i915) >= 12) { >> > - mocs = engine->gt->mocs.uc_index; >> > + if (HAS_L3_CCS_READ(engine->i915) && >> > + engine->class == COMPUTE_CLASS) >> > + mocs_r = engine->gt->mocs.wb_index; >> > + else >> > + mocs_r = engine->gt->mocs.uc_index; >> >> shouldn't we add a warning in get_mocs_settings() if HAS_L3_CCS_READ(engine->i915) >> and mocs.wb_index is 0 (since index 0 shouldn't really be used in latest >> platforms)? > >We should be careful about that assumption...index 0 is valid on DG2 >today, although HAS_L3_CCS_READ() doesn't apply there. And a couple >platforms in the future we're also going to have index 0 being valid on >a platform where HAS_L3_CCS_READ() is true (bspec 71582). Index 0 would >still be the wrong entry to pick for WB behavior there, but it is a >legitimate entry in general. ok, but my comment is more about "forgetting to initialize it in get_mocs_settings() and then using it here". Treating 0 as "it was not initialized" may be an easy way to catch that. Lucas De Marchi
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index b06611c1d4ad..7853ea194ea6 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -221,6 +221,7 @@ struct intel_gt { struct { u8 uc_index; + u8 wb_index; /* Only for platforms listed in Bspec: 72161 */ } mocs; struct intel_pxp pxp; diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index c4c37585ae8c..265812589f87 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -23,6 +23,7 @@ struct drm_i915_mocs_table { unsigned int n_entries; const struct drm_i915_mocs_entry *table; u8 uc_index; + u8 wb_index; /* Only for platforms listed in Bspec: 72161 */ u8 unused_entries_index; }; @@ -47,6 +48,7 @@ struct drm_i915_mocs_table { /* Helper defines */ #define GEN9_NUM_MOCS_ENTRIES 64 /* 63-64 are reserved, but configured. */ +#define PVC_NUM_MOCS_ENTRIES 3 /* (e)LLC caching options */ /* @@ -394,6 +396,17 @@ static const struct drm_i915_mocs_entry dg2_mocs_table_g10_ax[] = { MOCS_ENTRY(3, 0, L3_3_WB | L3_LKUP(1)), }; +static const struct drm_i915_mocs_entry pvc_mocs_table[] = { + /* Error */ + MOCS_ENTRY(0, 0, L3_3_WB), + + /* UC */ + MOCS_ENTRY(1, 0, L3_1_UC), + + /* WB */ + MOCS_ENTRY(2, 0, L3_3_WB), +}; + enum { HAS_GLOBAL_MOCS = BIT(0), HAS_ENGINE_MOCS = BIT(1), @@ -423,7 +436,14 @@ static unsigned int get_mocs_settings(const struct drm_i915_private *i915, memset(table, 0, sizeof(struct drm_i915_mocs_table)); table->unused_entries_index = I915_MOCS_PTE; - if (IS_DG2(i915)) { + if (IS_PONTEVECCHIO(i915)) { + table->size = ARRAY_SIZE(pvc_mocs_table); + table->table = pvc_mocs_table; + table->n_entries = PVC_NUM_MOCS_ENTRIES; + table->uc_index = 1; + table->wb_index = 2; + table->unused_entries_index = 2; + } else if (IS_DG2(i915)) { if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) { table->size = ARRAY_SIZE(dg2_mocs_table_g10_ax); 
table->table = dg2_mocs_table_g10_ax; @@ -622,6 +642,8 @@ void intel_set_mocs_index(struct intel_gt *gt) get_mocs_settings(gt->i915, &table); gt->mocs.uc_index = table.uc_index; + if (HAS_L3_CCS_READ(gt->i915)) + gt->mocs.wb_index = table.wb_index; } void intel_mocs_init(struct intel_gt *gt) diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index a05c4b99b3fb..a656d9c2ca2b 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1994,7 +1994,7 @@ void intel_engine_apply_whitelist(struct intel_engine_cs *engine) static void engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { - u8 mocs; + u8 mocs_w, mocs_r; /* * RING_CMD_CCTL are need to be programed to un-cached @@ -2002,11 +2002,18 @@ engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) * Streamers on Gen12 onward platforms. */ if (GRAPHICS_VER(engine->i915) >= 12) { - mocs = engine->gt->mocs.uc_index; + if (HAS_L3_CCS_READ(engine->i915) && + engine->class == COMPUTE_CLASS) + mocs_r = engine->gt->mocs.wb_index; + else + mocs_r = engine->gt->mocs.uc_index; + + mocs_w = engine->gt->mocs.uc_index; + wa_masked_field_set(wal, RING_CMD_CCTL(engine->mmio_base), CMD_CCTL_MOCS_MASK, - CMD_CCTL_MOCS_OVERRIDE(mocs, mocs)); + CMD_CCTL_MOCS_OVERRIDE(mocs_w, mocs_r)); } } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 2dddc27a1b0e..8c8e7308502b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1369,6 +1369,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define HAS_LSPCON(dev_priv) (IS_DISPLAY_VER(dev_priv, 9, 10)) +#define HAS_L3_CCS_READ(i915) (INTEL_INFO(i915)->has_l3_ccs_read) + /* DPF == dynamic parity feature */ #define HAS_L3_DPF(dev_priv) (INTEL_INFO(dev_priv)->has_l3_dpf) #define NUM_L3_SLICES(dev_priv) (IS_HSW_GT3(dev_priv) ? 
\ diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 498708b33924..07722cdf63ac 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -1076,7 +1076,8 @@ static const struct intel_device_info ats_m_info = { #define XE_HPC_FEATURES \ XE_HP_FEATURES, \ - .dma_mask_size = 52 + .dma_mask_size = 52, \ + .has_l3_ccs_read = 1 __maybe_unused static const struct intel_device_info pvc_info = { diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index e7d2cf7d65c8..09e33296157a 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -150,6 +150,7 @@ enum intel_ppgtt_type { func(has_heci_pxp); \ func(has_heci_gscfi); \ func(has_guc_deprivilege); \ + func(has_l3_ccs_read); \ func(has_l3_dpf); \ func(has_llc); \ func(has_logical_ring_contexts); \