diff mbox series

drm/i915: Add Wa_14015150844

Message ID 20230830160001.2395993-1-shekhar.chauhan@intel.com (mailing list archive)
State New, archived
Headers show
Series drm/i915: Add Wa_14015150844 | expand

Commit Message

Chauhan, Shekhar Aug. 30, 2023, 4 p.m. UTC
Disables Atomic-chaining of Typed Writes.

BSpec: 54040
Signed-off-by: Shekhar Chauhan <shekhar.chauhan@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_gt_regs.h     |  2 ++
 drivers/gpu/drm/i915/gt/intel_workarounds.c | 10 ++++++++++
 2 files changed, 12 insertions(+)

Comments

kernel test robot Aug. 30, 2023, 6:34 p.m. UTC | #1
Hi Shekhar,

kernel test robot noticed the following build errors:

[auto build test ERROR on drm-tip/drm-tip]

url:    https://github.com/intel-lab-lkp/linux/commits/Shekhar-Chauhan/drm-i915-Add-Wa_14015150844/20230831-000233
base:   git://anongit.freedesktop.org/drm/drm-tip drm-tip
patch link:    https://lore.kernel.org/r/20230830160001.2395993-1-shekhar.chauhan%40intel.com
patch subject: [Intel-gfx] [PATCH] drm/i915: Add Wa_14015150844
config: x86_64-defconfig (https://download.01.org/0day-ci/archive/20230831/202308310238.2Dnlp2IN-lkp@intel.com/config)
compiler: gcc-11 (Debian 11.3.0-12) 11.3.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20230831/202308310238.2Dnlp2IN-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202308310238.2Dnlp2IN-lkp@intel.com/

All errors (new ones prefixed by >>):

   drivers/gpu/drm/i915/gt/intel_workarounds.c: In function 'rcs_engine_wa_init':
>> drivers/gpu/drm/i915/gt/intel_workarounds.c:2330:13: error: implicit declaration of function 'IS_DG2_GRAPHICS_STEP'; did you mean 'IS_GRAPHICS_STEP'? [-Werror=implicit-function-declaration]
    2330 |         if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_C0, STEP_FOREVER) ||
         |             ^~~~~~~~~~~~~~~~~~~~
         |             IS_GRAPHICS_STEP
>> drivers/gpu/drm/i915/gt/intel_workarounds.c:2330:40: error: 'G10' undeclared (first use in this function)
    2330 |         if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_C0, STEP_FOREVER) ||
         |                                        ^~~
   drivers/gpu/drm/i915/gt/intel_workarounds.c:2330:40: note: each undeclared identifier is reported only once for each function it appears in
>> drivers/gpu/drm/i915/gt/intel_workarounds.c:2331:40: error: 'G11' undeclared (first use in this function)
    2331 |             IS_DG2_GRAPHICS_STEP(i915, G11, STEP_B0, STEP_FOREVER) ||
         |                                        ^~~
>> drivers/gpu/drm/i915/gt/intel_workarounds.c:2333:13: error: implicit declaration of function 'IS_MTL_GRAPHICS_STEP'; did you mean 'INTEL_GRAPHICS_STEP'? [-Werror=implicit-function-declaration]
    2333 |             IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_FOREVER) ||
         |             ^~~~~~~~~~~~~~~~~~~~
         |             INTEL_GRAPHICS_STEP
>> drivers/gpu/drm/i915/gt/intel_workarounds.c:2333:40: error: 'M' undeclared (first use in this function)
    2333 |             IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_FOREVER) ||
         |                                        ^
>> drivers/gpu/drm/i915/gt/intel_workarounds.c:2334:40: error: 'P' undeclared (first use in this function)
    2334 |             IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_FOREVER)) {
         |                                        ^
   cc1: some warnings being treated as errors


vim +2330 drivers/gpu/drm/i915/gt/intel_workarounds.c

  2292	
  2293	static void
  2294	rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
  2295	{
  2296		struct drm_i915_private *i915 = engine->i915;
  2297		struct intel_gt *gt = engine->gt;
  2298	
  2299		if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
  2300		    IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) {
  2301			/* Wa_22014600077 */
  2302			wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS,
  2303					 ENABLE_EU_COUNT_FOR_TDL_FLUSH);
  2304		}
  2305	
  2306		if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
  2307		    IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
  2308		    IS_DG2(i915)) {
  2309			/* Wa_1509727124 */
  2310			wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE,
  2311					 SC_DISABLE_POWER_OPTIMIZATION_EBB);
  2312		}
  2313	
  2314		if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
  2315		    IS_DG2(i915)) {
  2316			/* Wa_22012856258 */
  2317			wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
  2318					 GEN12_DISABLE_READ_SUPPRESSION);
  2319		}
  2320	
  2321		if (IS_DG2(i915)) {
  2322			/*
  2323			 * Wa_22010960976:dg2
  2324			 * Wa_14013347512:dg2
  2325			 */
  2326			wa_mcr_masked_dis(wal, XEHP_HDC_CHICKEN0,
  2327					  LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK);
  2328		}
  2329	
> 2330		if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_C0, STEP_FOREVER) ||
> 2331		    IS_DG2_GRAPHICS_STEP(i915, G11, STEP_B0, STEP_FOREVER) ||
  2332		    IS_DG2_G12(i915) ||
> 2333		    IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_FOREVER) ||
> 2334		    IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_FOREVER)) {
  2335			/* Wa_14015150844 */
  2336			wa_mcr_masked_dis(wal, XEHP_HDC_CHICKEN0,
  2337					  ATOMIC_CHAINING_TYPED_WRITES);
  2338		}
  2339	
  2340		if (IS_DG2_G11(i915) || IS_DG2_G10(i915)) {
  2341			/* Wa_22014600077:dg2 */
  2342			wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
  2343				   _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH),
  2344				   0 /* Wa_14012342262 write-only reg, so skip verification */,
  2345				   true);
  2346		}
  2347	
  2348		if (IS_DG2(i915) || IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) ||
  2349		    IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
  2350			/*
  2351			 * Wa_1606700617:tgl,dg1,adl-p
  2352			 * Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p
  2353			 * Wa_14010826681:tgl,dg1,rkl,adl-p
  2354			 * Wa_18019627453:dg2
  2355			 */
  2356			wa_masked_en(wal,
  2357				     GEN9_CS_DEBUG_MODE1,
  2358				     FF_DOP_CLOCK_GATE_DISABLE);
  2359		}
  2360	
  2361		if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || IS_DG1(i915) ||
  2362		    IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
  2363			/* Wa_1606931601:tgl,rkl,dg1,adl-s,adl-p */
  2364			wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ);
  2365	
  2366			/*
  2367			 * Wa_1407928979:tgl A*
  2368			 * Wa_18011464164:tgl[B0+],dg1[B0+]
  2369			 * Wa_22010931296:tgl[B0+],dg1[B0+]
  2370			 * Wa_14010919138:rkl,dg1,adl-s,adl-p
  2371			 */
  2372			wa_write_or(wal, GEN7_FF_THREAD_MODE,
  2373				    GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
  2374	
  2375			/* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */
  2376			wa_mcr_masked_en(wal,
  2377					 GEN10_SAMPLER_MODE,
  2378					 ENABLE_SMALLPL);
  2379		}
  2380	
  2381		if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) ||
  2382		    IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
  2383			/* Wa_1409804808 */
  2384			wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
  2385					 GEN12_PUSH_CONST_DEREF_HOLD_DIS);
  2386	
  2387			/* Wa_14010229206 */
  2388			wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
  2389		}
  2390	
  2391		if (IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915) || IS_ALDERLAKE_P(i915)) {
  2392			/*
  2393			 * Wa_1607297627
  2394			 *
  2395			 * On TGL and RKL there are multiple entries for this WA in the
  2396			 * BSpec; some indicate this is an A0-only WA, others indicate
  2397			 * it applies to all steppings so we trust the "all steppings."
  2398			 */
  2399			wa_masked_en(wal,
  2400				     RING_PSMI_CTL(RENDER_RING_BASE),
  2401				     GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
  2402				     GEN8_RC_SEMA_IDLE_MSG_DISABLE);
  2403		}
  2404	
  2405		if (GRAPHICS_VER(i915) == 11) {
  2406			/* This is not an Wa. Enable for better image quality */
  2407			wa_masked_en(wal,
  2408				     _3D_CHICKEN3,
  2409				     _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);
  2410	
  2411			/*
  2412			 * Wa_1405543622:icl
  2413			 * Formerly known as WaGAPZPriorityScheme
  2414			 */
  2415			wa_write_or(wal,
  2416				    GEN8_GARBCNTL,
  2417				    GEN11_ARBITRATION_PRIO_ORDER_MASK);
  2418	
  2419			/*
  2420			 * Wa_1604223664:icl
  2421			 * Formerly known as WaL3BankAddressHashing
  2422			 */
  2423			wa_write_clr_set(wal,
  2424					 GEN8_GARBCNTL,
  2425					 GEN11_HASH_CTRL_EXCL_MASK,
  2426					 GEN11_HASH_CTRL_EXCL_BIT0);
  2427			wa_write_clr_set(wal,
  2428					 GEN11_GLBLINVL,
  2429					 GEN11_BANK_HASH_ADDR_EXCL_MASK,
  2430					 GEN11_BANK_HASH_ADDR_EXCL_BIT0);
  2431	
  2432			/*
  2433			 * Wa_1405733216:icl
  2434			 * Formerly known as WaDisableCleanEvicts
  2435			 */
  2436			wa_mcr_write_or(wal,
  2437					GEN8_L3SQCREG4,
  2438					GEN11_LQSC_CLEAN_EVICT_DISABLE);
  2439	
  2440			/* Wa_1606682166:icl */
  2441			wa_write_or(wal,
  2442				    GEN7_SARCHKMD,
  2443				    GEN7_DISABLE_SAMPLER_PREFETCH);
  2444	
  2445			/* Wa_1409178092:icl */
  2446			wa_mcr_write_clr_set(wal,
  2447					     GEN11_SCRATCH2,
  2448					     GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE,
  2449					     0);
  2450	
  2451			/* WaEnable32PlaneMode:icl */
  2452			wa_masked_en(wal, GEN9_CSFE_CHICKEN1_RCS,
  2453				     GEN11_ENABLE_32_PLANE_MODE);
  2454	
  2455			/*
  2456			 * Wa_1408767742:icl[a2..forever],ehl[all]
  2457			 * Wa_1605460711:icl[a0..c0]
  2458			 */
  2459			wa_write_or(wal,
  2460				    GEN7_FF_THREAD_MODE,
  2461				    GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
  2462	
  2463			/* Wa_22010271021 */
  2464			wa_masked_en(wal,
  2465				     GEN9_CS_DEBUG_MODE1,
  2466				     FF_DOP_CLOCK_GATE_DISABLE);
  2467		}
  2468	
  2469		/*
  2470		 * Intel platforms that support fine-grained preemption (i.e., gen9 and
  2471		 * beyond) allow the kernel-mode driver to choose between two different
  2472		 * options for controlling preemption granularity and behavior.
  2473		 *
  2474		 * Option 1 (hardware default):
  2475		 *   Preemption settings are controlled in a global manner via
  2476		 *   kernel-only register CS_DEBUG_MODE1 (0x20EC).  Any granularity
  2477		 *   and settings chosen by the kernel-mode driver will apply to all
  2478		 *   userspace clients.
  2479		 *
  2480		 * Option 2:
  2481		 *   Preemption settings are controlled on a per-context basis via
  2482		 *   register CS_CHICKEN1 (0x2580).  CS_CHICKEN1 is saved/restored on
  2483		 *   context switch and is writable by userspace (e.g., via
  2484		 *   MI_LOAD_REGISTER_IMMEDIATE instructions placed in a batch buffer)
  2485		 *   which allows different userspace drivers/clients to select
  2486		 *   different settings, or to change those settings on the fly in
  2487		 *   response to runtime needs.  This option was known by name
  2488		 *   "FtrPerCtxtPreemptionGranularityControl" at one time, although
  2489		 *   that name is somewhat misleading as other non-granularity
  2490		 *   preemption settings are also impacted by this decision.
  2491		 *
  2492		 * On Linux, our policy has always been to let userspace drivers
  2493		 * control preemption granularity/settings (Option 2).  This was
  2494		 * originally mandatory on gen9 to prevent ABI breakage (old gen9
  2495		 * userspace developed before object-level preemption was enabled would
  2496		 * not behave well if i915 were to go with Option 1 and enable that
  2497		 * preemption in a global manner).  On gen9 each context would have
  2498		 * object-level preemption disabled by default (see
  2499		 * WaDisable3DMidCmdPreemption in gen9_ctx_workarounds_init), but
  2500		 * userspace drivers could opt-in to object-level preemption as they
  2501		 * saw fit.  For post-gen9 platforms, we continue to utilize Option 2;
  2502		 * even though it is no longer necessary for ABI compatibility when
  2503		 * enabling a new platform, it does ensure that userspace will be able
  2504		 * to implement any workarounds that show up requiring temporary
  2505		 * adjustments to preemption behavior at runtime.
  2506		 *
  2507		 * Notes/Workarounds:
  2508		 *  - Wa_14015141709:  On DG2 and early steppings of MTL,
  2509		 *      CS_CHICKEN1[0] does not disable object-level preemption as
  2510		 *      it is supposed to (nor does CS_DEBUG_MODE1[0] if we had been
  2511		 *      using Option 1).  Effectively this means userspace is unable
  2512		 *      to disable object-level preemption on these platforms/steppings
  2513		 *      despite the setting here.
  2514		 *
  2515		 *  - Wa_16013994831:  May require that userspace program
  2516		 *      CS_CHICKEN1[10] when certain runtime conditions are true.
  2517		 *      Userspace requires Option 2 to be in effect for their update of
  2518		 *      CS_CHICKEN1[10] to be effective.
  2519		 *
  2520		 * Other workarounds may appear in the future that will also require
  2521		 * Option 2 behavior to allow proper userspace implementation.
  2522		 */
  2523		if (GRAPHICS_VER(i915) >= 9)
  2524			wa_masked_en(wal,
  2525				     GEN7_FF_SLICE_CS_CHICKEN1,
  2526				     GEN9_FFSC_PERCTX_PREEMPT_CTRL);
  2527	
  2528		if (IS_SKYLAKE(i915) ||
  2529		    IS_KABYLAKE(i915) ||
  2530		    IS_COFFEELAKE(i915) ||
  2531		    IS_COMETLAKE(i915)) {
  2532			/* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
  2533			wa_write_or(wal,
  2534				    GEN8_GARBCNTL,
  2535				    GEN9_GAPS_TSV_CREDIT_DISABLE);
  2536		}
  2537	
  2538		if (IS_BROXTON(i915)) {
  2539			/* WaDisablePooledEuLoadBalancingFix:bxt */
  2540			wa_masked_en(wal,
  2541				     FF_SLICE_CS_CHICKEN2,
  2542				     GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
  2543		}
  2544	
  2545		if (GRAPHICS_VER(i915) == 9) {
  2546			/* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
  2547			wa_masked_en(wal,
  2548				     GEN9_CSFE_CHICKEN1_RCS,
  2549				     GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);
  2550	
  2551			/* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
  2552			wa_mcr_write_or(wal,
  2553					BDW_SCRATCH1,
  2554					GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
  2555	
  2556			/* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
  2557			if (IS_GEN9_LP(i915))
  2558				wa_mcr_write_clr_set(wal,
  2559						     GEN8_L3SQCREG1,
  2560						     L3_PRIO_CREDITS_MASK,
  2561						     L3_GENERAL_PRIO_CREDITS(62) |
  2562						     L3_HIGH_PRIO_CREDITS(2));
  2563	
  2564			/* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
  2565			wa_mcr_write_or(wal,
  2566					GEN8_L3SQCREG4,
  2567					GEN8_LQSC_FLUSH_COHERENT_LINES);
  2568	
  2569			/* Disable atomics in L3 to prevent unrecoverable hangs */
  2570			wa_write_clr_set(wal, GEN9_SCRATCH_LNCF1,
  2571					 GEN9_LNCF_NONIA_COHERENT_ATOMICS_ENABLE, 0);
  2572			wa_mcr_write_clr_set(wal, GEN8_L3SQCREG4,
  2573					     GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE, 0);
  2574			wa_mcr_write_clr_set(wal, GEN9_SCRATCH1,
  2575					     EVICTION_PERF_FIX_ENABLE, 0);
  2576		}
  2577	
  2578		if (IS_HASWELL(i915)) {
  2579			/* WaSampleCChickenBitEnable:hsw */
  2580			wa_masked_en(wal,
  2581				     HSW_HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE);
  2582	
  2583			wa_masked_dis(wal,
  2584				      CACHE_MODE_0_GEN7,
  2585				      /* enable HiZ Raw Stall Optimization */
  2586				      HIZ_RAW_STALL_OPT_DISABLE);
  2587		}
  2588	
  2589		if (IS_VALLEYVIEW(i915)) {
  2590			/* WaDisableEarlyCull:vlv */
  2591			wa_masked_en(wal,
  2592				     _3D_CHICKEN3,
  2593				     _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);
  2594	
  2595			/*
  2596			 * WaVSThreadDispatchOverride:ivb,vlv
  2597			 *
  2598			 * This actually overrides the dispatch
  2599			 * mode for all thread types.
  2600			 */
  2601			wa_write_clr_set(wal,
  2602					 GEN7_FF_THREAD_MODE,
  2603					 GEN7_FF_SCHED_MASK,
  2604					 GEN7_FF_TS_SCHED_HW |
  2605					 GEN7_FF_VS_SCHED_HW |
  2606					 GEN7_FF_DS_SCHED_HW);
  2607	
  2608			/* WaPsdDispatchEnable:vlv */
  2609			/* WaDisablePSDDualDispatchEnable:vlv */
  2610			wa_masked_en(wal,
  2611				     GEN7_HALF_SLICE_CHICKEN1,
  2612				     GEN7_MAX_PS_THREAD_DEP |
  2613				     GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
  2614		}
  2615	
  2616		if (IS_IVYBRIDGE(i915)) {
  2617			/* WaDisableEarlyCull:ivb */
  2618			wa_masked_en(wal,
  2619				     _3D_CHICKEN3,
  2620				     _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);
  2621	
  2622			if (0) { /* causes HiZ corruption on ivb:gt1 */
  2623				/* enable HiZ Raw Stall Optimization */
  2624				wa_masked_dis(wal,
  2625					      CACHE_MODE_0_GEN7,
  2626					      HIZ_RAW_STALL_OPT_DISABLE);
  2627			}
  2628	
  2629			/*
  2630			 * WaVSThreadDispatchOverride:ivb,vlv
  2631			 *
  2632			 * This actually overrides the dispatch
  2633			 * mode for all thread types.
  2634			 */
  2635			wa_write_clr_set(wal,
  2636					 GEN7_FF_THREAD_MODE,
  2637					 GEN7_FF_SCHED_MASK,
  2638					 GEN7_FF_TS_SCHED_HW |
  2639					 GEN7_FF_VS_SCHED_HW |
  2640					 GEN7_FF_DS_SCHED_HW);
  2641	
  2642			/* WaDisablePSDDualDispatchEnable:ivb */
  2643			if (IS_IVB_GT1(i915))
  2644				wa_masked_en(wal,
  2645					     GEN7_HALF_SLICE_CHICKEN1,
  2646					     GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
  2647		}
  2648	
  2649		if (GRAPHICS_VER(i915) == 7) {
  2650			/* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
  2651			wa_masked_en(wal,
  2652				     RING_MODE_GEN7(RENDER_RING_BASE),
  2653				     GFX_TLB_INVALIDATE_EXPLICIT | GFX_REPLAY_MODE);
  2654	
  2655			/* WaDisable_RenderCache_OperationalFlush:ivb,vlv,hsw */
  2656			wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE);
  2657	
  2658			/*
  2659			 * BSpec says this must be set, even though
  2660			 * WaDisable4x2SubspanOptimization:ivb,hsw
  2661			 * WaDisable4x2SubspanOptimization isn't listed for VLV.
  2662			 */
  2663			wa_masked_en(wal,
  2664				     CACHE_MODE_1,
  2665				     PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
  2666	
  2667			/*
  2668			 * BSpec recommends 8x4 when MSAA is used,
  2669			 * however in practice 16x4 seems fastest.
  2670			 *
  2671			 * Note that PS/WM thread counts depend on the WIZ hashing
  2672			 * disable bit, which we don't touch here, but it's good
  2673			 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
  2674			 */
  2675			wa_masked_field_set(wal,
  2676					    GEN7_GT_MODE,
  2677					    GEN6_WIZ_HASHING_MASK,
  2678					    GEN6_WIZ_HASHING_16x4);
  2679		}
  2680	
  2681		if (IS_GRAPHICS_VER(i915, 6, 7))
  2682			/*
  2683			 * We need to disable the AsyncFlip performance optimisations in
  2684			 * order to use MI_WAIT_FOR_EVENT within the CS. It should
  2685			 * already be programmed to '1' on all products.
  2686			 *
  2687			 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
  2688			 */
  2689			wa_masked_en(wal,
  2690				     RING_MI_MODE(RENDER_RING_BASE),
  2691				     ASYNC_FLIP_PERF_DISABLE);
  2692	
  2693		if (GRAPHICS_VER(i915) == 6) {
  2694			/*
  2695			 * Required for the hardware to program scanline values for
  2696			 * waiting
  2697			 * WaEnableFlushTlbInvalidationMode:snb
  2698			 */
  2699			wa_masked_en(wal,
  2700				     GFX_MODE,
  2701				     GFX_TLB_INVALIDATE_EXPLICIT);
  2702	
  2703			/* WaDisableHiZPlanesWhenMSAAEnabled:snb */
  2704			wa_masked_en(wal,
  2705				     _3D_CHICKEN,
  2706				     _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB);
  2707	
  2708			wa_masked_en(wal,
  2709				     _3D_CHICKEN3,
  2710				     /* WaStripsFansDisableFastClipPerformanceFix:snb */
  2711				     _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL |
  2712				     /*
  2713				      * Bspec says:
  2714				      * "This bit must be set if 3DSTATE_CLIP clip mode is set
  2715				      * to normal and 3DSTATE_SF number of SF output attributes
  2716				      * is more than 16."
  2717				      */
  2718				     _3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH);
  2719	
  2720			/*
  2721			 * BSpec recommends 8x4 when MSAA is used,
  2722			 * however in practice 16x4 seems fastest.
  2723			 *
  2724			 * Note that PS/WM thread counts depend on the WIZ hashing
  2725			 * disable bit, which we don't touch here, but it's good
  2726			 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
  2727			 */
  2728			wa_masked_field_set(wal,
  2729					    GEN6_GT_MODE,
  2730					    GEN6_WIZ_HASHING_MASK,
  2731					    GEN6_WIZ_HASHING_16x4);
  2732	
  2733			/* WaDisable_RenderCache_OperationalFlush:snb */
  2734			wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);
  2735	
  2736			/*
  2737			 * From the Sandybridge PRM, volume 1 part 3, page 24:
  2738			 * "If this bit is set, STCunit will have LRA as replacement
  2739			 *  policy. [...] This bit must be reset. LRA replacement
  2740			 *  policy is not supported."
  2741			 */
  2742			wa_masked_dis(wal,
  2743				      CACHE_MODE_0,
  2744				      CM0_STC_EVICT_DISABLE_LRA_SNB);
  2745		}
  2746	
  2747		if (IS_GRAPHICS_VER(i915, 4, 6))
  2748			/* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
  2749			wa_add(wal, RING_MI_MODE(RENDER_RING_BASE),
  2750			       0, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH),
  2751			       /* XXX bit doesn't stick on Broadwater */
  2752			       IS_I965G(i915) ? 0 : VS_TIMER_DISPATCH, true);
  2753	
  2754		if (GRAPHICS_VER(i915) == 4)
  2755			/*
  2756			 * Disable CONSTANT_BUFFER before it is loaded from the context
  2757			 * image. For as it is loaded, it is executed and the stored
  2758			 * address may no longer be valid, leading to a GPU hang.
  2759			 *
  2760			 * This imposes the requirement that userspace reload their
  2761			 * CONSTANT_BUFFER on every batch, fortunately a requirement
  2762			 * they are already accustomed to from before contexts were
  2763			 * enabled.
  2764			 */
  2765			wa_add(wal, ECOSKPD(RENDER_RING_BASE),
  2766			       0, _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE),
  2767			       0 /* XXX bit doesn't stick on Broadwater */,
  2768			       true);
  2769	}
  2770
kernel test robot Aug. 30, 2023, 6:45 p.m. UTC | #2
Hi Shekhar,

kernel test robot noticed the following build errors:

[auto build test ERROR on drm-tip/drm-tip]

url:    https://github.com/intel-lab-lkp/linux/commits/Shekhar-Chauhan/drm-i915-Add-Wa_14015150844/20230831-000233
base:   git://anongit.freedesktop.org/drm/drm-tip drm-tip
patch link:    https://lore.kernel.org/r/20230830160001.2395993-1-shekhar.chauhan%40intel.com
patch subject: [Intel-gfx] [PATCH] drm/i915: Add Wa_14015150844
config: x86_64-buildonly-randconfig-004-20230831 (https://download.01.org/0day-ci/archive/20230831/202308310210.60ealdkg-lkp@intel.com/config)
compiler: gcc-7 (Ubuntu 7.5.0-6ubuntu2) 7.5.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20230831/202308310210.60ealdkg-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202308310210.60ealdkg-lkp@intel.com/

All errors (new ones prefixed by >>):

   drivers/gpu/drm/i915/gt/intel_workarounds.c: In function 'rcs_engine_wa_init':
>> drivers/gpu/drm/i915/gt/intel_workarounds.c:2330:6: error: implicit declaration of function 'IS_DG2_GRAPHICS_STEP'; did you mean 'IS_GRAPHICS_STEP'? [-Werror=implicit-function-declaration]
     if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_C0, STEP_FOREVER) ||
         ^~~~~~~~~~~~~~~~~~~~
         IS_GRAPHICS_STEP
>> drivers/gpu/drm/i915/gt/intel_workarounds.c:2330:33: error: 'G10' undeclared (first use in this function)
     if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_C0, STEP_FOREVER) ||
                                    ^~~
   drivers/gpu/drm/i915/gt/intel_workarounds.c:2330:33: note: each undeclared identifier is reported only once for each function it appears in
>> drivers/gpu/drm/i915/gt/intel_workarounds.c:2331:33: error: 'G11' undeclared (first use in this function); did you mean 'G10'?
         IS_DG2_GRAPHICS_STEP(i915, G11, STEP_B0, STEP_FOREVER) ||
                                    ^~~
                                    G10
>> drivers/gpu/drm/i915/gt/intel_workarounds.c:2333:6: error: implicit declaration of function 'IS_MTL_GRAPHICS_STEP'; did you mean 'INTEL_GRAPHICS_STEP'? [-Werror=implicit-function-declaration]
         IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_FOREVER) ||
         ^~~~~~~~~~~~~~~~~~~~
         INTEL_GRAPHICS_STEP
>> drivers/gpu/drm/i915/gt/intel_workarounds.c:2333:33: error: 'M' undeclared (first use in this function)
         IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_FOREVER) ||
                                    ^
>> drivers/gpu/drm/i915/gt/intel_workarounds.c:2334:33: error: 'P' undeclared (first use in this function)
         IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_FOREVER)) {
                                    ^
   cc1: some warnings being treated as errors


vim +2330 drivers/gpu/drm/i915/gt/intel_workarounds.c

  2292	
  2293	static void
  2294	rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
  2295	{
  2296		struct drm_i915_private *i915 = engine->i915;
  2297		struct intel_gt *gt = engine->gt;
  2298	
  2299		if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
  2300		    IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) {
  2301			/* Wa_22014600077 */
  2302			wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS,
  2303					 ENABLE_EU_COUNT_FOR_TDL_FLUSH);
  2304		}
  2305	
  2306		if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
  2307		    IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
  2308		    IS_DG2(i915)) {
  2309			/* Wa_1509727124 */
  2310			wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE,
  2311					 SC_DISABLE_POWER_OPTIMIZATION_EBB);
  2312		}
  2313	
  2314		if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
  2315		    IS_DG2(i915)) {
  2316			/* Wa_22012856258 */
  2317			wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
  2318					 GEN12_DISABLE_READ_SUPPRESSION);
  2319		}
  2320	
  2321		if (IS_DG2(i915)) {
  2322			/*
  2323			 * Wa_22010960976:dg2
  2324			 * Wa_14013347512:dg2
  2325			 */
  2326			wa_mcr_masked_dis(wal, XEHP_HDC_CHICKEN0,
  2327					  LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK);
  2328		}
  2329	
> 2330		if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_C0, STEP_FOREVER) ||
> 2331		    IS_DG2_GRAPHICS_STEP(i915, G11, STEP_B0, STEP_FOREVER) ||
  2332		    IS_DG2_G12(i915) ||
> 2333		    IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_FOREVER) ||
> 2334		    IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_FOREVER)) {
  2335			/* Wa_14015150844 */
  2336			wa_mcr_masked_dis(wal, XEHP_HDC_CHICKEN0,
  2337					  ATOMIC_CHAINING_TYPED_WRITES);
  2338		}
  2339	
  2340		if (IS_DG2_G11(i915) || IS_DG2_G10(i915)) {
  2341			/* Wa_22014600077:dg2 */
  2342			wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
  2343				   _MASKED_BIT_ENABLE(ENABLE_EU_COUNT_FOR_TDL_FLUSH),
  2344				   0 /* Wa_14012342262 write-only reg, so skip verification */,
  2345				   true);
  2346		}
  2347	
  2348		if (IS_DG2(i915) || IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) ||
  2349		    IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
  2350			/*
  2351			 * Wa_1606700617:tgl,dg1,adl-p
  2352			 * Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p
  2353			 * Wa_14010826681:tgl,dg1,rkl,adl-p
  2354			 * Wa_18019627453:dg2
  2355			 */
  2356			wa_masked_en(wal,
  2357				     GEN9_CS_DEBUG_MODE1,
  2358				     FF_DOP_CLOCK_GATE_DISABLE);
  2359		}
  2360	
  2361		if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || IS_DG1(i915) ||
  2362		    IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
  2363			/* Wa_1606931601:tgl,rkl,dg1,adl-s,adl-p */
  2364			wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ);
  2365	
  2366			/*
  2367			 * Wa_1407928979:tgl A*
  2368			 * Wa_18011464164:tgl[B0+],dg1[B0+]
  2369			 * Wa_22010931296:tgl[B0+],dg1[B0+]
  2370			 * Wa_14010919138:rkl,dg1,adl-s,adl-p
  2371			 */
  2372			wa_write_or(wal, GEN7_FF_THREAD_MODE,
  2373				    GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
  2374	
  2375			/* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */
  2376			wa_mcr_masked_en(wal,
  2377					 GEN10_SAMPLER_MODE,
  2378					 ENABLE_SMALLPL);
  2379		}
  2380	
  2381		if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) ||
  2382		    IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
  2383			/* Wa_1409804808 */
  2384			wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
  2385					 GEN12_PUSH_CONST_DEREF_HOLD_DIS);
  2386	
  2387			/* Wa_14010229206 */
  2388			wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
  2389		}
  2390	
  2391		if (IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915) || IS_ALDERLAKE_P(i915)) {
  2392			/*
  2393			 * Wa_1607297627
  2394			 *
  2395			 * On TGL and RKL there are multiple entries for this WA in the
  2396			 * BSpec; some indicate this is an A0-only WA, others indicate
  2397			 * it applies to all steppings so we trust the "all steppings."
  2398			 */
  2399			wa_masked_en(wal,
  2400				     RING_PSMI_CTL(RENDER_RING_BASE),
  2401				     GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
  2402				     GEN8_RC_SEMA_IDLE_MSG_DISABLE);
  2403		}
  2404	
  2405		if (GRAPHICS_VER(i915) == 11) {
  2406			/* This is not an Wa. Enable for better image quality */
  2407			wa_masked_en(wal,
  2408				     _3D_CHICKEN3,
  2409				     _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);
  2410	
  2411			/*
  2412			 * Wa_1405543622:icl
  2413			 * Formerly known as WaGAPZPriorityScheme
  2414			 */
  2415			wa_write_or(wal,
  2416				    GEN8_GARBCNTL,
  2417				    GEN11_ARBITRATION_PRIO_ORDER_MASK);
  2418	
  2419			/*
  2420			 * Wa_1604223664:icl
  2421			 * Formerly known as WaL3BankAddressHashing
  2422			 */
  2423			wa_write_clr_set(wal,
  2424					 GEN8_GARBCNTL,
  2425					 GEN11_HASH_CTRL_EXCL_MASK,
  2426					 GEN11_HASH_CTRL_EXCL_BIT0);
  2427			wa_write_clr_set(wal,
  2428					 GEN11_GLBLINVL,
  2429					 GEN11_BANK_HASH_ADDR_EXCL_MASK,
  2430					 GEN11_BANK_HASH_ADDR_EXCL_BIT0);
  2431	
  2432			/*
  2433			 * Wa_1405733216:icl
  2434			 * Formerly known as WaDisableCleanEvicts
  2435			 */
  2436			wa_mcr_write_or(wal,
  2437					GEN8_L3SQCREG4,
  2438					GEN11_LQSC_CLEAN_EVICT_DISABLE);
  2439	
  2440			/* Wa_1606682166:icl */
  2441			wa_write_or(wal,
  2442				    GEN7_SARCHKMD,
  2443				    GEN7_DISABLE_SAMPLER_PREFETCH);
  2444	
  2445			/* Wa_1409178092:icl */
  2446			wa_mcr_write_clr_set(wal,
  2447					     GEN11_SCRATCH2,
  2448					     GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE,
  2449					     0);
  2450	
  2451			/* WaEnable32PlaneMode:icl */
  2452			wa_masked_en(wal, GEN9_CSFE_CHICKEN1_RCS,
  2453				     GEN11_ENABLE_32_PLANE_MODE);
  2454	
  2455			/*
  2456			 * Wa_1408767742:icl[a2..forever],ehl[all]
  2457			 * Wa_1605460711:icl[a0..c0]
  2458			 */
  2459			wa_write_or(wal,
  2460				    GEN7_FF_THREAD_MODE,
  2461				    GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
  2462	
  2463			/* Wa_22010271021 */
  2464			wa_masked_en(wal,
  2465				     GEN9_CS_DEBUG_MODE1,
  2466				     FF_DOP_CLOCK_GATE_DISABLE);
  2467		}
  2468	
  2469		/*
  2470		 * Intel platforms that support fine-grained preemption (i.e., gen9 and
  2471		 * beyond) allow the kernel-mode driver to choose between two different
  2472		 * options for controlling preemption granularity and behavior.
  2473		 *
  2474		 * Option 1 (hardware default):
  2475		 *   Preemption settings are controlled in a global manner via
  2476		 *   kernel-only register CS_DEBUG_MODE1 (0x20EC).  Any granularity
  2477		 *   and settings chosen by the kernel-mode driver will apply to all
  2478		 *   userspace clients.
  2479		 *
  2480		 * Option 2:
  2481		 *   Preemption settings are controlled on a per-context basis via
  2482		 *   register CS_CHICKEN1 (0x2580).  CS_CHICKEN1 is saved/restored on
  2483		 *   context switch and is writable by userspace (e.g., via
  2484		 *   MI_LOAD_REGISTER_IMMEDIATE instructions placed in a batch buffer)
  2485		 *   which allows different userspace drivers/clients to select
  2486		 *   different settings, or to change those settings on the fly in
  2487		 *   response to runtime needs.  This option was known by name
  2488		 *   "FtrPerCtxtPreemptionGranularityControl" at one time, although
  2489		 *   that name is somewhat misleading as other non-granularity
  2490		 *   preemption settings are also impacted by this decision.
  2491		 *
  2492		 * On Linux, our policy has always been to let userspace drivers
  2493		 * control preemption granularity/settings (Option 2).  This was
  2494		 * originally mandatory on gen9 to prevent ABI breakage (old gen9
  2495		 * userspace developed before object-level preemption was enabled would
  2496		 * not behave well if i915 were to go with Option 1 and enable that
  2497		 * preemption in a global manner).  On gen9 each context would have
  2498		 * object-level preemption disabled by default (see
  2499		 * WaDisable3DMidCmdPreemption in gen9_ctx_workarounds_init), but
  2500		 * userspace drivers could opt-in to object-level preemption as they
  2501		 * saw fit.  For post-gen9 platforms, we continue to utilize Option 2;
  2502		 * even though it is no longer necessary for ABI compatibility when
  2503		 * enabling a new platform, it does ensure that userspace will be able
  2504		 * to implement any workarounds that show up requiring temporary
  2505		 * adjustments to preemption behavior at runtime.
  2506		 *
  2507		 * Notes/Workarounds:
  2508		 *  - Wa_14015141709:  On DG2 and early steppings of MTL,
  2509		 *      CS_CHICKEN1[0] does not disable object-level preemption as
  2510		 *      it is supposed to (nor does CS_DEBUG_MODE1[0] if we had been
  2511		 *      using Option 1).  Effectively this means userspace is unable
  2512		 *      to disable object-level preemption on these platforms/steppings
  2513		 *      despite the setting here.
  2514		 *
  2515		 *  - Wa_16013994831:  May require that userspace program
  2516		 *      CS_CHICKEN1[10] when certain runtime conditions are true.
  2517		 *      Userspace requires Option 2 to be in effect for their update of
  2518		 *      CS_CHICKEN1[10] to be effective.
  2519		 *
  2520		 * Other workarounds may appear in the future that will also require
  2521		 * Option 2 behavior to allow proper userspace implementation.
  2522		 */
  2523		if (GRAPHICS_VER(i915) >= 9)
  2524			wa_masked_en(wal,
  2525				     GEN7_FF_SLICE_CS_CHICKEN1,
  2526				     GEN9_FFSC_PERCTX_PREEMPT_CTRL);
  2527	
  2528		if (IS_SKYLAKE(i915) ||
  2529		    IS_KABYLAKE(i915) ||
  2530		    IS_COFFEELAKE(i915) ||
  2531		    IS_COMETLAKE(i915)) {
  2532			/* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
  2533			wa_write_or(wal,
  2534				    GEN8_GARBCNTL,
  2535				    GEN9_GAPS_TSV_CREDIT_DISABLE);
  2536		}
  2537	
  2538		if (IS_BROXTON(i915)) {
  2539			/* WaDisablePooledEuLoadBalancingFix:bxt */
  2540			wa_masked_en(wal,
  2541				     FF_SLICE_CS_CHICKEN2,
  2542				     GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
  2543		}
  2544	
  2545		if (GRAPHICS_VER(i915) == 9) {
  2546			/* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
  2547			wa_masked_en(wal,
  2548				     GEN9_CSFE_CHICKEN1_RCS,
  2549				     GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);
  2550	
  2551			/* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
  2552			wa_mcr_write_or(wal,
  2553					BDW_SCRATCH1,
  2554					GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
  2555	
  2556			/* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
  2557			if (IS_GEN9_LP(i915))
  2558				wa_mcr_write_clr_set(wal,
  2559						     GEN8_L3SQCREG1,
  2560						     L3_PRIO_CREDITS_MASK,
  2561						     L3_GENERAL_PRIO_CREDITS(62) |
  2562						     L3_HIGH_PRIO_CREDITS(2));
  2563	
  2564			/* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
  2565			wa_mcr_write_or(wal,
  2566					GEN8_L3SQCREG4,
  2567					GEN8_LQSC_FLUSH_COHERENT_LINES);
  2568	
  2569			/* Disable atomics in L3 to prevent unrecoverable hangs */
  2570			wa_write_clr_set(wal, GEN9_SCRATCH_LNCF1,
  2571					 GEN9_LNCF_NONIA_COHERENT_ATOMICS_ENABLE, 0);
  2572			wa_mcr_write_clr_set(wal, GEN8_L3SQCREG4,
  2573					     GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE, 0);
  2574			wa_mcr_write_clr_set(wal, GEN9_SCRATCH1,
  2575					     EVICTION_PERF_FIX_ENABLE, 0);
  2576		}
  2577	
  2578		if (IS_HASWELL(i915)) {
  2579			/* WaSampleCChickenBitEnable:hsw */
  2580			wa_masked_en(wal,
  2581				     HSW_HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE);
  2582	
  2583			wa_masked_dis(wal,
  2584				      CACHE_MODE_0_GEN7,
  2585				      /* enable HiZ Raw Stall Optimization */
  2586				      HIZ_RAW_STALL_OPT_DISABLE);
  2587		}
  2588	
  2589		if (IS_VALLEYVIEW(i915)) {
  2590			/* WaDisableEarlyCull:vlv */
  2591			wa_masked_en(wal,
  2592				     _3D_CHICKEN3,
  2593				     _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);
  2594	
  2595			/*
  2596			 * WaVSThreadDispatchOverride:ivb,vlv
  2597			 *
  2598			 * This actually overrides the dispatch
  2599			 * mode for all thread types.
  2600			 */
  2601			wa_write_clr_set(wal,
  2602					 GEN7_FF_THREAD_MODE,
  2603					 GEN7_FF_SCHED_MASK,
  2604					 GEN7_FF_TS_SCHED_HW |
  2605					 GEN7_FF_VS_SCHED_HW |
  2606					 GEN7_FF_DS_SCHED_HW);
  2607	
  2608			/* WaPsdDispatchEnable:vlv */
  2609			/* WaDisablePSDDualDispatchEnable:vlv */
  2610			wa_masked_en(wal,
  2611				     GEN7_HALF_SLICE_CHICKEN1,
  2612				     GEN7_MAX_PS_THREAD_DEP |
  2613				     GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
  2614		}
  2615	
  2616		if (IS_IVYBRIDGE(i915)) {
  2617			/* WaDisableEarlyCull:ivb */
  2618			wa_masked_en(wal,
  2619				     _3D_CHICKEN3,
  2620				     _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);
  2621	
  2622			if (0) { /* causes HiZ corruption on ivb:gt1 */
  2623				/* enable HiZ Raw Stall Optimization */
  2624				wa_masked_dis(wal,
  2625					      CACHE_MODE_0_GEN7,
  2626					      HIZ_RAW_STALL_OPT_DISABLE);
  2627			}
  2628	
  2629			/*
  2630			 * WaVSThreadDispatchOverride:ivb,vlv
  2631			 *
  2632			 * This actually overrides the dispatch
  2633			 * mode for all thread types.
  2634			 */
  2635			wa_write_clr_set(wal,
  2636					 GEN7_FF_THREAD_MODE,
  2637					 GEN7_FF_SCHED_MASK,
  2638					 GEN7_FF_TS_SCHED_HW |
  2639					 GEN7_FF_VS_SCHED_HW |
  2640					 GEN7_FF_DS_SCHED_HW);
  2641	
  2642			/* WaDisablePSDDualDispatchEnable:ivb */
  2643			if (IS_IVB_GT1(i915))
  2644				wa_masked_en(wal,
  2645					     GEN7_HALF_SLICE_CHICKEN1,
  2646					     GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
  2647		}
  2648	
  2649		if (GRAPHICS_VER(i915) == 7) {
  2650			/* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
  2651			wa_masked_en(wal,
  2652				     RING_MODE_GEN7(RENDER_RING_BASE),
  2653				     GFX_TLB_INVALIDATE_EXPLICIT | GFX_REPLAY_MODE);
  2654	
  2655			/* WaDisable_RenderCache_OperationalFlush:ivb,vlv,hsw */
  2656			wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE);
  2657	
  2658			/*
  2659			 * BSpec says this must be set, even though
  2660			 * WaDisable4x2SubspanOptimization:ivb,hsw
  2661			 * WaDisable4x2SubspanOptimization isn't listed for VLV.
  2662			 */
  2663			wa_masked_en(wal,
  2664				     CACHE_MODE_1,
  2665				     PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
  2666	
  2667			/*
  2668			 * BSpec recommends 8x4 when MSAA is used,
  2669			 * however in practice 16x4 seems fastest.
  2670			 *
  2671			 * Note that PS/WM thread counts depend on the WIZ hashing
  2672			 * disable bit, which we don't touch here, but it's good
  2673			 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
  2674			 */
  2675			wa_masked_field_set(wal,
  2676					    GEN7_GT_MODE,
  2677					    GEN6_WIZ_HASHING_MASK,
  2678					    GEN6_WIZ_HASHING_16x4);
  2679		}
  2680	
  2681		if (IS_GRAPHICS_VER(i915, 6, 7))
  2682			/*
  2683			 * We need to disable the AsyncFlip performance optimisations in
  2684			 * order to use MI_WAIT_FOR_EVENT within the CS. It should
  2685			 * already be programmed to '1' on all products.
  2686			 *
  2687			 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
  2688			 */
  2689			wa_masked_en(wal,
  2690				     RING_MI_MODE(RENDER_RING_BASE),
  2691				     ASYNC_FLIP_PERF_DISABLE);
  2692	
  2693		if (GRAPHICS_VER(i915) == 6) {
  2694			/*
  2695			 * Required for the hardware to program scanline values for
  2696			 * waiting
  2697			 * WaEnableFlushTlbInvalidationMode:snb
  2698			 */
  2699			wa_masked_en(wal,
  2700				     GFX_MODE,
  2701				     GFX_TLB_INVALIDATE_EXPLICIT);
  2702	
  2703			/* WaDisableHiZPlanesWhenMSAAEnabled:snb */
  2704			wa_masked_en(wal,
  2705				     _3D_CHICKEN,
  2706				     _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB);
  2707	
  2708			wa_masked_en(wal,
  2709				     _3D_CHICKEN3,
  2710				     /* WaStripsFansDisableFastClipPerformanceFix:snb */
  2711				     _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL |
  2712				     /*
  2713				      * Bspec says:
  2714				      * "This bit must be set if 3DSTATE_CLIP clip mode is set
  2715				      * to normal and 3DSTATE_SF number of SF output attributes
  2716				      * is more than 16."
  2717				      */
  2718				     _3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH);
  2719	
  2720			/*
  2721			 * BSpec recommends 8x4 when MSAA is used,
  2722			 * however in practice 16x4 seems fastest.
  2723			 *
  2724			 * Note that PS/WM thread counts depend on the WIZ hashing
  2725			 * disable bit, which we don't touch here, but it's good
  2726			 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
  2727			 */
  2728			wa_masked_field_set(wal,
  2729					    GEN6_GT_MODE,
  2730					    GEN6_WIZ_HASHING_MASK,
  2731					    GEN6_WIZ_HASHING_16x4);
  2732	
  2733			/* WaDisable_RenderCache_OperationalFlush:snb */
  2734			wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);
  2735	
  2736			/*
  2737			 * From the Sandybridge PRM, volume 1 part 3, page 24:
  2738			 * "If this bit is set, STCunit will have LRA as replacement
  2739			 *  policy. [...] This bit must be reset. LRA replacement
  2740			 *  policy is not supported."
  2741			 */
  2742			wa_masked_dis(wal,
  2743				      CACHE_MODE_0,
  2744				      CM0_STC_EVICT_DISABLE_LRA_SNB);
  2745		}
  2746	
  2747		if (IS_GRAPHICS_VER(i915, 4, 6))
  2748			/* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
  2749			wa_add(wal, RING_MI_MODE(RENDER_RING_BASE),
  2750			       0, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH),
  2751			       /* XXX bit doesn't stick on Broadwater */
  2752			       IS_I965G(i915) ? 0 : VS_TIMER_DISPATCH, true);
  2753	
  2754		if (GRAPHICS_VER(i915) == 4)
  2755			/*
  2756			 * Disable CONSTANT_BUFFER before it is loaded from the context
  2757			 * image. For as it is loaded, it is executed and the stored
  2758			 * address may no longer be valid, leading to a GPU hang.
  2759			 *
  2760			 * This imposes the requirement that userspace reload their
  2761			 * CONSTANT_BUFFER on every batch, fortunately a requirement
  2762			 * they are already accustomed to from before contexts were
  2763			 * enabled.
  2764			 */
  2765			wa_add(wal, ECOSKPD(RENDER_RING_BASE),
  2766			       0, _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE),
  2767			       0 /* XXX bit doesn't stick on Broadwater */,
  2768			       true);
  2769	}
  2770
Matt Roper Aug. 30, 2023, 11:23 p.m. UTC | #3
On Wed, Aug 30, 2023 at 09:30:01PM +0530, Shekhar Chauhan wrote:
> Disables Atomic-chaining of Typed Writes.
> 
> BSpec: 54040
> Signed-off-by: Shekhar Chauhan <shekhar.chauhan@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_gt_regs.h     |  2 ++
>  drivers/gpu/drm/i915/gt/intel_workarounds.c | 10 ++++++++++
>  2 files changed, 12 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> index 0e4c638fcbbf..82b533aa0c03 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> @@ -1218,6 +1218,8 @@
>  
>  #define XEHP_HDC_CHICKEN0			MCR_REG(0xe5f0)
>  #define   LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK	REG_GENMASK(13, 11)
> +#define   ATOMIC_CHAINING_TYPED_WRITES		REG_BIT(3)
> +
>  #define ICL_HDC_MODE				MCR_REG(0xe5f4)
>  
>  #define EU_PERF_CNTL2				PERF_REG(0xe658)
> diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> index 864d41bcf6bb..d54120009334 100644
> --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
> +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
> @@ -2327,6 +2327,16 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
>  				  LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK);
>  	}
>  
> +	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_C0, STEP_FOREVER) ||
> +	    IS_DG2_GRAPHICS_STEP(i915, G11, STEP_B0, STEP_FOREVER) ||
> +	    IS_DG2_G12(i915) ||

Since DG2 is at the point where we only support production steppings, we
don't need to worry about avoiding this workaround on old pre-production
steppings.  So these three conditions can be simplified down to just
"IS_DG2(i915)".

See
https://lore.kernel.org/intel-gfx/20230816214201.534095-7-matthew.d.roper@intel.com/
for details.

> +	    IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_FOREVER) ||
> +	    IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_FOREVER)) {

This macro doesn't exist anymore; I think you need to rebase your patch
on a newer snapshot of drm-tip.  BTW, "A0..forever" covers every single
stepping, so there's no need to use a stepping-based check in such
cases.  Once you rebase, this will just turn into

    IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 71))

Also note that we generally put the newer platform/IP first in our
conditions, so the overall check will become:

    if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) ||
        IS_DG2(i915))


> +		/* Wa_14015150844 */
> +		wa_mcr_masked_dis(wal, XEHP_HDC_CHICKEN0,
> +				  ATOMIC_CHAINING_TYPED_WRITES);

It's easy to miss, but up near the top of bspec 54040, there's a note
about this register being a "write only" register.  That means that if
we implement this workaround the normal way we'll get driver warnings on
debug builds when it tries to read back the register later and make sure
the workaround was applied properly.  We need to use the more explicit
wa_mcr_add() function to provide an empty readmask to avoid this.  The
next workaround just below this in the code is an example of how to do
that.


Matt

> +	}
> +
>  	if (IS_DG2_G11(i915) || IS_DG2_G10(i915)) {
>  		/* Wa_22014600077:dg2 */
>  		wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
> -- 
> 2.34.1
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 0e4c638fcbbf..82b533aa0c03 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -1218,6 +1218,8 @@ 
 
 #define XEHP_HDC_CHICKEN0			MCR_REG(0xe5f0)
 #define   LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK	REG_GENMASK(13, 11)
+#define   ATOMIC_CHAINING_TYPED_WRITES		REG_BIT(3)
+
 #define ICL_HDC_MODE				MCR_REG(0xe5f4)
 
 #define EU_PERF_CNTL2				PERF_REG(0xe658)
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 864d41bcf6bb..d54120009334 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -2327,6 +2327,16 @@  rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
 				  LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK);
 	}
 
+	if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_C0, STEP_FOREVER) ||
+	    IS_DG2_GRAPHICS_STEP(i915, G11, STEP_B0, STEP_FOREVER) ||
+	    IS_DG2_G12(i915) ||
+	    IS_MTL_GRAPHICS_STEP(i915, M, STEP_A0, STEP_FOREVER) ||
+	    IS_MTL_GRAPHICS_STEP(i915, P, STEP_A0, STEP_FOREVER)) {
+		/* Wa_14015150844 */
+		wa_mcr_masked_dis(wal, XEHP_HDC_CHICKEN0,
+				  ATOMIC_CHAINING_TYPED_WRITES);
+	}
+
 	if (IS_DG2_G11(i915) || IS_DG2_G10(i915)) {
 		/* Wa_22014600077:dg2 */
 		wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,