From patchwork Mon Oct 14 18:55:29 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Umesh Nerlige Ramappa X-Patchwork-Id: 11189339 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id AFBD014DB for ; Mon, 14 Oct 2019 18:55:36 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 8BD7C2067B for ; Mon, 14 Oct 2019 18:55:36 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 8BD7C2067B Authentication-Results: mail.kernel.org; dmarc=fail (p=none dis=none) header.from=intel.com Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id A765D892C1; Mon, 14 Oct 2019 18:55:34 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from mga01.intel.com (mga01.intel.com [192.55.52.88]) by gabe.freedesktop.org (Postfix) with ESMTPS id B815E892C1 for ; Mon, 14 Oct 2019 18:55:32 +0000 (UTC) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga005.jf.intel.com ([10.7.209.41]) by fmsmga101.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 14 Oct 2019 11:55:30 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.67,296,1566889200"; d="scan'208";a="370213904" Received: from orsosgc001.ra.intel.com ([10.23.184.150]) by orsmga005.jf.intel.com with ESMTP; 14 Oct 2019 11:55:31 -0700 From: Umesh Nerlige Ramappa To: intel-gfx@lists.freedesktop.org Date: Mon, 14 Oct 2019 11:55:29 -0700 Message-Id: <20191014185531.62855-1-umesh.nerlige.ramappa@intel.com> X-Mailer: git-send-email 2.20.1 MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 1/3] drm/i915/perf: Add helper macros for comparing with whitelisted registers X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Lucas De Marchi , Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" Add helper macros for range and equality comparisons and use them to check with whitelisted registers in oa configurations. Signed-off-by: Umesh Nerlige Ramappa --- drivers/gpu/drm/i915/i915_perf.c | 54 +++++++++++++++++--------------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 366580701ba2..d54d3a41ceca 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -3455,56 +3455,58 @@ static bool gen8_is_valid_flex_addr(struct i915_perf *perf, u32 addr) return false; } +#define ADDR_IN_RANGE(addr, start, end) \ + ((addr) >= (start) && \ + (addr) <= (end)) + +#define REG_IN_RANGE(addr, start, end) \ + ((addr) >= i915_mmio_reg_offset(start) && \ + (addr) <= i915_mmio_reg_offset(end)) + +#define REG_EQUAL(addr, mmio) \ + ((addr) == i915_mmio_reg_offset(mmio)) + static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr) { - return (addr >= i915_mmio_reg_offset(OASTARTTRIG1) && - addr <= i915_mmio_reg_offset(OASTARTTRIG8)) || - (addr >= i915_mmio_reg_offset(OAREPORTTRIG1) && - addr <= i915_mmio_reg_offset(OAREPORTTRIG8)) || - (addr >= i915_mmio_reg_offset(OACEC0_0) && - addr <= i915_mmio_reg_offset(OACEC7_1)); + return REG_IN_RANGE(addr, OASTARTTRIG1, OASTARTTRIG8) || + REG_IN_RANGE(addr, OAREPORTTRIG1, OAREPORTTRIG8) || + REG_IN_RANGE(addr, OACEC0_0, OACEC7_1); } static bool gen7_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { - return addr == i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) || - (addr >= i915_mmio_reg_offset(MICRO_BP0_0) && - addr <= i915_mmio_reg_offset(NOA_WRITE)) || - (addr >= i915_mmio_reg_offset(OA_PERFCNT1_LO) && - addr <= i915_mmio_reg_offset(OA_PERFCNT2_HI)) || - (addr >= i915_mmio_reg_offset(OA_PERFMATRIX_LO) && - addr <= i915_mmio_reg_offset(OA_PERFMATRIX_HI)); + return REG_EQUAL(addr, HALF_SLICE_CHICKEN2) || + REG_IN_RANGE(addr, MICRO_BP0_0, NOA_WRITE) || + REG_IN_RANGE(addr, OA_PERFCNT1_LO, OA_PERFCNT2_HI) || + REG_IN_RANGE(addr, OA_PERFMATRIX_LO, OA_PERFMATRIX_HI); } static bool gen8_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { return gen7_is_valid_mux_addr(perf, addr) || - addr == i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) || - (addr >= i915_mmio_reg_offset(RPM_CONFIG0) && - addr <= i915_mmio_reg_offset(NOA_CONFIG(8))); + REG_EQUAL(addr, WAIT_FOR_RC6_EXIT) || + REG_IN_RANGE(addr, RPM_CONFIG0, NOA_CONFIG(8)); } static bool gen10_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { return gen8_is_valid_mux_addr(perf, addr) || - addr == i915_mmio_reg_offset(GEN10_NOA_WRITE_HIGH) || - (addr >= i915_mmio_reg_offset(OA_PERFCNT3_LO) && - addr <= i915_mmio_reg_offset(OA_PERFCNT4_HI)); + REG_EQUAL(addr, GEN10_NOA_WRITE_HIGH) || + REG_IN_RANGE(addr, OA_PERFCNT3_LO, OA_PERFCNT4_HI); } static bool hsw_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { return gen7_is_valid_mux_addr(perf, addr) || - (addr >= 0x25100 && addr <= 0x2FF90) || - (addr >= i915_mmio_reg_offset(HSW_MBVID2_NOA0) && - addr <= i915_mmio_reg_offset(HSW_MBVID2_NOA9)) || - addr == i915_mmio_reg_offset(HSW_MBVID2_MISR0); + ADDR_IN_RANGE(addr, 0x25100, 0x2FF90) || + REG_IN_RANGE(addr, HSW_MBVID2_NOA0, HSW_MBVID2_NOA9) || + REG_EQUAL(addr, HSW_MBVID2_MISR0); } static bool chv_is_valid_mux_addr(struct i915_perf *perf, u32 addr) { return gen7_is_valid_mux_addr(perf, addr) || - (addr >= 0x182300 && addr <= 0x1823A4); + ADDR_IN_RANGE(addr, 0x182300, 0x1823A4); } static u32 mask_reg_value(u32 reg, u32 val) @@ -3513,14 +3515,14 @@ static u32 mask_reg_value(u32 reg, u32 val) * WaDisableSTUnitPowerOptimization workaround. Make sure the value * programmed by userspace doesn't change this. */ - if (i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) == reg) + if (REG_EQUAL(reg, HALF_SLICE_CHICKEN2)) val = val & ~_MASKED_BIT_ENABLE(GEN8_ST_PO_DISABLE); /* WAIT_FOR_RC6_EXIT has only one bit fullfilling the function * indicated by its name and a bunch of selection fields used by OA * configs. */ - if (i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) == reg) + if (REG_EQUAL(reg, WAIT_FOR_RC6_EXIT)) val = val & ~_MASKED_BIT_ENABLE(HSW_WAIT_FOR_RC6_EXIT_ENABLE); return val; From patchwork Mon Oct 14 18:55:30 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: Umesh Nerlige Ramappa X-Patchwork-Id: 11189343 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id 2DBA0112B for ; Mon, 14 Oct 2019 18:55:39 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id 1604C2067B for ; Mon, 14 Oct 2019 18:55:39 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org 1604C2067B Authentication-Results: mail.kernel.org; dmarc=fail (p=none dis=none) header.from=intel.com Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id 4BFA48989A; Mon, 14 Oct 2019 18:55:35 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from mga01.intel.com (mga01.intel.com [192.55.52.88]) by gabe.freedesktop.org (Postfix) with ESMTPS id D515D8981B for ; Mon, 14 Oct 2019 18:55:32 +0000 (UTC) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga005.jf.intel.com ([10.7.209.41]) by fmsmga101.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 14 Oct 2019 11:55:31 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.67,296,1566889200"; d="scan'208";a="370213908" Received: from orsosgc001.ra.intel.com ([10.23.184.150]) by orsmga005.jf.intel.com with ESMTP; 14 Oct 2019 11:55:31 -0700 From: Umesh Nerlige Ramappa To: intel-gfx@lists.freedesktop.org Date: Mon, 14 Oct 2019 11:55:30 -0700 Message-Id: <20191014185531.62855-2-umesh.nerlige.ramappa@intel.com> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20191014185531.62855-1-umesh.nerlige.ramappa@intel.com> References: <20191014185531.62855-1-umesh.nerlige.ramappa@intel.com> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 2/3] drm/i915/tgl: Add perf support on TGL X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Lucas De Marchi , Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" From: Lionel Landwerlin The design of the OA unit has been split into several units. We now have a global unit (OAG) and a render specific unit (OAR). This leads to some changes on how we program things. Some details : OAR: - has its own set of counter registers, they are per-context saved/restored - counters are not written to the circular OA buffer - a snapshot of the counters can be acquired with MI_RECORD_PERF_COUNT, or a single counter can be read with MI_STORE_REGISTER_MEM. OAG: - has global counters that increment across context switches - counters are written into the circular OA buffer (if requested) v2: Fix checkpatch warnings on code style (Lucas) v3: (Umesh) - Update register from which tail, status and head are read - Update logic to sample context reports - Update whitelist mux and b counter regs BSpec: 28727, 30021 Signed-off-by: Lionel Landwerlin Signed-off-by: Umesh Nerlige Ramappa Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/i915/Makefile | 3 +- drivers/gpu/drm/i915/i915_perf.c | 280 +++++++++++++++++++++++--- drivers/gpu/drm/i915/i915_reg.h | 103 ++++++++++ drivers/gpu/drm/i915/oa/i915_oa_tgl.c | 121 +++++++++++ drivers/gpu/drm/i915/oa/i915_oa_tgl.h | 16 ++ 5 files changed, 492 insertions(+), 31 deletions(-) create mode 100644 drivers/gpu/drm/i915/oa/i915_oa_tgl.c create mode 100644 drivers/gpu/drm/i915/oa/i915_oa_tgl.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index e791d9323b51..0ec9fee58baa 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -242,7 +242,8 @@ i915-y += \ oa/i915_oa_cflgt2.o \ oa/i915_oa_cflgt3.o \ oa/i915_oa_cnl.o \ - oa/i915_oa_icl.o + oa/i915_oa_icl.o \ + oa/i915_oa_tgl.o i915-y += i915_perf.o # Post-mortem debug and GPU hang state capture diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index d54d3a41ceca..df3b6976ba5b 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -217,6 +217,7 @@ #include "oa/i915_oa_cflgt3.h" #include "oa/i915_oa_cnl.h" #include "oa/i915_oa_icl.h" +#include "oa/i915_oa_tgl.h" /* HW requires this to be a power of two, between 128k and 16M, though driver * is currently generally designed assuming the largest 16M size is used such @@ -292,7 +293,8 @@ static u32 i915_perf_stream_paranoid = true; #define INVALID_CTX_ID 0xffffffff /* On Gen8+ automatically triggered OA reports include a 'reason' field... */ -#define OAREPORT_REASON_MASK 0x3f +#define OAREPORT_REASON_MASK (IS_GEN(stream->perf->i915, 12) ? \ + 0x7f : 0x3f) #define OAREPORT_REASON_SHIFT 19 #define OAREPORT_REASON_TIMER (1<<0) #define OAREPORT_REASON_CTX_SWITCH (1<<3) @@ -338,6 +340,10 @@ static const struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = { [I915_OA_FORMAT_C4_B8] = { 7, 64 }, }; +static const struct i915_oa_format gen12_oa_formats[I915_OA_FORMAT_MAX] = { + [I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 }, +}; + #define SAMPLE_OA_REPORT (1<<0) /** @@ -415,6 +421,14 @@ static void free_oa_config_bo(struct i915_oa_config_bo *oa_bo) kfree(oa_bo); } +static u32 gen12_oa_hw_tail_read(struct i915_perf_stream *stream) +{ + struct intel_uncore *uncore = stream->uncore; + + return intel_uncore_read(uncore, GEN12_OAG_OATAILPTR) & + GEN12_OAG_OATAILPTR_MASK; +} + static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream) { struct intel_uncore *uncore = stream->uncore; @@ -535,7 +549,7 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) aging_tail = hw_tail; stream->oa_buffer.aging_timestamp = now; } else { - DRM_ERROR("Ignoring spurious out of range OA buffer tail pointer = %u\n", + DRM_ERROR("Ignoring spurious out of range OA buffer tail pointer = %x\n", hw_tail); } } @@ -754,7 +768,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * Note: that we don't clear the valid_ctx_bit so userspace can * understand that the ID has been squashed by the kernel. */ - if (!(report32[0] & stream->perf->gen8_valid_ctx_bit)) + if (!(report32[0] & stream->perf->gen8_valid_ctx_bit) && + INTEL_GEN(stream->perf->i915) <= 11) ctx_id = report32[2] = INVALID_CTX_ID; /* @@ -821,6 +836,11 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, } if (start_offset != *offset) { + i915_reg_t oaheadptr; + + oaheadptr = IS_GEN(stream->perf->i915, 12) ? + GEN12_OAG_OAHEADPTR : GEN8_OAHEADPTR; + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); /* @@ -828,9 +848,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * relative to oa_buf_base so put back here... */ head += gtt_offset; - - intel_uncore_write(uncore, GEN8_OAHEADPTR, - head & GEN8_OAHEADPTR_MASK); + intel_uncore_write(uncore, oaheadptr, + head & GEN12_OAG_OAHEADPTR_MASK); stream->oa_buffer.head = head; spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); @@ -866,12 +885,16 @@ static int gen8_oa_read(struct i915_perf_stream *stream, { struct intel_uncore *uncore = stream->uncore; u32 oastatus; + i915_reg_t oastatus_reg; int ret; if (WARN_ON(!stream->oa_buffer.vaddr)) return -EIO; - oastatus = intel_uncore_read(uncore, GEN8_OASTATUS); + oastatus_reg = IS_GEN(stream->perf->i915, 12) ? + GEN12_OAG_OASTATUS : GEN8_OASTATUS; + + oastatus = intel_uncore_read(uncore, oastatus_reg); /* * We treat OABUFFER_OVERFLOW as a significant error: @@ -903,7 +926,7 @@ static int gen8_oa_read(struct i915_perf_stream *stream, * Note: .oa_enable() is expected to re-init the oabuffer and * reset GEN8_OASTATUS for us */ - oastatus = intel_uncore_read(uncore, GEN8_OASTATUS); + oastatus = intel_uncore_read(uncore, oastatus_reg); } if (oastatus & GEN8_OASTATUS_REPORT_LOST) { @@ -911,7 +934,7 @@ static int gen8_oa_read(struct i915_perf_stream *stream, DRM_I915_PERF_RECORD_OA_REPORT_LOST); if (ret) return ret; - intel_uncore_write(uncore, GEN8_OASTATUS, + intel_uncore_write(uncore, oastatus_reg, oastatus & ~GEN8_OASTATUS_REPORT_LOST); } @@ -1485,6 +1508,67 @@ static void gen8_init_oa_buffer(struct i915_perf_stream *stream) stream->pollin = false; } +static void gen12_init_oa_buffer(struct i915_perf_stream *stream) +{ + struct intel_uncore *uncore = stream->uncore; + u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); + unsigned long flags; + + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); + + intel_uncore_write(uncore, GEN12_OAG_OASTATUS, 0); + intel_uncore_write(uncore, GEN12_OAG_OAHEADPTR, + gtt_offset & GEN12_OAG_OAHEADPTR_MASK); + stream->oa_buffer.head = gtt_offset; + + /* + * PRM says: + * + * "This MMIO must be set before the OATAILPTR + * register and after the OAHEADPTR register. This is + * to enable proper functionality of the overflow + * bit." + */ + intel_uncore_write(uncore, GEN12_OAG_OABUFFER, gtt_offset | + OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT); + intel_uncore_write(uncore, GEN12_OAG_OATAILPTR, + gtt_offset & GEN12_OAG_OATAILPTR_MASK); + + /* Mark that we need updated tail pointers to read from... */ + stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR; + stream->oa_buffer.tails[1].offset = INVALID_TAIL_PTR; + + /* + * Reset state used to recognise context switches, affecting which + * reports we will forward to userspace while filtering for a single + * context. + */ + stream->oa_buffer.last_ctx_id = INVALID_CTX_ID; + + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); + + /* + * NB: although the OA buffer will initially be allocated + * zeroed via shmfs (and so this memset is redundant when + * first allocating), we may re-init the OA buffer, either + * when re-enabling a stream or in error/reset paths. + * + * The reason we clear the buffer for each re-init is for the + * sanity check in gen8_append_oa_reports() that looks at the + * reason field to make sure it's non-zero which relies on + * the assumption that new reports are being written to zeroed + * memory... + */ + memset(stream->oa_buffer.vaddr, 0, + stream->oa_buffer.vma->size); + + /* + * Maybe make ->pollin per-stream state if we support multiple + * concurrent streams in the future. + */ + stream->pollin = false; +} + static int alloc_oa_buffer(struct i915_perf_stream *stream) { struct drm_i915_gem_object *bo; @@ -1991,7 +2075,7 @@ gen8_update_reg_state_unlocked(const struct intel_context *ce, (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | GEN8_OA_COUNTER_RESUME; - for (i = 0; i < ARRAY_SIZE(flex_regs); i++) + for (i = 0; !!ctx_flexeu0 && i < ARRAY_SIZE(flex_regs); i++) reg_state[ctx_flexeu0 + i * 2 + 1] = oa_config_flex_reg(stream->oa_config, flex_regs[i]); @@ -2148,8 +2232,8 @@ static int gen8_configure_context(struct i915_gem_context *ctx, * * Note: it's only the RCS/Render context that has any OA state. */ -static int gen8_configure_all_contexts(struct i915_perf_stream *stream, - const struct i915_oa_config *oa_config) +static int lrc_configure_all_contexts(struct i915_perf_stream *stream, + const struct i915_oa_config *oa_config) { struct drm_i915_private *i915 = stream->perf->i915; /* The MMIO offsets for Flex EU registers aren't contiguous */ @@ -2161,11 +2245,9 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream, CTX_R_PWR_CLK_STATE, }, { - GEN8_OACTXCONTROL, + IS_GEN(i915, 12) ? + GEN12_OAR_OACONTROL: GEN8_OACTXCONTROL, stream->perf->ctx_oactxctrl_offset + 1, - ((stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | - (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | - GEN8_OA_COUNTER_RESUME) }, { EU_PERF_CNTL0, ctx_flexeuN(0) }, { EU_PERF_CNTL1, ctx_flexeuN(1) }, @@ -2178,9 +2260,23 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream, #undef ctx_flexeuN struct intel_engine_cs *engine; struct i915_gem_context *ctx, *cn; + size_t array_size = IS_GEN(i915, 12) ? 2 : ARRAY_SIZE(regs); int i, err; - for (i = 2; i < ARRAY_SIZE(regs); i++) + if (IS_GEN(i915, 12)) { + u32 format = stream->oa_buffer.format; + + regs[1].value = + (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) | + (oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0); + } else { + regs[1].value = + (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | + (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | + GEN8_OA_COUNTER_RESUME; + } + + for (i = 2; !!ctx_flexeu0 && i < array_size; i++) regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg); lockdep_assert_held(&stream->perf->lock); @@ -2211,7 +2307,7 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream, spin_unlock(&i915->gem.contexts.lock); - err = gen8_configure_context(ctx, regs, ARRAY_SIZE(regs)); + err = gen8_configure_context(ctx, regs, array_size); if (err) { i915_gem_context_put(ctx); return err; @@ -2236,7 +2332,7 @@ static int gen8_configure_all_contexts(struct i915_perf_stream *stream, regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu); - err = gen8_modify_self(ce, regs, ARRAY_SIZE(regs)); + err = gen8_modify_self(ce, regs, array_size); if (err) return err; } @@ -2284,7 +2380,45 @@ static int gen8_enable_metric_set(struct i915_perf_stream *stream) * to make sure all slices/subslices are ON before writing to NOA * registers. */ - ret = gen8_configure_all_contexts(stream, oa_config); + ret = lrc_configure_all_contexts(stream, oa_config); + if (ret) + return ret; + + return emit_oa_config(stream, oa_context(stream)); +} + +static int gen12_enable_metric_set(struct i915_perf_stream *stream) +{ + struct intel_uncore *uncore = stream->uncore; + struct i915_oa_config *oa_config = stream->oa_config; + bool periodic = stream->periodic; + u32 period_exponent = stream->period_exponent; + int ret; + + intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG, + /* Disable clk ratio reports, like previous Gens. */ + _MASKED_BIT_ENABLE(GEN12_OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS | + GEN12_OAG_OA_DEBUG_INCLUDE_CLK_RATIO) | + /* + * If the user didn't require OA reports, instruct the + * hardware not to emit ctx switch reports. + */ + !(stream->sample_flags & SAMPLE_OA_REPORT) ? + _MASKED_BIT_ENABLE(GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS) : + _MASKED_BIT_DISABLE(GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS)); + + intel_uncore_write(uncore, GEN12_OAG_OAGLBCTXCTRL, periodic ? + (GEN12_OAG_OAGLBCTXCTRL_COUNTER_RESUME | + GEN12_OAG_OAGLBCTXCTRL_TIMER_ENABLE | + (period_exponent << GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT)) + : 0); + + /* + * Update all contexts prior writing the mux configurations as we need + * to make sure all slices/subslices are ON before writing to NOA + * registers. + */ + ret = lrc_configure_all_contexts(stream, oa_config); if (ret) return ret; @@ -2296,7 +2430,7 @@ static void gen8_disable_metric_set(struct i915_perf_stream *stream) struct intel_uncore *uncore = stream->uncore; /* Reset all contexts' slices/subslices configurations. */ - gen8_configure_all_contexts(stream, NULL); + lrc_configure_all_contexts(stream, NULL); intel_uncore_rmw(uncore, GDT_CHICKEN_BITS, GT_NOA_ENABLE, 0); } @@ -2306,7 +2440,7 @@ static void gen10_disable_metric_set(struct i915_perf_stream *stream) struct intel_uncore *uncore = stream->uncore; /* Reset all contexts' slices/subslices configurations. */ - gen8_configure_all_contexts(stream, NULL); + lrc_configure_all_contexts(stream, NULL); /* Make sure we disable noa to save power. */ intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0); @@ -2368,6 +2502,25 @@ static void gen8_oa_enable(struct i915_perf_stream *stream) GEN8_OA_COUNTER_ENABLE); } +static void gen12_oa_enable(struct i915_perf_stream *stream) +{ + struct intel_uncore *uncore = stream->uncore; + u32 report_format = stream->oa_buffer.format; + + /* + * If we don't want OA reports from the OA buffer, then we don't even + * need to program the OAG unit. + */ + if (!(stream->sample_flags & SAMPLE_OA_REPORT)) + return; + + gen12_init_oa_buffer(stream); + + intel_uncore_write(uncore, GEN12_OAG_OACONTROL, + (report_format << GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT) | + GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE); +} + /** * i915_oa_stream_enable - handle `I915_PERF_IOCTL_ENABLE` for OA stream * @stream: An i915 perf stream opened for OA metrics @@ -2409,6 +2562,18 @@ static void gen8_oa_disable(struct i915_perf_stream *stream) DRM_ERROR("wait for OA to be disabled timed out\n"); } +static void gen12_oa_disable(struct i915_perf_stream *stream) +{ + struct intel_uncore *uncore = stream->uncore; + + intel_uncore_write(uncore, GEN12_OAG_OACONTROL, 0); + if (intel_wait_for_register(uncore, + GEN12_OAG_OACONTROL, + GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE, 0, + 50)) + DRM_ERROR("wait for OA to be disabled timed out\n"); +} + /** * i915_oa_stream_disable - handle `I915_PERF_IOCTL_DISABLE` for OA stream * @stream: An i915 perf stream opened for OA metrics @@ -2608,7 +2773,7 @@ void i915_oa_init_reg_state(const struct intel_context *ce, { struct i915_perf_stream *stream; - /* perf.exclusive_stream serialised by gen8_configure_all_contexts() */ + /* perf.exclusive_stream serialised by lrc_configure_all_contexts() */ lockdep_assert_held(&ce->pin_mutex); if (engine->class != RENDER_CLASS) @@ -3037,16 +3202,24 @@ i915_perf_open_ioctl_locked(struct i915_perf *perf, * rest of the system, which we consider acceptable for a * non-privileged client. * - * For Gen8+ the OA unit no longer supports clock gating off for a + * For Gen8->11 the OA unit no longer supports clock gating off for a * specific context and the kernel can't securely stop the counters * from updating as system-wide / global values. Even though we can * filter reports based on the included context ID we can't block * clients from seeing the raw / global counter values via * MI_REPORT_PERF_COUNT commands and so consider it a privileged op to * enable the OA unit by default. + * + * For Gen12+ we gain a new OAR unit that only monitors the RCS on a + * per context basis. So we can relax requirements there if the user + * doesn't request global stream access (i.e. query based sampling + * using MI_RECORD_PERF_COUNT. */ if (IS_HASWELL(perf->i915) && specific_ctx) privileged_op = false; + else if (IS_GEN(perf->i915, 12) && specific_ctx && + (props->sample_flags & SAMPLE_OA_REPORT) == 0) + privileged_op = false; /* Similar to perf's kernel.perf_paranoid_cpu sysctl option * we check a dev.i915.perf_stream_paranoid sysctl option @@ -3358,7 +3531,9 @@ void i915_perf_register(struct drm_i915_private *i915) sysfs_attr_init(&perf->test_config.sysfs_metric_id.attr); - if (INTEL_GEN(i915) >= 11) { + if (IS_TIGERLAKE(i915)) { + i915_perf_load_test_config_tgl(i915); + } else if (INTEL_GEN(i915) >= 11) { i915_perf_load_test_config_icl(i915); } else if (IS_CANNONLAKE(i915)) { i915_perf_load_test_config_cnl(i915); @@ -3509,6 +3684,28 @@ static bool chv_is_valid_mux_addr(struct i915_perf *perf, u32 addr) ADDR_IN_RANGE(addr, 0x182300, 0x1823A4); } +static bool gen12_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr) +{ + return REG_IN_RANGE(addr, GEN12_OAG_OASTARTTRIG1, GEN12_OAG_OASTARTTRIG8) || + REG_IN_RANGE(addr, GEN12_OAG_OAREPORTTRIG1, GEN12_OAG_OAREPORTTRIG8) || + REG_IN_RANGE(addr, GEN12_OAG_CEC0_0, GEN12_OAG_CEC7_1) || + REG_IN_RANGE(addr, GEN12_OAG_SCEC0_0, GEN12_OAG_SCEC7_1) || + REG_EQUAL(addr, GEN12_OAA_DBG_REG) || + REG_EQUAL(addr, GEN12_OAG_OA_PESS) || + REG_EQUAL(addr, GEN12_OAG_SPCTR_CNF); +} + +static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr) +{ + return REG_EQUAL(addr, NOA_WRITE) || + REG_EQUAL(addr, GEN10_NOA_WRITE_HIGH) || + REG_EQUAL(addr, GDT_CHICKEN_BITS) || + REG_EQUAL(addr, WAIT_FOR_RC6_EXIT) || + REG_EQUAL(addr, RPM_CONFIG0) || + REG_EQUAL(addr, RPM_CONFIG1) || + REG_IN_RANGE(addr, NOA_CONFIG(0), NOA_CONFIG(8)); +} + static u32 mask_reg_value(u32 reg, u32 val) { /* HALF_SLICE_CHICKEN2 is programmed with a the @@ -3902,14 +4099,11 @@ void i915_perf_init(struct drm_i915_private *i915) * worth the complexity to maintain now that BDW+ enable * execlist mode by default. */ - perf->oa_formats = gen8_plus_oa_formats; - - perf->ops.oa_enable = gen8_oa_enable; - perf->ops.oa_disable = gen8_oa_disable; perf->ops.read = gen8_oa_read; - perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read; if (IS_GEN_RANGE(i915, 8, 9)) { + perf->oa_formats = gen8_plus_oa_formats; + perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr; perf->ops.is_valid_mux_reg = @@ -3922,8 +4116,11 @@ void i915_perf_init(struct drm_i915_private *i915) chv_is_valid_mux_addr; } + perf->ops.oa_enable = gen8_oa_enable; + perf->ops.oa_disable = gen8_oa_disable; perf->ops.enable_metric_set = gen8_enable_metric_set; perf->ops.disable_metric_set = gen8_disable_metric_set; + perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read; if (IS_GEN(i915, 8)) { perf->ctx_oactxctrl_offset = 0x120; @@ -3937,6 +4134,8 @@ void i915_perf_init(struct drm_i915_private *i915) perf->gen8_valid_ctx_bit = BIT(16); } } else if (IS_GEN_RANGE(i915, 10, 11)) { + perf->oa_formats = gen8_plus_oa_formats; + perf->ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr; perf->ops.is_valid_mux_reg = @@ -3944,8 +4143,11 @@ void i915_perf_init(struct drm_i915_private *i915) perf->ops.is_valid_flex_reg = gen8_is_valid_flex_addr; + perf->ops.oa_enable = gen8_oa_enable; + perf->ops.oa_disable = gen8_oa_disable; perf->ops.enable_metric_set = gen8_enable_metric_set; perf->ops.disable_metric_set = gen10_disable_metric_set; + perf->ops.oa_hw_tail_read = gen8_oa_hw_tail_read; if (IS_GEN(i915, 10)) { perf->ctx_oactxctrl_offset = 0x128; @@ -3955,6 +4157,24 @@ void i915_perf_init(struct drm_i915_private *i915) perf->ctx_flexeu0_offset = 0x78e; } perf->gen8_valid_ctx_bit = BIT(16); + } else if (IS_GEN(i915, 12)) { + perf->oa_formats = gen12_oa_formats; + + perf->ops.is_valid_b_counter_reg = + gen12_is_valid_b_counter_addr; + perf->ops.is_valid_mux_reg = + gen12_is_valid_mux_addr; + perf->ops.is_valid_flex_reg = + gen8_is_valid_flex_addr; + + perf->ops.oa_enable = gen12_oa_enable; + perf->ops.oa_disable = gen12_oa_disable; + perf->ops.enable_metric_set = gen12_enable_metric_set; + perf->ops.disable_metric_set = gen10_disable_metric_set; + perf->ops.oa_hw_tail_read = gen12_oa_hw_tail_read; + + perf->ctx_flexeu0_offset = 0; + perf->ctx_oactxctrl_offset = 0x144; } } diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index e24991e54897..2e11963f3eb5 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -684,6 +684,45 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define OABUFFER_SIZE_8M (6 << 3) #define OABUFFER_SIZE_16M (7 << 3) +/* Gen12 OAR unit */ +#define GEN12_OAR_OACONTROL _MMIO(0x2960) +#define GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT 1 +#define GEN12_OAR_OACONTROL_COUNTER_ENABLE (1 << 0) + +#define GEN12_OACTXCONTROL _MMIO(0x2360) +#define GEN12_OAR_OASTATUS _MMIO(0x2968) + +/* Gen12 OAG unit */ +#define GEN12_OAG_OAHEADPTR _MMIO(0xdb00) +#define GEN12_OAG_OAHEADPTR_MASK 0xffffffc0 +#define GEN12_OAG_OATAILPTR _MMIO(0xdb04) +#define GEN12_OAG_OATAILPTR_MASK 0xffffffc0 + +#define GEN12_OAG_OABUFFER _MMIO(0xdb08) +#define GEN12_OAG_OABUFFER_BUFFER_SIZE_MASK (0x7) +#define GEN12_OAG_OABUFFER_BUFFER_SIZE_SHIFT (3) +#define GEN12_OAG_OABUFFER_MEMORY_SELECT (1 << 0) /* 0: PPGTT, 1: GGTT */ + +#define GEN12_OAG_OAGLBCTXCTRL _MMIO(0x2b28) +#define GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT 2 +#define GEN12_OAG_OAGLBCTXCTRL_TIMER_ENABLE (1 << 1) +#define GEN12_OAG_OAGLBCTXCTRL_COUNTER_RESUME (1 << 0) + +#define GEN12_OAG_OACONTROL _MMIO(0xdaf4) +#define GEN12_OAG_OACONTROL_OA_COUNTER_FORMAT_SHIFT 2 +#define GEN12_OAG_OACONTROL_OA_COUNTER_ENABLE (1 << 0) + +#define GEN12_OAG_OA_DEBUG _MMIO(0xdaf8) +#define GEN12_OAG_OA_DEBUG_INCLUDE_CLK_RATIO (1 << 6) +#define GEN12_OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS (1 << 5) +#define GEN12_OAG_OA_DEBUG_DISABLE_GO_1_0_REPORTS (1 << 2) +#define GEN12_OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS (1 << 1) + +#define GEN12_OAG_OASTATUS _MMIO(0xdafc) +#define GEN12_OAG_OASTATUS_COUNTER_OVERFLOW (1 << 2) +#define GEN12_OAG_OASTATUS_BUFFER_OVERFLOW (1 << 1) +#define GEN12_OAG_OASTATUS_REPORT_LOST (1 << 0) + /* * Flexible, Aggregate EU Counter Registers. * Note: these aren't contiguous @@ -920,6 +959,26 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define OAREPORTTRIG8_NOA_SELECT_6_SHIFT 24 #define OAREPORTTRIG8_NOA_SELECT_7_SHIFT 28 +/* Same layout as OASTARTTRIGX */ +#define GEN12_OAG_OASTARTTRIG1 _MMIO(0xd900) +#define GEN12_OAG_OASTARTTRIG2 _MMIO(0xd904) +#define GEN12_OAG_OASTARTTRIG3 _MMIO(0xd908) +#define GEN12_OAG_OASTARTTRIG4 _MMIO(0xd90c) +#define GEN12_OAG_OASTARTTRIG5 _MMIO(0xd910) +#define GEN12_OAG_OASTARTTRIG6 _MMIO(0xd914) +#define GEN12_OAG_OASTARTTRIG7 _MMIO(0xd918) +#define GEN12_OAG_OASTARTTRIG8 _MMIO(0xd91c) + +/* Same layout as OAREPORTTRIGX */ +#define GEN12_OAG_OAREPORTTRIG1 _MMIO(0xd920) +#define GEN12_OAG_OAREPORTTRIG2 _MMIO(0xd924) +#define GEN12_OAG_OAREPORTTRIG3 _MMIO(0xd928) +#define GEN12_OAG_OAREPORTTRIG4 _MMIO(0xd92c) +#define GEN12_OAG_OAREPORTTRIG5 _MMIO(0xd930) +#define GEN12_OAG_OAREPORTTRIG6 _MMIO(0xd934) +#define GEN12_OAG_OAREPORTTRIG7 _MMIO(0xd938) +#define GEN12_OAG_OAREPORTTRIG8 _MMIO(0xd93c) + /* CECX_0 */ #define OACEC_COMPARE_LESS_OR_EQUAL 6 #define OACEC_COMPARE_NOT_EQUAL 5 @@ -936,6 +995,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define OACEC_SELECT_PREV (1 << 19) #define OACEC_SELECT_BOOLEAN (2 << 19) +/* 11-bit array 0: pass-through, 1: negated */ +#define GEN12_OASCEC_NEGATE_MASK 0x7ff +#define GEN12_OASCEC_NEGATE_SHIFT 21 + /* CECX_1 */ #define OACEC_MASK_MASK 0xffff #define OACEC_CONSIDERATIONS_MASK 0xffff @@ -958,6 +1021,42 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define OACEC7_0 _MMIO(0x27a8) #define OACEC7_1 _MMIO(0x27ac) +/* Same layout as CECX_Y */ +#define GEN12_OAG_CEC0_0 _MMIO(0xd940) +#define GEN12_OAG_CEC0_1 _MMIO(0xd944) +#define GEN12_OAG_CEC1_0 _MMIO(0xd948) +#define GEN12_OAG_CEC1_1 _MMIO(0xd94c) +#define GEN12_OAG_CEC2_0 _MMIO(0xd950) +#define GEN12_OAG_CEC2_1 _MMIO(0xd954) +#define GEN12_OAG_CEC3_0 _MMIO(0xd958) +#define GEN12_OAG_CEC3_1 _MMIO(0xd95c) +#define GEN12_OAG_CEC4_0 _MMIO(0xd960) +#define GEN12_OAG_CEC4_1 _MMIO(0xd964) +#define GEN12_OAG_CEC5_0 _MMIO(0xd968) +#define GEN12_OAG_CEC5_1 _MMIO(0xd96c) +#define GEN12_OAG_CEC6_0 _MMIO(0xd970) +#define GEN12_OAG_CEC6_1 _MMIO(0xd974) +#define GEN12_OAG_CEC7_0 _MMIO(0xd978) +#define GEN12_OAG_CEC7_1 _MMIO(0xd97c) + +/* Same layout as CECX_Y + negate 11-bit array */ +#define GEN12_OAG_SCEC0_0 _MMIO(0xdc00) +#define GEN12_OAG_SCEC0_1 _MMIO(0xdc04) +#define GEN12_OAG_SCEC1_0 _MMIO(0xdc08) +#define GEN12_OAG_SCEC1_1 _MMIO(0xdc0c) +#define GEN12_OAG_SCEC2_0 _MMIO(0xdc10) +#define GEN12_OAG_SCEC2_1 _MMIO(0xdc14) +#define GEN12_OAG_SCEC3_0 _MMIO(0xdc18) +#define GEN12_OAG_SCEC3_1 _MMIO(0xdc1c) +#define GEN12_OAG_SCEC4_0 _MMIO(0xdc20) +#define GEN12_OAG_SCEC4_1 _MMIO(0xdc24) +#define GEN12_OAG_SCEC5_0 _MMIO(0xdc28) +#define GEN12_OAG_SCEC5_1 _MMIO(0xdc2c) +#define GEN12_OAG_SCEC6_0 _MMIO(0xdc30) +#define GEN12_OAG_SCEC6_1 _MMIO(0xdc34) +#define GEN12_OAG_SCEC7_0 _MMIO(0xdc38) +#define GEN12_OAG_SCEC7_1 _MMIO(0xdc3c) + /* OA perf counters */ #define OA_PERFCNT1_LO _MMIO(0x91B8) #define OA_PERFCNT1_HI _MMIO(0x91BC) @@ -1038,6 +1137,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define MICRO_BP3_COUNT_STATUS23 _MMIO(0x9838) #define MICRO_BP_FIRED_ARMED _MMIO(0x983C) +#define GEN12_OAA_DBG_REG _MMIO(0xdc44) +#define GEN12_OAG_OA_PESS _MMIO(0x2b2c) +#define GEN12_OAG_SPCTR_CNF _MMIO(0xdc40) + #define GDT_CHICKEN_BITS _MMIO(0x9840) #define GT_NOA_ENABLE 0x00000080 diff --git a/drivers/gpu/drm/i915/oa/i915_oa_tgl.c b/drivers/gpu/drm/i915/oa/i915_oa_tgl.c new file mode 100644 index 000000000000..a29d93707345 --- /dev/null +++ b/drivers/gpu/drm/i915/oa/i915_oa_tgl.c @@ -0,0 +1,121 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2018 Intel Corporation + * + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! + */ + +#include + +#include "i915_drv.h" +#include "i915_oa_tgl.h" + +static const struct i915_oa_reg b_counter_config_test_oa[] = { + { _MMIO(0xD920), 0x00000000 }, + { _MMIO(0xD900), 0x00000000 }, + { _MMIO(0xD904), 0xF0800000 }, + { _MMIO(0xD910), 0x00000000 }, + { _MMIO(0xD914), 0xF0800000 }, + { _MMIO(0xDC40), 0x00FF0000 }, + { _MMIO(0xD940), 0x00000004 }, + { _MMIO(0xD944), 0x0000FFFF }, + { _MMIO(0xDC00), 0x00000004 }, + { _MMIO(0xDC04), 0x0000FFFF }, + { _MMIO(0xD948), 0x00000003 }, + { _MMIO(0xD94C), 0x0000FFFF }, + { _MMIO(0xDC08), 0x00000003 }, + { _MMIO(0xDC0C), 0x0000FFFF }, + { _MMIO(0xD950), 0x00000007 }, + { _MMIO(0xD954), 0x0000FFFF }, + { _MMIO(0xDC10), 0x00000007 }, + { _MMIO(0xDC14), 0x0000FFFF }, + { _MMIO(0xD958), 0x00100002 }, + { _MMIO(0xD95C), 0x0000FFF7 }, + { _MMIO(0xDC18), 0x00100002 }, + { _MMIO(0xDC1C), 0x0000FFF7 }, + { _MMIO(0xD960), 0x00100002 }, + { _MMIO(0xD964), 0x0000FFCF }, + { _MMIO(0xDC20), 0x00100002 }, + { _MMIO(0xDC24), 0x0000FFCF }, + { _MMIO(0xD968), 0x00100082 }, + { _MMIO(0xD96C), 0x0000FFEF }, + { _MMIO(0xDC28), 0x00100082 }, + { _MMIO(0xDC2C), 0x0000FFEF }, + { _MMIO(0xD970), 0x001000C2 }, + { _MMIO(0xD974), 0x0000FFE7 }, + { _MMIO(0xDC30), 0x001000C2 }, + { _MMIO(0xDC34), 0x0000FFE7 }, + { _MMIO(0xD978), 0x00100001 }, + { _MMIO(0xD97C), 0x0000FFE7 }, + { _MMIO(0xDC38), 0x00100001 }, + { _MMIO(0xDC3C), 0x0000FFE7 }, +}; + +static const struct i915_oa_reg flex_eu_config_test_oa[] = { +}; + +static const struct i915_oa_reg mux_config_test_oa[] = { + { _MMIO(0x0D04), 0x00000200 }, + { _MMIO(0x9840), 0x00000000 }, + { _MMIO(0x9884), 0x00000000 }, + { _MMIO(0x9888), 0x280E0000 }, + { _MMIO(0x9888), 0x1E0E0147 }, + { _MMIO(0x9888), 0x180E0000 }, + { _MMIO(0x9888), 0x160E0000 }, + { _MMIO(0x9888), 0x1E0F1000 }, + { _MMIO(0x9888), 0x1E104000 }, + { _MMIO(0x9888), 0x2E020100 }, + { _MMIO(0x9888), 0x2C030004 }, + { _MMIO(0x9888), 0x38003000 }, + { _MMIO(0x9888), 0x1E0A8000 }, + { _MMIO(0x9884), 0x00000003 }, + { _MMIO(0x9888), 0x49110000 }, + { _MMIO(0x9888), 0x5D101400 }, + { _MMIO(0x9888), 0x1D140020 }, + { _MMIO(0x9888), 0x1D1103A3 }, + { _MMIO(0x9888), 0x01110000 }, + { _MMIO(0x9888), 0x61111000 }, + { _MMIO(0x9888), 0x1F128000 }, + { _MMIO(0x9888), 0x17100000 }, + { _MMIO(0x9888), 0x55100630 }, + { _MMIO(0x9888), 0x57100000 }, + { _MMIO(0x9888), 0x31100000 }, + { _MMIO(0x9884), 0x00000003 }, + { _MMIO(0x9888), 0x65100002 }, + { _MMIO(0x9884), 0x00000000 }, + { _MMIO(0x9888), 0x42000001 }, +}; + +static ssize_t +show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "1\n"); +} + +void +i915_perf_load_test_config_tgl(struct drm_i915_private *dev_priv) +{ + strlcpy(dev_priv->perf.test_config.uuid, + "80a833f0-2504-4321-8894-e9277844ce7b", + sizeof(dev_priv->perf.test_config.uuid)); + dev_priv->perf.test_config.id = 1; + + dev_priv->perf.test_config.mux_regs = mux_config_test_oa; + dev_priv->perf.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa); + + dev_priv->perf.test_config.b_counter_regs = b_counter_config_test_oa; + dev_priv->perf.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa); + + dev_priv->perf.test_config.flex_regs = flex_eu_config_test_oa; + dev_priv->perf.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa); + + dev_priv->perf.test_config.sysfs_metric.name = "80a833f0-2504-4321-8894-e9277844ce7b"; + dev_priv->perf.test_config.sysfs_metric.attrs = dev_priv->perf.test_config.attrs; + + dev_priv->perf.test_config.attrs[0] = &dev_priv->perf.test_config.sysfs_metric_id.attr; + + dev_priv->perf.test_config.sysfs_metric_id.attr.name = "id"; + dev_priv->perf.test_config.sysfs_metric_id.attr.mode = 0444; + dev_priv->perf.test_config.sysfs_metric_id.show = show_test_oa_id; +} diff --git a/drivers/gpu/drm/i915/oa/i915_oa_tgl.h b/drivers/gpu/drm/i915/oa/i915_oa_tgl.h new file mode 100644 index 000000000000..4c25f0be825c --- /dev/null +++ b/drivers/gpu/drm/i915/oa/i915_oa_tgl.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2018 Intel Corporation + * + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! + */ + +#ifndef __I915_OA_TGL_H__ +#define __I915_OA_TGL_H__ + +struct drm_i915_private; + +void i915_perf_load_test_config_tgl(struct drm_i915_private *dev_priv); + +#endif From patchwork Mon Oct 14 18:55:31 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Umesh Nerlige Ramappa X-Patchwork-Id: 11189341 Return-Path: Received: from mail.kernel.org (pdx-korg-mail-1.web.codeaurora.org [172.30.200.123]) by pdx-korg-patchwork-2.web.codeaurora.org (Postfix) with ESMTP id CE5B314DB for ; Mon, 14 Oct 2019 18:55:37 +0000 (UTC) Received: from gabe.freedesktop.org (gabe.freedesktop.org [131.252.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mail.kernel.org (Postfix) with ESMTPS id B6AD62067B for ; Mon, 14 Oct 2019 18:55:37 +0000 (UTC) DMARC-Filter: OpenDMARC Filter v1.3.2 mail.kernel.org B6AD62067B Authentication-Results: mail.kernel.org; dmarc=fail (p=none dis=none) header.from=intel.com Authentication-Results: mail.kernel.org; spf=none smtp.mailfrom=intel-gfx-bounces@lists.freedesktop.org Received: from gabe.freedesktop.org (localhost [127.0.0.1]) by gabe.freedesktop.org (Postfix) with ESMTP id CBEDC8981B; Mon, 14 Oct 2019 18:55:34 +0000 (UTC) X-Original-To: intel-gfx@lists.freedesktop.org Delivered-To: intel-gfx@lists.freedesktop.org Received: from mga01.intel.com (mga01.intel.com [192.55.52.88]) by gabe.freedesktop.org (Postfix) with ESMTPS id 101C98982E for ; Mon, 14 Oct 2019 18:55:33 +0000 (UTC) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga005.jf.intel.com ([10.7.209.41]) by fmsmga101.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 14 Oct 2019 11:55:31 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.67,296,1566889200"; d="scan'208";a="370213910" Received: from orsosgc001.ra.intel.com ([10.23.184.150]) by orsmga005.jf.intel.com with ESMTP; 14 Oct 2019 11:55:31 -0700 From: Umesh Nerlige Ramappa To: intel-gfx@lists.freedesktop.org Date: Mon, 14 Oct 2019 11:55:31 -0700 Message-Id: <20191014185531.62855-3-umesh.nerlige.ramappa@intel.com> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20191014185531.62855-1-umesh.nerlige.ramappa@intel.com> References: <20191014185531.62855-1-umesh.nerlige.ramappa@intel.com> MIME-Version: 1.0 Subject: [Intel-gfx] [PATCH 3/3] drm/i915/perf: enable OAR context save/restore of performance counters X-BeenThere: intel-gfx@lists.freedesktop.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: Intel graphics driver community testing & development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Lucas De Marchi , Chris Wilson Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" From: Lionel Landwerlin We want this so we can preempt performance queries and keep the system responsive even when long running queries are ongoing. We avoid doing it for all contexts. Signed-off-by: Lionel Landwerlin --- drivers/gpu/drm/i915/gt/intel_lrc.c | 23 +++++--- drivers/gpu/drm/i915/gt/intel_lrc.h | 3 + drivers/gpu/drm/i915/i915_perf.c | 89 +++++++++++++++++++++++++++++ 3 files changed, 107 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 9d8eaf8edaab..1bab671dc7a0 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1218,6 +1218,19 @@ static bool can_merge_rq(const struct i915_request *prev, return true; } +u32 intel_lrc_make_ctx_control(const struct intel_engine_cs *engine) +{ + u32 value = + _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) | + _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH); + + if (INTEL_GEN(engine->i915) < 11) + value |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT | + CTX_CTRL_RS_CTX_ENABLE); + + return value; +} + static void virtual_update_register_offsets(u32 *regs, struct intel_engine_cs *engine) { @@ -2124,6 +2137,7 @@ __execlists_update_reg_state(const struct intel_context *ce, i915_oa_init_reg_state(ce, engine); } + regs[CTX_CONTEXT_CONTROL] |= intel_lrc_make_ctx_control(engine); } static int @@ -3628,14 +3642,7 @@ static void init_common_reg_state(u32 * const regs, const struct intel_engine_cs *engine, const struct intel_ring *ring) { - regs[CTX_CONTEXT_CONTROL] = - _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) | - _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH); - if (INTEL_GEN(engine->i915) < 11) - regs[CTX_CONTEXT_CONTROL] |= - _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT | - CTX_CTRL_RS_CTX_ENABLE); - + regs[CTX_CONTEXT_CONTROL] = intel_lrc_make_ctx_control(engine); regs[CTX_RING_BUFFER_CONTROL] = RING_CTL_SIZE(ring->size) | RING_VALID; regs[CTX_BB_STATE] = RING_BB_PPGTT; } diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h index 99dc576a4e25..6b2b196f09e7 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h @@ -43,6 +43,7 @@ struct intel_engine_cs; #define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT (1 << 0) #define CTX_CTRL_RS_CTX_ENABLE (1 << 1) #define CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT (1 << 2) +#define GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE (1 << 8) #define RING_CONTEXT_STATUS_PTR(base) _MMIO((base) + 0x3a0) #define RING_EXECLIST_SQ_CONTENTS(base) _MMIO((base) + 0x510) #define RING_EXECLIST_CONTROL(base) _MMIO((base) + 0x550) @@ -145,4 +146,6 @@ struct intel_engine_cs * intel_virtual_engine_get_sibling(struct intel_engine_cs *engine, unsigned int sibling); +u32 intel_lrc_make_ctx_control(const struct intel_engine_cs *engine); + #endif /* _INTEL_LRC_H_ */ diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index df3b6976ba5b..d0ddd37dc078 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -2208,6 +2208,70 @@ static int gen8_configure_context(struct i915_gem_context *ctx, return err; } +static int gen12_emit_oar_config(struct intel_context *ce, bool enable) +{ + struct i915_request *rq; + u32 *cs, offset; + int err = 0; + + rq = i915_request_create(ce); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto out; + } + + offset = i915_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE; + + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = offset + CTX_CONTEXT_CONTROL * sizeof(u32); + *cs++ = 0; + *cs++ = intel_lrc_make_ctx_control(ce->engine) | + (enable ? + _MASKED_BIT_ENABLE(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE) : + _MASKED_BIT_DISABLE(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE)); + + intel_ring_advance(rq, cs); + +out: + i915_request_add(rq); + + return err; +} + +static int gen12_configure_context_oar(struct i915_gem_context *ctx, + bool enable) +{ + struct i915_gem_engines_iter it; + struct intel_context *ce; + int err = 0; + + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + GEM_BUG_ON(ce == ce->engine->kernel_context); + + if (ce->engine->class != RENDER_CLASS) + continue; + + err = intel_context_lock_pinned(ce); + if (err) + break; + + /* Otherwise OA settings will be set upon first use */ + if (intel_context_is_pinned(ce)) + err = gen12_emit_oar_config(ce, enable); + + intel_context_unlock_pinned(ce); + if (err) + break; + } + i915_gem_context_unlock_engines(ctx); + + return err; +} + /* * Manages updating the per-context aspects of the OA stream * configuration across all contexts. @@ -2313,6 +2377,17 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream, return err; } + /* + * For Gen12, performance counters are context + * saved/restored. Only enable it for the context that + * requested this. + */ + if (ctx == stream->ctx && IS_GEN(i915, 12)) { + err = gen12_configure_context_oar(ctx, oa_config != NULL); + if (err) + return err; + } + spin_lock(&i915->gem.contexts.lock); list_safe_reset_next(ctx, cn, link); i915_gem_context_put(ctx); @@ -2782,6 +2857,20 @@ void i915_oa_init_reg_state(const struct intel_context *ce, stream = engine->i915->perf.exclusive_stream; if (stream) gen8_update_reg_state_unlocked(ce, stream); + + /* + * Enable context save & restore of performance counters for + * the OAR unit only on the context selected for performance + * queries. + */ + if (IS_GEN(engine->i915, 12)) { + u32 *regs = ce->lrc_reg_state; + + regs[CTX_CONTEXT_CONTROL] |= + (stream && ce->gem_context == stream->ctx) ? + _MASKED_BIT_ENABLE(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE) : + _MASKED_BIT_DISABLE(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE); + } } /**