Message ID | 20220823204155.8178-11-umesh.nerlige.ramappa@intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Add DG2 OA support | expand |
On 23/08/2022 23:41, Umesh Nerlige Ramappa wrote: > User passes uabi engine class and instance to the perf OA interface. Use > gt corresponding to the engine to pin the buffers to the right ggtt. > > Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> I didn't know there was a GGTT per engine. Do I understand this correct? Thanks, -Lionel > --- > drivers/gpu/drm/i915/i915_perf.c | 21 +++++++++++++++++++-- > 1 file changed, 19 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c > index 87b92d2946f4..f7621b45966c 100644 > --- a/drivers/gpu/drm/i915/i915_perf.c > +++ b/drivers/gpu/drm/i915/i915_perf.c > @@ -1765,6 +1765,7 @@ static void gen12_init_oa_buffer(struct i915_perf_stream *stream) > static int alloc_oa_buffer(struct i915_perf_stream *stream) > { > struct drm_i915_private *i915 = stream->perf->i915; > + struct intel_gt *gt = stream->engine->gt; > struct drm_i915_gem_object *bo; > struct i915_vma *vma; > int ret; > @@ -1784,11 +1785,22 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream) > i915_gem_object_set_cache_coherency(bo, I915_CACHE_LLC); > > /* PreHSW required 512K alignment, HSW requires 16M */ > - vma = i915_gem_object_ggtt_pin(bo, NULL, 0, SZ_16M, 0); > + vma = i915_vma_instance(bo, &gt->ggtt->vm, NULL); > if (IS_ERR(vma)) { > ret = PTR_ERR(vma); > goto err_unref; > } > + > + /* > + * PreHSW required 512K alignment. > + * HSW and onwards, align to requested size of OA buffer. 
> + */ > + ret = i915_vma_pin(vma, 0, SZ_16M, PIN_GLOBAL | PIN_HIGH); > + if (ret) { > + drm_err(&gt->i915->drm, "Failed to pin OA buffer %d\n", ret); > + goto err_unref; > + } > + > stream->oa_buffer.vma = vma; > > stream->oa_buffer.vaddr = > @@ -1838,6 +1850,7 @@ static u32 *save_restore_register(struct i915_perf_stream *stream, u32 *cs, > static int alloc_noa_wait(struct i915_perf_stream *stream) > { > struct drm_i915_private *i915 = stream->perf->i915; > + struct intel_gt *gt = stream->engine->gt; > struct drm_i915_gem_object *bo; > struct i915_vma *vma; > const u64 delay_ticks = 0xffffffffffffffff - > @@ -1878,12 +1891,16 @@ static int alloc_noa_wait(struct i915_perf_stream *stream) > * multiple OA config BOs will have a jump to this address and it > * needs to be fixed during the lifetime of the i915/perf stream. > */ > - vma = i915_gem_object_ggtt_pin_ww(bo, &ww, NULL, 0, 0, PIN_HIGH); > + vma = i915_vma_instance(bo, &gt->ggtt->vm, NULL); > if (IS_ERR(vma)) { > ret = PTR_ERR(vma); > goto out_ww; > } > > + ret = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_GLOBAL | PIN_HIGH); > + if (ret) > + goto out_ww; > + > batch = cs = i915_gem_object_pin_map(bo, I915_MAP_WB); > if (IS_ERR(batch)) { > ret = PTR_ERR(batch);
On Tue, Sep 06, 2022 at 10:56:13PM +0300, Lionel Landwerlin wrote: >On 23/08/2022 23:41, Umesh Nerlige Ramappa wrote: >>User passes uabi engine class and instance to the perf OA interface. Use >>gt corresponding to the engine to pin the buffers to the right ggtt. >> >>Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> > >I didn't know there was a GGTT per engine. > >Do I understand this correct? No, GGTT is still per-gt. We just derive the gt from engine class instance passed (as in engine->gt). > > >Thanks, > >-Lionel > > >>--- >> drivers/gpu/drm/i915/i915_perf.c | 21 +++++++++++++++++++-- >> 1 file changed, 19 insertions(+), 2 deletions(-) >> >>diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c >>index 87b92d2946f4..f7621b45966c 100644 >>--- a/drivers/gpu/drm/i915/i915_perf.c >>+++ b/drivers/gpu/drm/i915/i915_perf.c >>@@ -1765,6 +1765,7 @@ static void gen12_init_oa_buffer(struct i915_perf_stream *stream) >> static int alloc_oa_buffer(struct i915_perf_stream *stream) >> { >> struct drm_i915_private *i915 = stream->perf->i915; >>+ struct intel_gt *gt = stream->engine->gt; >> struct drm_i915_gem_object *bo; >> struct i915_vma *vma; >> int ret; >>@@ -1784,11 +1785,22 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream) >> i915_gem_object_set_cache_coherency(bo, I915_CACHE_LLC); >> /* PreHSW required 512K alignment, HSW requires 16M */ >>- vma = i915_gem_object_ggtt_pin(bo, NULL, 0, SZ_16M, 0); >>+ vma = i915_vma_instance(bo, &gt->ggtt->vm, NULL); >> if (IS_ERR(vma)) { >> ret = PTR_ERR(vma); >> goto err_unref; >> } >>+ >>+ /* >>+ * PreHSW required 512K alignment. >>+ * HSW and onwards, align to requested size of OA buffer. 
>>+ */ >>+ ret = i915_vma_pin(vma, 0, SZ_16M, PIN_GLOBAL | PIN_HIGH); >>+ if (ret) { >>+ drm_err(&gt->i915->drm, "Failed to pin OA buffer %d\n", ret); >>+ goto err_unref; >>+ } >>+ >> stream->oa_buffer.vma = vma; >> stream->oa_buffer.vaddr = >>@@ -1838,6 +1850,7 @@ static u32 *save_restore_register(struct i915_perf_stream *stream, u32 *cs, >> static int alloc_noa_wait(struct i915_perf_stream *stream) >> { >> struct drm_i915_private *i915 = stream->perf->i915; >>+ struct intel_gt *gt = stream->engine->gt; >> struct drm_i915_gem_object *bo; >> struct i915_vma *vma; >> const u64 delay_ticks = 0xffffffffffffffff - >>@@ -1878,12 +1891,16 @@ static int alloc_noa_wait(struct i915_perf_stream *stream) >> * multiple OA config BOs will have a jump to this address and it >> * needs to be fixed during the lifetime of the i915/perf stream. >> */ >>- vma = i915_gem_object_ggtt_pin_ww(bo, &ww, NULL, 0, 0, PIN_HIGH); >>+ vma = i915_vma_instance(bo, &gt->ggtt->vm, NULL); >> if (IS_ERR(vma)) { >> ret = PTR_ERR(vma); >> goto out_ww; >> } >>+ ret = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_GLOBAL | PIN_HIGH); >>+ if (ret) >>+ goto out_ww; >>+ >> batch = cs = i915_gem_object_pin_map(bo, I915_MAP_WB); >> if (IS_ERR(batch)) { >> ret = PTR_ERR(batch); > >
On 06/09/2022 23:28, Umesh Nerlige Ramappa wrote: > On Tue, Sep 06, 2022 at 10:56:13PM +0300, Lionel Landwerlin wrote: >> On 23/08/2022 23:41, Umesh Nerlige Ramappa wrote: >>> User passes uabi engine class and instance to the perf OA interface. >>> Use >>> gt corresponding to the engine to pin the buffers to the right ggtt. >>> >>> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> >> >> I didn't know there was a GGTT per engine. >> >> Do I understand this correct? > > No, GGTT is still per-gt. We just derive the gt from engine class > instance passed (as in engine->gt). Oh thanks I understand now. Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > >> >> >> Thanks, >> >> -Lionel >> >> >>> --- >>> drivers/gpu/drm/i915/i915_perf.c | 21 +++++++++++++++++++-- >>> 1 file changed, 19 insertions(+), 2 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/i915/i915_perf.c >>> b/drivers/gpu/drm/i915/i915_perf.c >>> index 87b92d2946f4..f7621b45966c 100644 >>> --- a/drivers/gpu/drm/i915/i915_perf.c >>> +++ b/drivers/gpu/drm/i915/i915_perf.c >>> @@ -1765,6 +1765,7 @@ static void gen12_init_oa_buffer(struct >>> i915_perf_stream *stream) >>> static int alloc_oa_buffer(struct i915_perf_stream *stream) >>> { >>> struct drm_i915_private *i915 = stream->perf->i915; >>> + struct intel_gt *gt = stream->engine->gt; >>> struct drm_i915_gem_object *bo; >>> struct i915_vma *vma; >>> int ret; >>> @@ -1784,11 +1785,22 @@ static int alloc_oa_buffer(struct >>> i915_perf_stream *stream) >>> i915_gem_object_set_cache_coherency(bo, I915_CACHE_LLC); >>> /* PreHSW required 512K alignment, HSW requires 16M */ >>> - vma = i915_gem_object_ggtt_pin(bo, NULL, 0, SZ_16M, 0); >>> + vma = i915_vma_instance(bo, &gt->ggtt->vm, NULL); >>> if (IS_ERR(vma)) { >>> ret = PTR_ERR(vma); >>> goto err_unref; >>> } >>> + >>> + /* >>> + * PreHSW required 512K alignment. >>> + * HSW and onwards, align to requested size of OA buffer. 
>>> + */ >>> + ret = i915_vma_pin(vma, 0, SZ_16M, PIN_GLOBAL | PIN_HIGH); >>> + if (ret) { >>> + drm_err(&gt->i915->drm, "Failed to pin OA buffer %d\n", ret); >>> + goto err_unref; >>> + } >>> + >>> stream->oa_buffer.vma = vma; >>> stream->oa_buffer.vaddr = >>> @@ -1838,6 +1850,7 @@ static u32 *save_restore_register(struct >>> i915_perf_stream *stream, u32 *cs, >>> static int alloc_noa_wait(struct i915_perf_stream *stream) >>> { >>> struct drm_i915_private *i915 = stream->perf->i915; >>> + struct intel_gt *gt = stream->engine->gt; >>> struct drm_i915_gem_object *bo; >>> struct i915_vma *vma; >>> const u64 delay_ticks = 0xffffffffffffffff - >>> @@ -1878,12 +1891,16 @@ static int alloc_noa_wait(struct >>> i915_perf_stream *stream) >>> * multiple OA config BOs will have a jump to this address and it >>> * needs to be fixed during the lifetime of the i915/perf stream. >>> */ >>> - vma = i915_gem_object_ggtt_pin_ww(bo, &ww, NULL, 0, 0, PIN_HIGH); >>> + vma = i915_vma_instance(bo, &gt->ggtt->vm, NULL); >>> if (IS_ERR(vma)) { >>> ret = PTR_ERR(vma); >>> goto out_ww; >>> } >>> + ret = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_GLOBAL | PIN_HIGH); >>> + if (ret) >>> + goto out_ww; >>> + >>> batch = cs = i915_gem_object_pin_map(bo, I915_MAP_WB); >>> if (IS_ERR(batch)) { >>> ret = PTR_ERR(batch); >> >>
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 87b92d2946f4..f7621b45966c 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1765,6 +1765,7 @@ static void gen12_init_oa_buffer(struct i915_perf_stream *stream) static int alloc_oa_buffer(struct i915_perf_stream *stream) { struct drm_i915_private *i915 = stream->perf->i915; + struct intel_gt *gt = stream->engine->gt; struct drm_i915_gem_object *bo; struct i915_vma *vma; int ret; @@ -1784,11 +1785,22 @@ static int alloc_oa_buffer(struct i915_perf_stream *stream) i915_gem_object_set_cache_coherency(bo, I915_CACHE_LLC); /* PreHSW required 512K alignment, HSW requires 16M */ - vma = i915_gem_object_ggtt_pin(bo, NULL, 0, SZ_16M, 0); + vma = i915_vma_instance(bo, &gt->ggtt->vm, NULL); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto err_unref; } + + /* + * PreHSW required 512K alignment. + * HSW and onwards, align to requested size of OA buffer. + */ + ret = i915_vma_pin(vma, 0, SZ_16M, PIN_GLOBAL | PIN_HIGH); + if (ret) { + drm_err(&gt->i915->drm, "Failed to pin OA buffer %d\n", ret); + goto err_unref; + } + stream->oa_buffer.vma = vma; stream->oa_buffer.vaddr = @@ -1838,6 +1850,7 @@ static u32 *save_restore_register(struct i915_perf_stream *stream, u32 *cs, static int alloc_noa_wait(struct i915_perf_stream *stream) { struct drm_i915_private *i915 = stream->perf->i915; + struct intel_gt *gt = stream->engine->gt; struct drm_i915_gem_object *bo; struct i915_vma *vma; const u64 delay_ticks = 0xffffffffffffffff - @@ -1878,12 +1891,16 @@ static int alloc_noa_wait(struct i915_perf_stream *stream) * multiple OA config BOs will have a jump to this address and it * needs to be fixed during the lifetime of the i915/perf stream. 
*/ - vma = i915_gem_object_ggtt_pin_ww(bo, &ww, NULL, 0, 0, PIN_HIGH); + vma = i915_vma_instance(bo, &gt->ggtt->vm, NULL); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto out_ww; } + ret = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_GLOBAL | PIN_HIGH); + if (ret) + goto out_ww; + batch = cs = i915_gem_object_pin_map(bo, I915_MAP_WB); if (IS_ERR(batch)) { ret = PTR_ERR(batch);
User passes uabi engine class and instance to the perf OA interface. Use gt corresponding to the engine to pin the buffers to the right ggtt. Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com> --- drivers/gpu/drm/i915/i915_perf.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-)