Message ID | 1412700449-21187-1-git-send-email-John.C.Harrison@Intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Tue, Oct 07, 2014 at 05:47:29PM +0100, John.C.Harrison@Intel.com wrote: > From: John Harrison <John.C.Harrison@Intel.com> > > For: VIZ-4377 > Signed-off-by: John.C.Harrison@Intel.com Why? If it's just for performance I think we should do this as part of the switch to struct fence, which already has this. > --- > drivers/gpu/drm/i915/i915_drv.h | 34 ++++++++++++++++--------------- > drivers/gpu/drm/i915/i915_gem.c | 21 +++++++++++++++++++ > drivers/gpu/drm/i915/intel_lrc.c | 1 + > drivers/gpu/drm/i915/intel_ringbuffer.c | 2 ++ > drivers/gpu/drm/i915/intel_ringbuffer.h | 3 +++ > 5 files changed, 45 insertions(+), 16 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index cdbbdeb..4ab3b23 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1913,6 +1913,9 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old, > struct drm_i915_gem_request { > struct kref ref; > > + /** Is this request known to be complete? */ > + bool complete; > + > /** On Which ring this request was generated */ > struct intel_engine_cs *ring; > > @@ -1943,6 +1946,8 @@ struct drm_i915_gem_request { > }; > > void i915_gem_request_free(struct kref *req_ref); > +void i915_gem_complete_requests_ring(struct intel_engine_cs *ring, > + bool lazy_coherency); > > static inline uint32_t > i915_gem_request_get_seqno(struct drm_i915_gem_request *req) > @@ -1968,7 +1973,19 @@ i915_gem_request_unreference(struct drm_i915_gem_request *req) > kref_put(&req->ref, i915_gem_request_free); > } > > -/* ??? i915_gem_request_completed should be here ??? */ > +static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req, > + bool lazy_coherency) > +{ > + if (req->complete) > + return true; > + > + if (req->ring == NULL) > + return false; > + > + i915_gem_complete_requests_ring(req->ring, lazy_coherency); > + > + return req->complete; > +} Also, this is looking way too big now I think ;-) If you have a full non-inline function call in your inline it's always a net loss. -Daniel > > struct drm_i915_file_private { > struct drm_i915_private *dev_priv; > @@ -3019,19 +3036,4 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms) > } > } > > -static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req, > - bool lazy_coherency) > -{ > - u32 seqno; > - > - BUG_ON(req == NULL); > - > - if (req->ring == NULL) > - return false; > - > - seqno = req->ring->get_seqno(req->ring, lazy_coherency); > - > - return i915_seqno_passed(seqno, req->seqno); > -} > - > #endif > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 0f14333..0a9b29e 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -2641,6 +2641,27 @@ void i915_gem_reset(struct drm_device *dev) > i915_gem_restore_fences(dev); > } > > +void i915_gem_complete_requests_ring(struct intel_engine_cs *ring, > + bool lazy_coherency) > +{ > + struct drm_i915_gem_request *req; > + u32 seqno; > + > + seqno = ring->get_seqno(ring, lazy_coherency); > + if (seqno == ring->last_read_seqno) > + return; > + > + list_for_each_entry(req, &ring->request_list, list) { > + if (req->complete) > + continue; > + > + if (i915_seqno_passed(seqno, req->seqno)) > + req->complete = true; > + } > + > + ring->last_read_seqno = seqno; > +} > + > /** > * This function clears the request list as sequence numbers are passed. > */ > diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c > index 744684a..57acd2a 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -808,6 +808,7 @@ static int logical_ring_alloc_seqno(struct intel_engine_cs *ring, > > kref_init(&request->ref); > request->ring = NULL; > + request->complete = false; > > ret = i915_gem_get_seqno(ring->dev, &request->seqno); > if (ret) { > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c > index 0a3c24a..392dc25 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.c > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c > @@ -2023,6 +2023,7 @@ intel_ring_alloc_seqno(struct intel_engine_cs *ring) > > kref_init(&request->ref); > request->ring = NULL; > + request->complete = false; > > ret = i915_gem_get_seqno(ring->dev, &request->seqno); > if (ret) { > @@ -2115,6 +2116,7 @@ void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno) > I915_WRITE(RING_SYNC_2(ring->mmio_base), 0); > } > > + ring->last_read_seqno = 0; > ring->set_seqno(ring, seqno); > ring->hangcheck.seqno = seqno; > } > diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h > index 64a4346..40394d3 100644 > --- a/drivers/gpu/drm/i915/intel_ringbuffer.h > +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h > @@ -269,6 +269,9 @@ struct intel_engine_cs { > bool gpu_caches_dirty; > bool fbc_dirty; > > + /* For optimising request completion events */ > + u32 last_read_seqno; > + > wait_queue_head_t irq_queue; > > struct intel_context *default_context; > -- > 1.7.9.5 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
On 19/10/2014 15:14, Daniel Vetter wrote: > On Tue, Oct 07, 2014 at 05:47:29PM +0100, John.C.Harrison@Intel.com wrote: >> From: John Harrison <John.C.Harrison@Intel.com> >> >> For: VIZ-4377 >> Signed-off-by: John.C.Harrison@Intel.com > Why? If it's just for performance I think we should do this as part of the > switch to struct fence, which already has this. For performance and also as part of getting rid of all the i915_seqno_passed() calls. > >> --- >> drivers/gpu/drm/i915/i915_drv.h | 34 ++++++++++++++++--------------- >> drivers/gpu/drm/i915/i915_gem.c | 21 +++++++++++++++++++ >> drivers/gpu/drm/i915/intel_lrc.c | 1 + >> drivers/gpu/drm/i915/intel_ringbuffer.c | 2 ++ >> drivers/gpu/drm/i915/intel_ringbuffer.h | 3 +++ >> 5 files changed, 45 insertions(+), 16 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h >> index cdbbdeb..4ab3b23 100644 >> --- a/drivers/gpu/drm/i915/i915_drv.h >> +++ b/drivers/gpu/drm/i915/i915_drv.h >> @@ -1913,6 +1913,9 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old, >> struct drm_i915_gem_request { >> struct kref ref; >> >> + /** Is this request known to be complete? */ >> + bool complete; >> + >> /** On Which ring this request was generated */ >> struct intel_engine_cs *ring; >> >> @@ -1943,6 +1946,8 @@ struct drm_i915_gem_request { >> }; >> >> void i915_gem_request_free(struct kref *req_ref); >> +void i915_gem_complete_requests_ring(struct intel_engine_cs *ring, >> + bool lazy_coherency); >> >> static inline uint32_t >> i915_gem_request_get_seqno(struct drm_i915_gem_request *req) >> @@ -1968,7 +1973,19 @@ i915_gem_request_unreference(struct drm_i915_gem_request *req) >> kref_put(&req->ref, i915_gem_request_free); >> } >> >> -/* ??? i915_gem_request_completed should be here ??? */ >> +static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req, >> + bool lazy_coherency) >> +{ >> + if (req->complete) >> + return true; >> + >> + if (req->ring == NULL) >> + return false; >> + >> + i915_gem_complete_requests_ring(req->ring, lazy_coherency); >> + >> + return req->complete; >> +} > Also, this is looking way too big now I think ;-) If you have a full > non-inline function call in your inline it's always a net loss. > -Daniel That depends how you define gain/loss. In terms of performance, it can still be a gain because the function call is not always taken. Whereas the alternative is at least one function calls and possibly two. Either way, as noted already, the final intention is for this to become simply 'return req->complete' and not have any function calls at all. > >> >> struct drm_i915_file_private { >> struct drm_i915_private *dev_priv; >> @@ -3019,19 +3036,4 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms) >> } >> } >> >> -static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req, >> - bool lazy_coherency) >> -{ >> - u32 seqno; >> - >> - BUG_ON(req == NULL); >> - >> - if (req->ring == NULL) >> - return false; >> - >> - seqno = req->ring->get_seqno(req->ring, lazy_coherency); >> - >> - return i915_seqno_passed(seqno, req->seqno); >> -} >> - >> #endif >> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c >> index 0f14333..0a9b29e 100644 >> --- a/drivers/gpu/drm/i915/i915_gem.c >> +++ b/drivers/gpu/drm/i915/i915_gem.c >> @@ -2641,6 +2641,27 @@ void i915_gem_reset(struct drm_device *dev) >> i915_gem_restore_fences(dev); >> } >> >> +void i915_gem_complete_requests_ring(struct intel_engine_cs *ring, >> + bool lazy_coherency) >> +{ >> + struct drm_i915_gem_request *req; >> + u32 seqno; >> + >> + seqno = ring->get_seqno(ring, lazy_coherency); >> + if (seqno == ring->last_read_seqno) >> + return; >> + >> + list_for_each_entry(req, &ring->request_list, list) { >> + if (req->complete) >> + continue; >> + >> + if (i915_seqno_passed(seqno, req->seqno)) >> + req->complete = true; >> + } >> + >> + ring->last_read_seqno = seqno; >> +} >> + >> /** >> * This function clears the request list as sequence numbers are passed. >> */ >> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c >> index 744684a..57acd2a 100644 >> --- a/drivers/gpu/drm/i915/intel_lrc.c >> +++ b/drivers/gpu/drm/i915/intel_lrc.c >> @@ -808,6 +808,7 @@ static int logical_ring_alloc_seqno(struct intel_engine_cs *ring, >> >> kref_init(&request->ref); >> request->ring = NULL; >> + request->complete = false; >> >> ret = i915_gem_get_seqno(ring->dev, &request->seqno); >> if (ret) { >> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c >> index 0a3c24a..392dc25 100644 >> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c >> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c >> @@ -2023,6 +2023,7 @@ intel_ring_alloc_seqno(struct intel_engine_cs *ring) >> >> kref_init(&request->ref); >> request->ring = NULL; >> + request->complete = false; >> >> ret = i915_gem_get_seqno(ring->dev, &request->seqno); >> if (ret) { >> @@ -2115,6 +2116,7 @@ void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno) >> I915_WRITE(RING_SYNC_2(ring->mmio_base), 0); >> } >> >> + ring->last_read_seqno = 0; >> ring->set_seqno(ring, seqno); >> ring->hangcheck.seqno = seqno; >> } >> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h >> index 64a4346..40394d3 100644 >> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h >> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h >> @@ -269,6 +269,9 @@ struct intel_engine_cs { >> bool gpu_caches_dirty; >> bool fbc_dirty; >> >> + /* For optimising request completion events */ >> + u32 last_read_seqno; >> + >> wait_queue_head_t irq_queue; >> >> struct intel_context *default_context; >> -- >> 1.7.9.5 >> >> _______________________________________________ >> Intel-gfx mailing list >> Intel-gfx@lists.freedesktop.org >> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
On Tue, Oct 28, 2014 at 03:36:29PM +0000, John Harrison wrote: > On 19/10/2014 15:14, Daniel Vetter wrote: > >On Tue, Oct 07, 2014 at 05:47:29PM +0100, John.C.Harrison@Intel.com wrote: > >>From: John Harrison <John.C.Harrison@Intel.com> > >> > >>For: VIZ-4377 > >>Signed-off-by: John.C.Harrison@Intel.com > >Why? If it's just for performance I think we should do this as part of the > >switch to struct fence, which already has this. > For performance and also as part of getting rid of all the > i915_seqno_passed() calls. The why was more a request for a proper commit message. If you claim perfomance, then the commit message also needs the relevant data. > >>--- > >> drivers/gpu/drm/i915/i915_drv.h | 34 ++++++++++++++++--------------- > >> drivers/gpu/drm/i915/i915_gem.c | 21 +++++++++++++++++++ > >> drivers/gpu/drm/i915/intel_lrc.c | 1 + > >> drivers/gpu/drm/i915/intel_ringbuffer.c | 2 ++ > >> drivers/gpu/drm/i915/intel_ringbuffer.h | 3 +++ > >> 5 files changed, 45 insertions(+), 16 deletions(-) > >> > >>diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > >>index cdbbdeb..4ab3b23 100644 > >>--- a/drivers/gpu/drm/i915/i915_drv.h > >>+++ b/drivers/gpu/drm/i915/i915_drv.h > >>@@ -1913,6 +1913,9 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old, > >> struct drm_i915_gem_request { > >> struct kref ref; > >>+ /** Is this request known to be complete? */ > >>+ bool complete; > >>+ > >> /** On Which ring this request was generated */ > >> struct intel_engine_cs *ring; > >>@@ -1943,6 +1946,8 @@ struct drm_i915_gem_request { > >> }; > >> void i915_gem_request_free(struct kref *req_ref); > >>+void i915_gem_complete_requests_ring(struct intel_engine_cs *ring, > >>+ bool lazy_coherency); > >> static inline uint32_t > >> i915_gem_request_get_seqno(struct drm_i915_gem_request *req) > >>@@ -1968,7 +1973,19 @@ i915_gem_request_unreference(struct drm_i915_gem_request *req) > >> kref_put(&req->ref, i915_gem_request_free); > >> } > >>-/* ??? i915_gem_request_completed should be here ??? */ > >>+static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req, > >>+ bool lazy_coherency) > >>+{ > >>+ if (req->complete) > >>+ return true; > >>+ > >>+ if (req->ring == NULL) > >>+ return false; > >>+ > >>+ i915_gem_complete_requests_ring(req->ring, lazy_coherency); > >>+ > >>+ return req->complete; > >>+} > >Also, this is looking way too big now I think ;-) If you have a full > >non-inline function call in your inline it's always a net loss. > >-Daniel > That depends how you define gain/loss. In terms of performance, it can still > be a gain because the function call is not always taken. Whereas the > alternative is at least one function calls and possibly two. Either way, as > noted already, the final intention is for this to become simply 'return > req->complete' and not have any function calls at all. Thrashing instructions caches is usually what this does, so even when you microbenchmark a gain it might be a net loss. Rule-of-thumb is static inline is ok if it results in a net reduction of the binary, not for any other case. Unless you supply solid performance data justifying it. Also, static inlines in random headers are a pain for documentation. -Daniel
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index cdbbdeb..4ab3b23 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1913,6 +1913,9 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old, struct drm_i915_gem_request { struct kref ref; + /** Is this request known to be complete? */ + bool complete; + /** On Which ring this request was generated */ struct intel_engine_cs *ring; @@ -1943,6 +1946,8 @@ struct drm_i915_gem_request { }; void i915_gem_request_free(struct kref *req_ref); +void i915_gem_complete_requests_ring(struct intel_engine_cs *ring, + bool lazy_coherency); static inline uint32_t i915_gem_request_get_seqno(struct drm_i915_gem_request *req) @@ -1968,7 +1973,19 @@ i915_gem_request_unreference(struct drm_i915_gem_request *req) kref_put(&req->ref, i915_gem_request_free); } -/* ??? i915_gem_request_completed should be here ??? */ +static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req, + bool lazy_coherency) +{ + if (req->complete) + return true; + + if (req->ring == NULL) + return false; + + i915_gem_complete_requests_ring(req->ring, lazy_coherency); + + return req->complete; +} struct drm_i915_file_private { struct drm_i915_private *dev_priv; @@ -3019,19 +3036,4 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms) } } -static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req, - bool lazy_coherency) -{ - u32 seqno; - - BUG_ON(req == NULL); - - if (req->ring == NULL) - return false; - - seqno = req->ring->get_seqno(req->ring, lazy_coherency); - - return i915_seqno_passed(seqno, req->seqno); -} - #endif diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 0f14333..0a9b29e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2641,6 +2641,27 @@ void i915_gem_reset(struct drm_device *dev) i915_gem_restore_fences(dev); } +void i915_gem_complete_requests_ring(struct intel_engine_cs *ring, + bool lazy_coherency) +{ + struct drm_i915_gem_request *req; + u32 seqno; + + seqno = ring->get_seqno(ring, lazy_coherency); + if (seqno == ring->last_read_seqno) + return; + + list_for_each_entry(req, &ring->request_list, list) { + if (req->complete) + continue; + + if (i915_seqno_passed(seqno, req->seqno)) + req->complete = true; + } + + ring->last_read_seqno = seqno; +} + /** * This function clears the request list as sequence numbers are passed. */ diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 744684a..57acd2a 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -808,6 +808,7 @@ static int logical_ring_alloc_seqno(struct intel_engine_cs *ring, kref_init(&request->ref); request->ring = NULL; + request->complete = false; ret = i915_gem_get_seqno(ring->dev, &request->seqno); if (ret) { diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 0a3c24a..392dc25 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2023,6 +2023,7 @@ intel_ring_alloc_seqno(struct intel_engine_cs *ring) kref_init(&request->ref); request->ring = NULL; + request->complete = false; ret = i915_gem_get_seqno(ring->dev, &request->seqno); if (ret) { @@ -2115,6 +2116,7 @@ void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno) I915_WRITE(RING_SYNC_2(ring->mmio_base), 0); } + ring->last_read_seqno = 0; ring->set_seqno(ring, seqno); ring->hangcheck.seqno = seqno; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 64a4346..40394d3 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -269,6 +269,9 @@ struct intel_engine_cs { bool gpu_caches_dirty; bool fbc_dirty; + /* For optimising request completion events */ + u32 last_read_seqno; + wait_queue_head_t irq_queue; struct intel_context *default_context;