diff mbox

[14/17] drm/i915: Add stats for GuC log buffer flush interrupts

Message ID 1468158084-22028-15-git-send-email-akash.goel@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

akash.goel@intel.com July 10, 2016, 1:41 p.m. UTC
From: Akash Goel <akash.goel@intel.com>

GuC firmware sends an interrupt to flush the log buffer when it
becomes half full. GuC firmware also tracks how many times the
buffer overflowed.
It would be useful to maintain a statistics of how many flush
interrupts were received and for which type of log buffer,
along with the overflow count of each buffer type.
Augmented i915_log_info debugfs to report back these statistics.

Signed-off-by: Akash Goel <akash.goel@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c        | 26 ++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_guc_submission.c |  8 ++++++++
 drivers/gpu/drm/i915/i915_irq.c            |  1 +
 drivers/gpu/drm/i915/intel_guc.h           |  6 ++++++
 4 files changed, 41 insertions(+)

Comments

Tvrtko Ursulin July 15, 2016, 11:51 a.m. UTC | #1
On 10/07/16 14:41, akash.goel@intel.com wrote:
> From: Akash Goel <akash.goel@intel.com>
>
> GuC firmware sends an interrupt to flush the log buffer when it
> becomes half full. GuC firmware also tracks how many times the
> buffer overflowed.
> It would be useful to maintain a statistics of how many flush
> interrupts were received and for which type of log buffer,
> along with the overflow count of each buffer type.
> Augmented i915_log_info debugfs to report back these statistics.
>
> Signed-off-by: Akash Goel <akash.goel@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c        | 26 ++++++++++++++++++++++++++
>   drivers/gpu/drm/i915/i915_guc_submission.c |  8 ++++++++
>   drivers/gpu/drm/i915/i915_irq.c            |  1 +
>   drivers/gpu/drm/i915/intel_guc.h           |  6 ++++++
>   4 files changed, 41 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 3c9c7f7..888a18a 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -2538,6 +2538,30 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data)
>   	return 0;
>   }
>
> +static void i915_guc_log_info(struct seq_file *m,
> +				 struct drm_i915_private *dev_priv)
> +{
> +	struct intel_guc *guc = &dev_priv->guc;
> +
> +	seq_printf(m, "\nGuC logging stats:\n");
> +
> +	seq_printf(m, "\tISR:   flush count %10u, overflow count %8u\n",
> +		guc->log.flush_count[GUC_ISR_LOG_BUFFER],
> +		guc->log.total_overflow_count[GUC_ISR_LOG_BUFFER]);
> +
> +	seq_printf(m, "\tDPC:   flush count %10u, overflow count %8u\n",
> +		guc->log.flush_count[GUC_DPC_LOG_BUFFER],
> +		guc->log.total_overflow_count[GUC_DPC_LOG_BUFFER]);
> +
> +	seq_printf(m, "\tCRASH: flush count %10u, overflow count %8u\n",
> +		guc->log.flush_count[GUC_CRASH_DUMP_LOG_BUFFER],
> +		guc->log.total_overflow_count[GUC_CRASH_DUMP_LOG_BUFFER]);
> +
> +	seq_printf(m, "\tTotal flush interrupt count: %u\n",
> +			guc->log.flush_interrupt_count);
> +
> +}
> +
>   static void i915_guc_client_info(struct seq_file *m,
>   				 struct drm_i915_private *dev_priv,
>   				 struct i915_guc_client *client)
> @@ -2611,6 +2635,8 @@ static int i915_guc_info(struct seq_file *m, void *data)
>   	seq_printf(m, "\nGuC execbuf client @ %p:\n", guc.execbuf_client);
>   	i915_guc_client_info(m, dev_priv, &client);
>
> +	i915_guc_log_info(m, dev_priv);
> +
>   	/* Add more as required ... */
>
>   	return 0;
> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
> index c1e637f..9c94a43 100644
> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
> @@ -914,6 +914,14 @@ static void guc_read_update_log_buffer(struct drm_device *dev)
>   		log_buffer_state_local = *log_buffer_state;
>   		buffer_size = log_buffer_state_local.size;
>
> +		guc->log.flush_count[i] += log_buffer_state_local.flush_to_file;
> +		if (log_buffer_state_local.buffer_full_cnt !=
> +					guc->log.prev_overflow_count[i]) {
> +			guc->log.prev_overflow_count[i] =
> +					log_buffer_state_local.buffer_full_cnt;
> +			guc->log.total_overflow_count[i]++;

Is log_buffer_state_local.buffer_full_cnt guaranteed to be one here? Or 
you would need to increase total_overflow_count by its value?

> +		}
> +
>   		if (log_buffer_copy_state) {
>   			/* First copy the state structure */
>   			memcpy(log_buffer_copy_state, &log_buffer_state_local,
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index bdd7a67..c3fb67e 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1711,6 +1711,7 @@ static void gen9_guc_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir)
>   						&dev_priv->guc.events_work);
>   			}
>   		}
> +		dev_priv->guc.log.flush_interrupt_count++;
>   		spin_unlock(&dev_priv->irq_lock);
>   	}
>   }
> diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
> index 611f4a7..e911a32 100644
> --- a/drivers/gpu/drm/i915/intel_guc.h
> +++ b/drivers/gpu/drm/i915/intel_guc.h
> @@ -128,6 +128,12 @@ struct intel_guc_log {
>   	struct workqueue_struct *wq;
>   	void *buf_addr;
>   	struct rchan *relay_chan;
> +
> +	/* logging related stats */
> +	u32 flush_interrupt_count;
> +	u32 prev_overflow_count[GUC_MAX_LOG_BUFFER];
> +	u32 total_overflow_count[GUC_MAX_LOG_BUFFER];
> +	u32 flush_count[GUC_MAX_LOG_BUFFER];
>   };
>
>   struct intel_guc {
>

Regards,

Tvrtko
akash.goel@intel.com July 15, 2016, 3:58 p.m. UTC | #2
On 7/15/2016 5:21 PM, Tvrtko Ursulin wrote:
>
> On 10/07/16 14:41, akash.goel@intel.com wrote:
>> From: Akash Goel <akash.goel@intel.com>
>>
>> GuC firmware sends an interrupt to flush the log buffer when it
>> becomes half full. GuC firmware also tracks how many times the
>> buffer overflowed.
>> It would be useful to maintain a statistics of how many flush
>> interrupts were received and for which type of log buffer,
>> along with the overflow count of each buffer type.
>> Augmented i915_log_info debugfs to report back these statistics.
>>
>> Signed-off-by: Akash Goel <akash.goel@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_debugfs.c        | 26
>> ++++++++++++++++++++++++++
>>   drivers/gpu/drm/i915/i915_guc_submission.c |  8 ++++++++
>>   drivers/gpu/drm/i915/i915_irq.c            |  1 +
>>   drivers/gpu/drm/i915/intel_guc.h           |  6 ++++++
>>   4 files changed, 41 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c
>> b/drivers/gpu/drm/i915/i915_debugfs.c
>> index 3c9c7f7..888a18a 100644
>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>> @@ -2538,6 +2538,30 @@ static int i915_guc_load_status_info(struct
>> seq_file *m, void *data)
>>       return 0;
>>   }
>>
>> +static void i915_guc_log_info(struct seq_file *m,
>> +                 struct drm_i915_private *dev_priv)
>> +{
>> +    struct intel_guc *guc = &dev_priv->guc;
>> +
>> +    seq_printf(m, "\nGuC logging stats:\n");
>> +
>> +    seq_printf(m, "\tISR:   flush count %10u, overflow count %8u\n",
>> +        guc->log.flush_count[GUC_ISR_LOG_BUFFER],
>> +        guc->log.total_overflow_count[GUC_ISR_LOG_BUFFER]);
>> +
>> +    seq_printf(m, "\tDPC:   flush count %10u, overflow count %8u\n",
>> +        guc->log.flush_count[GUC_DPC_LOG_BUFFER],
>> +        guc->log.total_overflow_count[GUC_DPC_LOG_BUFFER]);
>> +
>> +    seq_printf(m, "\tCRASH: flush count %10u, overflow count %8u\n",
>> +        guc->log.flush_count[GUC_CRASH_DUMP_LOG_BUFFER],
>> +        guc->log.total_overflow_count[GUC_CRASH_DUMP_LOG_BUFFER]);
>> +
>> +    seq_printf(m, "\tTotal flush interrupt count: %u\n",
>> +            guc->log.flush_interrupt_count);
>> +
>> +}
>> +
>>   static void i915_guc_client_info(struct seq_file *m,
>>                    struct drm_i915_private *dev_priv,
>>                    struct i915_guc_client *client)
>> @@ -2611,6 +2635,8 @@ static int i915_guc_info(struct seq_file *m,
>> void *data)
>>       seq_printf(m, "\nGuC execbuf client @ %p:\n", guc.execbuf_client);
>>       i915_guc_client_info(m, dev_priv, &client);
>>
>> +    i915_guc_log_info(m, dev_priv);
>> +
>>       /* Add more as required ... */
>>
>>       return 0;
>> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c
>> b/drivers/gpu/drm/i915/i915_guc_submission.c
>> index c1e637f..9c94a43 100644
>> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
>> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
>> @@ -914,6 +914,14 @@ static void guc_read_update_log_buffer(struct
>> drm_device *dev)
>>           log_buffer_state_local = *log_buffer_state;
>>           buffer_size = log_buffer_state_local.size;
>>
>> +        guc->log.flush_count[i] += log_buffer_state_local.flush_to_file;
>> +        if (log_buffer_state_local.buffer_full_cnt !=
>> +                    guc->log.prev_overflow_count[i]) {
>> +            guc->log.prev_overflow_count[i] =
>> +                    log_buffer_state_local.buffer_full_cnt;
>> +            guc->log.total_overflow_count[i]++;
>
> Is log_buffer_state_local.buffer_full_cnt guaranteed to be one here? Or
> you would need to increase total_overflow_count by its value?
>

buffer_full_cnt will not remain as one. Its a 4 bit counter, will be 
incremented monotonically by GuC firmware on every new detection of 
overflow, so will increase from 0 to 15 & then wrap around.
Hence have to use '!=' in the condition instead of '>'.

Best regards
Akash

>> +        }
>> +
>>           if (log_buffer_copy_state) {
>>               /* First copy the state structure */
>>               memcpy(log_buffer_copy_state, &log_buffer_state_local,
>> diff --git a/drivers/gpu/drm/i915/i915_irq.c
>> b/drivers/gpu/drm/i915/i915_irq.c
>> index bdd7a67..c3fb67e 100644
>> --- a/drivers/gpu/drm/i915/i915_irq.c
>> +++ b/drivers/gpu/drm/i915/i915_irq.c
>> @@ -1711,6 +1711,7 @@ static void gen9_guc_irq_handler(struct
>> drm_i915_private *dev_priv, u32 gt_iir)
>>                           &dev_priv->guc.events_work);
>>               }
>>           }
>> +        dev_priv->guc.log.flush_interrupt_count++;
>>           spin_unlock(&dev_priv->irq_lock);
>>       }
>>   }
>> diff --git a/drivers/gpu/drm/i915/intel_guc.h
>> b/drivers/gpu/drm/i915/intel_guc.h
>> index 611f4a7..e911a32 100644
>> --- a/drivers/gpu/drm/i915/intel_guc.h
>> +++ b/drivers/gpu/drm/i915/intel_guc.h
>> @@ -128,6 +128,12 @@ struct intel_guc_log {
>>       struct workqueue_struct *wq;
>>       void *buf_addr;
>>       struct rchan *relay_chan;
>> +
>> +    /* logging related stats */
>> +    u32 flush_interrupt_count;
>> +    u32 prev_overflow_count[GUC_MAX_LOG_BUFFER];
>> +    u32 total_overflow_count[GUC_MAX_LOG_BUFFER];
>> +    u32 flush_count[GUC_MAX_LOG_BUFFER];
>>   };
>>
>>   struct intel_guc {
>>
>
> Regards,
>
> Tvrtko
>
Tvrtko Ursulin July 18, 2016, 10:16 a.m. UTC | #3
On 15/07/16 16:58, Goel, Akash wrote:
> On 7/15/2016 5:21 PM, Tvrtko Ursulin wrote:
>> On 10/07/16 14:41, akash.goel@intel.com wrote:
>>> From: Akash Goel <akash.goel@intel.com>
>>>
>>> GuC firmware sends an interrupt to flush the log buffer when it
>>> becomes half full. GuC firmware also tracks how many times the
>>> buffer overflowed.
>>> It would be useful to maintain a statistics of how many flush
>>> interrupts were received and for which type of log buffer,
>>> along with the overflow count of each buffer type.
>>> Augmented i915_log_info debugfs to report back these statistics.
>>>
>>> Signed-off-by: Akash Goel <akash.goel@intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/i915_debugfs.c        | 26
>>> ++++++++++++++++++++++++++
>>>   drivers/gpu/drm/i915/i915_guc_submission.c |  8 ++++++++
>>>   drivers/gpu/drm/i915/i915_irq.c            |  1 +
>>>   drivers/gpu/drm/i915/intel_guc.h           |  6 ++++++
>>>   4 files changed, 41 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c
>>> b/drivers/gpu/drm/i915/i915_debugfs.c
>>> index 3c9c7f7..888a18a 100644
>>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>>> @@ -2538,6 +2538,30 @@ static int i915_guc_load_status_info(struct
>>> seq_file *m, void *data)
>>>       return 0;
>>>   }
>>>
>>> +static void i915_guc_log_info(struct seq_file *m,
>>> +                 struct drm_i915_private *dev_priv)
>>> +{
>>> +    struct intel_guc *guc = &dev_priv->guc;
>>> +
>>> +    seq_printf(m, "\nGuC logging stats:\n");
>>> +
>>> +    seq_printf(m, "\tISR:   flush count %10u, overflow count %8u\n",
>>> +        guc->log.flush_count[GUC_ISR_LOG_BUFFER],
>>> +        guc->log.total_overflow_count[GUC_ISR_LOG_BUFFER]);
>>> +
>>> +    seq_printf(m, "\tDPC:   flush count %10u, overflow count %8u\n",
>>> +        guc->log.flush_count[GUC_DPC_LOG_BUFFER],
>>> +        guc->log.total_overflow_count[GUC_DPC_LOG_BUFFER]);
>>> +
>>> +    seq_printf(m, "\tCRASH: flush count %10u, overflow count %8u\n",
>>> +        guc->log.flush_count[GUC_CRASH_DUMP_LOG_BUFFER],
>>> +        guc->log.total_overflow_count[GUC_CRASH_DUMP_LOG_BUFFER]);
>>> +
>>> +    seq_printf(m, "\tTotal flush interrupt count: %u\n",
>>> +            guc->log.flush_interrupt_count);
>>> +
>>> +}
>>> +
>>>   static void i915_guc_client_info(struct seq_file *m,
>>>                    struct drm_i915_private *dev_priv,
>>>                    struct i915_guc_client *client)
>>> @@ -2611,6 +2635,8 @@ static int i915_guc_info(struct seq_file *m,
>>> void *data)
>>>       seq_printf(m, "\nGuC execbuf client @ %p:\n", guc.execbuf_client);
>>>       i915_guc_client_info(m, dev_priv, &client);
>>>
>>> +    i915_guc_log_info(m, dev_priv);
>>> +
>>>       /* Add more as required ... */
>>>
>>>       return 0;
>>> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c
>>> b/drivers/gpu/drm/i915/i915_guc_submission.c
>>> index c1e637f..9c94a43 100644
>>> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
>>> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
>>> @@ -914,6 +914,14 @@ static void guc_read_update_log_buffer(struct
>>> drm_device *dev)
>>>           log_buffer_state_local = *log_buffer_state;
>>>           buffer_size = log_buffer_state_local.size;
>>>
>>> +        guc->log.flush_count[i] +=
>>> log_buffer_state_local.flush_to_file;
>>> +        if (log_buffer_state_local.buffer_full_cnt !=
>>> +                    guc->log.prev_overflow_count[i]) {
>>> +            guc->log.prev_overflow_count[i] =
>>> +                    log_buffer_state_local.buffer_full_cnt;
>>> +            guc->log.total_overflow_count[i]++;
>>
>> Is log_buffer_state_local.buffer_full_cnt guaranteed to be one here? Or
>> you would need to increase total_overflow_count by its value?
>>
>
> buffer_full_cnt will not remain as one. Its a 4 bit counter, will be
> incremented monotonically by GuC firmware on every new detection of
> overflow, so will increase from 0 to 15 & then wrap around.
> Hence have to use '!=' in the condition instead of '>'.

But can it happen that it jumps by more than one between being sampled 
here? In which case you would need to replace:

guc->log.total_overflow_count[i]++;

by something like:


guc->log.total_overflow_count[i] += 
log_buffer_state_local.buffer_full_cnt - guc->log.prev_overflow_count[i];

(Doesn't handle the wrap though, just to illustrate my point.)

Regards,

Tvrtko
akash.goel@intel.com July 18, 2016, 10:59 a.m. UTC | #4
On 7/18/2016 3:46 PM, Tvrtko Ursulin wrote:
>
> On 15/07/16 16:58, Goel, Akash wrote:
>> On 7/15/2016 5:21 PM, Tvrtko Ursulin wrote:
>>> On 10/07/16 14:41, akash.goel@intel.com wrote:
>>>> From: Akash Goel <akash.goel@intel.com>
>>>>
>>>> GuC firmware sends an interrupt to flush the log buffer when it
>>>> becomes half full. GuC firmware also tracks how many times the
>>>> buffer overflowed.
>>>> It would be useful to maintain a statistics of how many flush
>>>> interrupts were received and for which type of log buffer,
>>>> along with the overflow count of each buffer type.
>>>> Augmented i915_log_info debugfs to report back these statistics.
>>>>
>>>> Signed-off-by: Akash Goel <akash.goel@intel.com>
>>>> ---
>>>>   drivers/gpu/drm/i915/i915_debugfs.c        | 26
>>>> ++++++++++++++++++++++++++
>>>>   drivers/gpu/drm/i915/i915_guc_submission.c |  8 ++++++++
>>>>   drivers/gpu/drm/i915/i915_irq.c            |  1 +
>>>>   drivers/gpu/drm/i915/intel_guc.h           |  6 ++++++
>>>>   4 files changed, 41 insertions(+)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c
>>>> b/drivers/gpu/drm/i915/i915_debugfs.c
>>>> index 3c9c7f7..888a18a 100644
>>>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>>>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>>>> @@ -2538,6 +2538,30 @@ static int i915_guc_load_status_info(struct
>>>> seq_file *m, void *data)
>>>>       return 0;
>>>>   }
>>>>
>>>> +static void i915_guc_log_info(struct seq_file *m,
>>>> +                 struct drm_i915_private *dev_priv)
>>>> +{
>>>> +    struct intel_guc *guc = &dev_priv->guc;
>>>> +
>>>> +    seq_printf(m, "\nGuC logging stats:\n");
>>>> +
>>>> +    seq_printf(m, "\tISR:   flush count %10u, overflow count %8u\n",
>>>> +        guc->log.flush_count[GUC_ISR_LOG_BUFFER],
>>>> +        guc->log.total_overflow_count[GUC_ISR_LOG_BUFFER]);
>>>> +
>>>> +    seq_printf(m, "\tDPC:   flush count %10u, overflow count %8u\n",
>>>> +        guc->log.flush_count[GUC_DPC_LOG_BUFFER],
>>>> +        guc->log.total_overflow_count[GUC_DPC_LOG_BUFFER]);
>>>> +
>>>> +    seq_printf(m, "\tCRASH: flush count %10u, overflow count %8u\n",
>>>> +        guc->log.flush_count[GUC_CRASH_DUMP_LOG_BUFFER],
>>>> +        guc->log.total_overflow_count[GUC_CRASH_DUMP_LOG_BUFFER]);
>>>> +
>>>> +    seq_printf(m, "\tTotal flush interrupt count: %u\n",
>>>> +            guc->log.flush_interrupt_count);
>>>> +
>>>> +}
>>>> +
>>>>   static void i915_guc_client_info(struct seq_file *m,
>>>>                    struct drm_i915_private *dev_priv,
>>>>                    struct i915_guc_client *client)
>>>> @@ -2611,6 +2635,8 @@ static int i915_guc_info(struct seq_file *m,
>>>> void *data)
>>>>       seq_printf(m, "\nGuC execbuf client @ %p:\n",
>>>> guc.execbuf_client);
>>>>       i915_guc_client_info(m, dev_priv, &client);
>>>>
>>>> +    i915_guc_log_info(m, dev_priv);
>>>> +
>>>>       /* Add more as required ... */
>>>>
>>>>       return 0;
>>>> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c
>>>> b/drivers/gpu/drm/i915/i915_guc_submission.c
>>>> index c1e637f..9c94a43 100644
>>>> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
>>>> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
>>>> @@ -914,6 +914,14 @@ static void guc_read_update_log_buffer(struct
>>>> drm_device *dev)
>>>>           log_buffer_state_local = *log_buffer_state;
>>>>           buffer_size = log_buffer_state_local.size;
>>>>
>>>> +        guc->log.flush_count[i] +=
>>>> log_buffer_state_local.flush_to_file;
>>>> +        if (log_buffer_state_local.buffer_full_cnt !=
>>>> +                    guc->log.prev_overflow_count[i]) {
>>>> +            guc->log.prev_overflow_count[i] =
>>>> +                    log_buffer_state_local.buffer_full_cnt;
>>>> +            guc->log.total_overflow_count[i]++;
>>>
>>> Is log_buffer_state_local.buffer_full_cnt guaranteed to be one here? Or
>>> you would need to increase total_overflow_count by its value?
>>>
>>
>> buffer_full_cnt will not remain as one. Its a 4 bit counter, will be
>> incremented monotonically by GuC firmware on every new detection of
>> overflow, so will increase from 0 to 15 & then wrap around.
>> Hence have to use '!=' in the condition instead of '>'.
>
> But can it happen that it jumps by more than one between being sampled
> here? In which case you would need to replace:
>
> guc->log.total_overflow_count[i]++;
>
> by something like:
>
>
> guc->log.total_overflow_count[i] +=
> log_buffer_state_local.buffer_full_cnt - guc->log.prev_overflow_count[i];
>
> (Doesn't handle the wrap though, just to illustrate my point.)
>
Actually logic in GuC firmware is such that overflow counter cannot 
increment by more than 1 without Driver coming into picture in between, 
by the virtue of flush interrupt.
But nevertheless the logic on Driver side should be like the way you
suggested.

Does this revised logic looks fine ?

if (log_buffer_state_local.buffer_full_cnt !=
		guc->log.prev_overflow_count[i]) {
	new_overflow = 1;
	guc->log.total_overflow_count[i] += 
(log_buffer_state_local.buffer_full_cnt - guc->log.prev_overflow_count[i]);

	if (log_buffer_state_local.buffer_full_cnt < 
guc->log.prev_overflow_count[i])
		guc->log.total_overflow_count[i] += 15;

	log_buffer_state_local.buffer_full_cnt = guc->log.prev_overflow_count[i];
}


> Regards,
>
> Tvrtko
>
>
>
>
Tvrtko Ursulin July 18, 2016, 11:33 a.m. UTC | #5
On 18/07/16 11:59, Goel, Akash wrote:
> On 7/18/2016 3:46 PM, Tvrtko Ursulin wrote:
>>
>> On 15/07/16 16:58, Goel, Akash wrote:
>>> On 7/15/2016 5:21 PM, Tvrtko Ursulin wrote:
>>>> On 10/07/16 14:41, akash.goel@intel.com wrote:
>>>>> From: Akash Goel <akash.goel@intel.com>
>>>>>
>>>>> GuC firmware sends an interrupt to flush the log buffer when it
>>>>> becomes half full. GuC firmware also tracks how many times the
>>>>> buffer overflowed.
>>>>> It would be useful to maintain a statistics of how many flush
>>>>> interrupts were received and for which type of log buffer,
>>>>> along with the overflow count of each buffer type.
>>>>> Augmented i915_log_info debugfs to report back these statistics.
>>>>>
>>>>> Signed-off-by: Akash Goel <akash.goel@intel.com>
>>>>> ---
>>>>>   drivers/gpu/drm/i915/i915_debugfs.c        | 26
>>>>> ++++++++++++++++++++++++++
>>>>>   drivers/gpu/drm/i915/i915_guc_submission.c |  8 ++++++++
>>>>>   drivers/gpu/drm/i915/i915_irq.c            |  1 +
>>>>>   drivers/gpu/drm/i915/intel_guc.h           |  6 ++++++
>>>>>   4 files changed, 41 insertions(+)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c
>>>>> b/drivers/gpu/drm/i915/i915_debugfs.c
>>>>> index 3c9c7f7..888a18a 100644
>>>>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>>>>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>>>>> @@ -2538,6 +2538,30 @@ static int i915_guc_load_status_info(struct
>>>>> seq_file *m, void *data)
>>>>>       return 0;
>>>>>   }
>>>>>
>>>>> +static void i915_guc_log_info(struct seq_file *m,
>>>>> +                 struct drm_i915_private *dev_priv)
>>>>> +{
>>>>> +    struct intel_guc *guc = &dev_priv->guc;
>>>>> +
>>>>> +    seq_printf(m, "\nGuC logging stats:\n");
>>>>> +
>>>>> +    seq_printf(m, "\tISR:   flush count %10u, overflow count %8u\n",
>>>>> +        guc->log.flush_count[GUC_ISR_LOG_BUFFER],
>>>>> +        guc->log.total_overflow_count[GUC_ISR_LOG_BUFFER]);
>>>>> +
>>>>> +    seq_printf(m, "\tDPC:   flush count %10u, overflow count %8u\n",
>>>>> +        guc->log.flush_count[GUC_DPC_LOG_BUFFER],
>>>>> +        guc->log.total_overflow_count[GUC_DPC_LOG_BUFFER]);
>>>>> +
>>>>> +    seq_printf(m, "\tCRASH: flush count %10u, overflow count %8u\n",
>>>>> +        guc->log.flush_count[GUC_CRASH_DUMP_LOG_BUFFER],
>>>>> +        guc->log.total_overflow_count[GUC_CRASH_DUMP_LOG_BUFFER]);
>>>>> +
>>>>> +    seq_printf(m, "\tTotal flush interrupt count: %u\n",
>>>>> +            guc->log.flush_interrupt_count);
>>>>> +
>>>>> +}
>>>>> +
>>>>>   static void i915_guc_client_info(struct seq_file *m,
>>>>>                    struct drm_i915_private *dev_priv,
>>>>>                    struct i915_guc_client *client)
>>>>> @@ -2611,6 +2635,8 @@ static int i915_guc_info(struct seq_file *m,
>>>>> void *data)
>>>>>       seq_printf(m, "\nGuC execbuf client @ %p:\n",
>>>>> guc.execbuf_client);
>>>>>       i915_guc_client_info(m, dev_priv, &client);
>>>>>
>>>>> +    i915_guc_log_info(m, dev_priv);
>>>>> +
>>>>>       /* Add more as required ... */
>>>>>
>>>>>       return 0;
>>>>> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c
>>>>> b/drivers/gpu/drm/i915/i915_guc_submission.c
>>>>> index c1e637f..9c94a43 100644
>>>>> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
>>>>> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
>>>>> @@ -914,6 +914,14 @@ static void guc_read_update_log_buffer(struct
>>>>> drm_device *dev)
>>>>>           log_buffer_state_local = *log_buffer_state;
>>>>>           buffer_size = log_buffer_state_local.size;
>>>>>
>>>>> +        guc->log.flush_count[i] +=
>>>>> log_buffer_state_local.flush_to_file;
>>>>> +        if (log_buffer_state_local.buffer_full_cnt !=
>>>>> +                    guc->log.prev_overflow_count[i]) {
>>>>> +            guc->log.prev_overflow_count[i] =
>>>>> +                    log_buffer_state_local.buffer_full_cnt;
>>>>> +            guc->log.total_overflow_count[i]++;
>>>>
>>>> Is log_buffer_state_local.buffer_full_cnt guaranteed to be one here? Or
>>>> you would need to increase total_overflow_count by its value?
>>>>
>>>
>>> buffer_full_cnt will not remain as one. Its a 4 bit counter, will be
>>> incremented monotonically by GuC firmware on every new detection of
>>> overflow, so will increase from 0 to 15 & then wrap around.
>>> Hence have to use '!=' in the condition instead of '>'.
>>
>> But can it happen that it jumps by more than one between being sampled
>> here? In which case you would need to replace:
>>
>> guc->log.total_overflow_count[i]++;
>>
>> by something like:
>>
>>
>> guc->log.total_overflow_count[i] +=
>> log_buffer_state_local.buffer_full_cnt - guc->log.prev_overflow_count[i];
>>
>> (Doesn't handle the wrap though, just to illustrate my point.)
>>
> Actually logic in GuC firmware is such that overflow counter cannot
> increment by more than 1 without Driver coming into picture in between,
> by the virtue of flush interrupt.

Hm, and what happens to the data and overflow counter if the driver is 
not responsive enough?

> But nevertheless the logic on Driver side should be like the way you
> suggested.
>
> Does this revised logic looks fine ?
>
> if (log_buffer_state_local.buffer_full_cnt !=
>          guc->log.prev_overflow_count[i]) {
>      new_overflow = 1;
>      guc->log.total_overflow_count[i] +=
> (log_buffer_state_local.buffer_full_cnt - guc->log.prev_overflow_count[i]);
>
>      if (log_buffer_state_local.buffer_full_cnt <
> guc->log.prev_overflow_count[i])
>          guc->log.total_overflow_count[i] += 15;
>
>      log_buffer_state_local.buffer_full_cnt =
> guc->log.prev_overflow_count[i];
> }

Not sure, maybe += 16 ? If counter goes from 15 to zero, then -15, +15 = 
no change which is wrong?

But in general if the condition should not happen I would also put a 
WARN_ON_ONCE or a rate limited DRM_ERROR in that case.

Regards,

Tvrtko
akash.goel@intel.com July 18, 2016, 11:47 a.m. UTC | #6
On 7/18/2016 5:03 PM, Tvrtko Ursulin wrote:
>
> On 18/07/16 11:59, Goel, Akash wrote:
>> On 7/18/2016 3:46 PM, Tvrtko Ursulin wrote:
>>>
>>> On 15/07/16 16:58, Goel, Akash wrote:
>>>> On 7/15/2016 5:21 PM, Tvrtko Ursulin wrote:
>>>>> On 10/07/16 14:41, akash.goel@intel.com wrote:
>>>>>> From: Akash Goel <akash.goel@intel.com>
>>>>>>
>>>>>> GuC firmware sends an interrupt to flush the log buffer when it
>>>>>> becomes half full. GuC firmware also tracks how many times the
>>>>>> buffer overflowed.
>>>>>> It would be useful to maintain a statistics of how many flush
>>>>>> interrupts were received and for which type of log buffer,
>>>>>> along with the overflow count of each buffer type.
>>>>>> Augmented i915_log_info debugfs to report back these statistics.
>>>>>>
>>>>>> Signed-off-by: Akash Goel <akash.goel@intel.com>
>>>>>> ---
>>>>>>   drivers/gpu/drm/i915/i915_debugfs.c        | 26
>>>>>> ++++++++++++++++++++++++++
>>>>>>   drivers/gpu/drm/i915/i915_guc_submission.c |  8 ++++++++
>>>>>>   drivers/gpu/drm/i915/i915_irq.c            |  1 +
>>>>>>   drivers/gpu/drm/i915/intel_guc.h           |  6 ++++++
>>>>>>   4 files changed, 41 insertions(+)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c
>>>>>> b/drivers/gpu/drm/i915/i915_debugfs.c
>>>>>> index 3c9c7f7..888a18a 100644
>>>>>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>>>>>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>>>>>> @@ -2538,6 +2538,30 @@ static int i915_guc_load_status_info(struct
>>>>>> seq_file *m, void *data)
>>>>>>       return 0;
>>>>>>   }
>>>>>>
>>>>>> +static void i915_guc_log_info(struct seq_file *m,
>>>>>> +                 struct drm_i915_private *dev_priv)
>>>>>> +{
>>>>>> +    struct intel_guc *guc = &dev_priv->guc;
>>>>>> +
>>>>>> +    seq_printf(m, "\nGuC logging stats:\n");
>>>>>> +
>>>>>> +    seq_printf(m, "\tISR:   flush count %10u, overflow count %8u\n",
>>>>>> +        guc->log.flush_count[GUC_ISR_LOG_BUFFER],
>>>>>> +        guc->log.total_overflow_count[GUC_ISR_LOG_BUFFER]);
>>>>>> +
>>>>>> +    seq_printf(m, "\tDPC:   flush count %10u, overflow count %8u\n",
>>>>>> +        guc->log.flush_count[GUC_DPC_LOG_BUFFER],
>>>>>> +        guc->log.total_overflow_count[GUC_DPC_LOG_BUFFER]);
>>>>>> +
>>>>>> +    seq_printf(m, "\tCRASH: flush count %10u, overflow count %8u\n",
>>>>>> +        guc->log.flush_count[GUC_CRASH_DUMP_LOG_BUFFER],
>>>>>> +        guc->log.total_overflow_count[GUC_CRASH_DUMP_LOG_BUFFER]);
>>>>>> +
>>>>>> +    seq_printf(m, "\tTotal flush interrupt count: %u\n",
>>>>>> +            guc->log.flush_interrupt_count);
>>>>>> +
>>>>>> +}
>>>>>> +
>>>>>>   static void i915_guc_client_info(struct seq_file *m,
>>>>>>                    struct drm_i915_private *dev_priv,
>>>>>>                    struct i915_guc_client *client)
>>>>>> @@ -2611,6 +2635,8 @@ static int i915_guc_info(struct seq_file *m,
>>>>>> void *data)
>>>>>>       seq_printf(m, "\nGuC execbuf client @ %p:\n",
>>>>>> guc.execbuf_client);
>>>>>>       i915_guc_client_info(m, dev_priv, &client);
>>>>>>
>>>>>> +    i915_guc_log_info(m, dev_priv);
>>>>>> +
>>>>>>       /* Add more as required ... */
>>>>>>
>>>>>>       return 0;
>>>>>> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c
>>>>>> b/drivers/gpu/drm/i915/i915_guc_submission.c
>>>>>> index c1e637f..9c94a43 100644
>>>>>> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
>>>>>> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
>>>>>> @@ -914,6 +914,14 @@ static void guc_read_update_log_buffer(struct
>>>>>> drm_device *dev)
>>>>>>           log_buffer_state_local = *log_buffer_state;
>>>>>>           buffer_size = log_buffer_state_local.size;
>>>>>>
>>>>>> +        guc->log.flush_count[i] +=
>>>>>> log_buffer_state_local.flush_to_file;
>>>>>> +        if (log_buffer_state_local.buffer_full_cnt !=
>>>>>> +                    guc->log.prev_overflow_count[i]) {
>>>>>> +            guc->log.prev_overflow_count[i] =
>>>>>> +                    log_buffer_state_local.buffer_full_cnt;
>>>>>> +            guc->log.total_overflow_count[i]++;
>>>>>
>>>>> Is log_buffer_state_local.buffer_full_cnt guaranteed to be one
>>>>> here? Or
>>>>> you would need to increase total_overflow_count by its value?
>>>>>
>>>>
>>>> buffer_full_cnt will not remain as one. Its a 4 bit counter, will be
>>>> incremented monotonically by GuC firmware on every new detection of
>>>> overflow, so will increase from 0 to 15 & then wrap around.
>>>> Hence have to use '!=' in the condition instead of '>'.
>>>
>>> But can it happen that it jumps by more than one between being sampled
>>> here? In which case you would need to replace:
>>>
>>> guc->log.total_overflow_count[i]++;
>>>
>>> by something like:
>>>
>>>
>>> guc->log.total_overflow_count[i] +=
>>> log_buffer_state_local.buffer_full_cnt -
>>> guc->log.prev_overflow_count[i];
>>>
>>> (Doesn't handle the wrap though, just to illustrate my point.)
>>>
>> Actually logic in GuC firmware is such that overflow counter cannot
>> increment by more than 1 without Driver coming into picture in between,
>> by the virtue of flush interrupt.
>
> Hm, and what happens to the data and overflow counter if the driver is
> not responsive enough?
>
GuC will not stall and keep writing the logs into the buffer, if Driver 
is slow in responding to the previous flush interrupt.

But the overflow detection is done through a bit weird logic, which is 
executed only when GuC receives the response of the last flush interrupt 
from Driver, and increment is done by 1 only irrespective of how late 
the acknowledgement came from Driver side.

>> But nevertheless the logic on Driver side should be like the way you
>> suggested.
>>
>> Does this revised logic looks fine ?
>>
>> if (log_buffer_state_local.buffer_full_cnt !=
>>          guc->log.prev_overflow_count[i]) {
>>      new_overflow = 1;
>>      guc->log.total_overflow_count[i] +=
>> (log_buffer_state_local.buffer_full_cnt -
>> guc->log.prev_overflow_count[i]);
>>
>>      if (log_buffer_state_local.buffer_full_cnt <
>> guc->log.prev_overflow_count[i])
>>          guc->log.total_overflow_count[i] += 15;
>>
>>      log_buffer_state_local.buffer_full_cnt =
>> guc->log.prev_overflow_count[i];
>> }
>
> Not sure, maybe += 16 ? If counter goes from 15 to zero, then -15, +15 =
> no change which is wrong?
>
Right, it should be 16. Sorry, my bad.

> But in general if the condition should not happen I would also put a
> WARN_ON_ONCE or a rate limited DRM_ERROR in that case.
>
I think rate limited DRM_ERROR would be apt here.

Best regards
Akash
> Regards,
>
> Tvrtko
>
>
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 3c9c7f7..888a18a 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2538,6 +2538,30 @@  static int i915_guc_load_status_info(struct seq_file *m, void *data)
 	return 0;
 }
 
+static void i915_guc_log_info(struct seq_file *m,
+				 struct drm_i915_private *dev_priv)
+{
+	struct intel_guc *guc = &dev_priv->guc;
+
+	seq_printf(m, "\nGuC logging stats:\n");
+
+	seq_printf(m, "\tISR:   flush count %10u, overflow count %8u\n",
+		guc->log.flush_count[GUC_ISR_LOG_BUFFER],
+		guc->log.total_overflow_count[GUC_ISR_LOG_BUFFER]);
+
+	seq_printf(m, "\tDPC:   flush count %10u, overflow count %8u\n",
+		guc->log.flush_count[GUC_DPC_LOG_BUFFER],
+		guc->log.total_overflow_count[GUC_DPC_LOG_BUFFER]);
+
+	seq_printf(m, "\tCRASH: flush count %10u, overflow count %8u\n",
+		guc->log.flush_count[GUC_CRASH_DUMP_LOG_BUFFER],
+		guc->log.total_overflow_count[GUC_CRASH_DUMP_LOG_BUFFER]);
+
+	seq_printf(m, "\tTotal flush interrupt count: %u\n",
+			guc->log.flush_interrupt_count);
+
+}
+
 static void i915_guc_client_info(struct seq_file *m,
 				 struct drm_i915_private *dev_priv,
 				 struct i915_guc_client *client)
@@ -2611,6 +2635,8 @@  static int i915_guc_info(struct seq_file *m, void *data)
 	seq_printf(m, "\nGuC execbuf client @ %p:\n", guc.execbuf_client);
 	i915_guc_client_info(m, dev_priv, &client);
 
+	i915_guc_log_info(m, dev_priv);
+
 	/* Add more as required ... */
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index c1e637f..9c94a43 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -914,6 +914,14 @@  static void guc_read_update_log_buffer(struct drm_device *dev)
 		log_buffer_state_local = *log_buffer_state;
 		buffer_size = log_buffer_state_local.size;
 
+		guc->log.flush_count[i] += log_buffer_state_local.flush_to_file;
+		if (log_buffer_state_local.buffer_full_cnt !=
+					guc->log.prev_overflow_count[i]) {
+			guc->log.prev_overflow_count[i] =
+					log_buffer_state_local.buffer_full_cnt;
+			guc->log.total_overflow_count[i]++;
+		}
+
 		if (log_buffer_copy_state) {
 			/* First copy the state structure */
 			memcpy(log_buffer_copy_state, &log_buffer_state_local,
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index bdd7a67..c3fb67e 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1711,6 +1711,7 @@  static void gen9_guc_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir)
 						&dev_priv->guc.events_work);
 			}
 		}
+		dev_priv->guc.log.flush_interrupt_count++;
 		spin_unlock(&dev_priv->irq_lock);
 	}
 }
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h
index 611f4a7..e911a32 100644
--- a/drivers/gpu/drm/i915/intel_guc.h
+++ b/drivers/gpu/drm/i915/intel_guc.h
@@ -128,6 +128,12 @@  struct intel_guc_log {
 	struct workqueue_struct *wq;
 	void *buf_addr;
 	struct rchan *relay_chan;
+
+	/* logging related stats */
+	u32 flush_interrupt_count;
+	u32 prev_overflow_count[GUC_MAX_LOG_BUFFER];
+	u32 total_overflow_count[GUC_MAX_LOG_BUFFER];
+	u32 flush_count[GUC_MAX_LOG_BUFFER];
 };
 
 struct intel_guc {