diff mbox series

[5/8] drm/i915/guc: Move lrc desc setup to where it is needed

Message ID 20220217235207.930153-6-John.C.Harrison@Intel.com (mailing list archive)
State New, archived
Headers show
Series Prep work for next GuC release | expand

Commit Message

John Harrison Feb. 17, 2022, 11:52 p.m. UTC
From: John Harrison <John.C.Harrison@Intel.com>

The LRC descriptor was being initialised early on in the context
registration sequence. It could then be determined that the actual
registration needs to be delayed and the descriptor would be wiped
out. This is inefficient, so move the setup to later in the process
after the point of no return.

Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
---
 drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

Comments

Daniele Ceraolo Spurio Feb. 23, 2022, 1:12 a.m. UTC | #1
On 2/17/2022 3:52 PM, John.C.Harrison@Intel.com wrote:
> From: John Harrison <John.C.Harrison@Intel.com>
>
> The LRC descriptor was being initialised early on in the context
> registration sequence. It could then be determined that the actual
> registration needs to be delayed and the descriptor would be wiped
> out. This is inefficient, so move the setup to later in the process
> after the point of no return.
>
> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
> ---
>   drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 11 +++++++++--
>   1 file changed, 9 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index 0ab2d1a24bf6..aa74ec74194a 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -2153,6 +2153,8 @@ static int __guc_action_register_context(struct intel_guc *guc,
>   					     0, loop);
>   }
>   
> +static void prepare_context_registration_info(struct intel_context *ce);
> +
>   static int register_context(struct intel_context *ce, bool loop)
>   {
>   	struct intel_guc *guc = ce_to_guc(ce);
> @@ -2163,6 +2165,8 @@ static int register_context(struct intel_context *ce, bool loop)
>   	GEM_BUG_ON(intel_context_is_child(ce));
>   	trace_intel_context_register(ce);
>   
> +	prepare_context_registration_info(ce);
> +
>   	if (intel_context_is_parent(ce))
>   		ret = __guc_action_register_multi_lrc(guc, ce, ce->guc_id.id,
>   						      offset, loop);
> @@ -2246,7 +2250,6 @@ static void prepare_context_registration_info(struct intel_context *ce)
>   	struct intel_context *child;
>   
>   	GEM_BUG_ON(!engine->mask);
> -	GEM_BUG_ON(!sched_state_is_init(ce));
>   
>   	/*
>   	 * Ensure LRC + CT vmas are is same region as write barrier is done
> @@ -2314,9 +2317,13 @@ static int try_context_registration(struct intel_context *ce, bool loop)
>   	bool context_registered;
>   	int ret = 0;
>   
> +	GEM_BUG_ON(!sched_state_is_init(ce));
> +
>   	context_registered = ctx_id_mapped(guc, desc_idx);
>   
> -	prepare_context_registration_info(ce);
> +	if (context_registered)
> +		clr_ctx_id_mapping(guc, desc_idx);
> +	set_ctx_id_mapping(guc, desc_idx, ce);

I think we can do the clr unconditionally. Also, should we drop the
clr/set pair in prepare_context_registration_info? It shouldn't be
needed, unless I'm missing a path where we don't pass through here.

Daniele

>   
>   	/*
>   	 * The context_lookup xarray is used to determine if the hardware
John Harrison Feb. 23, 2022, 8:23 p.m. UTC | #2
On 2/22/2022 17:12, Ceraolo Spurio, Daniele wrote:
> On 2/17/2022 3:52 PM, John.C.Harrison@Intel.com wrote:
>> From: John Harrison <John.C.Harrison@Intel.com>
>>
>> The LRC descriptor was being initialised early on in the context
>> registration sequence. It could then be determined that the actual
>> registration needs to be delayed and the descriptor would be wiped
>> out. This is inefficient, so move the setup to later in the process
>> after the point of no return.
>>
>> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
>> ---
>>   drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 11 +++++++++--
>>   1 file changed, 9 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
>> b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
>> index 0ab2d1a24bf6..aa74ec74194a 100644
>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
>> @@ -2153,6 +2153,8 @@ static int __guc_action_register_context(struct 
>> intel_guc *guc,
>>                            0, loop);
>>   }
>>   +static void prepare_context_registration_info(struct intel_context 
>> *ce);
>> +
>>   static int register_context(struct intel_context *ce, bool loop)
>>   {
>>       struct intel_guc *guc = ce_to_guc(ce);
>> @@ -2163,6 +2165,8 @@ static int register_context(struct 
>> intel_context *ce, bool loop)
>>       GEM_BUG_ON(intel_context_is_child(ce));
>>       trace_intel_context_register(ce);
>>   +    prepare_context_registration_info(ce);
>> +
>>       if (intel_context_is_parent(ce))
>>           ret = __guc_action_register_multi_lrc(guc, ce, ce->guc_id.id,
>>                                 offset, loop);
>> @@ -2246,7 +2250,6 @@ static void 
>> prepare_context_registration_info(struct intel_context *ce)
>>       struct intel_context *child;
>>         GEM_BUG_ON(!engine->mask);
>> -    GEM_BUG_ON(!sched_state_is_init(ce));
>>         /*
>>        * Ensure LRC + CT vmas are is same region as write barrier is 
>> done
>> @@ -2314,9 +2317,13 @@ static int try_context_registration(struct 
>> intel_context *ce, bool loop)
>>       bool context_registered;
>>       int ret = 0;
>>   +    GEM_BUG_ON(!sched_state_is_init(ce));
>> +
>>       context_registered = ctx_id_mapped(guc, desc_idx);
>>   -    prepare_context_registration_info(ce);
>> +    if (context_registered)
>> +        clr_ctx_id_mapping(guc, desc_idx);
>> +    set_ctx_id_mapping(guc, desc_idx, ce);
>
> I think we can do the clr unconditionally. Also, should we drop the 
> clr/set pair in prepare_context_registration_info? it shouldn't be 
> needed, unless I'm missing a path where we don;t pass through here.
>
> Daniele
I don't believe so.

The point is that the context id might have changed (it got stolen, 
re-used, etc. - all the state machine code below can cause aborts and 
retries and such like if something is pending and the register needs to 
be delayed). So we need to clear out the old mapping and add a new one 
to be safe. Also, I'm not sure whether it is safe to do an xa_store to an
already used entry as an update, or whether you are supposed to clear it
first. But that's what the code did before and I'm trying to not change
any actual behaviour here.

John.

>
>>         /*
>>        * The context_lookup xarray is used to determine if the hardware
>
Daniele Ceraolo Spurio Feb. 24, 2022, 2:03 a.m. UTC | #3
On 2/23/2022 12:23 PM, John Harrison wrote:
> On 2/22/2022 17:12, Ceraolo Spurio, Daniele wrote:
>> On 2/17/2022 3:52 PM, John.C.Harrison@Intel.com wrote:
>>> From: John Harrison <John.C.Harrison@Intel.com>
>>>
>>> The LRC descriptor was being initialised early on in the context
>>> registration sequence. It could then be determined that the actual
>>> registration needs to be delayed and the descriptor would be wiped
>>> out. This is inefficient, so move the setup to later in the process
>>> after the point of no return.
>>>
>>> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 11 +++++++++--
>>>   1 file changed, 9 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
>>> b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
>>> index 0ab2d1a24bf6..aa74ec74194a 100644
>>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
>>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
>>> @@ -2153,6 +2153,8 @@ static int 
>>> __guc_action_register_context(struct intel_guc *guc,
>>>                            0, loop);
>>>   }
>>>   +static void prepare_context_registration_info(struct 
>>> intel_context *ce);
>>> +
>>>   static int register_context(struct intel_context *ce, bool loop)
>>>   {
>>>       struct intel_guc *guc = ce_to_guc(ce);
>>> @@ -2163,6 +2165,8 @@ static int register_context(struct 
>>> intel_context *ce, bool loop)
>>>       GEM_BUG_ON(intel_context_is_child(ce));
>>>       trace_intel_context_register(ce);
>>>   +    prepare_context_registration_info(ce);
>>> +
>>>       if (intel_context_is_parent(ce))
>>>           ret = __guc_action_register_multi_lrc(guc, ce, ce->guc_id.id,
>>>                                 offset, loop);
>>> @@ -2246,7 +2250,6 @@ static void 
>>> prepare_context_registration_info(struct intel_context *ce)
>>>       struct intel_context *child;
>>>         GEM_BUG_ON(!engine->mask);
>>> -    GEM_BUG_ON(!sched_state_is_init(ce));
>>>         /*
>>>        * Ensure LRC + CT vmas are is same region as write barrier is 
>>> done
>>> @@ -2314,9 +2317,13 @@ static int try_context_registration(struct 
>>> intel_context *ce, bool loop)
>>>       bool context_registered;
>>>       int ret = 0;
>>>   +    GEM_BUG_ON(!sched_state_is_init(ce));
>>> +
>>>       context_registered = ctx_id_mapped(guc, desc_idx);
>>>   -    prepare_context_registration_info(ce);
>>> +    if (context_registered)
>>> +        clr_ctx_id_mapping(guc, desc_idx);
>>> +    set_ctx_id_mapping(guc, desc_idx, ce);
>>
>> I think we can do the clr unconditionally. Also, should we drop the 
>> clr/set pair in prepare_context_registration_info? it shouldn't be 
>> needed, unless I'm missing a path where we don;t pass through here.
>>
>> Daniele
> I don't believe so.
>
> The point is that the context id might have changed (it got stolen, 
> re-used, etc. - all the state machine code below can cause aborts and 
> retries and such like if something is pending and the register needs 
> to be delayed). So we need to clear out the old mapping and add a new 
> one to be safe. Also, I'm not sure if it is safe to do a xa_store to 
> an already used entry as an update or if you are supposed to clear it 
> first? But that's what the code did before and I'm trying to not 
> change any actual behaviour here.

I was comparing with the previous behavior. Before this patch, we only did
the setting of the ctx_id here (inside
prepare_context_registration_info), and you're not changing any of the
abort/retry behavior, so if it was enough before it should be enough now.

Regarding the xa ops, we did an unconditional clear before, so it should 
be ok to just do the same and have the clear and set back to back 
without checking if the context ID was already in use or not.

Daniele

>
> John.
>
>>
>>>         /*
>>>        * The context_lookup xarray is used to determine if the hardware
>>
>
John Harrison Feb. 24, 2022, 9:13 p.m. UTC | #4
On 2/23/2022 18:03, Ceraolo Spurio, Daniele wrote:
> On 2/23/2022 12:23 PM, John Harrison wrote:
>> On 2/22/2022 17:12, Ceraolo Spurio, Daniele wrote:
>>> On 2/17/2022 3:52 PM, John.C.Harrison@Intel.com wrote:
>>>> From: John Harrison <John.C.Harrison@Intel.com>
>>>>
>>>> The LRC descriptor was being initialised early on in the context
>>>> registration sequence. It could then be determined that the actual
>>>> registration needs to be delayed and the descriptor would be wiped
>>>> out. This is inefficient, so move the setup to later in the process
>>>> after the point of no return.
>>>>
>>>> Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
>>>> ---
>>>>   drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 11 +++++++++--
>>>>   1 file changed, 9 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
>>>> b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
>>>> index 0ab2d1a24bf6..aa74ec74194a 100644
>>>> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
>>>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
>>>> @@ -2153,6 +2153,8 @@ static int 
>>>> __guc_action_register_context(struct intel_guc *guc,
>>>>                            0, loop);
>>>>   }
>>>>   +static void prepare_context_registration_info(struct 
>>>> intel_context *ce);
>>>> +
>>>>   static int register_context(struct intel_context *ce, bool loop)
>>>>   {
>>>>       struct intel_guc *guc = ce_to_guc(ce);
>>>> @@ -2163,6 +2165,8 @@ static int register_context(struct 
>>>> intel_context *ce, bool loop)
>>>>       GEM_BUG_ON(intel_context_is_child(ce));
>>>>       trace_intel_context_register(ce);
>>>>   +    prepare_context_registration_info(ce);
>>>> +
>>>>       if (intel_context_is_parent(ce))
>>>>           ret = __guc_action_register_multi_lrc(guc, ce, 
>>>> ce->guc_id.id,
>>>>                                 offset, loop);
>>>> @@ -2246,7 +2250,6 @@ static void 
>>>> prepare_context_registration_info(struct intel_context *ce)
>>>>       struct intel_context *child;
>>>>         GEM_BUG_ON(!engine->mask);
>>>> -    GEM_BUG_ON(!sched_state_is_init(ce));
>>>>         /*
>>>>        * Ensure LRC + CT vmas are is same region as write barrier 
>>>> is done
>>>> @@ -2314,9 +2317,13 @@ static int try_context_registration(struct 
>>>> intel_context *ce, bool loop)
>>>>       bool context_registered;
>>>>       int ret = 0;
>>>>   +    GEM_BUG_ON(!sched_state_is_init(ce));
>>>> +
>>>>       context_registered = ctx_id_mapped(guc, desc_idx);
>>>>   -    prepare_context_registration_info(ce);
>>>> +    if (context_registered)
>>>> +        clr_ctx_id_mapping(guc, desc_idx);
>>>> +    set_ctx_id_mapping(guc, desc_idx, ce);
>>>
>>> I think we can do the clr unconditionally. Also, should we drop the 
>>> clr/set pair in prepare_context_registration_info? it shouldn't be 
>>> needed, unless I'm missing a path where we don;t pass through here.
>>>
>>> Daniele
>> I don't believe so.
>>
>> The point is that the context id might have changed (it got stolen, 
>> re-used, etc. - all the state machine code below can cause aborts and 
>> retries and such like if something is pending and the register needs 
>> to be delayed). So we need to clear out the old mapping and add a new 
>> one to be safe. Also, I'm not sure if it is safe to do a xa_store to 
>> an already used entry as an update or if you are supposed to clear it 
>> first? But that's what the code did before and I'm trying to not 
>> change any actual behaviour here.
>
> I was comparing with previous behavior. before this patch, we only do 
> the setting of the ctx_id here (inside 
> prepare_context_registration_info) and you're not changing any of the 
> abort/retry behavior, so if it was enough before it should be enough now.
Hmm, I think I must have confused myself with the intermediate steps 
along the way. Yes, it looks like the clr/set in prepare is redundant by 
the end.

>
> Regarding the xa ops, we did an unconditional clear before, so it 
> should be ok to just do the same and have the clear and set back to 
> back without checking if the context ID was already in use or not.
Actually, I was thinking you meant to drop the clr completely rather 
than just drop the condition. Yeah, that sounds fine.

Will post an update.

John.

>
> Daniele
>
>>
>> John.
>>
>>>
>>>>         /*
>>>>        * The context_lookup xarray is used to determine if the 
>>>> hardware
>>>
>>
>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 0ab2d1a24bf6..aa74ec74194a 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -2153,6 +2153,8 @@  static int __guc_action_register_context(struct intel_guc *guc,
 					     0, loop);
 }
 
+static void prepare_context_registration_info(struct intel_context *ce);
+
 static int register_context(struct intel_context *ce, bool loop)
 {
 	struct intel_guc *guc = ce_to_guc(ce);
@@ -2163,6 +2165,8 @@  static int register_context(struct intel_context *ce, bool loop)
 	GEM_BUG_ON(intel_context_is_child(ce));
 	trace_intel_context_register(ce);
 
+	prepare_context_registration_info(ce);
+
 	if (intel_context_is_parent(ce))
 		ret = __guc_action_register_multi_lrc(guc, ce, ce->guc_id.id,
 						      offset, loop);
@@ -2246,7 +2250,6 @@  static void prepare_context_registration_info(struct intel_context *ce)
 	struct intel_context *child;
 
 	GEM_BUG_ON(!engine->mask);
-	GEM_BUG_ON(!sched_state_is_init(ce));
 
 	/*
 	 * Ensure LRC + CT vmas are is same region as write barrier is done
@@ -2314,9 +2317,13 @@  static int try_context_registration(struct intel_context *ce, bool loop)
 	bool context_registered;
 	int ret = 0;
 
+	GEM_BUG_ON(!sched_state_is_init(ce));
+
 	context_registered = ctx_id_mapped(guc, desc_idx);
 
-	prepare_context_registration_info(ce);
+	if (context_registered)
+		clr_ctx_id_mapping(guc, desc_idx);
+	set_ctx_id_mapping(guc, desc_idx, ce);
 
 	/*
 	 * The context_lookup xarray is used to determine if the hardware