diff mbox series

[6/6] ath11k: support GTK rekey offload

Message ID 20211011193750.4891-7-cjhuang@codeaurora.org (mailing list archive)
State Changes Requested
Delegated to: Kalle Valo
Headers show
Series ath11k: support WoW functionalities | expand

Commit Message

Carl Huang Oct. 11, 2021, 7:37 p.m. UTC
Host sets GTK related info to firmware before WoW is enabled, and
gets rekey replay_count and then disables GTK rekey when WoW quits.

Tested-on: QCA6390 hw2.0 PCI WLAN.HST.1.0.1-01740-QCAHSTSWPLZ_V2_TO_X86-1

Signed-off-by: Carl Huang <cjhuang@codeaurora.org>
---
 drivers/net/wireless/ath/ath11k/core.h |   8 +++
 drivers/net/wireless/ath/ath11k/mac.c  |  32 ++++++++++
 drivers/net/wireless/ath/ath11k/wmi.c  | 110 +++++++++++++++++++++++++++++++++
 drivers/net/wireless/ath/ath11k/wmi.h  |  39 ++++++++++++
 drivers/net/wireless/ath/ath11k/wow.c  |  49 ++++++++++++++-
 5 files changed, 237 insertions(+), 1 deletion(-)

Comments

Kalle Valo Dec. 9, 2021, 4:05 p.m. UTC | #1
Carl Huang <cjhuang@codeaurora.org> writes:

> Host sets GTK related info to firmware before WoW is enabled, and
> gets rekey replay_count and then disables GTK rekey when WoW quits.
>
> Tested-on: QCA6390 hw2.0 PCI WLAN.HST.1.0.1-01740-QCAHSTSWPLZ_V2_TO_X86-1
>
> Signed-off-by: Carl Huang <cjhuang@codeaurora.org>

[...]

> --- a/drivers/net/wireless/ath/ath11k/wmi.h
> +++ b/drivers/net/wireless/ath/ath11k/wmi.h
> @@ -5850,6 +5850,41 @@ struct wmi_set_arp_ns_offload_cmd {
>  	 */
>  } __packed;
>  
> +#define GTK_OFFLOAD_OPCODE_MASK             0xFF000000
> +#define GTK_OFFLOAD_ENABLE_OPCODE           0x01000000
> +#define GTK_OFFLOAD_DISABLE_OPCODE          0x02000000
> +#define GTK_OFFLOAD_REQUEST_STATUS_OPCODE   0x04000000
> +
> +#define GTK_OFFLOAD_KEK_BYTES       16
> +#define GTK_OFFLOAD_KCK_BYTES       16
> +#define GTK_REPLAY_COUNTER_BYTES    8
> +#define WMI_MAX_KEY_LEN             32
> +#define IGTK_PN_SIZE                6
> +
> +struct wmi_gtk_offload_status_event {
> +	u32 vdev_id;
> +	u32 flags;
> +	u32 refresh_cnt;
> +	u8 replay_counter[GTK_REPLAY_COUNTER_BYTES];
> +	u8 igtk_key_index;
> +	u8 igtk_key_length;
> +	u8 igtk_key_rsc[IGTK_PN_SIZE];
> +	u8 igtk_key[WMI_MAX_KEY_LEN];
> +	u8 gtk_key_index;
> +	u8 gtk_key_length;
> +	u8 gtk_key_rsc[GTK_REPLAY_COUNTER_BYTES];
> +	u8 gtk_key[WMI_MAX_KEY_LEN];
> +} __packed;

[...]

> +	arvif->rekey_data.replay_ctr = le64_to_cpup((__le64 *)ev->replay_counter);
> +	/* supplicant expects big-endian replay counter */
> +	replay_ctr = cpu_to_be64(le64_to_cpup((__le64 *)ev->replay_counter));
> +
> +	ieee80211_gtk_rekey_notify(arvif->vif, arvif->bssid,
> +				   (void *)&replay_ctr, GFP_KERNEL);

Please avoid casting as much possible, and also otherwise this just
looks weird. Isn't ath11k WMI commands and events supposed to be in CPU
endian and the firmware automatically translates them if CPU is little
or big endian? So why do you cast ev->replay_counter to __le64 then?
Wouldn't that break on big endian?
Carl Huang Dec. 17, 2021, 11:04 a.m. UTC | #2
On 2021-12-10 00:05, Kalle Valo wrote:
> Carl Huang <cjhuang@codeaurora.org> writes:
> 
>> Host sets GTK related info to firmware before WoW is enabled, and
>> gets rekey replay_count and then disables GTK rekey when WoW quits.
>> 
>> Tested-on: QCA6390 hw2.0 PCI 
>> WLAN.HST.1.0.1-01740-QCAHSTSWPLZ_V2_TO_X86-1
>> 
>> Signed-off-by: Carl Huang <cjhuang@codeaurora.org>
> 
> [...]
> 
>> --- a/drivers/net/wireless/ath/ath11k/wmi.h
>> +++ b/drivers/net/wireless/ath/ath11k/wmi.h
>> @@ -5850,6 +5850,41 @@ struct wmi_set_arp_ns_offload_cmd {
>>  	 */
>>  } __packed;
>> 
>> +#define GTK_OFFLOAD_OPCODE_MASK             0xFF000000
>> +#define GTK_OFFLOAD_ENABLE_OPCODE           0x01000000
>> +#define GTK_OFFLOAD_DISABLE_OPCODE          0x02000000
>> +#define GTK_OFFLOAD_REQUEST_STATUS_OPCODE   0x04000000
>> +
>> +#define GTK_OFFLOAD_KEK_BYTES       16
>> +#define GTK_OFFLOAD_KCK_BYTES       16
>> +#define GTK_REPLAY_COUNTER_BYTES    8
>> +#define WMI_MAX_KEY_LEN             32
>> +#define IGTK_PN_SIZE                6
>> +
>> +struct wmi_gtk_offload_status_event {
>> +	u32 vdev_id;
>> +	u32 flags;
>> +	u32 refresh_cnt;
>> +	u8 replay_counter[GTK_REPLAY_COUNTER_BYTES];
>> +	u8 igtk_key_index;
>> +	u8 igtk_key_length;
>> +	u8 igtk_key_rsc[IGTK_PN_SIZE];
>> +	u8 igtk_key[WMI_MAX_KEY_LEN];
>> +	u8 gtk_key_index;
>> +	u8 gtk_key_length;
>> +	u8 gtk_key_rsc[GTK_REPLAY_COUNTER_BYTES];
>> +	u8 gtk_key[WMI_MAX_KEY_LEN];
>> +} __packed;
> 
> [...]
> 
>> +	arvif->rekey_data.replay_ctr = le64_to_cpup((__le64 
>> *)ev->replay_counter);
>> +	/* supplicant expects big-endian replay counter */
>> +	replay_ctr = cpu_to_be64(le64_to_cpup((__le64 
>> *)ev->replay_counter));
>> +
>> +	ieee80211_gtk_rekey_notify(arvif->vif, arvif->bssid,
>> +				   (void *)&replay_ctr, GFP_KERNEL);
> 
> Please avoid casting as much possible, and also otherwise this just
> looks weird. Isn't ath11k WMI commands and events supposed to be in CPU
> endian and the firmware automatically translates them if CPU is little
> or big endian? So why do you cast ev->replay_counter to __le64 then?
> Wouldn't that break on big endian?
> 
Both cpu and firmware are supposed to be little endian in ath11k.
However, supplicant expects the replay_ctr in big endian format, that's
why an endian conversion is done here.

> --
> https://patchwork.kernel.org/project/linux-wireless/list/
> 
> https://wireless.wiki.kernel.org/en/developers/documentation/submittingpatches
Sven Eckelmann Dec. 18, 2021, 8:37 a.m. UTC | #3
> On Thursday, 9 December 2021 17:05:14 CET Kalle Valo wrote:
>> Isn't ath11k WMI commands and events supposed to be in CPU
>> endian and the firmware automatically translates them if CPU is little
>> or big endian? 
[...]
On Friday, 17 December 2021 12:04:45 CET Carl Huang wrote:
> Both cpu and firmware are supposed to be little endian in ath11k.

I hope this statement is incorrect. But if it isn't:

You cannot limit a non-architecture dependent driver to be only used by little 
endian CPUs. This would be grave bug in ath11k.

If your firmware requires wmi messages and similar things in little endian 
then you have to mark types correctly as big/little endian. E.g. __le32 
instead of u32. And then you have to convert everything manually with 
cpu_to_le32 and so on. See the ath10k code for examples.

Tools like sparse can assist you in your search for problematic places when 
your kernel has the __CHECK_ENDIAN__ related code activated. This is the 
default for kernels >= 4.10.


If Kalle' statement is true that the firmware takes care of endianness 
translation of WMI messages to host endianness, then your code would still be 
questionable:

>> +       /* supplicant expects big-endian replay counter */
>> +       replay_ctr = cpu_to_be64(le64_to_cpup((__le64 
>> *)ev->replay_counter));

Why isn't the firmware taking care of the conversion at that place?

Why are you defining it as `u8 replay_counter[GTK_REPLAY_COUNTER_BYTES];` in 
the struct instead of using `__le64 replay_counter;`?

What ensures that this is value is 64 bit aligned in memory? Wouldn't it be 
more correct to (see above) use

    replay_ctr = cpu_to_be64(get_unaligned_le64(ev->replay_counter));

Kind regards,
	Sven
Sven Eckelmann Dec. 18, 2021, 8:43 a.m. UTC | #4
On Saturday, 18 December 2021 09:37:02 CET Sven Eckelmann wrote:
> Why isn't the firmware taking care of the conversion at that place?

Forget this part - this was non-sense.

But the rest of the questions still stand.

Kind regards,
	Sven
Sven Eckelmann Dec. 18, 2021, 9:16 a.m. UTC | #5
On Saturday, 18 December 2021 09:37:02 CET Sven Eckelmann wrote:
> Why are you defining it as `u8 replay_counter[GTK_REPLAY_COUNTER_BYTES];` in 
> the struct instead of using `__le64 replay_counter;`?
> 
> What ensures that this is value is 64 bit aligned in memory? Wouldn't it be 
> more correct to (see above) use
> 
>     replay_ctr = cpu_to_be64(get_unaligned_le64(ev->replay_counter));
> 

Sorry for the noise, but the part of not knowing in which endianness the 
firmware return multi-byte values is freaking me out. The above statements 
assume that it is returning everything as little endian.

If it is actually returns in host byte order (no idea how the firmware 
determines this) then of course, the questions should be:

* Why are you defining it as `u8 replay_counter[GTK_REPLAY_COUNTER_BYTES];` in 
  the struct instead of using `u64 replay_counter;`?

* What ensures that this is value is 64 bit aligned in memory? Wouldn't it be 
  more correct (assuming it is a u64) to use

    replay_ctr = cpu_to_be64(get_unaligned64(ev->replay_counter));


Kind regards,
	Sven
Kalle Valo Dec. 20, 2021, 10:03 a.m. UTC | #6
Sven Eckelmann <sven@narfation.org> writes:

>> On Thursday, 9 December 2021 17:05:14 CET Kalle Valo wrote:
>>> Isn't ath11k WMI commands and events supposed to be in CPU
>>> endian and the firmware automatically translates them if CPU is little
>>> or big endian? 
> [...]
> On Friday, 17 December 2021 12:04:45 CET Carl Huang wrote:
>> Both cpu and firmware are supposed to be little endian in ath11k.
>
> I hope this statement is incorrect. But if it isn't:
>
> You cannot limit a non-architecture dependent driver to be only used by little 
> endian CPUs. This would be grave bug in ath11k.
>
> If your firmware requires wmi messages and similar things in little endian 
> then you have to mark types correctly as big/little endian. E.g. __le32 
> instead of u32. And then you have to convert everything manually with 
> cpu_to_le32 and so on. See the ath10k code for examples.
>
> Tools like sparse can assist you in your search for problematic places when 
> your kernel has the __CHECK_ENDIAN__ related code activated. This is the 
> default for kernels >= 4.10.

This is what I would have preferred to do in ath11k as well but a lot of
people preferred the firmware conversion method as the proprietary
driver uses the same, so I yielded. ath11k should work on big endian
cpus, but to my knowledge nobody has tested it so I do not know if it
really works or not. If someone can test please do let me know, I am
very curious to know if it really works.

ath11k enables the firmware swap feature like this:

/* Host software's Copy Engine configuration. */
#ifdef __BIG_ENDIAN
#define CE_ATTR_FLAGS CE_ATTR_BYTE_SWAP_DATA
#else
#define CE_ATTR_FLAGS 0
#endif

Also grep for BIG_ENDIAN, few functions have that.

> If Kalle' statement is true that the firmware takes care of endianness 
> translation of WMI messages to host endianness, then your code would still be 
> questionable:
>
>>> +       /* supplicant expects big-endian replay counter */
>>> +       replay_ctr = cpu_to_be64(le64_to_cpup((__le64 
>>> *)ev->replay_counter));
>
> Why isn't the firmware taking care of the conversion at that place?
>
> Why are you defining it as `u8 replay_counter[GTK_REPLAY_COUNTER_BYTES];` in 
> the struct instead of using `__le64 replay_counter;`?
>
> What ensures that this is value is 64 bit aligned in memory? Wouldn't it be 
> more correct to (see above) use
>
>     replay_ctr = cpu_to_be64(get_unaligned_le64(ev->replay_counter));

Yeah, if the host does the conversion we would use __le64. But at the
moment the firmware does the conversion so I think we should use
ath11k_ce_byte_swap():

/* For Big Endian Host, Copy Engine byte_swap is enabled
 * When Copy Engine does byte_swap, need to byte swap again for the
 * Host to get/put buffer content in the correct byte order
 */
void ath11k_ce_byte_swap(void *mem, u32 len)
{
	int i;

	if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) {
		if (!mem)
			return;

		for (i = 0; i < (len / 4); i++) {
			*(u32 *)mem = swab32(*(u32 *)mem);
			mem += 4;
		}
	}
}
Sven Eckelmann Dec. 20, 2021, 11:50 a.m. UTC | #7
On Monday, 20 December 2021 11:03:08 CET Kalle Valo wrote:
[...]

Thanks for all the explanation and pointers. I will try to use this to more 
clearly formulate my concern.

If I understood it correctly then ev->replay_counter is:

* __le64 on little endian systems
* __be64 on big endian systems

Or in short: it is just an u64.

> Yeah, if the host does the conversion we would use __le64. But at the
> moment the firmware does the conversion so I think we should use
> ath11k_ce_byte_swap():
> 
> /* For Big Endian Host, Copy Engine byte_swap is enabled
>  * When Copy Engine does byte_swap, need to byte swap again for the
>  * Host to get/put buffer content in the correct byte order
>  */
> void ath11k_ce_byte_swap(void *mem, u32 len)
> {
> 	int i;
> 
> 	if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) {
> 		if (!mem)
> 			return;
> 
> 		for (i = 0; i < (len / 4); i++) {
> 			*(u32 *)mem = swab32(*(u32 *)mem);
> 			mem += 4;
> 		}
> 	}
> }

This function doesn't work for 64 bit values (if they are actually in big 
endian). It just rearranges (len / 4) u32s to host byte order - so the upper 
and lower 32 bit values for an u64 would still be swapped.

Unless I misunderstood what CE_ATTR_BYTE_SWAP_DATA is supposed to do. Maybe it 
is not causing returned data to be in big/little endian but causes for one of 
the host endianess' that the data for 64-bit values in mixed endianness.

And if the function would operate on a struct with 16 bit or 8 bit values then 
we have something which we call here Kuddelmuddel [1].



But if the value is an u64, then the code in the patch is wrong:

> /* supplicant expects big-endian replay counter */
> replay_ctr = cpu_to_be64(le64_to_cpup((__le64 *)ev->replay_counter));

This would break on big endian architectures because ev->replay_counter is a 
__be64 and not a __le64 on these systems. Just from the way the byte ordering 
is supposed to look like - not the data type for the C-compiler).

If you have a look at what the code does (beside 64 bit load by _cpup), is 
just to add a single swap64 - either by cpu_to_be64 or by le64_to_cpup - 
depending on whether the host system is little endian or big endian.

So for a __le64, it would (besides the incorrectly aligned 64 bit load from 
struct wmi_gtk_offload_status_event),  do a single swap64 to __be64. This 
swap64 is from cpu_to_be64 and le64_to_cpup doesn't swap anything.

But on a big endian system, the __be64 would also be sent through a swap64 
(from le64_to_cpup) and cpu_to_be64 wouldn't swap anything. So at the end, it 
would be a __le64. So something which conflicts with the comment above this 
code line.


There are now various ways to correctly implement it:

* change replay_counter to an u64 in the (packed) struct and:

      replay_ctr = cpu_to_be64(ev->replay_counter);

* keep it as u8[8] in the struct and make sure yourself that an unaligned-safe
  64 bit load is used:

      replay_ctr = cpu_to_be64(get_unaligned((u64 *)ev->replay_counter));


Kind regards,
	Sven


[1] It is something like "jumble" or "mess"
Kalle Valo Dec. 21, 2021, 2:48 p.m. UTC | #8
Sven Eckelmann <sven@narfation.org> writes:

> On Monday, 20 December 2021 11:03:08 CET Kalle Valo wrote:
> [...]
>
> Thanks for all the explanation and pointers. I will try to use this to more 
> clearly formulate my concern.

Good idea, this is too complex.

> If I understood it correctly then ev->replay_counter is:
>
> * __le64 on little endian systems
> * __be64 on big endian systems
>
> Or in short: it is just an u64.

My understanding is that on little endian host it's (the number representing
the byte index):

1 2 3 4 5 6 7 8

And on big endian host it's (as the firmware automatically swapped the
values):

4 3 2 1 8 7 6 5

So for on big endian we need to use ath11k_ce_byte_swap() to get them
back to correct order. (Or to be exact we need to use
ath11k_ce_byte_swap() every time as it does nothing on a little endian
host.)

Completely untested, of course. I don't have a big endian system.

>> Yeah, if the host does the conversion we would use __le64. But at the
>> moment the firmware does the conversion so I think we should use
>> ath11k_ce_byte_swap():
>> 
>> /* For Big Endian Host, Copy Engine byte_swap is enabled
>>  * When Copy Engine does byte_swap, need to byte swap again for the
>>  * Host to get/put buffer content in the correct byte order
>>  */
>> void ath11k_ce_byte_swap(void *mem, u32 len)
>> {
>> 	int i;
>> 
>> 	if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) {
>> 		if (!mem)
>> 			return;
>> 
>> 		for (i = 0; i < (len / 4); i++) {
>> 			*(u32 *)mem = swab32(*(u32 *)mem);
>> 			mem += 4;
>> 		}
>> 	}
>> }
>
> This function doesn't work for 64 bit values (if they are actually in big 
> endian). It just rearranges (len / 4) u32s to host byte order - so the upper 
> and lower 32 bit values for an u64 would still be swapped.
>
> Unless I misunderstood what CE_ATTR_BYTE_SWAP_DATA is supposed to do. Maybe it 
> is not causing returned data to be in big/little endian but causes for one of 
> the host endianess' that the data for 64-bit values in mixed
> endianness.

So my understanding is that when CE_ATTR_BYTE_SWAP_DATA is enabled the
firmware automatically swaps the packets per every four bytes. That's
why all the fields in WMI commands and events are u32.

> And if the function would operate on a struct with 16 bit or 8 bit values then 
> we have something which we call here Kuddelmuddel [1].

Heh, need to remember that word :)

> But if the value is an u64, then the code in the patch is wrong:

The firmware interface should not have u16 or u8 fields. And for
anything larger ath11k_ce_byte_swap() should be used. Again, this is
just my recollection from discussions years back and I have not tested
this myself.
Sven Eckelmann Dec. 21, 2021, 8:39 p.m. UTC | #9
On Tuesday, 21 December 2021 15:48:53 CET Kalle Valo wrote:
> My understanding is that on little endian host it's (the number representing
> the byte index):
> 
> 1 2 3 4 5 6 7 8
> 
> And on big endian host it's (as the firmware automatically swapped the
> values):
> 
> 4 3 2 1 8 7 6 5
> 
> So for on big endian we need to use ath11k_ce_byte_swap() to get them
> back to correct order. (Or to be exact we need to use
> ath11k_ce_byte_swap() every time as it does nothing on a little endian
> host.)
> 
> Completely untested, of course. I don't have a big endian system.

Ok, just to summarize: the value is 0x0807060504030201 -> which is is 
correctly stored in memory as 0102030405060708 for little endian systems. Fine 
with this part. So if there would only be little endian systems than following 
code would be fine:

     replay_ctr = cpu_to_be64(get_unaligned((u64 *)ev->replay_counter));

According to the info from here, the memory from the firmware on big endian 
systems is 0403020108070605. So after switching it with the ath11k swap 
function, it is back to 0102030405060708 -> which is little endian again (and 
not aligned in memory). We must take care of it by converting in from __le64 
to a u64 before converting it to __be64. So we would end up with:

    __le64 replay_ctr_le;
    __be64 replay_ctr_be;
    u64 replay_ctr;

    /* TODO also swap bytes for (i)gt_key* back to little endian */
    ath11k_ce_byte_swap(ev->replay_counter, sizeof(ev->replay_counter));

    replay_ctr_le = get_unaligned((__le64 *)ev->replay_counter);
    replay_ctr = le64_to_cpu(replay_ctr_le);
    arvif->rekey_data.replay_ctr = replay_ctr;
    replay_ctr_be = cpu_to_be64(replay_ctr);

Of course, completely untested.


Another idea would be to change wmi_gtk_offload_status_event->replay_counter 
into two u32. In that case, it would be enough to do:

   __be64 replay_ctr_be;
    u64 replay_ctr;

    replay_ctr = ev->replay_counter[1];
    replay_ctr <<= 32;
    replay_ctr |= ev->replay_counter[0];

    arvif->rekey_data.replay_ctr = replay_ctr;
    replay_ctr_be = cpu_to_be64(replay_ctr);

replay_counter[1] could also be called replay_counter_upper - and 
replay_counter[0] just replay_counter_lower.


Which reminds me of that the memcpy from a u64 to
wmi_gtk_rekey_offload_cmd->replay_ctrl in ath11k_wmi_gtk_rekey_offload. This 
is of course also wrong. It must first be converted into a __le64 and the 
bytes must be pre-swapped (see below).


> So my understanding is that when CE_ATTR_BYTE_SWAP_DATA is enabled the
> firmware automatically swaps the packets per every four bytes. That's
> why all the fields in WMI commands and events are u32.
[...]
> The firmware interface should not have u16 or u8 fields. And for
> anything larger ath11k_ce_byte_swap() should be used. Again, this is
> just my recollection from discussions years back and I have not tested
> this myself.

Ok, so wmi_gtk_offload_status_event and wmi_gtk_rekey_offload_cmd are breaking 
this assertion because they are full of u8's. :(

Especially wmi_gtk_offload_status_event is problematic here because there are 
now a lot of single u8's in it. The original code must therefore use 
ath11k_ce_byte_swap on everything after
wmi_gtk_offload_status_event->refresh_cnt before accessing any of the members.

And ath11k_wmi_gtk_rekey_offload must perform the ath11k_ce_byte_swap on

* wmi_gtk_rekey_offload_cmd->kek
* wmi_gtk_rekey_offload_cmd->kck
* wmi_gtk_rekey_offload_cmd->replay_ctr

See also above why the memcpy to wmi_gtk_rekey_offload_cmd->replay_ctr is 
wrong in this function.


And it seems like wmi_obss_color_collision_event->obss_color_bitmap (see 
ath11k_wmi_obss_color_collision_event) could suffer from a similar problem.
Maybe key_rsc_counter in ath11k_wmi_vdev_install_key too - but this doesn't 
have any producers yet.

Kind regards,
	Sven
diff mbox series

Patch

diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h
index d9c6c1d..69461c3 100644
--- a/drivers/net/wireless/ath/ath11k/core.h
+++ b/drivers/net/wireless/ath/ath11k/core.h
@@ -256,6 +256,13 @@  struct ath11k_arp_ns_offload {
 	u8  mac_addr[ETH_ALEN];
 };
 
+struct ath11k_rekey_data {
+	u8 kck[NL80211_KCK_LEN];
+	u8 kek[NL80211_KCK_LEN];
+	u64 replay_ctr;
+	bool enable_offload;
+};
+
 struct ath11k_vif {
 	u32 vdev_id;
 	enum wmi_vdev_type vdev_type;
@@ -307,6 +314,7 @@  struct ath11k_vif {
 	struct ieee80211_chanctx_conf chanctx;
 	struct ath11k_reg_tpc_power_info reg_tpc_info;
 	struct ath11k_arp_ns_offload arp_ns_offload;
+	struct ath11k_rekey_data rekey_data;
 };
 
 struct ath11k_vif_iter {
diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c
index e3dc998..797e45e 100644
--- a/drivers/net/wireless/ath/ath11k/mac.c
+++ b/drivers/net/wireless/ath/ath11k/mac.c
@@ -2206,6 +2206,7 @@  static void ath11k_bss_assoc(struct ieee80211_hw *hw,
 	}
 
 	arvif->is_up = true;
+	arvif->rekey_data.enable_offload = false;
 
 	ath11k_dbg(ar->ab, ATH11K_DBG_MAC,
 		   "mac vdev %d up (associated) bssid %pM aid %d\n",
@@ -2239,6 +2240,8 @@  static void ath11k_bss_disassoc(struct ieee80211_hw *hw,
 
 	arvif->is_up = false;
 
+	memset(&arvif->rekey_data, 0, sizeof(arvif->rekey_data));
+
 	cancel_delayed_work_sync(&arvif->connection_loss_work);
 }
 
@@ -7451,6 +7454,34 @@  static void ath11k_mac_op_ipv6_changed(struct ieee80211_hw *hw,
 	ath11k_generate_ns_mc_addr(ar, offload);
 }
 
+static void ath11k_mac_op_set_rekey_data(struct ieee80211_hw *hw,
+					 struct ieee80211_vif *vif,
+					 struct cfg80211_gtk_rekey_data *data)
+{
+	struct ath11k *ar = hw->priv;
+	struct ath11k_vif *arvif = ath11k_vif_to_arvif(vif);
+	struct ath11k_rekey_data *rekey_data = &arvif->rekey_data;
+
+	ath11k_dbg(ar->ab, ATH11K_DBG_MAC, "Set rekey data vdev_id %d\n",
+		   arvif->vdev_id);
+	mutex_lock(&ar->conf_mutex);
+
+	memcpy(rekey_data->kck, data->kck, NL80211_KCK_LEN);
+	memcpy(rekey_data->kek, data->kek, NL80211_KEK_LEN);
+
+	/* supplicant works on big-endian, converts to cpu-endian */
+	rekey_data->replay_ctr = be64_to_cpup((__be64 *)data->replay_ctr);
+	arvif->rekey_data.enable_offload = true;
+
+	ath11k_dbg_dump(ar->ab, ATH11K_DBG_MAC, "KCK", NULL,
+			rekey_data->kck, NL80211_KCK_LEN);
+	ath11k_dbg_dump(ar->ab, ATH11K_DBG_MAC, "KEK", NULL,
+			rekey_data->kck, NL80211_KEK_LEN);
+	ath11k_dbg_dump(ar->ab, ATH11K_DBG_MAC, "replay ctr", NULL,
+			&rekey_data->replay_ctr, sizeof(rekey_data->replay_ctr));
+	mutex_unlock(&ar->conf_mutex);
+}
+
 static const struct ieee80211_ops ath11k_ops = {
 	.tx				= ath11k_mac_op_tx,
 	.start                          = ath11k_mac_op_start,
@@ -7465,6 +7496,7 @@  static const struct ieee80211_ops ath11k_ops = {
 	.hw_scan                        = ath11k_mac_op_hw_scan,
 	.cancel_hw_scan                 = ath11k_mac_op_cancel_hw_scan,
 	.set_key                        = ath11k_mac_op_set_key,
+	.set_rekey_data	                = ath11k_mac_op_set_rekey_data,
 	.sta_state                      = ath11k_mac_op_sta_state,
 	.sta_set_txpwr			= ath11k_mac_op_sta_set_txpwr,
 	.sta_rc_update			= ath11k_mac_op_sta_rc_update,
diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c
index 1e276d51..5b02e95 100644
--- a/drivers/net/wireless/ath/ath11k/wmi.c
+++ b/drivers/net/wireless/ath/ath11k/wmi.c
@@ -8005,6 +8005,53 @@  ath11k_wmi_tm_event_segmented(struct ath11k_base *ab, u32 cmd_id,
 	}
 }
 
+static void
+ath11k_wmi_gtk_offload_status_event(struct ath11k_base *ab,
+				    struct sk_buff *skb)
+{
+	const void **tb;
+	const struct wmi_gtk_offload_status_event *ev;
+	struct ath11k_vif *arvif;
+	__be64 replay_ctr;
+	int ret;
+
+	tb = ath11k_wmi_tlv_parse_alloc(ab, skb->data, skb->len, GFP_ATOMIC);
+	if (IS_ERR(tb)) {
+		ret = PTR_ERR(tb);
+		ath11k_warn(ab, "failed to parse tlv: %d\n", ret);
+		return;
+	}
+
+	ev = tb[WMI_TAG_GTK_OFFLOAD_STATUS_EVENT];
+	if (!ev) {
+		ath11k_warn(ab, "failed to fetch gtk offload status ev");
+		kfree(tb);
+		return;
+	}
+
+	arvif = ath11k_mac_get_arvif_by_vdev_id(ab, ev->vdev_id);
+	if (!arvif) {
+		ath11k_warn(ab, "failed to get arvif for vdev_id:%d\n",
+			    ev->vdev_id);
+		kfree(tb);
+		return;
+	}
+
+	ath11k_dbg(ab, ATH11K_DBG_WMI, "gtk offload event refresh_cnt:%d\n",
+		   ev->refresh_cnt);
+	ath11k_dbg_dump(ab, ATH11K_DBG_WMI, "gtk offload event replay_cnt",
+			NULL, ev->replay_counter, GTK_REPLAY_COUNTER_BYTES);
+
+	arvif->rekey_data.replay_ctr = le64_to_cpup((__le64 *)ev->replay_counter);
+	/* supplicant expects big-endian replay counter */
+	replay_ctr = cpu_to_be64(le64_to_cpup((__le64 *)ev->replay_counter));
+
+	ieee80211_gtk_rekey_notify(arvif->vif, arvif->bssid,
+				   (void *)&replay_ctr, GFP_KERNEL);
+
+	kfree(tb);
+}
+
 static void ath11k_wmi_tlv_op_rx(struct ath11k_base *ab, struct sk_buff *skb)
 {
 	struct wmi_cmd_hdr *cmd_hdr;
@@ -8133,6 +8180,9 @@  static void ath11k_wmi_tlv_op_rx(struct ath11k_base *ab, struct sk_buff *skb)
 	case WMI_RFKILL_STATE_CHANGE_EVENTID:
 		ath11k_rfkill_state_change_event(ab, skb);
 		break;
+	case WMI_GTK_OFFLOAD_STATUS_EVENTID:
+		ath11k_wmi_gtk_offload_status_event(ab, skb);
+		break;
 	/* TODO: Add remaining events */
 	default:
 		ath11k_dbg(ab, ATH11K_DBG_WMI, "Unknown eventid: 0x%x\n", id);
@@ -8873,3 +8923,63 @@  int ath11k_wmi_arp_ns_offload(struct ath11k *ar,
 
 	return ath11k_wmi_cmd_send(ar->wmi, skb, WMI_SET_ARP_NS_OFFLOAD_CMDID);
 }
+
+int ath11k_wmi_gtk_rekey_offload(struct ath11k *ar,
+				 struct ath11k_vif *arvif, bool enable)
+{
+	struct wmi_gtk_rekey_offload_cmd *cmd;
+	struct ath11k_rekey_data *rekey_data = &arvif->rekey_data;
+	int len;
+	struct sk_buff *skb;
+
+	len = sizeof(*cmd);
+	skb =  ath11k_wmi_alloc_skb(ar->wmi->wmi_ab, len);
+	if (!skb)
+		return -ENOMEM;
+
+	cmd = (struct wmi_gtk_rekey_offload_cmd *)skb->data;
+	cmd->tlv_header = FIELD_PREP(WMI_TLV_TAG, WMI_TAG_GTK_OFFLOAD_CMD) |
+			  FIELD_PREP(WMI_TLV_LEN, sizeof(*cmd) - TLV_HDR_SIZE);
+
+	cmd->vdev_id = arvif->vdev_id;
+
+	if (enable) {
+		cmd->flags = GTK_OFFLOAD_ENABLE_OPCODE;
+
+		/* the length in rekey_data and cmd is equal */
+		memcpy(cmd->kck, rekey_data->kck, sizeof(cmd->kck));
+		memcpy(cmd->kek, rekey_data->kek, sizeof(cmd->kek));
+		memcpy(cmd->replay_ctr, &rekey_data->replay_ctr,
+		       sizeof(cmd->replay_ctr));
+	} else {
+		cmd->flags = GTK_OFFLOAD_DISABLE_OPCODE;
+	}
+
+	ath11k_dbg(ar->ab, ATH11K_DBG_WMI, "offload gtk rekey vdev: %d %d\n",
+		   arvif->vdev_id, enable);
+	return ath11k_wmi_cmd_send(ar->wmi, skb, WMI_GTK_OFFLOAD_CMDID);
+}
+
+int ath11k_wmi_gtk_rekey_getinfo(struct ath11k *ar,
+				 struct ath11k_vif *arvif)
+{
+	struct wmi_gtk_rekey_offload_cmd *cmd;
+	int len;
+	struct sk_buff *skb;
+
+	len = sizeof(*cmd);
+	skb =  ath11k_wmi_alloc_skb(ar->wmi->wmi_ab, len);
+	if (!skb)
+		return -ENOMEM;
+
+	cmd = (struct wmi_gtk_rekey_offload_cmd *)skb->data;
+	cmd->tlv_header = FIELD_PREP(WMI_TLV_TAG, WMI_TAG_GTK_OFFLOAD_CMD) |
+			  FIELD_PREP(WMI_TLV_LEN, sizeof(*cmd) - TLV_HDR_SIZE);
+
+	cmd->vdev_id = arvif->vdev_id;
+	cmd->flags = GTK_OFFLOAD_REQUEST_STATUS_OPCODE;
+
+	ath11k_dbg(ar->ab, ATH11K_DBG_WMI, "get gtk rekey vdev_id: %d\n",
+		   arvif->vdev_id);
+	return ath11k_wmi_cmd_send(ar->wmi, skb, WMI_GTK_OFFLOAD_CMDID);
+}
diff --git a/drivers/net/wireless/ath/ath11k/wmi.h b/drivers/net/wireless/ath/ath11k/wmi.h
index c2e5557..61dc5ea 100644
--- a/drivers/net/wireless/ath/ath11k/wmi.h
+++ b/drivers/net/wireless/ath/ath11k/wmi.h
@@ -5850,6 +5850,41 @@  struct wmi_set_arp_ns_offload_cmd {
 	 */
 } __packed;
 
+#define GTK_OFFLOAD_OPCODE_MASK             0xFF000000
+#define GTK_OFFLOAD_ENABLE_OPCODE           0x01000000
+#define GTK_OFFLOAD_DISABLE_OPCODE          0x02000000
+#define GTK_OFFLOAD_REQUEST_STATUS_OPCODE   0x04000000
+
+#define GTK_OFFLOAD_KEK_BYTES       16
+#define GTK_OFFLOAD_KCK_BYTES       16
+#define GTK_REPLAY_COUNTER_BYTES    8
+#define WMI_MAX_KEY_LEN             32
+#define IGTK_PN_SIZE                6
+
+struct wmi_gtk_offload_status_event {
+	u32 vdev_id;
+	u32 flags;
+	u32 refresh_cnt;
+	u8 replay_counter[GTK_REPLAY_COUNTER_BYTES];
+	u8 igtk_key_index;
+	u8 igtk_key_length;
+	u8 igtk_key_rsc[IGTK_PN_SIZE];
+	u8 igtk_key[WMI_MAX_KEY_LEN];
+	u8 gtk_key_index;
+	u8 gtk_key_length;
+	u8 gtk_key_rsc[GTK_REPLAY_COUNTER_BYTES];
+	u8 gtk_key[WMI_MAX_KEY_LEN];
+} __packed;
+
+struct wmi_gtk_rekey_offload_cmd {
+	u32 tlv_header;
+	u32 vdev_id;
+	u32 flags;
+	u8 kek[GTK_OFFLOAD_KEK_BYTES];
+	u8 kck[GTK_OFFLOAD_KCK_BYTES];
+	u8 replay_ctr[GTK_REPLAY_COUNTER_BYTES];
+} __packed;
+
 int ath11k_wmi_cmd_send(struct ath11k_pdev_wmi *wmi, struct sk_buff *skb,
 			u32 cmd_id);
 struct sk_buff *ath11k_wmi_alloc_skb(struct ath11k_wmi_base *wmi_sc, u32 len);
@@ -6017,4 +6052,8 @@  int ath11k_wmi_send_hw_data_filter_cmd(struct ath11k *ar, u32 vdev_id,
 				       u32 filter_bitmap, bool enable);
 int ath11k_wmi_arp_ns_offload(struct ath11k *ar,
 			      struct ath11k_vif *arvif, bool enable);
+int ath11k_wmi_gtk_rekey_offload(struct ath11k *ar,
+				 struct ath11k_vif *arvif, bool enable);
+int ath11k_wmi_gtk_rekey_getinfo(struct ath11k *ar,
+				 struct ath11k_vif *arvif);
 #endif
diff --git a/drivers/net/wireless/ath/ath11k/wow.c b/drivers/net/wireless/ath/ath11k/wow.c
index c1bef84..2632628 100644
--- a/drivers/net/wireless/ath/ath11k/wow.c
+++ b/drivers/net/wireless/ath/ath11k/wow.c
@@ -17,7 +17,9 @@ 
 
 static const struct wiphy_wowlan_support ath11k_wowlan_support = {
 	.flags = WIPHY_WOWLAN_DISCONNECT |
-		 WIPHY_WOWLAN_MAGIC_PKT,
+		 WIPHY_WOWLAN_MAGIC_PKT |
+		 WIPHY_WOWLAN_SUPPORTS_GTK_REKEY |
+		 WIPHY_WOWLAN_GTK_REKEY_FAILURE,
 	.pattern_min_len = WOW_MIN_PATTERN_SIZE,
 	.pattern_max_len = WOW_MAX_PATTERN_SIZE,
 	.max_pkt_offset = WOW_MAX_PKT_OFFSET,
@@ -573,11 +575,56 @@  static int ath11k_wow_arp_ns_offload(struct ath11k *ar, bool enable)
 	return 0;
 }
 
+static int ath11k_gtk_rekey_offload(struct ath11k *ar, bool enable)
+{
+	struct ath11k_vif *arvif;
+	int ret;
+
+	lockdep_assert_held(&ar->conf_mutex);
+
+	list_for_each_entry(arvif, &ar->arvifs, list) {
+		if (arvif->vdev_type != WMI_VDEV_TYPE_STA ||
+		    !arvif->is_up ||
+		    !arvif->rekey_data.enable_offload)
+			continue;
+
+		/* get rekey info before disable rekey offload */
+		if (!enable) {
+			ret = ath11k_wmi_gtk_rekey_getinfo(ar, arvif);
+			if (ret) {
+				ath11k_warn(ar->ab, "failed to request rekey info vdev %i, ret %d\n",
+					    arvif->vdev_id, ret);
+				return ret;
+			}
+		}
+
+		ret = ath11k_wmi_gtk_rekey_offload(ar, arvif, enable);
+
+		if (ret) {
+			ath11k_warn(ar->ab, "failed to offload gtk reky vdev %i: enable %d, ret %d\n",
+				    arvif->vdev_id, enable, ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
 static int ath11k_wow_protocol_offload(struct ath11k *ar, bool enable)
 {
 	int ret;
 
 	ret = ath11k_wow_arp_ns_offload(ar, enable);
+	if (ret) {
+		ath11k_warn(ar->ab, "failed to offload ARP and NS %d %d\n",
+			    enable, ret);
+		return ret;
+	}
+
+	ret = ath11k_gtk_rekey_offload(ar, enable);
+	if (ret)
+		ath11k_warn(ar->ab, "failed to offload gtk rekey %d %d\n",
+			    enable, ret);
 
 	return ret;
 }