diff mbox series

Question on xen-netfront code to fix a potential ring buffer corruption

Message ID c45b306e-c67b-49f5-8fe8-3913557a8774@default (mailing list archive)
State New, archived
Headers show
Series Question on xen-netfront code to fix a potential ring buffer corruption | expand

Commit Message

Dongli Zhang Aug. 18, 2019, 8:31 a.m. UTC
Hi,

Would you please help confirm why the condition at line 908 is ">="?

In my opinion, we would only hit "skb_shinfo(skb)->nr_frag == MAX_SKB_FRAGS" at
line 908.

890 static RING_IDX xennet_fill_frags(struct netfront_queue *queue,
891                                   struct sk_buff *skb,
892                                   struct sk_buff_head *list)
893 {
894         RING_IDX cons = queue->rx.rsp_cons;
895         struct sk_buff *nskb;
896 
897         while ((nskb = __skb_dequeue(list))) {
898                 struct xen_netif_rx_response *rx =
899                         RING_GET_RESPONSE(&queue->rx, ++cons);
900                 skb_frag_t *nfrag = &skb_shinfo(nskb)->frags[0];
901 
902                 if (skb_shinfo(skb)->nr_frags == MAX_SKB_FRAGS) {
903                         unsigned int pull_to = NETFRONT_SKB_CB(skb)->pull_to;
904 
905                         BUG_ON(pull_to < skb_headlen(skb));
906                         __pskb_pull_tail(skb, pull_to - skb_headlen(skb));
907                 }
908                 if (unlikely(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) {
909                         queue->rx.rsp_cons = ++cons;
910                         kfree_skb(nskb);
911                         return ~0U;
912                 }
913 
914                 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
915                                 skb_frag_page(nfrag),
916                                 rx->offset, rx->status, PAGE_SIZE);
917 
918                 skb_shinfo(nskb)->nr_frags = 0;
919                 kfree_skb(nskb);
920         }
921 
922         return cons;
923 }


The reason that I ask about this is because I am considering below patch to
avoid a potential xen-netfront ring buffer corruption.



If there is skb left in list when we return ~0U, queue->rx.rsp_cons may be set
incorrectly.

While I am trying to make up a case that would hit the corruption, I could not
explain why (unlikely(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)), but not
just "==". Perhaps __pskb_pull_tail() may fail although pull_to is less than
skb_headlen(skb).

Thank you very much!

Dongli Zhang

Comments

Jürgen Groß Aug. 27, 2019, 6:13 a.m. UTC | #1
On 18.08.19 10:31, Dongli Zhang wrote:
> Hi,
> 
> Would you please help confirm why the condition at line 908 is ">="?
> 
> In my opinion, we would only hit "skb_shinfo(skb)->nr_frag == MAX_SKB_FRAGS" at
> line 908.
> 
> 890 static RING_IDX xennet_fill_frags(struct netfront_queue *queue,
> 891                                   struct sk_buff *skb,
> 892                                   struct sk_buff_head *list)
> 893 {
> 894         RING_IDX cons = queue->rx.rsp_cons;
> 895         struct sk_buff *nskb;
> 896
> 897         while ((nskb = __skb_dequeue(list))) {
> 898                 struct xen_netif_rx_response *rx =
> 899                         RING_GET_RESPONSE(&queue->rx, ++cons);
> 900                 skb_frag_t *nfrag = &skb_shinfo(nskb)->frags[0];
> 901
> 902                 if (skb_shinfo(skb)->nr_frags == MAX_SKB_FRAGS) {
> 903                         unsigned int pull_to = NETFRONT_SKB_CB(skb)->pull_to;
> 904
> 905                         BUG_ON(pull_to < skb_headlen(skb));
> 906                         __pskb_pull_tail(skb, pull_to - skb_headlen(skb));
> 907                 }
> 908                 if (unlikely(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) {
> 909                         queue->rx.rsp_cons = ++cons;
> 910                         kfree_skb(nskb);
> 911                         return ~0U;
> 912                 }
> 913
> 914                 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
> 915                                 skb_frag_page(nfrag),
> 916                                 rx->offset, rx->status, PAGE_SIZE);
> 917
> 918                 skb_shinfo(nskb)->nr_frags = 0;
> 919                 kfree_skb(nskb);
> 920         }
> 921
> 922         return cons;
> 923 }
> 
> 
> The reason that I ask about this is because I am considering below patch to
> avoid a potential xen-netfront ring buffer corruption.
> 
> diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
> index 8d33970..48a2162 100644
> --- a/drivers/net/xen-netfront.c
> +++ b/drivers/net/xen-netfront.c
> @@ -906,7 +906,7 @@ static RING_IDX xennet_fill_frags(struct netfront_queue *queue,
>                          __pskb_pull_tail(skb, pull_to - skb_headlen(skb));
>                  }
>                  if (unlikely(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) {
> -                       queue->rx.rsp_cons = ++cons;
> +                       queue->rx.rsp_cons = cons + skb_queue_len(list) + 1;
>                          kfree_skb(nskb);
>                          return ~0U;
>                  }
> 
> 
> If there is skb left in list when we return ~0U, queue->rx.rsp_cons may be set
> incorrectly.

Sa basically you want to consume the responses for all outstanding skbs
in the list?

> 
> While I am trying to make up a case that would hit the corruption, I could not
> explain why (unlikely(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)), but not
> just "==". Perhaps __pskb_pull_tail() may fail although pull_to is less than
> skb_headlen(skb).

I don't think nr_frags can be larger than MAX_SKB_FRAGS. OTOH I don't
think it hurts to have a safety net here in order to avoid problems.

Originally this was BUG_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)
so that might explain the ">=".


Juergen
Dongli Zhang Aug. 27, 2019, 6:43 a.m. UTC | #2
Hi Juergen,

On 8/27/19 2:13 PM, Juergen Gross wrote:
> On 18.08.19 10:31, Dongli Zhang wrote:
>> Hi,
>>
>> Would you please help confirm why the condition at line 908 is ">="?
>>
>> In my opinion, we would only hit "skb_shinfo(skb)->nr_frag == MAX_SKB_FRAGS" at
>> line 908.
>>
>> 890 static RING_IDX xennet_fill_frags(struct netfront_queue *queue,
>> 891                                   struct sk_buff *skb,
>> 892                                   struct sk_buff_head *list)
>> 893 {
>> 894         RING_IDX cons = queue->rx.rsp_cons;
>> 895         struct sk_buff *nskb;
>> 896
>> 897         while ((nskb = __skb_dequeue(list))) {
>> 898                 struct xen_netif_rx_response *rx =
>> 899                         RING_GET_RESPONSE(&queue->rx, ++cons);
>> 900                 skb_frag_t *nfrag = &skb_shinfo(nskb)->frags[0];
>> 901
>> 902                 if (skb_shinfo(skb)->nr_frags == MAX_SKB_FRAGS) {
>> 903                         unsigned int pull_to = NETFRONT_SKB_CB(skb)->pull_to;
>> 904
>> 905                         BUG_ON(pull_to < skb_headlen(skb));
>> 906                         __pskb_pull_tail(skb, pull_to - skb_headlen(skb));
>> 907                 }
>> 908                 if (unlikely(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) {
>> 909                         queue->rx.rsp_cons = ++cons;
>> 910                         kfree_skb(nskb);
>> 911                         return ~0U;
>> 912                 }
>> 913
>> 914                 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
>> 915                                 skb_frag_page(nfrag),
>> 916                                 rx->offset, rx->status, PAGE_SIZE);
>> 917
>> 918                 skb_shinfo(nskb)->nr_frags = 0;
>> 919                 kfree_skb(nskb);
>> 920         }
>> 921
>> 922         return cons;
>> 923 }
>>
>>
>> The reason that I ask about this is because I am considering below patch to
>> avoid a potential xen-netfront ring buffer corruption.
>>
>> diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
>> index 8d33970..48a2162 100644
>> --- a/drivers/net/xen-netfront.c
>> +++ b/drivers/net/xen-netfront.c
>> @@ -906,7 +906,7 @@ static RING_IDX xennet_fill_frags(struct netfront_queue
>> *queue,
>>                          __pskb_pull_tail(skb, pull_to - skb_headlen(skb));
>>                  }
>>                  if (unlikely(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) {
>> -                       queue->rx.rsp_cons = ++cons;
>> +                       queue->rx.rsp_cons = cons + skb_queue_len(list) + 1;
>>                          kfree_skb(nskb);
>>                          return ~0U;
>>                  }
>>
>>
>> If there is skb left in list when we return ~0U, queue->rx.rsp_cons may be set
>> incorrectly.
> 
> Sa basically you want to consume the responses for all outstanding skbs
> in the list?
> 

I think there would be bug if there is skb left in the list.

This is what is implanted in xennet_poll() when there is error of processing
extra info like below. As at line 1034, if there is error, all outstanding skb
are consumed.

 985 static int xennet_poll(struct napi_struct *napi, int budget)
... ...
1028                 if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
1029                         struct xen_netif_extra_info *gso;
1030                         gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
1031
1032                         if (unlikely(xennet_set_skb_gso(skb, gso))) {
1033                                 __skb_queue_head(&tmpq, skb);
1034                                 queue->rx.rsp_cons += skb_queue_len(&tmpq);
1035                                 goto err;
1036                         }
1037                 }

The reason we need to consume all outstanding skb is because
xennet_get_responses() would reset both queue->rx_skbs[i] and
queue->grant_rx_ref[i] to NULL before enqueue all outstanding skb to the list
(e.g., &tmpq), by xennet_get_rx_skb() and xennet_get_rx_ref().

If we do not consume all of them, we would hit NULL in queue->rx_skbs[i] in next
iteration of while loop in xennet_poll().

That is, if we do not consume all outstanding skb, the queue->rx.rsp_cons may
refer to a response whose queue->rx_skbs[i] and queue->grant_rx_ref[i] are
already reset to NULL.

Dongli Zhang

>>
>> While I am trying to make up a case that would hit the corruption, I could not
>> explain why (unlikely(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)), but not
>> just "==". Perhaps __pskb_pull_tail() may fail although pull_to is less than
>> skb_headlen(skb).
> 
> I don't think nr_frags can be larger than MAX_SKB_FRAGS. OTOH I don't
> think it hurts to have a safety net here in order to avoid problems.
> 
> Originally this was BUG_ON(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)
> so that might explain the ">=".
> 
> 
> Juergen
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xenproject.org
> https://lists.xenproject.org/mailman/listinfo/xen-devel
Jürgen Groß Aug. 27, 2019, 7:04 a.m. UTC | #3
On 27.08.19 08:43, Dongli Zhang wrote:
> Hi Juergen,
> 
> On 8/27/19 2:13 PM, Juergen Gross wrote:
>> On 18.08.19 10:31, Dongli Zhang wrote:
>>> Hi,
>>>
>>> Would you please help confirm why the condition at line 908 is ">="?
>>>
>>> In my opinion, we would only hit "skb_shinfo(skb)->nr_frag == MAX_SKB_FRAGS" at
>>> line 908.
>>>
>>> 890 static RING_IDX xennet_fill_frags(struct netfront_queue *queue,
>>> 891                                   struct sk_buff *skb,
>>> 892                                   struct sk_buff_head *list)
>>> 893 {
>>> 894         RING_IDX cons = queue->rx.rsp_cons;
>>> 895         struct sk_buff *nskb;
>>> 896
>>> 897         while ((nskb = __skb_dequeue(list))) {
>>> 898                 struct xen_netif_rx_response *rx =
>>> 899                         RING_GET_RESPONSE(&queue->rx, ++cons);
>>> 900                 skb_frag_t *nfrag = &skb_shinfo(nskb)->frags[0];
>>> 901
>>> 902                 if (skb_shinfo(skb)->nr_frags == MAX_SKB_FRAGS) {
>>> 903                         unsigned int pull_to = NETFRONT_SKB_CB(skb)->pull_to;
>>> 904
>>> 905                         BUG_ON(pull_to < skb_headlen(skb));
>>> 906                         __pskb_pull_tail(skb, pull_to - skb_headlen(skb));
>>> 907                 }
>>> 908                 if (unlikely(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) {
>>> 909                         queue->rx.rsp_cons = ++cons;
>>> 910                         kfree_skb(nskb);
>>> 911                         return ~0U;
>>> 912                 }
>>> 913
>>> 914                 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
>>> 915                                 skb_frag_page(nfrag),
>>> 916                                 rx->offset, rx->status, PAGE_SIZE);
>>> 917
>>> 918                 skb_shinfo(nskb)->nr_frags = 0;
>>> 919                 kfree_skb(nskb);
>>> 920         }
>>> 921
>>> 922         return cons;
>>> 923 }
>>>
>>>
>>> The reason that I ask about this is because I am considering below patch to
>>> avoid a potential xen-netfront ring buffer corruption.
>>>
>>> diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
>>> index 8d33970..48a2162 100644
>>> --- a/drivers/net/xen-netfront.c
>>> +++ b/drivers/net/xen-netfront.c
>>> @@ -906,7 +906,7 @@ static RING_IDX xennet_fill_frags(struct netfront_queue
>>> *queue,
>>>                           __pskb_pull_tail(skb, pull_to - skb_headlen(skb));
>>>                   }
>>>                   if (unlikely(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) {
>>> -                       queue->rx.rsp_cons = ++cons;
>>> +                       queue->rx.rsp_cons = cons + skb_queue_len(list) + 1;
>>>                           kfree_skb(nskb);
>>>                           return ~0U;
>>>                   }
>>>
>>>
>>> If there is skb left in list when we return ~0U, queue->rx.rsp_cons may be set
>>> incorrectly.
>>
>> Sa basically you want to consume the responses for all outstanding skbs
>> in the list?
>>
> 
> I think there would be bug if there is skb left in the list.
> 
> This is what is implanted in xennet_poll() when there is error of processing
> extra info like below. As at line 1034, if there is error, all outstanding skb
> are consumed.
> 
>   985 static int xennet_poll(struct napi_struct *napi, int budget)
> ... ...
> 1028                 if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
> 1029                         struct xen_netif_extra_info *gso;
> 1030                         gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
> 1031
> 1032                         if (unlikely(xennet_set_skb_gso(skb, gso))) {
> 1033                                 __skb_queue_head(&tmpq, skb);
> 1034                                 queue->rx.rsp_cons += skb_queue_len(&tmpq);
> 1035                                 goto err;
> 1036                         }
> 1037                 }
> 
> The reason we need to consume all outstanding skb is because
> xennet_get_responses() would reset both queue->rx_skbs[i] and
> queue->grant_rx_ref[i] to NULL before enqueue all outstanding skb to the list
> (e.g., &tmpq), by xennet_get_rx_skb() and xennet_get_rx_ref().
> 
> If we do not consume all of them, we would hit NULL in queue->rx_skbs[i] in next
> iteration of while loop in xennet_poll().
> 
> That is, if we do not consume all outstanding skb, the queue->rx.rsp_cons may
> refer to a response whose queue->rx_skbs[i] and queue->grant_rx_ref[i] are
> already reset to NULL.

Thanks for confirming. I just wanted to make sure I understand your
patch correctly. :-)


Juergen
diff mbox series

Patch

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 8d33970..48a2162 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -906,7 +906,7 @@  static RING_IDX xennet_fill_frags(struct netfront_queue *queue,
                        __pskb_pull_tail(skb, pull_to - skb_headlen(skb));
                }
                if (unlikely(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) {
-                       queue->rx.rsp_cons = ++cons;
+                       queue->rx.rsp_cons = cons + skb_queue_len(list) + 1;
                        kfree_skb(nskb);
                        return ~0U;
                }