Message ID | 1500912555.2458.12.camel@redhat.com (mailing list archive) |
---|---|
State | Not Applicable |
Headers | show |
On Mon, Jul 24, 2017 at 12:09 PM, Paolo Abeni <pabeni@redhat.com> wrote: > Hi, > > On Mon, 2017-07-24 at 10:42 -0400, Paul Moore wrote: >> The change in behavior for userspace makes me a little nervous as >> there is no way of knowing how any random application may be coded. >> Even if we are confident that the majority of applications set >> IP_PASSSEC before calling bind(), we are likely still stuck with a few >> that will break, and that means a lot of hard to debug problem >> reports. >> >> I would feel much more comfortable if we could preserve the existing behavior. > > I agree, we must preserve the original behavior. > > Re-thinking about the problem, checking skb->sp in the BH, and storing > the status in the scratch area should both fix the issue in a sane way > and preserve the optimization. > > Something like the code below. Could you please try in your > environment? (or point me to simple reproducer ;-) I'm happy to test this, but if you are curious, you can find the selinux-testsuite at the link below; the "inet_socket" tests are the ones relevant to this problem. * https://github.com/SELinuxProject/selinux-testsuite However, I believe there is a problem with this patch, see below. > --- > diff --git a/include/net/udp.h b/include/net/udp.h > index 972ce4b..8d2c406 100644 > --- a/include/net/udp.h > +++ b/include/net/udp.h > @@ -305,33 +305,44 @@ struct sock *udp6_lib_lookup_skb(struct sk_buff *skb, > /* UDP uses skb->dev_scratch to cache as much information as possible and avoid > * possibly multiple cache miss on dequeue() > */ > -#if BITS_PER_LONG == 64 > - > -/* truesize, len and the bit needed to compute skb_csum_unnecessary will be on > - * cold cache lines at recvmsg time. > - * skb->len can be stored on 16 bits since the udp header has been already > - * validated and pulled. > - */ > struct udp_dev_scratch { > - u32 truesize; > + /* skb->truesize and the stateless bit embeded in a single field; > + * do not use a bitfield since the compiler emits better/smaller code > + * this way > + */ > + u32 _tsize_state; > + > +#if BITS_PER_LONG == 64 > + /* len and the bit needed to compute skb_csum_unnecessary > + * will be on cold cache lines at recvmsg time. > + * skb->len can be stored on 16 bits since the udp header has been > + * already validated and pulled. > + */ > u16 len; > bool is_linear; > bool csum_unnecessary; > +#endif > }; ... > diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c > index b057653..d243772 100644 > --- a/net/ipv4/udp.c > +++ b/net/ipv4/udp.c > @@ -1163,34 +1163,32 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset, > return ret; > } > > -#if BITS_PER_LONG == 64 > +#define UDP_SKB_IS_STATELESS 0x80000000 > + > static void udp_set_dev_scratch(struct sk_buff *skb) > { > - struct udp_dev_scratch *scratch; > + struct udp_dev_scratch *scratch = udp_skb_scratch(skb); > > BUILD_BUG_ON(sizeof(struct udp_dev_scratch) > sizeof(long)); The BUILD_BUG_ON() assertion no longer appears to be correct with this patch. > - scratch = (struct udp_dev_scratch *)&skb->dev_scratch; > - scratch->truesize = skb->truesize; > + scratch->_tsize_state = skb->truesize; > +#if BITS_PER_LONG == 64 > scratch->len = skb->len; > scratch->csum_unnecessary = !!skb_csum_unnecessary(skb); > scratch->is_linear = !skb_is_nonlinear(skb); > +#endif > + if (likely(!skb->_skb_refdst)) > + scratch->_tsize_state |= UDP_SKB_IS_STATELESS; > }
On Mon, Jul 24, 2017 at 3:00 PM, Paul Moore <paul@paul-moore.com> wrote: > On Mon, Jul 24, 2017 at 12:09 PM, Paolo Abeni <pabeni@redhat.com> wrote: >> Hi, >> >> On Mon, 2017-07-24 at 10:42 -0400, Paul Moore wrote: >>> The change in behavior for userspace makes me a little nervous as >>> there is no way of knowing how any random application may be coded. >>> Even if we are confident that the majority of applications set >>> IP_PASSSEC before calling bind(), we are likely still stuck with a few >>> that will break, and that means a lot of hard to debug problem >>> reports. >>> >>> I would feel much more comfortable if we could preserve the existing behavior. >> >> I agree, we must preserve the original behavior. >> >> Re-thinking about the problem, checking skb->sp in the BH, and storing >> the status in the scratch area should both fix the issue in a sane way >> and preserve the optimization. >> >> Something like the code below. Could you please try in your >> environment? (or point me to simple reproducer ;-) > > I'm happy to test this, but if you are curious, you can find the > selinux-testsuite at the link below; the "inet_socket" tests are the > ones relevant to this problem. > > * https://github.com/SELinuxProject/selinux-testsuite > > However, I believe there is a problem with this patch, see below. > >> --- >> diff --git a/include/net/udp.h b/include/net/udp.h >> index 972ce4b..8d2c406 100644 >> --- a/include/net/udp.h >> +++ b/include/net/udp.h >> @@ -305,33 +305,44 @@ struct sock *udp6_lib_lookup_skb(struct sk_buff *skb, >> /* UDP uses skb->dev_scratch to cache as much information as possible and avoid >> * possibly multiple cache miss on dequeue() >> */ >> -#if BITS_PER_LONG == 64 >> - >> -/* truesize, len and the bit needed to compute skb_csum_unnecessary will be on >> - * cold cache lines at recvmsg time. >> - * skb->len can be stored on 16 bits since the udp header has been already >> - * validated and pulled. >> - */ >> struct udp_dev_scratch { >> - u32 truesize; >> + /* skb->truesize and the stateless bit embeded in a single field; >> + * do not use a bitfield since the compiler emits better/smaller code >> + * this way >> + */ >> + u32 _tsize_state; >> + >> +#if BITS_PER_LONG == 64 >> + /* len and the bit needed to compute skb_csum_unnecessary >> + * will be on cold cache lines at recvmsg time. >> + * skb->len can be stored on 16 bits since the udp header has been >> + * already validated and pulled. >> + */ >> u16 len; >> bool is_linear; >> bool csum_unnecessary; >> +#endif >> }; > > ... > >> diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c >> index b057653..d243772 100644 >> --- a/net/ipv4/udp.c >> +++ b/net/ipv4/udp.c >> @@ -1163,34 +1163,32 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset, >> return ret; >> } >> >> -#if BITS_PER_LONG == 64 >> +#define UDP_SKB_IS_STATELESS 0x80000000 >> + >> static void udp_set_dev_scratch(struct sk_buff *skb) >> { >> - struct udp_dev_scratch *scratch; >> + struct udp_dev_scratch *scratch = udp_skb_scratch(skb); >> >> BUILD_BUG_ON(sizeof(struct udp_dev_scratch) > sizeof(long)); > > The BUILD_BUG_ON() assertion no longer appears to be correct with this patch. Nevermind, I just took a closer look at this and realized I made a mistake when applying your patch (had to apply manually for some reason). I'm building a test kernel now.
On Mon, 2017-07-24 at 22:00 -0400, Paul Moore wrote: > > I'm happy to test this, but if you are curious, you can find the > > selinux-testsuite at the link below; the "inet_socket" tests are the > > ones relevant to this problem. > > > > * https://github.com/SELinuxProject/selinux-testsuite Thanks, I'll have a look. > > However, I believe there is a problem with this patch, see below. [...] > > > -#if BITS_PER_LONG == 64 > > > +#define UDP_SKB_IS_STATELESS 0x80000000 > > > + > > > static void udp_set_dev_scratch(struct sk_buff *skb) > > > { > > > - struct udp_dev_scratch *scratch; > > > + struct udp_dev_scratch *scratch = udp_skb_scratch(skb); > > > > > > BUILD_BUG_ON(sizeof(struct udp_dev_scratch) > sizeof(long)); > > > > The BUILD_BUG_ON() assertion no longer appears to be correct with this patch. > > Nevermind, I just took a closer look at this and realized I made a > mistake when applying your patch (had to apply manually for some > reason). I'm building a test kernel now. Yup, I compile-tested the code, plus some basic sanity checks, so the build breakage felt unexpected. Thanks for testing, Paolo
On Tue, Jul 25, 2017 at 5:59 AM, Paolo Abeni <pabeni@redhat.com> wrote: > On Mon, 2017-07-24 at 22:00 -0400, Paul Moore wrote: >> > I'm happy to test this, but if you are curious, you can find the >> > selinux-testsuite at the link below; the "inet_socket" tests are the >> > ones relevant to this problem. >> > >> > * https://github.com/SELinuxProject/selinux-testsuite > > Thanks, I'll have a look. > >> > However, I believe there is a problem with this patch, see below. > > [...] > >> > > -#if BITS_PER_LONG == 64 >> > > +#define UDP_SKB_IS_STATELESS 0x80000000 >> > > + >> > > static void udp_set_dev_scratch(struct sk_buff *skb) >> > > { >> > > - struct udp_dev_scratch *scratch; >> > > + struct udp_dev_scratch *scratch = udp_skb_scratch(skb); >> > > >> > > BUILD_BUG_ON(sizeof(struct udp_dev_scratch) > sizeof(long)); >> > >> > The BUILD_BUG_ON() assertion no longer appears to be correct with this patch. >> >> Nevermind, I just took a closer look at this and realized I made a >> mistake when applying your patch (had to apply manually for some >> reason). I'm building a test kernel now. > > Yup, I compile-tested the code, plus some basic sanity checks, so the > build breakage felt unexpected. > > Thanks for testing, I just did a quick run through the selinux-testsuite and the regression would appear to be fixed, thanks! I'm guessing you'll send this to DaveM so we can get this fixed before v4.13 is released? Tested-by: Paul Moore <paul@paul-moore.com>
On Tue, 2017-07-25 at 10:45 -0400, Paul Moore wrote: > On Tue, Jul 25, 2017 at 5:59 AM, Paolo Abeni <pabeni@redhat.com> wrote: > > On Mon, 2017-07-24 at 22:00 -0400, Paul Moore wrote: > > > > I'm happy to test this, but if you are curious, you can find the > > > > selinux-testsuite at the link below; the "inet_socket" tests are the > > > > ones relevant to this problem. > > > > > > > > * https://github.com/SELinuxProject/selinux-testsuite > > > > Thanks, I'll have a look. > > > > > > However, I believe there is a problem with this patch, see below. > > > > [...] > > > > > > > -#if BITS_PER_LONG == 64 > > > > > +#define UDP_SKB_IS_STATELESS 0x80000000 > > > > > + > > > > > static void udp_set_dev_scratch(struct sk_buff *skb) > > > > > { > > > > > - struct udp_dev_scratch *scratch; > > > > > + struct udp_dev_scratch *scratch = udp_skb_scratch(skb); > > > > > > > > > > BUILD_BUG_ON(sizeof(struct udp_dev_scratch) > sizeof(long)); > > > > > > > > The BUILD_BUG_ON() assertion no longer appears to be correct with this patch. > > > > > > Nevermind, I just took a closer look at this and realized I made a > > > mistake when applying your patch (had to apply manually for some > > > reason). I'm building a test kernel now. > > > > Yup, I compile-tested the code, plus some basic sanity checks, so the > > build breakage felt unexpected. > > > > Thanks for testing, > > I just did a quick run through the selinux-testsuite and the > regression would appear to be fixed, thanks! I'm guessing you'll send > this to DaveM so we can get this fixed before v4.13 is released? > > Tested-by: Paul Moore <paul@paul-moore.com> Sure. I'll submit soon for -net. Cheers, Paolo
diff --git a/include/net/udp.h b/include/net/udp.h index 972ce4b..8d2c406 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -305,33 +305,44 @@ struct sock *udp6_lib_lookup_skb(struct sk_buff *skb, /* UDP uses skb->dev_scratch to cache as much information as possible and avoid * possibly multiple cache miss on dequeue() */ -#if BITS_PER_LONG == 64 - -/* truesize, len and the bit needed to compute skb_csum_unnecessary will be on - * cold cache lines at recvmsg time. - * skb->len can be stored on 16 bits since the udp header has been already - * validated and pulled. - */ struct udp_dev_scratch { - u32 truesize; + /* skb->truesize and the stateless bit embeded in a single field; + * do not use a bitfield since the compiler emits better/smaller code + * this way + */ + u32 _tsize_state; + +#if BITS_PER_LONG == 64 + /* len and the bit needed to compute skb_csum_unnecessary + * will be on cold cache lines at recvmsg time. + * skb->len can be stored on 16 bits since the udp header has been + * already validated and pulled. + */ u16 len; bool is_linear; bool csum_unnecessary; +#endif }; +static inline struct udp_dev_scratch* udp_skb_scratch(struct sk_buff *skb) +{ + return (struct udp_dev_scratch *)&skb->dev_scratch; +} + +#if BITS_PER_LONG == 64 static inline unsigned int udp_skb_len(struct sk_buff *skb) { - return ((struct udp_dev_scratch *)&skb->dev_scratch)->len; + return udp_skb_scratch(skb)->len; } static inline bool udp_skb_csum_unnecessary(struct sk_buff *skb) { - return ((struct udp_dev_scratch *)&skb->dev_scratch)->csum_unnecessary; + return udp_skb_scratch(skb)->csum_unnecessary; } static inline bool udp_skb_is_linear(struct sk_buff *skb) { - return ((struct udp_dev_scratch *)&skb->dev_scratch)->is_linear; + return udp_skb_scratch(skb)->is_linear; } #else diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index b057653..d243772 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1163,34 +1163,32 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset, return ret; } -#if BITS_PER_LONG == 64 +#define UDP_SKB_IS_STATELESS 0x80000000 + static void udp_set_dev_scratch(struct sk_buff *skb) { - struct udp_dev_scratch *scratch; + struct udp_dev_scratch *scratch = udp_skb_scratch(skb); BUILD_BUG_ON(sizeof(struct udp_dev_scratch) > sizeof(long)); - scratch = (struct udp_dev_scratch *)&skb->dev_scratch; - scratch->truesize = skb->truesize; + scratch->_tsize_state = skb->truesize; +#if BITS_PER_LONG == 64 scratch->len = skb->len; scratch->csum_unnecessary = !!skb_csum_unnecessary(skb); scratch->is_linear = !skb_is_nonlinear(skb); +#endif + if (likely(!skb->_skb_refdst)) + scratch->_tsize_state |= UDP_SKB_IS_STATELESS; } static int udp_skb_truesize(struct sk_buff *skb) { - return ((struct udp_dev_scratch *)&skb->dev_scratch)->truesize; -} -#else -static void udp_set_dev_scratch(struct sk_buff *skb) -{ - skb->dev_scratch = skb->truesize; + return udp_skb_scratch(skb)->_tsize_state & ~UDP_SKB_IS_STATELESS; } -static int udp_skb_truesize(struct sk_buff *skb) +static bool udp_skb_has_head_state(struct sk_buff *skb) { - return skb->dev_scratch; + return !(udp_skb_scratch(skb)->_tsize_state & UDP_SKB_IS_STATELESS); } -#endif /* fully reclaim rmem/fwd memory allocated for skb */ static void udp_rmem_release(struct sock *sk, int size, int partial, @@ -1388,10 +1386,10 @@ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len) unlock_sock_fast(sk, slow); } - /* we cleared the head states previously only if the skb lacks any IP - * options, see __udp_queue_rcv_skb(). + /* In the more common cases we cleared the head states previously, + * see __udp_queue_rcv_skb(). */ - if (unlikely(IPCB(skb)->opt.optlen > 0)) + if (unlikely(udp_skb_has_head_state(skb))) skb_release_head_state(skb); consume_stateless_skb(skb); } @@ -1784,11 +1782,11 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) sk_mark_napi_id_once(sk, skb); } - /* At recvmsg() time we need skb->dst to process IP options-related - * cmsg, elsewhere can we clear all pending head states while they are - * hot in the cache + /* At recvmsg() time we may access skb->dst or skb->sp depending on + * the IP options and the cmsg flags, elsewhere can we clear all + * pending head states while they are hot in the cache */ - if (likely(IPCB(skb)->opt.optlen == 0)) + if (likely(IPCB(skb)->opt.optlen == 0 && !skb->sp)) skb_release_head_state(skb); rc = __udp_enqueue_schedule_skb(sk, skb);