Message ID | 20241018114535.35712-4-lulie@linux.alibaba.com
---|---
State | Superseded
Delegated to: | Netdev Maintainers
Series | udp: Add 4-tuple hash for connected sockets
On 10/18/24 13:45, Philo Lu wrote:
[...]
> +/* In hash4, rehash can also happen in connect(), where hash4_cnt keeps unchanged. */
> +static void udp4_rehash4(struct udp_table *udptable, struct sock *sk, u16 newhash4)
> +{
> +	struct udp_hslot *hslot4, *nhslot4;
> +
> +	hslot4 = udp_hashslot4(udptable, udp_sk(sk)->udp_lrpa_hash);
> +	nhslot4 = udp_hashslot4(udptable, newhash4);
> +	udp_sk(sk)->udp_lrpa_hash = newhash4;
> +
> +	if (hslot4 != nhslot4) {
> +		spin_lock_bh(&hslot4->lock);
> +		hlist_del_init_rcu(&udp_sk(sk)->udp_lrpa_node);
> +		hslot4->count--;
> +		spin_unlock_bh(&hslot4->lock);
> +
> +		synchronize_rcu();

This deserves a comment explaining why it's needed. I had to dig into past
revisions to understand it.

> +
> +		spin_lock_bh(&nhslot4->lock);
> +		hlist_add_head_rcu(&udp_sk(sk)->udp_lrpa_node, &nhslot4->head);
> +		nhslot4->count++;
> +		spin_unlock_bh(&nhslot4->lock);
> +	}
> +}
> +
> +static void udp4_unhash4(struct udp_table *udptable, struct sock *sk)
> +{
> +	struct udp_hslot *hslot2, *hslot4;
> +
> +	if (udp_hashed4(sk)) {
> +		hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
> +		hslot4 = udp_hashslot4(udptable, udp_sk(sk)->udp_lrpa_hash);
> +
> +		spin_lock(&hslot4->lock);
> +		hlist_del_init_rcu(&udp_sk(sk)->udp_lrpa_node);
> +		hslot4->count--;
> +		spin_unlock(&hslot4->lock);
> +
> +		spin_lock(&hslot2->lock);
> +		udp_hash4_dec(hslot2);
> +		spin_unlock(&hslot2->lock);
> +	}
> +}
> +
> +/* call with sock lock */
> +static void udp4_hash4(struct sock *sk)
> +{
> +	struct udp_hslot *hslot, *hslot2, *hslot4;
> +	struct net *net = sock_net(sk);
> +	struct udp_table *udptable;
> +	unsigned int hash;
> +
> +	if (sk_unhashed(sk) || inet_sk(sk)->inet_rcv_saddr == htonl(INADDR_ANY))
> +		return;
> +
> +	hash = udp_ehashfn(net, inet_sk(sk)->inet_rcv_saddr, inet_sk(sk)->inet_num,
> +			   inet_sk(sk)->inet_daddr, inet_sk(sk)->inet_dport);
> +
> +	udptable = net->ipv4.udp_table;
> +	if (udp_hashed4(sk)) {
> +		udp4_rehash4(udptable, sk, hash);

It's unclear to me how we can enter this branch. Also it's unclear why you
don't need to call udp_hash4_inc()/udp_hash4_dec() here, too. Why can't
such accounting be placed in udp4_rehash4()?

[...]

> @@ -2031,6 +2180,19 @@ void udp_lib_rehash(struct sock *sk, u16 newhash)
>  		spin_unlock(&nhslot2->lock);
>  	}
>  
> +	if (udp_hashed4(sk)) {
> +		udp4_rehash4(udptable, sk, newhash4);
> +
> +		if (hslot2 != nhslot2) {
> +			spin_lock(&hslot2->lock);
> +			udp_hash4_dec(hslot2);
> +			spin_unlock(&hslot2->lock);
> +
> +			spin_lock(&nhslot2->lock);
> +			udp_hash4_inc(nhslot2);
> +			spin_unlock(&nhslot2->lock);
> +		}
> +	}
>  	spin_unlock_bh(&hslot->lock);

The udp4_rehash4() call above is in atomic context and could end up calling
synchronize_rcu(), which is a blocking function. You must avoid that.

Cheers,

Paolo
On 10/24/24 17:01, Paolo Abeni wrote:
> The udp4_rehash4() call above is in atomic context and could end up
> calling synchronize_rcu(), which is a blocking function. You must avoid that.

I almost forgot: please include, in this commit message or in the cover
letter, the performance figures for unconnected sockets before and after
this series, and for a stress test with a lot of connected sockets, before
and after this series.

Thanks,

Paolo
On 2024/10/24 23:01, Paolo Abeni wrote:
> On 10/18/24 13:45, Philo Lu wrote:
> [...]
>> +/* In hash4, rehash can also happen in connect(), where hash4_cnt keeps unchanged. */
>> +static void udp4_rehash4(struct udp_table *udptable, struct sock *sk, u16 newhash4)
>> +{
>> +	struct udp_hslot *hslot4, *nhslot4;
>> +
>> +	hslot4 = udp_hashslot4(udptable, udp_sk(sk)->udp_lrpa_hash);
>> +	nhslot4 = udp_hashslot4(udptable, newhash4);
>> +	udp_sk(sk)->udp_lrpa_hash = newhash4;
>> +
>> +	if (hslot4 != nhslot4) {
>> +		spin_lock_bh(&hslot4->lock);
>> +		hlist_del_init_rcu(&udp_sk(sk)->udp_lrpa_node);
>> +		hslot4->count--;
>> +		spin_unlock_bh(&hslot4->lock);
>> +
>> +		synchronize_rcu();
>
> This deserves a comment explaining why it's needed. I had to dig into past
> revisions to understand it.
>

Got it. Here is a short explanation (see [1] for details):

Here we move a node from one hlist to another, i.e., we update node->next
so that it points from the old hlist into the new one. For a reader
traversing the old hlist, if node->next is updated just as the reader
steps onto the moved node, the reader is carried over into the new hlist.
This is unexpected.

Reader(lookup)           Writer(rehash)
-----------------        ---------------
1. rcu_read_lock()
2. pos = sk;
                         3. hlist_del_init_rcu(sk, old_slot)
                         4. hlist_add_head_rcu(sk, new_slot)
5. pos = pos->next; <=
6. rcu_read_unlock()

[1]
https://lore.kernel.org/all/0fb425e0-5482-4cdf-9dc1-3906751f8f81@linux.alibaba.com/

>> +
>> +		spin_lock_bh(&nhslot4->lock);
>> +		hlist_add_head_rcu(&udp_sk(sk)->udp_lrpa_node, &nhslot4->head);
>> +		nhslot4->count++;
>> +		spin_unlock_bh(&nhslot4->lock);
>> +	}
>> +}
>> +
>> +static void udp4_unhash4(struct udp_table *udptable, struct sock *sk)
>> +{
>> +	struct udp_hslot *hslot2, *hslot4;
>> +
>> +	if (udp_hashed4(sk)) {
>> +		hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
>> +		hslot4 = udp_hashslot4(udptable, udp_sk(sk)->udp_lrpa_hash);
>> +
>> +		spin_lock(&hslot4->lock);
>> +		hlist_del_init_rcu(&udp_sk(sk)->udp_lrpa_node);
>> +		hslot4->count--;
>> +		spin_unlock(&hslot4->lock);
>> +
>> +		spin_lock(&hslot2->lock);
>> +		udp_hash4_dec(hslot2);
>> +		spin_unlock(&hslot2->lock);
>> +	}
>> +}
>> +
>> +/* call with sock lock */
>> +static void udp4_hash4(struct sock *sk)
>> +{
>> +	struct udp_hslot *hslot, *hslot2, *hslot4;
>> +	struct net *net = sock_net(sk);
>> +	struct udp_table *udptable;
>> +	unsigned int hash;
>> +
>> +	if (sk_unhashed(sk) || inet_sk(sk)->inet_rcv_saddr == htonl(INADDR_ANY))
>> +		return;
>> +
>> +	hash = udp_ehashfn(net, inet_sk(sk)->inet_rcv_saddr, inet_sk(sk)->inet_num,
>> +			   inet_sk(sk)->inet_daddr, inet_sk(sk)->inet_dport);
>> +
>> +	udptable = net->ipv4.udp_table;
>> +	if (udp_hashed4(sk)) {
>> +		udp4_rehash4(udptable, sk, hash);
>
> It's unclear to me how we can enter this branch. Also it's unclear why you
> don't need to call udp_hash4_inc()/udp_hash4_dec() here, too. Why can't
> such accounting be placed in udp4_rehash4()?
>

It's possible that a connected udp socket _re-connects_ to another remote
address. Then, because the local address is not changed, hash2 and its
hash4_cnt keep unchanged. But rehash4 needs to be done. I'll also add a
comment here.

> [...]

>> @@ -2031,6 +2180,19 @@ void udp_lib_rehash(struct sock *sk, u16 newhash)
>>  		spin_unlock(&nhslot2->lock);
>>  	}
>>  
>> +	if (udp_hashed4(sk)) {
>> +		udp4_rehash4(udptable, sk, newhash4);
>> +
>> +		if (hslot2 != nhslot2) {
>> +			spin_lock(&hslot2->lock);
>> +			udp_hash4_dec(hslot2);
>> +			spin_unlock(&hslot2->lock);
>> +
>> +			spin_lock(&nhslot2->lock);
>> +			udp_hash4_inc(nhslot2);
>> +			spin_unlock(&nhslot2->lock);
>> +		}
>> +	}
>>  	spin_unlock_bh(&hslot->lock);
>
> The udp4_rehash4() call above is in atomic context and could end up calling
> synchronize_rcu(), which is a blocking function. You must avoid that.
>

I see, synchronize_rcu() cannot be used while holding a spinlock. However,
I don't have a good idea of how to solve it yet. Do you have any thoughts
or suggestions?

> Cheers,
>
> Paolo

Thanks for your review, Paolo. I'll address all your concerns in the next
version.
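A minimal sketch of the reader-side hazard described in the diagram above.
This is illustrative only, not code from the series; lookup_match() is a
hypothetical stand-in for the real 4-tuple comparison:

	/* Illustrative only: an RCU reader walking the old hash4 chain.
	 * hlist_for_each_entry_rcu() advances through ->next pointers.
	 * If the writer re-inserts the node into a new chain (step 4 in
	 * the diagram) before the reader dereferences ->next (step 5),
	 * the rest of this loop silently walks the *new* chain, so
	 * sockets sitting after the moved node in the old chain can be
	 * missed. The synchronize_rcu() between delete and re-insert
	 * guarantees no reader is still standing on the moved node.
	 */
	rcu_read_lock();
	hlist_for_each_entry_rcu(up, &old_hslot4->head, udp_lrpa_node) {
		if (lookup_match(up))	/* hypothetical 4-tuple check */
			break;
	}
	rcu_read_unlock();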
On 10/25/24 05:50, Philo Lu wrote:
> On 2024/10/24 23:01, Paolo Abeni wrote:
>> On 10/18/24 13:45, Philo Lu wrote:
>> [...]
>>> +/* In hash4, rehash can also happen in connect(), where hash4_cnt keeps unchanged. */
>>> +static void udp4_rehash4(struct udp_table *udptable, struct sock *sk, u16 newhash4)
>>> +{
>>> +	struct udp_hslot *hslot4, *nhslot4;
>>> +
>>> +	hslot4 = udp_hashslot4(udptable, udp_sk(sk)->udp_lrpa_hash);
>>> +	nhslot4 = udp_hashslot4(udptable, newhash4);
>>> +	udp_sk(sk)->udp_lrpa_hash = newhash4;
>>> +
>>> +	if (hslot4 != nhslot4) {
>>> +		spin_lock_bh(&hslot4->lock);
>>> +		hlist_del_init_rcu(&udp_sk(sk)->udp_lrpa_node);
>>> +		hslot4->count--;
>>> +		spin_unlock_bh(&hslot4->lock);
>>> +
>>> +		synchronize_rcu();
>>
>> This deserves a comment explaining why it's needed. I had to dig into past
>> revisions to understand it.
>>
>
> Got it. Here is a short explanation (see [1] for details):
>
> Here we move a node from one hlist to another, i.e., we update node->next
> so that it points from the old hlist into the new one. For a reader
> traversing the old hlist, if node->next is updated just as the reader
> steps onto the moved node, the reader is carried over into the new hlist.
> This is unexpected.
>
> Reader(lookup)           Writer(rehash)
> -----------------        ---------------
> 1. rcu_read_lock()
> 2. pos = sk;
>                          3. hlist_del_init_rcu(sk, old_slot)
>                          4. hlist_add_head_rcu(sk, new_slot)
> 5. pos = pos->next; <=
> 6. rcu_read_unlock()
>
> [1]
> https://lore.kernel.org/all/0fb425e0-5482-4cdf-9dc1-3906751f8f81@linux.alibaba.com/

Thanks. AFAICS the problem this could cause is a lookup failure for a
socket positioned later in the same chain, when a previous entry is moved
to a different slot during a concurrent lookup.

I think that could be solved the same way TCP handles such a scenario:
using an hlist_nulls RCU list for the hash4 bucket, checking that a failed
lookup ends in the same bucket where it started, and, if not, reiterating
from the original bucket.

Have a look at __inet_lookup_established() for a more descriptive
reference, especially:

https://elixir.bootlin.com/linux/v6.12-rc4/source/net/ipv4/inet_hashtables.c#L528

>>> +
>>> +		spin_lock_bh(&nhslot4->lock);
>>> +		hlist_add_head_rcu(&udp_sk(sk)->udp_lrpa_node, &nhslot4->head);
>>> +		nhslot4->count++;
>>> +		spin_unlock_bh(&nhslot4->lock);
>>> +	}
>>> +}
>>> +
>>> +static void udp4_unhash4(struct udp_table *udptable, struct sock *sk)
>>> +{
>>> +	struct udp_hslot *hslot2, *hslot4;
>>> +
>>> +	if (udp_hashed4(sk)) {
>>> +		hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
>>> +		hslot4 = udp_hashslot4(udptable, udp_sk(sk)->udp_lrpa_hash);
>>> +
>>> +		spin_lock(&hslot4->lock);
>>> +		hlist_del_init_rcu(&udp_sk(sk)->udp_lrpa_node);
>>> +		hslot4->count--;
>>> +		spin_unlock(&hslot4->lock);
>>> +
>>> +		spin_lock(&hslot2->lock);
>>> +		udp_hash4_dec(hslot2);
>>> +		spin_unlock(&hslot2->lock);
>>> +	}
>>> +}
>>> +
>>> +/* call with sock lock */
>>> +static void udp4_hash4(struct sock *sk)
>>> +{
>>> +	struct udp_hslot *hslot, *hslot2, *hslot4;
>>> +	struct net *net = sock_net(sk);
>>> +	struct udp_table *udptable;
>>> +	unsigned int hash;
>>> +
>>> +	if (sk_unhashed(sk) || inet_sk(sk)->inet_rcv_saddr == htonl(INADDR_ANY))
>>> +		return;
>>> +
>>> +	hash = udp_ehashfn(net, inet_sk(sk)->inet_rcv_saddr, inet_sk(sk)->inet_num,
>>> +			   inet_sk(sk)->inet_daddr, inet_sk(sk)->inet_dport);
>>> +
>>> +	udptable = net->ipv4.udp_table;
>>> +	if (udp_hashed4(sk)) {
>>> +		udp4_rehash4(udptable, sk, hash);
>>
>> It's unclear to me how we can enter this branch. Also it's unclear why you
>> don't need to call udp_hash4_inc()/udp_hash4_dec() here, too. Why can't
>> such accounting be placed in udp4_rehash4()?
>>
>
> It's possible that a connected udp socket _re-connects_ to another remote
> address. Then, because the local address is not changed, hash2 and its
> hash4_cnt keep unchanged. But rehash4 needs to be done. I'll also add a
> comment here.

Right, a UDP socket could actually connect() successfully twice in a row
without a disconnect in between...

I almost missed the point that the ipv6 implementation is planned to land
afterwards.

I'm sorry, but I think that would be problematic - i.e. if ipv4 support
lands in 6.13 but ipv6 does not make it due to time constraints, we will
have (at least) a release with inconsistent behavior between ipv4 and
ipv6. I think it would be better to bundle such changes together.

Thanks,

Paolo
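A sketch of the retry pattern __inet_lookup_established() uses, transplanted
onto the hash4 naming from this series. The nulls head and slot variables
are assumptions for illustration, not the final API:

	/* Assumes hslot4 carries an hlist_nulls head whose nulls value is
	 * initialized to the slot index. If a concurrent rehash moves an
	 * entry (and a reader with it) onto another chain, the walk ends
	 * on a nulls marker that does not match the slot we started from,
	 * and we simply restart from the original bucket instead of
	 * returning a false negative.
	 */
begin:
	sk_nulls_for_each_rcu(sk, node, &hslot4->nulls_head) {
		if (inet_match(net, sk, acookie, ports, dif, sdif))
			return sk;
	}
	/* Chain ended: check we are still on the chain we started on. */
	if (get_nulls_value(node) != slot)
		goto begin;
	return NULL;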
On 2024/10/25 17:02, Paolo Abeni wrote:
> On 10/25/24 05:50, Philo Lu wrote:
>> On 2024/10/24 23:01, Paolo Abeni wrote:
>>> On 10/18/24 13:45, Philo Lu wrote:
>>> [...]
>>>> +/* In hash4, rehash can also happen in connect(), where hash4_cnt keeps unchanged. */
>>>> +static void udp4_rehash4(struct udp_table *udptable, struct sock *sk, u16 newhash4)
>>>> +{
>>>> +	struct udp_hslot *hslot4, *nhslot4;
>>>> +
>>>> +	hslot4 = udp_hashslot4(udptable, udp_sk(sk)->udp_lrpa_hash);
>>>> +	nhslot4 = udp_hashslot4(udptable, newhash4);
>>>> +	udp_sk(sk)->udp_lrpa_hash = newhash4;
>>>> +
>>>> +	if (hslot4 != nhslot4) {
>>>> +		spin_lock_bh(&hslot4->lock);
>>>> +		hlist_del_init_rcu(&udp_sk(sk)->udp_lrpa_node);
>>>> +		hslot4->count--;
>>>> +		spin_unlock_bh(&hslot4->lock);
>>>> +
>>>> +		synchronize_rcu();
>>>
>>> This deserves a comment explaining why it's needed. I had to dig into past
>>> revisions to understand it.
>>>
>>
>> Got it. Here is a short explanation (see [1] for details):
>>
>> Here we move a node from one hlist to another, i.e., we update node->next
>> so that it points from the old hlist into the new one. For a reader
>> traversing the old hlist, if node->next is updated just as the reader
>> steps onto the moved node, the reader is carried over into the new hlist.
>> This is unexpected.
>>
>> Reader(lookup)           Writer(rehash)
>> -----------------        ---------------
>> 1. rcu_read_lock()
>> 2. pos = sk;
>>                          3. hlist_del_init_rcu(sk, old_slot)
>>                          4. hlist_add_head_rcu(sk, new_slot)
>> 5. pos = pos->next; <=
>> 6. rcu_read_unlock()
>>
>> [1]
>> https://lore.kernel.org/all/0fb425e0-5482-4cdf-9dc1-3906751f8f81@linux.alibaba.com/
>
> Thanks. AFAICS the problem this could cause is a lookup failure for a
> socket positioned later in the same chain, when a previous entry is moved
> to a different slot during a concurrent lookup.
>

Yes, you're right.

> I think that could be solved the same way TCP handles such a scenario:
> using an hlist_nulls RCU list for the hash4 bucket, checking that a failed
> lookup ends in the same bucket where it started, and, if not, reiterating
> from the original bucket.
>
> Have a look at __inet_lookup_established() for a more descriptive
> reference, especially:
>
> https://elixir.bootlin.com/linux/v6.12-rc4/source/net/ipv4/inet_hashtables.c#L528
>

Thank you! I'll try it in the next version.

>>>> +	...
>>>> +
>>>> +/* call with sock lock */
>>>> +static void udp4_hash4(struct sock *sk)
>>>> +{
>>>> +	struct udp_hslot *hslot, *hslot2, *hslot4;
>>>> +	struct net *net = sock_net(sk);
>>>> +	struct udp_table *udptable;
>>>> +	unsigned int hash;
>>>> +
>>>> +	if (sk_unhashed(sk) || inet_sk(sk)->inet_rcv_saddr == htonl(INADDR_ANY))
>>>> +		return;
>>>> +
>>>> +	hash = udp_ehashfn(net, inet_sk(sk)->inet_rcv_saddr, inet_sk(sk)->inet_num,
>>>> +			   inet_sk(sk)->inet_daddr, inet_sk(sk)->inet_dport);
>>>> +
>>>> +	udptable = net->ipv4.udp_table;
>>>> +	if (udp_hashed4(sk)) {
>>>> +		udp4_rehash4(udptable, sk, hash);
>>>
>>> It's unclear to me how we can enter this branch. Also it's unclear why you
>>> don't need to call udp_hash4_inc()/udp_hash4_dec() here, too. Why can't
>>> such accounting be placed in udp4_rehash4()?
>>>
>>
>> It's possible that a connected udp socket _re-connects_ to another remote
>> address. Then, because the local address is not changed, hash2 and its
>> hash4_cnt keep unchanged. But rehash4 needs to be done. I'll also add a
>> comment here.
>
> Right, a UDP socket could actually connect() successfully twice in a row
> without a disconnect in between...
>
> I almost missed the point that the ipv6 implementation is planned to land
> afterwards.
>
> I'm sorry, but I think that would be problematic - i.e. if ipv4 support
> lands in 6.13 but ipv6 does not make it due to time constraints, we will
> have (at least) a release with inconsistent behavior between ipv4 and
> ipv6. I think it would be better to bundle such changes together.
>

No problem. I can add ipv6 support in the next version too. Thanks.
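To make the re-connect case discussed above concrete, a small userspace
sketch; peer addresses are placeholders from TEST-NET (192.0.2.0/24):

	/* A connected UDP socket may connect() again to a different peer
	 * without disconnecting first. The local 2-tuple (and thus hash2
	 * and its hash4_cnt) stays the same, but the 4-tuple changes, so
	 * only the hash4 rehash is needed on the second connect().
	 */
	#include <arpa/inet.h>
	#include <stdio.h>
	#include <sys/socket.h>

	int main(void)
	{
		struct sockaddr_in peer = { .sin_family = AF_INET };
		int fd = socket(AF_INET, SOCK_DGRAM, 0);

		peer.sin_port = htons(5000);
		inet_pton(AF_INET, "192.0.2.1", &peer.sin_addr);
		if (connect(fd, (struct sockaddr *)&peer, sizeof(peer)))
			perror("first connect");	/* socket now hashed4 */

		peer.sin_port = htons(6000);	/* re-connect: rehash4 only */
		if (connect(fd, (struct sockaddr *)&peer, sizeof(peer)))
			perror("second connect");
		return 0;
	}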
diff --git a/include/net/udp.h b/include/net/udp.h
index 8aefdc404362..97c5ae83723c 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -293,7 +293,7 @@ static inline int udp_lib_hash(struct sock *sk)
 }
 
 void udp_lib_unhash(struct sock *sk);
-void udp_lib_rehash(struct sock *sk, u16 new_hash);
+void udp_lib_rehash(struct sock *sk, u16 new_hash, u16 new_hash4);
 
 static inline void udp_lib_close(struct sock *sk, long timeout)
 {
@@ -386,6 +386,7 @@ int udp_rcv(struct sk_buff *skb);
 int udp_ioctl(struct sock *sk, int cmd, int *karg);
 int udp_init_sock(struct sock *sk);
 int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
+int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
 int __udp_disconnect(struct sock *sk, int flags);
 int udp_disconnect(struct sock *sk, int flags);
 __poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 74bfab0f44f8..5d944cec7a27 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -478,6 +478,134 @@ static struct sock *udp4_lib_lookup2(const struct net *net,
 	return result;
 }
 
+#if IS_ENABLED(CONFIG_BASE_SMALL)
+static struct sock *udp4_lib_lookup4(const struct net *net,
+				     __be32 saddr, __be16 sport,
+				     __be32 daddr, unsigned int hnum,
+				     int dif, int sdif,
+				     struct udp_table *udptable)
+{
+	return NULL;
+}
+
+static void udp4_rehash4(struct udp_table *udptable, struct sock *sk, u16 newhash4)
+{
+}
+
+static void udp4_unhash4(struct udp_table *udptable, struct sock *sk)
+{
+}
+
+static void udp4_hash4(struct sock *sk)
+{
+}
+#else /* !CONFIG_BASE_SMALL */
+static struct sock *udp4_lib_lookup4(const struct net *net,
+				     __be32 saddr, __be16 sport,
+				     __be32 daddr, unsigned int hnum,
+				     int dif, int sdif,
+				     struct udp_table *udptable)
+{
+	unsigned int hash4 = udp_ehashfn(net, daddr, hnum, saddr, sport);
+	const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
+	struct udp_hslot *hslot4 = udp_hashslot4(udptable, hash4);
+	struct udp_sock *up;
+	struct sock *sk;
+
+	INET_ADDR_COOKIE(acookie, saddr, daddr);
+	udp_lrpa_for_each_entry_rcu(up, &hslot4->head) {
+		sk = (struct sock *)up;
+		if (inet_match(net, sk, acookie, ports, dif, sdif))
+			return sk;
+	}
+	return NULL;
+}
+
+/* In hash4, rehash can also happen in connect(), where hash4_cnt keeps unchanged. */
+static void udp4_rehash4(struct udp_table *udptable, struct sock *sk, u16 newhash4)
+{
+	struct udp_hslot *hslot4, *nhslot4;
+
+	hslot4 = udp_hashslot4(udptable, udp_sk(sk)->udp_lrpa_hash);
+	nhslot4 = udp_hashslot4(udptable, newhash4);
+	udp_sk(sk)->udp_lrpa_hash = newhash4;
+
+	if (hslot4 != nhslot4) {
+		spin_lock_bh(&hslot4->lock);
+		hlist_del_init_rcu(&udp_sk(sk)->udp_lrpa_node);
+		hslot4->count--;
+		spin_unlock_bh(&hslot4->lock);
+
+		synchronize_rcu();
+
+		spin_lock_bh(&nhslot4->lock);
+		hlist_add_head_rcu(&udp_sk(sk)->udp_lrpa_node, &nhslot4->head);
+		nhslot4->count++;
+		spin_unlock_bh(&nhslot4->lock);
+	}
+}
+
+static void udp4_unhash4(struct udp_table *udptable, struct sock *sk)
+{
+	struct udp_hslot *hslot2, *hslot4;
+
+	if (udp_hashed4(sk)) {
+		hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
+		hslot4 = udp_hashslot4(udptable, udp_sk(sk)->udp_lrpa_hash);
+
+		spin_lock(&hslot4->lock);
+		hlist_del_init_rcu(&udp_sk(sk)->udp_lrpa_node);
+		hslot4->count--;
+		spin_unlock(&hslot4->lock);
+
+		spin_lock(&hslot2->lock);
+		udp_hash4_dec(hslot2);
+		spin_unlock(&hslot2->lock);
+	}
+}
+
+/* call with sock lock */
+static void udp4_hash4(struct sock *sk)
+{
+	struct udp_hslot *hslot, *hslot2, *hslot4;
+	struct net *net = sock_net(sk);
+	struct udp_table *udptable;
+	unsigned int hash;
+
+	if (sk_unhashed(sk) || inet_sk(sk)->inet_rcv_saddr == htonl(INADDR_ANY))
+		return;
+
+	hash = udp_ehashfn(net, inet_sk(sk)->inet_rcv_saddr, inet_sk(sk)->inet_num,
+			   inet_sk(sk)->inet_daddr, inet_sk(sk)->inet_dport);
+
+	udptable = net->ipv4.udp_table;
+	if (udp_hashed4(sk)) {
+		udp4_rehash4(udptable, sk, hash);
+		return;
+	}
+
+	hslot = udp_hashslot(udptable, net, udp_sk(sk)->udp_port_hash);
+	hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
+	hslot4 = udp_hashslot4(udptable, hash);
+	udp_sk(sk)->udp_lrpa_hash = hash;
+
+	spin_lock_bh(&hslot->lock);
+	if (rcu_access_pointer(sk->sk_reuseport_cb))
+		reuseport_detach_sock(sk);
+
+	spin_lock(&hslot4->lock);
+	hlist_add_head_rcu(&udp_sk(sk)->udp_lrpa_node, &hslot4->head);
+	hslot4->count++;
+	spin_unlock(&hslot4->lock);
+
+	spin_lock(&hslot2->lock);
+	udp_hash4_inc(hslot2);
+	spin_unlock(&hslot2->lock);
+
+	spin_unlock_bh(&hslot->lock);
+}
+#endif /* CONFIG_BASE_SMALL */
+
 /* UDP is nearly always wildcards out the wazoo, it makes no sense to try
  * harder than this. -DaveM
  */
@@ -493,6 +621,12 @@ struct sock *__udp4_lib_lookup(const struct net *net, __be32 saddr,
 	hash2 = ipv4_portaddr_hash(net, daddr, hnum);
 	hslot2 = udp_hashslot2(udptable, hash2);
 
+	if (udp_has_hash4(hslot2)) {
+		result = udp4_lib_lookup4(net, saddr, sport, daddr, hnum, dif, sdif, udptable);
+		if (result) /* udp4_lib_lookup4 return sk or NULL */
+			return result;
+	}
+
 	/* Lookup connected or non-wildcard socket */
 	result = udp4_lib_lookup2(net, saddr, sport,
 				  daddr, hnum, dif, sdif,
@@ -1931,6 +2065,19 @@ int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 }
 EXPORT_SYMBOL(udp_pre_connect);
 
+int udp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+{
+	int res;
+
+	lock_sock(sk);
+	res = __ip4_datagram_connect(sk, uaddr, addr_len);
+	if (!res)
+		udp4_hash4(sk);
+	release_sock(sk);
+	return res;
+}
+EXPORT_SYMBOL(udp_connect);
+
 int __udp_disconnect(struct sock *sk, int flags)
 {
 	struct inet_sock *inet = inet_sk(sk);
@@ -1990,6 +2137,8 @@ void udp_lib_unhash(struct sock *sk)
 			hlist_del_init_rcu(&udp_sk(sk)->udp_portaddr_node);
 			hslot2->count--;
 			spin_unlock(&hslot2->lock);
+
+			udp4_unhash4(udptable, sk);
 		}
 		spin_unlock_bh(&hslot->lock);
 	}
@@ -1999,7 +2148,7 @@ EXPORT_SYMBOL(udp_lib_unhash);
 /*
  * inet_rcv_saddr was changed, we must rehash secondary hash
  */
-void udp_lib_rehash(struct sock *sk, u16 newhash)
+void udp_lib_rehash(struct sock *sk, u16 newhash, u16 newhash4)
 {
 	if (sk_hashed(sk)) {
 		struct udp_table *udptable = udp_get_table_prot(sk);
@@ -2031,6 +2180,19 @@ void udp_lib_rehash(struct sock *sk, u16 newhash)
 			spin_unlock(&nhslot2->lock);
 		}
 
+		if (udp_hashed4(sk)) {
+			udp4_rehash4(udptable, sk, newhash4);
+
+			if (hslot2 != nhslot2) {
+				spin_lock(&hslot2->lock);
+				udp_hash4_dec(hslot2);
+				spin_unlock(&hslot2->lock);
+
+				spin_lock(&nhslot2->lock);
+				udp_hash4_inc(nhslot2);
+				spin_unlock(&nhslot2->lock);
+			}
+		}
 		spin_unlock_bh(&hslot->lock);
 	}
 }
@@ -2042,7 +2204,10 @@ void udp_v4_rehash(struct sock *sk)
 	u16 new_hash = ipv4_portaddr_hash(sock_net(sk),
 					  inet_sk(sk)->inet_rcv_saddr,
 					  inet_sk(sk)->inet_num);
-	udp_lib_rehash(sk, new_hash);
+	u16 new_hash4 = udp_ehashfn(sock_net(sk),
+				    inet_sk(sk)->inet_rcv_saddr, inet_sk(sk)->inet_num,
+				    inet_sk(sk)->inet_daddr, inet_sk(sk)->inet_dport);
+	udp_lib_rehash(sk, new_hash, new_hash4);
 }
 
 static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
@@ -2935,7 +3100,7 @@ struct proto udp_prot = {
 	.owner		= THIS_MODULE,
 	.close		= udp_lib_close,
 	.pre_connect	= udp_pre_connect,
-	.connect	= ip4_datagram_connect,
+	.connect	= udp_connect,
 	.disconnect	= udp_disconnect,
 	.ioctl		= udp_ioctl,
 	.init		= udp_init_sock,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index bbf3352213c4..4d3dfcb48a39 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -111,7 +111,7 @@ void udp_v6_rehash(struct sock *sk)
 					 &sk->sk_v6_rcv_saddr,
 					 inet_sk(sk)->inet_num);
 
-	udp_lib_rehash(sk, new_hash);
+	udp_lib_rehash(sk, new_hash, 0); /* 4-tuple hash not implemented */
 }
 
 static int compute_score(struct sock *sk, const struct net *net,
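The patch relies on hash4 helpers introduced earlier in the series
(udp_hashslot4(), udp_has_hash4(), udp_hash4_inc()/udp_hash4_dec()). A rough
sketch of their likely shape, inferred from how they are used above rather
than copied from the actual patches:

	/* Inferred sketch; the real definitions live in a preceding patch
	 * of this series and may differ in detail. hash4_cnt on the hslot2
	 * counts connected sockets sharing that (local port, local address)
	 * slot, letting __udp4_lib_lookup() skip the hash4 table entirely
	 * when it is zero.
	 */
	static inline struct udp_hslot *udp_hashslot4(struct udp_table *table,
						      unsigned int hash)
	{
		return &table->hash4[hash & table->mask];
	}

	static inline bool udp_has_hash4(const struct udp_hslot *hslot2)
	{
		return hslot2->hash4_cnt != 0;
	}

	static inline void udp_hash4_inc(struct udp_hslot *hslot2)
	{
		hslot2->hash4_cnt++;
	}

	static inline void udp_hash4_dec(struct udp_hslot *hslot2)
	{
		hslot2->hash4_cnt--;
	}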