Message ID | 20210323003808.16074-3-xiyou.wangcong@gmail.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | BPF |
Series | sockmap: introduce BPF_SK_SKB_VERDICT and support UDP |
Context | Check | Description |
---|---|---|
netdev/cover_letter | success |
netdev/fixes_present | success |
netdev/patch_count | success |
netdev/tree_selection | success | Clearly marked for bpf-next |
netdev/subject_prefix | success |
netdev/cc_maintainers | warning | 11 maintainers not CCed: dsahern@kernel.org yhs@fb.com kpsingh@kernel.org yoshfuji@linux-ipv6.org andrii@kernel.org kafai@fb.com ast@kernel.org kuba@kernel.org songliubraving@fb.com davem@davemloft.net edumazet@google.com |
netdev/source_inline | success | Was 0 now: 0 |
netdev/verify_signedoff | success |
netdev/module_param | success | Was 0 now: 0 |
netdev/build_32bit | success | Errors and warnings before: 114 this patch: 114 |
netdev/kdoc | success | Errors and warnings before: 0 this patch: 0 |
netdev/verify_fixes | success |
netdev/checkpatch | warning | CHECK: spinlock_t definition without comment WARNING: line length of 81 exceeds 80 columns |
netdev/build_allmodconfig_warn | success | Errors and warnings before: 114 this patch: 114 |
netdev/header_inline | success |
On 2021/3/23 8:37, Cong Wang wrote:
> From: Cong Wang <cong.wang@bytedance.com>
>
> Currently we rely on lock_sock to protect ingress_msg,
> it is too big for this, we can actually just use a spinlock
> to protect this list like protecting other skb queues.
>
> __tcp_bpf_recvmsg() is still special because of peeking,
> it still has to use lock_sock.
>
> Cc: John Fastabend <john.fastabend@gmail.com>
> Cc: Daniel Borkmann <daniel@iogearbox.net>
> Cc: Lorenz Bauer <lmb@cloudflare.com>
> Acked-by: Jakub Sitnicki <jakub@cloudflare.com>
> Signed-off-by: Cong Wang <cong.wang@bytedance.com>
> ---
>  include/linux/skmsg.h | 46 +++++++++++++++++++++++++++++++++++++++++++
>  net/core/skmsg.c      |  3 +++
>  net/ipv4/tcp_bpf.c    | 18 ++++++-----------
>  3 files changed, 55 insertions(+), 12 deletions(-)
>
> diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
> index 6c09d94be2e9..f2d45a73b2b2 100644
> --- a/include/linux/skmsg.h
> +++ b/include/linux/skmsg.h
> @@ -89,6 +89,7 @@ struct sk_psock {
>  #endif
>  	struct sk_buff_head	ingress_skb;
>  	struct list_head	ingress_msg;
> +	spinlock_t		ingress_lock;
>  	unsigned long		state;
>  	struct list_head	link;
>  	spinlock_t		link_lock;
> @@ -284,7 +285,45 @@ static inline struct sk_psock *sk_psock(const struct sock *sk)
>  static inline void sk_psock_queue_msg(struct sk_psock *psock,
>  				      struct sk_msg *msg)
>  {
> +	spin_lock_bh(&psock->ingress_lock);
>  	list_add_tail(&msg->list, &psock->ingress_msg);
> +	spin_unlock_bh(&psock->ingress_lock);
> +}
> +
> +static inline struct sk_msg *sk_psock_dequeue_msg(struct sk_psock *psock)
> +{
> +	struct sk_msg *msg;
> +
> +	spin_lock_bh(&psock->ingress_lock);
> +	msg = list_first_entry_or_null(&psock->ingress_msg, struct sk_msg, list);
> +	if (msg)
> +		list_del(&msg->list);
> +	spin_unlock_bh(&psock->ingress_lock);
> +	return msg;
> +}
> +
> +static inline struct sk_msg *sk_psock_peek_msg(struct sk_psock *psock)
> +{
> +	struct sk_msg *msg;
> +
> +	spin_lock_bh(&psock->ingress_lock);
> +	msg = list_first_entry_or_null(&psock->ingress_msg, struct sk_msg, list);
> +	spin_unlock_bh(&psock->ingress_lock);
> +	return msg;
> +}
> +
> +static inline struct sk_msg *sk_psock_next_msg(struct sk_psock *psock,
> +					       struct sk_msg *msg)
> +{
> +	struct sk_msg *ret;

Nit:
Use msg instead of ret to be consistently with sk_psock_dequeue_msg()
and sk_psock_next_msg().

> +
> +	spin_lock_bh(&psock->ingress_lock);
> +	if (list_is_last(&msg->list, &psock->ingress_msg))
> +		ret = NULL;
> +	else
> +		ret = list_next_entry(msg, list);
> +	spin_unlock_bh(&psock->ingress_lock);
> +	return ret;
>  }
On Mon, Mar 22, 2021 at 6:25 PM Yunsheng Lin <linyunsheng@huawei.com> wrote:
>
> On 2021/3/23 8:37, Cong Wang wrote:
> > +static inline struct sk_msg *sk_psock_next_msg(struct sk_psock *psock,
> > +					       struct sk_msg *msg)
> > +{
> > +	struct sk_msg *ret;
>
> Nit:
> Use msg instead of ret to be consistently with sk_psock_dequeue_msg()
> and sk_psock_next_msg().

Please read it carefully, clearly 'msg' is already a parameter name here.

Thanks.
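To see the locking change in isolation before reading the full patch below, here is a minimal userspace sketch of the pattern it introduces: a FIFO of messages guarded by its own small spinlock, so producers and consumers no longer need a heavyweight per-socket lock just to touch the list. All names here (msg_queue, msg_node, queue_msg, dequeue_msg) are illustrative stand-ins, not kernel API; the kernel version uses list_head and spin_lock_bh() on psock->ingress_lock.

```c
/* Userspace analogue of a spinlock-protected ingress message queue. */
#include <pthread.h>
#include <stdio.h>

struct msg_node {
	struct msg_node *next;
	int data;
};

struct msg_queue {
	pthread_spinlock_t lock;	/* plays the role of ingress_lock */
	struct msg_node *head, *tail;
};

static void queue_init(struct msg_queue *q)
{
	pthread_spin_init(&q->lock, PTHREAD_PROCESS_PRIVATE);
	q->head = q->tail = NULL;
}

/* cf. sk_psock_queue_msg(): append under the queue's own lock */
static void queue_msg(struct msg_queue *q, struct msg_node *m)
{
	m->next = NULL;
	pthread_spin_lock(&q->lock);
	if (q->tail)
		q->tail->next = m;
	else
		q->head = m;
	q->tail = m;
	pthread_spin_unlock(&q->lock);
}

/* cf. sk_psock_dequeue_msg(): pop the head, or NULL if empty */
static struct msg_node *dequeue_msg(struct msg_queue *q)
{
	struct msg_node *m;

	pthread_spin_lock(&q->lock);
	m = q->head;
	if (m) {
		q->head = m->next;
		if (!q->head)
			q->tail = NULL;
	}
	pthread_spin_unlock(&q->lock);
	return m;
}

int main(void)
{
	struct msg_queue q;
	struct msg_node a = { .data = 1 }, b = { .data = 2 };
	struct msg_node *m;

	queue_init(&q);
	queue_msg(&q, &a);
	queue_msg(&q, &b);
	while ((m = dequeue_msg(&q)))
		printf("dequeued %d\n", m->data);	/* 1 then 2: FIFO */
	return 0;
}
```

Each operation holds the lock only for the handful of pointer updates it needs, which is what lets the patch drop lock_sock from the plain enqueue/dequeue paths.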
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 6c09d94be2e9..f2d45a73b2b2 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -89,6 +89,7 @@ struct sk_psock {
 #endif
 	struct sk_buff_head	ingress_skb;
 	struct list_head	ingress_msg;
+	spinlock_t		ingress_lock;
 	unsigned long		state;
 	struct list_head	link;
 	spinlock_t		link_lock;
@@ -284,7 +285,45 @@ static inline struct sk_psock *sk_psock(const struct sock *sk)
 static inline void sk_psock_queue_msg(struct sk_psock *psock,
 				      struct sk_msg *msg)
 {
+	spin_lock_bh(&psock->ingress_lock);
 	list_add_tail(&msg->list, &psock->ingress_msg);
+	spin_unlock_bh(&psock->ingress_lock);
+}
+
+static inline struct sk_msg *sk_psock_dequeue_msg(struct sk_psock *psock)
+{
+	struct sk_msg *msg;
+
+	spin_lock_bh(&psock->ingress_lock);
+	msg = list_first_entry_or_null(&psock->ingress_msg, struct sk_msg, list);
+	if (msg)
+		list_del(&msg->list);
+	spin_unlock_bh(&psock->ingress_lock);
+	return msg;
+}
+
+static inline struct sk_msg *sk_psock_peek_msg(struct sk_psock *psock)
+{
+	struct sk_msg *msg;
+
+	spin_lock_bh(&psock->ingress_lock);
+	msg = list_first_entry_or_null(&psock->ingress_msg, struct sk_msg, list);
+	spin_unlock_bh(&psock->ingress_lock);
+	return msg;
+}
+
+static inline struct sk_msg *sk_psock_next_msg(struct sk_psock *psock,
+					       struct sk_msg *msg)
+{
+	struct sk_msg *ret;
+
+	spin_lock_bh(&psock->ingress_lock);
+	if (list_is_last(&msg->list, &psock->ingress_msg))
+		ret = NULL;
+	else
+		ret = list_next_entry(msg, list);
+	spin_unlock_bh(&psock->ingress_lock);
+	return ret;
 }
 
 static inline bool sk_psock_queue_empty(const struct sk_psock *psock)
@@ -292,6 +331,13 @@ static inline bool sk_psock_queue_empty(const struct sk_psock *psock)
 	return psock ? list_empty(&psock->ingress_msg) : true;
 }
 
+static inline void kfree_sk_msg(struct sk_msg *msg)
+{
+	if (msg->skb)
+		consume_skb(msg->skb);
+	kfree(msg);
+}
+
 static inline void sk_psock_report_error(struct sk_psock *psock, int err)
 {
 	struct sock *sk = psock->sk;
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index bebf84ed4e30..305dddc51857 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -592,6 +592,7 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node)
 
 	INIT_WORK(&psock->work, sk_psock_backlog);
 	INIT_LIST_HEAD(&psock->ingress_msg);
+	spin_lock_init(&psock->ingress_lock);
 	skb_queue_head_init(&psock->ingress_skb);
 
 	sk_psock_set_state(psock, SK_PSOCK_TX_ENABLED);
@@ -638,7 +639,9 @@ static void sk_psock_zap_ingress(struct sk_psock *psock)
 		skb_bpf_redirect_clear(skb);
 		kfree_skb(skb);
 	}
+	spin_lock_bh(&psock->ingress_lock);
 	__sk_psock_purge_ingress_msg(psock);
+	spin_unlock_bh(&psock->ingress_lock);
 }
 
 static void sk_psock_link_destroy(struct sk_psock *psock)
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 17c322b875fd..ae980716d896 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -18,9 +18,7 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
 	struct sk_msg *msg_rx;
 	int i, copied = 0;
 
-	msg_rx = list_first_entry_or_null(&psock->ingress_msg,
-					  struct sk_msg, list);
-
+	msg_rx = sk_psock_peek_msg(psock);
 	while (copied != len) {
 		struct scatterlist *sge;
 
@@ -68,22 +66,18 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
 		} while (i != msg_rx->sg.end);
 
 		if (unlikely(peek)) {
-			if (msg_rx == list_last_entry(&psock->ingress_msg,
-						      struct sk_msg, list))
+			msg_rx = sk_psock_next_msg(psock, msg_rx);
+			if (!msg_rx)
 				break;
-			msg_rx = list_next_entry(msg_rx, list);
 			continue;
 		}
 
 		msg_rx->sg.start = i;
 		if (!sge->length && msg_rx->sg.start == msg_rx->sg.end) {
-			list_del(&msg_rx->list);
-			if (msg_rx->skb)
-				consume_skb(msg_rx->skb);
-			kfree(msg_rx);
+			msg_rx = sk_psock_dequeue_msg(psock);
+			kfree_sk_msg(msg_rx);
 		}
-		msg_rx = list_first_entry_or_null(&psock->ingress_msg,
-						  struct sk_msg, list);
+		msg_rx = sk_psock_peek_msg(psock);
 	}
 
 	return copied;
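The reader side of the patch maps onto the same toy queue as follows. This fragment reuses the msg_queue/msg_node types from the sketch above; peek_msg, next_msg, and peek_all are hypothetical stand-ins for sk_psock_peek_msg(), sk_psock_next_msg(), and the MSG_PEEK branch of __tcp_bpf_recvmsg(), not kernel API.

```c
/* Reader-side sketch mirroring the patched __tcp_bpf_recvmsg() loop. */
static struct msg_node *peek_msg(struct msg_queue *q)
{
	struct msg_node *m;

	pthread_spin_lock(&q->lock);	/* cf. sk_psock_peek_msg() */
	m = q->head;
	pthread_spin_unlock(&q->lock);
	return m;
}

static struct msg_node *next_msg(struct msg_queue *q, struct msg_node *m)
{
	struct msg_node *n;

	pthread_spin_lock(&q->lock);	/* cf. sk_psock_next_msg() */
	n = m->next;			/* NULL at the end of the list */
	pthread_spin_unlock(&q->lock);
	return n;
}

/* MSG_PEEK-style walk: observe every queued message, remove nothing. */
static void peek_all(struct msg_queue *q)
{
	struct msg_node *m;

	for (m = peek_msg(q); m; m = next_msg(q, m))
		printf("peeked %d\n", m->data);
}
```

Note the caveat the commit message calls out: each helper holds ingress_lock only for a single step, so a walk like this is not atomic as a whole. That is why __tcp_bpf_recvmsg(), which may peek across several messages, still runs under lock_sock in addition to the new spinlock.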