| Message ID | 20230105191339.506839-2-xiyou.wangcong@gmail.com |
| --- | --- |
| State | Superseded |
| Delegated to | Netdev Maintainers |
| Series | l2tp: fix race conditions in l2tp_tunnel_register() |
On 05 Jan 11:13, Cong Wang wrote:
>From: Cong Wang <cong.wang@bytedance.com>
>
>l2tp uses l2tp_tunnel_list to track all registered tunnels and
>to allocate tunnel IDs. IDR can do the same job.
>
>More importantly, with IDR we can hold the ID before a successful
>registration so that we don't need to worry about late error
>handling; it is not easy to roll back socket changes.
>
>This is a preparation for the following fix.
>
>Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
>Cc: Guillaume Nault <g.nault@alphalink.fr>
>Cc: Jakub Sitnicki <jakub@cloudflare.com>
>Cc: Eric Dumazet <edumazet@google.com>
>Signed-off-by: Cong Wang <cong.wang@bytedance.com>
>---
> net/l2tp/l2tp_core.c    | 93 ++++++++++++++++++++++-------------
> net/l2tp/l2tp_core.h    |  3 +-
> net/l2tp/l2tp_netlink.c |  3 +-
> net/l2tp/l2tp_ppp.c     |  3 +-
> 4 files changed, 57 insertions(+), 45 deletions(-)
>
>diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
>index 9a1415fe3fa7..570249a91c6c 100644
>--- a/net/l2tp/l2tp_core.c
>+++ b/net/l2tp/l2tp_core.c
>@@ -104,9 +104,9 @@ static struct workqueue_struct *l2tp_wq;
> /* per-net private data for this module */
> static unsigned int l2tp_net_id;
> struct l2tp_net {
>-	struct list_head l2tp_tunnel_list;
>-	/* Lock for write access to l2tp_tunnel_list */
>-	spinlock_t l2tp_tunnel_list_lock;
>+	/* Lock for write access to l2tp_tunnel_idr */
>+	spinlock_t l2tp_tunnel_idr_lock;
>+	struct idr l2tp_tunnel_idr;
> 	struct hlist_head l2tp_session_hlist[L2TP_HASH_SIZE_2];
> 	/* Lock for write access to l2tp_session_hlist */
> 	spinlock_t l2tp_session_hlist_lock;
>@@ -208,13 +208,10 @@ struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id)
> 	struct l2tp_tunnel *tunnel;
>
> 	rcu_read_lock_bh();
>-	list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
>-		if (tunnel->tunnel_id == tunnel_id &&
>-		    refcount_inc_not_zero(&tunnel->ref_count)) {
>-			rcu_read_unlock_bh();
>-
>-			return tunnel;
>-		}
>+	tunnel = idr_find(&pn->l2tp_tunnel_idr, tunnel_id);
>+	if (tunnel && refcount_inc_not_zero(&tunnel->ref_count)) {
>+		rcu_read_unlock_bh();
>+		return tunnel;
> 	}
> 	rcu_read_unlock_bh();
>
>@@ -224,13 +221,14 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_get);
>
> struct l2tp_tunnel *l2tp_tunnel_get_nth(const struct net *net, int nth)
> {
>-	const struct l2tp_net *pn = l2tp_pernet(net);
>+	struct l2tp_net *pn = l2tp_pernet(net);

Any reason to remove the const keyword?

Other than that LGTM:
Reviewed-by: Saeed Mahameed <saeed@kernel.org>
On Sat, Jan 07, 2023 at 10:25:04AM -0800, Saeed Mahameed wrote:
> On 05 Jan 11:13, Cong Wang wrote:
> >From: Cong Wang <cong.wang@bytedance.com>
> >
> >l2tp uses l2tp_tunnel_list to track all registered tunnels and
> >to allocate tunnel IDs. IDR can do the same job.
> >
> >More importantly, with IDR we can hold the ID before a successful
> >registration so that we don't need to worry about late error
> >handling; it is not easy to roll back socket changes.
> >
> >This is a preparation for the following fix.
> >
> >[...]
> >
> >@@ -224,13 +221,14 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_get);
> >
> > struct l2tp_tunnel *l2tp_tunnel_get_nth(const struct net *net, int nth)
> > {
> >-	const struct l2tp_net *pn = l2tp_pernet(net);
> >+	struct l2tp_net *pn = l2tp_pernet(net);
>
> Any reason to remove the const keyword?

Yes, GCC complained about discarding const in idr_for_each_entry_ul().

Thanks.
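For context on that answer: idr_for_each_entry_ul() expands to calls of idr_get_next_ul(), which takes a plain struct idr *, so walking the tree from a const struct l2tp_net * would discard the qualifier. Below is a minimal sketch of the pattern (kernel build context assumed; the struct and function names are illustrative, only the IDR API itself is real):

```c
#include <linux/idr.h>
#include <linux/spinlock.h>

/* Illustrative stand-in for the per-net container added by the patch. */
struct l2tp_net_example {
	spinlock_t l2tp_tunnel_idr_lock;
	struct idr l2tp_tunnel_idr;
};

/* The parameter cannot be "const struct l2tp_net_example *":
 * idr_for_each_entry_ul() passes &pn->l2tp_tunnel_idr to
 * idr_get_next_ul(struct idr *, unsigned long *), which expects a
 * non-const pointer, so GCC warns about discarding the qualifier.
 */
static int l2tp_idr_count_example(struct l2tp_net_example *pn)
{
	unsigned long id, tmp;
	void *entry;
	int n = 0;

	idr_for_each_entry_ul(&pn->l2tp_tunnel_idr, entry, tmp, id)
		n++;	/* count the entries currently visible to iteration */
	return n;
}
```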
On Thu, Jan 05, 2023 at 11:13:38AM -0800, Cong Wang wrote:
> +int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id,
> +		       u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg,
> +		       struct l2tp_tunnel **tunnelp)
>  {
>  	struct l2tp_tunnel *tunnel = NULL;
>  	int err;
>  	enum l2tp_encap_type encap = L2TP_ENCAPTYPE_UDP;
> +	struct l2tp_net *pn = l2tp_pernet(net);
>
>  	if (cfg)
>  		encap = cfg->encap;
>
> +	spin_lock_bh(&pn->l2tp_tunnel_idr_lock);
> +	err = idr_alloc_u32(&pn->l2tp_tunnel_idr, NULL, &tunnel_id, tunnel_id,
> +			    GFP_ATOMIC);
> +	if (err) {
> +		spin_unlock_bh(&pn->l2tp_tunnel_idr_lock);
> +		return err;
> +	}
> +	spin_unlock_bh(&pn->l2tp_tunnel_idr_lock);

Why reserve the tunnel_id in l2tp_tunnel_create()? This function is
supposed to just allocate a structure and pre-initialise some fields.
The only cleanup required upon error after this call is to kfree() the
new structure. So I can't see any reason to guarantee the id will be
accepted by the future l2tp_tunnel_register() call.

Looks like you could reserve the id at the beginning of
l2tp_tunnel_register() instead. That'd avoid changing the API and thus
the side effects on l2tp_{ppp,netlink}.c. Also, we wouldn't need to
create l2tp_tunnel_remove().
On Sat, Jan 07, 2023 at 20:48:51 +0100, Guillaume Nault wrote:
> On Thu, Jan 05, 2023 at 11:13:38AM -0800, Cong Wang wrote:
> > [...]
>
> Why reserve the tunnel_id in l2tp_tunnel_create()? This function is
> supposed to just allocate a structure and pre-initialise some fields.
> The only cleanup required upon error after this call is to kfree() the
> new structure. So I can't see any reason to guarantee the id will be
> accepted by the future l2tp_tunnel_register() call.
>
> Looks like you could reserve the id at the beginning of
> l2tp_tunnel_register() instead. That'd avoid changing the API and thus
> the side effects on l2tp_{ppp,netlink}.c. Also, we wouldn't need to
> create l2tp_tunnel_remove().

FWIW I also think that'd make more sense, and would leave callsites with
less tidyup to do on behalf of l2tp_core.
On Sat, Jan 07, 2023 at 08:48:51PM +0100, Guillaume Nault wrote:
> On Thu, Jan 05, 2023 at 11:13:38AM -0800, Cong Wang wrote:
> > [...]
>
> Why reserve the tunnel_id in l2tp_tunnel_create()? This function is
> supposed to just allocate a structure and pre-initialise some fields.
> The only cleanup required upon error after this call is to kfree() the
> new structure. So I can't see any reason to guarantee the id will be
> accepted by the future l2tp_tunnel_register() call.
>
> Looks like you could reserve the id at the beginning of
> l2tp_tunnel_register() instead. That'd avoid changing the API and thus
> the side effects on l2tp_{ppp,netlink}.c. Also, we wouldn't need to
> create l2tp_tunnel_remove().

The idr_replace() is guaranteed to succeed in terms of ID allocation,
so either way could work, but I think you are right that the patch
could be smaller if we do it in l2tp_tunnel_register().

Thanks.
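To make the point about idr_replace() concrete, here is a condensed sketch of the reserve-then-publish idiom the patch relies on, using only the plain IDR API (the helper names are invented for illustration, and locking is reduced to the essentials). Wherever the reservation ends up, in l2tp_tunnel_create() or at the top of l2tp_tunnel_register(), the publish step is the same and cannot fail to find its slot:

```c
#include <linux/idr.h>
#include <linux/spinlock.h>

/* Reserve the ID with a NULL placeholder. idr_find() on this ID still
 * returns NULL, so the object is not yet visible to readers.
 */
static int example_reserve_id(struct idr *idr, spinlock_t *lock, u32 id)
{
	int err;

	spin_lock_bh(lock);
	/* min == max == id: either exactly this ID is reserved, or an
	 * error is returned (-ENOSPC if it is already in use).
	 */
	err = idr_alloc_u32(idr, NULL, &id, id, GFP_ATOMIC);
	spin_unlock_bh(lock);
	return err;
}

/* Publish: swap the placeholder for the real object. Because the slot
 * was reserved above, idr_replace() needs no allocation and no error
 * handling for the ID itself.
 */
static void example_publish(struct idr *idr, spinlock_t *lock, u32 id, void *obj)
{
	spin_lock_bh(lock);
	idr_replace(idr, obj, id);
	spin_unlock_bh(lock);
}

/* Roll back the reservation if a later setup step fails. */
static void example_unreserve(struct idr *idr, spinlock_t *lock, u32 id)
{
	spin_lock_bh(lock);
	idr_remove(idr, id);
	spin_unlock_bh(lock);
}
```

Since a reserved slot holds NULL, lookup and iteration paths have to test the entry before taking a reference, which is exactly what the patch's idr_find() and idr_for_each_entry_ul() users do.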
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 9a1415fe3fa7..570249a91c6c 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -104,9 +104,9 @@ static struct workqueue_struct *l2tp_wq;
 /* per-net private data for this module */
 static unsigned int l2tp_net_id;
 struct l2tp_net {
-	struct list_head l2tp_tunnel_list;
-	/* Lock for write access to l2tp_tunnel_list */
-	spinlock_t l2tp_tunnel_list_lock;
+	/* Lock for write access to l2tp_tunnel_idr */
+	spinlock_t l2tp_tunnel_idr_lock;
+	struct idr l2tp_tunnel_idr;
 	struct hlist_head l2tp_session_hlist[L2TP_HASH_SIZE_2];
 	/* Lock for write access to l2tp_session_hlist */
 	spinlock_t l2tp_session_hlist_lock;
@@ -208,13 +208,10 @@ struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id)
 	struct l2tp_tunnel *tunnel;
 
 	rcu_read_lock_bh();
-	list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
-		if (tunnel->tunnel_id == tunnel_id &&
-		    refcount_inc_not_zero(&tunnel->ref_count)) {
-			rcu_read_unlock_bh();
-
-			return tunnel;
-		}
+	tunnel = idr_find(&pn->l2tp_tunnel_idr, tunnel_id);
+	if (tunnel && refcount_inc_not_zero(&tunnel->ref_count)) {
+		rcu_read_unlock_bh();
+		return tunnel;
 	}
 	rcu_read_unlock_bh();
 
@@ -224,13 +221,14 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_get);
 
 struct l2tp_tunnel *l2tp_tunnel_get_nth(const struct net *net, int nth)
 {
-	const struct l2tp_net *pn = l2tp_pernet(net);
+	struct l2tp_net *pn = l2tp_pernet(net);
+	unsigned long tunnel_id, tmp;
 	struct l2tp_tunnel *tunnel;
 	int count = 0;
 
 	rcu_read_lock_bh();
-	list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
-		if (++count > nth &&
+	idr_for_each_entry_ul(&pn->l2tp_tunnel_idr, tunnel, tmp, tunnel_id) {
+		if (tunnel && ++count > nth &&
 		    refcount_inc_not_zero(&tunnel->ref_count)) {
 			rcu_read_unlock_bh();
 			return tunnel;
@@ -1234,7 +1232,6 @@ static void l2tp_tunnel_del_work(struct work_struct *work)
 						  del_work);
 	struct sock *sk = tunnel->sock;
 	struct socket *sock = sk->sk_socket;
-	struct l2tp_net *pn;
 
 	l2tp_tunnel_closeall(tunnel);
 
@@ -1248,12 +1245,7 @@ static void l2tp_tunnel_del_work(struct work_struct *work)
 		}
 	}
 
-	/* Remove the tunnel struct from the tunnel list */
-	pn = l2tp_pernet(tunnel->l2tp_net);
-	spin_lock_bh(&pn->l2tp_tunnel_list_lock);
-	list_del_rcu(&tunnel->list);
-	spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
-
+	l2tp_tunnel_remove(tunnel);
 	/* drop initial ref */
 	l2tp_tunnel_dec_refcount(tunnel);
 
@@ -1386,16 +1378,37 @@ static int l2tp_tunnel_sock_create(struct net *net,
 
 static struct lock_class_key l2tp_socket_class;
 
-int l2tp_tunnel_create(int fd, int version, u32 tunnel_id, u32 peer_tunnel_id,
-		       struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp)
+void l2tp_tunnel_remove(struct l2tp_tunnel *tunnel)
+{
+	struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
+
+	spin_lock_bh(&pn->l2tp_tunnel_idr_lock);
+	idr_remove(&pn->l2tp_tunnel_idr, tunnel->tunnel_id);
+	spin_unlock_bh(&pn->l2tp_tunnel_idr_lock);
+}
+EXPORT_SYMBOL_GPL(l2tp_tunnel_remove);
+
+int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id,
+		       u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg,
+		       struct l2tp_tunnel **tunnelp)
 {
 	struct l2tp_tunnel *tunnel = NULL;
 	int err;
 	enum l2tp_encap_type encap = L2TP_ENCAPTYPE_UDP;
+	struct l2tp_net *pn = l2tp_pernet(net);
 
 	if (cfg)
 		encap = cfg->encap;
 
+	spin_lock_bh(&pn->l2tp_tunnel_idr_lock);
+	err = idr_alloc_u32(&pn->l2tp_tunnel_idr, NULL, &tunnel_id, tunnel_id,
+			    GFP_ATOMIC);
+	if (err) {
+		spin_unlock_bh(&pn->l2tp_tunnel_idr_lock);
+		return err;
+	}
+	spin_unlock_bh(&pn->l2tp_tunnel_idr_lock);
+
 	tunnel = kzalloc(sizeof(*tunnel), GFP_KERNEL);
 	if (!tunnel) {
 		err = -ENOMEM;
@@ -1412,6 +1425,7 @@ int l2tp_tunnel_create(int fd, int version, u32 tunnel_id, u32 peer_tunnel_id,
 	tunnel->acpt_newsess = true;
 	tunnel->encap = encap;
 
+	tunnel->l2tp_net = net;
 	refcount_set(&tunnel->ref_count, 1);
 	tunnel->fd = fd;
 
@@ -1425,6 +1439,11 @@ int l2tp_tunnel_create(int fd, int version, u32 tunnel_id, u32 peer_tunnel_id,
 err:
 	if (tunnelp)
 		*tunnelp = tunnel;
+	if (err) {
+		spin_lock_bh(&pn->l2tp_tunnel_idr_lock);
+		idr_remove(&pn->l2tp_tunnel_idr, tunnel_id);
+		spin_unlock_bh(&pn->l2tp_tunnel_idr_lock);
+	}
 
 	return err;
 }
@@ -1455,7 +1474,6 @@ static int l2tp_validate_socket(const struct sock *sk, const struct net *net,
 int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
 			 struct l2tp_tunnel_cfg *cfg)
 {
-	struct l2tp_tunnel *tunnel_walk;
 	struct l2tp_net *pn;
 	struct socket *sock;
 	struct sock *sk;
@@ -1481,23 +1499,14 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
 	rcu_assign_sk_user_data(sk, tunnel);
 	write_unlock_bh(&sk->sk_callback_lock);
 
-	tunnel->l2tp_net = net;
 	pn = l2tp_pernet(net);
 
 	sock_hold(sk);
 	tunnel->sock = sk;
 
-	spin_lock_bh(&pn->l2tp_tunnel_list_lock);
-	list_for_each_entry(tunnel_walk, &pn->l2tp_tunnel_list, list) {
-		if (tunnel_walk->tunnel_id == tunnel->tunnel_id) {
-			spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
-			sock_put(sk);
-			ret = -EEXIST;
-			goto err_sock;
-		}
-	}
-	list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list);
-	spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
+	spin_lock_bh(&pn->l2tp_tunnel_idr_lock);
+	idr_replace(&pn->l2tp_tunnel_idr, tunnel, tunnel->tunnel_id);
+	spin_unlock_bh(&pn->l2tp_tunnel_idr_lock);
 
 	if (tunnel->encap == L2TP_ENCAPTYPE_UDP) {
 		struct udp_tunnel_sock_cfg udp_cfg = {
@@ -1523,9 +1532,6 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
 
 	return 0;
 
-err_sock:
-	write_lock_bh(&sk->sk_callback_lock);
-	rcu_assign_sk_user_data(sk, NULL);
 err_inval_sock:
 	write_unlock_bh(&sk->sk_callback_lock);
 
@@ -1647,8 +1653,8 @@ static __net_init int l2tp_init_net(struct net *net)
 	struct l2tp_net *pn = net_generic(net, l2tp_net_id);
 	int hash;
 
-	INIT_LIST_HEAD(&pn->l2tp_tunnel_list);
-	spin_lock_init(&pn->l2tp_tunnel_list_lock);
+	idr_init(&pn->l2tp_tunnel_idr);
+	spin_lock_init(&pn->l2tp_tunnel_idr_lock);
 
 	for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++)
 		INIT_HLIST_HEAD(&pn->l2tp_session_hlist[hash]);
@@ -1662,11 +1668,13 @@ static __net_exit void l2tp_exit_net(struct net *net)
 {
 	struct l2tp_net *pn = l2tp_pernet(net);
 	struct l2tp_tunnel *tunnel = NULL;
+	unsigned long tunnel_id, tmp;
 	int hash;
 
 	rcu_read_lock_bh();
-	list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
-		l2tp_tunnel_delete(tunnel);
+	idr_for_each_entry_ul(&pn->l2tp_tunnel_idr, tunnel, tmp, tunnel_id) {
+		if (tunnel)
+			l2tp_tunnel_delete(tunnel);
 	}
 	rcu_read_unlock_bh();
 
@@ -1676,6 +1684,7 @@ static __net_exit void l2tp_exit_net(struct net *net)
 
 	for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++)
 		WARN_ON_ONCE(!hlist_empty(&pn->l2tp_session_hlist[hash]));
+	idr_destroy(&pn->l2tp_tunnel_idr);
 }
 
 static struct pernet_operations l2tp_net_ops = {
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index a88e070b431d..aec7e0611591 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -236,9 +236,10 @@ struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net,
  * Creation of a new instance is a two-step process: create, then register.
  * Destruction is triggered using the *_delete functions, and completes asynchronously.
  */
-int l2tp_tunnel_create(int fd, int version, u32 tunnel_id,
+int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id,
 		       u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg,
 		       struct l2tp_tunnel **tunnelp);
+void l2tp_tunnel_remove(struct l2tp_tunnel *tunnel);
 int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
 			 struct l2tp_tunnel_cfg *cfg);
 void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel);
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index a901fd14fe3b..6fcf36a8eafd 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -233,7 +233,7 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info
 	switch (cfg.encap) {
 	case L2TP_ENCAPTYPE_UDP:
 	case L2TP_ENCAPTYPE_IP:
-		ret = l2tp_tunnel_create(fd, proto_version, tunnel_id,
+		ret = l2tp_tunnel_create(net, fd, proto_version, tunnel_id,
 					 peer_tunnel_id, &cfg, &tunnel);
 		break;
 	}
@@ -244,6 +244,7 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info
 	l2tp_tunnel_inc_refcount(tunnel);
 	ret = l2tp_tunnel_register(tunnel, net, &cfg);
 	if (ret < 0) {
+		l2tp_tunnel_remove(tunnel);
 		kfree(tunnel);
 		goto out;
 	}
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index db2e584c625e..813ada637485 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -711,7 +711,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 			goto end;
 		}
 
-		error = l2tp_tunnel_create(info.fd,
+		error = l2tp_tunnel_create(sock_net(sk), info.fd,
 					   info.version, info.tunnel_id,
 					   info.peer_tunnel_id, &tcfg,
 					   &tunnel);
@@ -723,6 +723,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 		error = l2tp_tunnel_register(tunnel, sock_net(sk),
 					     &tcfg);
 		if (error < 0) {
+			l2tp_tunnel_remove(tunnel);
 			kfree(tunnel);
 			goto end;
 		}
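For reference, a condensed sketch of the calling convention after this version of the patch, modelled on the l2tp_netlink.c hunk above (the wrapper function is hypothetical and error handling is trimmed to the parts the series changes):

```c
/* Assumes it is built inside net/l2tp, next to the in-tree header. */
#include <linux/slab.h>
#include "l2tp_core.h"

/* Hypothetical caller: create reserves the ID, register publishes it,
 * and l2tp_tunnel_remove() releases the reservation if registration fails.
 */
static int example_tunnel_setup(struct net *net, int fd, int version,
				u32 tunnel_id, u32 peer_tunnel_id,
				struct l2tp_tunnel_cfg *cfg)
{
	struct l2tp_tunnel *tunnel;
	int ret;

	ret = l2tp_tunnel_create(net, fd, version, tunnel_id,
				 peer_tunnel_id, cfg, &tunnel);
	if (ret < 0)
		return ret;	/* create already dropped its ID reservation */

	l2tp_tunnel_inc_refcount(tunnel);
	ret = l2tp_tunnel_register(tunnel, net, cfg);
	if (ret < 0) {
		l2tp_tunnel_remove(tunnel);	/* release the reserved ID */
		kfree(tunnel);
		return ret;
	}

	l2tp_tunnel_dec_refcount(tunnel);
	return 0;
}
```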