Message ID | 20240710212555.1617795-6-amery.hung@bytedance.com (mailing list archive) |
---|---|
State | RFC |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | virtio/vsock: support datagrams | expand |
On Wed, Jul 10, 2024 at 09:25:46PM GMT, Amery Hung wrote: >From: Bobby Eshleman <bobby.eshleman@bytedance.com> > >This commit adds support for bound dgram sockets to be tracked in a >separate bind table from connectible sockets in order to avoid address >collisions. With this commit, users can simultaneously bind a dgram >socket and connectible socket to the same CID and port. > >Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com> >--- > net/vmw_vsock/af_vsock.c | 103 +++++++++++++++++++++++++++++---------- > 1 file changed, 76 insertions(+), 27 deletions(-) > >diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c >index d571be9cdbf0..ab08cd81720e 100644 >--- a/net/vmw_vsock/af_vsock.c >+++ b/net/vmw_vsock/af_vsock.c >@@ -10,18 +10,23 @@ > * - There are two kinds of sockets: those created by user action (such as > * calling socket(2)) and those created by incoming connection request packets. > * >- * - There are two "global" tables, one for bound sockets (sockets that have >- * specified an address that they are responsible for) and one for connected >- * sockets (sockets that have established a connection with another socket). >- * These tables are "global" in that all sockets on the system are placed >- * within them. - Note, though, that the bound table contains an extra entry >- * for a list of unbound sockets and SOCK_DGRAM sockets will always remain in >- * that list. The bound table is used solely for lookup of sockets when packets >- * are received and that's not necessary for SOCK_DGRAM sockets since we create >- * a datagram handle for each and need not perform a lookup. Keeping SOCK_DGRAM >- * sockets out of the bound hash buckets will reduce the chance of collisions >- * when looking for SOCK_STREAM sockets and prevents us from having to check the >- * socket type in the hash table lookups. >+ * - There are three "global" tables, one for bound connectible (stream / >+ * seqpacket) sockets, one for bound datagram sockets, and one for connected >+ * sockets. Bound sockets are sockets that have specified an address that >+ * they are responsible for. Connected sockets are sockets that have >+ * established a connection with another socket. These tables are "global" in >+ * that all sockets on the system are placed within them. - Note, though, >+ * that the bound tables contain an extra entry for a list of unbound >+ * sockets. The bound tables are used solely for lookup of sockets when packets >+ * are received. >+ * >+ * - There are separate bind tables for connectible and datagram sockets to avoid >+ * address collisions between stream/seqpacket sockets and datagram sockets. >+ * >+ * - Transports may elect to NOT use the global datagram bind table by >+ * implementing the ->dgram_bind() callback. If that callback is implemented, >+ * the global bind table is not used and the responsibility of bound datagram >+ * socket tracking is deferred to the transport. > * > * - Sockets created by user action will either be "client" sockets that > * initiate a connection or "server" sockets that listen for connections; we do >@@ -116,6 +121,7 @@ > static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr); > static void vsock_sk_destruct(struct sock *sk); > static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); >+static bool sock_type_connectible(u16 type); > > /* Protocol family. */ > struct proto vsock_proto = { >@@ -152,21 +158,25 @@ static DEFINE_MUTEX(vsock_register_mutex); > * VSocket is stored in the connected hash table. > * > * Unbound sockets are all put on the same list attached to the end of the hash >- * table (vsock_unbound_sockets). Bound sockets are added to the hash table in >- * the bucket that their local address hashes to (vsock_bound_sockets(addr) >- * represents the list that addr hashes to). >+ * tables (vsock_unbound_sockets/vsock_unbound_dgram_sockets). Bound sockets >+ * are added to the hash table in the bucket that their local address hashes to >+ * (vsock_bound_sockets(addr) and vsock_bound_dgram_sockets(addr) represents >+ * the list that addr hashes to). > * >- * Specifically, we initialize the vsock_bind_table array to a size of >- * VSOCK_HASH_SIZE + 1 so that vsock_bind_table[0] through >- * vsock_bind_table[VSOCK_HASH_SIZE - 1] are for bound sockets and >- * vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets. The hash function >- * mods with VSOCK_HASH_SIZE to ensure this. >+ * Specifically, taking connectible sockets as an example we initialize the >+ * vsock_bind_table array to a size of VSOCK_HASH_SIZE + 1 so that >+ * vsock_bind_table[0] through vsock_bind_table[VSOCK_HASH_SIZE - 1] are for >+ * bound sockets and vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets. >+ * The hash function mods with VSOCK_HASH_SIZE to ensure this. >+ * Datagrams and vsock_dgram_bind_table operate in the same way. > */ > #define MAX_PORT_RETRIES 24 > > #define VSOCK_HASH(addr) ((addr)->svm_port % VSOCK_HASH_SIZE) > #define vsock_bound_sockets(addr) (&vsock_bind_table[VSOCK_HASH(addr)]) >+#define vsock_bound_dgram_sockets(addr) (&vsock_dgram_bind_table[VSOCK_HASH(addr)]) > #define vsock_unbound_sockets (&vsock_bind_table[VSOCK_HASH_SIZE]) >+#define vsock_unbound_dgram_sockets (&vsock_dgram_bind_table[VSOCK_HASH_SIZE]) > > /* XXX This can probably be implemented in a better way. */ > #define VSOCK_CONN_HASH(src, dst) \ >@@ -182,6 +192,8 @@ struct list_head vsock_connected_table[VSOCK_HASH_SIZE]; > EXPORT_SYMBOL_GPL(vsock_connected_table); > DEFINE_SPINLOCK(vsock_table_lock); > EXPORT_SYMBOL_GPL(vsock_table_lock); >+static struct list_head vsock_dgram_bind_table[VSOCK_HASH_SIZE + 1]; >+static DEFINE_SPINLOCK(vsock_dgram_table_lock); > > /* Autobind this socket to the local address if necessary. */ > static int vsock_auto_bind(struct vsock_sock *vsk) >@@ -204,6 +216,9 @@ static void vsock_init_tables(void) > > for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) > INIT_LIST_HEAD(&vsock_connected_table[i]); >+ >+ for (i = 0; i < ARRAY_SIZE(vsock_dgram_bind_table); i++) >+ INIT_LIST_HEAD(&vsock_dgram_bind_table[i]); > } > > static void __vsock_insert_bound(struct list_head *list, >@@ -271,13 +286,28 @@ static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src, > return NULL; > } > >-static void vsock_insert_unbound(struct vsock_sock *vsk) >+static void __vsock_insert_dgram_unbound(struct vsock_sock *vsk) >+{ >+ spin_lock_bh(&vsock_dgram_table_lock); >+ __vsock_insert_bound(vsock_unbound_dgram_sockets, vsk); >+ spin_unlock_bh(&vsock_dgram_table_lock); >+} >+ >+static void __vsock_insert_connectible_unbound(struct vsock_sock *vsk) > { > spin_lock_bh(&vsock_table_lock); > __vsock_insert_bound(vsock_unbound_sockets, vsk); > spin_unlock_bh(&vsock_table_lock); > } > >+static void vsock_insert_unbound(struct vsock_sock *vsk) >+{ >+ if (sock_type_connectible(sk_vsock(vsk)->sk_type)) >+ __vsock_insert_connectible_unbound(vsk); >+ else >+ __vsock_insert_dgram_unbound(vsk); >+} >+ > void vsock_insert_connected(struct vsock_sock *vsk) > { > struct list_head *list = vsock_connected_sockets( >@@ -289,6 +319,14 @@ void vsock_insert_connected(struct vsock_sock *vsk) > } > EXPORT_SYMBOL_GPL(vsock_insert_connected); > >+static void vsock_remove_dgram_bound(struct vsock_sock *vsk) >+{ >+ spin_lock_bh(&vsock_dgram_table_lock); >+ if (__vsock_in_bound_table(vsk)) >+ __vsock_remove_bound(vsk); >+ spin_unlock_bh(&vsock_dgram_table_lock); >+} >+ > void vsock_remove_bound(struct vsock_sock *vsk) > { > spin_lock_bh(&vsock_table_lock); >@@ -340,7 +378,10 @@ EXPORT_SYMBOL_GPL(vsock_find_connected_socket); > > void vsock_remove_sock(struct vsock_sock *vsk) > { >- vsock_remove_bound(vsk); >+ if (sock_type_connectible(sk_vsock(vsk)->sk_type)) >+ vsock_remove_bound(vsk); >+ else >+ vsock_remove_dgram_bound(vsk); Can we try to be consistent, for example we have vsock_insert_unbound() which calls internally sock_type_connectible(), while vsock_remove_bound() is just for connectible sockets. It's a bit confusing. > vsock_remove_connected(vsk); > } > EXPORT_SYMBOL_GPL(vsock_remove_sock); >@@ -746,11 +787,19 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk, > return vsock_bind_common(vsk, addr, vsock_bind_table, VSOCK_HASH_SIZE + 1); > } > >-static int __vsock_bind_dgram(struct vsock_sock *vsk, >- struct sockaddr_vm *addr) >+static int vsock_bind_dgram(struct vsock_sock *vsk, >+ struct sockaddr_vm *addr) Why we are renaming this? > { >- if (!vsk->transport || !vsk->transport->dgram_bind) >- return -EINVAL; >+ if (!vsk->transport || !vsk->transport->dgram_bind) { Why this condition? Maybe a comment here is needed because I'm lost... >+ int retval; >+ >+ spin_lock_bh(&vsock_dgram_table_lock); >+ retval = vsock_bind_common(vsk, addr, vsock_dgram_bind_table, >+ VSOCK_HASH_SIZE); Should we use VSOCK_HASH_SIZE + 1 here? Using ARRAY_SIZE(x) should avoid this problem. >+ spin_unlock_bh(&vsock_dgram_table_lock); >+ >+ return retval; >+ } > > return vsk->transport->dgram_bind(vsk, addr); > } >@@ -781,7 +830,7 @@ static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr) > break; > > case SOCK_DGRAM: >- retval = __vsock_bind_dgram(vsk, addr); >+ retval = vsock_bind_dgram(vsk, addr); > break; > > default: >-- >2.20.1 >
On Tue, Jul 23, 2024 at 7:41 AM Stefano Garzarella <sgarzare@redhat.com> wrote: > > On Wed, Jul 10, 2024 at 09:25:46PM GMT, Amery Hung wrote: > >From: Bobby Eshleman <bobby.eshleman@bytedance.com> > > > >This commit adds support for bound dgram sockets to be tracked in a > >separate bind table from connectible sockets in order to avoid address > >collisions. With this commit, users can simultaneously bind a dgram > >socket and connectible socket to the same CID and port. > > > >Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com> > >--- > > net/vmw_vsock/af_vsock.c | 103 +++++++++++++++++++++++++++++---------- > > 1 file changed, 76 insertions(+), 27 deletions(-) > > > >diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c > >index d571be9cdbf0..ab08cd81720e 100644 > >--- a/net/vmw_vsock/af_vsock.c > >+++ b/net/vmw_vsock/af_vsock.c > >@@ -10,18 +10,23 @@ > > * - There are two kinds of sockets: those created by user action (such as > > * calling socket(2)) and those created by incoming connection request packets. > > * > >- * - There are two "global" tables, one for bound sockets (sockets that have > >- * specified an address that they are responsible for) and one for connected > >- * sockets (sockets that have established a connection with another socket). > >- * These tables are "global" in that all sockets on the system are placed > >- * within them. - Note, though, that the bound table contains an extra entry > >- * for a list of unbound sockets and SOCK_DGRAM sockets will always remain in > >- * that list. The bound table is used solely for lookup of sockets when packets > >- * are received and that's not necessary for SOCK_DGRAM sockets since we create > >- * a datagram handle for each and need not perform a lookup. Keeping SOCK_DGRAM > >- * sockets out of the bound hash buckets will reduce the chance of collisions > >- * when looking for SOCK_STREAM sockets and prevents us from having to check the > >- * socket type in the hash table lookups. > >+ * - There are three "global" tables, one for bound connectible (stream / > >+ * seqpacket) sockets, one for bound datagram sockets, and one for connected > >+ * sockets. Bound sockets are sockets that have specified an address that > >+ * they are responsible for. Connected sockets are sockets that have > >+ * established a connection with another socket. These tables are "global" in > >+ * that all sockets on the system are placed within them. - Note, though, > >+ * that the bound tables contain an extra entry for a list of unbound > >+ * sockets. The bound tables are used solely for lookup of sockets when packets > >+ * are received. > >+ * > >+ * - There are separate bind tables for connectible and datagram sockets to avoid > >+ * address collisions between stream/seqpacket sockets and datagram sockets. > >+ * > >+ * - Transports may elect to NOT use the global datagram bind table by > >+ * implementing the ->dgram_bind() callback. If that callback is implemented, > >+ * the global bind table is not used and the responsibility of bound datagram > >+ * socket tracking is deferred to the transport. > > * > > * - Sockets created by user action will either be "client" sockets that > > * initiate a connection or "server" sockets that listen for connections; we do > >@@ -116,6 +121,7 @@ > > static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr); > > static void vsock_sk_destruct(struct sock *sk); > > static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); > >+static bool sock_type_connectible(u16 type); > > > > /* Protocol family. */ > > struct proto vsock_proto = { > >@@ -152,21 +158,25 @@ static DEFINE_MUTEX(vsock_register_mutex); > > * VSocket is stored in the connected hash table. > > * > > * Unbound sockets are all put on the same list attached to the end of the hash > >- * table (vsock_unbound_sockets). Bound sockets are added to the hash table in > >- * the bucket that their local address hashes to (vsock_bound_sockets(addr) > >- * represents the list that addr hashes to). > >+ * tables (vsock_unbound_sockets/vsock_unbound_dgram_sockets). Bound sockets > >+ * are added to the hash table in the bucket that their local address hashes to > >+ * (vsock_bound_sockets(addr) and vsock_bound_dgram_sockets(addr) represents > >+ * the list that addr hashes to). > > * > >- * Specifically, we initialize the vsock_bind_table array to a size of > >- * VSOCK_HASH_SIZE + 1 so that vsock_bind_table[0] through > >- * vsock_bind_table[VSOCK_HASH_SIZE - 1] are for bound sockets and > >- * vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets. The hash function > >- * mods with VSOCK_HASH_SIZE to ensure this. > >+ * Specifically, taking connectible sockets as an example we initialize the > >+ * vsock_bind_table array to a size of VSOCK_HASH_SIZE + 1 so that > >+ * vsock_bind_table[0] through vsock_bind_table[VSOCK_HASH_SIZE - 1] are for > >+ * bound sockets and vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets. > >+ * The hash function mods with VSOCK_HASH_SIZE to ensure this. > >+ * Datagrams and vsock_dgram_bind_table operate in the same way. > > */ > > #define MAX_PORT_RETRIES 24 > > > > #define VSOCK_HASH(addr) ((addr)->svm_port % VSOCK_HASH_SIZE) > > #define vsock_bound_sockets(addr) (&vsock_bind_table[VSOCK_HASH(addr)]) > >+#define vsock_bound_dgram_sockets(addr) (&vsock_dgram_bind_table[VSOCK_HASH(addr)]) > > #define vsock_unbound_sockets (&vsock_bind_table[VSOCK_HASH_SIZE]) > >+#define vsock_unbound_dgram_sockets (&vsock_dgram_bind_table[VSOCK_HASH_SIZE]) > > > > /* XXX This can probably be implemented in a better way. */ > > #define VSOCK_CONN_HASH(src, dst) \ > >@@ -182,6 +192,8 @@ struct list_head vsock_connected_table[VSOCK_HASH_SIZE]; > > EXPORT_SYMBOL_GPL(vsock_connected_table); > > DEFINE_SPINLOCK(vsock_table_lock); > > EXPORT_SYMBOL_GPL(vsock_table_lock); > >+static struct list_head vsock_dgram_bind_table[VSOCK_HASH_SIZE + 1]; > >+static DEFINE_SPINLOCK(vsock_dgram_table_lock); > > > > /* Autobind this socket to the local address if necessary. */ > > static int vsock_auto_bind(struct vsock_sock *vsk) > >@@ -204,6 +216,9 @@ static void vsock_init_tables(void) > > > > for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) > > INIT_LIST_HEAD(&vsock_connected_table[i]); > >+ > >+ for (i = 0; i < ARRAY_SIZE(vsock_dgram_bind_table); i++) > >+ INIT_LIST_HEAD(&vsock_dgram_bind_table[i]); > > } > > > > static void __vsock_insert_bound(struct list_head *list, > >@@ -271,13 +286,28 @@ static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src, > > return NULL; > > } > > > >-static void vsock_insert_unbound(struct vsock_sock *vsk) > >+static void __vsock_insert_dgram_unbound(struct vsock_sock *vsk) > >+{ > >+ spin_lock_bh(&vsock_dgram_table_lock); > >+ __vsock_insert_bound(vsock_unbound_dgram_sockets, vsk); > >+ spin_unlock_bh(&vsock_dgram_table_lock); > >+} > >+ > >+static void __vsock_insert_connectible_unbound(struct vsock_sock *vsk) > > { > > spin_lock_bh(&vsock_table_lock); > > __vsock_insert_bound(vsock_unbound_sockets, vsk); > > spin_unlock_bh(&vsock_table_lock); > > } > > > >+static void vsock_insert_unbound(struct vsock_sock *vsk) > >+{ > >+ if (sock_type_connectible(sk_vsock(vsk)->sk_type)) > >+ __vsock_insert_connectible_unbound(vsk); > >+ else > >+ __vsock_insert_dgram_unbound(vsk); > >+} > >+ > > void vsock_insert_connected(struct vsock_sock *vsk) > > { > > struct list_head *list = vsock_connected_sockets( > >@@ -289,6 +319,14 @@ void vsock_insert_connected(struct vsock_sock *vsk) > > } > > EXPORT_SYMBOL_GPL(vsock_insert_connected); > > > >+static void vsock_remove_dgram_bound(struct vsock_sock *vsk) > >+{ > >+ spin_lock_bh(&vsock_dgram_table_lock); > >+ if (__vsock_in_bound_table(vsk)) > >+ __vsock_remove_bound(vsk); > >+ spin_unlock_bh(&vsock_dgram_table_lock); > >+} > >+ > > void vsock_remove_bound(struct vsock_sock *vsk) > > { > > spin_lock_bh(&vsock_table_lock); > >@@ -340,7 +378,10 @@ EXPORT_SYMBOL_GPL(vsock_find_connected_socket); > > > > void vsock_remove_sock(struct vsock_sock *vsk) > > { > >- vsock_remove_bound(vsk); > >+ if (sock_type_connectible(sk_vsock(vsk)->sk_type)) > >+ vsock_remove_bound(vsk); > >+ else > >+ vsock_remove_dgram_bound(vsk); > > Can we try to be consistent, for example we have vsock_insert_unbound() > which calls internally sock_type_connectible(), while > vsock_remove_bound() is just for connectible sockets. It's a bit > confusing. I agree with you. I will make the style more consistent by keeping vsock_insert_unbound() only work on connectible sockets. > > > vsock_remove_connected(vsk); > > } > > EXPORT_SYMBOL_GPL(vsock_remove_sock); > >@@ -746,11 +787,19 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk, > > return vsock_bind_common(vsk, addr, vsock_bind_table, VSOCK_HASH_SIZE + 1); > > } > > > >-static int __vsock_bind_dgram(struct vsock_sock *vsk, > >- struct sockaddr_vm *addr) > >+static int vsock_bind_dgram(struct vsock_sock *vsk, > >+ struct sockaddr_vm *addr) > > Why we are renaming this? I will keep the original __vsock_bind_dgram() for consistency. > > > { > >- if (!vsk->transport || !vsk->transport->dgram_bind) > >- return -EINVAL; > >+ if (!vsk->transport || !vsk->transport->dgram_bind) { > > Why this condition? > > Maybe a comment here is needed because I'm lost... We currently use !vsk->transport->dgram_bind to determine if this is VMCI dgram transport. Will add a comment explaining this. > > >+ int retval; > >+ > >+ spin_lock_bh(&vsock_dgram_table_lock); > >+ retval = vsock_bind_common(vsk, addr, vsock_dgram_bind_table, > >+ VSOCK_HASH_SIZE); > > Should we use VSOCK_HASH_SIZE + 1 here? > > Using ARRAY_SIZE(x) should avoid this problem. Yes. The size here is wrong. I will remove the size check (the discussion is in patch 4). Thanks, Amery > > > >+ spin_unlock_bh(&vsock_dgram_table_lock); > >+ > >+ return retval; > >+ } > > > > return vsk->transport->dgram_bind(vsk, addr); > > } > >@@ -781,7 +830,7 @@ static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr) > > break; > > > > case SOCK_DGRAM: > >- retval = __vsock_bind_dgram(vsk, addr); > >+ retval = vsock_bind_dgram(vsk, addr); > > break; > > > > default: > >-- > >2.20.1 > > >
On Sun, Jul 28, 2024 at 02:37:24PM GMT, Amery Hung wrote: >On Tue, Jul 23, 2024 at 7:41 AM Stefano Garzarella <sgarzare@redhat.com> wrote: >> >> On Wed, Jul 10, 2024 at 09:25:46PM GMT, Amery Hung wrote: >> >From: Bobby Eshleman <bobby.eshleman@bytedance.com> >> > >> >This commit adds support for bound dgram sockets to be tracked in a >> >separate bind table from connectible sockets in order to avoid address >> >collisions. With this commit, users can simultaneously bind a dgram >> >socket and connectible socket to the same CID and port. >> > >> >Signed-off-by: Bobby Eshleman <bobby.eshleman@bytedance.com> >> >--- >> > net/vmw_vsock/af_vsock.c | 103 +++++++++++++++++++++++++++++---------- >> > 1 file changed, 76 insertions(+), 27 deletions(-) >> > >> >diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c >> >index d571be9cdbf0..ab08cd81720e 100644 >> >--- a/net/vmw_vsock/af_vsock.c >> >+++ b/net/vmw_vsock/af_vsock.c >> >@@ -10,18 +10,23 @@ >> > * - There are two kinds of sockets: those created by user action (such as >> > * calling socket(2)) and those created by incoming connection request packets. >> > * >> >- * - There are two "global" tables, one for bound sockets (sockets that have >> >- * specified an address that they are responsible for) and one for connected >> >- * sockets (sockets that have established a connection with another socket). >> >- * These tables are "global" in that all sockets on the system are placed >> >- * within them. - Note, though, that the bound table contains an extra entry >> >- * for a list of unbound sockets and SOCK_DGRAM sockets will always remain in >> >- * that list. The bound table is used solely for lookup of sockets when packets >> >- * are received and that's not necessary for SOCK_DGRAM sockets since we create >> >- * a datagram handle for each and need not perform a lookup. Keeping SOCK_DGRAM >> >- * sockets out of the bound hash buckets will reduce the chance of collisions >> >- * when looking for SOCK_STREAM sockets and prevents us from having to check the >> >- * socket type in the hash table lookups. >> >+ * - There are three "global" tables, one for bound connectible (stream / >> >+ * seqpacket) sockets, one for bound datagram sockets, and one for connected >> >+ * sockets. Bound sockets are sockets that have specified an address that >> >+ * they are responsible for. Connected sockets are sockets that have >> >+ * established a connection with another socket. These tables are "global" in >> >+ * that all sockets on the system are placed within them. - Note, though, >> >+ * that the bound tables contain an extra entry for a list of unbound >> >+ * sockets. The bound tables are used solely for lookup of sockets when packets >> >+ * are received. >> >+ * >> >+ * - There are separate bind tables for connectible and datagram sockets to avoid >> >+ * address collisions between stream/seqpacket sockets and datagram sockets. >> >+ * >> >+ * - Transports may elect to NOT use the global datagram bind table by >> >+ * implementing the ->dgram_bind() callback. If that callback is implemented, >> >+ * the global bind table is not used and the responsibility of bound datagram >> >+ * socket tracking is deferred to the transport. >> > * >> > * - Sockets created by user action will either be "client" sockets that >> > * initiate a connection or "server" sockets that listen for connections; we do >> >@@ -116,6 +121,7 @@ >> > static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr); >> > static void vsock_sk_destruct(struct sock *sk); >> > static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); >> >+static bool sock_type_connectible(u16 type); >> > >> > /* Protocol family. */ >> > struct proto vsock_proto = { >> >@@ -152,21 +158,25 @@ static DEFINE_MUTEX(vsock_register_mutex); >> > * VSocket is stored in the connected hash table. >> > * >> > * Unbound sockets are all put on the same list attached to the end of the hash >> >- * table (vsock_unbound_sockets). Bound sockets are added to the hash table in >> >- * the bucket that their local address hashes to (vsock_bound_sockets(addr) >> >- * represents the list that addr hashes to). >> >+ * tables (vsock_unbound_sockets/vsock_unbound_dgram_sockets). Bound sockets >> >+ * are added to the hash table in the bucket that their local address hashes to >> >+ * (vsock_bound_sockets(addr) and vsock_bound_dgram_sockets(addr) represents >> >+ * the list that addr hashes to). >> > * >> >- * Specifically, we initialize the vsock_bind_table array to a size of >> >- * VSOCK_HASH_SIZE + 1 so that vsock_bind_table[0] through >> >- * vsock_bind_table[VSOCK_HASH_SIZE - 1] are for bound sockets and >> >- * vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets. The hash function >> >- * mods with VSOCK_HASH_SIZE to ensure this. >> >+ * Specifically, taking connectible sockets as an example we initialize the >> >+ * vsock_bind_table array to a size of VSOCK_HASH_SIZE + 1 so that >> >+ * vsock_bind_table[0] through vsock_bind_table[VSOCK_HASH_SIZE - 1] are for >> >+ * bound sockets and vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets. >> >+ * The hash function mods with VSOCK_HASH_SIZE to ensure this. >> >+ * Datagrams and vsock_dgram_bind_table operate in the same way. >> > */ >> > #define MAX_PORT_RETRIES 24 >> > >> > #define VSOCK_HASH(addr) ((addr)->svm_port % VSOCK_HASH_SIZE) >> > #define vsock_bound_sockets(addr) (&vsock_bind_table[VSOCK_HASH(addr)]) >> >+#define vsock_bound_dgram_sockets(addr) (&vsock_dgram_bind_table[VSOCK_HASH(addr)]) >> > #define vsock_unbound_sockets (&vsock_bind_table[VSOCK_HASH_SIZE]) >> >+#define vsock_unbound_dgram_sockets (&vsock_dgram_bind_table[VSOCK_HASH_SIZE]) >> > >> > /* XXX This can probably be implemented in a better way. */ >> > #define VSOCK_CONN_HASH(src, dst) \ >> >@@ -182,6 +192,8 @@ struct list_head vsock_connected_table[VSOCK_HASH_SIZE]; >> > EXPORT_SYMBOL_GPL(vsock_connected_table); >> > DEFINE_SPINLOCK(vsock_table_lock); >> > EXPORT_SYMBOL_GPL(vsock_table_lock); >> >+static struct list_head vsock_dgram_bind_table[VSOCK_HASH_SIZE + 1]; >> >+static DEFINE_SPINLOCK(vsock_dgram_table_lock); >> > >> > /* Autobind this socket to the local address if necessary. */ >> > static int vsock_auto_bind(struct vsock_sock *vsk) >> >@@ -204,6 +216,9 @@ static void vsock_init_tables(void) >> > >> > for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) >> > INIT_LIST_HEAD(&vsock_connected_table[i]); >> >+ >> >+ for (i = 0; i < ARRAY_SIZE(vsock_dgram_bind_table); i++) >> >+ INIT_LIST_HEAD(&vsock_dgram_bind_table[i]); >> > } >> > >> > static void __vsock_insert_bound(struct list_head *list, >> >@@ -271,13 +286,28 @@ static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src, >> > return NULL; >> > } >> > >> >-static void vsock_insert_unbound(struct vsock_sock *vsk) >> >+static void __vsock_insert_dgram_unbound(struct vsock_sock *vsk) >> >+{ >> >+ spin_lock_bh(&vsock_dgram_table_lock); >> >+ __vsock_insert_bound(vsock_unbound_dgram_sockets, vsk); >> >+ spin_unlock_bh(&vsock_dgram_table_lock); >> >+} >> >+ >> >+static void __vsock_insert_connectible_unbound(struct vsock_sock *vsk) >> > { >> > spin_lock_bh(&vsock_table_lock); >> > __vsock_insert_bound(vsock_unbound_sockets, vsk); >> > spin_unlock_bh(&vsock_table_lock); >> > } >> > >> >+static void vsock_insert_unbound(struct vsock_sock *vsk) >> >+{ >> >+ if (sock_type_connectible(sk_vsock(vsk)->sk_type)) >> >+ __vsock_insert_connectible_unbound(vsk); >> >+ else >> >+ __vsock_insert_dgram_unbound(vsk); >> >+} >> >+ >> > void vsock_insert_connected(struct vsock_sock *vsk) >> > { >> > struct list_head *list = vsock_connected_sockets( >> >@@ -289,6 +319,14 @@ void vsock_insert_connected(struct vsock_sock *vsk) >> > } >> > EXPORT_SYMBOL_GPL(vsock_insert_connected); >> > >> >+static void vsock_remove_dgram_bound(struct vsock_sock *vsk) >> >+{ >> >+ spin_lock_bh(&vsock_dgram_table_lock); >> >+ if (__vsock_in_bound_table(vsk)) >> >+ __vsock_remove_bound(vsk); >> >+ spin_unlock_bh(&vsock_dgram_table_lock); >> >+} >> >+ >> > void vsock_remove_bound(struct vsock_sock *vsk) >> > { >> > spin_lock_bh(&vsock_table_lock); >> >@@ -340,7 +378,10 @@ EXPORT_SYMBOL_GPL(vsock_find_connected_socket); >> > >> > void vsock_remove_sock(struct vsock_sock *vsk) >> > { >> >- vsock_remove_bound(vsk); >> >+ if (sock_type_connectible(sk_vsock(vsk)->sk_type)) >> >+ vsock_remove_bound(vsk); >> >+ else >> >+ vsock_remove_dgram_bound(vsk); >> >> Can we try to be consistent, for example we have vsock_insert_unbound() >> which calls internally sock_type_connectible(), while >> vsock_remove_bound() is just for connectible sockets. It's a bit >> confusing. > >I agree with you. I will make the style more consistent by keeping >vsock_insert_unbound() only work on connectible sockets. Maybe I would have done the opposite, making vsock_remove_bound() usable on all sockets. But I haven't really looked at whether that's feasible or not. > >> >> > vsock_remove_connected(vsk); >> > } >> > EXPORT_SYMBOL_GPL(vsock_remove_sock); >> >@@ -746,11 +787,19 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk, >> > return vsock_bind_common(vsk, addr, vsock_bind_table, VSOCK_HASH_SIZE + 1); >> > } >> > >> >-static int __vsock_bind_dgram(struct vsock_sock *vsk, >> >- struct sockaddr_vm *addr) >> >+static int vsock_bind_dgram(struct vsock_sock *vsk, >> >+ struct sockaddr_vm *addr) >> >> Why we are renaming this? > >I will keep the original __vsock_bind_dgram() for consistency. > >> >> > { >> >- if (!vsk->transport || !vsk->transport->dgram_bind) >> >- return -EINVAL; >> >+ if (!vsk->transport || !vsk->transport->dgram_bind) { >> >> Why this condition? >> >> Maybe a comment here is needed because I'm lost... > >We currently use !vsk->transport->dgram_bind to determine if this is >VMCI dgram transport. Will add a comment explaining this. Thanks, what's not clear to me is why before this series we returned an error, whereas now we call vsock_bind_common(). Thanks, Stefano > >> >> >+ int retval; >> >+ >> >+ spin_lock_bh(&vsock_dgram_table_lock); >> >+ retval = vsock_bind_common(vsk, addr, vsock_dgram_bind_table, >> >+ VSOCK_HASH_SIZE); >> >> Should we use VSOCK_HASH_SIZE + 1 here? >> >> Using ARRAY_SIZE(x) should avoid this problem. > >Yes. The size here is wrong. I will remove the size check (the >discussion is in patch 4). > >Thanks, >Amery > > > >> >> >> >+ spin_unlock_bh(&vsock_dgram_table_lock); >> >+ >> >+ return retval; >> >+ } >> > >> > return vsk->transport->dgram_bind(vsk, addr); >> > } >> >@@ -781,7 +830,7 @@ static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr) >> > break; >> > >> > case SOCK_DGRAM: >> >- retval = __vsock_bind_dgram(vsk, addr); >> >+ retval = vsock_bind_dgram(vsk, addr); >> > break; >> > >> > default: >> >-- >> >2.20.1 >> > >> >
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index d571be9cdbf0..ab08cd81720e 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -10,18 +10,23 @@ * - There are two kinds of sockets: those created by user action (such as * calling socket(2)) and those created by incoming connection request packets. * - * - There are two "global" tables, one for bound sockets (sockets that have - * specified an address that they are responsible for) and one for connected - * sockets (sockets that have established a connection with another socket). - * These tables are "global" in that all sockets on the system are placed - * within them. - Note, though, that the bound table contains an extra entry - * for a list of unbound sockets and SOCK_DGRAM sockets will always remain in - * that list. The bound table is used solely for lookup of sockets when packets - * are received and that's not necessary for SOCK_DGRAM sockets since we create - * a datagram handle for each and need not perform a lookup. Keeping SOCK_DGRAM - * sockets out of the bound hash buckets will reduce the chance of collisions - * when looking for SOCK_STREAM sockets and prevents us from having to check the - * socket type in the hash table lookups. + * - There are three "global" tables, one for bound connectible (stream / + * seqpacket) sockets, one for bound datagram sockets, and one for connected + * sockets. Bound sockets are sockets that have specified an address that + * they are responsible for. Connected sockets are sockets that have + * established a connection with another socket. These tables are "global" in + * that all sockets on the system are placed within them. - Note, though, + * that the bound tables contain an extra entry for a list of unbound + * sockets. The bound tables are used solely for lookup of sockets when packets + * are received. + * + * - There are separate bind tables for connectible and datagram sockets to avoid + * address collisions between stream/seqpacket sockets and datagram sockets. + * + * - Transports may elect to NOT use the global datagram bind table by + * implementing the ->dgram_bind() callback. If that callback is implemented, + * the global bind table is not used and the responsibility of bound datagram + * socket tracking is deferred to the transport. * * - Sockets created by user action will either be "client" sockets that * initiate a connection or "server" sockets that listen for connections; we do @@ -116,6 +121,7 @@ static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr); static void vsock_sk_destruct(struct sock *sk); static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); +static bool sock_type_connectible(u16 type); /* Protocol family. */ struct proto vsock_proto = { @@ -152,21 +158,25 @@ static DEFINE_MUTEX(vsock_register_mutex); * VSocket is stored in the connected hash table. * * Unbound sockets are all put on the same list attached to the end of the hash - * table (vsock_unbound_sockets). Bound sockets are added to the hash table in - * the bucket that their local address hashes to (vsock_bound_sockets(addr) - * represents the list that addr hashes to). + * tables (vsock_unbound_sockets/vsock_unbound_dgram_sockets). Bound sockets + * are added to the hash table in the bucket that their local address hashes to + * (vsock_bound_sockets(addr) and vsock_bound_dgram_sockets(addr) represents + * the list that addr hashes to). * - * Specifically, we initialize the vsock_bind_table array to a size of - * VSOCK_HASH_SIZE + 1 so that vsock_bind_table[0] through - * vsock_bind_table[VSOCK_HASH_SIZE - 1] are for bound sockets and - * vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets. The hash function - * mods with VSOCK_HASH_SIZE to ensure this. + * Specifically, taking connectible sockets as an example we initialize the + * vsock_bind_table array to a size of VSOCK_HASH_SIZE + 1 so that + * vsock_bind_table[0] through vsock_bind_table[VSOCK_HASH_SIZE - 1] are for + * bound sockets and vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets. + * The hash function mods with VSOCK_HASH_SIZE to ensure this. + * Datagrams and vsock_dgram_bind_table operate in the same way. */ #define MAX_PORT_RETRIES 24 #define VSOCK_HASH(addr) ((addr)->svm_port % VSOCK_HASH_SIZE) #define vsock_bound_sockets(addr) (&vsock_bind_table[VSOCK_HASH(addr)]) +#define vsock_bound_dgram_sockets(addr) (&vsock_dgram_bind_table[VSOCK_HASH(addr)]) #define vsock_unbound_sockets (&vsock_bind_table[VSOCK_HASH_SIZE]) +#define vsock_unbound_dgram_sockets (&vsock_dgram_bind_table[VSOCK_HASH_SIZE]) /* XXX This can probably be implemented in a better way. */ #define VSOCK_CONN_HASH(src, dst) \ @@ -182,6 +192,8 @@ struct list_head vsock_connected_table[VSOCK_HASH_SIZE]; EXPORT_SYMBOL_GPL(vsock_connected_table); DEFINE_SPINLOCK(vsock_table_lock); EXPORT_SYMBOL_GPL(vsock_table_lock); +static struct list_head vsock_dgram_bind_table[VSOCK_HASH_SIZE + 1]; +static DEFINE_SPINLOCK(vsock_dgram_table_lock); /* Autobind this socket to the local address if necessary. */ static int vsock_auto_bind(struct vsock_sock *vsk) @@ -204,6 +216,9 @@ static void vsock_init_tables(void) for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) INIT_LIST_HEAD(&vsock_connected_table[i]); + + for (i = 0; i < ARRAY_SIZE(vsock_dgram_bind_table); i++) + INIT_LIST_HEAD(&vsock_dgram_bind_table[i]); } static void __vsock_insert_bound(struct list_head *list, @@ -271,13 +286,28 @@ static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src, return NULL; } -static void vsock_insert_unbound(struct vsock_sock *vsk) +static void __vsock_insert_dgram_unbound(struct vsock_sock *vsk) +{ + spin_lock_bh(&vsock_dgram_table_lock); + __vsock_insert_bound(vsock_unbound_dgram_sockets, vsk); + spin_unlock_bh(&vsock_dgram_table_lock); +} + +static void __vsock_insert_connectible_unbound(struct vsock_sock *vsk) { spin_lock_bh(&vsock_table_lock); __vsock_insert_bound(vsock_unbound_sockets, vsk); spin_unlock_bh(&vsock_table_lock); } +static void vsock_insert_unbound(struct vsock_sock *vsk) +{ + if (sock_type_connectible(sk_vsock(vsk)->sk_type)) + __vsock_insert_connectible_unbound(vsk); + else + __vsock_insert_dgram_unbound(vsk); +} + void vsock_insert_connected(struct vsock_sock *vsk) { struct list_head *list = vsock_connected_sockets( @@ -289,6 +319,14 @@ void vsock_insert_connected(struct vsock_sock *vsk) } EXPORT_SYMBOL_GPL(vsock_insert_connected); +static void vsock_remove_dgram_bound(struct vsock_sock *vsk) +{ + spin_lock_bh(&vsock_dgram_table_lock); + if (__vsock_in_bound_table(vsk)) + __vsock_remove_bound(vsk); + spin_unlock_bh(&vsock_dgram_table_lock); +} + void vsock_remove_bound(struct vsock_sock *vsk) { spin_lock_bh(&vsock_table_lock); @@ -340,7 +378,10 @@ EXPORT_SYMBOL_GPL(vsock_find_connected_socket); void vsock_remove_sock(struct vsock_sock *vsk) { - vsock_remove_bound(vsk); + if (sock_type_connectible(sk_vsock(vsk)->sk_type)) + vsock_remove_bound(vsk); + else + vsock_remove_dgram_bound(vsk); vsock_remove_connected(vsk); } EXPORT_SYMBOL_GPL(vsock_remove_sock); @@ -746,11 +787,19 @@ static int __vsock_bind_connectible(struct vsock_sock *vsk, return vsock_bind_common(vsk, addr, vsock_bind_table, VSOCK_HASH_SIZE + 1); } -static int __vsock_bind_dgram(struct vsock_sock *vsk, - struct sockaddr_vm *addr) +static int vsock_bind_dgram(struct vsock_sock *vsk, + struct sockaddr_vm *addr) { - if (!vsk->transport || !vsk->transport->dgram_bind) - return -EINVAL; + if (!vsk->transport || !vsk->transport->dgram_bind) { + int retval; + + spin_lock_bh(&vsock_dgram_table_lock); + retval = vsock_bind_common(vsk, addr, vsock_dgram_bind_table, + VSOCK_HASH_SIZE); + spin_unlock_bh(&vsock_dgram_table_lock); + + return retval; + } return vsk->transport->dgram_bind(vsk, addr); } @@ -781,7 +830,7 @@ static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr) break; case SOCK_DGRAM: - retval = __vsock_bind_dgram(vsk, addr); + retval = vsock_bind_dgram(vsk, addr); break; default: