@@ -168,6 +168,21 @@ static int unix_state_lock_cmp_fn(const struct lockdep_map *_a,
/* unix_state_double_lock(): ascending address order. */
return a < b ? -1 : 0;
}
+
+static int unix_recvq_lock_cmp_fn(const struct lockdep_map *_a,
+ const struct lockdep_map *_b)
+{
+ const struct sock *a, *b;
+
+ a = container_of(_a, struct sock, sk_receive_queue.lock.dep_map);
+ b = container_of(_b, struct sock, sk_receive_queue.lock.dep_map);
+
+ /* unix_collect_skb(): listener -> embryo order. */
+ if (a->sk_state == TCP_LISTEN && unix_sk(b)->listener == a)
+ return -1;
+
+ return 0;
+}
#endif
static unsigned int unix_unbound_hash(struct sock *sk)
@@ -1015,6 +1030,8 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern,
sk->sk_write_space = unix_write_space;
sk->sk_max_ack_backlog = READ_ONCE(net->unx.sysctl_max_dgram_qlen);
sk->sk_destruct = unix_sock_destructor;
+ lock_set_cmp_fn(&sk->sk_receive_queue.lock, unix_recvq_lock_cmp_fn, NULL);
+
u = unix_sk(sk);
u->listener = NULL;
u->vertex = NULL;
@@ -337,11 +337,6 @@ static bool unix_vertex_dead(struct unix_vertex *vertex)
return true;
}
-enum unix_recv_queue_lock_class {
- U_RECVQ_LOCK_NORMAL,
- U_RECVQ_LOCK_EMBRYO,
-};
-
static void unix_collect_queue(struct unix_sock *u, struct sk_buff_head *hitlist)
{
skb_queue_splice_init(&u->sk.sk_receive_queue, hitlist);
@@ -375,8 +370,7 @@ static void unix_collect_skb(struct list_head *scc, struct sk_buff_head *hitlist
skb_queue_walk(queue, skb) {
struct sk_buff_head *embryo_queue = &skb->sk->sk_receive_queue;
- /* listener -> embryo order, the inversion never happens. */
- spin_lock_nested(&embryo_queue->lock, U_RECVQ_LOCK_EMBRYO);
+ spin_lock(&embryo_queue->lock);
unix_collect_queue(unix_sk(skb->sk), hitlist);
spin_unlock(&embryo_queue->lock);
}
While GC is cleaning up cyclic references by SCM_RIGHTS, unix_collect_skb() collects skb in the socket's recvq. If the socket is TCP_LISTEN, we need to collect skb in the embryo's queue. Then, both the listener's recvq lock and the embroy's one are held. The locking is always done in the listener -> embryo order. Let's define it as unix_recvq_lock_cmp_fn() instead of using spin_lock_nested(). Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com> --- net/unix/af_unix.c | 17 +++++++++++++++++ net/unix/garbage.c | 8 +------- 2 files changed, 18 insertions(+), 7 deletions(-)