]> git.itanic.dy.fi Git - linux-stable/commitdiff
af_unix: Fix garbage collector racing against connect()
authorMichal Luczaj <mhal@rbox.co>
Tue, 9 Apr 2024 20:09:39 +0000 (22:09 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 17 Apr 2024 09:23:34 +0000 (11:23 +0200)
[ Upstream commit 47d8ac011fe1c9251070e1bd64cb10b48193ec51 ]

Garbage collector does not take into account the risk of embryo getting
enqueued during the garbage collection. If such embryo has a peer that
carries SCM_RIGHTS, two consecutive passes of scan_children() may see a
different set of children. Leading to an incorrectly elevated inflight
count, and then a dangling pointer within the gc_inflight_list.

sockets are AF_UNIX/SOCK_STREAM
S is an unconnected socket
L is a listening in-flight socket bound to addr, not in fdtable
V's fd will be passed via sendmsg(), gets inflight count bumped

connect(S, addr) sendmsg(S, [V]); close(V) __unix_gc()
---------------- ------------------------- -----------

NS = unix_create1()
skb1 = sock_wmalloc(NS)
L = unix_find_other(addr)
unix_state_lock(L)
unix_peer(S) = NS
// V count=1 inflight=0

  NS = unix_peer(S)
  skb2 = sock_alloc()
skb_queue_tail(NS, skb2[V])

// V became in-flight
// V count=2 inflight=1

close(V)

// V count=1 inflight=1
// GC candidate condition met

for u in gc_inflight_list:
  if (total_refs == inflight_refs)
    add u to gc_candidates

// gc_candidates={L, V}

for u in gc_candidates:
  scan_children(u, dec_inflight)

// embryo (skb1) was not
// reachable from L yet, so V's
// inflight remains unchanged
__skb_queue_tail(L, skb1)
unix_state_unlock(L)
for u in gc_candidates:
  if (u.inflight)
    scan_children(u, inc_inflight_move_tail)

// V count=1 inflight=2 (!)

If there is a GC-candidate listening socket, lock/unlock its state. This
makes GC wait until the end of any ongoing connect() to that socket. After
flipping the lock, a possibly SCM-laden embryo is already enqueued. And if
there is another embryo coming, it can not possibly carry SCM_RIGHTS. At
this point, unix_inflight() can not happen because unix_gc_lock is already
taken. Inflight graph remains unaffected.

Fixes: 1fd05ba5a2f2 ("[AF_UNIX]: Rewrite garbage collector, fixes race.")
Signed-off-by: Michal Luczaj <mhal@rbox.co>
Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Link: https://lore.kernel.org/r/20240409201047.1032217-1-mhal@rbox.co
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
net/unix/garbage.c

index aea222796dfdcef9914b2f3a56c5ec90007699ab..8734c0c1fc197b38088be1870a085cc417c239ab 100644 (file)
@@ -235,11 +235,22 @@ void unix_gc(void)
         * receive queues.  Other, non candidate sockets _can_ be
         * added to queue, so we must make sure only to touch
         * candidates.
+        *
+        * Embryos, though never candidates themselves, affect which
+        * candidates are reachable by the garbage collector.  Before
+        * being added to a listener's queue, an embryo may already
+        * receive data carrying SCM_RIGHTS, potentially making the
+        * passed socket a candidate that is not yet reachable by the
+        * collector.  It becomes reachable once the embryo is
+        * enqueued.  Therefore, we must ensure that no SCM-laden
+        * embryo appears in a (candidate) listener's queue between
+        * consecutive scan_children() calls.
         */
        list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
+               struct sock *sk = &u->sk;
                long total_refs;
 
-               total_refs = file_count(u->sk.sk_socket->file);
+               total_refs = file_count(sk->sk_socket->file);
 
                BUG_ON(!u->inflight);
                BUG_ON(total_refs < u->inflight);
@@ -247,6 +258,11 @@ void unix_gc(void)
                        list_move_tail(&u->link, &gc_candidates);
                        __set_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
                        __set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
+
+                       if (sk->sk_state == TCP_LISTEN) {
+                               unix_state_lock(sk);
+                               unix_state_unlock(sk);
+                       }
                }
        }