netfilter: bridge: confirm multicast packets before passing them up the stack

author Florian Westphal <fw@strlen.de>

Tue, 27 Feb 2024 15:17:51 +0000 (16:17 +0100)

committer Pablo Neira Ayuso <pablo@netfilter.org>

Wed, 28 Feb 2024 23:22:44 +0000 (00:22 +0100)
author Florian Westphal <fw@strlen.de>
Tue, 27 Feb 2024 15:17:51 +0000 (16:17 +0100)
committer Pablo Neira Ayuso <pablo@netfilter.org>
Wed, 28 Feb 2024 23:22:44 +0000 (00:22 +0100)
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h

index 80900d9109920f686f971e431b95361a831fcc26..ce660d51549b469243357bf9187e108582bc4d95 100644 (file)
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -474,6 +474,7 @@ struct nf_ct_hook {
                               const struct sk_buff *);
         void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb);
         void (*set_closing)(struct nf_conntrack *nfct);
+       int (*confirm)(struct sk_buff *skb);
  };
  extern const struct nf_ct_hook __rcu *nf_ct_hook;
  
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c

index ed17208907578a231d283c04bd97ce48bebdffaa..35e10c5a766d550e0c5cb85cf5a0c4835b52a89d 100644 (file)
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -43,6 +43,10 @@
  #include <linux/sysctl.h>
  #endif
  
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack_core.h>
+#endif
+
  static unsigned int brnf_net_id __read_mostly;
  
  struct brnf_net {
@@ -553,6 +557,90 @@ static unsigned int br_nf_pre_routing(void *priv,
         return NF_STOLEN;
  }
  
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+/* conntracks' nf_confirm logic cannot handle cloned skbs referencing
+ * the same nf_conn entry, which will happen for multicast (broadcast)
+ * Frames on bridges.
+ *
+ * Example:
+ *      macvlan0
+ *      br0
+ *  ethX  ethY
+ *
+ * ethX (or Y) receives multicast or broadcast packet containing
+ * an IP packet, not yet in conntrack table.
+ *
+ * 1. skb passes through bridge and fake-ip (br_netfilter)Prerouting.
+ *    -> skb->_nfct now references a unconfirmed entry
+ * 2. skb is broad/mcast packet. bridge now passes clones out on each bridge
+ *    interface.
+ * 3. skb gets passed up the stack.
+ * 4. In macvlan case, macvlan driver retains clone(s) of the mcast skb
+ *    and schedules a work queue to send them out on the lower devices.
+ *
+ *    The clone skb->_nfct is not a copy, it is the same entry as the
+ *    original skb.  The macvlan rx handler then returns RX_HANDLER_PASS.
+ * 5. Normal conntrack hooks (in NF_INET_LOCAL_IN) confirm the orig skb.
+ *
+ * The Macvlan broadcast worker and normal confirm path will race.
+ *
+ * This race will not happen if step 2 already confirmed a clone. In that
+ * case later steps perform skb_clone() with skb->_nfct already confirmed (in
+ * hash table).  This works fine.
+ *
+ * But such confirmation won't happen when eb/ip/nftables rules dropped the
+ * packets before they reached the nf_confirm step in postrouting.
+ *
+ * Work around this problem by explicit confirmation of the entry at
+ * LOCAL_IN time, before upper layer has a chance to clone the unconfirmed
+ * entry.
+ *
+ */
+static unsigned int br_nf_local_in(void *priv,
+                                  struct sk_buff *skb,
+                                  const struct nf_hook_state *state)
+{
+       struct nf_conntrack *nfct = skb_nfct(skb);
+       const struct nf_ct_hook *ct_hook;
+       struct nf_conn *ct;
+       int ret;
+
+       if (!nfct || skb->pkt_type == PACKET_HOST)
+               return NF_ACCEPT;
+
+       ct = container_of(nfct, struct nf_conn, ct_general);
+       if (likely(nf_ct_is_confirmed(ct)))
+               return NF_ACCEPT;
+
+       WARN_ON_ONCE(skb_shared(skb));
+       WARN_ON_ONCE(refcount_read(&nfct->use) != 1);
+
+       /* We can't call nf_confirm here, it would create a dependency
+        * on nf_conntrack module.
+        */
+       ct_hook = rcu_dereference(nf_ct_hook);
+       if (!ct_hook) {
+               skb->_nfct = 0ul;
+               nf_conntrack_put(nfct);
+               return NF_ACCEPT;
+       }
+
+       nf_bridge_pull_encap_header(skb);
+       ret = ct_hook->confirm(skb);
+       switch (ret & NF_VERDICT_MASK) {
+       case NF_STOLEN:
+               return NF_STOLEN;
+       default:
+               nf_bridge_push_encap_header(skb);
+               break;
+       }
+
+       ct = container_of(nfct, struct nf_conn, ct_general);
+       WARN_ON_ONCE(!nf_ct_is_confirmed(ct));
+
+       return ret;
+}
+#endif
  
  /* PF_BRIDGE/FORWARD *************************************************/
  static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
@@ -964,6 +1052,14 @@ static const struct nf_hook_ops br_nf_ops[] = {
                 .hooknum = NF_BR_PRE_ROUTING,
                 .priority = NF_BR_PRI_BRNF,
         },
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+       {
+               .hook = br_nf_local_in,
+               .pf = NFPROTO_BRIDGE,
+               .hooknum = NF_BR_LOCAL_IN,
+               .priority = NF_BR_PRI_LAST,
+       },
+#endif
         {
                 .hook = br_nf_forward,
                 .pf = NFPROTO_BRIDGE,
diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c

index abb090f94ed2609eeb9cd54b4e5faed1c3cb7bfe..6f877e31709bad3646ea15bf3a96999ed275bdc1 100644 (file)
--- a/net/bridge/netfilter/nf_conntrack_bridge.c
+++ b/net/bridge/netfilter/nf_conntrack_bridge.c
@@ -291,6 +291,30 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb,
         return nf_conntrack_in(skb, &bridge_state);
  }
  
+static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb,
+                                   const struct nf_hook_state *state)
+{
+       enum ip_conntrack_info ctinfo;
+       struct nf_conn *ct;
+
+       if (skb->pkt_type == PACKET_HOST)
+               return NF_ACCEPT;
+
+       /* nf_conntrack_confirm() cannot handle concurrent clones,
+        * this happens for broad/multicast frames with e.g. macvlan on top
+        * of the bridge device.
+        */
+       ct = nf_ct_get(skb, &ctinfo);
+       if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct))
+               return NF_ACCEPT;
+
+       /* let inet prerouting call conntrack again */
+       skb->_nfct = 0;
+       nf_ct_put(ct);
+
+       return NF_ACCEPT;
+}
+
  static void nf_ct_bridge_frag_save(struct sk_buff *skb,
                                    struct nf_bridge_frag_data *data)
  {
@@ -385,6 +409,12 @@ static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = {
                 .hooknum        = NF_BR_PRE_ROUTING,
                 .priority       = NF_IP_PRI_CONNTRACK,
         },
+       {
+               .hook           = nf_ct_bridge_in,
+               .pf             = NFPROTO_BRIDGE,
+               .hooknum        = NF_BR_LOCAL_IN,
+               .priority       = NF_IP_PRI_CONNTRACK_CONFIRM,
+       },
         {
                 .hook           = nf_ct_bridge_post,
                 .pf             = NFPROTO_BRIDGE,
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c

index 2e5f3864d353a39cfde138b725e790d7290b82c9..5b876fa7f9af9e5dfe950929b29f0fc92daf9bab 100644 (file)
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -2756,6 +2756,7 @@ static const struct nf_ct_hook nf_conntrack_hook = {
         .get_tuple_skb  = nf_conntrack_get_tuple_skb,
         .attach         = nf_conntrack_attach,
         .set_closing    = nf_conntrack_set_closing,
+       .confirm        = __nf_conntrack_confirm,
  };
  
  void nf_conntrack_init_end(void)
author	Florian Westphal <fw@strlen.de>
	Tue, 27 Feb 2024 15:17:51 +0000 (16:17 +0100)
committer	Pablo Neira Ayuso <pablo@netfilter.org>
	Wed, 28 Feb 2024 23:22:44 +0000 (00:22 +0100)
include/linux/netfilter.h		patch \| blob \| history
net/bridge/br_netfilter_hooks.c		patch \| blob \| history
net/bridge/netfilter/nf_conntrack_bridge.c		patch \| blob \| history
net/netfilter/nf_conntrack_core.c		patch \| blob \| history