]> git.itanic.dy.fi Git - linux-stable/commitdiff
bridge: Add backup nexthop ID support
authorIdo Schimmel <idosch@nvidia.com>
Mon, 17 Jul 2023 08:12:28 +0000 (11:12 +0300)
committerDavid S. Miller <davem@davemloft.net>
Wed, 19 Jul 2023 09:53:49 +0000 (10:53 +0100)
Add a new bridge port attribute that allows attaching a nexthop object
ID to an skb that is redirected to a backup bridge port with VLAN
tunneling enabled.

Specifically, when redirecting a known unicast packet, read the backup
nexthop ID from the bridge port that lost its carrier and set it in the
bridge control block of the skb before forwarding it via the backup
port. Note that reading the ID from the bridge port should not result in
a cache miss as the ID is added next to the 'backup_port' field that was
already accessed. After this change, the 'state' field still stays on
the first cache line, together with other data path related fields such
as 'flags and 'vlgrp':

struct net_bridge_port {
        struct net_bridge *        br;                   /*     0     8 */
        struct net_device *        dev;                  /*     8     8 */
        netdevice_tracker          dev_tracker;          /*    16     0 */
        struct list_head           list;                 /*    16    16 */
        long unsigned int          flags;                /*    32     8 */
        struct net_bridge_vlan_group * vlgrp;            /*    40     8 */
        struct net_bridge_port *   backup_port;          /*    48     8 */
        u32                        backup_nhid;          /*    56     4 */
        u8                         priority;             /*    60     1 */
        u8                         state;                /*    61     1 */
        u16                        port_no;              /*    62     2 */
        /* --- cacheline 1 boundary (64 bytes) --- */
[...]
} __attribute__((__aligned__(8)));

When forwarding an skb via a bridge port that has VLAN tunneling
enabled, check if the backup nexthop ID stored in the bridge control
block is valid (i.e., not zero). If so, instead of attaching the
pre-allocated metadata (that only has the tunnel key set), allocate a
new metadata, set both the tunnel key and the nexthop object ID and
attach it to the skb.

By default, do not dump the new attribute to user space as a value of
zero is an invalid nexthop object ID.

The above is useful for EVPN multihoming. When one of the links
composing an Ethernet Segment (ES) fails, traffic needs to be redirected
towards the host via one of the other ES peers. For example, if a host
is multihomed to three different VTEPs, the backup port of each ES link
needs to be set to the VXLAN device and the backup nexthop ID needs to
point to an FDB nexthop group that includes the IP addresses of the
other two VTEPs. The VXLAN driver will extract the ID from the metadata
of the redirected skb, calculate its flow hash and forward it towards
one of the other VTEPs. If the ID does not exist, or represents an
invalid nexthop object, the VXLAN driver will drop the skb. This
relieves the bridge driver from the need to validate the ID.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/uapi/linux/if_link.h
net/bridge/br_forward.c
net/bridge/br_netlink.c
net/bridge/br_private.h
net/bridge/br_vlan_tunnel.c
net/core/rtnetlink.c

index 0f6a0fe09bdbdae078f769b73bf11824adcf3ef8..ce3117df9cec2948203319c64f62c0af99d58c01 100644 (file)
@@ -570,6 +570,7 @@ enum {
        IFLA_BRPORT_MCAST_N_GROUPS,
        IFLA_BRPORT_MCAST_MAX_GROUPS,
        IFLA_BRPORT_NEIGH_VLAN_SUPPRESS,
+       IFLA_BRPORT_BACKUP_NHID,
        __IFLA_BRPORT_MAX
 };
 #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
index 6116eba1bd891ef3f2ef6e357e213aa8b6b8495b..9d7bc8b96b53a911d5add8d0b21736358441755f 100644 (file)
@@ -154,6 +154,7 @@ void br_forward(const struct net_bridge_port *to,
                backup_port = rcu_dereference(to->backup_port);
                if (unlikely(!backup_port))
                        goto out;
+               BR_INPUT_SKB_CB(skb)->backup_nhid = READ_ONCE(to->backup_nhid);
                to = backup_port;
        }
 
index 05c5863d2e20f8bc386981d2c7145fbffe5da95b..10f0d33d8ccf2ee7a5e1d46b502bcf40ac515cbd 100644 (file)
@@ -211,6 +211,7 @@ static inline size_t br_port_info_size(void)
                + nla_total_size(sizeof(u8))    /* IFLA_BRPORT_MRP_IN_OPEN */
                + nla_total_size(sizeof(u32))   /* IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT */
                + nla_total_size(sizeof(u32))   /* IFLA_BRPORT_MCAST_EHT_HOSTS_CNT */
+               + nla_total_size(sizeof(u32))   /* IFLA_BRPORT_BACKUP_NHID */
                + 0;
 }
 
@@ -319,6 +320,10 @@ static int br_port_fill_attrs(struct sk_buff *skb,
                            backup_p->dev->ifindex);
        rcu_read_unlock();
 
+       if (p->backup_nhid &&
+           nla_put_u32(skb, IFLA_BRPORT_BACKUP_NHID, p->backup_nhid))
+               return -EMSGSIZE;
+
        return 0;
 }
 
@@ -895,6 +900,7 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
        [IFLA_BRPORT_MCAST_N_GROUPS] = { .type = NLA_REJECT },
        [IFLA_BRPORT_MCAST_MAX_GROUPS] = { .type = NLA_U32 },
        [IFLA_BRPORT_NEIGH_VLAN_SUPPRESS] = NLA_POLICY_MAX(NLA_U8, 1),
+       [IFLA_BRPORT_BACKUP_NHID] = { .type = NLA_U32 },
 };
 
 /* Change the state of the port and notify spanning tree */
@@ -1065,6 +1071,12 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[],
                        return err;
        }
 
+       if (tb[IFLA_BRPORT_BACKUP_NHID]) {
+               u32 backup_nhid = nla_get_u32(tb[IFLA_BRPORT_BACKUP_NHID]);
+
+               WRITE_ONCE(p->backup_nhid, backup_nhid);
+       }
+
        return 0;
 }
 
index a63b32c1638ea42725d04386ebd67bb1fb169331..05a965ef76f1a870beb093169572db4777de623c 100644 (file)
@@ -387,6 +387,7 @@ struct net_bridge_port {
        struct net_bridge_vlan_group    __rcu *vlgrp;
 #endif
        struct net_bridge_port          __rcu *backup_port;
+       u32                             backup_nhid;
 
        /* STP */
        u8                              priority;
@@ -605,6 +606,8 @@ struct br_input_skb_cb {
         */
        unsigned long fwd_hwdoms;
 #endif
+
+       u32 backup_nhid;
 };
 
 #define BR_INPUT_SKB_CB(__skb) ((struct br_input_skb_cb *)(__skb)->cb)
index 6399a8a69d07643730c97e34271e165676c68590..81833ca7a2c77e38eb34046002c644e9cd70e9ca 100644 (file)
@@ -201,6 +201,21 @@ int br_handle_egress_vlan_tunnel(struct sk_buff *skb,
        if (err)
                return err;
 
+       if (BR_INPUT_SKB_CB(skb)->backup_nhid) {
+               tunnel_dst = __ip_tun_set_dst(0, 0, 0, 0, 0, TUNNEL_KEY,
+                                             tunnel_id, 0);
+               if (!tunnel_dst)
+                       return -ENOMEM;
+
+               tunnel_dst->u.tun_info.mode |= IP_TUNNEL_INFO_TX |
+                                              IP_TUNNEL_INFO_BRIDGE;
+               tunnel_dst->u.tun_info.key.nhid =
+                       BR_INPUT_SKB_CB(skb)->backup_nhid;
+               skb_dst_set(skb, &tunnel_dst->dst);
+
+               return 0;
+       }
+
        tunnel_dst = rcu_dereference(vlan->tinfo.tunnel_dst);
        if (tunnel_dst && dst_hold_safe(&tunnel_dst->dst))
                skb_dst_set(skb, &tunnel_dst->dst);
index 70838d7e5b32545dbb3d58c007ec189f2270ec28..1777a5e1830b7a5c47e23855d5707eeff33098fd 100644 (file)
@@ -61,7 +61,7 @@
 #include "dev.h"
 
 #define RTNL_MAX_TYPE          50
-#define RTNL_SLAVE_MAX_TYPE    43
+#define RTNL_SLAVE_MAX_TYPE    44
 
 struct rtnl_link {
        rtnl_doit_func          doit;