]> git.itanic.dy.fi Git - linux-stable/commitdiff
RDMA/mlx5: Rely on RoCE fw cap instead of devlink when setting profile
authorMaher Sanalla <msanalla@nvidia.com>
Mon, 29 Aug 2022 09:02:27 +0000 (12:02 +0300)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 20 Sep 2022 10:43:45 +0000 (12:43 +0200)
[ Upstream commit 9ca05b0f27de928be121cccf07735819dc9e1ed3 ]

When the RDMA auxiliary driver probes, it sets its profile based on
devlink driverinit value. The latter might not be in sync with FW yet
(In case devlink reload is not performed), thus causing a mismatch
between RDMA driver and FW. This results in the following FW syndrome
when the RDMA driver tries to adjust RoCE state, which fails the probe:

"0xC1F678 | modify_nic_vport_context: roce_en set on a vport that
doesn't support roce"

To prevent this, select the PF profile based on FW RoCE capability
instead of relying on devlink driverinit value.
To provide backward compatibility of the RoCE disable feature, on older
FW's where roce_rw is not set (FW RoCE capability is read-only), keep
the current behavior e.g., rely on devlink driverinit value.

Fixes: fbfa97b4d79f ("net/mlx5: Disable roce at HCA level")
Reviewed-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
Reviewed-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: Maher Sanalla <msanalla@nvidia.com>
Link: https://lore.kernel.org/r/cb34ce9a1df4a24c135cb804db87f7d2418bd6cc.1661763459.git.leonro@nvidia.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
drivers/infiniband/hw/mlx5/main.c
drivers/net/ethernet/mellanox/mlx5/core/main.c
include/linux/mlx5/driver.h

index 63c89a72cc352d77d7f6eb94ccc828e8b9d80094..bb13164124fdbc04753de09f79766e9da2235a4f 100644 (file)
@@ -4336,7 +4336,7 @@ static int mlx5r_probe(struct auxiliary_device *adev,
        dev->mdev = mdev;
        dev->num_ports = num_ports;
 
-       if (ll == IB_LINK_LAYER_ETHERNET && !mlx5_is_roce_init_enabled(mdev))
+       if (ll == IB_LINK_LAYER_ETHERNET && !mlx5_get_roce_state(mdev))
                profile = &raw_eth_profile;
        else
                profile = &pf_profile;
index 64d54bba91f694335111113e5e66a42856b7ff28..6c8bb74bd8fc6f7ed53a13b77ada58a90e69f6e3 100644 (file)
@@ -501,6 +501,24 @@ static int max_uc_list_get_devlink_param(struct mlx5_core_dev *dev)
        return err;
 }
 
+bool mlx5_is_roce_on(struct mlx5_core_dev *dev)
+{
+       struct devlink *devlink = priv_to_devlink(dev);
+       union devlink_param_value val;
+       int err;
+
+       err = devlink_param_driverinit_value_get(devlink,
+                                                DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE,
+                                                &val);
+
+       if (!err)
+               return val.vbool;
+
+       mlx5_core_dbg(dev, "Failed to get param. err = %d\n", err);
+       return MLX5_CAP_GEN(dev, roce);
+}
+EXPORT_SYMBOL(mlx5_is_roce_on);
+
 static int handle_hca_cap_2(struct mlx5_core_dev *dev, void *set_ctx)
 {
        void *set_hca_cap;
@@ -604,7 +622,8 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
                         MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix));
 
        if (MLX5_CAP_GEN(dev, roce_rw_supported))
-               MLX5_SET(cmd_hca_cap, set_hca_cap, roce, mlx5_is_roce_init_enabled(dev));
+               MLX5_SET(cmd_hca_cap, set_hca_cap, roce,
+                        mlx5_is_roce_on(dev));
 
        max_uc_list = max_uc_list_get_devlink_param(dev);
        if (max_uc_list > 0)
@@ -630,7 +649,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
  */
 static bool is_roce_fw_disabled(struct mlx5_core_dev *dev)
 {
-       return (MLX5_CAP_GEN(dev, roce_rw_supported) && !mlx5_is_roce_init_enabled(dev)) ||
+       return (MLX5_CAP_GEN(dev, roce_rw_supported) && !mlx5_is_roce_on(dev)) ||
                (!MLX5_CAP_GEN(dev, roce_rw_supported) && !MLX5_CAP_GEN(dev, roce));
 }
 
index 0015a08ddbd24f391f02d78036685bfdb2525efb..b3ea245faa5157b027cc0b0f7780e4851c6a7c27 100644 (file)
@@ -1275,16 +1275,17 @@ enum {
        MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32,
 };
 
-static inline bool mlx5_is_roce_init_enabled(struct mlx5_core_dev *dev)
+bool mlx5_is_roce_on(struct mlx5_core_dev *dev);
+
+static inline bool mlx5_get_roce_state(struct mlx5_core_dev *dev)
 {
-       struct devlink *devlink = priv_to_devlink(dev);
-       union devlink_param_value val;
-       int err;
-
-       err = devlink_param_driverinit_value_get(devlink,
-                                                DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE,
-                                                &val);
-       return err ? MLX5_CAP_GEN(dev, roce) : val.vbool;
+       if (MLX5_CAP_GEN(dev, roce_rw_supported))
+               return MLX5_CAP_GEN(dev, roce);
+
+       /* If RoCE cap is read-only in FW, get RoCE state from devlink
+        * in order to support RoCE enable/disable feature
+        */
+       return mlx5_is_roce_on(dev);
 }
 
 #endif /* MLX5_DRIVER_H */