git.itanic.dy.fi Git - linux-stable/commitdiff
net/mlx5: Avoid recovery in probe flows
author Shay Drory <shayd@nvidia.com>
Thu, 24 Nov 2022 11:34:12 +0000 (13:34 +0200)
committer Saeed Mahameed <saeedm@nvidia.com>
Wed, 28 Dec 2022 19:38:50 +0000 (11:38 -0800)
Currently, recovery is done without considering whether the device is
still in probe flow.
This may lead to recovery running before the device has finished
probing successfully, e.g. while mlx5_init_one() is still running. The
recovery flow uses functionality that is loaded only by mlx5_init_one(),
so there is no point in running recovery unless mlx5_init_one() has
finished successfully.

Fix it by waiting for probe flow to finish and checking whether the
device is probed before trying to perform recovery.

Fixes: 51d138c2610a ("net/mlx5: Fix health error state handling")
Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
drivers/net/ethernet/mellanox/mlx5/core/health.c

index 86ed87d704f7d7a81a8cc69653d3a52e0b184d61..96417c5feed76b3aa9d4f48fed5cb60f6d18e72e 100644 (file)
@@ -674,6 +674,12 @@ static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work)
        dev = container_of(priv, struct mlx5_core_dev, priv);
        devlink = priv_to_devlink(dev);
 
+       mutex_lock(&dev->intf_state_mutex);
+       if (test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags)) {
+               mlx5_core_err(dev, "health works are not permitted at this stage\n");
+               return;
+       }
+       mutex_unlock(&dev->intf_state_mutex);
        enter_error_state(dev, false);
        if (IS_ERR_OR_NULL(health->fw_fatal_reporter)) {
                devl_lock(devlink);