net/mlx5: SF: Fix probing active SFs during driver probe phase

author Shay Drory <shayd@nvidia.com>

Thu, 4 Aug 2022 09:38:41 +0000 (12:38 +0300)

committer Saeed Mahameed <saeedm@nvidia.com>

Tue, 22 Nov 2022 02:14:33 +0000 (18:14 -0800)
author Shay Drory <shayd@nvidia.com>
Thu, 4 Aug 2022 09:38:41 +0000 (12:38 +0300)
committer Saeed Mahameed <saeedm@nvidia.com>
Tue, 22 Nov 2022 02:14:33 +0000 (18:14 -0800)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c

index 7da012ff0d4192847b3b8e4bcd16d49d1916479b..8e2abbab05f04aa6df64b9ef0a5fbdf864faaea1 100644 (file)
--- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
@@ -18,6 +18,10 @@ struct mlx5_sf_dev_table {
         phys_addr_t base_address;
         u64 sf_bar_length;
         struct notifier_block nb;
+       struct mutex table_lock; /* Serializes sf life cycle and vhca state change handler */
+       struct workqueue_struct *active_wq;
+       struct work_struct work;
+       u8 stop_active_wq:1;
         struct mlx5_core_dev *dev;
  };
  
@@ -168,6 +172,7 @@ mlx5_sf_dev_state_change_handler(struct notifier_block *nb, unsigned long event_
                 return 0;
  
         sf_index = event->function_id - base_id;
+       mutex_lock(&table->table_lock);
         sf_dev = xa_load(&table->devices, sf_index);
         switch (event->new_vhca_state) {
         case MLX5_VHCA_STATE_INVALID:
@@ -191,6 +196,7 @@ mlx5_sf_dev_state_change_handler(struct notifier_block *nb, unsigned long event_
         default:
                 break;
         }
+       mutex_unlock(&table->table_lock);
         return 0;
  }
  
@@ -215,6 +221,78 @@ static int mlx5_sf_dev_vhca_arm_all(struct mlx5_sf_dev_table *table)
         return 0;
  }
  
+static void mlx5_sf_dev_add_active_work(struct work_struct *work)
+{
+       struct mlx5_sf_dev_table *table = container_of(work, struct mlx5_sf_dev_table, work);
+       u32 out[MLX5_ST_SZ_DW(query_vhca_state_out)] = {};
+       struct mlx5_core_dev *dev = table->dev;
+       u16 max_functions;
+       u16 function_id;
+       u16 sw_func_id;
+       int err = 0;
+       u8 state;
+       int i;
+
+       max_functions = mlx5_sf_max_functions(dev);
+       function_id = MLX5_CAP_GEN(dev, sf_base_id);
+       for (i = 0; i < max_functions; i++, function_id++) { /* i indexes table->devices */
+               if (table->stop_active_wq) /* set by mlx5_sf_dev_destroy_active_work() */
+                       return;
+               err = mlx5_cmd_query_vhca_state(dev, function_id, out, sizeof(out));
+               if (err)
+                       /* A failure on one specific vhca doesn't mean the
+                        * others will fail as well.
+                        */
+                       continue;
+               state = MLX5_GET(query_vhca_state_out, out, vhca_state_context.vhca_state);
+               if (state != MLX5_VHCA_STATE_ACTIVE)
+                       continue;
+
+               sw_func_id = MLX5_GET(query_vhca_state_out, out, vhca_state_context.sw_function_id);
+               mutex_lock(&table->table_lock);
+               /* Don't probe a device which is already in the table (already probed) */
+               if (!xa_load(&table->devices, i))
+                       mlx5_sf_dev_add(dev, i, function_id, sw_func_id);
+               /* There is a race where the SF becomes inactive after the
+                * query above, e.g. the query reports that the state of the
+                * SF is active, and after that the eswitch manager sets it
+                * to inactive.
+                * This case cannot be handled in SW, since the probing of the
+                * SF is on one system, and the inactivation is on a different
+                * system.
+                * If the inactivation happens after the SF performs init_hca(),
+                * the SF will be fully probed and then removed. If it happens
+                * before init_hca(), the SF probe will fail.
+                */
+               mutex_unlock(&table->table_lock);
+       }
+}
+
+/* If SFs may be created externally (not eswitch manager), queue work that probes active SFs */
+static int mlx5_sf_dev_queue_active_work(struct mlx5_sf_dev_table *table)
+{
+       if (MLX5_CAP_GEN(table->dev, eswitch_manager))
+               return 0; /* the table is local, no workqueue is created */
+
+       /* Use a workqueue to probe active SFs: there may be a large
+        * number of them, and probing may take up to minutes.
+        */
+       table->active_wq = create_singlethread_workqueue("mlx5_active_sf");
+       if (!table->active_wq)
+               return -ENOMEM; /* the only failure this function reports */
+       INIT_WORK(&table->work, &mlx5_sf_dev_add_active_work);
+       queue_work(table->active_wq, &table->work);
+       return 0;
+}
+
+static void mlx5_sf_dev_destroy_active_work(struct mlx5_sf_dev_table *table)
+{
+       if (table->active_wq) { /* assigned only by mlx5_sf_dev_queue_active_work() */
+               table->stop_active_wq = true; /* makes the worker loop return early */
+               destroy_workqueue(table->active_wq); /* NOTE(review): expected to wait for the work - confirm */
+       }
+}
+
  void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev)
  {
         struct mlx5_sf_dev_table *table;
@@ -240,11 +318,17 @@ void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev)
         table->base_address = pci_resource_start(dev->pdev, 2);
         table->max_sfs = max_sfs;
         xa_init(&table->devices);
+       mutex_init(&table->table_lock);
         dev->priv.sf_dev_table = table;
  
         err = mlx5_vhca_event_notifier_register(dev, &table->nb);
         if (err)
                 goto vhca_err;
+
+       err = mlx5_sf_dev_queue_active_work(table);
+       if (err)
+               goto add_active_err;
+
         err = mlx5_sf_dev_vhca_arm_all(table);
         if (err)
                 goto arm_err;
@@ -252,6 +336,8 @@ void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev)
         return;
  
  arm_err:
+       mlx5_sf_dev_destroy_active_work(table);
+add_active_err:
         mlx5_vhca_event_notifier_unregister(dev, &table->nb);
  vhca_err:
         table->max_sfs = 0;
@@ -279,7 +365,9 @@ void mlx5_sf_dev_table_destroy(struct mlx5_core_dev *dev)
         if (!table)
                 return;
  
+       mlx5_sf_dev_destroy_active_work(table);
         mlx5_vhca_event_notifier_unregister(dev, &table->nb);
+       mutex_destroy(&table->table_lock);
  
         /* Now that event handler is not running, it is safe to destroy
          * the sf device without race.
author	Shay Drory <shayd@nvidia.com>
	Thu, 4 Aug 2022 09:38:41 +0000 (12:38 +0300)
committer	Saeed Mahameed <saeedm@nvidia.com>
	Tue, 22 Nov 2022 02:14:33 +0000 (18:14 -0800)