]> git.itanic.dy.fi Git - linux-stable/commitdiff
cxl/pmem: Fix nvdimm registration races
authorDan Williams <dan.j.williams@intel.com>
Tue, 14 Feb 2023 01:01:05 +0000 (17:01 -0800)
committerDan Williams <dan.j.williams@intel.com>
Tue, 14 Feb 2023 01:01:05 +0000 (17:01 -0800)
A loop of the form:

    while true; do modprobe cxl_pci; modprobe -r cxl_pci; done

...fails with the following crash signature:

    BUG: kernel NULL pointer dereference, address: 0000000000000040
    [..]
    RIP: 0010:cxl_internal_send_cmd+0x5/0xb0 [cxl_core]
    [..]
    Call Trace:
     <TASK>
     cxl_pmem_ctl+0x121/0x240 [cxl_pmem]
     nvdimm_get_config_data+0xd6/0x1a0 [libnvdimm]
     nd_label_data_init+0x135/0x7e0 [libnvdimm]
     nvdimm_probe+0xd6/0x1c0 [libnvdimm]
     nvdimm_bus_probe+0x7a/0x1e0 [libnvdimm]
     really_probe+0xde/0x380
     __driver_probe_device+0x78/0x170
     driver_probe_device+0x1f/0x90
     __device_attach_driver+0x85/0x110
     bus_for_each_drv+0x7d/0xc0
     __device_attach+0xb4/0x1e0
     bus_probe_device+0x9f/0xc0
     device_add+0x445/0x9c0
     nd_async_device_register+0xe/0x40 [libnvdimm]
     async_run_entry_fn+0x30/0x130

...namely that the bottom half of async nvdimm device registration runs
after the CXL has already torn down the context that cxl_pmem_ctl()
needs. Unlike the ACPI NFIT case that benefits from launching multiple
nvdimm device registrations in parallel from those listed in the table,
CXL is already marked PROBE_PREFER_ASYNCHRONOUS. So provide for a
synchronous registration path to preclude this scenario.

Fixes: 21083f51521f ("cxl/pmem: Register 'pmem' / cxl_nvdimm devices")
Cc: <stable@vger.kernel.org>
Reported-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
drivers/cxl/pmem.c
drivers/nvdimm/bus.c
drivers/nvdimm/dimm_devs.c
drivers/nvdimm/nd-core.h
include/linux/libnvdimm.h

index 08bbbac9a6d08a4388121e543b7e4e252cf458a9..71cfa1fdf902714e1e97cfafc20302cf84e34138 100644 (file)
@@ -76,6 +76,7 @@ static int cxl_nvdimm_probe(struct device *dev)
                return rc;
 
        set_bit(NDD_LABELING, &flags);
+       set_bit(NDD_REGISTER_SYNC, &flags);
        set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask);
        set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask);
        set_bit(ND_CMD_SET_CONFIG_DATA, &cmd_mask);
index b38d0355b0ac3b4e7b6c836d71f635e491e937ea..5ad49056921b5f328d831aacb85c8b85e26b52dd 100644 (file)
@@ -508,7 +508,7 @@ static void nd_async_device_unregister(void *d, async_cookie_t cookie)
        put_device(dev);
 }
 
-void nd_device_register(struct device *dev)
+static void __nd_device_register(struct device *dev, bool sync)
 {
        if (!dev)
                return;
@@ -531,11 +531,24 @@ void nd_device_register(struct device *dev)
        }
        get_device(dev);
 
-       async_schedule_dev_domain(nd_async_device_register, dev,
-                                 &nd_async_domain);
+       if (sync)
+               nd_async_device_register(dev, 0);
+       else
+               async_schedule_dev_domain(nd_async_device_register, dev,
+                                         &nd_async_domain);
+}
+
+void nd_device_register(struct device *dev)
+{
+       __nd_device_register(dev, false);
 }
 EXPORT_SYMBOL(nd_device_register);
 
+void nd_device_register_sync(struct device *dev)
+{
+       __nd_device_register(dev, true);
+}
+
 void nd_device_unregister(struct device *dev, enum nd_async_mode mode)
 {
        bool killed;
index 1fc081dcf631578373b6bebb0f293dd5e7917914..6d3b03a9fa02a4672200894f1014062822c2d649 100644 (file)
@@ -624,7 +624,10 @@ struct nvdimm *__nvdimm_create(struct nvdimm_bus *nvdimm_bus,
        nvdimm->sec.ext_flags = nvdimm_security_flags(nvdimm, NVDIMM_MASTER);
        device_initialize(dev);
        lockdep_set_class(&dev->mutex, &nvdimm_key);
-       nd_device_register(dev);
+       if (test_bit(NDD_REGISTER_SYNC, &flags))
+               nd_device_register_sync(dev);
+       else
+               nd_device_register(dev);
 
        return nvdimm;
 }
index cc86ee09d7c081172d1257363e51a301378de684..845408f10655694018d3553646b0815c9841df0b 100644 (file)
@@ -107,6 +107,7 @@ int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus);
 void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus);
 void nd_synchronize(void);
 void nd_device_register(struct device *dev);
+void nd_device_register_sync(struct device *dev);
 struct nd_label_id;
 char *nd_label_gen_id(struct nd_label_id *label_id, const uuid_t *uuid,
                      u32 flags);
index af38252ad704597a44226641b0f16f354249536c..e772aae71843167083adb344a33ab04d7296f416 100644 (file)
@@ -41,6 +41,9 @@ enum {
         */
        NDD_INCOHERENT = 7,
 
+       /* dimm provider wants synchronous registration by __nvdimm_create() */
+       NDD_REGISTER_SYNC = 8,
+
        /* need to set a limit somewhere, but yes, this is likely overkill */
        ND_IOCTL_MAX_BUFLEN = SZ_4M,
        ND_CMD_MAX_ELEM = 5,