git.itanic.dy.fi Git - linux-stable/commitdiff
fs: open block device after superblock creation
author Jan Kara <jack@suse.cz>
Mon, 24 Jul 2023 17:51:45 +0000 (10:51 -0700)
committer Christian Brauner <brauner@kernel.org>
Thu, 10 Aug 2023 08:34:56 +0000 (10:34 +0200)
Currently get_tree_bdev and mount_bdev open the block device before
committing to allocating a super block. That creates problems for
restricting the number of writers to a device, and also leads to an
unusual and not very helpful holder (the fs_type).

Reorganize the super block code to first check whether a superblock for
a particular device already exists, and open the block device only if
it doesn't.

[hch: port to before the bdev_handle changes,
      duplicate the bdev read-only check from blkdev_get_by_path,
      extend the fsfreeze_mutex coverage to protect against freezes,
      fix an open bdev leak when the bdev is frozen,
      use the bdev local variable more,
      rename the s variable to sb to be more descriptive]
[brauner: remove references to mounts as they're mostly irrelevant]
[brauner & hch: fold fixes for romfs and cramfs for
                syzbot+2faac0423fdc9692822b@syzkaller.appspotmail.com]

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Message-Id: <20230724175145.201318-1-hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
fs/cramfs/inode.c
fs/romfs/super.c
fs/super.c

index 27c6597aa1be5a0fc0bce9be27f4244ce74369ac..bc99d0fb8ad4c9fc9084b0aa6209a89037286c60 100644 (file)
@@ -485,12 +485,16 @@ static void cramfs_kill_sb(struct super_block *sb)
 {
        struct cramfs_sb_info *sbi = CRAMFS_SB(sb);
 
+       generic_shutdown_super(sb);
+
        if (IS_ENABLED(CONFIG_CRAMFS_MTD) && sb->s_mtd) {
                if (sbi && sbi->mtd_point_size)
                        mtd_unpoint(sb->s_mtd, 0, sbi->mtd_point_size);
-               kill_mtd_super(sb);
+               put_mtd_device(sb->s_mtd);
+               sb->s_mtd = NULL;
        } else if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV) && sb->s_bdev) {
-               kill_block_super(sb);
+               sync_blockdev(sb->s_bdev);
+               blkdev_put(sb->s_bdev, sb->s_type);
        }
        kfree(sbi);
 }
index c59b230d55b435482f202b3457fe7dc6b4035081..42d7a344472f8f02731084685488c7c83b11f4c7 100644 (file)
@@ -583,16 +583,18 @@ static int romfs_init_fs_context(struct fs_context *fc)
  */
 static void romfs_kill_sb(struct super_block *sb)
 {
+       generic_shutdown_super(sb);
+
 #ifdef CONFIG_ROMFS_ON_MTD
        if (sb->s_mtd) {
-               kill_mtd_super(sb);
-               return;
+               put_mtd_device(sb->s_mtd);
+               sb->s_mtd = NULL;
        }
 #endif
 #ifdef CONFIG_ROMFS_ON_BLOCK
        if (sb->s_bdev) {
-               kill_block_super(sb);
-               return;
+               sync_blockdev(sb->s_bdev);
+               blkdev_put(sb->s_bdev, sb->s_type);
        }
 #endif
 }
index 7755cc2a36078020ba97343b4528325fcb655f8b..249558ecfd774dc54349450ab285018bb4d74736 100644 (file)
@@ -1228,12 +1228,7 @@ static const struct blk_holder_ops fs_holder_ops = {
 
 static int set_bdev_super(struct super_block *s, void *data)
 {
-       s->s_bdev = data;
-       s->s_dev = s->s_bdev->bd_dev;
-       s->s_bdi = bdi_get(s->s_bdev->bd_disk->bdi);
-
-       if (bdev_stable_writes(s->s_bdev))
-               s->s_iflags |= SB_I_STABLE_WRITES;
+       s->s_dev = *(dev_t *)data;
        return 0;
 }
 
@@ -1244,7 +1239,61 @@ static int set_bdev_super_fc(struct super_block *s, struct fs_context *fc)
 
 static int test_bdev_super_fc(struct super_block *s, struct fs_context *fc)
 {
-       return !(s->s_iflags & SB_I_RETIRED) && s->s_bdev == fc->sget_key;
+       return !(s->s_iflags & SB_I_RETIRED) &&
+               s->s_dev == *(dev_t *)fc->sget_key;
+}
+
+static int setup_bdev_super(struct super_block *sb, int sb_flags,
+               struct fs_context *fc)
+{
+       blk_mode_t mode = sb_open_mode(sb_flags);
+       struct block_device *bdev;
+
+       bdev = blkdev_get_by_dev(sb->s_dev, mode, sb->s_type, &fs_holder_ops);
+       if (IS_ERR(bdev)) {
+               if (fc)
+                       errorf(fc, "%s: Can't open blockdev", fc->source);
+               return PTR_ERR(bdev);
+       }
+
+       /*
+        * This really should be in blkdev_get_by_dev, but right now can't due
+        * to legacy issues that require us to allow opening a block device node
+        * writable from userspace even for a read-only block device.
+        */
+       if ((mode & BLK_OPEN_WRITE) && bdev_read_only(bdev)) {
+               blkdev_put(bdev, sb->s_type);
+               return -EACCES;
+       }
+
+       /*
+        * Until SB_BORN flag is set, there can be no active superblock
+        * references and thus no filesystem freezing. get_active_super() will
+        * just loop waiting for SB_BORN so even freeze_bdev() cannot proceed.
+        *
+        * It is enough to check bdev was not frozen before we set s_bdev.
+        */
+       mutex_lock(&bdev->bd_fsfreeze_mutex);
+       if (bdev->bd_fsfreeze_count > 0) {
+               mutex_unlock(&bdev->bd_fsfreeze_mutex);
+               if (fc)
+                       warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev);
+               blkdev_put(bdev, sb->s_type);
+               return -EBUSY;
+       }
+       spin_lock(&sb_lock);
+       sb->s_bdev = bdev;
+       sb->s_bdi = bdi_get(bdev->bd_disk->bdi);
+       if (bdev_stable_writes(bdev))
+               sb->s_iflags |= SB_I_STABLE_WRITES;
+       spin_unlock(&sb_lock);
+       mutex_unlock(&bdev->bd_fsfreeze_mutex);
+
+       snprintf(sb->s_id, sizeof(sb->s_id), "%pg", bdev);
+       shrinker_debugfs_rename(&sb->s_shrink, "sb-%s:%s", sb->s_type->name,
+                               sb->s_id);
+       sb_set_blocksize(sb, block_size(bdev));
+       return 0;
 }
 
 /**
@@ -1256,71 +1305,48 @@ int get_tree_bdev(struct fs_context *fc,
                int (*fill_super)(struct super_block *,
                                  struct fs_context *))
 {
-       struct block_device *bdev;
        struct super_block *s;
        int error = 0;
+       dev_t dev;
 
        if (!fc->source)
                return invalf(fc, "No source specified");
 
-       bdev = blkdev_get_by_path(fc->source, sb_open_mode(fc->sb_flags),
-                                 fc->fs_type, &fs_holder_ops);
-       if (IS_ERR(bdev)) {
-               errorf(fc, "%s: Can't open blockdev", fc->source);
-               return PTR_ERR(bdev);
-       }
-
-       /* Once the superblock is inserted into the list by sget_fc(), s_umount
-        * will protect the lockfs code from trying to start a snapshot while
-        * we are mounting
-        */
-       mutex_lock(&bdev->bd_fsfreeze_mutex);
-       if (bdev->bd_fsfreeze_count > 0) {
-               mutex_unlock(&bdev->bd_fsfreeze_mutex);
-               warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev);
-               blkdev_put(bdev, fc->fs_type);
-               return -EBUSY;
+       error = lookup_bdev(fc->source, &dev);
+       if (error) {
+               errorf(fc, "%s: Can't lookup blockdev", fc->source);
+               return error;
        }
 
        fc->sb_flags |= SB_NOSEC;
-       fc->sget_key = bdev;
+       fc->sget_key = &dev;
        s = sget_fc(fc, test_bdev_super_fc, set_bdev_super_fc);
-       mutex_unlock(&bdev->bd_fsfreeze_mutex);
-       if (IS_ERR(s)) {
-               blkdev_put(bdev, fc->fs_type);
+       if (IS_ERR(s))
                return PTR_ERR(s);
-       }
 
        if (s->s_root) {
                /* Don't summarily change the RO/RW state. */
                if ((fc->sb_flags ^ s->s_flags) & SB_RDONLY) {
-                       warnf(fc, "%pg: Can't mount, would change RO state", bdev);
+                       warnf(fc, "%pg: Can't mount, would change RO state", s->s_bdev);
                        deactivate_locked_super(s);
-                       blkdev_put(bdev, fc->fs_type);
                        return -EBUSY;
                }
-
+       } else {
                /*
-                * s_umount nests inside open_mutex during
-                * __invalidate_device().  blkdev_put() acquires
-                * open_mutex and can't be called under s_umount.  Drop
-                * s_umount temporarily.  This is safe as we're
-                * holding an active reference.
+                * We drop s_umount here because we need to open the bdev and
+                * bdev->open_mutex ranks above s_umount (blkdev_put() ->
+                * __invalidate_device()). It is safe because we have active sb
+                * reference and SB_BORN is not set yet.
                 */
                up_write(&s->s_umount);
-               blkdev_put(bdev, fc->fs_type);
+               error = setup_bdev_super(s, fc->sb_flags, fc);
                down_write(&s->s_umount);
-       } else {
-               snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
-               shrinker_debugfs_rename(&s->s_shrink, "sb-%s:%s",
-                                       fc->fs_type->name, s->s_id);
-               sb_set_blocksize(s, block_size(bdev));
-               error = fill_super(s, fc);
+               if (!error)
+                       error = fill_super(s, fc);
                if (error) {
                        deactivate_locked_super(s);
                        return error;
                }
-
                s->s_flags |= SB_ACTIVE;
        }
 
@@ -1332,78 +1358,52 @@ EXPORT_SYMBOL(get_tree_bdev);
 
 static int test_bdev_super(struct super_block *s, void *data)
 {
-       return !(s->s_iflags & SB_I_RETIRED) && (void *)s->s_bdev == data;
+       return !(s->s_iflags & SB_I_RETIRED) && s->s_dev == *(dev_t *)data;
 }
 
 struct dentry *mount_bdev(struct file_system_type *fs_type,
        int flags, const char *dev_name, void *data,
        int (*fill_super)(struct super_block *, void *, int))
 {
-       struct block_device *bdev;
        struct super_block *s;
-       int error = 0;
+       int error;
+       dev_t dev;
 
-       bdev = blkdev_get_by_path(dev_name, sb_open_mode(flags), fs_type,
-                                 &fs_holder_ops);
-       if (IS_ERR(bdev))
-               return ERR_CAST(bdev);
+       error = lookup_bdev(dev_name, &dev);
+       if (error)
+               return ERR_PTR(error);
 
-       /*
-        * once the super is inserted into the list by sget, s_umount
-        * will protect the lockfs code from trying to start a snapshot
-        * while we are mounting
-        */
-       mutex_lock(&bdev->bd_fsfreeze_mutex);
-       if (bdev->bd_fsfreeze_count > 0) {
-               mutex_unlock(&bdev->bd_fsfreeze_mutex);
-               error = -EBUSY;
-               goto error_bdev;
-       }
-       s = sget(fs_type, test_bdev_super, set_bdev_super, flags | SB_NOSEC,
-                bdev);
-       mutex_unlock(&bdev->bd_fsfreeze_mutex);
+       flags |= SB_NOSEC;
+       s = sget(fs_type, test_bdev_super, set_bdev_super, flags, &dev);
        if (IS_ERR(s))
-               goto error_s;
+               return ERR_CAST(s);
 
        if (s->s_root) {
                if ((flags ^ s->s_flags) & SB_RDONLY) {
                        deactivate_locked_super(s);
-                       error = -EBUSY;
-                       goto error_bdev;
+                       return ERR_PTR(-EBUSY);
                }
-
+       } else {
                /*
-                * s_umount nests inside open_mutex during
-                * __invalidate_device().  blkdev_put() acquires
-                * open_mutex and can't be called under s_umount.  Drop
-                * s_umount temporarily.  This is safe as we're
-                * holding an active reference.
+                * We drop s_umount here because we need to open the bdev and
+                * bdev->open_mutex ranks above s_umount (blkdev_put() ->
+                * __invalidate_device()). It is safe because we have active sb
+                * reference and SB_BORN is not set yet.
                 */
                up_write(&s->s_umount);
-               blkdev_put(bdev, fs_type);
+               error = setup_bdev_super(s, flags, NULL);
                down_write(&s->s_umount);
-       } else {
-               snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
-               shrinker_debugfs_rename(&s->s_shrink, "sb-%s:%s",
-                                       fs_type->name, s->s_id);
-               sb_set_blocksize(s, block_size(bdev));
-               error = fill_super(s, data, flags & SB_SILENT ? 1 : 0);
+               if (!error)
+                       error = fill_super(s, data, flags & SB_SILENT ? 1 : 0);
                if (error) {
                        deactivate_locked_super(s);
-                       goto error;
+                       return ERR_PTR(error);
                }
 
                s->s_flags |= SB_ACTIVE;
        }
 
        return dget(s->s_root);
-
-error_s:
-       error = PTR_ERR(s);
-error_bdev:
-       blkdev_put(bdev, fs_type);
-error:
-       return ERR_PTR(error);
 }
 EXPORT_SYMBOL(mount_bdev);
 
@@ -1412,8 +1412,10 @@ void kill_block_super(struct super_block *sb)
        struct block_device *bdev = sb->s_bdev;
 
        generic_shutdown_super(sb);
-       sync_blockdev(bdev);
-       blkdev_put(bdev, sb->s_type);
+       if (bdev) {
+               sync_blockdev(bdev);
+               blkdev_put(bdev, sb->s_type);
+       }
 }
 
 EXPORT_SYMBOL(kill_block_super);