]> git.itanic.dy.fi Git - linux-stable/commitdiff
xfs: don't commit sunit/swidth updates to disk if that would cause repair failures
authorDarrick J. Wong <darrick.wong@oracle.com>
Sat, 24 Sep 2022 12:56:54 +0000 (18:26 +0530)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 28 Sep 2022 09:04:11 +0000 (11:04 +0200)
commit 13eaec4b2adf2657b8167b67e27c97cc7314d923 upstream.

Alex Lyakas reported[1] that mounting an xfs filesystem with new sunit
and swidth values could cause xfs_repair to fail loudly.  The problem
here is that repair calculates the where mkfs should have allocated the
root inode, based on the superblock geometry.  The allocation decisions
depend on sunit, which means that we really can't go updating sunit if
it would lead to a subsequent repair failure on an otherwise correct
filesystem.

Port from xfs_repair some code that computes the location of the root
inode and teach mount to skip the ondisk update if it would cause
problems for repair.  Along the way we'll update the documentation,
provide a function for computing the minimum AGFL size instead of
open-coding it, and cut down some indenting in the mount code.

Note that we allow the mount to proceed (and new allocations will
reflect this new geometry) because we've never screened this kind of
thing before.  We'll have to wait for a new future incompat feature to
enforce correct behavior, alas.

Note that the geometry reporting always uses the superblock values, not
the incore ones, so that is what xfs_info and xfs_growfs will report.

[1] https://lore.kernel.org/linux-xfs/20191125130744.GA44777@bfoster/T/#m00f9594b511e076e2fcdd489d78bc30216d72a7d

Reported-by: Alex Lyakas <alex@zadara.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Acked-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Chandan Babu R <chandan.babu@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
fs/xfs/libxfs/xfs_ialloc.c
fs/xfs/libxfs/xfs_ialloc.h
fs/xfs/xfs_mount.c
fs/xfs/xfs_trace.h

index 443cf33f66668d7d8e8fac3343dbd190e3a78889..c3e0c2f61be4da0b2c4563713a7907981d2e383d 100644 (file)
@@ -2854,3 +2854,67 @@ xfs_ialloc_setup_geometry(
        else
                igeo->ialloc_align = 0;
 }
+
+/* Compute the location of the root directory inode that is laid out by mkfs. */
+xfs_ino_t
+xfs_ialloc_calc_rootino(
+       struct xfs_mount        *mp,
+       int                     sunit)
+{
+       struct xfs_ino_geometry *igeo = M_IGEO(mp);
+       xfs_agblock_t           first_bno;
+
+       /*
+        * Pre-calculate the geometry of AG 0.  We know what it looks like
+        * because libxfs knows how to create allocation groups now.
+        *
+        * first_bno is the first block in which mkfs could possibly have
+        * allocated the root directory inode, once we factor in the metadata
+        * that mkfs formats before it.  Namely, the four AG headers...
+        */
+       first_bno = howmany(4 * mp->m_sb.sb_sectsize, mp->m_sb.sb_blocksize);
+
+       /* ...the two free space btree roots... */
+       first_bno += 2;
+
+       /* ...the inode btree root... */
+       first_bno += 1;
+
+       /* ...the initial AGFL... */
+       first_bno += xfs_alloc_min_freelist(mp, NULL);
+
+       /* ...the free inode btree root... */
+       if (xfs_sb_version_hasfinobt(&mp->m_sb))
+               first_bno++;
+
+       /* ...the reverse mapping btree root... */
+       if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+               first_bno++;
+
+       /* ...the reference count btree... */
+       if (xfs_sb_version_hasreflink(&mp->m_sb))
+               first_bno++;
+
+       /*
+        * ...and the log, if it is allocated in the first allocation group.
+        *
+        * This can happen with filesystems that only have a single
+        * allocation group, or very odd geometries created by old mkfs
+        * versions on very small filesystems.
+        */
+       if (mp->m_sb.sb_logstart &&
+           XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == 0)
+                first_bno += mp->m_sb.sb_logblocks;
+
+       /*
+        * Now round first_bno up to whatever allocation alignment is given
+        * by the filesystem or was passed in.
+        */
+       if (xfs_sb_version_hasdalign(&mp->m_sb) && igeo->ialloc_align > 0)
+               first_bno = roundup(first_bno, sunit);
+       else if (xfs_sb_version_hasalign(&mp->m_sb) &&
+                       mp->m_sb.sb_inoalignmt > 1)
+               first_bno = roundup(first_bno, mp->m_sb.sb_inoalignmt);
+
+       return XFS_AGINO_TO_INO(mp, 0, XFS_AGB_TO_AGINO(mp, first_bno));
+}
index 323592d563d520f9f940fc1913d475bb6ce90708..72b3468b97b152aeb364e2a933fbfe06fd7dee15 100644 (file)
@@ -152,5 +152,6 @@ int xfs_inobt_insert_rec(struct xfs_btree_cur *cur, uint16_t holemask,
 
 int xfs_ialloc_cluster_alignment(struct xfs_mount *mp);
 void xfs_ialloc_setup_geometry(struct xfs_mount *mp);
+xfs_ino_t xfs_ialloc_calc_rootino(struct xfs_mount *mp, int sunit);
 
 #endif /* __XFS_IALLOC_H__ */
index 5c2539e13a0b9e14dbc99fcc890e7979cb723d16..bbcf48a625b2a4cb5d998ca304a4f1c6dba1f9b1 100644 (file)
@@ -31,7 +31,7 @@
 #include "xfs_reflink.h"
 #include "xfs_extent_busy.h"
 #include "xfs_health.h"
-
+#include "xfs_trace.h"
 
 static DEFINE_MUTEX(xfs_uuid_table_mutex);
 static int xfs_uuid_table_size;
@@ -364,6 +364,42 @@ xfs_readsb(
        return error;
 }
 
+/*
+ * If the sunit/swidth change would move the precomputed root inode value, we
+ * must reject the ondisk change because repair will stumble over that.
+ * However, we allow the mount to proceed because we never rejected this
+ * combination before.  Returns true to update the sb, false otherwise.
+ */
+static inline int
+xfs_check_new_dalign(
+       struct xfs_mount        *mp,
+       int                     new_dalign,
+       bool                    *update_sb)
+{
+       struct xfs_sb           *sbp = &mp->m_sb;
+       xfs_ino_t               calc_ino;
+
+       calc_ino = xfs_ialloc_calc_rootino(mp, new_dalign);
+       trace_xfs_check_new_dalign(mp, new_dalign, calc_ino);
+
+       if (sbp->sb_rootino == calc_ino) {
+               *update_sb = true;
+               return 0;
+       }
+
+       xfs_warn(mp,
+"Cannot change stripe alignment; would require moving root inode.");
+
+       /*
+        * XXX: Next time we add a new incompat feature, this should start
+        * returning -EINVAL to fail the mount.  Until then, spit out a warning
+        * that we're ignoring the administrator's instructions.
+        */
+       xfs_warn(mp, "Skipping superblock stripe alignment update.");
+       *update_sb = false;
+       return 0;
+}
+
 /*
  * If we were provided with new sunit/swidth values as mount options, make sure
  * that they pass basic alignment and superblock feature checks, and convert
@@ -424,10 +460,17 @@ xfs_update_alignment(
        struct xfs_sb           *sbp = &mp->m_sb;
 
        if (mp->m_dalign) {
+               bool            update_sb;
+               int             error;
+
                if (sbp->sb_unit == mp->m_dalign &&
                    sbp->sb_width == mp->m_swidth)
                        return 0;
 
+               error = xfs_check_new_dalign(mp, mp->m_dalign, &update_sb);
+               if (error || !update_sb)
+                       return error;
+
                sbp->sb_unit = mp->m_dalign;
                sbp->sb_width = mp->m_swidth;
                mp->m_update_sb = true;
index eaae275ed43084dc6ba14715cd854871341698b4..ffb398c1de698bdbc757c122ccaecaa99efbe4fc 100644 (file)
@@ -3609,6 +3609,27 @@ DEFINE_KMEM_EVENT(kmem_alloc_large);
 DEFINE_KMEM_EVENT(kmem_realloc);
 DEFINE_KMEM_EVENT(kmem_zone_alloc);
 
+TRACE_EVENT(xfs_check_new_dalign,
+       TP_PROTO(struct xfs_mount *mp, int new_dalign, xfs_ino_t calc_rootino),
+       TP_ARGS(mp, new_dalign, calc_rootino),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(int, new_dalign)
+               __field(xfs_ino_t, sb_rootino)
+               __field(xfs_ino_t, calc_rootino)
+       ),
+       TP_fast_assign(
+               __entry->dev = mp->m_super->s_dev;
+               __entry->new_dalign = new_dalign;
+               __entry->sb_rootino = mp->m_sb.sb_rootino;
+               __entry->calc_rootino = calc_rootino;
+       ),
+       TP_printk("dev %d:%d new_dalign %d sb_rootino %llu calc_rootino %llu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->new_dalign, __entry->sb_rootino,
+                 __entry->calc_rootino)
+)
+
 #endif /* _TRACE_XFS_H */
 
 #undef TRACE_INCLUDE_PATH