changeset 1556:1a5d8dbc5a19

John Spencer reported ext4 filesystem corruption, which seems to have been a problem in the kernel since 3.6.3. Backporting a few patches to try to address it.
author Rob Landley <rob@landley.net>
date Tue, 13 Nov 2012 07:27:25 -0600
parents c734d53d271a
children 8e3d9443cce5
files sources/patches/linux-fixext4.patch
diffstat 1 files changed, 282 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sources/patches/linux-fixext4.patch	Tue Nov 13 07:27:25 2012 -0600
@@ -0,0 +1,282 @@
+commit f2a09af645b762f8230e7eba7fee3b6f7e6e96e7
+Author: Yongqiang Yang <xiaoqiangnk@gmail.com>
+Date:   Sun Sep 23 23:16:03 2012 -0400
+
+    ext4: check free inode count before allocating an inode
+    
+    Recently, I encountered some corrupted filesystems in which some
+    groups' free inode counts were 65535; it seemed that the free inode
+    count had overflowed.  This patch teaches ext4 to check the free inode
+    count before allocating an inode.
+    
+    Signed-off-by: Yongqiang Yang <xiaoqiangnk@gmail.com>
+    Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+
+diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
+index 26154b8..fa36372 100644
+--- a/fs/ext4/ialloc.c
++++ b/fs/ext4/ialloc.c
+@@ -697,6 +697,15 @@ got_group:
+ 		if (!gdp)
+ 			goto fail;
+ 
++		/*
++		 * Check free inodes count before loading bitmap.
++		 */
++		if (ext4_free_inodes_count(sb, gdp) == 0) {
++			if (++group == ngroups)
++				group = 0;
++			continue;
++		}
++
+ 		brelse(inode_bitmap_bh);
+ 		inode_bitmap_bh = ext4_read_inode_bitmap(sb, group);
+ 		if (!inode_bitmap_bh)
+commit 79f1ba49569e5aec919b653c55b03274c2331701
+Author: Tao Ma <boyu.mt@taobao.com>
+Date:   Mon Oct 22 00:34:32 2012 -0400
+
+    ext4: Checksum the block bitmap properly with bigalloc enabled
+    
+    In mke2fs, we only checksum the whole bitmap block, which is correct.
+    In the kernel, however, we use EXT4_BLOCKS_PER_GROUP to indicate the
+    size of the checksummed bitmap, which is wrong when bigalloc is enabled.
+    The right size is EXT4_CLUSTERS_PER_GROUP, and this patch fixes
+    it.
+    
+    Also, as every caller of ext4_block_bitmap_csum_set and
+    ext4_block_bitmap_csum_verify passes in EXT4_BLOCKS_PER_GROUP(sb)/8,
+    we'd better remove this parameter and set it in the function itself.
+    
+    Signed-off-by: Tao Ma <boyu.mt@taobao.com>
+    Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+    Reviewed-by: Lukas Czerner <lczerner@redhat.com>
+    Cc: stable@vger.kernel.org
+
+diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
+index 1b50890..cf18217 100644
+--- a/fs/ext4/balloc.c
++++ b/fs/ext4/balloc.c
+@@ -174,8 +174,7 @@ void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
+ 		ext4_free_inodes_set(sb, gdp, 0);
+ 		ext4_itable_unused_set(sb, gdp, 0);
+ 		memset(bh->b_data, 0xff, sb->s_blocksize);
+-		ext4_block_bitmap_csum_set(sb, block_group, gdp, bh,
+-					   EXT4_BLOCKS_PER_GROUP(sb) / 8);
++		ext4_block_bitmap_csum_set(sb, block_group, gdp, bh);
+ 		return;
+ 	}
+ 	memset(bh->b_data, 0, sb->s_blocksize);
+@@ -212,8 +211,7 @@ void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
+ 	 */
+ 	ext4_mark_bitmap_end(num_clusters_in_group(sb, block_group),
+ 			     sb->s_blocksize * 8, bh->b_data);
+-	ext4_block_bitmap_csum_set(sb, block_group, gdp, bh,
+-				   EXT4_BLOCKS_PER_GROUP(sb) / 8);
++	ext4_block_bitmap_csum_set(sb, block_group, gdp, bh);
+ 	ext4_group_desc_csum_set(sb, block_group, gdp);
+ }
+ 
+@@ -350,7 +348,7 @@ void ext4_validate_block_bitmap(struct super_block *sb,
+ 		return;
+ 	}
+ 	if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group,
+-			desc, bh, EXT4_BLOCKS_PER_GROUP(sb) / 8))) {
++			desc, bh))) {
+ 		ext4_unlock_group(sb, block_group);
+ 		ext4_error(sb, "bg %u: bad block bitmap checksum", block_group);
+ 		return;
+diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c
+index 5c2d181..3285aa5 100644
+--- a/fs/ext4/bitmap.c
++++ b/fs/ext4/bitmap.c
+@@ -58,11 +58,12 @@ void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
+ 
+ int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
+ 				  struct ext4_group_desc *gdp,
+-				  struct buffer_head *bh, int sz)
++				  struct buffer_head *bh)
+ {
+ 	__u32 hi;
+ 	__u32 provided, calculated;
+ 	struct ext4_sb_info *sbi = EXT4_SB(sb);
++	int sz = EXT4_CLUSTERS_PER_GROUP(sb) / 8;
+ 
+ 	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
+ 					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
+@@ -84,8 +85,9 @@ int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
+ 
+ void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
+ 				struct ext4_group_desc *gdp,
+-				struct buffer_head *bh, int sz)
++				struct buffer_head *bh)
+ {
++	int sz = EXT4_CLUSTERS_PER_GROUP(sb) / 8;
+ 	__u32 csum;
+ 	struct ext4_sb_info *sbi = EXT4_SB(sb);
+ 
+diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
+index 78971cf..3c20de1 100644
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -1882,10 +1882,10 @@ int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
+ 				  struct buffer_head *bh, int sz);
+ void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
+ 				struct ext4_group_desc *gdp,
+-				struct buffer_head *bh, int sz);
++				struct buffer_head *bh);
+ int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
+ 				  struct ext4_group_desc *gdp,
+-				  struct buffer_head *bh, int sz);
++				  struct buffer_head *bh);
+ 
+ /* balloc.c */
+ extern void ext4_validate_block_bitmap(struct super_block *sb,
+diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
+index fa36372..4facdd2 100644
+--- a/fs/ext4/ialloc.c
++++ b/fs/ext4/ialloc.c
+@@ -762,9 +762,7 @@ got:
+ 			ext4_free_group_clusters_set(sb, gdp,
+ 				ext4_free_clusters_after_init(sb, group, gdp));
+ 			ext4_block_bitmap_csum_set(sb, group, gdp,
+-						   block_bitmap_bh,
+-						   EXT4_BLOCKS_PER_GROUP(sb) /
+-						   8);
++						   block_bitmap_bh);
+ 			ext4_group_desc_csum_set(sb, group, gdp);
+ 		}
+ 		ext4_unlock_group(sb, group);
+diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
+index a415465..eb1e385 100644
+--- a/fs/ext4/mballoc.c
++++ b/fs/ext4/mballoc.c
+@@ -2805,8 +2805,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
+ 	}
+ 	len = ext4_free_group_clusters(sb, gdp) - ac->ac_b_ex.fe_len;
+ 	ext4_free_group_clusters_set(sb, gdp, len);
+-	ext4_block_bitmap_csum_set(sb, ac->ac_b_ex.fe_group, gdp, bitmap_bh,
+-				   EXT4_BLOCKS_PER_GROUP(sb) / 8);
++	ext4_block_bitmap_csum_set(sb, ac->ac_b_ex.fe_group, gdp, bitmap_bh);
+ 	ext4_group_desc_csum_set(sb, ac->ac_b_ex.fe_group, gdp);
+ 
+ 	ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
+@@ -4666,8 +4665,7 @@ do_more:
+ 
+ 	ret = ext4_free_group_clusters(sb, gdp) + count_clusters;
+ 	ext4_free_group_clusters_set(sb, gdp, ret);
+-	ext4_block_bitmap_csum_set(sb, block_group, gdp, bitmap_bh,
+-				   EXT4_BLOCKS_PER_GROUP(sb) / 8);
++	ext4_block_bitmap_csum_set(sb, block_group, gdp, bitmap_bh);
+ 	ext4_group_desc_csum_set(sb, block_group, gdp);
+ 	ext4_unlock_group(sb, block_group);
+ 	percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters);
+@@ -4811,8 +4809,7 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
+ 	mb_free_blocks(NULL, &e4b, bit, count);
+ 	blk_free_count = blocks_freed + ext4_free_group_clusters(sb, desc);
+ 	ext4_free_group_clusters_set(sb, desc, blk_free_count);
+-	ext4_block_bitmap_csum_set(sb, block_group, desc, bitmap_bh,
+-				   EXT4_BLOCKS_PER_GROUP(sb) / 8);
++	ext4_block_bitmap_csum_set(sb, block_group, desc, bitmap_bh);
+ 	ext4_group_desc_csum_set(sb, block_group, desc);
+ 	ext4_unlock_group(sb, block_group);
+ 	percpu_counter_add(&sbi->s_freeclusters_counter,
+diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
+index 7a75e10..47bf06a 100644
+--- a/fs/ext4/resize.c
++++ b/fs/ext4/resize.c
+@@ -1212,8 +1212,7 @@ static int ext4_set_bitmap_checksums(struct super_block *sb,
+ 	bh = ext4_get_bitmap(sb, group_data->block_bitmap);
+ 	if (!bh)
+ 		return -EIO;
+-	ext4_block_bitmap_csum_set(sb, group, gdp, bh,
+-				   EXT4_BLOCKS_PER_GROUP(sb) / 8);
++	ext4_block_bitmap_csum_set(sb, group, gdp, bh);
+ 	brelse(bh);
+ 
+ 	return 0;
+commit ffb5387e85d528fb6d0d924abfa3fbf0fc484071
+Author: Eric Sandeen <sandeen@redhat.com>
+Date:   Sun Oct 28 22:24:57 2012 -0400
+
+    ext4: fix unjournaled inode bitmap modification
+    
+    commit 119c0d4460b001e44b41dcf73dc6ee794b98bd31 changed
+    ext4_new_inode() such that the inode bitmap was being modified
+    outside a transaction, which could lead to corruption, and was
+    discovered when journal_checksum found a bad checksum in the
+    journal during log replay.
+    
+    Nix ran into this when using the journal_async_commit mount
+    option, which enables journal checksumming.  The ensuing
+    journal replay failures due to the bad checksums led to
+    filesystem corruption reported as the now infamous
+    "Apparent serious progressive ext4 data corruption bug"
+    
+    [ Changed by tytso to call ext4_journal_get_write_access() only
+      when we're fairly certain that we're going to allocate the inode. ]
+    
+    I've tested this by mounting with journal_checksum and
+    running fsstress then dropping power; I've also tested by
+    hacking DM to create snapshots w/o first quiescing, which
+    allows me to test journal replay repeatedly w/o actually
+    power-cycling the box.  Without the patch I hit a journal
+    checksum error every time.  With this fix it survives
+    many iterations.
+    
+    Reported-by: Nix <nix@esperi.org.uk>
+    Signed-off-by: Eric Sandeen <sandeen@redhat.com>
+    Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
+    Cc: stable@vger.kernel.org
+
+diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
+index 4facdd2..3a100e7 100644
+--- a/fs/ext4/ialloc.c
++++ b/fs/ext4/ialloc.c
+@@ -725,6 +725,10 @@ repeat_in_this_group:
+ 				   "inode=%lu", ino + 1);
+ 			continue;
+ 		}
++		BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
++		err = ext4_journal_get_write_access(handle, inode_bitmap_bh);
++		if (err)
++			goto fail;
+ 		ext4_lock_group(sb, group);
+ 		ret2 = ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data);
+ 		ext4_unlock_group(sb, group);
+@@ -738,6 +742,11 @@ repeat_in_this_group:
+ 	goto out;
+ 
+ got:
++	BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata");
++	err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh);
++	if (err)
++		goto fail;
++
+ 	/* We may have to initialize the block bitmap if it isn't already */
+ 	if (ext4_has_group_desc_csum(sb) &&
+ 	    gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
+@@ -771,11 +780,6 @@ got:
+ 			goto fail;
+ 	}
+ 
+-	BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
+-	err = ext4_journal_get_write_access(handle, inode_bitmap_bh);
+-	if (err)
+-		goto fail;
+-
+ 	BUFFER_TRACE(group_desc_bh, "get_write_access");
+ 	err = ext4_journal_get_write_access(handle, group_desc_bh);
+ 	if (err)
+@@ -823,11 +827,6 @@ got:
+ 	}
+ 	ext4_unlock_group(sb, group);
+ 
+-	BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata");
+-	err = ext4_handle_dirty_metadata(handle, NULL, inode_bitmap_bh);
+-	if (err)
+-		goto fail;
+-
+ 	BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata");
+ 	err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh);
+ 	if (err)