ext4: multiple mount protection (MMP)

Add an MMP block, referenced from the superblock, whose sequence number
is checked at mount time and refreshed by a per-filesystem kmmpd thread.

NOTE(review): this patch text was re-flowed from a copy whose line
breaks, indentation and #include targets had been lost.  The header
names in the first hunk were restored from the position of the hunk in
linux-2.6.19 fs/ext4/super.c and from the symbols the new code uses;
confirm them, and the whitespace of all context lines, against the tree
(or apply with "patch -l").

Index: linux-2.6.19/fs/ext4/super.c
===================================================================
--- linux-2.6.19.orig/fs/ext4/super.c
+++ linux-2.6.19/fs/ext4/super.c
@@ -35,6 +35,8 @@
 #include <linux/namei.h>
 #include <linux/quotaops.h>
 #include <linux/seq_file.h>
+#include <linux/kthread.h>
+#include <linux/utsname.h>
 
 #include <asm/uaccess.h>
 
@@ -481,6 +483,9 @@ static void ext4_put_super (struct super
 		invalidate_bdev(sbi->journal_bdev, 0);
 		ext4_blkdev_remove(sbi);
 	}
+	if (sbi->s_mmp_tsk)
+		kthread_stop(sbi->s_mmp_tsk);
+
 	sb->s_fs_info = NULL;
 	kfree(sbi);
 	return;
@@ -1441,6 +1446,278 @@ static ext4_fsblk_t descriptor_loc(struc
 	return (has_super + ext4_group_first_block_no(sb, bg));
 }
 
+/*
+ * Write the MMP block held in @bh to disk synchronously.
+ * @bdev_name is not used at present.  Returns 0 on success, otherwise
+ * the sync_dirty_buffer() error after reporting it with ext4_error().
+ */
+static inline
+int write_mmp_block(struct super_block *sb, struct buffer_head *bh,
+		    const char *bdev_name)
+{
+	int retval;
+
+	mark_buffer_dirty(bh);
+	retval = sync_dirty_buffer(bh);
+	if (retval)
+		ext4_error(sb, "write_mmp_block",
+			   "Error writing to MMP block.");
+
+	return retval;
+}
+
+/*
+ * (Re-)read the MMP block from disk into *@bh.
+ *
+ * A buffer left in *@bh by an earlier call is marked not-uptodate and
+ * released first, so that sb_bread() has to fetch it again and the
+ * caller never holds more than one reference to it.
+ * Returns 0 on success; on failure returns -1 with *@bh set to NULL.
+ */
+static inline
+int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
+		   ext4_fsblk_t mmp_block)
+{
+	if (*bh) {
+		clear_buffer_uptodate(*bh);
+		brelse(*bh);
+	}
+
+	*bh = sb_bread(sb, mmp_block);
+	if (!*bh) {
+		ext4_warning(sb, "read_mmp_block",
+			     "Error while reading MMP block %llu", mmp_block);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * kmmpd will update the MMP sequence every s_mmp_interval seconds.
+ *
+ * The thread is stopped with kthread_stop() from ext4_put_super(), so it
+ * must not return before kthread_should_stop() is true: when it gives up
+ * on its own (read error, MMP feature cleared, read-only remount) it
+ * parks until it is asked to stop.
+ */
+static int kmmpd(void *data)
+{
+	struct super_block *sb = (struct super_block *) data;
+	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+	struct buffer_head *bh = NULL;
+	struct mmp_struct *mmp;
+	ext4_fsblk_t mmp_block;
+	u32 seq = 0;
+	unsigned long failed_writes = 0;
+	int retval;
+	/* s_mmp_interval is little-endian on disk: convert to CPU order */
+	int mmp_interval = le16_to_cpu(es->s_mmp_interval);
+
+	/* s_mmp_block is a 64-bit field */
+	mmp_block = le64_to_cpu(es->s_mmp_block);
+	retval = read_mmp_block(sb, &bh, mmp_block);
+	if (retval)
+		goto failed;
+
+	mmp = (struct mmp_struct *)(bh->b_data);
+	mmp->mmp_magic = cpu_to_le32(EXT4_MMP_MAGIC);
+	mmp->mmp_time = cpu_to_le64(get_seconds());
+	mmp->mmp_interval = cpu_to_le16(mmp_interval);
+	bdevname(bh->b_bdev, mmp->mmp_bdevname);
+
+	down_read(&uts_sem);
+	memcpy(mmp->mmp_nodename, init_uts_ns.name.nodename,
+	       sizeof(mmp->mmp_nodename));
+	up_read(&uts_sem);
+
+	while (!kthread_should_stop()) {
+		if (++seq >= EXT4_MMP_FSCK_ON)
+			seq = 1;
+
+		mmp->mmp_seq = cpu_to_le32(seq);
+		mmp->mmp_time = cpu_to_le64(get_seconds());
+
+		retval = write_mmp_block(sb, bh, mmp->mmp_bdevname);
+		/*
+		 * Don't spew too many error messages. Print one every
+		 * (s_mmp_interval * 60) seconds.
+		 */
+		if (retval) {
+			if ((failed_writes % 60) == 0)
+				ext4_warning(sb, "kmmpd",
+					     "Error writing to MMP block");
+			failed_writes++;
+		}
+
+		if (!(le32_to_cpu(es->s_feature_incompat) &
+		    EXT4_FEATURE_INCOMPAT_MMP)) {
+			ext4_warning(sb, "kmmpd", "kmmpd being stopped "
+				     "since MMP feature has been "
+				     "disabled.");
+			goto failed;
+		}
+
+		if (sb->s_flags & MS_RDONLY) {
+			ext4_warning(sb, "kmmpd", "kmmpd being stopped since "
+				     "filesystem has been remounted as readonly.");
+			goto failed;
+		}
+
+		schedule_timeout_interruptible(mmp_interval * HZ);
+	}
+
+	/* Unmount seems to be clean */
+	mmp->mmp_seq = cpu_to_le32(EXT4_MMP_CLEAN);
+	mmp->mmp_time = cpu_to_le64(get_seconds());
+
+	retval = write_mmp_block(sb, bh, mmp->mmp_bdevname);
+	brelse(bh);
+	return 0;
+
+failed:
+	brelse(bh);
+	/* Stay around until ext4_put_super() calls kthread_stop() */
+	set_current_state(TASK_INTERRUPTIBLE);
+	while (!kthread_should_stop()) {
+		schedule();
+		set_current_state(TASK_INTERRUPTIBLE);
+	}
+	__set_current_state(TASK_RUNNING);
+	return 0;
+}
+
+/*
+ * Log @msg on behalf of @function, followed by the contents of the MMP
+ * block: last update time, node name and block device name.
+ */
+void dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
+		  const char *function, const char *msg)
+{
+	/* @msg is data, not a format: never pass it as the format string */
+	ext4_warning(sb, function, "%s", msg);
+	/*
+	 * The name fields come straight from disk and may lack a trailing
+	 * NUL, so bound them by the size of their arrays.
+	 */
+	ext4_warning(sb, function, "Dumping MMP information:\n"
+		     "Time last updated: %llu\n"
+		     "Last node which updated MMP: %.*s\n"
+		     "Last block device which updated MMP: %.*s\n",
+		     (unsigned long long) le64_to_cpu(mmp->mmp_time),
+		     (int) sizeof(mmp->mmp_nodename), mmp->mmp_nodename,
+		     (int) sizeof(mmp->mmp_bdevname), mmp->mmp_bdevname);
+}
+
+/*
+ * Protect the filesystem from being mounted more than once.
+ *
+ * Reads the MMP block at @mmp_block, refuses the mount when fsck owns
+ * the filesystem or when the sequence number changes while we wait,
+ * then claims the block with a random sequence number and starts kmmpd.
+ * Returns 0 when the mount may proceed and 1 otherwise.
+ */
+static int ext4_multi_mount_protect(struct super_block *sb,
+				    ext4_fsblk_t mmp_block)
+{
+	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+	struct buffer_head *bh = NULL;
+	struct mmp_struct *mmp = NULL;
+	u32 seq;
+	unsigned int wait_interval;
+	int retval;
+
+	/* Valid blocks are s_first_data_block .. blocks_count - 1 */
+	if (mmp_block < le32_to_cpu(es->s_first_data_block) ||
+	    mmp_block >= ext4_blocks_count(es)) {
+		ext4_warning(sb, "ext4_multi_mount_protect",
+			     "Invalid MMP block in superblock");
+		goto failed;
+	}
+
+	retval = read_mmp_block(sb, &bh, mmp_block);
+	if (retval)
+		goto failed;
+
+	mmp = (struct mmp_struct *)(bh->b_data);
+	if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC) {
+		ext4_error(sb, "ext4_multi_mount_protect",
+			   "Invalid magic number in MMP block");
+		goto failed;
+	}
+
+	/* s_mmp_interval is a 16-bit little-endian field */
+	if (le16_to_cpu(es->s_mmp_interval) == 0)
+		es->s_mmp_interval = cpu_to_le16(EXT4_MMP_DEF_INTERVAL);
+
+	/* Computed only now so that the default interval is honoured */
+	wait_interval = 2 * le16_to_cpu(es->s_mmp_interval);
+
+	seq = le32_to_cpu(mmp->mmp_seq);
+	if (seq == EXT4_MMP_CLEAN)
+		goto skip;
+
+	if (seq == EXT4_MMP_FSCK_ON) {
+		dump_mmp_msg(sb, mmp, "ext4_multi_mount_protect",
+			     "fsck is running on the filesystem");
+		goto failed;
+	}
+
+	/* wait for MMP interval and check seq again */
+	schedule_timeout_uninterruptible(HZ * wait_interval);
+
+	retval = read_mmp_block(sb, &bh, mmp_block);
+	if (retval)
+		goto failed;
+	mmp = (struct mmp_struct *)(bh->b_data);
+	if (seq != le32_to_cpu(mmp->mmp_seq)) {
+		dump_mmp_msg(sb, mmp, "ext4_multi_mount_protect",
+			     "Device is already active on another node.");
+		goto failed;
+	}
+
+skip:
+	/* write a new random sequence number below the reserved values */
+	do {
+		get_random_bytes(&seq, sizeof(u32));
+	} while (seq >= EXT4_MMP_FSCK_ON);
+	mmp->mmp_seq = cpu_to_le32(seq);
+	retval = write_mmp_block(sb, bh, sb->s_id);
+	if (retval)
+		goto failed;
+
+	/* wait for MMP interval and check seq again */
+	schedule_timeout_uninterruptible(HZ * wait_interval);
+
+	retval = read_mmp_block(sb, &bh, mmp_block);
+	if (retval)
+		goto failed;
+	mmp = (struct mmp_struct *)(bh->b_data);
+	if (seq != le32_to_cpu(mmp->mmp_seq)) {
+		dump_mmp_msg(sb, mmp, "ext4_multi_mount_protect",
+			     "Device is already active on another node.");
+		goto failed;
+	}
+
+	/* Start a kernel thread to update the MMP block periodically */
+	EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, sb, "kmmpd-%02x:%02x",
+					MAJOR(sb->s_dev), MINOR(sb->s_dev));
+	if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) {
+		EXT4_SB(sb)->s_mmp_tsk = NULL;
+		ext4_warning(sb, "ext4_multi_mount_protect",
+			     "Unable to create kmmpd thread for %s.", sb->s_id);
+		goto failed;
+	}
+
+	brelse(bh);
+	return 0;
+
+failed:
+	brelse(bh);
+
+	return 1;
+}
+
 
 static int ext4_fill_super (struct super_block *sb, void *data, int silent)
 {
@@ -1770,6 +2047,10 @@ static int ext4_fill_super (struct super
 			  EXT4_HAS_INCOMPAT_FEATURE(sb,
 				    EXT4_FEATURE_INCOMPAT_RECOVER));
 
+	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_MMP))
+		if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block)))
+			goto failed_mount2;
+
 	/*
 	 * The first inode we look at is the journal inode.  Don't try
 	 * root first: it may be modified in the journal!
Index: linux-2.6.19/include/linux/ext4_fs_sb.h
===================================================================
--- linux-2.6.19.orig/include/linux/ext4_fs_sb.h
+++ linux-2.6.19/include/linux/ext4_fs_sb.h
@@ -90,6 +90,8 @@ struct ext4_sb_info {
 	unsigned long s_ext_extents;
 #endif
 	unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
+
+	struct task_struct * s_mmp_tsk;  /* Kernel thread for multiple mount protection */
 };
 
 #endif	/* _LINUX_EXT4_FS_SB */
Index: linux-2.6.19/include/linux/ext4_fs.h
===================================================================
--- linux-2.6.19.orig/include/linux/ext4_fs.h
+++ linux-2.6.19/include/linux/ext4_fs.h
@@ -578,10 +578,11 @@ struct ext4_super_block {
 	__le32	s_free_blocks_count_hi;	/* Free blocks count */
 	__le16  s_min_extra_isize;	/* All inodes have at least # bytes */
 	__le16  s_want_extra_isize; 	/* New inodes should reserve # bytes */
-	__le32	s_flags;		/* Miscellaneous flags */
+/*160*/	__le32	s_flags;		/* Miscellaneous flags */
 	__le16  s_raid_stride;		/* RAID stride */
-	__le16  s_pad;			/* Padding */
-	__le32	s_reserved[166];	/* Padding to the end of the block */
+	__le16  s_mmp_interval;		/* Wait for # seconds in MMP checking */
+	__le64  s_mmp_block;		/* Block for multi-mount protection */
+	__le32	s_reserved[164];	/* Padding to the end of the block */
 };
 
 #ifdef __KERNEL__
@@ -680,13 +681,15 @@ static inline int ext4_valid_inum(struct
 #define EXT4_FEATURE_INCOMPAT_META_BG		0x0010
 #define EXT4_FEATURE_INCOMPAT_EXTENTS		0x0040 /* extents support */
 #define EXT4_FEATURE_INCOMPAT_64BIT		0x0080
+#define EXT4_FEATURE_INCOMPAT_MMP		0x0100
 
 #define EXT4_FEATURE_COMPAT_SUPP	EXT2_FEATURE_COMPAT_EXT_ATTR
 #define EXT4_FEATURE_INCOMPAT_SUPP	(EXT4_FEATURE_INCOMPAT_FILETYPE| \
 					 EXT4_FEATURE_INCOMPAT_RECOVER| \
 					 EXT4_FEATURE_INCOMPAT_META_BG| \
 					 EXT4_FEATURE_INCOMPAT_EXTENTS| \
-					 EXT4_FEATURE_INCOMPAT_64BIT)
+					 EXT4_FEATURE_INCOMPAT_64BIT| \
+					 EXT4_FEATURE_INCOMPAT_MMP)
 #define EXT4_FEATURE_RO_COMPAT_SUPP	(EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
 					 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
 					 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE| \
@@ -850,6 +853,30 @@ void ext4_get_group_no_and_offset(struct
 			unsigned long *blockgrpp, ext4_grpblk_t *offsetp);
 
 /*
+ * This structure will be used for multiple mount protection. It will be
+ * written into the block number saved in the s_mmp_block field in the
+ * superblock.
+ */
+#define	EXT4_MMP_MAGIC    0x004D4D50 /* ASCII of MMP */
+#define	EXT4_MMP_CLEAN    0xFF4D4D50 /* Value of mmp_seq for clean unmount */
+#define	EXT4_MMP_FSCK_ON  0xE24D4D50 /* Value of mmp_seq when being fscked */
+struct mmp_struct {
+	__le32	mmp_magic;		/* Must be EXT4_MMP_MAGIC */
+	__le32	mmp_seq;		/* Changes while the fs is in use */
+	__le64	mmp_time;		/* Time of last update, in seconds */
+	char	mmp_nodename[64];	/* Node which last updated the block */
+	char	mmp_bdevname[BDEVNAME_SIZE]; /* Bdev which last updated it */
+	__le16	mmp_interval;		/* Update interval, in seconds */
+	__le16	mmp_pad1;
+	__le32	mmp_pad2;
+};
+
+/*
+ * Interval in number of seconds to update the MMP sequence number.
+ */
+#define EXT4_MMP_DEF_INTERVAL	5
+
+/*
  * Function prototypes
  */
 