diff options
Diffstat (limited to 'fs/ext4/super.c')
| -rw-r--r-- | fs/ext4/super.c | 3397 | 
1 files changed, 2079 insertions, 1318 deletions
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 61182fe6254..6df7bc611db 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -38,12 +38,14 @@  #include <linux/ctype.h>  #include <linux/log2.h>  #include <linux/crc16.h> +#include <linux/cleancache.h>  #include <asm/uaccess.h>  #include <linux/kthread.h>  #include <linux/freezer.h>  #include "ext4.h" +#include "ext4_extents.h"	/* Needed for trace points definition */  #include "ext4_jbd2.h"  #include "xattr.h"  #include "acl.h" @@ -54,29 +56,50 @@  static struct proc_dir_entry *ext4_proc_root;  static struct kset *ext4_kset; -struct ext4_lazy_init *ext4_li_info; -struct mutex ext4_li_mtx; -struct ext4_features *ext4_feat; +static struct ext4_lazy_init *ext4_li_info; +static struct mutex ext4_li_mtx; +static struct ext4_features *ext4_feat; +static int ext4_mballoc_ready;  static int ext4_load_journal(struct super_block *, struct ext4_super_block *,  			     unsigned long journal_devnum); +static int ext4_show_options(struct seq_file *seq, struct dentry *root);  static int ext4_commit_super(struct super_block *sb, int sync);  static void ext4_mark_recovery_complete(struct super_block *sb,  					struct ext4_super_block *es);  static void ext4_clear_journal_err(struct super_block *sb,  				   struct ext4_super_block *es);  static int ext4_sync_fs(struct super_block *sb, int wait); -static const char *ext4_decode_error(struct super_block *sb, int errno, -				     char nbuf[16]); +static int ext4_sync_fs_nojournal(struct super_block *sb, int wait);  static int ext4_remount(struct super_block *sb, int *flags, char *data);  static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);  static int ext4_unfreeze(struct super_block *sb); -static void ext4_write_super(struct super_block *sb);  static int ext4_freeze(struct super_block *sb);  static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,  		       const char *dev_name, void *data); +static inline int ext2_feature_set_ok(struct super_block *sb); +static inline int ext3_feature_set_ok(struct super_block *sb); +static int ext4_feature_set_ok(struct super_block *sb, int readonly);  static void ext4_destroy_lazyinit_thread(void);  static void ext4_unregister_li_request(struct super_block *sb); +static void ext4_clear_request_list(void); +static int ext4_reserve_clusters(struct ext4_sb_info *, ext4_fsblk_t); + +#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) +static struct file_system_type ext2_fs_type = { +	.owner		= THIS_MODULE, +	.name		= "ext2", +	.mount		= ext4_mount, +	.kill_sb	= kill_block_super, +	.fs_flags	= FS_REQUIRES_DEV, +}; +MODULE_ALIAS_FS("ext2"); +MODULE_ALIAS("ext2"); +#define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type) +#else +#define IS_EXT2_SB(sb) (0) +#endif +  #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)  static struct file_system_type ext3_fs_type = { @@ -86,11 +109,85 @@ static struct file_system_type ext3_fs_type = {  	.kill_sb	= kill_block_super,  	.fs_flags	= FS_REQUIRES_DEV,  }; +MODULE_ALIAS_FS("ext3"); +MODULE_ALIAS("ext3");  #define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type)  #else  #define IS_EXT3_SB(sb) (0)  #endif +static int ext4_verify_csum_type(struct super_block *sb, +				 struct ext4_super_block *es) +{ +	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, +					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) +		return 1; + +	return es->s_checksum_type == EXT4_CRC32C_CHKSUM; +} + +static __le32 ext4_superblock_csum(struct super_block *sb, +				   struct ext4_super_block *es) +{ +	struct ext4_sb_info *sbi = EXT4_SB(sb); +	int offset = offsetof(struct ext4_super_block, s_checksum); +	__u32 csum; + +	csum = ext4_chksum(sbi, ~0, (char *)es, offset); + +	return cpu_to_le32(csum); +} + +static int ext4_superblock_csum_verify(struct super_block *sb, +				       struct ext4_super_block *es) +{ +	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, +				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) +		return 1; + +	return es->s_checksum == ext4_superblock_csum(sb, es); +} + +void ext4_superblock_csum_set(struct super_block *sb) +{ +	struct ext4_super_block *es = EXT4_SB(sb)->s_es; + +	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, +		EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) +		return; + +	es->s_checksum = ext4_superblock_csum(sb, es); +} + +void *ext4_kvmalloc(size_t size, gfp_t flags) +{ +	void *ret; + +	ret = kmalloc(size, flags | __GFP_NOWARN); +	if (!ret) +		ret = __vmalloc(size, flags, PAGE_KERNEL); +	return ret; +} + +void *ext4_kvzalloc(size_t size, gfp_t flags) +{ +	void *ret; + +	ret = kzalloc(size, flags | __GFP_NOWARN); +	if (!ret) +		ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL); +	return ret; +} + +void ext4_kvfree(void *ptr) +{ +	if (is_vmalloc_addr(ptr)) +		vfree(ptr); +	else +		kfree(ptr); + +} +  ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,  			       struct ext4_group_desc *bg)  { @@ -115,8 +212,8 @@ ext4_fsblk_t ext4_inode_table(struct super_block *sb,  		 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);  } -__u32 ext4_free_blks_count(struct super_block *sb, -			      struct ext4_group_desc *bg) +__u32 ext4_free_group_clusters(struct super_block *sb, +			       struct ext4_group_desc *bg)  {  	return le16_to_cpu(bg->bg_free_blocks_count_lo) |  		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? @@ -171,8 +268,8 @@ void ext4_inode_table_set(struct super_block *sb,  		bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);  } -void ext4_free_blks_set(struct super_block *sb, -			  struct ext4_group_desc *bg, __u32 count) +void ext4_free_group_clusters_set(struct super_block *sb, +				  struct ext4_group_desc *bg, __u32 count)  {  	bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);  	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) @@ -204,116 +301,6 @@ void ext4_itable_unused_set(struct super_block *sb,  } -/* Just increment the non-pointer handle value */ -static handle_t *ext4_get_nojournal(void) -{ -	handle_t *handle = current->journal_info; -	unsigned long ref_cnt = (unsigned long)handle; - -	BUG_ON(ref_cnt >= EXT4_NOJOURNAL_MAX_REF_COUNT); - -	ref_cnt++; -	handle = (handle_t *)ref_cnt; - -	current->journal_info = handle; -	return handle; -} - - -/* Decrement the non-pointer handle value */ -static void ext4_put_nojournal(handle_t *handle) -{ -	unsigned long ref_cnt = (unsigned long)handle; - -	BUG_ON(ref_cnt == 0); - -	ref_cnt--; -	handle = (handle_t *)ref_cnt; - -	current->journal_info = handle; -} - -/* - * Wrappers for jbd2_journal_start/end. - * - * The only special thing we need to do here is to make sure that all - * journal_end calls result in the superblock being marked dirty, so - * that sync() will call the filesystem's write_super callback if - * appropriate. - */ -handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) -{ -	journal_t *journal; - -	if (sb->s_flags & MS_RDONLY) -		return ERR_PTR(-EROFS); - -	vfs_check_frozen(sb, SB_FREEZE_TRANS); -	/* Special case here: if the journal has aborted behind our -	 * backs (eg. EIO in the commit thread), then we still need to -	 * take the FS itself readonly cleanly. */ -	journal = EXT4_SB(sb)->s_journal; -	if (journal) { -		if (is_journal_aborted(journal)) { -			ext4_abort(sb, "Detected aborted journal"); -			return ERR_PTR(-EROFS); -		} -		return jbd2_journal_start(journal, nblocks); -	} -	return ext4_get_nojournal(); -} - -/* - * The only special thing we need to do here is to make sure that all - * jbd2_journal_stop calls result in the superblock being marked dirty, so - * that sync() will call the filesystem's write_super callback if - * appropriate. - */ -int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle) -{ -	struct super_block *sb; -	int err; -	int rc; - -	if (!ext4_handle_valid(handle)) { -		ext4_put_nojournal(handle); -		return 0; -	} -	sb = handle->h_transaction->t_journal->j_private; -	err = handle->h_err; -	rc = jbd2_journal_stop(handle); - -	if (!err) -		err = rc; -	if (err) -		__ext4_std_error(sb, where, line, err); -	return err; -} - -void ext4_journal_abort_handle(const char *caller, unsigned int line, -			       const char *err_fn, struct buffer_head *bh, -			       handle_t *handle, int err) -{ -	char nbuf[16]; -	const char *errstr = ext4_decode_error(NULL, err, nbuf); - -	BUG_ON(!ext4_handle_valid(handle)); - -	if (bh) -		BUFFER_TRACE(bh, "abort"); - -	if (!handle->h_err) -		handle->h_err = err; - -	if (is_handle_aborted(handle)) -		return; - -	printk(KERN_ERR "%s:%d: aborting transaction: %s in %s\n", -	       caller, line, errstr, err_fn); - -	jbd2_journal_abort_handle(handle); -} -  static void __save_error_info(struct super_block *sb, const char *func,  			    unsigned int line)  { @@ -338,7 +325,7 @@ static void __save_error_info(struct super_block *sb, const char *func,  	 */  	if (!es->s_error_count)  		mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ); -	es->s_error_count = cpu_to_le32(le32_to_cpu(es->s_error_count) + 1); +	le32_add_cpu(&es->s_error_count, 1);  }  static void save_error_info(struct super_block *sb, const char *func, @@ -348,6 +335,41 @@ static void save_error_info(struct super_block *sb, const char *func,  	ext4_commit_super(sb, 1);  } +/* + * The del_gendisk() function uninitializes the disk-specific data + * structures, including the bdi structure, without telling anyone + * else.  Once this happens, any attempt to call mark_buffer_dirty() + * (for example, by ext4_commit_super), will cause a kernel OOPS. + * This is a kludge to prevent these oops until we can put in a proper + * hook in del_gendisk() to inform the VFS and file system layers. + */ +static int block_device_ejected(struct super_block *sb) +{ +	struct inode *bd_inode = sb->s_bdev->bd_inode; +	struct backing_dev_info *bdi = bd_inode->i_mapping->backing_dev_info; + +	return bdi->dev == NULL; +} + +static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn) +{ +	struct super_block		*sb = journal->j_private; +	struct ext4_sb_info		*sbi = EXT4_SB(sb); +	int				error = is_journal_aborted(journal); +	struct ext4_journal_cb_entry	*jce; + +	BUG_ON(txn->t_state == T_FINISHED); +	spin_lock(&sbi->s_md_lock); +	while (!list_empty(&txn->t_private_list)) { +		jce = list_entry(txn->t_private_list.next, +				 struct ext4_journal_cb_entry, jce_list); +		list_del_init(&jce->jce_list); +		spin_unlock(&sbi->s_md_lock); +		jce->jce_func(sb, jce, error); +		spin_lock(&sbi->s_md_lock); +	} +	spin_unlock(&sbi->s_md_lock); +}  /* Deal with the reporting of failure conditions on a filesystem such as   * inconsistencies detected or read IO failures. @@ -378,6 +400,11 @@ static void ext4_handle_error(struct super_block *sb)  	}  	if (test_opt(sb, ERRORS_RO)) {  		ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); +		/* +		 * Make sure updated value of ->s_mount_flags will be visible +		 * before ->s_flags update +		 */ +		smp_wmb();  		sb->s_flags |= MS_RDONLY;  	}  	if (test_opt(sb, ERRORS_PANIC)) @@ -385,73 +412,98 @@ static void ext4_handle_error(struct super_block *sb)  			sb->s_id);  } +#define ext4_error_ratelimit(sb)					\ +		___ratelimit(&(EXT4_SB(sb)->s_err_ratelimit_state),	\ +			     "EXT4-fs error") +  void __ext4_error(struct super_block *sb, const char *function,  		  unsigned int line, const char *fmt, ...)  { +	struct va_format vaf;  	va_list args; -	va_start(args, fmt); -	printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: ", -	       sb->s_id, function, line, current->comm); -	vprintk(fmt, args); -	printk("\n"); -	va_end(args); - +	if (ext4_error_ratelimit(sb)) { +		va_start(args, fmt); +		vaf.fmt = fmt; +		vaf.va = &args; +		printk(KERN_CRIT +		       "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n", +		       sb->s_id, function, line, current->comm, &vaf); +		va_end(args); +	} +	save_error_info(sb, function, line);  	ext4_handle_error(sb);  } -void ext4_error_inode(struct inode *inode, const char *function, -		      unsigned int line, ext4_fsblk_t block, -		      const char *fmt, ...) +void __ext4_error_inode(struct inode *inode, const char *function, +			unsigned int line, ext4_fsblk_t block, +			const char *fmt, ...)  {  	va_list args; +	struct va_format vaf;  	struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;  	es->s_last_error_ino = cpu_to_le32(inode->i_ino);  	es->s_last_error_block = cpu_to_le64(block); +	if (ext4_error_ratelimit(inode->i_sb)) { +		va_start(args, fmt); +		vaf.fmt = fmt; +		vaf.va = &args; +		if (block) +			printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: " +			       "inode #%lu: block %llu: comm %s: %pV\n", +			       inode->i_sb->s_id, function, line, inode->i_ino, +			       block, current->comm, &vaf); +		else +			printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: " +			       "inode #%lu: comm %s: %pV\n", +			       inode->i_sb->s_id, function, line, inode->i_ino, +			       current->comm, &vaf); +		va_end(args); +	}  	save_error_info(inode->i_sb, function, line); -	va_start(args, fmt); -	printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: inode #%lu: ", -	       inode->i_sb->s_id, function, line, inode->i_ino); -	if (block) -		printk("block %llu: ", block); -	printk("comm %s: ", current->comm); -	vprintk(fmt, args); -	printk("\n"); -	va_end(args); -  	ext4_handle_error(inode->i_sb);  } -void ext4_error_file(struct file *file, const char *function, -		     unsigned int line, const char *fmt, ...) +void __ext4_error_file(struct file *file, const char *function, +		       unsigned int line, ext4_fsblk_t block, +		       const char *fmt, ...)  {  	va_list args; +	struct va_format vaf;  	struct ext4_super_block *es; -	struct inode *inode = file->f_dentry->d_inode; +	struct inode *inode = file_inode(file);  	char pathname[80], *path;  	es = EXT4_SB(inode->i_sb)->s_es;  	es->s_last_error_ino = cpu_to_le32(inode->i_ino); +	if (ext4_error_ratelimit(inode->i_sb)) { +		path = d_path(&(file->f_path), pathname, sizeof(pathname)); +		if (IS_ERR(path)) +			path = "(unknown)"; +		va_start(args, fmt); +		vaf.fmt = fmt; +		vaf.va = &args; +		if (block) +			printk(KERN_CRIT +			       "EXT4-fs error (device %s): %s:%d: inode #%lu: " +			       "block %llu: comm %s: path %s: %pV\n", +			       inode->i_sb->s_id, function, line, inode->i_ino, +			       block, current->comm, path, &vaf); +		else +			printk(KERN_CRIT +			       "EXT4-fs error (device %s): %s:%d: inode #%lu: " +			       "comm %s: path %s: %pV\n", +			       inode->i_sb->s_id, function, line, inode->i_ino, +			       current->comm, path, &vaf); +		va_end(args); +	}  	save_error_info(inode->i_sb, function, line); -	va_start(args, fmt); -	path = d_path(&(file->f_path), pathname, sizeof(pathname)); -	if (!path) -		path = "(unknown)"; -	printk(KERN_CRIT -	       "EXT4-fs error (device %s): %s:%d: inode #%lu " -	       "(comm %s path %s): ", -	       inode->i_sb->s_id, function, line, inode->i_ino, -	       current->comm, path); -	vprintk(fmt, args); -	printk("\n"); -	va_end(args); -  	ext4_handle_error(inode->i_sb);  } -static const char *ext4_decode_error(struct super_block *sb, int errno, -				     char nbuf[16]) +const char *ext4_decode_error(struct super_block *sb, int errno, +			      char nbuf[16])  {  	char *errstr = NULL; @@ -500,11 +552,13 @@ void __ext4_std_error(struct super_block *sb, const char *function,  	    (sb->s_flags & MS_RDONLY))  		return; -	errstr = ext4_decode_error(sb, errno, nbuf); -	printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n", -	       sb->s_id, function, line, errstr); -	save_error_info(sb, function, line); +	if (ext4_error_ratelimit(sb)) { +		errstr = ext4_decode_error(sb, errno, nbuf); +		printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n", +		       sb->s_id, function, line, errstr); +	} +	save_error_info(sb, function, line);  	ext4_handle_error(sb);  } @@ -533,8 +587,13 @@ void __ext4_abort(struct super_block *sb, const char *function,  	if ((sb->s_flags & MS_RDONLY) == 0) {  		ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); -		sb->s_flags |= MS_RDONLY;  		EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; +		/* +		 * Make sure updated value of ->s_mount_flags will be visible +		 * before ->s_flags update +		 */ +		smp_wmb(); +		sb->s_flags |= MS_RDONLY;  		if (EXT4_SB(sb)->s_journal)  			jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);  		save_error_info(sb, function, line); @@ -543,28 +602,37 @@ void __ext4_abort(struct super_block *sb, const char *function,  		panic("EXT4-fs panic from previous error\n");  } -void ext4_msg (struct super_block * sb, const char *prefix, -		   const char *fmt, ...) +void __ext4_msg(struct super_block *sb, +		const char *prefix, const char *fmt, ...)  { +	struct va_format vaf;  	va_list args; +	if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state), "EXT4-fs")) +		return; +  	va_start(args, fmt); -	printk("%sEXT4-fs (%s): ", prefix, sb->s_id); -	vprintk(fmt, args); -	printk("\n"); +	vaf.fmt = fmt; +	vaf.va = &args; +	printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf);  	va_end(args);  }  void __ext4_warning(struct super_block *sb, const char *function,  		    unsigned int line, const char *fmt, ...)  { +	struct va_format vaf;  	va_list args; +	if (!___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state), +			  "EXT4-fs warning")) +		return; +  	va_start(args, fmt); -	printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: ", -	       sb->s_id, function, line); -	vprintk(fmt, args); -	printk("\n"); +	vaf.fmt = fmt; +	vaf.va = &args; +	printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n", +	       sb->s_id, function, line, &vaf);  	va_end(args);  } @@ -575,22 +643,28 @@ void __ext4_grp_locked_error(const char *function, unsigned int line,  __releases(bitlock)  __acquires(bitlock)  { +	struct va_format vaf;  	va_list args;  	struct ext4_super_block *es = EXT4_SB(sb)->s_es;  	es->s_last_error_ino = cpu_to_le32(ino);  	es->s_last_error_block = cpu_to_le64(block);  	__save_error_info(sb, function, line); -	va_start(args, fmt); -	printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u", -	       sb->s_id, function, line, grp); -	if (ino) -		printk("inode %lu: ", ino); -	if (block) -		printk("block %llu:", (unsigned long long) block); -	vprintk(fmt, args); -	printk("\n"); -	va_end(args); + +	if (ext4_error_ratelimit(sb)) { +		va_start(args, fmt); +		vaf.fmt = fmt; +		vaf.va = &args; +		printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ", +		       sb->s_id, function, line, grp); +		if (ino) +			printk(KERN_CONT "inode %lu: ", ino); +		if (block) +			printk(KERN_CONT "block %llu:", +			       (unsigned long long) block); +		printk(KERN_CONT "%pV\n", &vaf); +		va_end(args); +	}  	if (test_opt(sb, ERRORS_CONT)) {  		ext4_commit_super(sb, 0); @@ -605,7 +679,7 @@ __acquires(bitlock)  	 * filesystem will have already been marked read/only and the  	 * journal has been aborted.  We return 1 as a hint to callers  	 * who might what to use the return value from -	 * ext4_grp_locked_error() to distinguish beween the +	 * ext4_grp_locked_error() to distinguish between the  	 * ERRORS_CONT and ERRORS_RO case, and perhaps return more  	 * aggressively from the ext4 function in question, with a  	 * more appropriate error code. @@ -647,7 +721,7 @@ static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)  	struct block_device *bdev;  	char b[BDEVNAME_SIZE]; -	bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE); +	bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);  	if (IS_ERR(bdev))  		goto fail;  	return bdev; @@ -661,23 +735,19 @@ fail:  /*   * Release the journal device   */ -static int ext4_blkdev_put(struct block_device *bdev) +static void ext4_blkdev_put(struct block_device *bdev)  { -	bd_release(bdev); -	return blkdev_put(bdev, FMODE_READ|FMODE_WRITE); +	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);  } -static int ext4_blkdev_remove(struct ext4_sb_info *sbi) +static void ext4_blkdev_remove(struct ext4_sb_info *sbi)  {  	struct block_device *bdev; -	int ret = -ENODEV; -  	bdev = sbi->journal_bdev;  	if (bdev) { -		ret = ext4_blkdev_put(bdev); +		ext4_blkdev_put(bdev);  		sbi->journal_bdev = NULL;  	} -	return ret;  }  static inline struct inode *orphan_list_entry(struct list_head *l) @@ -712,12 +782,8 @@ static void ext4_put_super(struct super_block *sb)  	ext4_unregister_li_request(sb);  	dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); -	flush_workqueue(sbi->dio_unwritten_wq); -	destroy_workqueue(sbi->dio_unwritten_wq); - -	lock_super(sb); -	if (sb->s_dirt) -		ext4_commit_super(sb, 1); +	flush_workqueue(sbi->rsv_conversion_wq); +	destroy_workqueue(sbi->rsv_conversion_wq);  	if (sbi->s_journal) {  		err = jbd2_journal_destroy(sbi->s_journal); @@ -726,7 +792,8 @@ static void ext4_put_super(struct super_block *sb)  			ext4_abort(sb, "Couldn't clean up the journal");  	} -	del_timer(&sbi->s_err_report); +	ext4_es_unregister_shrinker(sbi); +	del_timer_sync(&sbi->s_err_report);  	ext4_release_system_zone(sb);  	ext4_mb_release(sb);  	ext4_ext_release(sb); @@ -735,24 +802,25 @@ static void ext4_put_super(struct super_block *sb)  	if (!(sb->s_flags & MS_RDONLY)) {  		EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);  		es->s_state = cpu_to_le16(sbi->s_mount_state); -		ext4_commit_super(sb, 1);  	} +	if (!(sb->s_flags & MS_RDONLY)) +		ext4_commit_super(sb, 1); +  	if (sbi->s_proc) { +		remove_proc_entry("options", sbi->s_proc);  		remove_proc_entry(sb->s_id, ext4_proc_root);  	}  	kobject_del(&sbi->s_kobj);  	for (i = 0; i < sbi->s_gdb_count; i++)  		brelse(sbi->s_group_desc[i]); -	kfree(sbi->s_group_desc); -	if (is_vmalloc_addr(sbi->s_flex_groups)) -		vfree(sbi->s_flex_groups); -	else -		kfree(sbi->s_flex_groups); -	percpu_counter_destroy(&sbi->s_freeblocks_counter); +	ext4_kvfree(sbi->s_group_desc); +	ext4_kvfree(sbi->s_flex_groups); +	percpu_counter_destroy(&sbi->s_freeclusters_counter);  	percpu_counter_destroy(&sbi->s_freeinodes_counter);  	percpu_counter_destroy(&sbi->s_dirs_counter); -	percpu_counter_destroy(&sbi->s_dirtyblocks_counter); +	percpu_counter_destroy(&sbi->s_dirtyclusters_counter); +	percpu_counter_destroy(&sbi->s_extent_cache_cnt);  	brelse(sbi->s_sbh);  #ifdef CONFIG_QUOTA  	for (i = 0; i < MAXQUOTAS; i++) @@ -778,14 +846,21 @@ static void ext4_put_super(struct super_block *sb)  		invalidate_bdev(sbi->journal_bdev);  		ext4_blkdev_remove(sbi);  	} +	if (sbi->s_mb_cache) { +		ext4_xattr_destroy_cache(sbi->s_mb_cache); +		sbi->s_mb_cache = NULL; +	} +	if (sbi->s_mmp_tsk) +		kthread_stop(sbi->s_mmp_tsk);  	sb->s_fs_info = NULL;  	/*  	 * Now that we are completely done shutting down the  	 * superblock, we need to actually destroy the kobject.  	 */ -	unlock_super(sb);  	kobject_put(&sbi->s_kobj);  	wait_for_completion(&sbi->s_kobj_unregister); +	if (sbi->s_chksum_driver) +		crypto_free_shash(sbi->s_chksum_driver);  	kfree(sbi->s_blockgroup_lock);  	kfree(sbi);  } @@ -804,31 +879,31 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)  		return NULL;  	ei->vfs_inode.i_version = 1; -	ei->vfs_inode.i_data.writeback_index = 0; -	memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); +	spin_lock_init(&ei->i_raw_lock);  	INIT_LIST_HEAD(&ei->i_prealloc_list);  	spin_lock_init(&ei->i_prealloc_lock); -	/* -	 * Note:  We can be called before EXT4_SB(sb)->s_journal is set, -	 * therefore it can be null here.  Don't check it, just initialize -	 * jinode. -	 */ -	jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode); +	ext4_es_init_tree(&ei->i_es_tree); +	rwlock_init(&ei->i_es_lock); +	INIT_LIST_HEAD(&ei->i_es_lru); +	ei->i_es_lru_nr = 0; +	ei->i_touch_when = 0;  	ei->i_reserved_data_blocks = 0;  	ei->i_reserved_meta_blocks = 0;  	ei->i_allocated_meta_blocks = 0;  	ei->i_da_metadata_calc_len = 0; -	ei->i_delalloc_reserved_flag = 0; +	ei->i_da_metadata_calc_last_lblock = 0;  	spin_lock_init(&(ei->i_block_reservation_lock));  #ifdef CONFIG_QUOTA  	ei->i_reserved_quota = 0;  #endif -	INIT_LIST_HEAD(&ei->i_completed_io_list); +	ei->jinode = NULL; +	INIT_LIST_HEAD(&ei->i_rsv_conversion_list);  	spin_lock_init(&ei->i_completed_io_lock); -	ei->cur_aio_dio = NULL;  	ei->i_sync_tid = 0;  	ei->i_datasync_tid = 0;  	atomic_set(&ei->i_ioend_count, 0); +	atomic_set(&ei->i_unwritten, 0); +	INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);  	return &ei->vfs_inode;  } @@ -841,9 +916,14 @@ static int ext4_drop_inode(struct inode *inode)  	return drop;  } +static void ext4_i_callback(struct rcu_head *head) +{ +	struct inode *inode = container_of(head, struct inode, i_rcu); +	kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); +} +  static void ext4_destroy_inode(struct inode *inode)  { -	ext4_ioend_wait(inode);  	if (!list_empty(&(EXT4_I(inode)->i_orphan))) {  		ext4_msg(inode->i_sb, KERN_ERR,  			 "Inode %lu (%p): orphan list check failed!", @@ -853,7 +933,7 @@ static void ext4_destroy_inode(struct inode *inode)  				true);  		dump_stack();  	} -	kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); +	call_rcu(&inode->i_rcu, ext4_i_callback);  }  static void init_once(void *foo) @@ -861,14 +941,12 @@ static void init_once(void *foo)  	struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;  	INIT_LIST_HEAD(&ei->i_orphan); -#ifdef CONFIG_EXT4_FS_XATTR  	init_rwsem(&ei->xattr_sem); -#endif  	init_rwsem(&ei->i_data_sem);  	inode_init_once(&ei->vfs_inode);  } -static int init_inodecache(void) +static int __init init_inodecache(void)  {  	ext4_inode_cachep = kmem_cache_create("ext4_inode_cache",  					     sizeof(struct ext4_inode_info), @@ -882,195 +960,28 @@ static int init_inodecache(void)  static void destroy_inodecache(void)  { +	/* +	 * Make sure all delayed rcu free inodes are flushed before we +	 * destroy cache. +	 */ +	rcu_barrier();  	kmem_cache_destroy(ext4_inode_cachep);  }  void ext4_clear_inode(struct inode *inode)  {  	invalidate_inode_buffers(inode); -	end_writeback(inode); +	clear_inode(inode);  	dquot_drop(inode);  	ext4_discard_preallocations(inode); -	if (EXT4_JOURNAL(inode)) -		jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, -				       &EXT4_I(inode)->jinode); -} - -static inline void ext4_show_quota_options(struct seq_file *seq, -					   struct super_block *sb) -{ -#if defined(CONFIG_QUOTA) -	struct ext4_sb_info *sbi = EXT4_SB(sb); - -	if (sbi->s_jquota_fmt) { -		char *fmtname = ""; - -		switch (sbi->s_jquota_fmt) { -		case QFMT_VFS_OLD: -			fmtname = "vfsold"; -			break; -		case QFMT_VFS_V0: -			fmtname = "vfsv0"; -			break; -		case QFMT_VFS_V1: -			fmtname = "vfsv1"; -			break; -		} -		seq_printf(seq, ",jqfmt=%s", fmtname); -	} - -	if (sbi->s_qf_names[USRQUOTA]) -		seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); - -	if (sbi->s_qf_names[GRPQUOTA]) -		seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); - -	if (test_opt(sb, USRQUOTA)) -		seq_puts(seq, ",usrquota"); - -	if (test_opt(sb, GRPQUOTA)) -		seq_puts(seq, ",grpquota"); -#endif -} - -/* - * Show an option if - *  - it's set to a non-default value OR - *  - if the per-sb default is different from the global default - */ -static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) -{ -	int def_errors; -	unsigned long def_mount_opts; -	struct super_block *sb = vfs->mnt_sb; -	struct ext4_sb_info *sbi = EXT4_SB(sb); -	struct ext4_super_block *es = sbi->s_es; - -	def_mount_opts = le32_to_cpu(es->s_default_mount_opts); -	def_errors     = le16_to_cpu(es->s_errors); - -	if (sbi->s_sb_block != 1) -		seq_printf(seq, ",sb=%llu", sbi->s_sb_block); -	if (test_opt(sb, MINIX_DF)) -		seq_puts(seq, ",minixdf"); -	if (test_opt(sb, GRPID) && !(def_mount_opts & EXT4_DEFM_BSDGROUPS)) -		seq_puts(seq, ",grpid"); -	if (!test_opt(sb, GRPID) && (def_mount_opts & EXT4_DEFM_BSDGROUPS)) -		seq_puts(seq, ",nogrpid"); -	if (sbi->s_resuid != EXT4_DEF_RESUID || -	    le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID) { -		seq_printf(seq, ",resuid=%u", sbi->s_resuid); -	} -	if (sbi->s_resgid != EXT4_DEF_RESGID || -	    le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) { -		seq_printf(seq, ",resgid=%u", sbi->s_resgid); -	} -	if (test_opt(sb, ERRORS_RO)) { -		if (def_errors == EXT4_ERRORS_PANIC || -		    def_errors == EXT4_ERRORS_CONTINUE) { -			seq_puts(seq, ",errors=remount-ro"); -		} -	} -	if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE) -		seq_puts(seq, ",errors=continue"); -	if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC) -		seq_puts(seq, ",errors=panic"); -	if (test_opt(sb, NO_UID32) && !(def_mount_opts & EXT4_DEFM_UID16)) -		seq_puts(seq, ",nouid32"); -	if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG)) -		seq_puts(seq, ",debug"); -	if (test_opt(sb, OLDALLOC)) -		seq_puts(seq, ",oldalloc"); -#ifdef CONFIG_EXT4_FS_XATTR -	if (test_opt(sb, XATTR_USER) && -		!(def_mount_opts & EXT4_DEFM_XATTR_USER)) -		seq_puts(seq, ",user_xattr"); -	if (!test_opt(sb, XATTR_USER) && -	    (def_mount_opts & EXT4_DEFM_XATTR_USER)) { -		seq_puts(seq, ",nouser_xattr"); -	} -#endif -#ifdef CONFIG_EXT4_FS_POSIX_ACL -	if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL)) -		seq_puts(seq, ",acl"); -	if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL)) -		seq_puts(seq, ",noacl"); -#endif -	if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) { -		seq_printf(seq, ",commit=%u", -			   (unsigned) (sbi->s_commit_interval / HZ)); -	} -	if (sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) { -		seq_printf(seq, ",min_batch_time=%u", -			   (unsigned) sbi->s_min_batch_time); +	ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); +	ext4_es_lru_del(inode); +	if (EXT4_I(inode)->jinode) { +		jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode), +					       EXT4_I(inode)->jinode); +		jbd2_free_inode(EXT4_I(inode)->jinode); +		EXT4_I(inode)->jinode = NULL;  	} -	if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) { -		seq_printf(seq, ",max_batch_time=%u", -			   (unsigned) sbi->s_min_batch_time); -	} - -	/* -	 * We're changing the default of barrier mount option, so -	 * let's always display its mount state so it's clear what its -	 * status is. -	 */ -	seq_puts(seq, ",barrier="); -	seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0"); -	if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) -		seq_puts(seq, ",journal_async_commit"); -	else if (test_opt(sb, JOURNAL_CHECKSUM)) -		seq_puts(seq, ",journal_checksum"); -	if (test_opt(sb, I_VERSION)) -		seq_puts(seq, ",i_version"); -	if (!test_opt(sb, DELALLOC) && -	    !(def_mount_opts & EXT4_DEFM_NODELALLOC)) -		seq_puts(seq, ",nodelalloc"); - -	if (sbi->s_stripe) -		seq_printf(seq, ",stripe=%lu", sbi->s_stripe); -	/* -	 * journal mode get enabled in different ways -	 * So just print the value even if we didn't specify it -	 */ -	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) -		seq_puts(seq, ",data=journal"); -	else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) -		seq_puts(seq, ",data=ordered"); -	else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) -		seq_puts(seq, ",data=writeback"); - -	if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS) -		seq_printf(seq, ",inode_readahead_blks=%u", -			   sbi->s_inode_readahead_blks); - -	if (test_opt(sb, DATA_ERR_ABORT)) -		seq_puts(seq, ",data_err=abort"); - -	if (test_opt(sb, NO_AUTO_DA_ALLOC)) -		seq_puts(seq, ",noauto_da_alloc"); - -	if (test_opt(sb, DISCARD) && !(def_mount_opts & EXT4_DEFM_DISCARD)) -		seq_puts(seq, ",discard"); - -	if (test_opt(sb, NOLOAD)) -		seq_puts(seq, ",norecovery"); - -	if (test_opt(sb, DIOREAD_NOLOCK)) -		seq_puts(seq, ",dioread_nolock"); - -	if (test_opt(sb, BLOCK_VALIDITY) && -	    !(def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY)) -		seq_puts(seq, ",block_validity"); - -	if (!test_opt(sb, INIT_INODE_TABLE)) -		seq_puts(seq, ",noinit_inode_table"); -	else if (sbi->s_li_wait_mult) -		seq_printf(seq, ",init_inode_table=%u", -			   (unsigned) sbi->s_li_wait_mult); - -	ext4_show_quota_options(seq, sb); - -	return 0;  }  static struct inode *ext4_nfs_get_inode(struct super_block *sb, @@ -1146,18 +1057,22 @@ static int ext4_release_dquot(struct dquot *dquot);  static int ext4_mark_dquot_dirty(struct dquot *dquot);  static int ext4_write_info(struct super_block *sb, int type);  static int ext4_quota_on(struct super_block *sb, int type, int format_id, -				char *path); +			 struct path *path); +static int ext4_quota_on_sysfile(struct super_block *sb, int type, +				 int format_id);  static int ext4_quota_off(struct super_block *sb, int type); +static int ext4_quota_off_sysfile(struct super_block *sb, int type);  static int ext4_quota_on_mount(struct super_block *sb, int type);  static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,  			       size_t len, loff_t off);  static ssize_t ext4_quota_write(struct super_block *sb, int type,  				const char *data, size_t len, loff_t off); +static int ext4_quota_enable(struct super_block *sb, int type, int format_id, +			     unsigned int flags); +static int ext4_enable_quotas(struct super_block *sb);  static const struct dquot_operations ext4_quota_operations = { -#ifdef CONFIG_QUOTA  	.get_reserved_space = ext4_get_reserved_space, -#endif  	.write_dquot	= ext4_write_dquot,  	.acquire_dquot	= ext4_acquire_dquot,  	.release_dquot	= ext4_release_dquot, @@ -1176,6 +1091,16 @@ static const struct quotactl_ops ext4_qctl_operations = {  	.get_dqblk	= dquot_get_dqblk,  	.set_dqblk	= dquot_set_dqblk  }; + +static const struct quotactl_ops ext4_qctl_sysfile_operations = { +	.quota_on_meta	= ext4_quota_on_sysfile, +	.quota_off	= ext4_quota_off_sysfile, +	.quota_sync	= dquot_quota_sync, +	.get_info	= dquot_get_dqinfo, +	.set_info	= dquot_set_dqinfo, +	.get_dqblk	= dquot_get_dqblk, +	.set_dqblk	= dquot_set_dqblk +};  #endif  static const struct super_operations ext4_sops = { @@ -1197,7 +1122,6 @@ static const struct super_operations ext4_sops = {  	.quota_write	= ext4_quota_write,  #endif  	.bdev_try_to_free_page = bdev_try_to_free_page, -	.trim_fs	= ext4_trim_fs  };  static const struct super_operations ext4_nojournal_sops = { @@ -1207,7 +1131,7 @@ static const struct super_operations ext4_nojournal_sops = {  	.dirty_inode	= ext4_dirty_inode,  	.drop_inode	= ext4_drop_inode,  	.evict_inode	= ext4_evict_inode, -	.write_super	= ext4_write_super, +	.sync_fs	= ext4_sync_fs_nojournal,  	.put_super	= ext4_put_super,  	.statfs		= ext4_statfs,  	.remount_fs	= ext4_remount, @@ -1228,24 +1152,23 @@ static const struct export_operations ext4_export_ops = {  enum {  	Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,  	Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, -	Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov, +	Opt_nouid32, Opt_debug, Opt_removed,  	Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, -	Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload, Opt_nobh, Opt_bh, -	Opt_commit, Opt_min_batch_time, Opt_max_batch_time, -	Opt_journal_update, Opt_journal_dev, -	Opt_journal_checksum, Opt_journal_async_commit, +	Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload, +	Opt_commit, Opt_min_batch_time, Opt_max_batch_time, Opt_journal_dev, +	Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit,  	Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,  	Opt_data_err_abort, Opt_data_err_ignore,  	Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,  	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, -	Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, -	Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version, -	Opt_stripe, Opt_delalloc, Opt_nodelalloc, -	Opt_block_validity, Opt_noblock_validity, +	Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err, +	Opt_usrquota, Opt_grpquota, Opt_i_version, +	Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit, +	Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,  	Opt_inode_readahead_blks, Opt_journal_ioprio,  	Opt_dioread_nolock, Opt_dioread_lock, -	Opt_discard, Opt_nodiscard, -	Opt_init_inode_table, Opt_noinit_inode_table, +	Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable, +	Opt_max_dir_size_kb,  };  static const match_table_t tokens = { @@ -1263,21 +1186,21 @@ static const match_table_t tokens = {  	{Opt_err_ro, "errors=remount-ro"},  	{Opt_nouid32, "nouid32"},  	{Opt_debug, "debug"}, -	{Opt_oldalloc, "oldalloc"}, -	{Opt_orlov, "orlov"}, +	{Opt_removed, "oldalloc"}, +	{Opt_removed, "orlov"},  	{Opt_user_xattr, "user_xattr"},  	{Opt_nouser_xattr, "nouser_xattr"},  	{Opt_acl, "acl"},  	{Opt_noacl, "noacl"}, -	{Opt_noload, "noload"},  	{Opt_noload, "norecovery"}, -	{Opt_nobh, "nobh"}, -	{Opt_bh, "bh"}, +	{Opt_noload, "noload"}, +	{Opt_removed, "nobh"}, +	{Opt_removed, "bh"},  	{Opt_commit, "commit=%u"},  	{Opt_min_batch_time, "min_batch_time=%u"},  	{Opt_max_batch_time, "max_batch_time=%u"}, -	{Opt_journal_update, "journal=update"},  	{Opt_journal_dev, "journal_dev=%u"}, +	{Opt_journal_path, "journal_path=%s"},  	{Opt_journal_checksum, "journal_checksum"},  	{Opt_journal_async_commit, "journal_async_commit"},  	{Opt_abort, "abort"}, @@ -1302,9 +1225,10 @@ static const match_table_t tokens = {  	{Opt_nobarrier, "nobarrier"},  	{Opt_i_version, "i_version"},  	{Opt_stripe, "stripe=%u"}, -	{Opt_resize, "resize"},  	{Opt_delalloc, "delalloc"},  	{Opt_nodelalloc, "nodelalloc"}, +	{Opt_removed, "mblk_io_submit"}, +	{Opt_removed, "nomblk_io_submit"},  	{Opt_block_validity, "block_validity"},  	{Opt_noblock_validity, "noblock_validity"},  	{Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, @@ -1316,9 +1240,15 @@ static const match_table_t tokens = {  	{Opt_dioread_lock, "dioread_lock"},  	{Opt_discard, "discard"},  	{Opt_nodiscard, "nodiscard"}, -	{Opt_init_inode_table, "init_itable=%u"}, -	{Opt_init_inode_table, "init_itable"}, -	{Opt_noinit_inode_table, "noinit_itable"}, +	{Opt_init_itable, "init_itable=%u"}, +	{Opt_init_itable, "init_itable"}, +	{Opt_noinit_itable, "noinit_itable"}, +	{Opt_max_dir_size_kb, "max_dir_size_kb=%u"}, +	{Opt_removed, "check=none"},	/* mount option from ext2/3 */ +	{Opt_removed, "nocheck"},	/* mount option from ext2/3 */ +	{Opt_removed, "reservation"},	/* mount option from ext2/3 */ +	{Opt_removed, "noreservation"}, /* mount option from ext2/3 */ +	{Opt_removed, "journal=%u"},	/* mount option from ext2/3 */  	{Opt_err, NULL},  }; @@ -1354,37 +1284,46 @@ static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)  {  	struct ext4_sb_info *sbi = EXT4_SB(sb);  	char *qname; +	int ret = -1;  	if (sb_any_quota_loaded(sb) &&  		!sbi->s_qf_names[qtype]) {  		ext4_msg(sb, KERN_ERR,  			"Cannot change journaled "  			"quota options when quota turned on"); -		return 0; +		return -1; +	} +	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) { +		ext4_msg(sb, KERN_ERR, "Cannot set journaled quota options " +			 "when QUOTA feature is enabled"); +		return -1;  	}  	qname = match_strdup(args);  	if (!qname) {  		ext4_msg(sb, KERN_ERR,  			"Not enough memory for storing quotafile name"); -		return 0; +		return -1;  	} -	if (sbi->s_qf_names[qtype] && -		strcmp(sbi->s_qf_names[qtype], qname)) { -		ext4_msg(sb, KERN_ERR, -			"%s quota file already specified", QTYPE2NAME(qtype)); -		kfree(qname); -		return 0; +	if (sbi->s_qf_names[qtype]) { +		if (strcmp(sbi->s_qf_names[qtype], qname) == 0) +			ret = 1; +		else +			ext4_msg(sb, KERN_ERR, +				 "%s quota file already specified", +				 QTYPE2NAME(qtype)); +		goto errout;  	} -	sbi->s_qf_names[qtype] = qname; -	if (strchr(sbi->s_qf_names[qtype], '/')) { +	if (strchr(qname, '/')) {  		ext4_msg(sb, KERN_ERR,  			"quotafile must be on filesystem root"); -		kfree(sbi->s_qf_names[qtype]); -		sbi->s_qf_names[qtype] = NULL; -		return 0; +		goto errout;  	} -	set_opt(sbi->s_mount_opt, QUOTA); +	sbi->s_qf_names[qtype] = qname; +	set_opt(sb, QUOTA);  	return 1; +errout: +	kfree(qname); +	return ret;  }  static int clear_qf_name(struct super_block *sb, int qtype) @@ -1396,427 +1335,371 @@ static int clear_qf_name(struct super_block *sb, int qtype)  		sbi->s_qf_names[qtype]) {  		ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options"  			" when quota turned on"); -		return 0; +		return -1;  	} -	/* -	 * The space will be released later when all options are confirmed -	 * to be correct -	 */ +	kfree(sbi->s_qf_names[qtype]);  	sbi->s_qf_names[qtype] = NULL;  	return 1;  }  #endif +#define MOPT_SET	0x0001 +#define MOPT_CLEAR	0x0002 +#define MOPT_NOSUPPORT	0x0004 +#define MOPT_EXPLICIT	0x0008 +#define MOPT_CLEAR_ERR	0x0010 +#define MOPT_GTE0	0x0020 +#ifdef CONFIG_QUOTA +#define MOPT_Q		0 +#define MOPT_QFMT	0x0040 +#else +#define MOPT_Q		MOPT_NOSUPPORT +#define MOPT_QFMT	MOPT_NOSUPPORT +#endif +#define MOPT_DATAJ	0x0080 +#define MOPT_NO_EXT2	0x0100 +#define MOPT_NO_EXT3	0x0200 +#define MOPT_EXT4_ONLY	(MOPT_NO_EXT2 | MOPT_NO_EXT3) +#define MOPT_STRING	0x0400 + +static const struct mount_opts { +	int	token; +	int	mount_opt; +	int	flags; +} ext4_mount_opts[] = { +	{Opt_minix_df, EXT4_MOUNT_MINIX_DF, MOPT_SET}, +	{Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR}, +	{Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET}, +	{Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR}, +	{Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET}, +	{Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR}, +	{Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK, +	 MOPT_EXT4_ONLY | MOPT_SET}, +	{Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK, +	 MOPT_EXT4_ONLY | MOPT_CLEAR}, +	{Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET}, +	{Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR}, +	{Opt_delalloc, EXT4_MOUNT_DELALLOC, +	 MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT}, +	{Opt_nodelalloc, EXT4_MOUNT_DELALLOC, +	 MOPT_EXT4_ONLY | MOPT_CLEAR}, +	{Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, +	 MOPT_EXT4_ONLY | MOPT_SET}, +	{Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT | +				    EXT4_MOUNT_JOURNAL_CHECKSUM), +	 MOPT_EXT4_ONLY | MOPT_SET}, +	{Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_NO_EXT2 | MOPT_SET}, +	{Opt_err_panic, EXT4_MOUNT_ERRORS_PANIC, MOPT_SET | MOPT_CLEAR_ERR}, +	{Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR}, +	{Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET | MOPT_CLEAR_ERR}, +	{Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT, +	 MOPT_NO_EXT2 | MOPT_SET}, +	{Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT, +	 MOPT_NO_EXT2 | MOPT_CLEAR}, +	{Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET}, +	{Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR}, +	{Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET}, +	{Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR}, +	{Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR}, +	{Opt_commit, 0, MOPT_GTE0}, +	{Opt_max_batch_time, 0, MOPT_GTE0}, +	{Opt_min_batch_time, 0, MOPT_GTE0}, +	{Opt_inode_readahead_blks, 0, MOPT_GTE0}, +	{Opt_init_itable, 0, MOPT_GTE0}, +	{Opt_stripe, 0, MOPT_GTE0}, +	{Opt_resuid, 0, MOPT_GTE0}, +	{Opt_resgid, 0, MOPT_GTE0}, +	{Opt_journal_dev, 0, MOPT_GTE0}, +	{Opt_journal_path, 0, MOPT_STRING}, +	{Opt_journal_ioprio, 0, MOPT_GTE0}, +	{Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 | MOPT_DATAJ}, +	{Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 | MOPT_DATAJ}, +	{Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA, +	 MOPT_NO_EXT2 | MOPT_DATAJ}, +	{Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET}, +	{Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR}, +#ifdef CONFIG_EXT4_FS_POSIX_ACL +	{Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET}, +	{Opt_noacl, EXT4_MOUNT_POSIX_ACL, MOPT_CLEAR}, +#else +	{Opt_acl, 0, MOPT_NOSUPPORT}, +	{Opt_noacl, 0, MOPT_NOSUPPORT}, +#endif +	{Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET}, +	{Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET}, +	{Opt_quota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, MOPT_SET | MOPT_Q}, +	{Opt_usrquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, +							MOPT_SET | MOPT_Q}, +	{Opt_grpquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_GRPQUOTA, +							MOPT_SET | MOPT_Q}, +	{Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA | +		       EXT4_MOUNT_GRPQUOTA), MOPT_CLEAR | MOPT_Q}, +	{Opt_usrjquota, 0, MOPT_Q}, +	{Opt_grpjquota, 0, MOPT_Q}, +	{Opt_offusrjquota, 0, MOPT_Q}, +	{Opt_offgrpjquota, 0, MOPT_Q}, +	{Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT}, +	{Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT}, +	{Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT}, +	{Opt_max_dir_size_kb, 0, MOPT_GTE0}, +	{Opt_err, 0, 0} +}; + +static int handle_mount_opt(struct super_block *sb, char *opt, int token, +			    substring_t *args, unsigned long *journal_devnum, +			    unsigned int *journal_ioprio, int is_remount) +{ +	struct ext4_sb_info *sbi = EXT4_SB(sb); +	const struct mount_opts *m; +	kuid_t uid; +	kgid_t gid; +	int arg = 0; + +#ifdef CONFIG_QUOTA +	if (token == Opt_usrjquota) +		return set_qf_name(sb, USRQUOTA, &args[0]); +	else if (token == Opt_grpjquota) +		return set_qf_name(sb, GRPQUOTA, &args[0]); +	else if (token == Opt_offusrjquota) +		return clear_qf_name(sb, USRQUOTA); +	else if (token == Opt_offgrpjquota) +		return clear_qf_name(sb, GRPQUOTA); +#endif +	switch (token) { +	case Opt_noacl: +	case Opt_nouser_xattr: +		ext4_msg(sb, KERN_WARNING, deprecated_msg, opt, "3.5"); +		break; +	case Opt_sb: +		return 1;	/* handled by get_sb_block() */ +	case Opt_removed: +		ext4_msg(sb, KERN_WARNING, "Ignoring removed %s option", opt); +		return 1; +	case Opt_abort: +		sbi->s_mount_flags |= EXT4_MF_FS_ABORTED; +		return 1; +	case Opt_i_version: +		sb->s_flags |= MS_I_VERSION; +		return 1; +	} + +	for (m = ext4_mount_opts; m->token != Opt_err; m++) +		if (token == m->token) +			break; + +	if (m->token == Opt_err) { +		ext4_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" " +			 "or missing value", opt); +		return -1; +	} + +	if ((m->flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) { +		ext4_msg(sb, KERN_ERR, +			 "Mount option \"%s\" incompatible with ext2", opt); +		return -1; +	} +	if ((m->flags & MOPT_NO_EXT3) && IS_EXT3_SB(sb)) { +		ext4_msg(sb, KERN_ERR, +			 "Mount option \"%s\" incompatible with ext3", opt); +		return -1; +	} + +	if (args->from && !(m->flags & MOPT_STRING) && match_int(args, &arg)) +		return -1; +	if (args->from && (m->flags & MOPT_GTE0) && (arg < 0)) +		return -1; +	if (m->flags & MOPT_EXPLICIT) +		set_opt2(sb, EXPLICIT_DELALLOC); +	if (m->flags & MOPT_CLEAR_ERR) +		clear_opt(sb, ERRORS_MASK); +	if (token == Opt_noquota && sb_any_quota_loaded(sb)) { +		ext4_msg(sb, KERN_ERR, "Cannot change quota " +			 "options when quota turned on"); +		return -1; +	} + +	if (m->flags & MOPT_NOSUPPORT) { +		ext4_msg(sb, KERN_ERR, "%s option not supported", opt); +	} else if (token == Opt_commit) { +		if (arg == 0) +			arg = JBD2_DEFAULT_MAX_COMMIT_AGE; +		sbi->s_commit_interval = HZ * arg; +	} else if (token == Opt_max_batch_time) { +		sbi->s_max_batch_time = arg; +	} else if (token == Opt_min_batch_time) { +		sbi->s_min_batch_time = arg; +	} else if (token == Opt_inode_readahead_blks) { +		if (arg && (arg > (1 << 30) || !is_power_of_2(arg))) { +			ext4_msg(sb, KERN_ERR, +				 "EXT4-fs: inode_readahead_blks must be " +				 "0 or a power of 2 smaller than 2^31"); +			return -1; +		} +		sbi->s_inode_readahead_blks = arg; +	} else if (token == Opt_init_itable) { +		set_opt(sb, INIT_INODE_TABLE); +		if (!args->from) +			arg = EXT4_DEF_LI_WAIT_MULT; +		sbi->s_li_wait_mult = arg; +	} else if (token == Opt_max_dir_size_kb) { +		sbi->s_max_dir_size_kb = arg; +	} else if (token == Opt_stripe) { +		sbi->s_stripe = arg; +	} else if (token == Opt_resuid) { +		uid = make_kuid(current_user_ns(), arg); +		if (!uid_valid(uid)) { +			ext4_msg(sb, KERN_ERR, "Invalid uid value %d", arg); +			return -1; +		} +		sbi->s_resuid = uid; +	} else if (token == Opt_resgid) { +		gid = make_kgid(current_user_ns(), arg); +		if (!gid_valid(gid)) { +			ext4_msg(sb, KERN_ERR, "Invalid gid value %d", arg); +			return -1; +		} +		sbi->s_resgid = gid; +	} else if (token == Opt_journal_dev) { +		if (is_remount) { +			ext4_msg(sb, KERN_ERR, +				 "Cannot specify journal on remount"); +			return -1; +		} +		*journal_devnum = arg; +	} else if (token == Opt_journal_path) { +		char *journal_path; +		struct inode *journal_inode; +		struct path path; +		int error; + +		if (is_remount) { +			ext4_msg(sb, KERN_ERR, +				 "Cannot specify journal on remount"); +			return -1; +		} +		journal_path = match_strdup(&args[0]); +		if (!journal_path) { +			ext4_msg(sb, KERN_ERR, "error: could not dup " +				"journal device string"); +			return -1; +		} + +		error = kern_path(journal_path, LOOKUP_FOLLOW, &path); +		if (error) { +			ext4_msg(sb, KERN_ERR, "error: could not find " +				"journal device path: error %d", error); +			kfree(journal_path); +			return -1; +		} + +		journal_inode = path.dentry->d_inode; +		if (!S_ISBLK(journal_inode->i_mode)) { +			ext4_msg(sb, KERN_ERR, "error: journal path %s " +				"is not a block device", journal_path); +			path_put(&path); +			kfree(journal_path); +			return -1; +		} + +		*journal_devnum = new_encode_dev(journal_inode->i_rdev); +		path_put(&path); +		kfree(journal_path); +	} else if (token == Opt_journal_ioprio) { +		if (arg > 7) { +			ext4_msg(sb, KERN_ERR, "Invalid journal IO priority" +				 " (must be 0-7)"); +			return -1; +		} +		*journal_ioprio = +			IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, arg); +	} else if (m->flags & MOPT_DATAJ) { +		if (is_remount) { +			if (!sbi->s_journal) +				ext4_msg(sb, KERN_WARNING, "Remounting file system with no journal so ignoring journalled data option"); +			else if (test_opt(sb, DATA_FLAGS) != m->mount_opt) { +				ext4_msg(sb, KERN_ERR, +					 "Cannot change data mode on remount"); +				return -1; +			} +		} else { +			clear_opt(sb, DATA_FLAGS); +			sbi->s_mount_opt |= m->mount_opt; +		} +#ifdef CONFIG_QUOTA +	} else if (m->flags & MOPT_QFMT) { +		if (sb_any_quota_loaded(sb) && +		    sbi->s_jquota_fmt != m->mount_opt) { +			ext4_msg(sb, KERN_ERR, "Cannot change journaled " +				 "quota options when quota turned on"); +			return -1; +		} +		if (EXT4_HAS_RO_COMPAT_FEATURE(sb, +					       EXT4_FEATURE_RO_COMPAT_QUOTA)) { +			ext4_msg(sb, KERN_ERR, +				 "Cannot set journaled quota options " +				 "when QUOTA feature is enabled"); +			return -1; +		} +		sbi->s_jquota_fmt = m->mount_opt; +#endif +	} else { +		if (!args->from) +			arg = 1; +		if (m->flags & MOPT_CLEAR) +			arg = !arg; +		else if (unlikely(!(m->flags & MOPT_SET))) { +			ext4_msg(sb, KERN_WARNING, +				 "buggy handling of option %s", opt); +			WARN_ON(1); +			return -1; +		} +		if (arg != 0) +			sbi->s_mount_opt |= m->mount_opt; +		else +			sbi->s_mount_opt &= ~m->mount_opt; +	} +	return 1; +} +  static int parse_options(char *options, struct super_block *sb,  			 unsigned long *journal_devnum,  			 unsigned int *journal_ioprio, -			 ext4_fsblk_t *n_blocks_count, int is_remount) +			 int is_remount)  {  	struct ext4_sb_info *sbi = EXT4_SB(sb);  	char *p;  	substring_t args[MAX_OPT_ARGS]; -	int data_opt = 0; -	int option; -#ifdef CONFIG_QUOTA -	int qfmt; -#endif +	int token;  	if (!options)  		return 1;  	while ((p = strsep(&options, ",")) != NULL) { -		int token;  		if (!*p)  			continue; -  		/*  		 * Initialize args struct so we know whether arg was  		 * found; some options take optional arguments.  		 */ -		args[0].to = args[0].from = 0; +		args[0].to = args[0].from = NULL;  		token = match_token(p, tokens, args); -		switch (token) { -		case Opt_bsd_df: -			ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); -			clear_opt(sbi->s_mount_opt, MINIX_DF); -			break; -		case Opt_minix_df: -			ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); -			set_opt(sbi->s_mount_opt, MINIX_DF); - -			break; -		case Opt_grpid: -			ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); -			set_opt(sbi->s_mount_opt, GRPID); - -			break; -		case Opt_nogrpid: -			ext4_msg(sb, KERN_WARNING, deprecated_msg, p, "2.6.38"); -			clear_opt(sbi->s_mount_opt, GRPID); - -			break; -		case Opt_resuid: -			if (match_int(&args[0], &option)) -				return 0; -			sbi->s_resuid = option; -			break; -		case Opt_resgid: -			if (match_int(&args[0], &option)) -				return 0; -			sbi->s_resgid = option; -			break; -		case Opt_sb: -			/* handled by get_sb_block() instead of here */ -			/* *sb_block = match_int(&args[0]); */ -			break; -		case Opt_err_panic: -			clear_opt(sbi->s_mount_opt, ERRORS_CONT); -			clear_opt(sbi->s_mount_opt, ERRORS_RO); -			set_opt(sbi->s_mount_opt, ERRORS_PANIC); -			break; -		case Opt_err_ro: -			clear_opt(sbi->s_mount_opt, ERRORS_CONT); -			clear_opt(sbi->s_mount_opt, ERRORS_PANIC); -			set_opt(sbi->s_mount_opt, ERRORS_RO); -			break; -		case Opt_err_cont: -			clear_opt(sbi->s_mount_opt, ERRORS_RO); -			clear_opt(sbi->s_mount_opt, ERRORS_PANIC); -			set_opt(sbi->s_mount_opt, ERRORS_CONT); -			break; -		case Opt_nouid32: -			set_opt(sbi->s_mount_opt, NO_UID32); -			break; -		case Opt_debug: -			set_opt(sbi->s_mount_opt, DEBUG); -			break; -		case Opt_oldalloc: -			set_opt(sbi->s_mount_opt, OLDALLOC); -			break; -		case Opt_orlov: -			clear_opt(sbi->s_mount_opt, OLDALLOC); -			break; -#ifdef CONFIG_EXT4_FS_XATTR -		case Opt_user_xattr: -			set_opt(sbi->s_mount_opt, XATTR_USER); -			break; -		case Opt_nouser_xattr: -			clear_opt(sbi->s_mount_opt, XATTR_USER); -			break; -#else -		case Opt_user_xattr: -		case Opt_nouser_xattr: -			ext4_msg(sb, KERN_ERR, "(no)user_xattr options not supported"); -			break; -#endif -#ifdef CONFIG_EXT4_FS_POSIX_ACL -		case Opt_acl: -			set_opt(sbi->s_mount_opt, POSIX_ACL); -			break; -		case Opt_noacl: -			clear_opt(sbi->s_mount_opt, POSIX_ACL); -			break; -#else -		case Opt_acl: -		case Opt_noacl: -			ext4_msg(sb, KERN_ERR, "(no)acl options not supported"); -			break; -#endif -		case Opt_journal_update: -			/* @@@ FIXME */ -			/* Eventually we will want to be able to create -			   a journal file here.  For now, only allow the -			   user to specify an existing inode to be the -			   journal file. */ -			if (is_remount) { -				ext4_msg(sb, KERN_ERR, -					 "Cannot specify journal on remount"); -				return 0; -			} -			set_opt(sbi->s_mount_opt, UPDATE_JOURNAL); -			break; -		case Opt_journal_dev: -			if (is_remount) { -				ext4_msg(sb, KERN_ERR, -					"Cannot specify journal on remount"); -				return 0; -			} -			if (match_int(&args[0], &option)) -				return 0; -			*journal_devnum = option; -			break; -		case Opt_journal_checksum: -			set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); -			break; -		case Opt_journal_async_commit: -			set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT); -			set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); -			break; -		case Opt_noload: -			set_opt(sbi->s_mount_opt, NOLOAD); -			break; -		case Opt_commit: -			if (match_int(&args[0], &option)) -				return 0; -			if (option < 0) -				return 0; -			if (option == 0) -				option = JBD2_DEFAULT_MAX_COMMIT_AGE; -			sbi->s_commit_interval = HZ * option; -			break; -		case Opt_max_batch_time: -			if (match_int(&args[0], &option)) -				return 0; -			if (option < 0) -				return 0; -			if (option == 0) -				option = EXT4_DEF_MAX_BATCH_TIME; -			sbi->s_max_batch_time = option; -			break; -		case Opt_min_batch_time: -			if (match_int(&args[0], &option)) -				return 0; -			if (option < 0) -				return 0; -			sbi->s_min_batch_time = option; -			break; -		case Opt_data_journal: -			data_opt = EXT4_MOUNT_JOURNAL_DATA; -			goto datacheck; -		case Opt_data_ordered: -			data_opt = EXT4_MOUNT_ORDERED_DATA; -			goto datacheck; -		case Opt_data_writeback: -			data_opt = EXT4_MOUNT_WRITEBACK_DATA; -		datacheck: -			if (is_remount) { -				if (test_opt(sb, DATA_FLAGS) != data_opt) { -					ext4_msg(sb, KERN_ERR, -						"Cannot change data mode on remount"); -					return 0; -				} -			} else { -				clear_opt(sbi->s_mount_opt, DATA_FLAGS); -				sbi->s_mount_opt |= data_opt; -			} -			break; -		case Opt_data_err_abort: -			set_opt(sbi->s_mount_opt, DATA_ERR_ABORT); -			break; -		case Opt_data_err_ignore: -			clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT); -			break; -#ifdef CONFIG_QUOTA -		case Opt_usrjquota: -			if (!set_qf_name(sb, USRQUOTA, &args[0])) -				return 0; -			break; -		case Opt_grpjquota: -			if (!set_qf_name(sb, GRPQUOTA, &args[0])) -				return 0; -			break; -		case Opt_offusrjquota: -			if (!clear_qf_name(sb, USRQUOTA)) -				return 0; -			break; -		case Opt_offgrpjquota: -			if (!clear_qf_name(sb, GRPQUOTA)) -				return 0; -			break; - -		case Opt_jqfmt_vfsold: -			qfmt = QFMT_VFS_OLD; -			goto set_qf_format; -		case Opt_jqfmt_vfsv0: -			qfmt = QFMT_VFS_V0; -			goto set_qf_format; -		case Opt_jqfmt_vfsv1: -			qfmt = QFMT_VFS_V1; -set_qf_format: -			if (sb_any_quota_loaded(sb) && -			    sbi->s_jquota_fmt != qfmt) { -				ext4_msg(sb, KERN_ERR, "Cannot change " -					"journaled quota options when " -					"quota turned on"); -				return 0; -			} -			sbi->s_jquota_fmt = qfmt; -			break; -		case Opt_quota: -		case Opt_usrquota: -			set_opt(sbi->s_mount_opt, QUOTA); -			set_opt(sbi->s_mount_opt, USRQUOTA); -			break; -		case Opt_grpquota: -			set_opt(sbi->s_mount_opt, QUOTA); -			set_opt(sbi->s_mount_opt, GRPQUOTA); -			break; -		case Opt_noquota: -			if (sb_any_quota_loaded(sb)) { -				ext4_msg(sb, KERN_ERR, "Cannot change quota " -					"options when quota turned on"); -				return 0; -			} -			clear_opt(sbi->s_mount_opt, QUOTA); -			clear_opt(sbi->s_mount_opt, USRQUOTA); -			clear_opt(sbi->s_mount_opt, GRPQUOTA); -			break; -#else -		case Opt_quota: -		case Opt_usrquota: -		case Opt_grpquota: -			ext4_msg(sb, KERN_ERR, -				"quota options not supported"); -			break; -		case Opt_usrjquota: -		case Opt_grpjquota: -		case Opt_offusrjquota: -		case Opt_offgrpjquota: -		case Opt_jqfmt_vfsold: -		case Opt_jqfmt_vfsv0: -		case Opt_jqfmt_vfsv1: -			ext4_msg(sb, KERN_ERR, -				"journaled quota options not supported"); -			break; -		case Opt_noquota: -			break; -#endif -		case Opt_abort: -			sbi->s_mount_flags |= EXT4_MF_FS_ABORTED; -			break; -		case Opt_nobarrier: -			clear_opt(sbi->s_mount_opt, BARRIER); -			break; -		case Opt_barrier: -			if (args[0].from) { -				if (match_int(&args[0], &option)) -					return 0; -			} else -				option = 1;	/* No argument, default to 1 */ -			if (option) -				set_opt(sbi->s_mount_opt, BARRIER); -			else -				clear_opt(sbi->s_mount_opt, BARRIER); -			break; -		case Opt_ignore: -			break; -		case Opt_resize: -			if (!is_remount) { -				ext4_msg(sb, KERN_ERR, -					"resize option only available " -					"for remount"); -				return 0; -			} -			if (match_int(&args[0], &option) != 0) -				return 0; -			*n_blocks_count = option; -			break; -		case Opt_nobh: -			ext4_msg(sb, KERN_WARNING, -				 "Ignoring deprecated nobh option"); -			break; -		case Opt_bh: -			ext4_msg(sb, KERN_WARNING, -				 "Ignoring deprecated bh option"); -			break; -		case Opt_i_version: -			set_opt(sbi->s_mount_opt, I_VERSION); -			sb->s_flags |= MS_I_VERSION; -			break; -		case Opt_nodelalloc: -			clear_opt(sbi->s_mount_opt, DELALLOC); -			break; -		case Opt_stripe: -			if (match_int(&args[0], &option)) -				return 0; -			if (option < 0) -				return 0; -			sbi->s_stripe = option; -			break; -		case Opt_delalloc: -			set_opt(sbi->s_mount_opt, DELALLOC); -			break; -		case Opt_block_validity: -			set_opt(sbi->s_mount_opt, BLOCK_VALIDITY); -			break; -		case Opt_noblock_validity: -			clear_opt(sbi->s_mount_opt, BLOCK_VALIDITY); -			break; -		case Opt_inode_readahead_blks: -			if (match_int(&args[0], &option)) -				return 0; -			if (option < 0 || option > (1 << 30)) -				return 0; -			if (!is_power_of_2(option)) { -				ext4_msg(sb, KERN_ERR, -					 "EXT4-fs: inode_readahead_blks" -					 " must be a power of 2"); -				return 0; -			} -			sbi->s_inode_readahead_blks = option; -			break; -		case Opt_journal_ioprio: -			if (match_int(&args[0], &option)) -				return 0; -			if (option < 0 || option > 7) -				break; -			*journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, -							    option); -			break; -		case Opt_noauto_da_alloc: -			set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); -			break; -		case Opt_auto_da_alloc: -			if (args[0].from) { -				if (match_int(&args[0], &option)) -					return 0; -			} else -				option = 1;	/* No argument, default to 1 */ -			if (option) -				clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC); -			else -				set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); -			break; -		case Opt_discard: -			set_opt(sbi->s_mount_opt, DISCARD); -			break; -		case Opt_nodiscard: -			clear_opt(sbi->s_mount_opt, DISCARD); -			break; -		case Opt_dioread_nolock: -			set_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); -			break; -		case Opt_dioread_lock: -			clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); -			break; -		case Opt_init_inode_table: -			set_opt(sbi->s_mount_opt, INIT_INODE_TABLE); -			if (args[0].from) { -				if (match_int(&args[0], &option)) -					return 0; -			} else -				option = EXT4_DEF_LI_WAIT_MULT; -			if (option < 0) -				return 0; -			sbi->s_li_wait_mult = option; -			break; -		case Opt_noinit_inode_table: -			clear_opt(sbi->s_mount_opt, INIT_INODE_TABLE); -			break; -		default: -			ext4_msg(sb, KERN_ERR, -			       "Unrecognized mount option \"%s\" " -			       "or missing value", p); +		if (handle_mount_opt(sb, p, token, args, journal_devnum, +				     journal_ioprio, is_remount) < 0)  			return 0; -		}  	}  #ifdef CONFIG_QUOTA +	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) && +	    (test_opt(sb, USRQUOTA) || test_opt(sb, GRPQUOTA))) { +		ext4_msg(sb, KERN_ERR, "Cannot set quota options when QUOTA " +			 "feature is enabled"); +		return 0; +	}  	if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {  		if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA]) -			clear_opt(sbi->s_mount_opt, USRQUOTA); +			clear_opt(sb, USRQUOTA);  		if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA]) -			clear_opt(sbi->s_mount_opt, GRPQUOTA); +			clear_opt(sb, GRPQUOTA);  		if (test_opt(sb, GRPQUOTA) || test_opt(sb, USRQUOTA)) {  			ext4_msg(sb, KERN_ERR, "old and new quota " @@ -1838,9 +1721,171 @@ set_qf_format:  		}  	}  #endif +	if (test_opt(sb, DIOREAD_NOLOCK)) { +		int blocksize = +			BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size); + +		if (blocksize < PAGE_CACHE_SIZE) { +			ext4_msg(sb, KERN_ERR, "can't mount with " +				 "dioread_nolock if block size != PAGE_SIZE"); +			return 0; +		} +	}  	return 1;  } +static inline void ext4_show_quota_options(struct seq_file *seq, +					   struct super_block *sb) +{ +#if defined(CONFIG_QUOTA) +	struct ext4_sb_info *sbi = EXT4_SB(sb); + +	if (sbi->s_jquota_fmt) { +		char *fmtname = ""; + +		switch (sbi->s_jquota_fmt) { +		case QFMT_VFS_OLD: +			fmtname = "vfsold"; +			break; +		case QFMT_VFS_V0: +			fmtname = "vfsv0"; +			break; +		case QFMT_VFS_V1: +			fmtname = "vfsv1"; +			break; +		} +		seq_printf(seq, ",jqfmt=%s", fmtname); +	} + +	if (sbi->s_qf_names[USRQUOTA]) +		seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); + +	if (sbi->s_qf_names[GRPQUOTA]) +		seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); +#endif +} + +static const char *token2str(int token) +{ +	const struct match_token *t; + +	for (t = tokens; t->token != Opt_err; t++) +		if (t->token == token && !strchr(t->pattern, '=')) +			break; +	return t->pattern; +} + +/* + * Show an option if + *  - it's set to a non-default value OR + *  - if the per-sb default is different from the global default + */ +static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, +			      int nodefs) +{ +	struct ext4_sb_info *sbi = EXT4_SB(sb); +	struct ext4_super_block *es = sbi->s_es; +	int def_errors, def_mount_opt = nodefs ? 0 : sbi->s_def_mount_opt; +	const struct mount_opts *m; +	char sep = nodefs ? '\n' : ','; + +#define SEQ_OPTS_PUTS(str) seq_printf(seq, "%c" str, sep) +#define SEQ_OPTS_PRINT(str, arg) seq_printf(seq, "%c" str, sep, arg) + +	if (sbi->s_sb_block != 1) +		SEQ_OPTS_PRINT("sb=%llu", sbi->s_sb_block); + +	for (m = ext4_mount_opts; m->token != Opt_err; m++) { +		int want_set = m->flags & MOPT_SET; +		if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) || +		    (m->flags & MOPT_CLEAR_ERR)) +			continue; +		if (!(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt))) +			continue; /* skip if same as the default */ +		if ((want_set && +		     (sbi->s_mount_opt & m->mount_opt) != m->mount_opt) || +		    (!want_set && (sbi->s_mount_opt & m->mount_opt))) +			continue; /* select Opt_noFoo vs Opt_Foo */ +		SEQ_OPTS_PRINT("%s", token2str(m->token)); +	} + +	if (nodefs || !uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT4_DEF_RESUID)) || +	    le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID) +		SEQ_OPTS_PRINT("resuid=%u", +				from_kuid_munged(&init_user_ns, sbi->s_resuid)); +	if (nodefs || !gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT4_DEF_RESGID)) || +	    le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) +		SEQ_OPTS_PRINT("resgid=%u", +				from_kgid_munged(&init_user_ns, sbi->s_resgid)); +	def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors); +	if (test_opt(sb, ERRORS_RO) && def_errors != EXT4_ERRORS_RO) +		SEQ_OPTS_PUTS("errors=remount-ro"); +	if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE) +		SEQ_OPTS_PUTS("errors=continue"); +	if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC) +		SEQ_OPTS_PUTS("errors=panic"); +	if (nodefs || sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) +		SEQ_OPTS_PRINT("commit=%lu", sbi->s_commit_interval / HZ); +	if (nodefs || sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) +		SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time); +	if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) +		SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time); +	if (sb->s_flags & MS_I_VERSION) +		SEQ_OPTS_PUTS("i_version"); +	if (nodefs || sbi->s_stripe) +		SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe); +	if (EXT4_MOUNT_DATA_FLAGS & (sbi->s_mount_opt ^ def_mount_opt)) { +		if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) +			SEQ_OPTS_PUTS("data=journal"); +		else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) +			SEQ_OPTS_PUTS("data=ordered"); +		else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) +			SEQ_OPTS_PUTS("data=writeback"); +	} +	if (nodefs || +	    sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS) +		SEQ_OPTS_PRINT("inode_readahead_blks=%u", +			       sbi->s_inode_readahead_blks); + +	if (nodefs || (test_opt(sb, INIT_INODE_TABLE) && +		       (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT))) +		SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult); +	if (nodefs || sbi->s_max_dir_size_kb) +		SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb); + +	ext4_show_quota_options(seq, sb); +	return 0; +} + +static int ext4_show_options(struct seq_file *seq, struct dentry *root) +{ +	return _ext4_show_options(seq, root->d_sb, 0); +} + +static int options_seq_show(struct seq_file *seq, void *offset) +{ +	struct super_block *sb = seq->private; +	int rc; + +	seq_puts(seq, (sb->s_flags & MS_RDONLY) ? "ro" : "rw"); +	rc = _ext4_show_options(seq, sb, 1); +	seq_puts(seq, "\n"); +	return rc; +} + +static int options_open_fs(struct inode *inode, struct file *file) +{ +	return single_open(file, options_seq_show, PDE_DATA(inode)); +} + +static const struct file_operations ext4_seq_options_fops = { +	.owner = THIS_MODULE, +	.open = options_open_fs, +	.read = seq_read, +	.llseek = seq_lseek, +	.release = single_release, +}; +  static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,  			    int read_only)  { @@ -1853,15 +1898,15 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,  		res = MS_RDONLY;  	}  	if (read_only) -		return res; +		goto done;  	if (!(sbi->s_mount_state & EXT4_VALID_FS))  		ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "  			 "running e2fsck is recommended"); -	else if ((sbi->s_mount_state & EXT4_ERROR_FS)) +	else if (sbi->s_mount_state & EXT4_ERROR_FS)  		ext4_msg(sb, KERN_WARNING,  			 "warning: mounting fs with errors, "  			 "running e2fsck is recommended"); -	else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && +	else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 &&  		 le16_to_cpu(es->s_mnt_count) >=  		 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))  		ext4_msg(sb, KERN_WARNING, @@ -1884,52 +1929,68 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,  		EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);  	ext4_commit_super(sb, 1); +done:  	if (test_opt(sb, DEBUG))  		printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, " -				"bpg=%lu, ipg=%lu, mo=%04x]\n", +				"bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n",  			sb->s_blocksize,  			sbi->s_groups_count,  			EXT4_BLOCKS_PER_GROUP(sb),  			EXT4_INODES_PER_GROUP(sb), -			sbi->s_mount_opt); +			sbi->s_mount_opt, sbi->s_mount_opt2); +	cleancache_init_fs(sb);  	return res;  } +int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup) +{ +	struct ext4_sb_info *sbi = EXT4_SB(sb); +	struct flex_groups *new_groups; +	int size; + +	if (!sbi->s_log_groups_per_flex) +		return 0; + +	size = ext4_flex_group(sbi, ngroup - 1) + 1; +	if (size <= sbi->s_flex_groups_allocated) +		return 0; + +	size = roundup_pow_of_two(size * sizeof(struct flex_groups)); +	new_groups = ext4_kvzalloc(size, GFP_KERNEL); +	if (!new_groups) { +		ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups", +			 size / (int) sizeof(struct flex_groups)); +		return -ENOMEM; +	} + +	if (sbi->s_flex_groups) { +		memcpy(new_groups, sbi->s_flex_groups, +		       (sbi->s_flex_groups_allocated * +			sizeof(struct flex_groups))); +		ext4_kvfree(sbi->s_flex_groups); +	} +	sbi->s_flex_groups = new_groups; +	sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups); +	return 0; +} +  static int ext4_fill_flex_info(struct super_block *sb)  {  	struct ext4_sb_info *sbi = EXT4_SB(sb);  	struct ext4_group_desc *gdp = NULL; -	ext4_group_t flex_group_count;  	ext4_group_t flex_group; -	int groups_per_flex = 0; -	size_t size; -	int i; +	int i, err;  	sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; -	groups_per_flex = 1 << sbi->s_log_groups_per_flex; - -	if (groups_per_flex < 2) { +	if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) {  		sbi->s_log_groups_per_flex = 0;  		return 1;  	} -	/* We allocate both existing and potentially added groups */ -	flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + -			((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) << -			      EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex; -	size = flex_group_count * sizeof(struct flex_groups); -	sbi->s_flex_groups = kzalloc(size, GFP_KERNEL); -	if (sbi->s_flex_groups == NULL) { -		sbi->s_flex_groups = vmalloc(size); -		if (sbi->s_flex_groups) -			memset(sbi->s_flex_groups, 0, size); -	} -	if (sbi->s_flex_groups == NULL) { -		ext4_msg(sb, KERN_ERR, "not enough memory for " -				"%u flex groups", flex_group_count); +	err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count); +	if (err)  		goto failed; -	}  	for (i = 0; i < sbi->s_groups_count; i++) {  		gdp = ext4_get_group_desc(sb, i, NULL); @@ -1937,8 +1998,8 @@ static int ext4_fill_flex_info(struct super_block *sb)  		flex_group = ext4_flex_group(sbi, i);  		atomic_add(ext4_free_inodes_count(sb, gdp),  			   &sbi->s_flex_groups[flex_group].free_inodes); -		atomic_add(ext4_free_blks_count(sb, gdp), -			   &sbi->s_flex_groups[flex_group].free_blocks); +		atomic64_add(ext4_free_group_clusters(sb, gdp), +			     &sbi->s_flex_groups[flex_group].free_clusters);  		atomic_add(ext4_used_dirs_count(sb, gdp),  			   &sbi->s_flex_groups[flex_group].used_dirs);  	} @@ -1948,43 +2009,69 @@ failed:  	return 0;  } -__le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, -			    struct ext4_group_desc *gdp) +static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group, +				   struct ext4_group_desc *gdp)  { +	int offset;  	__u16 crc = 0; +	__le32 le_group = cpu_to_le32(block_group); -	if (sbi->s_es->s_feature_ro_compat & -	    cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { -		int offset = offsetof(struct ext4_group_desc, bg_checksum); -		__le32 le_group = cpu_to_le32(block_group); - -		crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); -		crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group)); -		crc = crc16(crc, (__u8 *)gdp, offset); -		offset += sizeof(gdp->bg_checksum); /* skip checksum */ -		/* for checksum of struct ext4_group_desc do the rest...*/ -		if ((sbi->s_es->s_feature_incompat & -		     cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) && -		    offset < le16_to_cpu(sbi->s_es->s_desc_size)) -			crc = crc16(crc, (__u8 *)gdp + offset, -				    le16_to_cpu(sbi->s_es->s_desc_size) - -					offset); +	if ((sbi->s_es->s_feature_ro_compat & +	     cpu_to_le32(EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))) { +		/* Use new metadata_csum algorithm */ +		__le16 save_csum; +		__u32 csum32; + +		save_csum = gdp->bg_checksum; +		gdp->bg_checksum = 0; +		csum32 = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&le_group, +				     sizeof(le_group)); +		csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp, +				     sbi->s_desc_size); +		gdp->bg_checksum = save_csum; + +		crc = csum32 & 0xFFFF; +		goto out;  	} +	/* old crc16 code */ +	offset = offsetof(struct ext4_group_desc, bg_checksum); + +	crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); +	crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group)); +	crc = crc16(crc, (__u8 *)gdp, offset); +	offset += sizeof(gdp->bg_checksum); /* skip checksum */ +	/* for checksum of struct ext4_group_desc do the rest...*/ +	if ((sbi->s_es->s_feature_incompat & +	     cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) && +	    offset < le16_to_cpu(sbi->s_es->s_desc_size)) +		crc = crc16(crc, (__u8 *)gdp + offset, +			    le16_to_cpu(sbi->s_es->s_desc_size) - +				offset); + +out:  	return cpu_to_le16(crc);  } -int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group, +int ext4_group_desc_csum_verify(struct super_block *sb, __u32 block_group,  				struct ext4_group_desc *gdp)  { -	if ((sbi->s_es->s_feature_ro_compat & -	     cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) && -	    (gdp->bg_checksum != ext4_group_desc_csum(sbi, block_group, gdp))) +	if (ext4_has_group_desc_csum(sb) && +	    (gdp->bg_checksum != ext4_group_desc_csum(EXT4_SB(sb), +						      block_group, gdp)))  		return 0;  	return 1;  } +void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group, +			      struct ext4_group_desc *gdp) +{ +	if (!ext4_has_group_desc_csum(sb)) +		return; +	gdp->bg_checksum = ext4_group_desc_csum(EXT4_SB(sb), block_group, gdp); +} +  /* Called at mount-time, super-block is locked */  static int ext4_check_descriptors(struct super_block *sb,  				  ext4_group_t *first_not_zeroed) @@ -2039,7 +2126,7 @@ static int ext4_check_descriptors(struct super_block *sb,  			return 0;  		}  		ext4_lock_group(sb, i); -		if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { +		if (!ext4_group_desc_csum_verify(sb, i, gdp)) {  			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "  				 "Checksum for group %u failed (%u!=%u)",  				 i, le16_to_cpu(ext4_group_desc_csum(sbi, i, @@ -2056,7 +2143,8 @@ static int ext4_check_descriptors(struct super_block *sb,  	if (NULL != first_not_zeroed)  		*first_not_zeroed = grp; -	ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); +	ext4_free_blocks_count_set(sbi->s_es, +				   EXT4_C2B(sbi, ext4_count_free_clusters(sb)));  	sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb));  	return 1;  } @@ -2097,11 +2185,20 @@ static void ext4_orphan_cleanup(struct super_block *sb,  		return;  	} +	/* Check if feature set would not allow a r/w mount */ +	if (!ext4_feature_set_ok(sb, 0)) { +		ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to " +			 "unknown ROCOMPAT features"); +		return; +	} +  	if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { -		if (es->s_last_orphan) +		/* don't clear list on RO mount w/ errors */ +		if (es->s_last_orphan && !(s_flags & MS_RDONLY)) {  			jbd_debug(1, "Errors on filesystem, "  				  "clearing orphan list.\n"); -		es->s_last_orphan = 0; +			es->s_last_orphan = 0; +		}  		jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");  		return;  	} @@ -2137,17 +2234,22 @@ static void ext4_orphan_cleanup(struct super_block *sb,  		list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);  		dquot_initialize(inode);  		if (inode->i_nlink) { -			ext4_msg(sb, KERN_DEBUG, -				"%s: truncating inode %lu to %lld bytes", -				__func__, inode->i_ino, inode->i_size); +			if (test_opt(sb, DEBUG)) +				ext4_msg(sb, KERN_DEBUG, +					"%s: truncating inode %lu to %lld bytes", +					__func__, inode->i_ino, inode->i_size);  			jbd_debug(2, "truncating inode %lu to %lld bytes\n",  				  inode->i_ino, inode->i_size); +			mutex_lock(&inode->i_mutex); +			truncate_inode_pages(inode->i_mapping, inode->i_size);  			ext4_truncate(inode); +			mutex_unlock(&inode->i_mutex);  			nr_truncates++;  		} else { -			ext4_msg(sb, KERN_DEBUG, -				"%s: deleting unreferenced inode %lu", -				__func__, inode->i_ino); +			if (test_opt(sb, DEBUG)) +				ext4_msg(sb, KERN_DEBUG, +					"%s: deleting unreferenced inode %lu", +					__func__, inode->i_ino);  			jbd_debug(2, "deleting unreferenced inode %lu\n",  				  inode->i_ino);  			nr_orphans++; @@ -2180,6 +2282,12 @@ static void ext4_orphan_cleanup(struct super_block *sb,   * in the vfs.  ext4 inode has 48 bits of i_block in fsblock units,   * so that won't be a limiting factor.   * + * However there is other limiting factor. We do store extents in the form + * of starting block and length, hence the resulting length of the extent + * covering maximum file size must fit into on-disk format containers as + * well. Given that length is always by 1 unit bigger than max unit (because + * we count 0 as well) we have to lower the s_maxbytes by one fs block. + *   * Note, this does *not* consider any metadata overhead for vfs i_blocks.   */  static loff_t ext4_max_size(int blkbits, int has_huge_files) @@ -2201,10 +2309,13 @@ static loff_t ext4_max_size(int blkbits, int has_huge_files)  		upper_limit <<= blkbits;  	} -	/* 32-bit extent-start container, ee_block */ -	res = 1LL << 32; +	/* +	 * 32-bit extent-start container, ee_block. We lower the maxbytes +	 * by one fs block, so ee_len can cover the extent of maximum file +	 * size +	 */ +	res = (1LL << 32) - 1;  	res <<= blkbits; -	res -= 1;  	/* Sanity check against vm- & vfs- imposed limits */  	if (res > upper_limit) @@ -2292,6 +2403,16 @@ static ext4_fsblk_t descriptor_loc(struct super_block *sb,  	if (ext4_bg_has_super(sb, bg))  		has_super = 1; +	/* +	 * If we have a meta_bg fs with 1k blocks, group 0's GDT is at +	 * block 2, not 1.  If s_first_data_block == 0 (bigalloc is enabled +	 * on modern mke2fs or blksize > 1k on older mke2fs) then we must +	 * compensate. +	 */ +	if (sb->s_blocksize == 1024 && nr == 0 && +	    le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) == 0) +		has_super++; +  	return (has_super + ext4_group_first_block_no(sb, bg));  } @@ -2311,17 +2432,25 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)  	unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);  	unsigned long stripe_width =  			le32_to_cpu(sbi->s_es->s_raid_stripe_width); +	int ret;  	if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group) -		return sbi->s_stripe; - -	if (stripe_width <= sbi->s_blocks_per_group) -		return stripe_width; +		ret = sbi->s_stripe; +	else if (stripe_width <= sbi->s_blocks_per_group) +		ret = stripe_width; +	else if (stride <= sbi->s_blocks_per_group) +		ret = stride; +	else +		ret = 0; -	if (stride <= sbi->s_blocks_per_group) -		return stride; +	/* +	 * If the stripe width is 1, this makes no sense and +	 * we set it to 0 to turn off stripe handling code. +	 */ +	if (ret <= 1) +		ret = 0; -	return 0; +	return ret;  }  /* sysfs supprt */ @@ -2331,20 +2460,21 @@ struct ext4_attr {  	ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *);  	ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *,  			 const char *, size_t); -	int offset; +	union { +		int offset; +		int deprecated_val; +	} u;  }; -static int parse_strtoul(const char *buf, -		unsigned long max, unsigned long *value) +static int parse_strtoull(const char *buf, +		unsigned long long max, unsigned long long *value)  { -	char *endp; - -	*value = simple_strtoul(skip_spaces(buf), &endp, 0); -	endp = skip_spaces(endp); -	if (*endp || *value > max) -		return -EINVAL; +	int ret; -	return 0; +	ret = kstrtoull(skip_spaces(buf), 0, value); +	if (!ret && *value > max) +		ret = -EINVAL; +	return ret;  }  static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a, @@ -2352,7 +2482,8 @@ static ssize_t delayed_allocation_blocks_show(struct ext4_attr *a,  					      char *buf)  {  	return snprintf(buf, PAGE_SIZE, "%llu\n", -			(s64) percpu_counter_sum(&sbi->s_dirtyblocks_counter)); +		(s64) EXT4_C2B(sbi, +			percpu_counter_sum(&sbi->s_dirtyclusters_counter)));  }  static ssize_t session_write_kbytes_show(struct ext4_attr *a, @@ -2385,11 +2516,13 @@ static ssize_t inode_readahead_blks_store(struct ext4_attr *a,  					  const char *buf, size_t count)  {  	unsigned long t; +	int ret; -	if (parse_strtoul(buf, 0x40000000, &t)) -		return -EINVAL; +	ret = kstrtoul(skip_spaces(buf), 0, &t); +	if (ret) +		return ret; -	if (!is_power_of_2(t)) +	if (t && (!is_power_of_2(t) || t > 0x40000000))  		return -EINVAL;  	sbi->s_inode_readahead_blks = t; @@ -2399,7 +2532,7 @@ static ssize_t inode_readahead_blks_store(struct ext4_attr *a,  static ssize_t sbi_ui_show(struct ext4_attr *a,  			   struct ext4_sb_info *sbi, char *buf)  { -	unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset); +	unsigned int *ui = (unsigned int *) (((char *) sbi) + a->u.offset);  	return snprintf(buf, PAGE_SIZE, "%u\n", *ui);  } @@ -2408,21 +2541,69 @@ static ssize_t sbi_ui_store(struct ext4_attr *a,  			    struct ext4_sb_info *sbi,  			    const char *buf, size_t count)  { -	unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset); +	unsigned int *ui = (unsigned int *) (((char *) sbi) + a->u.offset);  	unsigned long t; +	int ret; -	if (parse_strtoul(buf, 0xffffffff, &t)) -		return -EINVAL; +	ret = kstrtoul(skip_spaces(buf), 0, &t); +	if (ret) +		return ret;  	*ui = t;  	return count;  } +static ssize_t reserved_clusters_show(struct ext4_attr *a, +				  struct ext4_sb_info *sbi, char *buf) +{ +	return snprintf(buf, PAGE_SIZE, "%llu\n", +		(unsigned long long) atomic64_read(&sbi->s_resv_clusters)); +} + +static ssize_t reserved_clusters_store(struct ext4_attr *a, +				   struct ext4_sb_info *sbi, +				   const char *buf, size_t count) +{ +	unsigned long long val; +	int ret; + +	if (parse_strtoull(buf, -1ULL, &val)) +		return -EINVAL; +	ret = ext4_reserve_clusters(sbi, val); + +	return ret ? ret : count; +} + +static ssize_t trigger_test_error(struct ext4_attr *a, +				  struct ext4_sb_info *sbi, +				  const char *buf, size_t count) +{ +	int len = count; + +	if (!capable(CAP_SYS_ADMIN)) +		return -EPERM; + +	if (len && buf[len-1] == '\n') +		len--; + +	if (len) +		ext4_error(sbi->s_sb, "%.*s", len, buf); +	return count; +} + +static ssize_t sbi_deprecated_show(struct ext4_attr *a, +				   struct ext4_sb_info *sbi, char *buf) +{ +	return snprintf(buf, PAGE_SIZE, "%d\n", a->u.deprecated_val); +} +  #define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \  static struct ext4_attr ext4_attr_##_name = {			\  	.attr = {.name = __stringify(_name), .mode = _mode },	\  	.show	= _show,					\  	.store	= _store,					\ -	.offset = offsetof(struct ext4_sb_info, _elname),	\ +	.u = {							\ +		.offset = offsetof(struct ext4_sb_info, _elname),\ +	},							\  }  #define EXT4_ATTR(name, mode, show, store) \  static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) @@ -2433,10 +2614,19 @@ static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store)  #define EXT4_RW_ATTR_SBI_UI(name, elname)	\  	EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname)  #define ATTR_LIST(name) &ext4_attr_##name.attr +#define EXT4_DEPRECATED_ATTR(_name, _val)	\ +static struct ext4_attr ext4_attr_##_name = {			\ +	.attr = {.name = __stringify(_name), .mode = 0444 },	\ +	.show	= sbi_deprecated_show,				\ +	.u = {							\ +		.deprecated_val = _val,				\ +	},							\ +}  EXT4_RO_ATTR(delayed_allocation_blocks);  EXT4_RO_ATTR(session_write_kbytes);  EXT4_RO_ATTR(lifetime_write_kbytes); +EXT4_RW_ATTR(reserved_clusters);  EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show,  		 inode_readahead_blks_store, s_inode_readahead_blks);  EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal); @@ -2446,12 +2636,21 @@ EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan);  EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);  EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);  EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); -EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump); +EXT4_DEPRECATED_ATTR(max_writeback_mb_bump, 128); +EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb); +EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error); +EXT4_RW_ATTR_SBI_UI(err_ratelimit_interval_ms, s_err_ratelimit_state.interval); +EXT4_RW_ATTR_SBI_UI(err_ratelimit_burst, s_err_ratelimit_state.burst); +EXT4_RW_ATTR_SBI_UI(warning_ratelimit_interval_ms, s_warning_ratelimit_state.interval); +EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst); +EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval); +EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst);  static struct attribute *ext4_attrs[] = {  	ATTR_LIST(delayed_allocation_blocks),  	ATTR_LIST(session_write_kbytes),  	ATTR_LIST(lifetime_write_kbytes), +	ATTR_LIST(reserved_clusters),  	ATTR_LIST(inode_readahead_blks),  	ATTR_LIST(inode_goal),  	ATTR_LIST(mb_stats), @@ -2461,16 +2660,26 @@ static struct attribute *ext4_attrs[] = {  	ATTR_LIST(mb_stream_req),  	ATTR_LIST(mb_group_prealloc),  	ATTR_LIST(max_writeback_mb_bump), +	ATTR_LIST(extent_max_zeroout_kb), +	ATTR_LIST(trigger_fs_error), +	ATTR_LIST(err_ratelimit_interval_ms), +	ATTR_LIST(err_ratelimit_burst), +	ATTR_LIST(warning_ratelimit_interval_ms), +	ATTR_LIST(warning_ratelimit_burst), +	ATTR_LIST(msg_ratelimit_interval_ms), +	ATTR_LIST(msg_ratelimit_burst),  	NULL,  };  /* Features this copy of ext4 supports */  EXT4_INFO_ATTR(lazy_itable_init);  EXT4_INFO_ATTR(batched_discard); +EXT4_INFO_ATTR(meta_bg_resize);  static struct attribute *ext4_feat_attrs[] = {  	ATTR_LIST(lazy_itable_init),  	ATTR_LIST(batched_discard), +	ATTR_LIST(meta_bg_resize),  	NULL,  }; @@ -2564,6 +2773,23 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly)  			return 0;  		}  	} +	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC) && +	    !EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) { +		ext4_msg(sb, KERN_ERR, +			 "Can't support bigalloc feature without " +			 "extents feature\n"); +		return 0; +	} + +#ifndef CONFIG_QUOTA +	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) && +	    !readonly) { +		ext4_msg(sb, KERN_ERR, +			 "Filesystem with quota feature cannot be mounted RDWR " +			 "without CONFIG_QUOTA"); +		return 0; +	} +#endif  /* CONFIG_QUOTA */  	return 1;  } @@ -2581,10 +2807,11 @@ static void print_daily_error_info(unsigned long arg)  	es = sbi->s_es;  	if (es->s_error_count) -		ext4_msg(sb, KERN_NOTICE, "error count: %u", +		/* fsck newer than v1.41.13 is needed to clean this condition. */ +		ext4_msg(sb, KERN_NOTICE, "error count since last fsck: %u",  			 le32_to_cpu(es->s_error_count));  	if (es->s_first_error_time) { -		printk(KERN_NOTICE "EXT4-fs (%s): initial error at %u: %.*s:%d", +		printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %u: %.*s:%d",  		       sb->s_id, le32_to_cpu(es->s_first_error_time),  		       (int) sizeof(es->s_first_error_func),  		       es->s_first_error_func, @@ -2598,7 +2825,7 @@ static void print_daily_error_info(unsigned long arg)  		printk("\n");  	}  	if (es->s_last_error_time) { -		printk(KERN_NOTICE "EXT4-fs (%s): last error at %u: %.*s:%d", +		printk(KERN_NOTICE "EXT4-fs (%s): last error at time %u: %.*s:%d",  		       sb->s_id, le32_to_cpu(es->s_last_error_time),  		       (int) sizeof(es->s_last_error_func),  		       es->s_last_error_func, @@ -2614,12 +2841,6 @@ static void print_daily_error_info(unsigned long arg)  	mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);  /* Once a day */  } -static void ext4_lazyinode_timeout(unsigned long data) -{ -	struct task_struct *p = (struct task_struct *)data; -	wake_up_process(p); -} -  /* Find next suitable group and run ext4_init_inode_table */  static int ext4_run_li_request(struct ext4_li_request *elr)  { @@ -2632,6 +2853,7 @@ static int ext4_run_li_request(struct ext4_li_request *elr)  	sb = elr->lr_super;  	ngroups = EXT4_SB(sb)->s_groups_count; +	sb_start_write(sb);  	for (group = elr->lr_next_group; group < ngroups; group++) {  		gdp = ext4_get_group_desc(sb, group, NULL);  		if (!gdp) { @@ -2643,7 +2865,7 @@ static int ext4_run_li_request(struct ext4_li_request *elr)  			break;  	} -	if (group == ngroups) +	if (group >= ngroups)  		ret = 1;  	if (!ret) { @@ -2651,23 +2873,21 @@ static int ext4_run_li_request(struct ext4_li_request *elr)  		ret = ext4_init_inode_table(sb, group,  					    elr->lr_timeout ? 0 : 1);  		if (elr->lr_timeout == 0) { -			timeout = jiffies - timeout; -			if (elr->lr_sbi->s_li_wait_mult) -				timeout *= elr->lr_sbi->s_li_wait_mult; -			else -				timeout *= 20; +			timeout = (jiffies - timeout) * +				  elr->lr_sbi->s_li_wait_mult;  			elr->lr_timeout = timeout;  		}  		elr->lr_next_sched = jiffies + elr->lr_timeout;  		elr->lr_next_group = group + 1;  	} +	sb_end_write(sb);  	return ret;  }  /*   * Remove lr_request from the list_request and free the - * request tructure. Should be called with li_list_mtx held + * request structure. Should be called with li_list_mtx held   */  static void ext4_remove_li_request(struct ext4_li_request *elr)  { @@ -2685,16 +2905,20 @@ static void ext4_remove_li_request(struct ext4_li_request *elr)  static void ext4_unregister_li_request(struct super_block *sb)  { -	struct ext4_li_request *elr = EXT4_SB(sb)->s_li_request; - -	if (!ext4_li_info) +	mutex_lock(&ext4_li_mtx); +	if (!ext4_li_info) { +		mutex_unlock(&ext4_li_mtx);  		return; +	}  	mutex_lock(&ext4_li_info->li_list_mtx); -	ext4_remove_li_request(elr); +	ext4_remove_li_request(EXT4_SB(sb)->s_li_request);  	mutex_unlock(&ext4_li_info->li_list_mtx); +	mutex_unlock(&ext4_li_mtx);  } +static struct task_struct *ext4_lazyinit_task; +  /*   * This is the function where ext4lazyinit thread lives. It walks   * through the request list searching for next scheduled filesystem. @@ -2709,17 +2933,10 @@ static int ext4_lazyinit_thread(void *arg)  	struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg;  	struct list_head *pos, *n;  	struct ext4_li_request *elr; -	unsigned long next_wakeup; -	DEFINE_WAIT(wait); +	unsigned long next_wakeup, cur;  	BUG_ON(NULL == eli); -	eli->li_timer.data = (unsigned long)current; -	eli->li_timer.function = ext4_lazyinode_timeout; - -	eli->li_task = current; -	wake_up(&eli->li_wait_task); -  cont_thread:  	while (true) {  		next_wakeup = MAX_JIFFY_OFFSET; @@ -2747,22 +2964,21 @@ cont_thread:  		}  		mutex_unlock(&eli->li_list_mtx); -		if (freezing(current)) -			refrigerator(); +		try_to_freeze(); -		if ((time_after_eq(jiffies, next_wakeup)) || +		cur = jiffies; +		if ((time_after_eq(cur, next_wakeup)) ||  		    (MAX_JIFFY_OFFSET == next_wakeup)) {  			cond_resched();  			continue;  		} -		eli->li_timer.expires = next_wakeup; -		add_timer(&eli->li_timer); -		prepare_to_wait(&eli->li_wait_daemon, &wait, -				TASK_INTERRUPTIBLE); -		if (time_before(jiffies, next_wakeup)) -			schedule(); -		finish_wait(&eli->li_wait_daemon, &wait); +		schedule_timeout_interruptible(next_wakeup - cur); + +		if (kthread_should_stop()) { +			ext4_clear_request_list(); +			goto exit_thread; +		}  	}  exit_thread: @@ -2782,10 +2998,6 @@ exit_thread:  		goto cont_thread;  	}  	mutex_unlock(&eli->li_list_mtx); -	del_timer_sync(&ext4_li_info->li_timer); -	eli->li_task = NULL; -	wake_up(&eli->li_wait_task); -  	kfree(ext4_li_info);  	ext4_li_info = NULL;  	mutex_unlock(&ext4_li_mtx); @@ -2799,9 +3011,6 @@ static void ext4_clear_request_list(void)  	struct ext4_li_request *elr;  	mutex_lock(&ext4_li_info->li_list_mtx); -	if (list_empty(&ext4_li_info->li_request_list)) -		return; -  	list_for_each_safe(pos, n, &ext4_li_info->li_request_list) {  		elr = list_entry(pos, struct ext4_li_request,  				 lr_request); @@ -2812,23 +3021,19 @@ static void ext4_clear_request_list(void)  static int ext4_run_lazyinit_thread(void)  { -	struct task_struct *t; - -	t = kthread_run(ext4_lazyinit_thread, ext4_li_info, "ext4lazyinit"); -	if (IS_ERR(t)) { -		int err = PTR_ERR(t); +	ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread, +					 ext4_li_info, "ext4lazyinit"); +	if (IS_ERR(ext4_lazyinit_task)) { +		int err = PTR_ERR(ext4_lazyinit_task);  		ext4_clear_request_list(); -		del_timer_sync(&ext4_li_info->li_timer);  		kfree(ext4_li_info);  		ext4_li_info = NULL; -		printk(KERN_CRIT "EXT4: error %d creating inode table " +		printk(KERN_CRIT "EXT4-fs: error %d creating inode table "  				 "initialization thread\n",  				 err);  		return err;  	}  	ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING; - -	wait_event(ext4_li_info->li_wait_task, ext4_li_info->li_task != NULL);  	return 0;  } @@ -2863,13 +3068,9 @@ static int ext4_li_info_new(void)  	if (!eli)  		return -ENOMEM; -	eli->li_task = NULL;  	INIT_LIST_HEAD(&eli->li_request_list);  	mutex_init(&eli->li_list_mtx); -	init_waitqueue_head(&eli->li_wait_daemon); -	init_waitqueue_head(&eli->li_wait_task); -	init_timer(&eli->li_timer);  	eli->li_state |= EXT4_LAZYINIT_QUIT;  	ext4_li_info = eli; @@ -2882,7 +3083,6 @@ static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,  {  	struct ext4_sb_info *sbi = EXT4_SB(sb);  	struct ext4_li_request *elr; -	unsigned long rnd;  	elr = kzalloc(sizeof(*elr), GFP_KERNEL);  	if (!elr) @@ -2897,41 +3097,39 @@ static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,  	 * spread the inode table initialization requests  	 * better.  	 */ -	get_random_bytes(&rnd, sizeof(rnd)); -	elr->lr_next_sched = jiffies + (unsigned long)rnd % -			     (EXT4_DEF_LI_MAX_START_DELAY * HZ); - +	elr->lr_next_sched = jiffies + (prandom_u32() % +				(EXT4_DEF_LI_MAX_START_DELAY * HZ));  	return elr;  } -static int ext4_register_li_request(struct super_block *sb, -				    ext4_group_t first_not_zeroed) +int ext4_register_li_request(struct super_block *sb, +			     ext4_group_t first_not_zeroed)  {  	struct ext4_sb_info *sbi = EXT4_SB(sb); -	struct ext4_li_request *elr; +	struct ext4_li_request *elr = NULL;  	ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; -	int ret; +	int ret = 0; -	if (sbi->s_li_request != NULL) -		return 0; +	mutex_lock(&ext4_li_mtx); +	if (sbi->s_li_request != NULL) { +		/* +		 * Reset timeout so it can be computed again, because +		 * s_li_wait_mult might have changed. +		 */ +		sbi->s_li_request->lr_timeout = 0; +		goto out; +	}  	if (first_not_zeroed == ngroups ||  	    (sb->s_flags & MS_RDONLY) || -	    !test_opt(sb, INIT_INODE_TABLE)) { -		sbi->s_li_request = NULL; -		return 0; -	} - -	if (first_not_zeroed == ngroups) { -		sbi->s_li_request = NULL; -		return 0; -	} +	    !test_opt(sb, INIT_INODE_TABLE)) +		goto out;  	elr = ext4_li_request_new(sb, first_not_zeroed); -	if (!elr) -		return -ENOMEM; - -	mutex_lock(&ext4_li_mtx); +	if (!elr) { +		ret = -ENOMEM; +		goto out; +	}  	if (NULL == ext4_li_info) {  		ret = ext4_li_info_new(); @@ -2944,6 +3142,12 @@ static int ext4_register_li_request(struct super_block *sb,  	mutex_unlock(&ext4_li_info->li_list_mtx);  	sbi->s_li_request = elr; +	/* +	 * set elr to NULL here since it has been inserted to +	 * the request_list and the removal and free of it is +	 * handled by ext4_clear_request_list from now on. +	 */ +	elr = NULL;  	if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {  		ret = ext4_run_lazyinit_thread(); @@ -2967,21 +3171,209 @@ static void ext4_destroy_lazyinit_thread(void)  	 * If thread exited earlier  	 * there's nothing to be done.  	 */ -	if (!ext4_li_info) +	if (!ext4_li_info || !ext4_lazyinit_task)  		return; -	ext4_clear_request_list(); +	kthread_stop(ext4_lazyinit_task); +} -	while (ext4_li_info->li_task) { -		wake_up(&ext4_li_info->li_wait_daemon); -		wait_event(ext4_li_info->li_wait_task, -			   ext4_li_info->li_task == NULL); +static int set_journal_csum_feature_set(struct super_block *sb) +{ +	int ret = 1; +	int compat, incompat; +	struct ext4_sb_info *sbi = EXT4_SB(sb); + +	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, +				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { +		/* journal checksum v2 */ +		compat = 0; +		incompat = JBD2_FEATURE_INCOMPAT_CSUM_V2; +	} else { +		/* journal checksum v1 */ +		compat = JBD2_FEATURE_COMPAT_CHECKSUM; +		incompat = 0; +	} + +	if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { +		ret = jbd2_journal_set_features(sbi->s_journal, +				compat, 0, +				JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | +				incompat); +	} else if (test_opt(sb, JOURNAL_CHECKSUM)) { +		ret = jbd2_journal_set_features(sbi->s_journal, +				compat, 0, +				incompat); +		jbd2_journal_clear_features(sbi->s_journal, 0, 0, +				JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); +	} else { +		jbd2_journal_clear_features(sbi->s_journal, +				JBD2_FEATURE_COMPAT_CHECKSUM, 0, +				JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | +				JBD2_FEATURE_INCOMPAT_CSUM_V2);  	} + +	return ret; +} + +/* + * Note: calculating the overhead so we can be compatible with + * historical BSD practice is quite difficult in the face of + * clusters/bigalloc.  This is because multiple metadata blocks from + * different block group can end up in the same allocation cluster. + * Calculating the exact overhead in the face of clustered allocation + * requires either O(all block bitmaps) in memory or O(number of block + * groups**2) in time.  We will still calculate the superblock for + * older file systems --- and if we come across with a bigalloc file + * system with zero in s_overhead_clusters the estimate will be close to + * correct especially for very large cluster sizes --- but for newer + * file systems, it's better to calculate this figure once at mkfs + * time, and store it in the superblock.  If the superblock value is + * present (even for non-bigalloc file systems), we will use it. + */ +static int count_overhead(struct super_block *sb, ext4_group_t grp, +			  char *buf) +{ +	struct ext4_sb_info	*sbi = EXT4_SB(sb); +	struct ext4_group_desc	*gdp; +	ext4_fsblk_t		first_block, last_block, b; +	ext4_group_t		i, ngroups = ext4_get_groups_count(sb); +	int			s, j, count = 0; + +	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC)) +		return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) + +			sbi->s_itb_per_group + 2); + +	first_block = le32_to_cpu(sbi->s_es->s_first_data_block) + +		(grp * EXT4_BLOCKS_PER_GROUP(sb)); +	last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1; +	for (i = 0; i < ngroups; i++) { +		gdp = ext4_get_group_desc(sb, i, NULL); +		b = ext4_block_bitmap(sb, gdp); +		if (b >= first_block && b <= last_block) { +			ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf); +			count++; +		} +		b = ext4_inode_bitmap(sb, gdp); +		if (b >= first_block && b <= last_block) { +			ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf); +			count++; +		} +		b = ext4_inode_table(sb, gdp); +		if (b >= first_block && b + sbi->s_itb_per_group <= last_block) +			for (j = 0; j < sbi->s_itb_per_group; j++, b++) { +				int c = EXT4_B2C(sbi, b - first_block); +				ext4_set_bit(c, buf); +				count++; +			} +		if (i != grp) +			continue; +		s = 0; +		if (ext4_bg_has_super(sb, grp)) { +			ext4_set_bit(s++, buf); +			count++; +		} +		for (j = ext4_bg_num_gdb(sb, grp); j > 0; j--) { +			ext4_set_bit(EXT4_B2C(sbi, s++), buf); +			count++; +		} +	} +	if (!count) +		return 0; +	return EXT4_CLUSTERS_PER_GROUP(sb) - +		ext4_count_free(buf, EXT4_CLUSTERS_PER_GROUP(sb) / 8); +} + +/* + * Compute the overhead and stash it in sbi->s_overhead + */ +int ext4_calculate_overhead(struct super_block *sb) +{ +	struct ext4_sb_info *sbi = EXT4_SB(sb); +	struct ext4_super_block *es = sbi->s_es; +	ext4_group_t i, ngroups = ext4_get_groups_count(sb); +	ext4_fsblk_t overhead = 0; +	char *buf = (char *) get_zeroed_page(GFP_KERNEL); + +	if (!buf) +		return -ENOMEM; + +	/* +	 * Compute the overhead (FS structures).  This is constant +	 * for a given filesystem unless the number of block groups +	 * changes so we cache the previous value until it does. +	 */ + +	/* +	 * All of the blocks before first_data_block are overhead +	 */ +	overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block)); + +	/* +	 * Add the overhead found in each block group +	 */ +	for (i = 0; i < ngroups; i++) { +		int blks; + +		blks = count_overhead(sb, i, buf); +		overhead += blks; +		if (blks) +			memset(buf, 0, PAGE_SIZE); +		cond_resched(); +	} +	/* Add the journal blocks as well */ +	if (sbi->s_journal) +		overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen); + +	sbi->s_overhead = overhead; +	smp_wmb(); +	free_page((unsigned long) buf); +	return 0; +} + + +static ext4_fsblk_t ext4_calculate_resv_clusters(struct super_block *sb) +{ +	ext4_fsblk_t resv_clusters; + +	/* +	 * There's no need to reserve anything when we aren't using extents. +	 * The space estimates are exact, there are no unwritten extents, +	 * hole punching doesn't need new metadata... This is needed especially +	 * to keep ext2/3 backward compatibility. +	 */ +	if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) +		return 0; +	/* +	 * By default we reserve 2% or 4096 clusters, whichever is smaller. +	 * This should cover the situations where we can not afford to run +	 * out of space like for example punch hole, or converting +	 * unwritten extents in delalloc path. In most cases such +	 * allocation would require 1, or 2 blocks, higher numbers are +	 * very rare. +	 */ +	resv_clusters = ext4_blocks_count(EXT4_SB(sb)->s_es) >> +			EXT4_SB(sb)->s_cluster_bits; + +	do_div(resv_clusters, 50); +	resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096); + +	return resv_clusters; +} + + +static int ext4_reserve_clusters(struct ext4_sb_info *sbi, ext4_fsblk_t count) +{ +	ext4_fsblk_t clusters = ext4_blocks_count(sbi->s_es) >> +				sbi->s_cluster_bits; + +	if (count >= clusters) +		return -EINVAL; + +	atomic64_set(&sbi->s_resv_clusters, count); +	return 0;  }  static int ext4_fill_super(struct super_block *sb, void *data, int silent) -				__releases(kernel_lock) -				__acquires(kernel_lock)  {  	char *orig_data = kstrdup(data, GFP_KERNEL);  	struct buffer_head *bh; @@ -2997,12 +3389,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)  	char *cp;  	const char *descr;  	int ret = -ENOMEM; -	int blocksize; +	int blocksize, clustersize;  	unsigned int db_count;  	unsigned int i; -	int needs_recovery, has_huge_files; +	int needs_recovery, has_huge_files, has_bigalloc;  	__u64 blocks_count; -	int err; +	int err = 0;  	unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;  	ext4_group_t first_not_zeroed; @@ -3017,9 +3409,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)  		goto out_free_orig;  	}  	sb->s_fs_info = sbi; -	sbi->s_mount_opt = 0; -	sbi->s_resuid = EXT4_DEF_RESUID; -	sbi->s_resgid = EXT4_DEF_RESGID; +	sbi->s_sb = sb;  	sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;  	sbi->s_sb_block = sb_block;  	if (sb->s_bdev->bd_part) @@ -3030,6 +3420,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)  	for (cp = sb->s_id; (cp = strchr(cp, '/'));)  		*cp = '!'; +	/* -EINVAL is default */  	ret = -EINVAL;  	blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);  	if (!blocksize) { @@ -3056,78 +3447,138 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)  	 * Note: s_es must be initialized as soon as possible because  	 *       some ext4 macro-instructions depend on its value  	 */ -	es = (struct ext4_super_block *) (((char *)bh->b_data) + offset); +	es = (struct ext4_super_block *) (bh->b_data + offset);  	sbi->s_es = es;  	sb->s_magic = le16_to_cpu(es->s_magic);  	if (sb->s_magic != EXT4_SUPER_MAGIC)  		goto cantfind_ext4;  	sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written); +	/* Warn if metadata_csum and gdt_csum are both set. */ +	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, +				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) && +	    EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) +		ext4_warning(sb, KERN_INFO "metadata_csum and uninit_bg are " +			     "redundant flags; please run fsck."); + +	/* Check for a known checksum algorithm */ +	if (!ext4_verify_csum_type(sb, es)) { +		ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with " +			 "unknown checksum algorithm."); +		silent = 1; +		goto cantfind_ext4; +	} + +	/* Load the checksum driver */ +	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, +				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { +		sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); +		if (IS_ERR(sbi->s_chksum_driver)) { +			ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver."); +			ret = PTR_ERR(sbi->s_chksum_driver); +			sbi->s_chksum_driver = NULL; +			goto failed_mount; +		} +	} + +	/* Check superblock checksum */ +	if (!ext4_superblock_csum_verify(sb, es)) { +		ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with " +			 "invalid superblock checksum.  Run e2fsck?"); +		silent = 1; +		goto cantfind_ext4; +	} + +	/* Precompute checksum seed for all metadata */ +	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, +			EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) +		sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid, +					       sizeof(es->s_uuid)); +  	/* Set defaults before we parse the mount options */  	def_mount_opts = le32_to_cpu(es->s_default_mount_opts); -	set_opt(sbi->s_mount_opt, INIT_INODE_TABLE); +	set_opt(sb, INIT_INODE_TABLE);  	if (def_mount_opts & EXT4_DEFM_DEBUG) -		set_opt(sbi->s_mount_opt, DEBUG); -	if (def_mount_opts & EXT4_DEFM_BSDGROUPS) { -		ext4_msg(sb, KERN_WARNING, deprecated_msg, "bsdgroups", -			"2.6.38"); -		set_opt(sbi->s_mount_opt, GRPID); -	} +		set_opt(sb, DEBUG); +	if (def_mount_opts & EXT4_DEFM_BSDGROUPS) +		set_opt(sb, GRPID);  	if (def_mount_opts & EXT4_DEFM_UID16) -		set_opt(sbi->s_mount_opt, NO_UID32); -#ifdef CONFIG_EXT4_FS_XATTR -	if (def_mount_opts & EXT4_DEFM_XATTR_USER) -		set_opt(sbi->s_mount_opt, XATTR_USER); -#endif +		set_opt(sb, NO_UID32); +	/* xattr user namespace & acls are now defaulted on */ +	set_opt(sb, XATTR_USER);  #ifdef CONFIG_EXT4_FS_POSIX_ACL -	if (def_mount_opts & EXT4_DEFM_ACL) -		set_opt(sbi->s_mount_opt, POSIX_ACL); +	set_opt(sb, POSIX_ACL);  #endif  	if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) -		set_opt(sbi->s_mount_opt, JOURNAL_DATA); +		set_opt(sb, JOURNAL_DATA);  	else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) -		set_opt(sbi->s_mount_opt, ORDERED_DATA); +		set_opt(sb, ORDERED_DATA);  	else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK) -		set_opt(sbi->s_mount_opt, WRITEBACK_DATA); +		set_opt(sb, WRITEBACK_DATA);  	if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC) -		set_opt(sbi->s_mount_opt, ERRORS_PANIC); +		set_opt(sb, ERRORS_PANIC);  	else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE) -		set_opt(sbi->s_mount_opt, ERRORS_CONT); +		set_opt(sb, ERRORS_CONT);  	else -		set_opt(sbi->s_mount_opt, ERRORS_RO); +		set_opt(sb, ERRORS_RO);  	if (def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY) -		set_opt(sbi->s_mount_opt, BLOCK_VALIDITY); +		set_opt(sb, BLOCK_VALIDITY);  	if (def_mount_opts & EXT4_DEFM_DISCARD) -		set_opt(sbi->s_mount_opt, DISCARD); +		set_opt(sb, DISCARD); -	sbi->s_resuid = le16_to_cpu(es->s_def_resuid); -	sbi->s_resgid = le16_to_cpu(es->s_def_resgid); +	sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid)); +	sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid));  	sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;  	sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;  	sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;  	if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0) -		set_opt(sbi->s_mount_opt, BARRIER); +		set_opt(sb, BARRIER);  	/*  	 * enable delayed allocation by default  	 * Use -o nodelalloc to turn it off  	 */ -	if (!IS_EXT3_SB(sb) && +	if (!IS_EXT3_SB(sb) && !IS_EXT2_SB(sb) &&  	    ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0)) -		set_opt(sbi->s_mount_opt, DELALLOC); +		set_opt(sb, DELALLOC); + +	/* +	 * set default s_li_wait_mult for lazyinit, for the case there is +	 * no mount option specified. +	 */ +	sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;  	if (!parse_options((char *) sbi->s_es->s_mount_opts, sb, -			   &journal_devnum, &journal_ioprio, NULL, 0)) { +			   &journal_devnum, &journal_ioprio, 0)) {  		ext4_msg(sb, KERN_WARNING,  			 "failed to parse options in superblock: %s",  			 sbi->s_es->s_mount_opts);  	} +	sbi->s_def_mount_opt = sbi->s_mount_opt;  	if (!parse_options((char *) data, sb, &journal_devnum, -			   &journal_ioprio, NULL, 0)) +			   &journal_ioprio, 0))  		goto failed_mount; +	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { +		printk_once(KERN_WARNING "EXT4-fs: Warning: mounting " +			    "with data=journal disables delayed " +			    "allocation and O_DIRECT support!\n"); +		if (test_opt2(sb, EXPLICIT_DELALLOC)) { +			ext4_msg(sb, KERN_ERR, "can't mount with " +				 "both data=journal and delalloc"); +			goto failed_mount; +		} +		if (test_opt(sb, DIOREAD_NOLOCK)) { +			ext4_msg(sb, KERN_ERR, "can't mount with " +				 "both data=journal and dioread_nolock"); +			goto failed_mount; +		} +		if (test_opt(sb, DELALLOC)) +			clear_opt(sb, DELALLOC); +	} +  	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |  		(test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0); @@ -3139,6 +3590,38 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)  		       "feature flags set on rev 0 fs, "  		       "running e2fsck is recommended"); +	if (es->s_creator_os == cpu_to_le32(EXT4_OS_HURD)) { +		set_opt2(sb, HURD_COMPAT); +		if (EXT4_HAS_INCOMPAT_FEATURE(sb, +					      EXT4_FEATURE_INCOMPAT_64BIT)) { +			ext4_msg(sb, KERN_ERR, +				 "The Hurd can't support 64-bit file systems"); +			goto failed_mount; +		} +	} + +	if (IS_EXT2_SB(sb)) { +		if (ext2_feature_set_ok(sb)) +			ext4_msg(sb, KERN_INFO, "mounting ext2 file system " +				 "using the ext4 subsystem"); +		else { +			ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due " +				 "to feature incompatibilities"); +			goto failed_mount; +		} +	} + +	if (IS_EXT3_SB(sb)) { +		if (ext3_feature_set_ok(sb)) +			ext4_msg(sb, KERN_INFO, "mounting ext3 file system " +				 "using the ext4 subsystem"); +		else { +			ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due " +				 "to feature incompatibilities"); +			goto failed_mount; +		} +	} +  	/*  	 * Check feature flags regardless of the revision level, since we  	 * previously didn't change the revision level when setting the flags, @@ -3148,7 +3631,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)  		goto failed_mount;  	blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); -  	if (blocksize < EXT4_MIN_BLOCK_SIZE ||  	    blocksize > EXT4_MAX_BLOCK_SIZE) {  		ext4_msg(sb, KERN_ERR, @@ -3173,7 +3655,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)  			       "Can't read superblock on 2nd try");  			goto failed_mount;  		} -		es = (struct ext4_super_block *)(((char *)bh->b_data) + offset); +		es = (struct ext4_super_block *)(bh->b_data + offset);  		sbi->s_es = es;  		if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {  			ext4_msg(sb, KERN_ERR, @@ -3238,25 +3720,71 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)  	for (i = 0; i < 4; i++)  		sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);  	sbi->s_def_hash_version = es->s_def_hash_version; -	i = le32_to_cpu(es->s_flags); -	if (i & EXT2_FLAGS_UNSIGNED_HASH) -		sbi->s_hash_unsigned = 3; -	else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) { +	if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) { +		i = le32_to_cpu(es->s_flags); +		if (i & EXT2_FLAGS_UNSIGNED_HASH) +			sbi->s_hash_unsigned = 3; +		else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {  #ifdef __CHAR_UNSIGNED__ -		es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH); -		sbi->s_hash_unsigned = 3; +			if (!(sb->s_flags & MS_RDONLY)) +				es->s_flags |= +					cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH); +			sbi->s_hash_unsigned = 3;  #else -		es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH); +			if (!(sb->s_flags & MS_RDONLY)) +				es->s_flags |= +					cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);  #endif -		sb->s_dirt = 1; +		}  	} -	if (sbi->s_blocks_per_group > blocksize * 8) { -		ext4_msg(sb, KERN_ERR, -		       "#blocks per group too big: %lu", -		       sbi->s_blocks_per_group); -		goto failed_mount; +	/* Handle clustersize */ +	clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size); +	has_bigalloc = EXT4_HAS_RO_COMPAT_FEATURE(sb, +				EXT4_FEATURE_RO_COMPAT_BIGALLOC); +	if (has_bigalloc) { +		if (clustersize < blocksize) { +			ext4_msg(sb, KERN_ERR, +				 "cluster size (%d) smaller than " +				 "block size (%d)", clustersize, blocksize); +			goto failed_mount; +		} +		sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) - +			le32_to_cpu(es->s_log_block_size); +		sbi->s_clusters_per_group = +			le32_to_cpu(es->s_clusters_per_group); +		if (sbi->s_clusters_per_group > blocksize * 8) { +			ext4_msg(sb, KERN_ERR, +				 "#clusters per group too big: %lu", +				 sbi->s_clusters_per_group); +			goto failed_mount; +		} +		if (sbi->s_blocks_per_group != +		    (sbi->s_clusters_per_group * (clustersize / blocksize))) { +			ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and " +				 "clusters per group (%lu) inconsistent", +				 sbi->s_blocks_per_group, +				 sbi->s_clusters_per_group); +			goto failed_mount; +		} +	} else { +		if (clustersize != blocksize) { +			ext4_warning(sb, "fragment/cluster size (%d) != " +				     "block size (%d)", clustersize, +				     blocksize); +			clustersize = blocksize; +		} +		if (sbi->s_blocks_per_group > blocksize * 8) { +			ext4_msg(sb, KERN_ERR, +				 "#blocks per group too big: %lu", +				 sbi->s_blocks_per_group); +			goto failed_mount; +		} +		sbi->s_clusters_per_group = sbi->s_blocks_per_group; +		sbi->s_cluster_bits = 0;  	} +	sbi->s_cluster_ratio = clustersize / blocksize; +  	if (sbi->s_inodes_per_group > blocksize * 8) {  		ext4_msg(sb, KERN_ERR,  		       "#inodes per group too big: %lu", @@ -3264,13 +3792,17 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)  		goto failed_mount;  	} +	/* Do we have standard group size of clustersize * 8 blocks ? */ +	if (sbi->s_blocks_per_group == clustersize << 3) +		set_opt2(sb, STD_GROUP_SIZE); +  	/*  	 * Test whether we have more sectors than will fit in sector_t,  	 * and whether the max offset is addressable by the page cache.  	 */ -	ret = generic_check_addressable(sb->s_blocksize_bits, +	err = generic_check_addressable(sb->s_blocksize_bits,  					ext4_blocks_count(es)); -	if (ret) { +	if (err) {  		ext4_msg(sb, KERN_ERR, "filesystem"  			 " too large to mount safely on this system");  		if (sizeof(sector_t) < 8) @@ -3295,7 +3827,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)  	 * of the filesystem.  	 */  	if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { -                ext4_msg(sb, KERN_WARNING, "bad geometry: first data" +		ext4_msg(sb, KERN_WARNING, "bad geometry: first data "  			 "block %u is beyond end of filesystem (%llu)",  			 le32_to_cpu(es->s_first_data_block),  			 ext4_blocks_count(es)); @@ -3319,17 +3851,21 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)  			(EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));  	db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /  		   EXT4_DESC_PER_BLOCK(sb); -	sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *), -				    GFP_KERNEL); +	sbi->s_group_desc = ext4_kvmalloc(db_count * +					  sizeof(struct buffer_head *), +					  GFP_KERNEL);  	if (sbi->s_group_desc == NULL) {  		ext4_msg(sb, KERN_ERR, "not enough memory"); +		ret = -ENOMEM;  		goto failed_mount;  	} -#ifdef CONFIG_PROC_FS  	if (ext4_proc_root)  		sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); -#endif + +	if (sbi->s_proc) +		proc_create_data("options", S_IRUGO, sbi->s_proc, +				 &ext4_seq_options_fops, sb);  	bgl_lock_init(sbi->s_blockgroup_lock); @@ -3359,8 +3895,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)  	get_random_bytes(&sbi->s_next_generation, sizeof(u32));  	spin_lock_init(&sbi->s_next_gen_lock); -	err = percpu_counter_init(&sbi->s_freeblocks_counter, -			ext4_count_free_blocks(sb)); +	init_timer(&sbi->s_err_report); +	sbi->s_err_report.function = print_daily_error_info; +	sbi->s_err_report.data = (unsigned long) sb; + +	/* Register extent status tree shrinker */ +	ext4_es_register_shrinker(sbi); + +	err = percpu_counter_init(&sbi->s_freeclusters_counter, +			ext4_count_free_clusters(sb));  	if (!err) {  		err = percpu_counter_init(&sbi->s_freeinodes_counter,  				ext4_count_free_inodes(sb)); @@ -3370,7 +3913,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)  				ext4_count_dirs(sb));  	}  	if (!err) { -		err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); +		err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0); +	} +	if (!err) { +		err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0);  	}  	if (err) {  		ext4_msg(sb, KERN_ERR, "insufficient memory"); @@ -3378,7 +3924,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)  	}  	sbi->s_stripe = ext4_get_stripe_size(sbi); -	sbi->s_max_writeback_mb_bump = 128; +	sbi->s_extent_max_zeroout_kb = 32;  	/*  	 * set up enough so that it can read an inode @@ -3391,12 +3937,16 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)  	sb->s_export_op = &ext4_export_ops;  	sb->s_xattr = ext4_xattr_handlers;  #ifdef CONFIG_QUOTA -	sb->s_qcop = &ext4_qctl_operations;  	sb->dq_op = &ext4_quota_operations; +	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) +		sb->s_qcop = &ext4_qctl_sysfile_operations; +	else +		sb->s_qcop = &ext4_qctl_operations;  #endif +	memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid)); +  	INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */  	mutex_init(&sbi->s_orphan_lock); -	mutex_init(&sbi->s_resize_lock);  	sb->s_root = NULL; @@ -3404,6 +3954,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)  			  EXT4_HAS_INCOMPAT_FEATURE(sb,  				    EXT4_FEATURE_INCOMPAT_RECOVER)); +	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_MMP) && +	    !(sb->s_flags & MS_RDONLY)) +		if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block))) +			goto failed_mount3; +  	/*  	 * The first inode we look at is the journal inode.  Don't try  	 * root first: it may be modified in the journal! @@ -3418,33 +3973,23 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)  		       "suppressed and not mounted read-only");  		goto failed_mount_wq;  	} else { -		clear_opt(sbi->s_mount_opt, DATA_FLAGS); -		set_opt(sbi->s_mount_opt, WRITEBACK_DATA); +		clear_opt(sb, DATA_FLAGS);  		sbi->s_journal = NULL;  		needs_recovery = 0;  		goto no_journal;  	} -	if (ext4_blocks_count(es) > 0xffffffffULL && +	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT) &&  	    !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,  				       JBD2_FEATURE_INCOMPAT_64BIT)) {  		ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");  		goto failed_mount_wq;  	} -	if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { -		jbd2_journal_set_features(sbi->s_journal, -				JBD2_FEATURE_COMPAT_CHECKSUM, 0, -				JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); -	} else if (test_opt(sb, JOURNAL_CHECKSUM)) { -		jbd2_journal_set_features(sbi->s_journal, -				JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0); -		jbd2_journal_clear_features(sbi->s_journal, 0, 0, -				JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); -	} else { -		jbd2_journal_clear_features(sbi->s_journal, -				JBD2_FEATURE_COMPAT_CHECKSUM, 0, -				JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); +	if (!set_journal_csum_feature_set(sb)) { +		ext4_msg(sb, KERN_ERR, "Failed to set journal checksum " +			 "feature set"); +		goto failed_mount_wq;  	}  	/* We have now updated the journal if required, so we can @@ -3457,9 +4002,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)  		 */  		if (jbd2_journal_check_available_features  		    (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) -			set_opt(sbi->s_mount_opt, ORDERED_DATA); +			set_opt(sb, ORDERED_DATA);  		else -			set_opt(sbi->s_mount_opt, JOURNAL_DATA); +			set_opt(sb, JOURNAL_DATA);  		break;  	case EXT4_MOUNT_ORDERED_DATA: @@ -3475,23 +4020,51 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)  	}  	set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); +	sbi->s_journal->j_commit_callback = ext4_journal_commit_callback; +  	/*  	 * The journal may have updated the bg summary counts, so we  	 * need to update the global counters.  	 */ -	percpu_counter_set(&sbi->s_freeblocks_counter, -			   ext4_count_free_blocks(sb)); +	percpu_counter_set(&sbi->s_freeclusters_counter, +			   ext4_count_free_clusters(sb));  	percpu_counter_set(&sbi->s_freeinodes_counter,  			   ext4_count_free_inodes(sb));  	percpu_counter_set(&sbi->s_dirs_counter,  			   ext4_count_dirs(sb)); -	percpu_counter_set(&sbi->s_dirtyblocks_counter, 0); +	percpu_counter_set(&sbi->s_dirtyclusters_counter, 0);  no_journal: -	EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten"); -	if (!EXT4_SB(sb)->dio_unwritten_wq) { -		printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n"); -		goto failed_mount_wq; +	if (ext4_mballoc_ready) { +		sbi->s_mb_cache = ext4_xattr_create_cache(sb->s_id); +		if (!sbi->s_mb_cache) { +			ext4_msg(sb, KERN_ERR, "Failed to create an mb_cache"); +			goto failed_mount_wq; +		} +	} + +	/* +	 * Get the # of file system overhead blocks from the +	 * superblock if present. +	 */ +	if (es->s_overhead_clusters) +		sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters); +	else { +		err = ext4_calculate_overhead(sb); +		if (err) +			goto failed_mount_wq; +	} + +	/* +	 * The maximum number of concurrent works can be high and +	 * concurrency isn't really necessary.  Limit it to 1. +	 */ +	EXT4_SB(sb)->rsv_conversion_wq = +		alloc_workqueue("ext4-rsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1); +	if (!EXT4_SB(sb)->rsv_conversion_wq) { +		printk(KERN_ERR "EXT4-fs: failed to create workqueue\n"); +		ret = -ENOMEM; +		goto failed_mount4;  	}  	/* @@ -3503,22 +4076,23 @@ no_journal:  	if (IS_ERR(root)) {  		ext4_msg(sb, KERN_ERR, "get root inode failed");  		ret = PTR_ERR(root); +		root = NULL;  		goto failed_mount4;  	}  	if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { -		iput(root);  		ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck"); +		iput(root);  		goto failed_mount4;  	} -	sb->s_root = d_alloc_root(root); +	sb->s_root = d_make_root(root);  	if (!sb->s_root) {  		ext4_msg(sb, KERN_ERR, "get root dentry failed"); -		iput(root);  		ret = -ENOMEM;  		goto failed_mount4;  	} -	ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY); +	if (ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY)) +		sb->s_flags |= MS_RDONLY;  	/* determine the minimum size of new large inodes, if present */  	if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { @@ -3545,53 +4119,48 @@ no_journal:  			 "available");  	} -	if (test_opt(sb, DELALLOC) && -	    (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) { -		ext4_msg(sb, KERN_WARNING, "Ignoring delalloc option - " -			 "requested data journaling mode"); -		clear_opt(sbi->s_mount_opt, DELALLOC); -	} -	if (test_opt(sb, DIOREAD_NOLOCK)) { -		if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { -			ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock " -				"option - requested data journaling mode"); -			clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); -		} -		if (sb->s_blocksize < PAGE_SIZE) { -			ext4_msg(sb, KERN_WARNING, "Ignoring dioread_nolock " -				"option - block size is too small"); -			clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK); -		} +	err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sb)); +	if (err) { +		ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for " +			 "reserved pool", ext4_calculate_resv_clusters(sb)); +		goto failed_mount4a;  	}  	err = ext4_setup_system_zone(sb);  	if (err) {  		ext4_msg(sb, KERN_ERR, "failed to initialize system "  			 "zone (%d)", err); -		goto failed_mount4; +		goto failed_mount4a;  	}  	ext4_ext_init(sb); -	err = ext4_mb_init(sb, needs_recovery); +	err = ext4_mb_init(sb);  	if (err) {  		ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",  			 err); -		goto failed_mount4; +		goto failed_mount5;  	}  	err = ext4_register_li_request(sb, first_not_zeroed);  	if (err) -		goto failed_mount4; +		goto failed_mount6;  	sbi->s_kobj.kset = ext4_kset;  	init_completion(&sbi->s_kobj_unregister);  	err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL,  				   "%s", sb->s_id); -	if (err) { -		ext4_mb_release(sb); -		ext4_ext_release(sb); -		goto failed_mount4; -	}; +	if (err) +		goto failed_mount7; + +#ifdef CONFIG_QUOTA +	/* Enable quota usage during mount. */ +	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) && +	    !(sb->s_flags & MS_RDONLY)) { +		err = ext4_enable_quotas(sb); +		if (err) +			goto failed_mount8; +	} +#endif  /* CONFIG_QUOTA */  	EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;  	ext4_orphan_cleanup(sb, es); @@ -3610,16 +4179,26 @@ no_journal:  	} else  		descr = "out journal"; +	if (test_opt(sb, DISCARD)) { +		struct request_queue *q = bdev_get_queue(sb->s_bdev); +		if (!blk_queue_discard(q)) +			ext4_msg(sb, KERN_WARNING, +				 "mounting with \"discard\" option, but " +				 "the device does not support discard"); +	} +  	ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "  		 "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,  		 *sbi->s_es->s_mount_opts ? "; " : "", orig_data); -	init_timer(&sbi->s_err_report); -	sbi->s_err_report.function = print_daily_error_info; -	sbi->s_err_report.data = (unsigned long) sb;  	if (es->s_error_count)  		mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */ +	/* Enable message ratelimiting. Default is 10 messages per 5 secs. */ +	ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10); +	ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10); +	ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10); +  	kfree(orig_data);  	return 0; @@ -3628,32 +4207,50 @@ cantfind_ext4:  		ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");  	goto failed_mount; +#ifdef CONFIG_QUOTA +failed_mount8: +	kobject_del(&sbi->s_kobj); +#endif +failed_mount7: +	ext4_unregister_li_request(sb); +failed_mount6: +	ext4_mb_release(sb); +failed_mount5: +	ext4_ext_release(sb); +	ext4_release_system_zone(sb); +failed_mount4a: +	dput(sb->s_root); +	sb->s_root = NULL;  failed_mount4:  	ext4_msg(sb, KERN_ERR, "mount failed"); -	destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq); +	if (EXT4_SB(sb)->rsv_conversion_wq) +		destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);  failed_mount_wq: -	ext4_release_system_zone(sb);  	if (sbi->s_journal) {  		jbd2_journal_destroy(sbi->s_journal);  		sbi->s_journal = NULL;  	}  failed_mount3: -	if (sbi->s_flex_groups) { -		if (is_vmalloc_addr(sbi->s_flex_groups)) -			vfree(sbi->s_flex_groups); -		else -			kfree(sbi->s_flex_groups); -	} -	percpu_counter_destroy(&sbi->s_freeblocks_counter); +	ext4_es_unregister_shrinker(sbi); +	del_timer_sync(&sbi->s_err_report); +	if (sbi->s_flex_groups) +		ext4_kvfree(sbi->s_flex_groups); +	percpu_counter_destroy(&sbi->s_freeclusters_counter);  	percpu_counter_destroy(&sbi->s_freeinodes_counter);  	percpu_counter_destroy(&sbi->s_dirs_counter); -	percpu_counter_destroy(&sbi->s_dirtyblocks_counter); +	percpu_counter_destroy(&sbi->s_dirtyclusters_counter); +	percpu_counter_destroy(&sbi->s_extent_cache_cnt); +	if (sbi->s_mmp_tsk) +		kthread_stop(sbi->s_mmp_tsk);  failed_mount2:  	for (i = 0; i < db_count; i++)  		brelse(sbi->s_group_desc[i]); -	kfree(sbi->s_group_desc); +	ext4_kvfree(sbi->s_group_desc);  failed_mount: +	if (sbi->s_chksum_driver) +		crypto_free_shash(sbi->s_chksum_driver);  	if (sbi->s_proc) { +		remove_proc_entry("options", sbi->s_proc);  		remove_proc_entry(sb->s_id, ext4_proc_root);  	}  #ifdef CONFIG_QUOTA @@ -3668,7 +4265,7 @@ out_fail:  	kfree(sbi);  out_free_orig:  	kfree(orig_data); -	return ret; +	return err ? err : ret;  }  /* @@ -3758,13 +4355,6 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,  	if (bdev == NULL)  		return NULL; -	if (bd_claim(bdev, sb)) { -		ext4_msg(sb, KERN_ERR, -			"failed to claim external journal device"); -		blkdev_put(bdev, FMODE_READ|FMODE_WRITE); -		return NULL; -	} -  	blocksize = sb->s_blocksize;  	hblock = bdev_logical_block_size(bdev);  	if (blocksize < hblock) { @@ -3782,7 +4372,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,  		goto out_bdev;  	} -	es = (struct ext4_super_block *) (((char *)bh->b_data) + offset); +	es = (struct ext4_super_block *) (bh->b_data + offset);  	if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||  	    !(le32_to_cpu(es->s_feature_incompat) &  	      EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) { @@ -3809,7 +4399,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,  		goto out_bdev;  	}  	journal->j_private = sb; -	ll_rw_block(READ, 1, &journal->j_sb_buffer); +	ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &journal->j_sb_buffer);  	wait_on_buffer(journal->j_sb_buffer);  	if (!buffer_uptodate(journal->j_sb_buffer)) {  		ext4_msg(sb, KERN_ERR, "I/O error on journal device"); @@ -3890,15 +4480,6 @@ static int ext4_load_journal(struct super_block *sb,  	if (!(journal->j_flags & JBD2_BARRIER))  		ext4_msg(sb, KERN_INFO, "barriers disabled"); -	if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { -		err = jbd2_journal_update_format(journal); -		if (err)  { -			ext4_msg(sb, KERN_ERR, "error updating journal"); -			jbd2_journal_destroy(journal); -			return err; -		} -	} -  	if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER))  		err = jbd2_journal_wipe(journal, !really_read_only);  	if (!err) { @@ -3939,7 +4520,7 @@ static int ext4_commit_super(struct super_block *sb, int sync)  	struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;  	int error = 0; -	if (!sbh) +	if (!sbh || block_device_ejected(sb))  		return error;  	if (buffer_write_io_error(sbh)) {  		/* @@ -3975,13 +4556,14 @@ static int ext4_commit_super(struct super_block *sb, int sync)  	else  		es->s_kbytes_written =  			cpu_to_le64(EXT4_SB(sb)->s_kbytes_written); -	ext4_free_blocks_count_set(es, percpu_counter_sum_positive( -					   &EXT4_SB(sb)->s_freeblocks_counter)); +	ext4_free_blocks_count_set(es, +			EXT4_C2B(EXT4_SB(sb), percpu_counter_sum_positive( +				&EXT4_SB(sb)->s_freeclusters_counter)));  	es->s_free_inodes_count =  		cpu_to_le32(percpu_counter_sum_positive(  				&EXT4_SB(sb)->s_freeinodes_counter)); -	sb->s_dirt = 0;  	BUFFER_TRACE(sbh, "marking dirty"); +	ext4_superblock_csum_set(sb);  	mark_buffer_dirty(sbh);  	if (sync) {  		error = sync_dirty_buffer(sbh); @@ -4062,6 +4644,7 @@ static void ext4_clear_journal_err(struct super_block *sb,  		ext4_commit_super(sb, 1);  		jbd2_journal_clear_err(journal); +		jbd2_journal_update_sb_errno(journal);  	}  } @@ -4072,45 +4655,72 @@ static void ext4_clear_journal_err(struct super_block *sb,  int ext4_force_commit(struct super_block *sb)  {  	journal_t *journal; -	int ret = 0;  	if (sb->s_flags & MS_RDONLY)  		return 0;  	journal = EXT4_SB(sb)->s_journal; -	if (journal) { -		vfs_check_frozen(sb, SB_FREEZE_TRANS); -		ret = ext4_journal_force_commit(journal); -	} - -	return ret; -} - -static void ext4_write_super(struct super_block *sb) -{ -	lock_super(sb); -	ext4_commit_super(sb, 1); -	unlock_super(sb); +	return ext4_journal_force_commit(journal);  }  static int ext4_sync_fs(struct super_block *sb, int wait)  {  	int ret = 0;  	tid_t target; +	bool needs_barrier = false;  	struct ext4_sb_info *sbi = EXT4_SB(sb);  	trace_ext4_sync_fs(sb, wait); -	flush_workqueue(sbi->dio_unwritten_wq); +	flush_workqueue(sbi->rsv_conversion_wq); +	/* +	 * Writeback quota in non-journalled quota case - journalled quota has +	 * no dirty dquots +	 */ +	dquot_writeback_dquots(sb, -1); +	/* +	 * Data writeback is possible w/o journal transaction, so barrier must +	 * being sent at the end of the function. But we can skip it if +	 * transaction_commit will do it for us. +	 */ +	target = jbd2_get_latest_transaction(sbi->s_journal); +	if (wait && sbi->s_journal->j_flags & JBD2_BARRIER && +	    !jbd2_trans_will_send_data_barrier(sbi->s_journal, target)) +		needs_barrier = true; +  	if (jbd2_journal_start_commit(sbi->s_journal, &target)) {  		if (wait) -			jbd2_log_wait_commit(sbi->s_journal, target); +			ret = jbd2_log_wait_commit(sbi->s_journal, target);  	} +	if (needs_barrier) { +		int err; +		err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL); +		if (!ret) +			ret = err; +	} + +	return ret; +} + +static int ext4_sync_fs_nojournal(struct super_block *sb, int wait) +{ +	int ret = 0; + +	trace_ext4_sync_fs(sb, wait); +	flush_workqueue(EXT4_SB(sb)->rsv_conversion_wq); +	dquot_writeback_dquots(sb, -1); +	if (wait && test_opt(sb, BARRIER)) +		ret = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL); +  	return ret;  }  /*   * LVM calls this function before a (read-only) snapshot is created.  This   * gives us a chance to flush the journal completely and mark the fs clean. + * + * Note that only this function cannot bring a filesystem to be in a clean + * state independently. It relies on upper layer to stop all data & metadata + * modifications.   */  static int ext4_freeze(struct super_block *sb)  { @@ -4137,7 +4747,7 @@ static int ext4_freeze(struct super_block *sb)  	EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);  	error = ext4_commit_super(sb, 1);  out: -	/* we rely on s_frozen to stop further updates */ +	/* we rely on upper layer to stop further updates */  	jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);  	return error;  } @@ -4151,34 +4761,47 @@ static int ext4_unfreeze(struct super_block *sb)  	if (sb->s_flags & MS_RDONLY)  		return 0; -	lock_super(sb);  	/* Reset the needs_recovery flag before the fs is unlocked. */  	EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);  	ext4_commit_super(sb, 1); -	unlock_super(sb);  	return 0;  } +/* + * Structure to save mount options for ext4_remount's benefit + */ +struct ext4_mount_options { +	unsigned long s_mount_opt; +	unsigned long s_mount_opt2; +	kuid_t s_resuid; +	kgid_t s_resgid; +	unsigned long s_commit_interval; +	u32 s_min_batch_time, s_max_batch_time; +#ifdef CONFIG_QUOTA +	int s_jquota_fmt; +	char *s_qf_names[MAXQUOTAS]; +#endif +}; +  static int ext4_remount(struct super_block *sb, int *flags, char *data)  {  	struct ext4_super_block *es;  	struct ext4_sb_info *sbi = EXT4_SB(sb); -	ext4_fsblk_t n_blocks_count = 0;  	unsigned long old_sb_flags;  	struct ext4_mount_options old_opts;  	int enable_quota = 0;  	ext4_group_t g;  	unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO; -	int err; +	int err = 0;  #ifdef CONFIG_QUOTA -	int i; +	int i, j;  #endif  	char *orig_data = kstrdup(data, GFP_KERNEL);  	/* Store the original options */ -	lock_super(sb);  	old_sb_flags = sb->s_flags;  	old_opts.s_mount_opt = sbi->s_mount_opt; +	old_opts.s_mount_opt2 = sbi->s_mount_opt2;  	old_opts.s_resuid = sbi->s_resuid;  	old_opts.s_resgid = sbi->s_resgid;  	old_opts.s_commit_interval = sbi->s_commit_interval; @@ -4187,7 +4810,17 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)  #ifdef CONFIG_QUOTA  	old_opts.s_jquota_fmt = sbi->s_jquota_fmt;  	for (i = 0; i < MAXQUOTAS; i++) -		old_opts.s_qf_names[i] = sbi->s_qf_names[i]; +		if (sbi->s_qf_names[i]) { +			old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i], +							 GFP_KERNEL); +			if (!old_opts.s_qf_names[i]) { +				for (j = 0; j < i; j++) +					kfree(old_opts.s_qf_names[j]); +				kfree(orig_data); +				return -ENOMEM; +			} +		} else +			old_opts.s_qf_names[i] = NULL;  #endif  	if (sbi->s_journal && sbi->s_journal->j_task->io_context)  		journal_ioprio = sbi->s_journal->j_task->io_context->ioprio; @@ -4195,12 +4828,26 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)  	/*  	 * Allow the "check" option to be passed as a remount option.  	 */ -	if (!parse_options(data, sb, NULL, &journal_ioprio, -			   &n_blocks_count, 1)) { +	if (!parse_options(data, sb, NULL, &journal_ioprio, 1)) {  		err = -EINVAL;  		goto restore_opts;  	} +	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { +		if (test_opt2(sb, EXPLICIT_DELALLOC)) { +			ext4_msg(sb, KERN_ERR, "can't mount with " +				 "both data=journal and delalloc"); +			err = -EINVAL; +			goto restore_opts; +		} +		if (test_opt(sb, DIOREAD_NOLOCK)) { +			ext4_msg(sb, KERN_ERR, "can't mount with " +				 "both data=journal and dioread_nolock"); +			err = -EINVAL; +			goto restore_opts; +		} +	} +  	if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)  		ext4_abort(sb, "Abort forced by user"); @@ -4214,14 +4861,16 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)  		set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);  	} -	if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || -		n_blocks_count > ext4_blocks_count(es)) { +	if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {  		if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) {  			err = -EROFS;  			goto restore_opts;  		}  		if (*flags & MS_RDONLY) { +			err = sync_filesystem(sb); +			if (err < 0) +				goto restore_opts;  			err = dquot_suspend(sb, -1);  			if (err < 0)  				goto restore_opts; @@ -4257,7 +4906,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)  				struct ext4_group_desc *gdp =  					ext4_get_group_desc(sb, g, NULL); -				if (!ext4_group_desc_csum_verify(sbi, g, gdp)) { +				if (!ext4_group_desc_csum_verify(sb, g, gdp)) {  					ext4_msg(sb, KERN_ERR,  	       "ext4_remount: Checksum for group %u failed (%u!=%u)",  		g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)), @@ -4290,10 +4939,15 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)  			if (sbi->s_journal)  				ext4_clear_journal_err(sb, es);  			sbi->s_mount_state = le16_to_cpu(es->s_state); -			if ((err = ext4_group_extend(sb, es, n_blocks_count))) -				goto restore_opts;  			if (!ext4_setup_super(sb, es, 0))  				sb->s_flags &= ~MS_RDONLY; +			if (EXT4_HAS_INCOMPAT_FEATURE(sb, +						     EXT4_FEATURE_INCOMPAT_MMP)) +				if (ext4_multi_mount_protect(sb, +						le64_to_cpu(es->s_mmp_block))) { +					err = -EROFS; +					goto restore_opts; +				}  			enable_quota = 1;  		}  	} @@ -4311,19 +4965,24 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)  	}  	ext4_setup_system_zone(sb); -	if (sbi->s_journal == NULL) +	if (sbi->s_journal == NULL && !(old_sb_flags & MS_RDONLY))  		ext4_commit_super(sb, 1);  #ifdef CONFIG_QUOTA  	/* Release old quota file names */  	for (i = 0; i < MAXQUOTAS; i++) -		if (old_opts.s_qf_names[i] && -		    old_opts.s_qf_names[i] != sbi->s_qf_names[i]) -			kfree(old_opts.s_qf_names[i]); +		kfree(old_opts.s_qf_names[i]); +	if (enable_quota) { +		if (sb_any_quota_suspended(sb)) +			dquot_resume(sb, -1); +		else if (EXT4_HAS_RO_COMPAT_FEATURE(sb, +					EXT4_FEATURE_RO_COMPAT_QUOTA)) { +			err = ext4_enable_quotas(sb); +			if (err) +				goto restore_opts; +		} +	}  #endif -	unlock_super(sb); -	if (enable_quota) -		dquot_resume(sb, -1);  	ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data);  	kfree(orig_data); @@ -4332,6 +4991,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)  restore_opts:  	sb->s_flags = old_sb_flags;  	sbi->s_mount_opt = old_opts.s_mount_opt; +	sbi->s_mount_opt2 = old_opts.s_mount_opt2;  	sbi->s_resuid = old_opts.s_resuid;  	sbi->s_resgid = old_opts.s_resgid;  	sbi->s_commit_interval = old_opts.s_commit_interval; @@ -4340,13 +5000,10 @@ restore_opts:  #ifdef CONFIG_QUOTA  	sbi->s_jquota_fmt = old_opts.s_jquota_fmt;  	for (i = 0; i < MAXQUOTAS; i++) { -		if (sbi->s_qf_names[i] && -		    old_opts.s_qf_names[i] != sbi->s_qf_names[i]) -			kfree(sbi->s_qf_names[i]); +		kfree(sbi->s_qf_names[i]);  		sbi->s_qf_names[i] = old_opts.s_qf_names[i];  	}  #endif -	unlock_super(sb);  	kfree(orig_data);  	return err;  } @@ -4356,54 +5013,24 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)  	struct super_block *sb = dentry->d_sb;  	struct ext4_sb_info *sbi = EXT4_SB(sb);  	struct ext4_super_block *es = sbi->s_es; +	ext4_fsblk_t overhead = 0, resv_blocks;  	u64 fsid; +	s64 bfree; +	resv_blocks = EXT4_C2B(sbi, atomic64_read(&sbi->s_resv_clusters)); -	if (test_opt(sb, MINIX_DF)) { -		sbi->s_overhead_last = 0; -	} else if (sbi->s_blocks_last != ext4_blocks_count(es)) { -		ext4_group_t i, ngroups = ext4_get_groups_count(sb); -		ext4_fsblk_t overhead = 0; - -		/* -		 * Compute the overhead (FS structures).  This is constant -		 * for a given filesystem unless the number of block groups -		 * changes so we cache the previous value until it does. -		 */ - -		/* -		 * All of the blocks before first_data_block are -		 * overhead -		 */ -		overhead = le32_to_cpu(es->s_first_data_block); - -		/* -		 * Add the overhead attributed to the superblock and -		 * block group descriptors.  If the sparse superblocks -		 * feature is turned on, then not all groups have this. -		 */ -		for (i = 0; i < ngroups; i++) { -			overhead += ext4_bg_has_super(sb, i) + -				ext4_bg_num_gdb(sb, i); -			cond_resched(); -		} - -		/* -		 * Every block group has an inode bitmap, a block -		 * bitmap, and an inode table. -		 */ -		overhead += ngroups * (2 + sbi->s_itb_per_group); -		sbi->s_overhead_last = overhead; -		smp_wmb(); -		sbi->s_blocks_last = ext4_blocks_count(es); -	} +	if (!test_opt(sb, MINIX_DF)) +		overhead = sbi->s_overhead;  	buf->f_type = EXT4_SUPER_MAGIC;  	buf->f_bsize = sb->s_blocksize; -	buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; -	buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) - -		       percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter); -	buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); -	if (buf->f_bfree < ext4_r_blocks_count(es)) +	buf->f_blocks = ext4_blocks_count(es) - EXT4_C2B(sbi, overhead); +	bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) - +		percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter); +	/* prevent underflow in case that few free space is available */ +	buf->f_bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0)); +	buf->f_bavail = buf->f_bfree - +			(ext4_r_blocks_count(es) + resv_blocks); +	if (buf->f_bfree < (ext4_r_blocks_count(es) + resv_blocks))  		buf->f_bavail = 0;  	buf->f_files = le32_to_cpu(es->s_inodes_count);  	buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter); @@ -4430,7 +5057,7 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)  static inline struct inode *dquot_to_inode(struct dquot *dquot)  { -	return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]; +	return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type];  }  static int ext4_write_dquot(struct dquot *dquot) @@ -4440,7 +5067,7 @@ static int ext4_write_dquot(struct dquot *dquot)  	struct inode *inode;  	inode = dquot_to_inode(dquot); -	handle = ext4_journal_start(inode, +	handle = ext4_journal_start(inode, EXT4_HT_QUOTA,  				    EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));  	if (IS_ERR(handle))  		return PTR_ERR(handle); @@ -4456,7 +5083,7 @@ static int ext4_acquire_dquot(struct dquot *dquot)  	int ret, err;  	handle_t *handle; -	handle = ext4_journal_start(dquot_to_inode(dquot), +	handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,  				    EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));  	if (IS_ERR(handle))  		return PTR_ERR(handle); @@ -4472,7 +5099,7 @@ static int ext4_release_dquot(struct dquot *dquot)  	int ret, err;  	handle_t *handle; -	handle = ext4_journal_start(dquot_to_inode(dquot), +	handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,  				    EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));  	if (IS_ERR(handle)) {  		/* Release dquot anyway to avoid endless cycle in dqput() */ @@ -4488,9 +5115,12 @@ static int ext4_release_dquot(struct dquot *dquot)  static int ext4_mark_dquot_dirty(struct dquot *dquot)  { +	struct super_block *sb = dquot->dq_sb; +	struct ext4_sb_info *sbi = EXT4_SB(sb); +  	/* Are we journaling quotas? */ -	if (EXT4_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || -	    EXT4_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { +	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) || +	    sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {  		dquot_mark_dquot_dirty(dquot);  		return ext4_write_dquot(dquot);  	} else { @@ -4504,7 +5134,7 @@ static int ext4_write_info(struct super_block *sb, int type)  	handle_t *handle;  	/* Data block + inode block */ -	handle = ext4_journal_start(sb->s_root->d_inode, 2); +	handle = ext4_journal_start(sb->s_root->d_inode, EXT4_HT_QUOTA, 2);  	if (IS_ERR(handle))  		return PTR_ERR(handle);  	ret = dquot_commit_info(sb, type); @@ -4528,27 +5158,20 @@ static int ext4_quota_on_mount(struct super_block *sb, int type)   * Standard function to be called on quota_on   */  static int ext4_quota_on(struct super_block *sb, int type, int format_id, -			 char *name) +			 struct path *path)  {  	int err; -	struct path path;  	if (!test_opt(sb, QUOTA))  		return -EINVAL; -	err = kern_path(name, LOOKUP_FOLLOW, &path); -	if (err) -		return err; -  	/* Quotafile not on the same filesystem? */ -	if (path.mnt->mnt_sb != sb) { -		path_put(&path); +	if (path->dentry->d_sb != sb)  		return -EXDEV; -	}  	/* Journaling quota? */  	if (EXT4_SB(sb)->s_qf_names[type]) {  		/* Quotafile not in fs root? */ -		if (path.dentry->d_parent != sb->s_root) +		if (path->dentry->d_parent != sb->s_root)  			ext4_msg(sb, KERN_WARNING,  				"Quota file not on filesystem root. "  				"Journaled quota will not work"); @@ -4559,7 +5182,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,  	 * all updates to the file when we bypass pagecache...  	 */  	if (EXT4_SB(sb)->s_journal && -	    ext4_should_journal_data(path.dentry->d_inode)) { +	    ext4_should_journal_data(path->dentry->d_inode)) {  		/*  		 * We don't need to lock updates but journal_flush() could  		 * otherwise be livelocked... @@ -4567,30 +5190,124 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,  		jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);  		err = jbd2_journal_flush(EXT4_SB(sb)->s_journal);  		jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); -		if (err) { -			path_put(&path); +		if (err)  			return err; -		}  	} -	err = dquot_quota_on_path(sb, type, format_id, &path); -	path_put(&path); +	return dquot_quota_on(sb, type, format_id, path); +} + +static int ext4_quota_enable(struct super_block *sb, int type, int format_id, +			     unsigned int flags) +{ +	int err; +	struct inode *qf_inode; +	unsigned long qf_inums[MAXQUOTAS] = { +		le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum), +		le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum) +	}; + +	BUG_ON(!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)); + +	if (!qf_inums[type]) +		return -EPERM; + +	qf_inode = ext4_iget(sb, qf_inums[type]); +	if (IS_ERR(qf_inode)) { +		ext4_error(sb, "Bad quota inode # %lu", qf_inums[type]); +		return PTR_ERR(qf_inode); +	} + +	/* Don't account quota for quota files to avoid recursion */ +	qf_inode->i_flags |= S_NOQUOTA; +	err = dquot_enable(qf_inode, type, format_id, flags); +	iput(qf_inode); +  	return err;  } +/* Enable usage tracking for all quota types. */ +static int ext4_enable_quotas(struct super_block *sb) +{ +	int type, err = 0; +	unsigned long qf_inums[MAXQUOTAS] = { +		le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum), +		le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum) +	}; + +	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE; +	for (type = 0; type < MAXQUOTAS; type++) { +		if (qf_inums[type]) { +			err = ext4_quota_enable(sb, type, QFMT_VFS_V1, +						DQUOT_USAGE_ENABLED); +			if (err) { +				ext4_warning(sb, +					"Failed to enable quota tracking " +					"(type=%d, err=%d). Please run " +					"e2fsck to fix.", type, err); +				return err; +			} +		} +	} +	return 0; +} + +/* + * quota_on function that is used when QUOTA feature is set. + */ +static int ext4_quota_on_sysfile(struct super_block *sb, int type, +				 int format_id) +{ +	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) +		return -EINVAL; + +	/* +	 * USAGE was enabled at mount time. Only need to enable LIMITS now. +	 */ +	return ext4_quota_enable(sb, type, format_id, DQUOT_LIMITS_ENABLED); +} +  static int ext4_quota_off(struct super_block *sb, int type)  { +	struct inode *inode = sb_dqopt(sb)->files[type]; +	handle_t *handle; +  	/* Force all delayed allocation blocks to be allocated.  	 * Caller already holds s_umount sem */  	if (test_opt(sb, DELALLOC))  		sync_filesystem(sb); +	if (!inode) +		goto out; + +	/* Update modification times of quota files when userspace can +	 * start looking at them */ +	handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1); +	if (IS_ERR(handle)) +		goto out; +	inode->i_mtime = inode->i_ctime = CURRENT_TIME; +	ext4_mark_inode_dirty(handle, inode); +	ext4_journal_stop(handle); + +out:  	return dquot_quota_off(sb, type);  } +/* + * quota_off function that is used when QUOTA feature is set. + */ +static int ext4_quota_off_sysfile(struct super_block *sb, int type) +{ +	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) +		return -EINVAL; + +	/* Disable only the limits. */ +	return dquot_disable(sb, type, DQUOT_LIMITS_ENABLED); +} +  /* Read data from quotafile - avoid pagecache and such because we cannot afford   * acquiring the locks... As quota files are never truncated and quota code - * itself serializes the operations (and noone else should touch the files) + * itself serializes the operations (and no one else should touch the files)   * we don't have to be afraid of races */  static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,  			       size_t len, loff_t off) @@ -4657,10 +5374,10 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,  		return -EIO;  	} -	mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);  	bh = ext4_bread(handle, inode, blk, 1, &err);  	if (!bh)  		goto out; +	BUFFER_TRACE(bh, "get write access");  	err = ext4_journal_get_write_access(handle, bh);  	if (err) {  		brelse(bh); @@ -4673,17 +5390,13 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,  	err = ext4_handle_dirty_metadata(handle, NULL, bh);  	brelse(bh);  out: -	if (err) { -		mutex_unlock(&inode->i_mutex); +	if (err)  		return err; -	}  	if (inode->i_size < off + len) {  		i_size_write(inode, off + len);  		EXT4_I(inode)->i_disksize = inode->i_size; +		ext4_mark_inode_dirty(handle, inode);  	} -	inode->i_mtime = inode->i_ctime = CURRENT_TIME; -	ext4_mark_inode_dirty(handle, inode); -	mutex_unlock(&inode->i_mutex);  	return len;  } @@ -4696,14 +5409,6 @@ static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,  }  #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) -static struct file_system_type ext2_fs_type = { -	.owner		= THIS_MODULE, -	.name		= "ext2", -	.mount		= ext4_mount, -	.kill_sb	= kill_block_super, -	.fs_flags	= FS_REQUIRES_DEV, -}; -  static inline void register_as_ext2(void)  {  	int err = register_filesystem(&ext2_fs_type); @@ -4716,10 +5421,21 @@ static inline void unregister_as_ext2(void)  {  	unregister_filesystem(&ext2_fs_type);  } -MODULE_ALIAS("ext2"); + +static inline int ext2_feature_set_ok(struct super_block *sb) +{ +	if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP)) +		return 0; +	if (sb->s_flags & MS_RDONLY) +		return 1; +	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP)) +		return 0; +	return 1; +}  #else  static inline void register_as_ext2(void) { }  static inline void unregister_as_ext2(void) { } +static inline int ext2_feature_set_ok(struct super_block *sb) { return 0; }  #endif  #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) @@ -4735,10 +5451,23 @@ static inline void unregister_as_ext3(void)  {  	unregister_filesystem(&ext3_fs_type);  } -MODULE_ALIAS("ext3"); + +static inline int ext3_feature_set_ok(struct super_block *sb) +{ +	if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP)) +		return 0; +	if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) +		return 0; +	if (sb->s_flags & MS_RDONLY) +		return 1; +	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP)) +		return 0; +	return 1; +}  #else  static inline void register_as_ext3(void) { }  static inline void unregister_as_ext3(void) { } +static inline int ext3_feature_set_ok(struct super_block *sb) { return 0; }  #endif  static struct file_system_type ext4_fs_type = { @@ -4748,8 +5477,9 @@ static struct file_system_type ext4_fs_type = {  	.kill_sb	= kill_block_super,  	.fs_flags	= FS_REQUIRES_DEV,  }; +MODULE_ALIAS_FS("ext4"); -int __init ext4_init_feat_adverts(void) +static int __init ext4_init_feat_adverts(void)  {  	struct ext4_features *ef;  	int ret = -ENOMEM; @@ -4773,59 +5503,89 @@ out:  	return ret;  } +static void ext4_exit_feat_adverts(void) +{ +	kobject_put(&ext4_feat->f_kobj); +	wait_for_completion(&ext4_feat->f_kobj_unregister); +	kfree(ext4_feat); +} + +/* Shared across all ext4 file systems */ +wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ]; +struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ]; +  static int __init ext4_init_fs(void)  { -	int err; +	int i, err; +	ext4_li_info = NULL; +	mutex_init(&ext4_li_mtx); + +	/* Build-time check for flags consistency */  	ext4_check_flag_values(); -	err = ext4_init_pageio(); + +	for (i = 0; i < EXT4_WQ_HASH_SZ; i++) { +		mutex_init(&ext4__aio_mutex[i]); +		init_waitqueue_head(&ext4__ioend_wq[i]); +	} + +	err = ext4_init_es();  	if (err)  		return err; + +	err = ext4_init_pageio(); +	if (err) +		goto out7; +  	err = ext4_init_system_zone();  	if (err) -		goto out5; +		goto out6;  	ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); -	if (!ext4_kset) -		goto out4; +	if (!ext4_kset) { +		err = -ENOMEM; +		goto out5; +	}  	ext4_proc_root = proc_mkdir("fs/ext4", NULL);  	err = ext4_init_feat_adverts(); - -	err = ext4_init_mballoc();  	if (err) -		goto out3; +		goto out4; -	err = ext4_init_xattr(); +	err = ext4_init_mballoc();  	if (err)  		goto out2; +	else +		ext4_mballoc_ready = 1;  	err = init_inodecache();  	if (err)  		goto out1; -	register_as_ext2();  	register_as_ext3(); +	register_as_ext2();  	err = register_filesystem(&ext4_fs_type);  	if (err)  		goto out; -	ext4_li_info = NULL; -	mutex_init(&ext4_li_mtx);  	return 0;  out:  	unregister_as_ext2();  	unregister_as_ext3();  	destroy_inodecache();  out1: -	ext4_exit_xattr(); -out2: +	ext4_mballoc_ready = 0;  	ext4_exit_mballoc(); -out3: -	kfree(ext4_feat); -	remove_proc_entry("fs/ext4", NULL); -	kset_unregister(ext4_kset); +out2: +	ext4_exit_feat_adverts();  out4: -	ext4_exit_system_zone(); +	if (ext4_proc_root) +		remove_proc_entry("fs/ext4", NULL); +	kset_unregister(ext4_kset);  out5: +	ext4_exit_system_zone(); +out6:  	ext4_exit_pageio(); +out7: +	ext4_exit_es(); +  	return err;  } @@ -4836,12 +5596,13 @@ static void __exit ext4_exit_fs(void)  	unregister_as_ext3();  	unregister_filesystem(&ext4_fs_type);  	destroy_inodecache(); -	ext4_exit_xattr();  	ext4_exit_mballoc(); +	ext4_exit_feat_adverts();  	remove_proc_entry("fs/ext4", NULL);  	kset_unregister(ext4_kset);  	ext4_exit_system_zone();  	ext4_exit_pageio(); +	ext4_exit_es();  }  MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");  | 
