diff options
Diffstat (limited to 'fs/ext4/dir.c')
| -rw-r--r-- | fs/ext4/dir.c | 430 | 
1 files changed, 244 insertions, 186 deletions
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index ece76fb6a40..ef1bed66c14 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -27,122 +27,130 @@  #include <linux/slab.h>  #include <linux/rbtree.h>  #include "ext4.h" +#include "xattr.h" -static unsigned char ext4_filetype_table[] = { -	DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK -}; - -static int ext4_readdir(struct file *, void *, filldir_t); -static int ext4_dx_readdir(struct file *filp, -			   void *dirent, filldir_t filldir); -static int ext4_release_dir(struct inode *inode, -				struct file *filp); - -const struct file_operations ext4_dir_operations = { -	.llseek		= ext4_llseek, -	.read		= generic_read_dir, -	.readdir	= ext4_readdir,		/* we take BKL. needed?*/ -	.unlocked_ioctl = ext4_ioctl, -#ifdef CONFIG_COMPAT -	.compat_ioctl	= ext4_compat_ioctl, -#endif -	.fsync		= ext4_sync_file, -	.release	= ext4_release_dir, -}; - +static int ext4_dx_readdir(struct file *, struct dir_context *); -static unsigned char get_dtype(struct super_block *sb, int filetype) +/** + * Check if the given dir-inode refers to an htree-indexed directory + * (or a directory which could potentially get converted to use htree + * indexing). + * + * Return 1 if it is a dx dir, 0 if not + */ +static int is_dx_dir(struct inode *inode)  { -	if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE) || -	    (filetype >= EXT4_FT_MAX)) -		return DT_UNKNOWN; +	struct super_block *sb = inode->i_sb; -	return (ext4_filetype_table[filetype]); -} +	if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb, +		     EXT4_FEATURE_COMPAT_DIR_INDEX) && +	    ((ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) || +	     ((inode->i_size >> sb->s_blocksize_bits) == 1) || +	     ext4_has_inline_data(inode))) +		return 1; +	return 0; +} +/* + * Return 0 if the directory entry is OK, and 1 if there is a problem + * + * Note: this is the opposite of what ext2 and ext3 historically returned... + * + * bh passed here can be an inode block or a dir data block, depending + * on the inode inline data flag. + */  int __ext4_check_dir_entry(const char *function, unsigned int line, -			   struct inode *dir, +			   struct inode *dir, struct file *filp,  			   struct ext4_dir_entry_2 *de, -			   struct buffer_head *bh, +			   struct buffer_head *bh, char *buf, int size,  			   unsigned int offset)  {  	const char *error_msg = NULL;  	const int rlen = ext4_rec_len_from_disk(de->rec_len,  						dir->i_sb->s_blocksize); -	if (rlen < EXT4_DIR_REC_LEN(1)) +	if (unlikely(rlen < EXT4_DIR_REC_LEN(1)))  		error_msg = "rec_len is smaller than minimal"; -	else if (rlen % 4 != 0) +	else if (unlikely(rlen % 4 != 0))  		error_msg = "rec_len % 4 != 0"; -	else if (rlen < EXT4_DIR_REC_LEN(de->name_len)) +	else if (unlikely(rlen < EXT4_DIR_REC_LEN(de->name_len)))  		error_msg = "rec_len is too small for name_len"; -	else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize) -		error_msg = "directory entry across blocks"; -	else if (le32_to_cpu(de->inode) > -			le32_to_cpu(EXT4_SB(dir->i_sb)->s_es->s_inodes_count)) +	else if (unlikely(((char *) de - buf) + rlen > size)) +		error_msg = "directory entry across range"; +	else if (unlikely(le32_to_cpu(de->inode) > +			le32_to_cpu(EXT4_SB(dir->i_sb)->s_es->s_inodes_count)))  		error_msg = "inode out of bounds"; +	else +		return 0; -	if (error_msg != NULL) +	if (filp) +		ext4_error_file(filp, function, line, bh->b_blocknr, +				"bad entry in directory: %s - offset=%u(%u), " +				"inode=%u, rec_len=%d, name_len=%d", +				error_msg, (unsigned) (offset % size), +				offset, le32_to_cpu(de->inode), +				rlen, de->name_len); +	else  		ext4_error_inode(dir, function, line, bh->b_blocknr, -			"bad entry in directory: %s - " -			"offset=%u(%u), inode=%u, rec_len=%d, name_len=%d", -			error_msg, (unsigned) (offset%bh->b_size), offset, -			le32_to_cpu(de->inode), -			rlen, de->name_len); -	return error_msg == NULL ? 1 : 0; +				"bad entry in directory: %s - offset=%u(%u), " +				"inode=%u, rec_len=%d, name_len=%d", +				error_msg, (unsigned) (offset % size), +				offset, le32_to_cpu(de->inode), +				rlen, de->name_len); + +	return 1;  } -static int ext4_readdir(struct file *filp, -			 void *dirent, filldir_t filldir) +static int ext4_readdir(struct file *file, struct dir_context *ctx)  { -	int error = 0;  	unsigned int offset; -	int i, stored; +	int i;  	struct ext4_dir_entry_2 *de; -	struct super_block *sb;  	int err; -	struct inode *inode = filp->f_path.dentry->d_inode; -	int ret = 0; +	struct inode *inode = file_inode(file); +	struct super_block *sb = inode->i_sb;  	int dir_has_error = 0; -	sb = inode->i_sb; - -	if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb, -				    EXT4_FEATURE_COMPAT_DIR_INDEX) && -	    ((ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) || -	     ((inode->i_size >> sb->s_blocksize_bits) == 1))) { -		err = ext4_dx_readdir(filp, dirent, filldir); +	if (is_dx_dir(inode)) { +		err = ext4_dx_readdir(file, ctx);  		if (err != ERR_BAD_DX_DIR) { -			ret = err; -			goto out; +			return err;  		}  		/*  		 * We don't set the inode dirty flag since it's not  		 * critical that it get flushed back to the disk.  		 */ -		ext4_clear_inode_flag(filp->f_path.dentry->d_inode, +		ext4_clear_inode_flag(file_inode(file),  				      EXT4_INODE_INDEX);  	} -	stored = 0; -	offset = filp->f_pos & (sb->s_blocksize - 1); -	while (!error && !stored && filp->f_pos < inode->i_size) { +	if (ext4_has_inline_data(inode)) { +		int has_inline_data = 1; +		int ret = ext4_read_inline_dir(file, ctx, +					   &has_inline_data); +		if (has_inline_data) +			return ret; +	} + +	offset = ctx->pos & (sb->s_blocksize - 1); + +	while (ctx->pos < inode->i_size) {  		struct ext4_map_blocks map;  		struct buffer_head *bh = NULL; -		map.m_lblk = filp->f_pos >> EXT4_BLOCK_SIZE_BITS(sb); +		map.m_lblk = ctx->pos >> EXT4_BLOCK_SIZE_BITS(sb);  		map.m_len = 1;  		err = ext4_map_blocks(NULL, inode, &map, 0);  		if (err > 0) {  			pgoff_t index = map.m_pblk >>  					(PAGE_CACHE_SHIFT - inode->i_blkbits); -			if (!ra_has_index(&filp->f_ra, index)) +			if (!ra_has_index(&file->f_ra, index))  				page_cache_sync_readahead(  					sb->s_bdev->bd_inode->i_mapping, -					&filp->f_ra, filp, +					&file->f_ra, file,  					index, 1); -			filp->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; +			file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;  			bh = ext4_bread(NULL, inode, map.m_lblk, 0, &err);  		} @@ -152,24 +160,37 @@ static int ext4_readdir(struct file *filp,  		 */  		if (!bh) {  			if (!dir_has_error) { -				EXT4_ERROR_INODE(inode, "directory " -					   "contains a hole at offset %Lu", -					   (unsigned long long) filp->f_pos); +				EXT4_ERROR_FILE(file, 0, +						"directory contains a " +						"hole at offset %llu", +					   (unsigned long long) ctx->pos);  				dir_has_error = 1;  			}  			/* corrupt size?  Maybe no more blocks to read */ -			if (filp->f_pos > inode->i_blocks << 9) +			if (ctx->pos > inode->i_blocks << 9)  				break; -			filp->f_pos += sb->s_blocksize - offset; +			ctx->pos += sb->s_blocksize - offset; +			continue; +		} + +		/* Check the checksum */ +		if (!buffer_verified(bh) && +		    !ext4_dirent_csum_verify(inode, +				(struct ext4_dir_entry *)bh->b_data)) { +			EXT4_ERROR_FILE(file, 0, "directory fails checksum " +					"at offset %llu", +					(unsigned long long)ctx->pos); +			ctx->pos += sb->s_blocksize - offset; +			brelse(bh);  			continue;  		} +		set_buffer_verified(bh); -revalidate:  		/* If the dir block has changed since the last call to  		 * readdir(2), then we might be pointing to an invalid  		 * dirent right now.  Scan from the start of the block  		 * to make sure. */ -		if (filp->f_version != inode->i_version) { +		if (file->f_version != inode->i_version) {  			for (i = 0; i < sb->s_blocksize && i < offset; ) {  				de = (struct ext4_dir_entry_2 *)  					(bh->b_data + i); @@ -186,74 +207,129 @@ revalidate:  							    sb->s_blocksize);  			}  			offset = i; -			filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1)) +			ctx->pos = (ctx->pos & ~(sb->s_blocksize - 1))  				| offset; -			filp->f_version = inode->i_version; +			file->f_version = inode->i_version;  		} -		while (!error && filp->f_pos < inode->i_size +		while (ctx->pos < inode->i_size  		       && offset < sb->s_blocksize) {  			de = (struct ext4_dir_entry_2 *) (bh->b_data + offset); -			if (!ext4_check_dir_entry(inode, de, -						  bh, offset)) { +			if (ext4_check_dir_entry(inode, file, de, bh, +						 bh->b_data, bh->b_size, +						 offset)) {  				/* -				 * On error, skip the f_pos to the next block +				 * On error, skip to the next block  				 */ -				filp->f_pos = (filp->f_pos | +				ctx->pos = (ctx->pos |  						(sb->s_blocksize - 1)) + 1; -				brelse(bh); -				ret = stored; -				goto out; +				break;  			}  			offset += ext4_rec_len_from_disk(de->rec_len,  					sb->s_blocksize);  			if (le32_to_cpu(de->inode)) { -				/* We might block in the next section -				 * if the data destination is -				 * currently swapped out.  So, use a -				 * version stamp to detect whether or -				 * not the directory has been modified -				 * during the copy operation. -				 */ -				u64 version = filp->f_version; - -				error = filldir(dirent, de->name, +				if (!dir_emit(ctx, de->name,  						de->name_len, -						filp->f_pos,  						le32_to_cpu(de->inode), -						get_dtype(sb, de->file_type)); -				if (error) -					break; -				if (version != filp->f_version) -					goto revalidate; -				stored++; +						get_dtype(sb, de->file_type))) { +					brelse(bh); +					return 0; +				}  			} -			filp->f_pos += ext4_rec_len_from_disk(de->rec_len, +			ctx->pos += ext4_rec_len_from_disk(de->rec_len,  						sb->s_blocksize);  		}  		offset = 0;  		brelse(bh); +		if (ctx->pos < inode->i_size) { +			if (!dir_relax(inode)) +				return 0; +		}  	} -out: -	return ret; +	return 0; +} + +static inline int is_32bit_api(void) +{ +#ifdef CONFIG_COMPAT +	return is_compat_task(); +#else +	return (BITS_PER_LONG == 32); +#endif  }  /*   * These functions convert from the major/minor hash to an f_pos - * value. + * value for dx directories + * + * Upper layer (for example NFS) should specify FMODE_32BITHASH or + * FMODE_64BITHASH explicitly. On the other hand, we allow ext4 to be mounted + * directly on both 32-bit and 64-bit nodes, under such case, neither + * FMODE_32BITHASH nor FMODE_64BITHASH is specified. + */ +static inline loff_t hash2pos(struct file *filp, __u32 major, __u32 minor) +{ +	if ((filp->f_mode & FMODE_32BITHASH) || +	    (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) +		return major >> 1; +	else +		return ((__u64)(major >> 1) << 32) | (__u64)minor; +} + +static inline __u32 pos2maj_hash(struct file *filp, loff_t pos) +{ +	if ((filp->f_mode & FMODE_32BITHASH) || +	    (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) +		return (pos << 1) & 0xffffffff; +	else +		return ((pos >> 32) << 1) & 0xffffffff; +} + +static inline __u32 pos2min_hash(struct file *filp, loff_t pos) +{ +	if ((filp->f_mode & FMODE_32BITHASH) || +	    (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) +		return 0; +	else +		return pos & 0xffffffff; +} + +/* + * Return 32- or 64-bit end-of-file for dx directories + */ +static inline loff_t ext4_get_htree_eof(struct file *filp) +{ +	if ((filp->f_mode & FMODE_32BITHASH) || +	    (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) +		return EXT4_HTREE_EOF_32BIT; +	else +		return EXT4_HTREE_EOF_64BIT; +} + + +/* + * ext4_dir_llseek() calls generic_file_llseek_size to handle htree + * directories, where the "offset" is in terms of the filename hash + * value instead of the byte offset. + * + * Because we may return a 64-bit hash that is well beyond offset limits, + * we need to pass the max hash as the maximum allowable offset in + * the htree directory case.   * - * Currently we only use major hash numer.  This is unfortunate, but - * on 32-bit machines, the same VFS interface is used for lseek and - * llseek, so if we use the 64 bit offset, then the 32-bit versions of - * lseek/telldir/seekdir will blow out spectacularly, and from within - * the ext2 low-level routine, we don't know if we're being called by - * a 64-bit version of the system call or the 32-bit version of the - * system call.  Worse yet, NFSv2 only allows for a 32-bit readdir - * cookie.  Sigh. + * For non-htree, ext4_llseek already chooses the proper max offset.   */ -#define hash2pos(major, minor)	(major >> 1) -#define pos2maj_hash(pos)	((pos << 1) & 0xffffffff) -#define pos2min_hash(pos)	(0) +static loff_t ext4_dir_llseek(struct file *file, loff_t offset, int whence) +{ +	struct inode *inode = file->f_mapping->host; +	int dx_dir = is_dx_dir(inode); +	loff_t htree_max = ext4_get_htree_eof(file); + +	if (likely(dx_dir)) +		return generic_file_llseek_size(file, offset, whence, +						    htree_max, htree_max); +	else +		return ext4_llseek(file, offset, whence); +}  /*   * This structure holds the nodes of the red-black tree used to store @@ -276,53 +352,29 @@ struct fname {   */  static void free_rb_tree_fname(struct rb_root *root)  { -	struct rb_node	*n = root->rb_node; -	struct rb_node	*parent; -	struct fname	*fname; - -	while (n) { -		/* Do the node's children first */ -		if (n->rb_left) { -			n = n->rb_left; -			continue; -		} -		if (n->rb_right) { -			n = n->rb_right; -			continue; -		} -		/* -		 * The node has no children; free it, and then zero -		 * out parent's link to it.  Finally go to the -		 * beginning of the loop and try to free the parent -		 * node. -		 */ -		parent = rb_parent(n); -		fname = rb_entry(n, struct fname, rb_hash); +	struct fname *fname, *next; + +	rbtree_postorder_for_each_entry_safe(fname, next, root, rb_hash)  		while (fname) {  			struct fname *old = fname;  			fname = fname->next;  			kfree(old);  		} -		if (!parent) -			*root = RB_ROOT; -		else if (parent->rb_left == n) -			parent->rb_left = NULL; -		else if (parent->rb_right == n) -			parent->rb_right = NULL; -		n = parent; -	} + +	*root = RB_ROOT;  } -static struct dir_private_info *ext4_htree_create_dir_info(loff_t pos) +static struct dir_private_info *ext4_htree_create_dir_info(struct file *filp, +							   loff_t pos)  {  	struct dir_private_info *p;  	p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL);  	if (!p)  		return NULL; -	p->curr_hash = pos2maj_hash(pos); -	p->curr_minor_hash = pos2min_hash(pos); +	p->curr_hash = pos2maj_hash(filp, pos); +	p->curr_minor_hash = pos2min_hash(filp, pos);  	return p;  } @@ -397,63 +449,57 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,   * for all entres on the fname linked list.  (Normally there is only   * one entry on the linked list, unless there are 62 bit hash collisions.)   */ -static int call_filldir(struct file *filp, void *dirent, -			filldir_t filldir, struct fname *fname) +static int call_filldir(struct file *file, struct dir_context *ctx, +			struct fname *fname)  { -	struct dir_private_info *info = filp->private_data; -	loff_t	curr_pos; -	struct inode *inode = filp->f_path.dentry->d_inode; -	struct super_block *sb; -	int error; - -	sb = inode->i_sb; +	struct dir_private_info *info = file->private_data; +	struct inode *inode = file_inode(file); +	struct super_block *sb = inode->i_sb;  	if (!fname) { -		printk(KERN_ERR "EXT4-fs: call_filldir: called with " -		       "null fname?!?\n"); +		ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: comm %s: " +			 "called with null fname?!?", __func__, __LINE__, +			 inode->i_ino, current->comm);  		return 0;  	} -	curr_pos = hash2pos(fname->hash, fname->minor_hash); +	ctx->pos = hash2pos(file, fname->hash, fname->minor_hash);  	while (fname) { -		error = filldir(dirent, fname->name, -				fname->name_len, curr_pos, +		if (!dir_emit(ctx, fname->name, +				fname->name_len,  				fname->inode, -				get_dtype(sb, fname->file_type)); -		if (error) { -			filp->f_pos = curr_pos; +				get_dtype(sb, fname->file_type))) {  			info->extra_fname = fname; -			return error; +			return 1;  		}  		fname = fname->next;  	}  	return 0;  } -static int ext4_dx_readdir(struct file *filp, -			 void *dirent, filldir_t filldir) +static int ext4_dx_readdir(struct file *file, struct dir_context *ctx)  { -	struct dir_private_info *info = filp->private_data; -	struct inode *inode = filp->f_path.dentry->d_inode; +	struct dir_private_info *info = file->private_data; +	struct inode *inode = file_inode(file);  	struct fname *fname;  	int	ret;  	if (!info) { -		info = ext4_htree_create_dir_info(filp->f_pos); +		info = ext4_htree_create_dir_info(file, ctx->pos);  		if (!info)  			return -ENOMEM; -		filp->private_data = info; +		file->private_data = info;  	} -	if (filp->f_pos == EXT4_HTREE_EOF) +	if (ctx->pos == ext4_get_htree_eof(file))  		return 0;	/* EOF */  	/* Some one has messed with f_pos; reset the world */ -	if (info->last_pos != filp->f_pos) { +	if (info->last_pos != ctx->pos) {  		free_rb_tree_fname(&info->root);  		info->curr_node = NULL;  		info->extra_fname = NULL; -		info->curr_hash = pos2maj_hash(filp->f_pos); -		info->curr_minor_hash = pos2min_hash(filp->f_pos); +		info->curr_hash = pos2maj_hash(file, ctx->pos); +		info->curr_minor_hash = pos2min_hash(file, ctx->pos);  	}  	/* @@ -461,7 +507,7 @@ static int ext4_dx_readdir(struct file *filp,  	 * chain, return them first.  	 */  	if (info->extra_fname) { -		if (call_filldir(filp, dirent, filldir, info->extra_fname)) +		if (call_filldir(file, ctx, info->extra_fname))  			goto finished;  		info->extra_fname = NULL;  		goto next_node; @@ -475,17 +521,17 @@ static int ext4_dx_readdir(struct file *filp,  		 * cached entries.  		 */  		if ((!info->curr_node) || -		    (filp->f_version != inode->i_version)) { +		    (file->f_version != inode->i_version)) {  			info->curr_node = NULL;  			free_rb_tree_fname(&info->root); -			filp->f_version = inode->i_version; -			ret = ext4_htree_fill_tree(filp, info->curr_hash, +			file->f_version = inode->i_version; +			ret = ext4_htree_fill_tree(file, info->curr_hash,  						   info->curr_minor_hash,  						   &info->next_hash);  			if (ret < 0)  				return ret;  			if (ret == 0) { -				filp->f_pos = EXT4_HTREE_EOF; +				ctx->pos = ext4_get_htree_eof(file);  				break;  			}  			info->curr_node = rb_first(&info->root); @@ -494,7 +540,7 @@ static int ext4_dx_readdir(struct file *filp,  		fname = rb_entry(info->curr_node, struct fname, rb_hash);  		info->curr_hash = fname->hash;  		info->curr_minor_hash = fname->minor_hash; -		if (call_filldir(filp, dirent, filldir, fname)) +		if (call_filldir(file, ctx, fname))  			break;  	next_node:  		info->curr_node = rb_next(info->curr_node); @@ -505,7 +551,7 @@ static int ext4_dx_readdir(struct file *filp,  			info->curr_minor_hash = fname->minor_hash;  		} else {  			if (info->next_hash == ~0) { -				filp->f_pos = EXT4_HTREE_EOF; +				ctx->pos = ext4_get_htree_eof(file);  				break;  			}  			info->curr_hash = info->next_hash; @@ -513,7 +559,7 @@ static int ext4_dx_readdir(struct file *filp,  		}  	}  finished: -	info->last_pos = filp->f_pos; +	info->last_pos = ctx->pos;  	return 0;  } @@ -524,3 +570,15 @@ static int ext4_release_dir(struct inode *inode, struct file *filp)  	return 0;  } + +const struct file_operations ext4_dir_operations = { +	.llseek		= ext4_dir_llseek, +	.read		= generic_read_dir, +	.iterate	= ext4_readdir, +	.unlocked_ioctl = ext4_ioctl, +#ifdef CONFIG_COMPAT +	.compat_ioctl	= ext4_compat_ioctl, +#endif +	.fsync		= ext4_sync_file, +	.release	= ext4_release_dir, +};  | 
