diff options
Diffstat (limited to 'fs/ext4/namei.c')
| -rw-r--r-- | fs/ext4/namei.c | 1224 |
1 files changed, 728 insertions, 496 deletions
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index f9ed946a448..3520ab8a663 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -47,38 +47,112 @@ #define NAMEI_RA_CHUNKS 2 #define NAMEI_RA_BLOCKS 4 #define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS) -#define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b)) static struct buffer_head *ext4_append(handle_t *handle, struct inode *inode, - ext4_lblk_t *block, int *err) + ext4_lblk_t *block) { struct buffer_head *bh; + int err = 0; if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb && ((inode->i_size >> 10) >= - EXT4_SB(inode->i_sb)->s_max_dir_size_kb))) { - *err = -ENOSPC; - return NULL; - } + EXT4_SB(inode->i_sb)->s_max_dir_size_kb))) + return ERR_PTR(-ENOSPC); *block = inode->i_size >> inode->i_sb->s_blocksize_bits; - bh = ext4_bread(handle, inode, *block, 1, err); - if (bh) { - inode->i_size += inode->i_sb->s_blocksize; - EXT4_I(inode)->i_disksize = inode->i_size; - *err = ext4_journal_get_write_access(handle, bh); - if (*err) { + bh = ext4_bread(handle, inode, *block, 1, &err); + if (!bh) + return ERR_PTR(err); + inode->i_size += inode->i_sb->s_blocksize; + EXT4_I(inode)->i_disksize = inode->i_size; + BUFFER_TRACE(bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, bh); + if (err) { + brelse(bh); + ext4_std_error(inode->i_sb, err); + return ERR_PTR(err); + } + return bh; +} + +static int ext4_dx_csum_verify(struct inode *inode, + struct ext4_dir_entry *dirent); + +typedef enum { + EITHER, INDEX, DIRENT +} dirblock_type_t; + +#define ext4_read_dirblock(inode, block, type) \ + __ext4_read_dirblock((inode), (block), (type), __LINE__) + +static struct buffer_head *__ext4_read_dirblock(struct inode *inode, + ext4_lblk_t block, + dirblock_type_t type, + unsigned int line) +{ + struct buffer_head *bh; + struct ext4_dir_entry *dirent; + int err = 0, is_dx_block = 0; + + bh = ext4_bread(NULL, inode, block, 0, &err); + if (!bh) { + if (err == 0) { + ext4_error_inode(inode, __func__, line, block, + "Directory hole found"); + return ERR_PTR(-EIO); + } + __ext4_warning(inode->i_sb, __func__, line, + "error reading directory block " + "(ino %lu, block %lu)", inode->i_ino, + (unsigned long) block); + return ERR_PTR(err); + } + dirent = (struct ext4_dir_entry *) bh->b_data; + /* Determine whether or not we have an index block */ + if (is_dx(inode)) { + if (block == 0) + is_dx_block = 1; + else if (ext4_rec_len_from_disk(dirent->rec_len, + inode->i_sb->s_blocksize) == + inode->i_sb->s_blocksize) + is_dx_block = 1; + } + if (!is_dx_block && type == INDEX) { + ext4_error_inode(inode, __func__, line, block, + "directory leaf block found instead of index block"); + return ERR_PTR(-EIO); + } + if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, + EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) || + buffer_verified(bh)) + return bh; + + /* + * An empty leaf block can get mistaken for a index block; for + * this reason, we can only check the index checksum when the + * caller is sure it should be an index block. + */ + if (is_dx_block && type == INDEX) { + if (ext4_dx_csum_verify(inode, dirent)) + set_buffer_verified(bh); + else { + ext4_error_inode(inode, __func__, line, block, + "Directory index failed checksum"); brelse(bh); - bh = NULL; + return ERR_PTR(-EIO); } } - if (!bh && !(*err)) { - *err = -EIO; - ext4_error(inode->i_sb, - "Directory hole detected on inode %lu\n", - inode->i_ino); + if (!is_dx_block) { + if (ext4_dirent_csum_verify(inode, dirent)) + set_buffer_verified(bh); + else { + ext4_error_inode(inode, __func__, line, block, + "Directory block failed checksum"); + brelse(bh); + return ERR_PTR(-EIO); + } } return bh; } @@ -343,15 +417,16 @@ static __le32 ext4_dx_csum(struct inode *inode, struct ext4_dir_entry *dirent, { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct ext4_inode_info *ei = EXT4_I(inode); - __u32 csum, old_csum; + __u32 csum; + __le32 save_csum; int size; size = count_offset + (count * sizeof(struct dx_entry)); - old_csum = t->dt_checksum; + save_csum = t->dt_checksum; t->dt_checksum = 0; csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size); csum = ext4_chksum(sbi, csum, (__u8 *)t, sizeof(struct dx_tail)); - t->dt_checksum = old_csum; + t->dt_checksum = save_csum; return cpu_to_le32(csum); } @@ -604,9 +679,9 @@ dx_probe(const struct qstr *d_name, struct inode *dir, u32 hash; frame->bh = NULL; - if (!(bh = ext4_bread(NULL, dir, 0, 0, err))) { - if (*err == 0) - *err = ERR_BAD_DX_DIR; + bh = ext4_read_dirblock(dir, 0, INDEX); + if (IS_ERR(bh)) { + *err = PTR_ERR(bh); goto fail; } root = (struct dx_root *) bh->b_data; @@ -643,15 +718,6 @@ dx_probe(const struct qstr *d_name, struct inode *dir, goto fail; } - if (!buffer_verified(bh) && - !ext4_dx_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data)) { - ext4_warning(dir->i_sb, "Root failed checksum"); - brelse(bh); - *err = ERR_BAD_DX_DIR; - goto fail; - } - set_buffer_verified(bh); - entries = (struct dx_entry *) (((char *)&root->info) + root->info.info_length); @@ -709,22 +775,12 @@ dx_probe(const struct qstr *d_name, struct inode *dir, frame->entries = entries; frame->at = at; if (!indirect--) return frame; - if (!(bh = ext4_bread(NULL, dir, dx_get_block(at), 0, err))) { - if (!(*err)) - *err = ERR_BAD_DX_DIR; + bh = ext4_read_dirblock(dir, dx_get_block(at), INDEX); + if (IS_ERR(bh)) { + *err = PTR_ERR(bh); goto fail2; } - at = entries = ((struct dx_node *) bh->b_data)->entries; - - if (!buffer_verified(bh) && - !ext4_dx_csum_verify(dir, - (struct ext4_dir_entry *)bh->b_data)) { - ext4_warning(dir->i_sb, "Node failed checksum"); - brelse(bh); - *err = ERR_BAD_DX_DIR; - goto fail2; - } - set_buffer_verified(bh); + entries = ((struct dx_node *) bh->b_data)->entries; if (dx_get_limit(entries) != dx_node_limit (dir)) { ext4_warning(dir->i_sb, @@ -783,7 +839,7 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash, { struct dx_frame *p; struct buffer_head *bh; - int err, num_frames = 0; + int num_frames = 0; __u32 bhash; p = frame; @@ -822,25 +878,9 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash, * block so no check is necessary */ while (num_frames--) { - if (!(bh = ext4_bread(NULL, dir, dx_get_block(p->at), - 0, &err))) { - if (!err) { - ext4_error(dir->i_sb, - "Directory hole detected on inode %lu\n", - dir->i_ino); - return -EIO; - } - return err; /* Failure */ - } - - if (!buffer_verified(bh) && - !ext4_dx_csum_verify(dir, - (struct ext4_dir_entry *)bh->b_data)) { - ext4_warning(dir->i_sb, "Node failed checksum"); - return -EIO; - } - set_buffer_verified(bh); - + bh = ext4_read_dirblock(dir, dx_get_block(p->at), INDEX); + if (IS_ERR(bh)) + return PTR_ERR(bh); p++; brelse(p->bh); p->bh = bh; @@ -866,20 +906,9 @@ static int htree_dirblock_to_tree(struct file *dir_file, dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n", (unsigned long)block)); - if (!(bh = ext4_bread(NULL, dir, block, 0, &err))) { - if (!err) { - err = -EIO; - ext4_error(dir->i_sb, - "Directory hole detected on inode %lu\n", - dir->i_ino); - } - return err; - } - - if (!buffer_verified(bh) && - !ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data)) - return -EIO; - set_buffer_verified(bh); + bh = ext4_read_dirblock(dir, block, DIRENT); + if (IS_ERR(bh)) + return PTR_ERR(bh); de = (struct ext4_dir_entry_2 *) bh->b_data; top = (struct ext4_dir_entry_2 *) ((char *) de + @@ -890,11 +919,8 @@ static int htree_dirblock_to_tree(struct file *dir_file, bh->b_data, bh->b_size, (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb)) + ((char *)de - bh->b_data))) { - /* On error, skip the f_pos to the next block. */ - dir_file->f_pos = (dir_file->f_pos | - (dir->i_sb->s_blocksize - 1)) + 1; - brelse(bh); - return count; + /* silently ignore the rest of the block */ + break; } ext4fs_dirhash(de->name, de->name_len, hinfo); if ((hinfo->hash < start_hash) || @@ -937,13 +963,24 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n", start_hash, start_minor_hash)); - dir = dir_file->f_path.dentry->d_inode; + dir = file_inode(dir_file); if (!(ext4_test_inode_flag(dir, EXT4_INODE_INDEX))) { hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; if (hinfo.hash_version <= DX_HASH_TEA) hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned; hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed; + if (ext4_has_inline_data(dir)) { + int has_inline_data = 1; + count = htree_inlinedir_to_tree(dir_file, dir, 0, + &hinfo, start_hash, + start_minor_hash, + &has_inline_data); + if (has_inline_data) { + *next_hash = ~0; + return count; + } + } count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo, start_hash, start_minor_hash); *next_hash = ~0; @@ -1333,26 +1370,11 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q return NULL; do { block = dx_get_block(frame->at); - if (!(bh = ext4_bread(NULL, dir, block, 0, err))) { - if (!(*err)) { - *err = -EIO; - ext4_error(dir->i_sb, - "Directory hole detected on inode %lu\n", - dir->i_ino); - } + bh = ext4_read_dirblock(dir, block, DIRENT); + if (IS_ERR(bh)) { + *err = PTR_ERR(bh); goto errout; } - - if (!buffer_verified(bh) && - !ext4_dirent_csum_verify(dir, - (struct ext4_dir_entry *)bh->b_data)) { - EXT4_ERROR_INODE(dir, "checksumming directory " - "block %lu", (unsigned long)block); - brelse(bh); - *err = -EIO; - goto errout; - } - set_buffer_verified(bh); retval = search_dirblock(bh, dir, d_name, block << EXT4_BLOCK_SIZE_BITS(sb), res_dir); @@ -1404,9 +1426,8 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi return ERR_PTR(-EIO); } if (unlikely(ino == dir->i_ino)) { - EXT4_ERROR_INODE(dir, "'%.*s' linked to parent dir", - dentry->d_name.len, - dentry->d_name.name); + EXT4_ERROR_INODE(dir, "'%pd' linked to parent dir", + dentry); return ERR_PTR(-EIO); } inode = ext4_iget(dir->i_sb, ino); @@ -1443,24 +1464,6 @@ struct dentry *ext4_get_parent(struct dentry *child) return d_obtain_alias(ext4_iget(child->d_inode->i_sb, ino)); } -#define S_SHIFT 12 -static unsigned char ext4_type_by_mode[S_IFMT >> S_SHIFT] = { - [S_IFREG >> S_SHIFT] = EXT4_FT_REG_FILE, - [S_IFDIR >> S_SHIFT] = EXT4_FT_DIR, - [S_IFCHR >> S_SHIFT] = EXT4_FT_CHRDEV, - [S_IFBLK >> S_SHIFT] = EXT4_FT_BLKDEV, - [S_IFIFO >> S_SHIFT] = EXT4_FT_FIFO, - [S_IFSOCK >> S_SHIFT] = EXT4_FT_SOCK, - [S_IFLNK >> S_SHIFT] = EXT4_FT_SYMLINK, -}; - -static inline void ext4_set_de_type(struct super_block *sb, - struct ext4_dir_entry_2 *de, - umode_t mode) { - if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE)) - de->file_type = ext4_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; -} - /* * Move count entries from end of map between two memory locations. * Returns pointer to last entry moved. @@ -1536,11 +1539,12 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) csum_size = sizeof(struct ext4_dir_entry_tail); - bh2 = ext4_append (handle, dir, &newblock, &err); - if (!(bh2)) { + bh2 = ext4_append(handle, dir, &newblock); + if (IS_ERR(bh2)) { brelse(*bh); *bh = NULL; - goto errout; + *error = PTR_ERR(bh2); + return NULL; } BUFFER_TRACE(*bh, "get_write_access"); @@ -1621,7 +1625,6 @@ journal_error: brelse(bh2); *bh = NULL; ext4_std_error(dir->i_sb, err); -errout: *error = err; return NULL; } @@ -1699,7 +1702,6 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, const char *name = dentry->d_name.name; int namelen = dentry->d_name.len; unsigned int blocksize = dir->i_sb->s_blocksize; - unsigned short reclen; int csum_size = 0; int err; @@ -1707,7 +1709,6 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) csum_size = sizeof(struct ext4_dir_entry_tail); - reclen = EXT4_DIR_REC_LEN(namelen); if (!de) { err = ext4_find_dest_de(dir, inode, bh, bh->b_data, blocksize - csum_size, @@ -1778,6 +1779,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, blocksize = dir->i_sb->s_blocksize; dxtrace(printk(KERN_DEBUG "Creating index: inode %lu\n", dir->i_ino)); + BUFFER_TRACE(bh, "get_write_access"); retval = ext4_journal_get_write_access(handle, bh); if (retval) { ext4_std_error(dir->i_sb, retval); @@ -1798,10 +1800,10 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, len = ((char *) root) + (blocksize - csum_size) - (char *) de; /* Allocate new block for the 0th block's dirents */ - bh2 = ext4_append(handle, dir, &block, &retval); - if (!(bh2)) { + bh2 = ext4_append(handle, dir, &block); + if (IS_ERR(bh2)) { brelse(bh); - return retval; + return PTR_ERR(bh2); } ext4_set_inode_flag(dir, EXT4_INODE_INDEX); data1 = bh2->b_data; @@ -1918,20 +1920,10 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, } blocks = dir->i_size >> sb->s_blocksize_bits; for (block = 0; block < blocks; block++) { - if (!(bh = ext4_bread(handle, dir, block, 0, &retval))) { - if (!retval) { - retval = -EIO; - ext4_error(inode->i_sb, - "Directory hole detected on inode %lu\n", - inode->i_ino); - } - return retval; - } - if (!buffer_verified(bh) && - !ext4_dirent_csum_verify(dir, - (struct ext4_dir_entry *)bh->b_data)) - return -EIO; - set_buffer_verified(bh); + bh = ext4_read_dirblock(dir, block, DIRENT); + if (IS_ERR(bh)) + return PTR_ERR(bh); + retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh); if (retval != -ENOSPC) { brelse(bh); @@ -1943,9 +1935,9 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, return make_indexed_dir(handle, dentry, inode, bh); brelse(bh); } - bh = ext4_append(handle, dir, &block, &retval); - if (!bh) - return retval; + bh = ext4_append(handle, dir, &block); + if (IS_ERR(bh)) + return PTR_ERR(bh); de = (struct ext4_dir_entry_2 *) bh->b_data; de->inode = 0; de->rec_len = ext4_rec_len_to_disk(blocksize - csum_size, blocksize); @@ -1982,22 +1974,13 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, return err; entries = frame->entries; at = frame->at; - - if (!(bh = ext4_bread(handle, dir, dx_get_block(frame->at), 0, &err))) { - if (!err) { - err = -EIO; - ext4_error(dir->i_sb, - "Directory hole detected on inode %lu\n", - dir->i_ino); - } + bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT); + if (IS_ERR(bh)) { + err = PTR_ERR(bh); + bh = NULL; goto cleanup; } - if (!buffer_verified(bh) && - !ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data)) - goto journal_error; - set_buffer_verified(bh); - BUFFER_TRACE(bh, "get_write_access"); err = ext4_journal_get_write_access(handle, bh); if (err) @@ -2025,9 +2008,11 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, err = -ENOSPC; goto cleanup; } - bh2 = ext4_append (handle, dir, &newblock, &err); - if (!(bh2)) + bh2 = ext4_append(handle, dir, &newblock); + if (IS_ERR(bh2)) { + err = PTR_ERR(bh2); goto cleanup; + } node2 = (struct dx_node *)(bh2->b_data); entries2 = node2->entries; memset(&node2->fake, 0, sizeof(struct fake_dirent)); @@ -2106,8 +2091,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, journal_error: ext4_std_error(dir->i_sb, err); cleanup: - if (bh) - brelse(bh); + brelse(bh); dx_release(frames); return err; } @@ -2254,29 +2238,27 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode, { handle_t *handle; struct inode *inode; - int err, retries = 0; + int err, credits, retries = 0; dquot_initialize(dir); + credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3); retry: - handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + - EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + - EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); - if (IS_ERR(handle)) - return PTR_ERR(handle); - - if (IS_DIRSYNC(dir)) - ext4_handle_sync(handle); - - inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL); + inode = ext4_new_inode_start_handle(dir, mode, &dentry->d_name, 0, + NULL, EXT4_HT_DIR, credits); + handle = ext4_journal_current_handle(); err = PTR_ERR(inode); if (!IS_ERR(inode)) { inode->i_op = &ext4_file_inode_operations; inode->i_fop = &ext4_file_operations; ext4_set_aops(inode); err = ext4_add_nondir(handle, dentry, inode); + if (!err && IS_DIRSYNC(dir)) + ext4_handle_sync(handle); } - ext4_journal_stop(handle); + if (handle) + ext4_journal_stop(handle); if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) goto retry; return err; @@ -2287,34 +2269,70 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry, { handle_t *handle; struct inode *inode; - int err, retries = 0; + int err, credits, retries = 0; if (!new_valid_dev(rdev)) return -EINVAL; dquot_initialize(dir); + credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3); retry: - handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + - EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + - EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); - if (IS_ERR(handle)) - return PTR_ERR(handle); - - if (IS_DIRSYNC(dir)) - ext4_handle_sync(handle); - - inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL); + inode = ext4_new_inode_start_handle(dir, mode, &dentry->d_name, 0, + NULL, EXT4_HT_DIR, credits); + handle = ext4_journal_current_handle(); err = PTR_ERR(inode); if (!IS_ERR(inode)) { init_special_inode(inode, inode->i_mode, rdev); inode->i_op = &ext4_special_inode_operations; err = ext4_add_nondir(handle, dentry, inode); + if (!err && IS_DIRSYNC(dir)) + ext4_handle_sync(handle); } - ext4_journal_stop(handle); + if (handle) + ext4_journal_stop(handle); + if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) + goto retry; + return err; +} + +static int ext4_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) +{ + handle_t *handle; + struct inode *inode; + int err, retries = 0; + + dquot_initialize(dir); + +retry: + inode = ext4_new_inode_start_handle(dir, mode, + NULL, 0, NULL, + EXT4_HT_DIR, + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) + + 4 + EXT4_XATTR_TRANS_BLOCKS); + handle = ext4_journal_current_handle(); + err = PTR_ERR(inode); + if (!IS_ERR(inode)) { + inode->i_op = &ext4_file_inode_operations; + inode->i_fop = &ext4_file_operations; + ext4_set_aops(inode); + d_tmpfile(dentry, inode); + err = ext4_orphan_add(handle, inode); + if (err) + goto err_unlock_inode; + mark_inode_dirty(inode); + unlock_new_inode(inode); + } + if (handle) + ext4_journal_stop(handle); if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) goto retry; return err; +err_unlock_inode: + ext4_journal_stop(handle); + unlock_new_inode(inode); + return err; } struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode, @@ -2351,6 +2369,7 @@ static int ext4_init_new_dir(handle_t *handle, struct inode *dir, struct buffer_head *dir_block = NULL; struct ext4_dir_entry_2 *de; struct ext4_dir_entry_tail *t; + ext4_lblk_t block = 0; unsigned int blocksize = dir->i_sb->s_blocksize; int csum_size = 0; int err; @@ -2367,16 +2386,10 @@ static int ext4_init_new_dir(handle_t *handle, struct inode *dir, goto out; } - inode->i_size = EXT4_I(inode)->i_disksize = blocksize; - if (!(dir_block = ext4_bread(handle, inode, 0, 1, &err))) { - if (!err) { - err = -EIO; - ext4_error(inode->i_sb, - "Directory hole detected on inode %lu\n", - inode->i_ino); - } - goto out; - } + inode->i_size = 0; + dir_block = ext4_append(handle, inode, &block); + if (IS_ERR(dir_block)) + return PTR_ERR(dir_block); BUFFER_TRACE(dir_block, "get_write_access"); err = ext4_journal_get_write_access(handle, dir_block); if (err) @@ -2403,25 +2416,20 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { handle_t *handle; struct inode *inode; - int err, retries = 0; + int err, credits, retries = 0; if (EXT4_DIR_LINK_MAX(dir)) return -EMLINK; dquot_initialize(dir); + credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3); retry: - handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + - EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + - EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); - if (IS_ERR(handle)) - return PTR_ERR(handle); - - if (IS_DIRSYNC(dir)) - ext4_handle_sync(handle); - - inode = ext4_new_inode(handle, dir, S_IFDIR | mode, - &dentry->d_name, 0, NULL); + inode = ext4_new_inode_start_handle(dir, S_IFDIR | mode, + &dentry->d_name, + 0, NULL, EXT4_HT_DIR, credits); + handle = ext4_journal_current_handle(); err = PTR_ERR(inode); if (IS_ERR(inode)) goto out_stop; @@ -2449,8 +2457,12 @@ out_clear_inode: goto out_clear_inode; unlock_new_inode(inode); d_instantiate(dentry, inode); + if (IS_DIRSYNC(dir)) + ext4_handle_sync(handle); + out_stop: - ext4_journal_stop(handle); + if (handle) + ext4_journal_stop(handle); if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) goto retry; return err; @@ -2476,25 +2488,14 @@ static int empty_dir(struct inode *inode) } sb = inode->i_sb; - if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || - !(bh = ext4_bread(NULL, inode, 0, 0, &err))) { - if (err) - EXT4_ERROR_INODE(inode, - "error %d reading directory lblock 0", err); - else - ext4_warning(inode->i_sb, - "bad directory (dir #%lu) - no data block", - inode->i_ino); + if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2)) { + EXT4_ERROR_INODE(inode, "invalid size"); return 1; } - if (!buffer_verified(bh) && - !ext4_dirent_csum_verify(inode, - (struct ext4_dir_entry *)bh->b_data)) { - EXT4_ERROR_INODE(inode, "checksum error reading directory " - "lblock 0"); - return -EIO; - } - set_buffer_verified(bh); + bh = ext4_read_dirblock(inode, 0, EITHER); + if (IS_ERR(bh)) + return 1; + de = (struct ext4_dir_entry_2 *) bh->b_data; de1 = ext4_next_entry(de, sb->s_blocksize); if (le32_to_cpu(de->inode) != inode->i_ino || @@ -2511,34 +2512,14 @@ static int empty_dir(struct inode *inode) ext4_rec_len_from_disk(de1->rec_len, sb->s_blocksize); de = ext4_next_entry(de1, sb->s_blocksize); while (offset < inode->i_size) { - if (!bh || - (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { + if ((void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { unsigned int lblock; err = 0; brelse(bh); lblock = offset >> EXT4_BLOCK_SIZE_BITS(sb); - bh = ext4_bread(NULL, inode, lblock, 0, &err); - if (!bh) { - if (err) - EXT4_ERROR_INODE(inode, - "error %d reading directory " - "lblock %u", err, lblock); - else - ext4_warning(inode->i_sb, - "bad directory (dir #%lu) - no data block", - inode->i_ino); - - offset += sb->s_blocksize; - continue; - } - if (!buffer_verified(bh) && - !ext4_dirent_csum_verify(inode, - (struct ext4_dir_entry *)bh->b_data)) { - EXT4_ERROR_INODE(inode, "checksum error " - "reading directory lblock 0"); - return -EIO; - } - set_buffer_verified(bh); + bh = ext4_read_dirblock(inode, lblock, EITHER); + if (IS_ERR(bh)) + return 1; de = (struct ext4_dir_entry_2 *) bh->b_data; } if (ext4_check_dir_entry(inode, NULL, de, bh, @@ -2559,26 +2540,37 @@ static int empty_dir(struct inode *inode) return 1; } -/* ext4_orphan_add() links an unlinked or truncated inode into a list of +/* + * ext4_orphan_add() links an unlinked or truncated inode into a list of * such inodes, starting at the superblock, in case we crash before the * file is closed/deleted, or in case the inode truncate spans multiple * transactions and the last transaction is not recovered after a crash. * * At filesystem recovery time, we walk this list deleting unlinked * inodes and truncating linked inodes in ext4_orphan_cleanup(). + * + * Orphan list manipulation functions must be called under i_mutex unless + * we are just creating the inode or deleting it. */ int ext4_orphan_add(handle_t *handle, struct inode *inode) { struct super_block *sb = inode->i_sb; + struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_iloc iloc; int err = 0, rc; + bool dirty = false; - if (!EXT4_SB(sb)->s_journal) + if (!sbi->s_journal) return 0; - mutex_lock(&EXT4_SB(sb)->s_orphan_lock); + WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) && + !mutex_is_locked(&inode->i_mutex)); + /* + * Exit early if inode already is on orphan list. This is a big speedup + * since we don't have to contend on the global s_orphan_lock. + */ if (!list_empty(&EXT4_I(inode)->i_orphan)) - goto out_unlock; + return 0; /* * Orphan handling is only valid for files with data blocks @@ -2589,48 +2581,51 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); - BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access"); - err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); + BUFFER_TRACE(sbi->s_sbh, "get_write_access"); + err = ext4_journal_get_write_access(handle, sbi->s_sbh); if (err) - goto out_unlock; + goto out; err = ext4_reserve_inode_write(handle, inode, &iloc); if (err) - goto out_unlock; + goto out; + + mutex_lock(&sbi->s_orphan_lock); /* * Due to previous errors inode may be already a part of on-disk * orphan list. If so skip on-disk list modification. */ - if (NEXT_ORPHAN(inode) && NEXT_ORPHAN(inode) <= - (le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))) - goto mem_insert; - - /* Insert this inode at the head of the on-disk orphan list... */ - NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan); - EXT4_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); - err = ext4_handle_dirty_super(handle, sb); - rc = ext4_mark_iloc_dirty(handle, inode, &iloc); - if (!err) - err = rc; - - /* Only add to the head of the in-memory list if all the - * previous operations succeeded. If the orphan_add is going to - * fail (possibly taking the journal offline), we can't risk - * leaving the inode on the orphan list: stray orphan-list - * entries can cause panics at unmount time. - * - * This is safe: on error we're going to ignore the orphan list - * anyway on the next recovery. */ -mem_insert: - if (!err) - list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); - + if (!NEXT_ORPHAN(inode) || NEXT_ORPHAN(inode) > + (le32_to_cpu(sbi->s_es->s_inodes_count))) { + /* Insert this inode at the head of the on-disk orphan list */ + NEXT_ORPHAN(inode) = le32_to_cpu(sbi->s_es->s_last_orphan); + sbi->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); + dirty = true; + } + list_add(&EXT4_I(inode)->i_orphan, &sbi->s_orphan); + mutex_unlock(&sbi->s_orphan_lock); + + if (dirty) { + err = ext4_handle_dirty_super(handle, sb); + rc = ext4_mark_iloc_dirty(handle, inode, &iloc); + if (!err) + err = rc; + if (err) { + /* + * We have to remove inode from in-memory list if + * addition to on disk orphan list failed. Stray orphan + * list entries can cause panics at unmount time. + */ + mutex_lock(&sbi->s_orphan_lock); + list_del(&EXT4_I(inode)->i_orphan); + mutex_unlock(&sbi->s_orphan_lock); + } + } jbd_debug(4, "superblock will point to %lu\n", inode->i_ino); jbd_debug(4, "orphan inode %lu will point to %d\n", inode->i_ino, NEXT_ORPHAN(inode)); -out_unlock: - mutex_unlock(&EXT4_SB(sb)->s_orphan_lock); - ext4_std_error(inode->i_sb, err); +out: + ext4_std_error(sb, err); return err; } @@ -2642,45 +2637,51 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) { struct list_head *prev; struct ext4_inode_info *ei = EXT4_I(inode); - struct ext4_sb_info *sbi; + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); __u32 ino_next; struct ext4_iloc iloc; int err = 0; - if ((!EXT4_SB(inode->i_sb)->s_journal) && - !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) + if (!sbi->s_journal && !(sbi->s_mount_state & EXT4_ORPHAN_FS)) return 0; - mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock); + WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) && + !mutex_is_locked(&inode->i_mutex)); + /* Do this quick check before taking global s_orphan_lock. */ if (list_empty(&ei->i_orphan)) - goto out; + return 0; - ino_next = NEXT_ORPHAN(inode); - prev = ei->i_orphan.prev; - sbi = EXT4_SB(inode->i_sb); + if (handle) { + /* Grab inode buffer early before taking global s_orphan_lock */ + err = ext4_reserve_inode_write(handle, inode, &iloc); + } + mutex_lock(&sbi->s_orphan_lock); jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino); + prev = ei->i_orphan.prev; list_del_init(&ei->i_orphan); /* If we're on an error path, we may not have a valid * transaction handle with which to update the orphan list on * disk, but we still need to remove the inode from the linked * list in memory. */ - if (!handle) - goto out; - - err = ext4_reserve_inode_write(handle, inode, &iloc); - if (err) + if (!handle || err) { + mutex_unlock(&sbi->s_orphan_lock); goto out_err; + } + ino_next = NEXT_ORPHAN(inode); if (prev == &sbi->s_orphan) { jbd_debug(4, "superblock will point to %u\n", ino_next); BUFFER_TRACE(sbi->s_sbh, "get_write_access"); err = ext4_journal_get_write_access(handle, sbi->s_sbh); - if (err) + if (err) { + mutex_unlock(&sbi->s_orphan_lock); goto out_brelse; + } sbi->s_es->s_last_orphan = cpu_to_le32(ino_next); + mutex_unlock(&sbi->s_orphan_lock); err = ext4_handle_dirty_super(handle, inode->i_sb); } else { struct ext4_iloc iloc2; @@ -2690,20 +2691,20 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) jbd_debug(4, "orphan inode %lu will point to %u\n", i_prev->i_ino, ino_next); err = ext4_reserve_inode_write(handle, i_prev, &iloc2); - if (err) + if (err) { + mutex_unlock(&sbi->s_orphan_lock); goto out_brelse; + } NEXT_ORPHAN(i_prev) = ino_next; err = ext4_mark_iloc_dirty(handle, i_prev, &iloc2); + mutex_unlock(&sbi->s_orphan_lock); } if (err) goto out_brelse; NEXT_ORPHAN(inode) = 0; err = ext4_mark_iloc_dirty(handle, inode, &iloc); - out_err: ext4_std_error(inode->i_sb, err); -out: - mutex_unlock(&EXT4_SB(inode->i_sb)->s_orphan_lock); return err; out_brelse: @@ -2717,25 +2718,18 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry) struct inode *inode; struct buffer_head *bh; struct ext4_dir_entry_2 *de; - handle_t *handle; + handle_t *handle = NULL; /* Initialize quotas before so that eventual writes go in * separate transaction */ dquot_initialize(dir); dquot_initialize(dentry->d_inode); - handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb)); - if (IS_ERR(handle)) - return PTR_ERR(handle); - retval = -ENOENT; bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL); if (!bh) goto end_rmdir; - if (IS_DIRSYNC(dir)) - ext4_handle_sync(handle); - inode = dentry->d_inode; retval = -EIO; @@ -2746,6 +2740,17 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry) if (!empty_dir(inode)) goto end_rmdir; + handle = ext4_journal_start(dir, EXT4_HT_DIR, + EXT4_DATA_TRANS_BLOCKS(dir->i_sb)); + if (IS_ERR(handle)) { + retval = PTR_ERR(handle); + handle = NULL; + goto end_rmdir; + } + + if (IS_DIRSYNC(dir)) + ext4_handle_sync(handle); + retval = ext4_delete_entry(handle, dir, de, bh); if (retval) goto end_rmdir; @@ -2767,8 +2772,9 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry) ext4_mark_inode_dirty(handle, dir); end_rmdir: - ext4_journal_stop(handle); brelse(bh); + if (handle) + ext4_journal_stop(handle); return retval; } @@ -2778,7 +2784,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) struct inode *inode; struct buffer_head *bh; struct ext4_dir_entry_2 *de; - handle_t *handle; + handle_t *handle = NULL; trace_ext4_unlink_enter(dir, dentry); /* Initialize quotas before so that eventual writes go @@ -2786,13 +2792,6 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) dquot_initialize(dir); dquot_initialize(dentry->d_inode); - handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb)); - if (IS_ERR(handle)) - return PTR_ERR(handle); - - if (IS_DIRSYNC(dir)) - ext4_handle_sync(handle); - retval = -ENOENT; bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL); if (!bh) @@ -2804,6 +2803,17 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) if (le32_to_cpu(de->inode) != inode->i_ino) goto end_unlink; + handle = ext4_journal_start(dir, EXT4_HT_DIR, + EXT4_DATA_TRANS_BLOCKS(dir->i_sb)); + if (IS_ERR(handle)) { + retval = PTR_ERR(handle); + handle = NULL; + goto end_unlink; + } + + if (IS_DIRSYNC(dir)) + ext4_handle_sync(handle); + if (!inode->i_nlink) { ext4_warning(inode->i_sb, "Deleting nonexistent file (%lu), %d", @@ -2824,8 +2834,9 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) retval = 0; end_unlink: - ext4_journal_stop(handle); brelse(bh); + if (handle) + ext4_journal_stop(handle); trace_ext4_unlink_exit(dentry, retval); return retval; } @@ -2861,19 +2872,13 @@ static int ext4_symlink(struct inode *dir, * quota blocks, sb is already counted in previous macros). */ credits = EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + - EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + - EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb); + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3; } retry: - handle = ext4_journal_start(dir, credits); - if (IS_ERR(handle)) - return PTR_ERR(handle); - - if (IS_DIRSYNC(dir)) - ext4_handle_sync(handle); - - inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO, - &dentry->d_name, 0, NULL); + inode = ext4_new_inode_start_handle(dir, S_IFLNK|S_IRWXUGO, + &dentry->d_name, 0, NULL, + EXT4_HT_DIR, credits); + handle = ext4_journal_current_handle(); err = PTR_ERR(inode); if (IS_ERR(inode)) goto out_stop; @@ -2903,7 +2908,7 @@ retry: * Now inode is being linked into dir (EXT4_DATA_TRANS_BLOCKS * + EXT4_INDEX_EXTRA_TRANS_BLOCKS), inode is also modified */ - handle = ext4_journal_start(dir, + handle = ext4_journal_start(dir, EXT4_HT_DIR, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 1); if (IS_ERR(handle)) { @@ -2926,8 +2931,12 @@ retry: } EXT4_I(inode)->i_disksize = inode->i_size; err = ext4_add_nondir(handle, dentry, inode); + if (!err && IS_DIRSYNC(dir)) + ext4_handle_sync(handle); + out_stop: - ext4_journal_stop(handle); + if (handle) + ext4_journal_stop(handle); if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) goto retry; return err; @@ -2950,8 +2959,9 @@ static int ext4_link(struct dentry *old_dentry, dquot_initialize(dir); retry: - handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + - EXT4_INDEX_EXTRA_TRANS_BLOCKS); + handle = ext4_journal_start(dir, EXT4_HT_DIR, + (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + + EXT4_INDEX_EXTRA_TRANS_BLOCKS) + 1); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -2965,6 +2975,11 @@ retry: err = ext4_add_entry(handle, dentry, inode); if (!err) { ext4_mark_inode_dirty(handle, inode); + /* this can happen only for tmpfile being + * linked the first time + */ + if (inode->i_nlink == 1) + ext4_orphan_del(handle, inode); d_instantiate(dentry, inode); } else { drop_nlink(inode); @@ -2991,13 +3006,9 @@ static struct buffer_head *ext4_get_first_dir_block(handle_t *handle, struct buffer_head *bh; if (!ext4_has_inline_data(inode)) { - if (!(bh = ext4_bread(handle, inode, 0, 0, retval))) { - if (!*retval) { - *retval = -EIO; - ext4_error(inode->i_sb, - "Directory hole detected on inode %lu\n", - inode->i_ino); - } + bh = ext4_read_dirblock(inode, 0, EITHER); + if (IS_ERR(bh)) { + *retval = PTR_ERR(bh); return NULL; } *parent_de = ext4_next_entry( @@ -3010,210 +3021,427 @@ static struct buffer_head *ext4_get_first_dir_block(handle_t *handle, return ext4_get_first_inline_block(inode, parent_de, retval); } +struct ext4_renament { + struct inode *dir; + struct dentry *dentry; + struct inode *inode; + bool is_dir; + int dir_nlink_delta; + + /* entry for "dentry" */ + struct buffer_head *bh; + struct ext4_dir_entry_2 *de; + int inlined; + + /* entry for ".." in inode if it's a directory */ + struct buffer_head *dir_bh; + struct ext4_dir_entry_2 *parent_de; + int dir_inlined; +}; + +static int ext4_rename_dir_prepare(handle_t *handle, struct ext4_renament *ent) +{ + int retval; + + ent->dir_bh = ext4_get_first_dir_block(handle, ent->inode, + &retval, &ent->parent_de, + &ent->dir_inlined); + if (!ent->dir_bh) + return retval; + if (le32_to_cpu(ent->parent_de->inode) != ent->dir->i_ino) + return -EIO; + BUFFER_TRACE(ent->dir_bh, "get_write_access"); + return ext4_journal_get_write_access(handle, ent->dir_bh); +} + +static int ext4_rename_dir_finish(handle_t *handle, struct ext4_renament *ent, + unsigned dir_ino) +{ + int retval; + + ent->parent_de->inode = cpu_to_le32(dir_ino); + BUFFER_TRACE(ent->dir_bh, "call ext4_handle_dirty_metadata"); + if (!ent->dir_inlined) { + if (is_dx(ent->inode)) { + retval = ext4_handle_dirty_dx_node(handle, + ent->inode, + ent->dir_bh); + } else { + retval = ext4_handle_dirty_dirent_node(handle, + ent->inode, + ent->dir_bh); + } + } else { + retval = ext4_mark_inode_dirty(handle, ent->inode); + } + if (retval) { + ext4_std_error(ent->dir->i_sb, retval); + return retval; + } + return 0; +} + +static int ext4_setent(handle_t *handle, struct ext4_renament *ent, + unsigned ino, unsigned file_type) +{ + int retval; + + BUFFER_TRACE(ent->bh, "get write access"); + retval = ext4_journal_get_write_access(handle, ent->bh); + if (retval) + return retval; + ent->de->inode = cpu_to_le32(ino); + if (EXT4_HAS_INCOMPAT_FEATURE(ent->dir->i_sb, + EXT4_FEATURE_INCOMPAT_FILETYPE)) + ent->de->file_type = file_type; + ent->dir->i_version++; + ent->dir->i_ctime = ent->dir->i_mtime = + ext4_current_time(ent->dir); + ext4_mark_inode_dirty(handle, ent->dir); + BUFFER_TRACE(ent->bh, "call ext4_handle_dirty_metadata"); + if (!ent->inlined) { + retval = ext4_handle_dirty_dirent_node(handle, + ent->dir, ent->bh); + if (unlikely(retval)) { + ext4_std_error(ent->dir->i_sb, retval); + return retval; + } + } + brelse(ent->bh); + ent->bh = NULL; + + return 0; +} + +static int ext4_find_delete_entry(handle_t *handle, struct inode *dir, + const struct qstr *d_name) +{ + int retval = -ENOENT; + struct buffer_head *bh; + struct ext4_dir_entry_2 *de; + + bh = ext4_find_entry(dir, d_name, &de, NULL); + if (bh) { + retval = ext4_delete_entry(handle, dir, de, bh); + brelse(bh); + } + return retval; +} + +static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent) +{ + int retval; + /* + * ent->de could have moved from under us during htree split, so make + * sure that we are deleting the right entry. We might also be pointing + * to a stale entry in the unused part of ent->bh so just checking inum + * and the name isn't enough. + */ + if (le32_to_cpu(ent->de->inode) != ent->inode->i_ino || + ent->de->name_len != ent->dentry->d_name.len || + strncmp(ent->de->name, ent->dentry->d_name.name, + ent->de->name_len)) { + retval = ext4_find_delete_entry(handle, ent->dir, + &ent->dentry->d_name); + } else { + retval = ext4_delete_entry(handle, ent->dir, ent->de, ent->bh); + if (retval == -ENOENT) { + retval = ext4_find_delete_entry(handle, ent->dir, + &ent->dentry->d_name); + } + } + + if (retval) { + ext4_warning(ent->dir->i_sb, + "Deleting old file (%lu), %d, error=%d", + ent->dir->i_ino, ent->dir->i_nlink, retval); + } +} + +static void ext4_update_dir_count(handle_t *handle, struct ext4_renament *ent) +{ + if (ent->dir_nlink_delta) { + if (ent->dir_nlink_delta == -1) + ext4_dec_count(handle, ent->dir); + else + ext4_inc_count(handle, ent->dir); + ext4_mark_inode_dirty(handle, ent->dir); + } +} + /* * Anybody can rename anything with this: the permission checks are left to the * higher-level routines. + * + * n.b. old_{dentry,inode) refers to the source dentry/inode + * while new_{dentry,inode) refers to the destination dentry/inode + * This comes from rename(const char *oldpath, const char *newpath) */ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) { - handle_t *handle; - struct inode *old_inode, *new_inode; - struct buffer_head *old_bh, *new_bh, *dir_bh; - struct ext4_dir_entry_2 *old_de, *new_de; - int retval, force_da_alloc = 0; - int inlined = 0, new_inlined = 0; - struct ext4_dir_entry_2 *parent_de; - - dquot_initialize(old_dir); - dquot_initialize(new_dir); + handle_t *handle = NULL; + struct ext4_renament old = { + .dir = old_dir, + .dentry = old_dentry, + .inode = old_dentry->d_inode, + }; + struct ext4_renament new = { + .dir = new_dir, + .dentry = new_dentry, + .inode = new_dentry->d_inode, + }; + int retval; - old_bh = new_bh = dir_bh = NULL; + dquot_initialize(old.dir); + dquot_initialize(new.dir); /* Initialize quotas before so that eventual writes go * in separate transaction */ - if (new_dentry->d_inode) - dquot_initialize(new_dentry->d_inode); - handle = ext4_journal_start(old_dir, 2 * - EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) + - EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2); - if (IS_ERR(handle)) - return PTR_ERR(handle); + if (new.inode) + dquot_initialize(new.inode); - if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) - ext4_handle_sync(handle); - - old_bh = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de, NULL); + old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL); /* * Check for inode number is _not_ due to possible IO errors. * We might rmdir the source, keep it as pwd of some process * and merrily kill the link to whatever was created under the * same name. Goodbye sticky bit ;-< */ - old_inode = old_dentry->d_inode; retval = -ENOENT; - if (!old_bh || le32_to_cpu(old_de->inode) != old_inode->i_ino) + if (!old.bh || le32_to_cpu(old.de->inode) != old.inode->i_ino) goto end_rename; - new_inode = new_dentry->d_inode; - new_bh = ext4_find_entry(new_dir, &new_dentry->d_name, - &new_de, &new_inlined); - if (new_bh) { - if (!new_inode) { - brelse(new_bh); - new_bh = NULL; + new.bh = ext4_find_entry(new.dir, &new.dentry->d_name, + &new.de, &new.inlined); + if (new.bh) { + if (!new.inode) { + brelse(new.bh); + new.bh = NULL; } } - if (S_ISDIR(old_inode->i_mode)) { - if (new_inode) { + if (new.inode && !test_opt(new.dir->i_sb, NO_AUTO_DA_ALLOC)) + ext4_alloc_da_blocks(old.inode); + + handle = ext4_journal_start(old.dir, EXT4_HT_DIR, + (2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) + + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2)); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir)) + ext4_handle_sync(handle); + + if (S_ISDIR(old.inode->i_mode)) { + if (new.inode) { retval = -ENOTEMPTY; - if (!empty_dir(new_inode)) + if (!empty_dir(new.inode)) + goto end_rename; + } else { + retval = -EMLINK; + if (new.dir != old.dir && EXT4_DIR_LINK_MAX(new.dir)) goto end_rename; } - retval = -EIO; - dir_bh = ext4_get_first_dir_block(handle, old_inode, - &retval, &parent_de, - &inlined); - if (!dir_bh) - goto end_rename; - if (!inlined && !buffer_verified(dir_bh) && - !ext4_dirent_csum_verify(old_inode, - (struct ext4_dir_entry *)dir_bh->b_data)) - goto end_rename; - set_buffer_verified(dir_bh); - if (le32_to_cpu(parent_de->inode) != old_dir->i_ino) - goto end_rename; - retval = -EMLINK; - if (!new_inode && new_dir != old_dir && - EXT4_DIR_LINK_MAX(new_dir)) - goto end_rename; - BUFFER_TRACE(dir_bh, "get_write_access"); - retval = ext4_journal_get_write_access(handle, dir_bh); + retval = ext4_rename_dir_prepare(handle, &old); if (retval) goto end_rename; } - if (!new_bh) { - retval = ext4_add_entry(handle, new_dentry, old_inode); + if (!new.bh) { + retval = ext4_add_entry(handle, new.dentry, old.inode); if (retval) goto end_rename; } else { - BUFFER_TRACE(new_bh, "get write access"); - retval = ext4_journal_get_write_access(handle, new_bh); + retval = ext4_setent(handle, &new, + old.inode->i_ino, old.de->file_type); if (retval) goto end_rename; - new_de->inode = cpu_to_le32(old_inode->i_ino); - if (EXT4_HAS_INCOMPAT_FEATURE(new_dir->i_sb, - EXT4_FEATURE_INCOMPAT_FILETYPE)) - new_de->file_type = old_de->file_type; - new_dir->i_version++; - new_dir->i_ctime = new_dir->i_mtime = - ext4_current_time(new_dir); - ext4_mark_inode_dirty(handle, new_dir); - BUFFER_TRACE(new_bh, "call ext4_handle_dirty_metadata"); - if (!new_inlined) { - retval = ext4_handle_dirty_dirent_node(handle, - new_dir, new_bh); - if (unlikely(retval)) { - ext4_std_error(new_dir->i_sb, retval); - goto end_rename; - } - } - brelse(new_bh); - new_bh = NULL; } /* * Like most other Unix systems, set the ctime for inodes on a * rename. */ - old_inode->i_ctime = ext4_current_time(old_inode); - ext4_mark_inode_dirty(handle, old_inode); + old.inode->i_ctime = ext4_current_time(old.inode); + ext4_mark_inode_dirty(handle, old.inode); /* * ok, that's it */ - if (le32_to_cpu(old_de->inode) != old_inode->i_ino || - old_de->name_len != old_dentry->d_name.len || - strncmp(old_de->name, old_dentry->d_name.name, old_de->name_len) || - (retval = ext4_delete_entry(handle, old_dir, - old_de, old_bh)) == -ENOENT) { - /* old_de could have moved from under us during htree split, so - * make sure that we are deleting the right entry. We might - * also be pointing to a stale entry in the unused part of - * old_bh so just checking inum and the name isn't enough. */ - struct buffer_head *old_bh2; - struct ext4_dir_entry_2 *old_de2; - - old_bh2 = ext4_find_entry(old_dir, &old_dentry->d_name, - &old_de2, NULL); - if (old_bh2) { - retval = ext4_delete_entry(handle, old_dir, - old_de2, old_bh2); - brelse(old_bh2); - } + ext4_rename_delete(handle, &old); + + if (new.inode) { + ext4_dec_count(handle, new.inode); + new.inode->i_ctime = ext4_current_time(new.inode); } - if (retval) { - ext4_warning(old_dir->i_sb, - "Deleting old file (%lu), %d, error=%d", - old_dir->i_ino, old_dir->i_nlink, retval); - } - - if (new_inode) { - ext4_dec_count(handle, new_inode); - new_inode->i_ctime = ext4_current_time(new_inode); - } - old_dir->i_ctime = old_dir->i_mtime = ext4_current_time(old_dir); - ext4_update_dx_flag(old_dir); - if (dir_bh) { - parent_de->inode = cpu_to_le32(new_dir->i_ino); - BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata"); - if (!inlined) { - if (is_dx(old_inode)) { - retval = ext4_handle_dirty_dx_node(handle, - old_inode, - dir_bh); - } else { - retval = ext4_handle_dirty_dirent_node(handle, - old_inode, dir_bh); - } - } else { - retval = ext4_mark_inode_dirty(handle, old_inode); - } - if (retval) { - ext4_std_error(old_dir->i_sb, retval); + old.dir->i_ctime = old.dir->i_mtime = ext4_current_time(old.dir); + ext4_update_dx_flag(old.dir); + if (old.dir_bh) { + retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino); + if (retval) goto end_rename; - } - ext4_dec_count(handle, old_dir); - if (new_inode) { + + ext4_dec_count(handle, old.dir); + if (new.inode) { /* checked empty_dir above, can't have another parent, * ext4_dec_count() won't work for many-linked dirs */ - clear_nlink(new_inode); + clear_nlink(new.inode); } else { - ext4_inc_count(handle, new_dir); - ext4_update_dx_flag(new_dir); - ext4_mark_inode_dirty(handle, new_dir); + ext4_inc_count(handle, new.dir); + ext4_update_dx_flag(new.dir); + ext4_mark_inode_dirty(handle, new.dir); } } - ext4_mark_inode_dirty(handle, old_dir); - if (new_inode) { - ext4_mark_inode_dirty(handle, new_inode); - if (!new_inode->i_nlink) - ext4_orphan_add(handle, new_inode); - if (!test_opt(new_dir->i_sb, NO_AUTO_DA_ALLOC)) - force_da_alloc = 1; + ext4_mark_inode_dirty(handle, old.dir); + if (new.inode) { + ext4_mark_inode_dirty(handle, new.inode); + if (!new.inode->i_nlink) + ext4_orphan_add(handle, new.inode); } retval = 0; end_rename: - brelse(dir_bh); - brelse(old_bh); - brelse(new_bh); - ext4_journal_stop(handle); - if (retval == 0 && force_da_alloc) - ext4_alloc_da_blocks(old_inode); + brelse(old.dir_bh); + brelse(old.bh); + brelse(new.bh); + if (handle) + ext4_journal_stop(handle); + return retval; +} + +static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + handle_t *handle = NULL; + struct ext4_renament old = { + .dir = old_dir, + .dentry = old_dentry, + .inode = old_dentry->d_inode, + }; + struct ext4_renament new = { + .dir = new_dir, + .dentry = new_dentry, + .inode = new_dentry->d_inode, + }; + u8 new_file_type; + int retval; + + dquot_initialize(old.dir); + dquot_initialize(new.dir); + + old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, + &old.de, &old.inlined); + /* + * Check for inode number is _not_ due to possible IO errors. + * We might rmdir the source, keep it as pwd of some process + * and merrily kill the link to whatever was created under the + * same name. Goodbye sticky bit ;-< + */ + retval = -ENOENT; + if (!old.bh || le32_to_cpu(old.de->inode) != old.inode->i_ino) + goto end_rename; + + new.bh = ext4_find_entry(new.dir, &new.dentry->d_name, + &new.de, &new.inlined); + + /* RENAME_EXCHANGE case: old *and* new must both exist */ + if (!new.bh || le32_to_cpu(new.de->inode) != new.inode->i_ino) + goto end_rename; + + handle = ext4_journal_start(old.dir, EXT4_HT_DIR, + (2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) + + 2 * EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2)); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir)) + ext4_handle_sync(handle); + + if (S_ISDIR(old.inode->i_mode)) { + old.is_dir = true; + retval = ext4_rename_dir_prepare(handle, &old); + if (retval) + goto end_rename; + } + if (S_ISDIR(new.inode->i_mode)) { + new.is_dir = true; + retval = ext4_rename_dir_prepare(handle, &new); + if (retval) + goto end_rename; + } + + /* + * Other than the special case of overwriting a directory, parents' + * nlink only needs to be modified if this is a cross directory rename. + */ + if (old.dir != new.dir && old.is_dir != new.is_dir) { + old.dir_nlink_delta = old.is_dir ? -1 : 1; + new.dir_nlink_delta = -old.dir_nlink_delta; + retval = -EMLINK; + if ((old.dir_nlink_delta > 0 && EXT4_DIR_LINK_MAX(old.dir)) || + (new.dir_nlink_delta > 0 && EXT4_DIR_LINK_MAX(new.dir))) + goto end_rename; + } + + new_file_type = new.de->file_type; + retval = ext4_setent(handle, &new, old.inode->i_ino, old.de->file_type); + if (retval) + goto end_rename; + + retval = ext4_setent(handle, &old, new.inode->i_ino, new_file_type); + if (retval) + goto end_rename; + + /* + * Like most other Unix systems, set the ctime for inodes on a + * rename. + */ + old.inode->i_ctime = ext4_current_time(old.inode); + new.inode->i_ctime = ext4_current_time(new.inode); + ext4_mark_inode_dirty(handle, old.inode); + ext4_mark_inode_dirty(handle, new.inode); + + if (old.dir_bh) { + retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino); + if (retval) + goto end_rename; + } + if (new.dir_bh) { + retval = ext4_rename_dir_finish(handle, &new, old.dir->i_ino); + if (retval) + goto end_rename; + } + ext4_update_dir_count(handle, &old); + ext4_update_dir_count(handle, &new); + retval = 0; + +end_rename: + brelse(old.dir_bh); + brelse(new.dir_bh); + brelse(old.bh); + brelse(new.bh); + if (handle) + ext4_journal_stop(handle); return retval; } +static int ext4_rename2(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +{ + if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) + return -EINVAL; + + if (flags & RENAME_EXCHANGE) { + return ext4_cross_rename(old_dir, old_dentry, + new_dir, new_dentry); + } + /* + * Existence checking was done by the VFS, otherwise "RENAME_NOREPLACE" + * is equivalent to regular rename. + */ + return ext4_rename(old_dir, old_dentry, new_dir, new_dentry); +} + /* * directories can handle most operations... */ @@ -3226,13 +3454,16 @@ const struct inode_operations ext4_dir_inode_operations = { .mkdir = ext4_mkdir, .rmdir = ext4_rmdir, .mknod = ext4_mknod, + .tmpfile = ext4_tmpfile, .rename = ext4_rename, + .rename2 = ext4_rename2, .setattr = ext4_setattr, .setxattr = generic_setxattr, .getxattr = generic_getxattr, .listxattr = ext4_listxattr, .removexattr = generic_removexattr, .get_acl = ext4_get_acl, + .set_acl = ext4_set_acl, .fiemap = ext4_fiemap, }; @@ -3243,4 +3474,5 @@ const struct inode_operations ext4_special_inode_operations = { .listxattr = ext4_listxattr, .removexattr = generic_removexattr, .get_acl = ext4_get_acl, + .set_acl = ext4_set_acl, }; |
