diff options
Diffstat (limited to 'fs/ocfs2/file.c')
| -rw-r--r-- | fs/ocfs2/file.c | 691 |
1 files changed, 350 insertions, 341 deletions
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 77b4c04a280..2930e231f3f 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -38,7 +38,6 @@ #include <linux/quotaops.h> #include <linux/blkdev.h> -#define MLOG_MASK_PREFIX ML_INODE #include <cluster/masklog.h> #include "ocfs2.h" @@ -61,6 +60,7 @@ #include "acl.h" #include "quota.h" #include "refcounttree.h" +#include "ocfs2_trace.h" #include "buffer_head_io.h" @@ -99,8 +99,10 @@ static int ocfs2_file_open(struct inode *inode, struct file *file) int mode = file->f_flags; struct ocfs2_inode_info *oi = OCFS2_I(inode); - mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file, - file->f_path.dentry->d_name.len, file->f_path.dentry->d_name.name); + trace_ocfs2_file_open(inode, file, file->f_path.dentry, + (unsigned long long)OCFS2_I(inode)->ip_blkno, + file->f_path.dentry->d_name.len, + file->f_path.dentry->d_name.name, mode); if (file->f_mode & FMODE_WRITE) dquot_initialize(inode); @@ -135,7 +137,6 @@ static int ocfs2_file_open(struct inode *inode, struct file *file) } leave: - mlog_exit(status); return status; } @@ -143,19 +144,19 @@ static int ocfs2_file_release(struct inode *inode, struct file *file) { struct ocfs2_inode_info *oi = OCFS2_I(inode); - mlog_entry("(0x%p, 0x%p, '%.*s')\n", inode, file, - file->f_path.dentry->d_name.len, - file->f_path.dentry->d_name.name); - spin_lock(&oi->ip_lock); if (!--oi->ip_open_count) oi->ip_flags &= ~OCFS2_INODE_OPEN_DIRECT; + + trace_ocfs2_file_release(inode, file, file->f_path.dentry, + oi->ip_blkno, + file->f_path.dentry->d_name.len, + file->f_path.dentry->d_name.name, + oi->ip_open_count); spin_unlock(&oi->ip_lock); ocfs2_free_file_private(inode, file); - mlog_exit(0); - return 0; } @@ -170,32 +171,44 @@ static int ocfs2_dir_release(struct inode *inode, struct file *file) return 0; } -static int ocfs2_sync_file(struct file *file, int datasync) +static int ocfs2_sync_file(struct file *file, loff_t start, loff_t end, + int datasync) { int err = 0; - journal_t *journal; struct inode *inode = file->f_mapping->host; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct ocfs2_inode_info *oi = OCFS2_I(inode); + journal_t *journal = osb->journal->j_journal; + int ret; + tid_t commit_tid; + bool needs_barrier = false; - mlog_entry("(0x%p, %d, 0x%p, '%.*s')\n", file, datasync, - file->f_path.dentry, file->f_path.dentry->d_name.len, - file->f_path.dentry->d_name.name); + trace_ocfs2_sync_file(inode, file, file->f_path.dentry, + OCFS2_I(inode)->ip_blkno, + file->f_path.dentry->d_name.len, + file->f_path.dentry->d_name.name, + (unsigned long long)datasync); - if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) { - /* - * We still have to flush drive's caches to get data to the - * platter - */ - if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER) - blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); - goto bail; - } + if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) + return -EROFS; - journal = osb->journal->j_journal; - err = jbd2_journal_force_commit(journal); + err = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (err) + return err; -bail: - mlog_exit(err); + commit_tid = datasync ? oi->i_datasync_tid : oi->i_sync_tid; + if (journal->j_flags & JBD2_BARRIER && + !jbd2_trans_will_send_data_barrier(journal, commit_tid)) + needs_barrier = true; + err = jbd2_complete_transaction(journal, commit_tid); + if (needs_barrier) { + ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); + if (!err) + err = ret; + } + + if (err) + mlog_errno(err); return (err < 0) ? -EIO : 0; } @@ -251,8 +264,6 @@ int ocfs2_update_inode_atime(struct inode *inode, handle_t *handle; struct ocfs2_dinode *di = (struct ocfs2_dinode *) bh->b_data; - mlog_entry_void(); - handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); if (IS_ERR(handle)) { ret = PTR_ERR(handle); @@ -275,12 +286,12 @@ int ocfs2_update_inode_atime(struct inode *inode, inode->i_atime = CURRENT_TIME; di->i_atime = cpu_to_le64(inode->i_atime.tv_sec); di->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec); + ocfs2_update_inode_fsync_trans(handle, inode, 0); ocfs2_journal_dirty(handle, bh); out_commit: ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); out: - mlog_exit(ret); return ret; } @@ -291,7 +302,6 @@ static int ocfs2_set_inode_size(handle_t *handle, { int status; - mlog_entry_void(); i_size_write(inode, new_i_size); inode->i_blocks = ocfs2_inode_sector_count(inode); inode->i_ctime = inode->i_mtime = CURRENT_TIME; @@ -303,7 +313,6 @@ static int ocfs2_set_inode_size(handle_t *handle, } bail: - mlog_exit(status); return status; } @@ -327,6 +336,7 @@ int ocfs2_simple_size_update(struct inode *inode, if (ret < 0) mlog_errno(ret); + ocfs2_update_inode_fsync_trans(handle, inode, 0); ocfs2_commit_trans(osb, handle); out: return ret; @@ -359,7 +369,7 @@ static int ocfs2_cow_file_pos(struct inode *inode, if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) goto out; - return ocfs2_refcount_cow(inode, NULL, fe_bh, cpos, 1, cpos+1); + return ocfs2_refcount_cow(inode, fe_bh, cpos, 1, cpos+1); out: return status; @@ -375,8 +385,6 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb, struct ocfs2_dinode *di; u64 cluster_bytes; - mlog_entry_void(); - /* * We need to CoW the cluster contains the offset if it is reflinked * since we will call ocfs2_zero_range_for_truncate later which will @@ -423,14 +431,13 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb, di->i_size = cpu_to_le64(new_i_size); di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec); di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); + ocfs2_update_inode_fsync_trans(handle, inode, 0); ocfs2_journal_dirty(handle, fe_bh); out_commit: ocfs2_commit_trans(osb, handle); out: - - mlog_exit(status); return status; } @@ -442,14 +449,14 @@ static int ocfs2_truncate_file(struct inode *inode, struct ocfs2_dinode *fe = NULL; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - mlog_entry("(inode = %llu, new_i_size = %llu\n", - (unsigned long long)OCFS2_I(inode)->ip_blkno, - (unsigned long long)new_i_size); - /* We trust di_bh because it comes from ocfs2_inode_lock(), which * already validated it */ fe = (struct ocfs2_dinode *) di_bh->b_data; + trace_ocfs2_truncate_file((unsigned long long)OCFS2_I(inode)->ip_blkno, + (unsigned long long)le64_to_cpu(fe->i_size), + (unsigned long long)new_i_size); + mlog_bug_on_msg(le64_to_cpu(fe->i_size) != i_size_read(inode), "Inode %llu, inode i_size = %lld != di " "i_size = %llu, i_flags = 0x%x\n", @@ -459,24 +466,14 @@ static int ocfs2_truncate_file(struct inode *inode, le32_to_cpu(fe->i_flags)); if (new_i_size > le64_to_cpu(fe->i_size)) { - mlog(0, "asked to truncate file with size (%llu) to size (%llu)!\n", - (unsigned long long)le64_to_cpu(fe->i_size), - (unsigned long long)new_i_size); + trace_ocfs2_truncate_file_error( + (unsigned long long)le64_to_cpu(fe->i_size), + (unsigned long long)new_i_size); status = -EINVAL; mlog_errno(status); goto bail; } - mlog(0, "inode %llu, i_size = %llu, new_i_size = %llu\n", - (unsigned long long)le64_to_cpu(fe->i_blkno), - (unsigned long long)le64_to_cpu(fe->i_size), - (unsigned long long)new_i_size); - - /* lets handle the simple truncate cases before doing any more - * cluster locking. */ - if (new_i_size == le64_to_cpu(fe->i_size)) - goto bail; - down_write(&OCFS2_I(inode)->ip_alloc_sem); ocfs2_resv_discard(&osb->osb_la_resmap, @@ -525,7 +522,6 @@ bail: if (!status && OCFS2_I(inode)->ip_clusters == 0) status = ocfs2_try_remove_refcount_tree(inode, di_bh); - mlog_exit(status); return status; } @@ -578,10 +574,8 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start, struct ocfs2_extent_tree et; int did_quota = 0; - mlog_entry("(clusters_to_add = %u)\n", clusters_to_add); - /* - * This function only exists for file systems which don't + * Unwritten extent only exists for file systems which * support holes. */ BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb)); @@ -596,11 +590,6 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start, restart_all: BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters); - mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, " - "clusters_to_add = %u\n", - (unsigned long long)OCFS2_I(inode)->ip_blkno, - (long long)i_size_read(inode), le32_to_cpu(fe->i_clusters), - clusters_to_add); ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), bh); status = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0, &data_ac, &meta_ac); @@ -609,8 +598,7 @@ restart_all: goto leave; } - credits = ocfs2_calc_extend_credits(osb->sb, &fe->id2.i_list, - clusters_to_add); + credits = ocfs2_calc_extend_credits(osb->sb, &fe->id2.i_list); handle = ocfs2_start_trans(osb, credits); if (IS_ERR(handle)) { status = PTR_ERR(handle); @@ -620,6 +608,12 @@ restart_all: } restarted_transaction: + trace_ocfs2_extend_allocation( + (unsigned long long)OCFS2_I(inode)->ip_blkno, + (unsigned long long)i_size_read(inode), + le32_to_cpu(fe->i_clusters), clusters_to_add, + why, restart_func); + status = dquot_alloc_space_nodirty(inode, ocfs2_clusters_to_bytes(osb->sb, clusters_to_add)); if (status) @@ -653,7 +647,7 @@ restarted_transaction: mlog_errno(status); goto leave; } - + ocfs2_update_inode_fsync_trans(handle, inode, 1); ocfs2_journal_dirty(handle, bh); spin_lock(&OCFS2_I(inode)->ip_lock); @@ -666,18 +660,12 @@ restarted_transaction: if (why != RESTART_NONE && clusters_to_add) { if (why == RESTART_META) { - mlog(0, "restarting function.\n"); restart_func = 1; status = 0; } else { BUG_ON(why != RESTART_TRANS); - mlog(0, "restarting transaction.\n"); - /* TODO: This can be more intelligent. */ - credits = ocfs2_calc_extend_credits(osb->sb, - &fe->id2.i_list, - clusters_to_add); - status = ocfs2_extend_trans(handle, credits); + status = ocfs2_allocate_extend_trans(handle, 1); if (status < 0) { /* handle still has to be committed at * this point. */ @@ -689,11 +677,11 @@ restarted_transaction: } } - mlog(0, "fe: i_clusters = %u, i_size=%llu\n", + trace_ocfs2_extend_allocation_end(OCFS2_I(inode)->ip_blkno, le32_to_cpu(fe->i_clusters), - (unsigned long long)le64_to_cpu(fe->i_size)); - mlog(0, "inode: ip_clusters=%u, i_size=%lld\n", - OCFS2_I(inode)->ip_clusters, (long long)i_size_read(inode)); + (unsigned long long)le64_to_cpu(fe->i_size), + OCFS2_I(inode)->ip_clusters, + (unsigned long long)i_size_read(inode)); leave: if (status < 0 && did_quota) @@ -718,7 +706,6 @@ leave: brelse(bh); bh = NULL; - mlog_exit(status); return status; } @@ -726,7 +713,8 @@ leave: * While a write will already be ordering the data, a truncate will not. * Thus, we need to explicitly order the zeroed pages. */ -static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode) +static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode, + struct buffer_head *di_bh) { struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); handle_t *handle = NULL; @@ -743,8 +731,16 @@ static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode) } ret = ocfs2_jbd2_file_inode(handle, inode); - if (ret < 0) + if (ret < 0) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) mlog_errno(ret); + ocfs2_update_inode_fsync_trans(handle, inode, 1); out: if (ret) { @@ -759,7 +755,7 @@ out: * to be too fragile to do exactly what we need without us having to * worry about recursive locking in ->write_begin() and ->write_end(). */ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, - u64 abs_to) + u64 abs_to, struct buffer_head *di_bh) { struct address_space *mapping = inode->i_mapping; struct page *page; @@ -767,6 +763,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, handle_t *handle = NULL; int ret = 0; unsigned zero_from, zero_to, block_start, block_end; + struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; BUG_ON(abs_from >= abs_to); BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT)); @@ -785,10 +782,11 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, if (!zero_to) zero_to = PAGE_CACHE_SIZE; - mlog(0, - "abs_from = %llu, abs_to = %llu, index = %lu, zero_from = %u, zero_to = %u\n", - (unsigned long long)abs_from, (unsigned long long)abs_to, - index, zero_from, zero_to); + trace_ocfs2_write_zero_page( + (unsigned long long)OCFS2_I(inode)->ip_blkno, + (unsigned long long)abs_from, + (unsigned long long)abs_to, + index, zero_from, zero_to); /* We know that zero_from is block aligned */ for (block_start = zero_from; block_start < zero_to; @@ -808,7 +806,8 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, } if (!handle) { - handle = ocfs2_zero_start_ordered_transaction(inode); + handle = ocfs2_zero_start_ordered_transaction(inode, + di_bh); if (IS_ERR(handle)) { ret = PTR_ERR(handle); handle = NULL; @@ -825,8 +824,23 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, ret = 0; } - if (handle) + if (handle) { + /* + * fs-writeback will release the dirty pages without page lock + * whose offset are over inode size, the release happens at + * block_write_full_page(). + */ + i_size_write(inode, abs_to); + inode->i_blocks = ocfs2_inode_sector_count(inode); + di->i_size = cpu_to_le64((u64)i_size_read(inode)); + inode->i_mtime = inode->i_ctime = CURRENT_TIME; + di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec); + di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); + di->i_mtime_nsec = di->i_ctime_nsec; + ocfs2_journal_dirty(handle, di_bh); + ocfs2_update_inode_fsync_trans(handle, inode, 1); ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); + } out_unlock: unlock_page(page); @@ -901,7 +915,7 @@ static int ocfs2_zero_extend_get_range(struct inode *inode, zero_clusters = last_cpos - zero_cpos; if (needs_cow) { - rc = ocfs2_refcount_cow(inode, NULL, di_bh, zero_cpos, + rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos, zero_clusters, UINT_MAX); if (rc) { mlog_errno(rc); @@ -922,22 +936,23 @@ out: * has made sure that the entire range needs zeroing. */ static int ocfs2_zero_extend_range(struct inode *inode, u64 range_start, - u64 range_end) + u64 range_end, struct buffer_head *di_bh) { int rc = 0; u64 next_pos; u64 zero_pos = range_start; - mlog(0, "range_start = %llu, range_end = %llu\n", - (unsigned long long)range_start, - (unsigned long long)range_end); + trace_ocfs2_zero_extend_range( + (unsigned long long)OCFS2_I(inode)->ip_blkno, + (unsigned long long)range_start, + (unsigned long long)range_end); BUG_ON(range_start >= range_end); while (zero_pos < range_end) { next_pos = (zero_pos & PAGE_CACHE_MASK) + PAGE_CACHE_SIZE; if (next_pos > range_end) next_pos = range_end; - rc = ocfs2_write_zero_page(inode, zero_pos, next_pos); + rc = ocfs2_write_zero_page(inode, zero_pos, next_pos, di_bh); if (rc < 0) { mlog_errno(rc); break; @@ -962,9 +977,9 @@ int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh, struct super_block *sb = inode->i_sb; zero_start = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode)); - mlog(0, "zero_start %llu for i_size %llu\n", - (unsigned long long)zero_start, - (unsigned long long)i_size_read(inode)); + trace_ocfs2_zero_extend((unsigned long long)OCFS2_I(inode)->ip_blkno, + (unsigned long long)zero_start, + (unsigned long long)i_size_read(inode)); while (zero_start < zero_to_size) { ret = ocfs2_zero_extend_get_range(inode, di_bh, zero_start, zero_to_size, @@ -983,7 +998,7 @@ int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh, range_end = zero_to_size; ret = ocfs2_zero_extend_range(inode, range_start, - range_end); + range_end, di_bh); if (ret) { mlog_errno(ret); break; @@ -1113,30 +1128,21 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) struct dquot *transfer_to[MAXQUOTAS] = { }; int qtype; - mlog_entry("(0x%p, '%.*s')\n", dentry, - dentry->d_name.len, dentry->d_name.name); + trace_ocfs2_setattr(inode, dentry, + (unsigned long long)OCFS2_I(inode)->ip_blkno, + dentry->d_name.len, dentry->d_name.name, + attr->ia_valid, attr->ia_mode, + from_kuid(&init_user_ns, attr->ia_uid), + from_kgid(&init_user_ns, attr->ia_gid)); /* ensuring we don't even attempt to truncate a symlink */ if (S_ISLNK(inode->i_mode)) attr->ia_valid &= ~ATTR_SIZE; - if (attr->ia_valid & ATTR_MODE) - mlog(0, "mode change: %d\n", attr->ia_mode); - if (attr->ia_valid & ATTR_UID) - mlog(0, "uid change: %d\n", attr->ia_uid); - if (attr->ia_valid & ATTR_GID) - mlog(0, "gid change: %d\n", attr->ia_gid); - if (attr->ia_valid & ATTR_SIZE) - mlog(0, "size change...\n"); - if (attr->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME)) - mlog(0, "time change...\n"); - #define OCFS2_VALID_ATTRS (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME | ATTR_SIZE \ | ATTR_GID | ATTR_UID | ATTR_MODE) - if (!(attr->ia_valid & OCFS2_VALID_ATTRS)) { - mlog(0, "can't handle attrs: 0x%x\n", attr->ia_valid); + if (!(attr->ia_valid & OCFS2_VALID_ATTRS)) return 0; - } status = inode_change_ok(inode, attr); if (status) @@ -1160,12 +1166,14 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) goto bail_unlock_rw; } - if (size_change && attr->ia_size != i_size_read(inode)) { + if (size_change) { status = inode_newsize_ok(inode, attr->ia_size); if (status) goto bail_unlock; - if (i_size_read(inode) > attr->ia_size) { + inode_dio_wait(inode); + + if (i_size_read(inode) >= attr->ia_size) { if (ocfs2_should_order_data(inode)) { status = ocfs2_begin_ordered_truncate(inode, attr->ia_size); @@ -1183,28 +1191,26 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) } } - if ((attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || - (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { + if ((attr->ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid)) || + (attr->ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid))) { /* * Gather pointers to quota structures so that allocation / * freeing of quota structures happens here and not inside * dquot_transfer() where we have problems with lock ordering */ - if (attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid + if (attr->ia_valid & ATTR_UID && !uid_eq(attr->ia_uid, inode->i_uid) && OCFS2_HAS_RO_COMPAT_FEATURE(sb, OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) { - transfer_to[USRQUOTA] = dqget(sb, attr->ia_uid, - USRQUOTA); + transfer_to[USRQUOTA] = dqget(sb, make_kqid_uid(attr->ia_uid)); if (!transfer_to[USRQUOTA]) { status = -ESRCH; goto bail_unlock; } } - if (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid + if (attr->ia_valid & ATTR_GID && !gid_eq(attr->ia_gid, inode->i_gid) && OCFS2_HAS_RO_COMPAT_FEATURE(sb, OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) { - transfer_to[GRPQUOTA] = dqget(sb, attr->ia_gid, - GRPQUOTA); + transfer_to[GRPQUOTA] = dqget(sb, make_kqid_gid(attr->ia_gid)); if (!transfer_to[GRPQUOTA]) { status = -ESRCH; goto bail_unlock; @@ -1229,24 +1235,6 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) } } - /* - * This will intentionally not wind up calling truncate_setsize(), - * since all the work for a size change has been done above. - * Otherwise, we could get into problems with truncate as - * ip_alloc_sem is used there to protect against i_size - * changes. - * - * XXX: this means the conditional below can probably be removed. - */ - if ((attr->ia_valid & ATTR_SIZE) && - attr->ia_size != i_size_read(inode)) { - status = vmtruncate(inode, attr->ia_size); - if (status) { - mlog_errno(status); - goto bail_commit; - } - } - setattr_copy(inode, attr); mark_inode_dirty(inode); @@ -1269,12 +1257,11 @@ bail: dqput(transfer_to[qtype]); if (!status && attr->ia_valid & ATTR_MODE) { - status = ocfs2_acl_chmod(inode); + status = posix_acl_chmod(inode, inode->i_mode); if (status < 0) mlog_errno(status); } - mlog_exit(status); return status; } @@ -1287,8 +1274,6 @@ int ocfs2_getattr(struct vfsmount *mnt, struct ocfs2_super *osb = sb->s_fs_info; int err; - mlog_entry_void(); - err = ocfs2_inode_revalidate(dentry); if (err) { if (err != -ENOENT) @@ -1302,8 +1287,6 @@ int ocfs2_getattr(struct vfsmount *mnt, stat->blksize = osb->s_clustersize; bail: - mlog_exit(err); - return err; } @@ -1311,7 +1294,8 @@ int ocfs2_permission(struct inode *inode, int mask) { int ret; - mlog_entry_void(); + if (mask & MAY_NOT_BLOCK) + return -ECHILD; ret = ocfs2_inode_lock(inode, NULL, 0); if (ret) { @@ -1320,11 +1304,10 @@ int ocfs2_permission(struct inode *inode, int mask) goto out; } - ret = generic_permission(inode, mask, ocfs2_check_acl); + ret = generic_permission(inode, mask); ocfs2_inode_unlock(inode, 0); out: - mlog_exit(ret); return ret; } @@ -1336,8 +1319,9 @@ static int __ocfs2_write_remove_suid(struct inode *inode, struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_dinode *di; - mlog_entry("(Inode %llu, mode 0%o)\n", - (unsigned long long)OCFS2_I(inode)->ip_blkno, inode->i_mode); + trace_ocfs2_write_remove_suid( + (unsigned long long)OCFS2_I(inode)->ip_blkno, + inode->i_mode); handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); if (IS_ERR(handle)) { @@ -1359,13 +1343,13 @@ static int __ocfs2_write_remove_suid(struct inode *inode, di = (struct ocfs2_dinode *) bh->b_data; di->i_mode = cpu_to_le16(inode->i_mode); + ocfs2_update_inode_fsync_trans(handle, inode, 0); ocfs2_journal_dirty(handle, bh); out_trans: ocfs2_commit_trans(osb, handle); out: - mlog_exit(ret); return ret; } @@ -1544,8 +1528,9 @@ static int ocfs2_zero_partial_clusters(struct inode *inode, * partial clusters here. There's no need to worry about * physical allocation - the zeroing code knows to skip holes. */ - mlog(0, "byte start: %llu, end: %llu\n", - (unsigned long long)start, (unsigned long long)end); + trace_ocfs2_zero_partial_clusters( + (unsigned long long)OCFS2_I(inode)->ip_blkno, + (unsigned long long)start, (unsigned long long)end); /* * If both edges are on a cluster boundary then there's no @@ -1569,8 +1554,8 @@ static int ocfs2_zero_partial_clusters(struct inode *inode, if (tmpend > end) tmpend = end; - mlog(0, "1st range: start: %llu, tmpend: %llu\n", - (unsigned long long)start, (unsigned long long)tmpend); + trace_ocfs2_zero_partial_clusters_range1((unsigned long long)start, + (unsigned long long)tmpend); ret = ocfs2_zero_range_for_truncate(inode, handle, start, tmpend); if (ret) @@ -1584,13 +1569,14 @@ static int ocfs2_zero_partial_clusters(struct inode *inode, */ start = end & ~(osb->s_clustersize - 1); - mlog(0, "2nd range: start: %llu, end: %llu\n", - (unsigned long long)start, (unsigned long long)end); + trace_ocfs2_zero_partial_clusters_range2( + (unsigned long long)start, (unsigned long long)end); ret = ocfs2_zero_range_for_truncate(inode, handle, start, end); if (ret) mlog_errno(ret); } + ocfs2_update_inode_fsync_trans(handle, inode, 1); ocfs2_commit_trans(osb, handle); out: @@ -1634,6 +1620,9 @@ static void ocfs2_calc_trunc_pos(struct inode *inode, range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec); if (le32_to_cpu(rec->e_cpos) >= trunc_start) { + /* + * remove an entire extent record. + */ *trunc_cpos = le32_to_cpu(rec->e_cpos); /* * Skip holes if any. @@ -1644,7 +1633,16 @@ static void ocfs2_calc_trunc_pos(struct inode *inode, *blkno = le64_to_cpu(rec->e_blkno); *trunc_end = le32_to_cpu(rec->e_cpos); } else if (range > trunc_start) { + /* + * remove a partial extent record, which means we're + * removing the last extent record. + */ *trunc_cpos = trunc_start; + /* + * skip hole if any. + */ + if (range < *trunc_end) + *trunc_end = range; *trunc_len = *trunc_end - trunc_start; coff = trunc_start - le32_to_cpu(rec->e_cpos); *blkno = le64_to_cpu(rec->e_blkno) + @@ -1685,6 +1683,11 @@ static int ocfs2_remove_inode_range(struct inode *inode, ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh); ocfs2_init_dealloc_ctxt(&dealloc); + trace_ocfs2_remove_inode_range( + (unsigned long long)OCFS2_I(inode)->ip_blkno, + (unsigned long long)byte_start, + (unsigned long long)byte_len); + if (byte_len == 0) return 0; @@ -1731,11 +1734,6 @@ static int ocfs2_remove_inode_range(struct inode *inode, trunc_end = (byte_start + byte_len) >> osb->s_clustersize_bits; cluster_in_el = trunc_end; - mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, cend: %u\n", - (unsigned long long)OCFS2_I(inode)->ip_blkno, - (unsigned long long)byte_start, - (unsigned long long)byte_len, trunc_start, trunc_end); - ret = ocfs2_zero_partial_clusters(inode, byte_start, byte_len); if (ret) { mlog_errno(ret); @@ -1820,6 +1818,7 @@ static int ocfs2_remove_inode_range(struct inode *inode, ocfs2_truncate_cluster_pages(inode, byte_start, byte_len); out: + ocfs2_free_path(path); ocfs2_schedule_truncate_log_flush(osb, 1); ocfs2_run_deallocs(osb, &dealloc); @@ -1893,7 +1892,8 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, } size = sr->l_start + sr->l_len; - if (cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) { + if (cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64 || + cmd == OCFS2_IOC_UNRESVSP || cmd == OCFS2_IOC_UNRESVSP64) { if (sr->l_len <= 0) { ret = -EINVAL; goto out_inode_unlock; @@ -1951,6 +1951,9 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, if (ret < 0) mlog_errno(ret); + if (file && (file->f_flags & O_SYNC)) + handle->h_sync = 1; + ocfs2_commit_trans(osb, handle); out_inode_unlock: @@ -1967,8 +1970,9 @@ out: int ocfs2_change_file_space(struct file *file, unsigned int cmd, struct ocfs2_space_resv *sr) { - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + int ret; if ((cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) && !ocfs2_writes_unwritten_extents(osb)) @@ -1983,31 +1987,40 @@ int ocfs2_change_file_space(struct file *file, unsigned int cmd, if (!(file->f_mode & FMODE_WRITE)) return -EBADF; - return __ocfs2_change_file_space(file, inode, file->f_pos, cmd, sr, 0); + ret = mnt_want_write_file(file); + if (ret) + return ret; + ret = __ocfs2_change_file_space(file, inode, file->f_pos, cmd, sr, 0); + mnt_drop_write_file(file); + return ret; } -static long ocfs2_fallocate(struct inode *inode, int mode, loff_t offset, +static long ocfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t len) { + struct inode *inode = file_inode(file); struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_space_resv sr; int change_size = 1; + int cmd = OCFS2_IOC_RESVSP64; + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) + return -EOPNOTSUPP; if (!ocfs2_writes_unwritten_extents(osb)) return -EOPNOTSUPP; - if (S_ISDIR(inode->i_mode)) - return -ENODEV; - if (mode & FALLOC_FL_KEEP_SIZE) change_size = 0; + if (mode & FALLOC_FL_PUNCH_HOLE) + cmd = OCFS2_IOC_UNRESVSP64; + sr.l_whence = 0; sr.l_start = (s64)offset; sr.l_len = (s64)len; - return __ocfs2_change_file_space(NULL, inode, offset, - OCFS2_IOC_RESVSP64, &sr, change_size); + return __ocfs2_change_file_space(NULL, inode, offset, cmd, &sr, + change_size); } int ocfs2_check_range_for_refcount(struct inode *inode, loff_t pos, @@ -2049,6 +2062,16 @@ out: return ret; } +static int ocfs2_is_io_unaligned(struct inode *inode, size_t count, loff_t pos) +{ + int blockmask = inode->i_sb->s_blocksize - 1; + loff_t final_size = pos + count; + + if ((pos & blockmask) || (final_size & blockmask)) + return 1; + return 0; +} + static int ocfs2_prepare_inode_for_refcount(struct inode *inode, struct file *file, loff_t pos, size_t count, @@ -2068,7 +2091,7 @@ static int ocfs2_prepare_inode_for_refcount(struct inode *inode, *meta_level = 1; - ret = ocfs2_refcount_cow(inode, file, di_bh, cpos, clusters, UINT_MAX); + ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters, UINT_MAX); if (ret) mlog_errno(ret); out: @@ -2086,7 +2109,7 @@ static int ocfs2_prepare_inode_for_write(struct file *file, int ret = 0, meta_level = 0; struct dentry *dentry = file->f_path.dentry; struct inode *inode = dentry->d_inode; - loff_t saved_pos, end; + loff_t saved_pos = 0, end; /* * We start with a read level meta lock and only jump to an ex @@ -2105,7 +2128,7 @@ static int ocfs2_prepare_inode_for_write(struct file *file, * remove_suid() calls ->setattr without any hint that * we may have already done our cluster locking. Since * ocfs2_setattr() *must* take cluster locks to - * proceeed, this will lead us to recursively lock the + * proceed, this will lead us to recursively lock the * inode. There's also the dinode i_size state which * can be lost via setattr during extending writes (we * set inode->i_size at the end of a write. */ @@ -2125,12 +2148,10 @@ static int ocfs2_prepare_inode_for_write(struct file *file, /* work on a copy of ppos until we're sure that we won't have * to recalculate it due to relocking. */ - if (appending) { + if (appending) saved_pos = i_size_read(inode); - mlog(0, "O_APPEND: inode->i_size=%llu\n", saved_pos); - } else { + else saved_pos = *ppos; - } end = saved_pos + count; @@ -2201,6 +2222,10 @@ static int ocfs2_prepare_inode_for_write(struct file *file, *ppos = saved_pos; out_unlock: + trace_ocfs2_prepare_inode_for_write(OCFS2_I(inode)->ip_blkno, + saved_pos, appending, count, + direct_io, has_refcount); + if (meta_level >= 0) ocfs2_inode_unlock(inode, meta_level); @@ -2208,44 +2233,44 @@ out: return ret; } -static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, - const struct iovec *iov, - unsigned long nr_segs, - loff_t pos) +static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, + struct iov_iter *from) { int ret, direct_io, appending, rw_level, have_alloc_sem = 0; int can_do_direct, has_refcount = 0; ssize_t written = 0; - size_t ocount; /* original count */ - size_t count; /* after file limit checks */ + size_t count = iov_iter_count(from); loff_t old_size, *ppos = &iocb->ki_pos; u32 old_clusters; struct file *file = iocb->ki_filp; - struct inode *inode = file->f_path.dentry->d_inode; + struct inode *inode = file_inode(file); struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); int full_coherency = !(osb->s_mount_opt & OCFS2_MOUNT_COHERENCY_BUFFERED); + int unaligned_dio = 0; - mlog_entry("(0x%p, %u, '%.*s')\n", file, - (unsigned int)nr_segs, - file->f_path.dentry->d_name.len, - file->f_path.dentry->d_name.name); + trace_ocfs2_file_aio_write(inode, file, file->f_path.dentry, + (unsigned long long)OCFS2_I(inode)->ip_blkno, + file->f_path.dentry->d_name.len, + file->f_path.dentry->d_name.name, + (unsigned int)from->nr_segs); /* GRRRRR */ - if (iocb->ki_left == 0) + if (iocb->ki_nbytes == 0) return 0; - vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); - appending = file->f_flags & O_APPEND ? 1 : 0; direct_io = file->f_flags & O_DIRECT ? 1 : 0; mutex_lock(&inode->i_mutex); + ocfs2_iocb_clear_sem_locked(iocb); + relock: - /* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */ + /* to match setattr's i_mutex -> rw_lock ordering */ if (direct_io) { - down_read(&inode->i_alloc_sem); have_alloc_sem = 1; + /* communicate with ocfs2_dio_end_io */ + ocfs2_iocb_set_sem_locked(iocb); } /* @@ -2273,7 +2298,7 @@ relock: ret = ocfs2_inode_lock(inode, NULL, 1); if (ret < 0) { mlog_errno(ret); - goto out_sems; + goto out; } ocfs2_inode_unlock(inode, 1); @@ -2281,20 +2306,23 @@ relock: can_do_direct = direct_io; ret = ocfs2_prepare_inode_for_write(file, ppos, - iocb->ki_left, appending, + iocb->ki_nbytes, appending, &can_do_direct, &has_refcount); if (ret < 0) { mlog_errno(ret); goto out; } + if (direct_io && !is_sync_kiocb(iocb)) + unaligned_dio = ocfs2_is_io_unaligned(inode, iocb->ki_nbytes, + *ppos); + /* * We can't complete the direct I/O as requested, fall back to * buffered I/O. */ if (direct_io && !can_do_direct) { ocfs2_rw_unlock(inode, rw_level); - up_read(&inode->i_alloc_sem); have_alloc_sem = 0; rw_level = -1; @@ -2303,6 +2331,16 @@ relock: goto relock; } + if (unaligned_dio) { + /* + * Wait on previous unaligned aio to complete before + * proceeding. + */ + mutex_lock(&OCFS2_I(inode)->ip_unaligned_aio); + /* Mark the iocb as needing an unlock in ocfs2_dio_end_io */ + ocfs2_iocb_set_unaligned_aio(iocb); + } + /* * To later detect whether a journal commit for sync writes is * necessary, we sample i_size, and cluster count here. @@ -2313,28 +2351,23 @@ relock: /* communicate with ocfs2_dio_end_io */ ocfs2_iocb_set_rw_locked(iocb, rw_level); - ret = generic_segment_checks(iov, &nr_segs, &ocount, - VERIFY_READ); - if (ret) - goto out_dio; - - count = ocount; ret = generic_write_checks(file, ppos, &count, S_ISBLK(inode->i_mode)); if (ret) goto out_dio; + iov_iter_truncate(from, count); if (direct_io) { - written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos, - ppos, count, ocount); + written = generic_file_direct_write(iocb, from, *ppos); if (written < 0) { ret = written; goto out_dio; } } else { current->backing_dev_info = file->f_mapping->backing_dev_info; - written = generic_file_buffered_write(iocb, iov, nr_segs, *ppos, - ppos, count, 0); + written = generic_perform_write(file, from, *ppos); + if (likely(written >= 0)) + iocb->ki_pos = *ppos + written; current->backing_dev_info = NULL; } @@ -2344,8 +2377,8 @@ out_dio: if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) || ((file->f_flags & O_DIRECT) && !direct_io)) { - ret = filemap_fdatawrite_range(file->f_mapping, pos, - pos + count - 1); + ret = filemap_fdatawrite_range(file->f_mapping, *ppos, + *ppos + count - 1); if (ret < 0) written = ret; @@ -2358,15 +2391,14 @@ out_dio: } if (!ret) - ret = filemap_fdatawait_range(file->f_mapping, pos, - pos + count - 1); + ret = filemap_fdatawait_range(file->f_mapping, *ppos, + *ppos + count - 1); } /* * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io * function pointer which is called when o_direct io completes so that - * it can unlock our rw lock. (it's the clustered equivalent of - * i_alloc_sem; protects truncate from racing with pending ios). + * it can unlock our rw lock. * Unfortunately there are error cases which call end_io and others * that don't. so we don't have to unlock the rw_lock if either an * async dio is going to do it in the future or an end_io after an @@ -2375,6 +2407,12 @@ out_dio: if ((ret == -EIOCBQUEUED) || (!ocfs2_iocb_is_rw_locked(iocb))) { rw_level = -1; have_alloc_sem = 0; + unaligned_dio = 0; + } + + if (unaligned_dio) { + ocfs2_iocb_clear_unaligned_aio(iocb); + mutex_unlock(&OCFS2_I(inode)->ip_unaligned_aio); } out: @@ -2383,96 +2421,12 @@ out: out_sems: if (have_alloc_sem) - up_read(&inode->i_alloc_sem); + ocfs2_iocb_clear_sem_locked(iocb); mutex_unlock(&inode->i_mutex); if (written) ret = written; - mlog_exit(ret); - return ret; -} - -static int ocfs2_splice_to_file(struct pipe_inode_info *pipe, - struct file *out, - struct splice_desc *sd) -{ - int ret; - - ret = ocfs2_prepare_inode_for_write(out, &sd->pos, - sd->total_len, 0, NULL, NULL); - if (ret < 0) { - mlog_errno(ret); - return ret; - } - - return splice_from_pipe_feed(pipe, sd, pipe_to_file); -} - -static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, - struct file *out, - loff_t *ppos, - size_t len, - unsigned int flags) -{ - int ret; - struct address_space *mapping = out->f_mapping; - struct inode *inode = mapping->host; - struct splice_desc sd = { - .total_len = len, - .flags = flags, - .pos = *ppos, - .u.file = out, - }; - - mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", out, pipe, - (unsigned int)len, - out->f_path.dentry->d_name.len, - out->f_path.dentry->d_name.name); - - if (pipe->inode) - mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_PARENT); - - splice_from_pipe_begin(&sd); - do { - ret = splice_from_pipe_next(pipe, &sd); - if (ret <= 0) - break; - - mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); - ret = ocfs2_rw_lock(inode, 1); - if (ret < 0) - mlog_errno(ret); - else { - ret = ocfs2_splice_to_file(pipe, out, &sd); - ocfs2_rw_unlock(inode, 1); - } - mutex_unlock(&inode->i_mutex); - } while (ret > 0); - splice_from_pipe_end(pipe, &sd); - - if (pipe->inode) - mutex_unlock(&pipe->inode->i_mutex); - - if (sd.num_spliced) - ret = sd.num_spliced; - - if (ret > 0) { - unsigned long nr_pages; - int err; - - nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - - err = generic_write_sync(out, *ppos, ret); - if (err) - ret = err; - else - *ppos += ret; - - balance_dirty_pages_ratelimited_nr(mapping, nr_pages); - } - - mlog_exit(ret); return ret; } @@ -2483,17 +2437,17 @@ static ssize_t ocfs2_file_splice_read(struct file *in, unsigned int flags) { int ret = 0, lock_level = 0; - struct inode *inode = in->f_path.dentry->d_inode; + struct inode *inode = file_inode(in); - mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", in, pipe, - (unsigned int)len, - in->f_path.dentry->d_name.len, - in->f_path.dentry->d_name.name); + trace_ocfs2_file_splice_read(inode, in, in->f_path.dentry, + (unsigned long long)OCFS2_I(inode)->ip_blkno, + in->f_path.dentry->d_name.len, + in->f_path.dentry->d_name.name, len); /* - * See the comment in ocfs2_file_aio_read() + * See the comment in ocfs2_file_read_iter() */ - ret = ocfs2_inode_lock_atime(inode, in->f_vfsmnt, &lock_level); + ret = ocfs2_inode_lock_atime(inode, in->f_path.mnt, &lock_level); if (ret < 0) { mlog_errno(ret); goto bail; @@ -2503,23 +2457,22 @@ static ssize_t ocfs2_file_splice_read(struct file *in, ret = generic_file_splice_read(in, ppos, pipe, len, flags); bail: - mlog_exit(ret); return ret; } -static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, - const struct iovec *iov, - unsigned long nr_segs, - loff_t pos) +static ssize_t ocfs2_file_read_iter(struct kiocb *iocb, + struct iov_iter *to) { int ret = 0, rw_level = -1, have_alloc_sem = 0, lock_level = 0; struct file *filp = iocb->ki_filp; - struct inode *inode = filp->f_path.dentry->d_inode; + struct inode *inode = file_inode(filp); + + trace_ocfs2_file_aio_read(inode, filp, filp->f_path.dentry, + (unsigned long long)OCFS2_I(inode)->ip_blkno, + filp->f_path.dentry->d_name.len, + filp->f_path.dentry->d_name.name, + to->nr_segs); /* GRRRRR */ - mlog_entry("(0x%p, %u, '%.*s')\n", filp, - (unsigned int)nr_segs, - filp->f_path.dentry->d_name.len, - filp->f_path.dentry->d_name.name); if (!inode) { ret = -EINVAL; @@ -2527,13 +2480,15 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, goto bail; } + ocfs2_iocb_clear_sem_locked(iocb); + /* * buffered reads protect themselves in ->readpage(). O_DIRECT reads * need locks to protect pending reads from racing with truncate. */ if (filp->f_flags & O_DIRECT) { - down_read(&inode->i_alloc_sem); have_alloc_sem = 1; + ocfs2_iocb_set_sem_locked(iocb); ret = ocfs2_rw_lock(inode, 0); if (ret < 0) { @@ -2554,21 +2509,20 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, * like i_size. This allows the checks down below * generic_file_aio_read() a chance of actually working. */ - ret = ocfs2_inode_lock_atime(inode, filp->f_vfsmnt, &lock_level); + ret = ocfs2_inode_lock_atime(inode, filp->f_path.mnt, &lock_level); if (ret < 0) { mlog_errno(ret); goto bail; } ocfs2_inode_unlock(inode, lock_level); - ret = generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos); - if (ret == -EINVAL) - mlog(0, "generic_file_aio_read returned -EINVAL\n"); + ret = generic_file_read_iter(iocb, to); + trace_generic_file_aio_read_ret(ret); /* buffered aio wouldn't have proper lock coverage today */ BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT)); - /* see ocfs2_file_aio_write */ + /* see ocfs2_file_write_iter */ if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) { rw_level = -1; have_alloc_sem = 0; @@ -2576,14 +2530,64 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, bail: if (have_alloc_sem) - up_read(&inode->i_alloc_sem); + ocfs2_iocb_clear_sem_locked(iocb); + if (rw_level != -1) ocfs2_rw_unlock(inode, rw_level); - mlog_exit(ret); return ret; } +/* Refer generic_file_llseek_unlocked() */ +static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int whence) +{ + struct inode *inode = file->f_mapping->host; + int ret = 0; + + mutex_lock(&inode->i_mutex); + + switch (whence) { + case SEEK_SET: + break; + case SEEK_END: + /* SEEK_END requires the OCFS2 inode lock for the file + * because it references the file's size. + */ + ret = ocfs2_inode_lock(inode, NULL, 0); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + offset += i_size_read(inode); + ocfs2_inode_unlock(inode, 0); + break; + case SEEK_CUR: + if (offset == 0) { + offset = file->f_pos; + goto out; + } + offset += file->f_pos; + break; + case SEEK_DATA: + case SEEK_HOLE: + ret = ocfs2_seek_data_hole_offset(file, &offset, whence); + if (ret) + goto out; + break; + default: + ret = -EINVAL; + goto out; + } + + offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); + +out: + mutex_unlock(&inode->i_mutex); + if (ret) + return ret; + return offset; +} + const struct inode_operations ocfs2_file_iops = { .setattr = ocfs2_setattr, .getattr = ocfs2_getattr, @@ -2592,14 +2596,17 @@ const struct inode_operations ocfs2_file_iops = { .getxattr = generic_getxattr, .listxattr = ocfs2_listxattr, .removexattr = generic_removexattr, - .fallocate = ocfs2_fallocate, .fiemap = ocfs2_fiemap, + .get_acl = ocfs2_iop_get_acl, + .set_acl = ocfs2_iop_set_acl, }; const struct inode_operations ocfs2_special_file_iops = { .setattr = ocfs2_setattr, .getattr = ocfs2_getattr, .permission = ocfs2_permission, + .get_acl = ocfs2_iop_get_acl, + .set_acl = ocfs2_iop_set_acl, }; /* @@ -2607,15 +2614,15 @@ const struct inode_operations ocfs2_special_file_iops = { * ocfs2_fops_no_plocks and ocfs2_dops_no_plocks! */ const struct file_operations ocfs2_fops = { - .llseek = generic_file_llseek, - .read = do_sync_read, - .write = do_sync_write, + .llseek = ocfs2_file_llseek, + .read = new_sync_read, + .write = new_sync_write, .mmap = ocfs2_mmap, .fsync = ocfs2_sync_file, .release = ocfs2_file_release, .open = ocfs2_file_open, - .aio_read = ocfs2_file_aio_read, - .aio_write = ocfs2_file_aio_write, + .read_iter = ocfs2_file_read_iter, + .write_iter = ocfs2_file_write_iter, .unlocked_ioctl = ocfs2_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ocfs2_compat_ioctl, @@ -2623,13 +2630,14 @@ const struct file_operations ocfs2_fops = { .lock = ocfs2_lock, .flock = ocfs2_flock, .splice_read = ocfs2_file_splice_read, - .splice_write = ocfs2_file_splice_write, + .splice_write = iter_file_splice_write, + .fallocate = ocfs2_fallocate, }; const struct file_operations ocfs2_dops = { .llseek = generic_file_llseek, .read = generic_read_dir, - .readdir = ocfs2_readdir, + .iterate = ocfs2_readdir, .fsync = ocfs2_sync_file, .release = ocfs2_dir_release, .open = ocfs2_dir_open, @@ -2654,28 +2662,29 @@ const struct file_operations ocfs2_dops = { * the cluster. */ const struct file_operations ocfs2_fops_no_plocks = { - .llseek = generic_file_llseek, - .read = do_sync_read, - .write = do_sync_write, + .llseek = ocfs2_file_llseek, + .read = new_sync_read, + .write = new_sync_write, .mmap = ocfs2_mmap, .fsync = ocfs2_sync_file, .release = ocfs2_file_release, .open = ocfs2_file_open, - .aio_read = ocfs2_file_aio_read, - .aio_write = ocfs2_file_aio_write, + .read_iter = ocfs2_file_read_iter, + .write_iter = ocfs2_file_write_iter, .unlocked_ioctl = ocfs2_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ocfs2_compat_ioctl, #endif .flock = ocfs2_flock, .splice_read = ocfs2_file_splice_read, - .splice_write = ocfs2_file_splice_write, + .splice_write = iter_file_splice_write, + .fallocate = ocfs2_fallocate, }; const struct file_operations ocfs2_dops_no_plocks = { .llseek = generic_file_llseek, .read = generic_read_dir, - .readdir = ocfs2_readdir, + .iterate = ocfs2_readdir, .fsync = ocfs2_sync_file, .release = ocfs2_dir_release, .open = ocfs2_dir_open, |
