From 2b1e55c389105b722cccadfa47f5615f57d8887f Mon Sep 17 00:00:00 2001 From: Younger Liu Date: Wed, 11 Sep 2013 14:19:44 -0700 Subject: ocfs2: lighten up allocate transaction The issue scenario is as follows: When fallocating a very large disk space for a small file, __ocfs2_extend_allocation attempts to get a very large transaction. For some journal sizes, there may not be enough room for this transaction, and the fallocate will fail. The patch below extends & restarts the transaction as necessary while allocating space, and should work with even the smallest journal. This patch follows the approach used by ext4 resize. Test: # mkfs.ocfs2 -b 4K -C 32K -T datafiles /dev/sdc ...(journal size is 32M) # mount.ocfs2 /dev/sdc /mnt/ocfs2/ # touch /mnt/ocfs2/1.log # fallocate -o 0 -l 400G /mnt/ocfs2/1.log fallocate: /mnt/ocfs2/1.log: fallocate failed: Cannot allocate memory # tail -f /var/log/messages [ 7372.278591] JBD: fallocate wants too many credits (2051 > 2048) [ 7372.278597] (fallocate,6438,0):__ocfs2_extend_allocation:709 ERROR: status = -12 [ 7372.278603] (fallocate,6438,0):ocfs2_allocate_unwritten_extents:1504 ERROR: status = -12 [ 7372.278607] (fallocate,6438,0):__ocfs2_change_file_space:1955 ERROR: status = -12 ^C With this patch, the test works well. Signed-off-by: Younger Liu Cc: Jie Liu Cc: Joel Becker Cc: Mark Fasheh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/file.c | 6 +----- fs/ocfs2/journal.c | 35 +++++++++++++++++++++++++++++++++++ fs/ocfs2/journal.h | 11 +++++++++++ fs/ocfs2/ocfs2_trace.h | 2 ++ 4 files changed, 49 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 3261d71319e..409c549ae02 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -671,11 +671,7 @@ restarted_transaction: } else { BUG_ON(why != RESTART_TRANS); - /* TODO: This can be more intelligent. 
*/ - credits = ocfs2_calc_extend_credits(osb->sb, - &fe->id2.i_list, - clusters_to_add); - status = ocfs2_extend_trans(handle, credits); + status = ocfs2_allocate_extend_trans(handle, 1); if (status < 0) { /* handle still has to be committed at * this point. */ diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 242170d8397..a126cb37ca4 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -455,6 +455,41 @@ bail: return status; } +/* + * If we have fewer than thresh credits, extend by OCFS2_MAX_TRANS_DATA. + * If that fails, restart the transaction & regain write access for the + * buffer head which is used for metadata modifications. + * Taken from Ext4: extend_or_restart_transaction() + */ +int ocfs2_allocate_extend_trans(handle_t *handle, int thresh) +{ + int status, old_nblks; + + BUG_ON(!handle); + + old_nblks = handle->h_buffer_credits; + trace_ocfs2_allocate_extend_trans(old_nblks, thresh); + + if (old_nblks < thresh) + return 0; + + status = jbd2_journal_extend(handle, OCFS2_MAX_TRANS_DATA); + if (status < 0) { + mlog_errno(status); + goto bail; + } + + if (status > 0) { + status = jbd2_journal_restart(handle, OCFS2_MAX_TRANS_DATA); + if (status < 0) + mlog_errno(status); + } + +bail: + return status; +} + + struct ocfs2_triggers { struct jbd2_buffer_trigger_type ot_triggers; int ot_offset; diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 0a992737dca..0b479bab367 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -258,6 +258,17 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int ocfs2_commit_trans(struct ocfs2_super *osb, handle_t *handle); int ocfs2_extend_trans(handle_t *handle, int nblocks); +int ocfs2_allocate_extend_trans(handle_t *handle, + int thresh); + +/* + * Define an arbitrary limit for the amount of data we will anticipate + * writing to any given transaction. 
For unbounded transactions such as + * fallocate(2) we can write more than this, but we always + * start off at the maximum transaction size and grow the transaction + * optimistically as we go. + */ +#define OCFS2_MAX_TRANS_DATA 64U /* * Create access is for when we get a newly created buffer and we're diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h index 3b481f49063..1b60c62aa9d 100644 --- a/fs/ocfs2/ocfs2_trace.h +++ b/fs/ocfs2/ocfs2_trace.h @@ -2579,6 +2579,8 @@ DEFINE_OCFS2_INT_INT_EVENT(ocfs2_extend_trans); DEFINE_OCFS2_INT_EVENT(ocfs2_extend_trans_restart); +DEFINE_OCFS2_INT_INT_EVENT(ocfs2_allocate_extend_trans); + DEFINE_OCFS2_ULL_ULL_UINT_UINT_EVENT(ocfs2_journal_access); DEFINE_OCFS2_ULL_EVENT(ocfs2_journal_dirty); -- cgit v1.2.3-18-g5258 From f17c20dd2ec81e8ff328b81bc847da9429d0975b Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Wed, 11 Sep 2013 14:19:45 -0700 Subject: ocfs2: use i_size_read() to access i_size Though ocfs2 uses inode->i_mutex to protect i_size, there are both i_size_read/write() and direct accesses. Clean up all direct access to eliminate confusion. 
Signed-off-by: Junxiao Bi Cc: Jie Liu Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/aops.c | 2 +- fs/ocfs2/extent_map.c | 10 +++++----- fs/ocfs2/ioctl.c | 2 +- fs/ocfs2/journal.c | 8 ++++---- fs/ocfs2/move_extents.c | 2 +- fs/ocfs2/quota_global.c | 6 +++--- fs/ocfs2/quota_local.c | 12 ++++++------ 7 files changed, 21 insertions(+), 21 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 94417a85ce6..f37d3c0e205 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -2044,7 +2044,7 @@ int ocfs2_write_end_nolock(struct address_space *mapping, out_write_size: pos += copied; - if (pos > inode->i_size) { + if (pos > i_size_read(inode)) { i_size_write(inode, pos); mark_inode_dirty(inode); } diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index 2487116d0d3..4bf2b763467 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c @@ -852,20 +852,20 @@ int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int whence) down_read(&OCFS2_I(inode)->ip_alloc_sem); - if (*offset >= inode->i_size) { + if (*offset >= i_size_read(inode)) { ret = -ENXIO; goto out_unlock; } if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { if (whence == SEEK_HOLE) - *offset = inode->i_size; + *offset = i_size_read(inode); goto out_unlock; } clen = 0; cpos = *offset >> cs_bits; - cend = ocfs2_clusters_for_bytes(inode->i_sb, inode->i_size); + cend = ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode)); while (cpos < cend && !is_last) { ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, &hole_size, @@ -904,8 +904,8 @@ int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int whence) extlen = clen; extlen <<= cs_bits; - if ((extoff + extlen) > inode->i_size) - extlen = inode->i_size - extoff; + if ((extoff + extlen) > i_size_read(inode)) + extlen = i_size_read(inode) - extoff; extoff += extlen; if (extoff > *offset) *offset = extoff; diff --git a/fs/ocfs2/ioctl.c 
b/fs/ocfs2/ioctl.c index 0c60ef2d805..fa32ce9b455 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -303,7 +303,7 @@ int ocfs2_info_handle_journal_size(struct inode *inode, if (o2info_from_user(oij, req)) goto bail; - oij.ij_journal_size = osb->journal->j_inode->i_size; + oij.ij_journal_size = i_size_read(osb->journal->j_inode); o2info_set_request_filled(&oij.ij_req); diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index a126cb37ca4..44fc3e530c3 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -836,14 +836,14 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty) inode_lock = 1; di = (struct ocfs2_dinode *)bh->b_data; - if (inode->i_size < OCFS2_MIN_JOURNAL_SIZE) { + if (i_size_read(inode) < OCFS2_MIN_JOURNAL_SIZE) { mlog(ML_ERROR, "Journal file size (%lld) is too small!\n", - inode->i_size); + i_size_read(inode)); status = -EINVAL; goto done; } - trace_ocfs2_journal_init(inode->i_size, + trace_ocfs2_journal_init(i_size_read(inode), (unsigned long long)inode->i_blocks, OCFS2_I(inode)->ip_clusters); @@ -1131,7 +1131,7 @@ static int ocfs2_force_read_journal(struct inode *inode) memset(bhs, 0, sizeof(struct buffer_head *) * CONCURRENT_JOURNAL_FILL); - num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, inode->i_size); + num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode)); v_blkno = 0; while (v_blkno < num_blocks) { status = ocfs2_extent_map_get_blocks(inode, v_blkno, diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index 452068b4574..415928536c5 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c @@ -845,7 +845,7 @@ static int __ocfs2_move_extents_range(struct buffer_head *di_bh, struct ocfs2_move_extents *range = context->range; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - if ((inode->i_size == 0) || (range->me_len == 0)) + if ((i_size_read(inode) == 0) || (range->me_len == 0)) return 0; if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) diff --git a/fs/ocfs2/quota_global.c 
b/fs/ocfs2/quota_global.c index 332a281f217..aaa50611ec6 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -234,7 +234,7 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type, len = sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE - offset; } - if (gqinode->i_size < off + len) { + if (i_size_read(gqinode) < off + len) { loff_t rounded_end = ocfs2_align_bytes_to_blocks(sb, off + len); @@ -778,8 +778,8 @@ static int ocfs2_acquire_dquot(struct dquot *dquot) */ WARN_ON(journal_current_handle()); status = ocfs2_extend_no_holes(gqinode, NULL, - gqinode->i_size + (need_alloc << sb->s_blocksize_bits), - gqinode->i_size); + i_size_read(gqinode) + (need_alloc << sb->s_blocksize_bits), + i_size_read(gqinode)); if (status < 0) goto out_dq; } diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 27fe7ee4874..2e4344be3b9 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -982,14 +982,14 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk( /* We are protected by dqio_sem so no locking needed */ status = ocfs2_extend_no_holes(lqinode, NULL, - lqinode->i_size + 2 * sb->s_blocksize, - lqinode->i_size); + i_size_read(lqinode) + 2 * sb->s_blocksize, + i_size_read(lqinode)); if (status < 0) { mlog_errno(status); goto out; } status = ocfs2_simple_size_update(lqinode, oinfo->dqi_lqi_bh, - lqinode->i_size + 2 * sb->s_blocksize); + i_size_read(lqinode) + 2 * sb->s_blocksize); if (status < 0) { mlog_errno(status); goto out; @@ -1125,14 +1125,14 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file( /* We are protected by dqio_sem so no locking needed */ status = ocfs2_extend_no_holes(lqinode, NULL, - lqinode->i_size + sb->s_blocksize, - lqinode->i_size); + i_size_read(lqinode) + sb->s_blocksize, + i_size_read(lqinode)); if (status < 0) { mlog_errno(status); goto out; } status = ocfs2_simple_size_update(lqinode, oinfo->dqi_lqi_bh, - lqinode->i_size + sb->s_blocksize); + i_size_read(lqinode) + sb->s_blocksize); if 
(status < 0) { mlog_errno(status); goto out; -- cgit v1.2.3-18-g5258 From 98ac9125c5afed8c5d2e4c5824988f8ad51814e1 Mon Sep 17 00:00:00 2001 From: Xue jiufei Date: Wed, 11 Sep 2013 14:19:46 -0700 Subject: ocfs2: dlm_request_all_locks() should deal with the status sent from target node dlm_request_all_locks() should deal with the status sent from the target node if DLM_LOCK_REQUEST_MSG is sent successfully; otherwise the recovery master will fall into an endless loop, waiting for other nodes to send locks and DLM_RECO_DATA_DONE_MSG to it. NodeA NodeB selected as recovery master dlm_remaster_locks() ->dlm_request_all_locks() send DLM_LOCK_REQUEST_MSG to nodeA It happened that NodeA cannot alloc memory when it processes this message. dlm_request_all_locks_handler() does not queue dlm_request_all_locks_worker and returns -ENOMEM. It will never send locks and DLM_RECO_DATA_DONE_MSG to NodeB. NodeB does not deal with the status sent from nodeA, and will fall into an endless loop waiting for the recovery state of NodeA to be changed. 
Signed-off-by: joyce Cc: Mark Fasheh Cc: Jeff Liu Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/dlm/dlmrecovery.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 773bd32bfd8..f9455021815 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -787,6 +787,7 @@ static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from, { struct dlm_lock_request lr; int ret; + int status; mlog(0, "\n"); @@ -800,13 +801,15 @@ static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from, // send message ret = o2net_send_message(DLM_LOCK_REQUEST_MSG, dlm->key, - &lr, sizeof(lr), request_from, NULL); + &lr, sizeof(lr), request_from, &status); /* negative status is handled by caller */ if (ret < 0) mlog(ML_ERROR, "%s: Error %d send LOCK_REQUEST to node %u " "to recover dead node %u\n", dlm->name, ret, request_from, dead_node); + else + ret = status; // return from here, then // sleep until all received or error return ret; -- cgit v1.2.3-18-g5258 From 7e9b79370733945b25c24e09d663b07c3936d10c Mon Sep 17 00:00:00 2001 From: Younger Liu Date: Wed, 11 Sep 2013 14:19:47 -0700 Subject: ocfs2: ac_bits_wanted should be local_alloc_bits when returns -ENOSPC There is an issue in reserving and claiming space for localalloc. When localalloc space is not enough, it would claim space from global_bitmap. And if there is not enough free space in global_bitmap, the size of the claimed space would be set to half of the original size and retry. The issue is as follows: osb->local_alloc_bits is set to half of the original size in ocfs2_recalc_la_window(), but ac->ac_bits_wanted is set to osb->local_alloc_default_bits which is not changed. localalloc always reserves and claims local_alloc_default_bits space and returns ENOSPC. So, ac->ac_bits_wanted should be osb->local_alloc_bits which would be changed. 
Signed-off-by: Younger Liu Cc: Joel Becker Cc: Mark Fasheh Cc: Jeff Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/localalloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index aebeacd807c..cd5496b7a0a 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -1082,7 +1082,7 @@ static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb, } retry_enospc: - (*ac)->ac_bits_wanted = osb->local_alloc_default_bits; + (*ac)->ac_bits_wanted = osb->local_alloc_bits; status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac); if (status == -ENOSPC) { if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) == @@ -1154,7 +1154,7 @@ retry_enospc: OCFS2_LA_DISABLED) goto bail; - ac->ac_bits_wanted = osb->local_alloc_default_bits; + ac->ac_bits_wanted = osb->local_alloc_bits; status = ocfs2_claim_clusters(handle, ac, osb->local_alloc_bits, &cluster_off, -- cgit v1.2.3-18-g5258 From 8dd7903e48df3779bc424196c22dc73b66d0643e Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Wed, 11 Sep 2013 14:19:49 -0700 Subject: fs/ocfs2/cluster/tcp.c: fix possible null pointer dereferences Fix some possible null pointer dereferences that were detected by the static code analyser, smatch. 
Signed-off-by: Sunil Mushran Reported-by: Dan Carpenter Reported-by: Guozhonghua Cc: Sunil Mushran Cc: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/cluster/tcp.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index d644dc61142..d04a3c2fad3 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -543,8 +543,9 @@ static void o2net_set_nn_state(struct o2net_node *nn, } if (was_valid && !valid) { - printk(KERN_NOTICE "o2net: No longer connected to " - SC_NODEF_FMT "\n", SC_NODEF_ARGS(old_sc)); + if (old_sc) + printk(KERN_NOTICE "o2net: No longer connected to " + SC_NODEF_FMT "\n", SC_NODEF_ARGS(old_sc)); o2net_complete_nodes_nsw(nn); } @@ -1695,13 +1696,12 @@ static void o2net_start_connect(struct work_struct *work) ret = 0; out: - if (ret) { + if (ret && sc) { printk(KERN_NOTICE "o2net: Connect attempt to " SC_NODEF_FMT " failed with errno %d\n", SC_NODEF_ARGS(sc), ret); /* 0 err so that another will be queued and attempted * from set_nn_state */ - if (sc) - o2net_ensure_shutdown(nn, sc, 0); + o2net_ensure_shutdown(nn, sc, 0); } if (sc) sc_put(sc); -- cgit v1.2.3-18-g5258 From df53cd3b70712cd136f10ef79457623c5c3764a4 Mon Sep 17 00:00:00 2001 From: Dong Fang Date: Wed, 11 Sep 2013 14:19:50 -0700 Subject: ocfs2: use list_for_each_entry() instead of list_for_each() [dan.carpenter@oracle.com: fix up some NULL dereference bugs] Signed-off-by: Dong Fang Cc: Mark Fasheh Cc: Joel Becker Cc: Jeff Liu Signed-off-by: Dan Carpenter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/cluster/heartbeat.c | 14 +++++--------- fs/ocfs2/dlm/dlmast.c | 8 +++----- fs/ocfs2/dlm/dlmcommon.h | 4 +--- fs/ocfs2/dlm/dlmconvert.c | 18 +++++++----------- fs/ocfs2/dlm/dlmdebug.c | 15 ++++----------- fs/ocfs2/dlm/dlmdomain.c | 35 ++++++++++++----------------------- fs/ocfs2/dlm/dlmlock.c | 9 ++------- 
fs/ocfs2/dlm/dlmmaster.c | 18 +++++------------- fs/ocfs2/dlm/dlmthread.c | 19 +++++-------------- fs/ocfs2/dlm/dlmunlock.c | 4 +--- 10 files changed, 45 insertions(+), 99 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 5c1c864e81c..25b72e82b8f 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -628,11 +628,9 @@ static void o2hb_fire_callbacks(struct o2hb_callback *hbcall, struct o2nm_node *node, int idx) { - struct list_head *iter; struct o2hb_callback_func *f; - list_for_each(iter, &hbcall->list) { - f = list_entry(iter, struct o2hb_callback_func, hc_item); + list_for_each_entry(f, &hbcall->list, hc_item) { mlog(ML_HEARTBEAT, "calling funcs %p\n", f); (f->hc_func)(node, idx, f->hc_data); } @@ -2516,8 +2514,7 @@ unlock: int o2hb_register_callback(const char *region_uuid, struct o2hb_callback_func *hc) { - struct o2hb_callback_func *tmp; - struct list_head *iter; + struct o2hb_callback_func *f; struct o2hb_callback *hbcall; int ret; @@ -2540,10 +2537,9 @@ int o2hb_register_callback(const char *region_uuid, down_write(&o2hb_callback_sem); - list_for_each(iter, &hbcall->list) { - tmp = list_entry(iter, struct o2hb_callback_func, hc_item); - if (hc->hc_priority < tmp->hc_priority) { - list_add_tail(&hc->hc_item, iter); + list_for_each_entry(f, &hbcall->list, hc_item) { + if (hc->hc_priority < f->hc_priority) { + list_add_tail(&hc->hc_item, &f->hc_item); break; } } diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c index fbec0be6232..b46278f9ae4 100644 --- a/fs/ocfs2/dlm/dlmast.c +++ b/fs/ocfs2/dlm/dlmast.c @@ -292,7 +292,7 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data, struct dlm_lock *lock = NULL; struct dlm_proxy_ast *past = (struct dlm_proxy_ast *) msg->buf; char *name; - struct list_head *iter, *head=NULL; + struct list_head *head = NULL; __be64 cookie; u32 flags; u8 node; @@ -373,8 +373,7 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, 
u32 len, void *data, /* try convert queue for both ast/bast */ head = &res->converting; lock = NULL; - list_for_each(iter, head) { - lock = list_entry (iter, struct dlm_lock, list); + list_for_each_entry(lock, head, list) { if (lock->ml.cookie == cookie) goto do_ast; } @@ -385,8 +384,7 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data, else head = &res->granted; - list_for_each(iter, head) { - lock = list_entry (iter, struct dlm_lock, list); + list_for_each_entry(lock, head, list) { if (lock->ml.cookie == cookie) goto do_ast; } diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index de854cca12a..e0517762fcc 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h @@ -1079,11 +1079,9 @@ static inline int dlm_lock_compatible(int existing, int request) static inline int dlm_lock_on_list(struct list_head *head, struct dlm_lock *lock) { - struct list_head *iter; struct dlm_lock *tmplock; - list_for_each(iter, head) { - tmplock = list_entry(iter, struct dlm_lock, list); + list_for_each_entry(tmplock, head, list) { if (tmplock == lock) return 1; } diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c index 29a886d1e82..e36d63ff178 100644 --- a/fs/ocfs2/dlm/dlmconvert.c +++ b/fs/ocfs2/dlm/dlmconvert.c @@ -123,7 +123,6 @@ static enum dlm_status __dlmconvert_master(struct dlm_ctxt *dlm, int *kick_thread) { enum dlm_status status = DLM_NORMAL; - struct list_head *iter; struct dlm_lock *tmplock=NULL; assert_spin_locked(&res->spinlock); @@ -185,16 +184,14 @@ static enum dlm_status __dlmconvert_master(struct dlm_ctxt *dlm, /* upconvert from here on */ status = DLM_NORMAL; - list_for_each(iter, &res->granted) { - tmplock = list_entry(iter, struct dlm_lock, list); + list_for_each_entry(tmplock, &res->granted, list) { if (tmplock == lock) continue; if (!dlm_lock_compatible(tmplock->ml.type, type)) goto switch_queues; } - list_for_each(iter, &res->converting) { - tmplock = list_entry(iter, struct dlm_lock, list); + 
list_for_each_entry(tmplock, &res->converting, list) { if (!dlm_lock_compatible(tmplock->ml.type, type)) goto switch_queues; /* existing conversion requests take precedence */ @@ -424,8 +421,8 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data, struct dlm_ctxt *dlm = data; struct dlm_convert_lock *cnv = (struct dlm_convert_lock *)msg->buf; struct dlm_lock_resource *res = NULL; - struct list_head *iter; struct dlm_lock *lock = NULL; + struct dlm_lock *tmp_lock; struct dlm_lockstatus *lksb; enum dlm_status status = DLM_NORMAL; u32 flags; @@ -471,14 +468,13 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data, dlm_error(status); goto leave; } - list_for_each(iter, &res->granted) { - lock = list_entry(iter, struct dlm_lock, list); - if (lock->ml.cookie == cnv->cookie && - lock->ml.node == cnv->node_idx) { + list_for_each_entry(tmp_lock, &res->granted, list) { + if (tmp_lock->ml.cookie == cnv->cookie && + tmp_lock->ml.node == cnv->node_idx) { + lock = tmp_lock; dlm_lock_get(lock); break; } - lock = NULL; } spin_unlock(&res->spinlock); if (!lock) { diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 0e28e242226..e33cd7a3c58 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -96,7 +96,6 @@ static void __dlm_print_lock(struct dlm_lock *lock) void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) { - struct list_head *iter2; struct dlm_lock *lock; char buf[DLM_LOCKID_NAME_MAX]; @@ -118,18 +117,15 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) res->inflight_locks, atomic_read(&res->asts_reserved)); dlm_print_lockres_refmap(res); printk(" granted queue:\n"); - list_for_each(iter2, &res->granted) { - lock = list_entry(iter2, struct dlm_lock, list); + list_for_each_entry(lock, &res->granted, list) { __dlm_print_lock(lock); } printk(" converting queue:\n"); - list_for_each(iter2, &res->converting) { - lock = list_entry(iter2, struct dlm_lock, list); + 
list_for_each_entry(lock, &res->converting, list) { __dlm_print_lock(lock); } printk(" blocked queue:\n"); - list_for_each(iter2, &res->blocked) { - lock = list_entry(iter2, struct dlm_lock, list); + list_for_each_entry(lock, &res->blocked, list) { __dlm_print_lock(lock); } } @@ -446,7 +442,6 @@ static int debug_mle_print(struct dlm_ctxt *dlm, char *buf, int len) { struct dlm_master_list_entry *mle; struct hlist_head *bucket; - struct hlist_node *list; int i, out = 0; unsigned long total = 0, longest = 0, bucket_count = 0; @@ -456,9 +451,7 @@ static int debug_mle_print(struct dlm_ctxt *dlm, char *buf, int len) spin_lock(&dlm->master_lock); for (i = 0; i < DLM_HASH_BUCKETS; i++) { bucket = dlm_master_hash(dlm, i); - hlist_for_each(list, bucket) { - mle = hlist_entry(list, struct dlm_master_list_entry, - master_hash_node); + hlist_for_each_entry(mle, bucket, master_hash_node) { ++total; ++bucket_count; if (len - out < 200) diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index dbb17c07656..8b3382abf84 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -193,7 +193,7 @@ struct dlm_lock_resource * __dlm_lookup_lockres_full(struct dlm_ctxt *dlm, unsigned int hash) { struct hlist_head *bucket; - struct hlist_node *list; + struct dlm_lock_resource *res; mlog(0, "%.*s\n", len, name); @@ -201,9 +201,7 @@ struct dlm_lock_resource * __dlm_lookup_lockres_full(struct dlm_ctxt *dlm, bucket = dlm_lockres_hash(dlm, hash); - hlist_for_each(list, bucket) { - struct dlm_lock_resource *res = hlist_entry(list, - struct dlm_lock_resource, hash_node); + hlist_for_each_entry(res, bucket, hash_node) { if (res->lockname.name[0] != name[0]) continue; if (unlikely(res->lockname.len != len)) @@ -262,22 +260,19 @@ struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm, static struct dlm_ctxt * __dlm_lookup_domain_full(const char *domain, int len) { - struct dlm_ctxt *tmp = NULL; - struct list_head *iter; + struct dlm_ctxt *tmp; 
assert_spin_locked(&dlm_domain_lock); /* tmp->name here is always NULL terminated, * but domain may not be! */ - list_for_each(iter, &dlm_domains) { - tmp = list_entry (iter, struct dlm_ctxt, list); + list_for_each_entry(tmp, &dlm_domains, list) { if (strlen(tmp->name) == len && memcmp(tmp->name, domain, len)==0) - break; - tmp = NULL; + return tmp; } - return tmp; + return NULL; } /* For null terminated domain strings ONLY */ @@ -366,25 +361,22 @@ static void __dlm_get(struct dlm_ctxt *dlm) * you shouldn't trust your pointer. */ struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm) { - struct list_head *iter; - struct dlm_ctxt *target = NULL; + struct dlm_ctxt *target; + struct dlm_ctxt *ret = NULL; spin_lock(&dlm_domain_lock); - list_for_each(iter, &dlm_domains) { - target = list_entry (iter, struct dlm_ctxt, list); - + list_for_each_entry(target, &dlm_domains, list) { if (target == dlm) { __dlm_get(target); + ret = target; break; } - - target = NULL; } spin_unlock(&dlm_domain_lock); - return target; + return ret; } int dlm_domain_fully_joined(struct dlm_ctxt *dlm) @@ -2296,13 +2288,10 @@ static DECLARE_RWSEM(dlm_callback_sem); void dlm_fire_domain_eviction_callbacks(struct dlm_ctxt *dlm, int node_num) { - struct list_head *iter; struct dlm_eviction_cb *cb; down_read(&dlm_callback_sem); - list_for_each(iter, &dlm->dlm_eviction_callbacks) { - cb = list_entry(iter, struct dlm_eviction_cb, ec_item); - + list_for_each_entry(cb, &dlm->dlm_eviction_callbacks, ec_item) { cb->ec_func(node_num, cb->ec_data); } up_read(&dlm_callback_sem); diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c index 47e67c2d228..5d32f7511f7 100644 --- a/fs/ocfs2/dlm/dlmlock.c +++ b/fs/ocfs2/dlm/dlmlock.c @@ -91,19 +91,14 @@ void dlm_destroy_lock_cache(void) static int dlm_can_grant_new_lock(struct dlm_lock_resource *res, struct dlm_lock *lock) { - struct list_head *iter; struct dlm_lock *tmplock; - list_for_each(iter, &res->granted) { - tmplock = list_entry(iter, struct dlm_lock, list); - + 
list_for_each_entry(tmplock, &res->granted, list) { if (!dlm_lock_compatible(tmplock->ml.type, lock->ml.type)) return 0; } - list_for_each(iter, &res->converting) { - tmplock = list_entry(iter, struct dlm_lock, list); - + list_for_each_entry(tmplock, &res->converting, list) { if (!dlm_lock_compatible(tmplock->ml.type, lock->ml.type)) return 0; if (!dlm_lock_compatible(tmplock->ml.convert_type, diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 33ecbe0e673..cf0f103963b 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -342,16 +342,13 @@ static int dlm_find_mle(struct dlm_ctxt *dlm, { struct dlm_master_list_entry *tmpmle; struct hlist_head *bucket; - struct hlist_node *list; unsigned int hash; assert_spin_locked(&dlm->master_lock); hash = dlm_lockid_hash(name, namelen); bucket = dlm_master_hash(dlm, hash); - hlist_for_each(list, bucket) { - tmpmle = hlist_entry(list, struct dlm_master_list_entry, - master_hash_node); + hlist_for_each_entry(tmpmle, bucket, master_hash_node) { if (!dlm_mle_equal(dlm, tmpmle, name, namelen)) continue; dlm_get_mle(tmpmle); @@ -3183,7 +3180,7 @@ void dlm_clean_master_list(struct dlm_ctxt *dlm, u8 dead_node) struct dlm_master_list_entry *mle; struct dlm_lock_resource *res; struct hlist_head *bucket; - struct hlist_node *list; + struct hlist_node *tmp; unsigned int i; mlog(0, "dlm=%s, dead node=%u\n", dlm->name, dead_node); @@ -3194,10 +3191,7 @@ top: spin_lock(&dlm->master_lock); for (i = 0; i < DLM_HASH_BUCKETS; i++) { bucket = dlm_master_hash(dlm, i); - hlist_for_each(list, bucket) { - mle = hlist_entry(list, struct dlm_master_list_entry, - master_hash_node); - + hlist_for_each_entry_safe(mle, tmp, bucket, master_hash_node) { BUG_ON(mle->type != DLM_MLE_BLOCK && mle->type != DLM_MLE_MASTER && mle->type != DLM_MLE_MIGRATION); @@ -3378,7 +3372,7 @@ void dlm_force_free_mles(struct dlm_ctxt *dlm) int i; struct hlist_head *bucket; struct dlm_master_list_entry *mle; - struct hlist_node *tmp, *list; + 
struct hlist_node *tmp; /* * We notified all other nodes that we are exiting the domain and @@ -3394,9 +3388,7 @@ void dlm_force_free_mles(struct dlm_ctxt *dlm) for (i = 0; i < DLM_HASH_BUCKETS; i++) { bucket = dlm_master_hash(dlm, i); - hlist_for_each_safe(list, tmp, bucket) { - mle = hlist_entry(list, struct dlm_master_list_entry, - master_hash_node); + hlist_for_each_entry_safe(mle, tmp, bucket, master_hash_node) { if (mle->type != DLM_MLE_BLOCK) { mlog(ML_ERROR, "bad mle: %p\n", mle); dlm_print_one_mle(mle); diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index e73c833fc2a..9db869de829 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c @@ -286,8 +286,6 @@ static void dlm_shuffle_lists(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) { struct dlm_lock *lock, *target; - struct list_head *iter; - struct list_head *head; int can_grant = 1; /* @@ -314,9 +312,7 @@ converting: dlm->name, res->lockname.len, res->lockname.name); BUG(); } - head = &res->granted; - list_for_each(iter, head) { - lock = list_entry(iter, struct dlm_lock, list); + list_for_each_entry(lock, &res->granted, list) { if (lock==target) continue; if (!dlm_lock_compatible(lock->ml.type, @@ -333,9 +329,8 @@ converting: target->ml.convert_type; } } - head = &res->converting; - list_for_each(iter, head) { - lock = list_entry(iter, struct dlm_lock, list); + + list_for_each_entry(lock, &res->converting, list) { if (lock==target) continue; if (!dlm_lock_compatible(lock->ml.type, @@ -384,9 +379,7 @@ blocked: goto leave; target = list_entry(res->blocked.next, struct dlm_lock, list); - head = &res->granted; - list_for_each(iter, head) { - lock = list_entry(iter, struct dlm_lock, list); + list_for_each_entry(lock, &res->granted, list) { if (lock==target) continue; if (!dlm_lock_compatible(lock->ml.type, target->ml.type)) { @@ -400,9 +393,7 @@ blocked: } } - head = &res->converting; - list_for_each(iter, head) { - lock = list_entry(iter, struct dlm_lock, list); + 
list_for_each_entry(lock, &res->converting, list) { if (lock==target) continue; if (!dlm_lock_compatible(lock->ml.type, target->ml.type)) { diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c index 850aa7e8753..5698b52cf5c 100644 --- a/fs/ocfs2/dlm/dlmunlock.c +++ b/fs/ocfs2/dlm/dlmunlock.c @@ -388,7 +388,6 @@ int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data, struct dlm_ctxt *dlm = data; struct dlm_unlock_lock *unlock = (struct dlm_unlock_lock *)msg->buf; struct dlm_lock_resource *res = NULL; - struct list_head *iter; struct dlm_lock *lock = NULL; enum dlm_status status = DLM_NORMAL; int found = 0, i; @@ -458,8 +457,7 @@ int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data, } for (i=0; i<3; i++) { - list_for_each(iter, queue) { - lock = list_entry(iter, struct dlm_lock, list); + list_for_each_entry(lock, queue, list) { if (lock->ml.cookie == unlock->cookie && lock->ml.node == unlock->node_idx) { dlm_lock_get(lock); -- cgit v1.2.3-18-g5258 From 3d94ea51c1d8db6f41268a9d2aea5f5771e9a8d3 Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Wed, 11 Sep 2013 14:19:51 -0700 Subject: ocfs2: clean up dead code in ocfs2_acl_from_xattr() In ocfs2_acl_from_xattr(), if size is less than sizeof(struct posix_acl_entry), it returns ERR_PTR(-EINVAL) directly. Then assign (size / sizeof(struct posix_acl_entry)) to count which will be at least 1, that means the following branch (count < 0) and (count == 0) will never be true. 
Signed-off-by: Joseph Qi Cc: Mark Fasheh Acked-by: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/acl.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index 8a404576fb2..b4f788e0ca3 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -51,10 +51,6 @@ static struct posix_acl *ocfs2_acl_from_xattr(const void *value, size_t size) return ERR_PTR(-EINVAL); count = size / sizeof(struct posix_acl_entry); - if (count < 0) - return ERR_PTR(-EINVAL); - if (count == 0) - return NULL; acl = posix_acl_alloc(count, GFP_NOFS); if (!acl) -- cgit v1.2.3-18-g5258 From 2b0f6eae2dd2f7f21dbf93241938a687f6757dea Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Wed, 11 Sep 2013 14:19:52 -0700 Subject: ocfs2: add missing return value check of ocfs2_get_clusters() In ocfs2_attach_refcount_tree() and ocfs2_duplicate_extent_list(), if error occurs when calling ocfs2_get_clusters(), it will go with unexpected behavior as local variables p_cluster, num_clusters and ext_flags are declared without initialization. 
Signed-off-by: Joseph Qi Reviewed-by: Jie Liu Cc: Joel Becker Cc: Mark Fasheh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/refcounttree.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index a70d604593b..bf4dfc14bb2 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -3854,7 +3854,10 @@ static int ocfs2_attach_refcount_tree(struct inode *inode, while (cpos < clusters) { ret = ocfs2_get_clusters(inode, cpos, &p_cluster, &num_clusters, &ext_flags); - + if (ret) { + mlog_errno(ret); + goto unlock; + } if (p_cluster && !(ext_flags & OCFS2_EXT_REFCOUNTED)) { ret = ocfs2_add_refcount_flag(inode, &di_et, &ref_tree->rf_ci, @@ -4025,7 +4028,10 @@ static int ocfs2_duplicate_extent_list(struct inode *s_inode, while (cpos < clusters) { ret = ocfs2_get_clusters(s_inode, cpos, &p_cluster, &num_clusters, &ext_flags); - + if (ret) { + mlog_errno(ret); + goto out; + } if (p_cluster) { ret = ocfs2_add_refcounted_extent(t_inode, &et, ref_ci, ref_root_bh, -- cgit v1.2.3-18-g5258 From 4704aa30fc35010dd9c3ce1d9d2e77af09c2c081 Mon Sep 17 00:00:00 2001 From: Jie Liu Date: Wed, 11 Sep 2013 14:19:53 -0700 Subject: ocfs2: fix a memory leak in __ocfs2_move_extents() The ocfs2 path is not properly freed which leads to a memory leak at __ocfs2_move_extents(). This patch stops the leaks of the ocfs2_path structure. 
Signed-off-by: Jie Liu Reviewed-by: Younger Liu Cc: Joel Becker Cc: Mark Fasheh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/move_extents.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index 415928536c5..3d3f3c83065 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c @@ -152,6 +152,7 @@ static int __ocfs2_move_extent(handle_t *handle, } out: + ocfs2_free_path(path); return ret; } -- cgit v1.2.3-18-g5258 From 17caf9555edc27a0c6df512de0879b357ebacae4 Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Wed, 11 Sep 2013 14:19:55 -0700 Subject: ocfs2: add the missing return value check of ocfs2_xattr_get_clusters In ocfs2_xattr_value_attach_refcount(), if error occurs when calling ocfs2_xattr_get_clusters(), it will go with unexpected behavior since local variables p_cluster, num_clusters and ext_flags are declared without initialization. Signed-off-by: Joseph Qi Cc: Joel Becker Cc: Mark Fasheh Acked-by: Jie Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/xattr.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 317ef0abccb..1cbc2231a9f 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -5881,6 +5881,10 @@ static int ocfs2_xattr_value_attach_refcount(struct inode *inode, while (cpos < clusters) { ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, &num_clusters, el, &ext_flags); + if (ret) { + mlog_errno(ret); + break; + } cpos += num_clusters; if ((ext_flags & OCFS2_EXT_REFCOUNTED)) -- cgit v1.2.3-18-g5258 From 6ea437a3639b15e312f81819bb20f737ff596194 Mon Sep 17 00:00:00 2001 From: Younger Liu Date: Wed, 11 Sep 2013 14:19:56 -0700 Subject: ocfs2: free meta_ac and data_ac when ocfs2_start_trans fails in ocfs2_xattr_set() In ocfs2_xattr_set(), if ocfs2_start_trans failed, meta_ac and data_ac should be freed. Otherwise, it would lead to a memory leak.
Signed-off-by: Younger Liu Cc: Joseph Qi Reviewed-by: Jie Liu Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/xattr.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 1cbc2231a9f..18330f5b57b 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -3505,7 +3505,7 @@ int ocfs2_xattr_set(struct inode *inode, int ret, credits, ref_meta = 0, ref_credits = 0; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct inode *tl_inode = osb->osb_tl_inode; - struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, }; + struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, NULL, }; struct ocfs2_refcount_tree *ref_tree = NULL; struct ocfs2_xattr_info xi = { @@ -3609,13 +3609,14 @@ int ocfs2_xattr_set(struct inode *inode, if (IS_ERR(ctxt.handle)) { ret = PTR_ERR(ctxt.handle); mlog_errno(ret); - goto cleanup; + goto out_free_ac; } ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt); ocfs2_commit_trans(osb, ctxt.handle); +out_free_ac: if (ctxt.data_ac) ocfs2_free_alloc_context(ctxt.data_ac); if (ctxt.meta_ac) -- cgit v1.2.3-18-g5258 From 69b2bd16d9792085d57865fcaac55753803a4f5d Mon Sep 17 00:00:00 2001 From: Xue jiufei Date: Wed, 11 Sep 2013 14:19:57 -0700 Subject: ocfs2/dlm: force clean refmap when doing local cleanup dlm_do_local_recovery_cleanup() should force clean refmap if the owner of lockres is UNKNOWN. Otherwise node may hang when umounting filesystems. Here's the situation: Node1 Node2 dlmlock() -> dlm_get_lock_resource() send DLM_MASTER_REQUEST_MSG to other nodes. trying to master this lockres, return MAYBE. selected as the master of lockresA, set mle->master to Node1, and do assert_master, send DLM_ASSERT_MASTER_MSG to Node2. Node 2 has interest on lockresA and return DLM_ASSERT_RESPONSE_MASTERY_REF then something happened and Node2 crashed. 
Receiving DLM_ASSERT_RESPONSE_MASTERY_REF, set Node2 into refmap, and keep sending DLM_ASSERT_MASTER_MSG to other nodes o2hb found node2 down, calling dlm_hb_node_down() --> dlm_do_local_recovery_cleanup() the master of lockresA is still UNKNOWN, no need to call dlm_free_dead_locks(). Set the master of lockresA to Node1, but Node2 still remains in refmap. When Node1 umounts, it finds that the refmap of lockresA is not empty and attempts to migrate it to Node2. But Node2 is already down, so umount hangs, trying to migrate lockresA again and again. Signed-off-by: joyce Cc: Mark Fasheh Cc: Joel Becker Cc: Jie Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/dlm/dlmrecovery.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'fs') diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index f9455021815..0b5adca1b17 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -2331,6 +2331,14 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) } else if (res->owner == dlm->node_num) { dlm_free_dead_locks(dlm, res, dead_node); __dlm_lockres_calc_usage(dlm, res); + } else if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) { + if (test_bit(dead_node, res->refmap)) { + mlog(0, "%s:%.*s: dead node %u had a ref, but had " + "no locks and had not purged before dying\n", + dlm->name, res->lockname.len, + res->lockname.name, dead_node); + dlm_lockres_clear_refmap_bit(dlm, res, dead_node); + } } spin_unlock(&res->spinlock); } -- cgit v1.2.3-18-g5258 From 6cae6d3189ef34647bca9b9b1d240ebd760e5dea Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Wed, 11 Sep 2013 14:19:58 -0700 Subject: ocfs2: fix possible double free in ocfs2_reflink_xattr_rec In ocfs2_reflink_xattr_rec(), meta_ac and data_ac are allocated by calling ocfs2_lock_reflink_xattr_rec_allocators(). Once an error occurs when allocating *data_ac, it frees *meta_ac which is allocated before.
Here it mistakenly sets meta_ac to NULL instead of *meta_ac. Then ocfs2_reflink_xattr_rec() will try to free meta_ac again, which is already invalid. Signed-off-by: Joseph Qi Reviewed-by: Jie Liu Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/xattr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 18330f5b57b..6ce0686eab7 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -6802,7 +6802,7 @@ out: if (ret) { if (*meta_ac) { ocfs2_free_alloc_context(*meta_ac); - meta_ac = NULL; + *meta_ac = NULL; } } -- cgit v1.2.3-18-g5258 From 7aebff18b91ebdefe15bb7d3f5d711df8312a7fb Mon Sep 17 00:00:00 2001 From: Younger Liu Date: Wed, 11 Sep 2013 14:19:59 -0700 Subject: ocfs2: free path in ocfs2_remove_inode_range() In ocfs2_remove_inode_range(), there is a memory leak. Memory for the variable path is allocated with ocfs2_new_path_from_et(), but it is never freed. Signed-off-by: Younger Liu Reviewed-by: Jie Liu Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/file.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 409c549ae02..4f8197caa48 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1796,6 +1796,7 @@ static int ocfs2_remove_inode_range(struct inode *inode, ocfs2_truncate_cluster_pages(inode, byte_start, byte_len); out: + ocfs2_free_path(path); ocfs2_schedule_truncate_log_flush(osb, 1); ocfs2_run_deallocs(osb, &dealloc); -- cgit v1.2.3-18-g5258 From 9a239e4c68df78888f67b1d4e7d507e24ac6764f Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Wed, 11 Sep 2013 14:20:00 -0700 Subject: ocfs2: adjust code style for o2net_handler_tree_lookup() The code formatting in o2net_handler_tree_lookup() appears to have been corrupted by mistake, so adjust it to improve readability.
Signed-off-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/cluster/tcp.c | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index d04a3c2fad3..8c50c238577 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -766,32 +766,32 @@ static struct o2net_msg_handler * o2net_handler_tree_lookup(u32 msg_type, u32 key, struct rb_node ***ret_p, struct rb_node **ret_parent) { - struct rb_node **p = &o2net_handler_tree.rb_node; - struct rb_node *parent = NULL; + struct rb_node **p = &o2net_handler_tree.rb_node; + struct rb_node *parent = NULL; struct o2net_msg_handler *nmh, *ret = NULL; int cmp; - while (*p) { - parent = *p; - nmh = rb_entry(parent, struct o2net_msg_handler, nh_node); + while (*p) { + parent = *p; + nmh = rb_entry(parent, struct o2net_msg_handler, nh_node); cmp = o2net_handler_cmp(nmh, msg_type, key); - if (cmp < 0) - p = &(*p)->rb_left; - else if (cmp > 0) - p = &(*p)->rb_right; - else { + if (cmp < 0) + p = &(*p)->rb_left; + else if (cmp > 0) + p = &(*p)->rb_right; + else { ret = nmh; - break; + break; } - } + } - if (ret_p != NULL) - *ret_p = p; - if (ret_parent != NULL) - *ret_parent = parent; + if (ret_p != NULL) + *ret_p = p; + if (ret_parent != NULL) + *ret_parent = parent; - return ret; + return ret; } static void o2net_handler_kref_release(struct kref *kref) -- cgit v1.2.3-18-g5258 From 03dbe88aa9cd0d7b0a876b38bd75ce73b4522454 Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Wed, 11 Sep 2013 14:20:01 -0700 Subject: ocfs2: avoid possible NULL pointer dereference in o2net_accept_one() Since o2nm_get_node_by_num() may return NULL, we add this check in o2net_accept_one() to avoid possible NULL pointer dereference. 
Signed-off-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/cluster/tcp.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 8c50c238577..2cd2406b414 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -1873,12 +1873,16 @@ static int o2net_accept_one(struct socket *sock) if (o2nm_this_node() >= node->nd_num) { local_node = o2nm_get_node_by_num(o2nm_this_node()); - printk(KERN_NOTICE "o2net: Unexpected connect attempt seen " - "at node '%s' (%u, %pI4:%d) from node '%s' (%u, " - "%pI4:%d)\n", local_node->nd_name, local_node->nd_num, - &(local_node->nd_ipv4_address), - ntohs(local_node->nd_ipv4_port), node->nd_name, - node->nd_num, &sin.sin_addr.s_addr, ntohs(sin.sin_port)); + if (local_node) + printk(KERN_NOTICE "o2net: Unexpected connect attempt " + "seen at node '%s' (%u, %pI4:%d) from " + "node '%s' (%u, %pI4:%d)\n", + local_node->nd_name, local_node->nd_num, + &(local_node->nd_ipv4_address), + ntohs(local_node->nd_ipv4_port), + node->nd_name, + node->nd_num, &sin.sin_addr.s_addr, + ntohs(sin.sin_port)); ret = -EINVAL; goto out; } -- cgit v1.2.3-18-g5258 From 6f8648e894498f769832b79399b1cfabd2973ea9 Mon Sep 17 00:00:00 2001 From: Joyce Date: Wed, 11 Sep 2013 14:20:03 -0700 Subject: ocfs2: fix a tiny race case when firing callbacks In o2hb_shutdown_slot() and o2hb_check_slot(), since event is defined as local, it is only valid during the call stack. 
So the following tiny race case may happen in a multi-volumes mounted environment: o2hb-vol1 o2hb-vol2 1) o2hb_shutdown_slot allocate local event1 2) queue_node_event add event1 to global o2hb_node_events 3) o2hb_shutdown_slot allocate local event2 4) queue_node_event add event2 to global o2hb_node_events 5) o2hb_run_event_list delete event1 from o2hb_node_events 6) o2hb_run_event_list event1 empty, return 7) o2hb_shutdown_slot event1 lifecycle ends 8) o2hb_fire_callbacks event1 is already *invalid* This patch lets it wait on o2hb_callback_sem when another thread is firing callbacks. And for performance consideration, we only call o2hb_run_event_list when there is an event queued. Signed-off-by: Joyce Signed-off-by: Joseph Qi Cc: Joel Becker Cc: Mark Fasheh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/cluster/heartbeat.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 25b72e82b8f..363f0dcc924 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -639,16 +639,9 @@ static void o2hb_fire_callbacks(struct o2hb_callback *hbcall, /* Will run the list in order until we process the passed event */ static void o2hb_run_event_list(struct o2hb_node_event *queued_event) { - int empty; struct o2hb_callback *hbcall; struct o2hb_node_event *event; - spin_lock(&o2hb_live_lock); - empty = list_empty(&queued_event->hn_item); - spin_unlock(&o2hb_live_lock); - if (empty) - return; - /* Holding callback sem assures we don't alter the callback * lists when doing this, and serializes ourselves with other * processes wanting callbacks. 
*/ @@ -707,6 +700,7 @@ static void o2hb_shutdown_slot(struct o2hb_disk_slot *slot) struct o2hb_node_event event = { .hn_item = LIST_HEAD_INIT(event.hn_item), }; struct o2nm_node *node; + int queued = 0; node = o2nm_get_node_by_num(slot->ds_node_num); if (!node) @@ -724,11 +718,13 @@ static void o2hb_shutdown_slot(struct o2hb_disk_slot *slot) o2hb_queue_node_event(&event, O2HB_NODE_DOWN_CB, node, slot->ds_node_num); + queued = 1; } } spin_unlock(&o2hb_live_lock); - o2hb_run_event_list(&event); + if (queued) + o2hb_run_event_list(&event); o2nm_node_put(node); } @@ -788,6 +784,7 @@ static int o2hb_check_slot(struct o2hb_region *reg, unsigned int dead_ms = o2hb_dead_threshold * O2HB_REGION_TIMEOUT_MS; unsigned int slot_dead_ms; int tmp; + int queued = 0; memcpy(hb_block, slot->ds_raw_block, reg->hr_block_bytes); @@ -881,6 +878,7 @@ fire_callbacks: slot->ds_node_num); changed = 1; + queued = 1; } list_add_tail(&slot->ds_live_item, @@ -932,6 +930,7 @@ fire_callbacks: node, slot->ds_node_num); changed = 1; + queued = 1; } /* We don't clear this because the node is still @@ -947,7 +946,8 @@ fire_callbacks: out: spin_unlock(&o2hb_live_lock); - o2hb_run_event_list(&event); + if (queued) + o2hb_run_event_list(&event); if (node) o2nm_node_put(node); -- cgit v1.2.3-18-g5258 From a72e27d3727b383be39498f8b5c9b944d30e0f9b Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Wed, 11 Sep 2013 14:20:04 -0700 Subject: ocfs2: remove unused variable ip in dlmfs_get_root_inode() Variable ip in dlmfs_get_root_inode() is defined but not used. So clean it up. 
Signed-off-by: Joseph Qi Reviewed-by: Jie Liu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/dlmfs/dlmfs.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index 12bafb7265c..efa2b3d339e 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -401,11 +401,8 @@ static struct inode *dlmfs_get_root_inode(struct super_block *sb) { struct inode *inode = new_inode(sb); umode_t mode = S_IFDIR | 0755; - struct dlmfs_inode_private *ip; if (inode) { - ip = DLMFS_I(inode); - inode->i_ino = get_next_ino(); inode_init_owner(inode, NULL, mode); inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info; -- cgit v1.2.3-18-g5258 From 28e8be31803b19d0d8f76216cb11b480b8a98bec Mon Sep 17 00:00:00 2001 From: Jie Liu Date: Wed, 11 Sep 2013 14:20:05 -0700 Subject: ocfs2: fix the end cluster offset of FIEMAP Call fiemap ioctl(2) with given start offset as well as an desired mapping range should show extents if possible. However, we somehow figure out the end offset of mapping via 'mapping_end -= cpos' before iterating the extent records which would cause problems if the given fiemap length is too small to a cluster size, e.g, Cluster size 4096: debugfs.ocfs2 1.6.3 Block Size Bits: 12 Cluster Size Bits: 12 The extended fiemap test utility From David: https://gist.github.com/anonymous/6172331 # dd if=/dev/urandom of=/ocfs2/test_file bs=1M count=1000 # ./fiemap /ocfs2/test_file 4096 10 start: 4096, length: 10 File /ocfs2/test_file has 0 extents: # Logical Physical Length Flags ^^^^^ <-- No extent is shown In this case, at ocfs2_fiemap(): cpos == mapping_end == 1. Hence the loop of searching extent records was not executed at all. This patch remove the in question 'mapping_end -= cpos', and loops until the cpos is larger than the mapping_end as usual. 
# ./fiemap /ocfs2/test_file 4096 10 start: 4096, length: 10 File /ocfs2/test_file has 1 extents: # Logical Physical Length Flags 0: 0000000000000000 0000000056a01000 0000000006a00000 0000 Signed-off-by: Jie Liu Reported-by: David Weber Tested-by: David Weber Cc: Sunil Mushran Cc: Mark Fashen Cc: Joel Becker Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/extent_map.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index 4bf2b763467..767370b656c 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c @@ -781,7 +781,6 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, cpos = map_start >> osb->s_clustersize_bits; mapping_end = ocfs2_clusters_for_bytes(inode->i_sb, map_start + map_len); - mapping_end -= cpos; is_last = 0; while (cpos < mapping_end && !is_last) { u32 fe_flags; -- cgit v1.2.3-18-g5258 From 47df3ddedd22c3f8e68aff831edb7921937674a2 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 11 Sep 2013 14:22:22 -0700 Subject: writeback: fix occasional slow sync(1) In case when system contains no dirty pages, wakeup_flusher_threads() will submit WB_SYNC_NONE writeback for 0 pages so wb_writeback() exits immediately without doing anything, even though there are dirty inodes in the system. Thus sync(1) will write all the dirty inodes from a WB_SYNC_ALL writeback pass which is slow. Fix the problem by using get_nr_dirty_pages() in wakeup_flusher_threads() instead of calculating number of dirty pages manually. That function also takes number of dirty inodes into account. 
Signed-off-by: Jan Kara Reported-by: Paul Taysom Cc: Wu Fengguang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fs-writeback.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 68851ff2fd4..87d77811802 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1049,10 +1049,8 @@ void wakeup_flusher_threads(long nr_pages, enum wb_reason reason) { struct backing_dev_info *bdi; - if (!nr_pages) { - nr_pages = global_page_state(NR_FILE_DIRTY) + - global_page_state(NR_UNSTABLE_NFS); - } + if (!nr_pages) + nr_pages = get_nr_dirty_pages(); rcu_read_lock(); list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) { -- cgit v1.2.3-18-g5258 From d9104d1ca9662498339c0de975b4666c30485f4e Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 11 Sep 2013 14:22:24 -0700 Subject: mm: track vma changes with VM_SOFTDIRTY bit Pavel reported that if a vma area gets unmapped and then mapped (or expanded) in-place, the soft dirty tracker won't be able to recognize this situation, since it works at the pte level and ptes get zapped on unmap, losing the soft dirty bit of course. So to resolve this situation we need to track actions at the vma level; that is where the VM_SOFTDIRTY flag comes in. When a new vma area is created (or an old one expanded) we set this bit, and keep it there until the application calls for clearing the soft dirty bit. Thus when a user space application tracks memory changes, it can now detect if a vma area is renewed.
Reported-by: Pavel Emelyanov Signed-off-by: Cyrill Gorcunov Cc: Andy Lutomirski Cc: Matt Mackall Cc: Xiao Guangrong Cc: Marcelo Tosatti Cc: KOSAKI Motohiro Cc: Stephen Rothwell Cc: Peter Zijlstra Cc: "Aneesh Kumar K.V" Cc: Rob Landley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 2 +- fs/proc/task_mmu.c | 46 ++++++++++++++++++++++++++++++++++++---------- 2 files changed, 37 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index fd774c7cb48..2d1e52a58fe 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -266,7 +266,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm) BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP); vma->vm_end = STACK_TOP_MAX; vma->vm_start = vma->vm_end - PAGE_SIZE; - vma->vm_flags = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP; + vma->vm_flags = VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP; vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); INIT_LIST_HEAD(&vma->anon_vma_chain); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 107d026f5d6..09228639b83 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -740,6 +740,9 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma, ptent = pte_file_clear_soft_dirty(ptent); } + if (vma->vm_flags & VM_SOFTDIRTY) + vma->vm_flags &= ~VM_SOFTDIRTY; + set_pte_at(vma->vm_mm, addr, pte, ptent); #endif } @@ -949,13 +952,15 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, if (is_migration_entry(entry)) page = migration_entry_to_page(entry); } else { - *pme = make_pme(PM_NOT_PRESENT(pm->v2)); + if (vma->vm_flags & VM_SOFTDIRTY) + flags2 |= __PM_SOFT_DIRTY; + *pme = make_pme(PM_NOT_PRESENT(pm->v2) | PM_STATUS2(pm->v2, flags2)); return; } if (page && !PageAnon(page)) flags |= PM_FILE; - if (pte_soft_dirty(pte)) + if ((vma->vm_flags & VM_SOFTDIRTY) || pte_soft_dirty(pte)) flags2 |= __PM_SOFT_DIRTY; *pme = make_pme(PM_PFRAME(frame) | PM_STATUS2(pm->v2, flags2) | flags); @@ -974,7 
+979,7 @@ static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *p *pme = make_pme(PM_PFRAME(pmd_pfn(pmd) + offset) | PM_STATUS2(pm->v2, pmd_flags2) | PM_PRESENT); else - *pme = make_pme(PM_NOT_PRESENT(pm->v2)); + *pme = make_pme(PM_NOT_PRESENT(pm->v2) | PM_STATUS2(pm->v2, pmd_flags2)); } #else static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm, @@ -997,7 +1002,11 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, if (vma && pmd_trans_huge_lock(pmd, vma) == 1) { int pmd_flags2; - pmd_flags2 = (pmd_soft_dirty(*pmd) ? __PM_SOFT_DIRTY : 0); + if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(*pmd)) + pmd_flags2 = __PM_SOFT_DIRTY; + else + pmd_flags2 = 0; + for (; addr != end; addr += PAGE_SIZE) { unsigned long offset; @@ -1015,12 +1024,17 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, if (pmd_trans_unstable(pmd)) return 0; for (; addr != end; addr += PAGE_SIZE) { + int flags2; /* check to see if we've left 'vma' behind * and need a new, higher one */ if (vma && (addr >= vma->vm_end)) { vma = find_vma(walk->mm, addr); - pme = make_pme(PM_NOT