diff options
Diffstat (limited to 'fs/notify/mark.c')
| -rw-r--r-- | fs/notify/mark.c | 146 | 
1 files changed, 75 insertions, 71 deletions
diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 325185e514b..d90deaa08e7 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -20,28 +20,29 @@   * fsnotify inode mark locking/lifetime/and refcnting   *   * REFCNT: - * The mark->refcnt tells how many "things" in the kernel currently are - * referencing this object.  The object typically will live inside the kernel - * with a refcnt of 2, one for each list it is on (i_list, g_list).  Any task - * which can find this object holding the appropriete locks, can take a reference - * and the object itself is guarenteed to survive until the reference is dropped. + * The group->recnt and mark->refcnt tell how many "things" in the kernel + * currently are referencing the objects. Both kind of objects typically will + * live inside the kernel with a refcnt of 2, one for its creation and one for + * the reference a group and a mark hold to each other. + * If you are holding the appropriate locks, you can take a reference and the + * object itself is guaranteed to survive until the reference is dropped.   *   * LOCKING: - * There are 3 spinlocks involved with fsnotify inode marks and they MUST - * be taken in order as follows: + * There are 3 locks involved with fsnotify inode marks and they MUST be taken + * in order as follows:   * + * group->mark_mutex   * mark->lock - * group->mark_lock   * inode->i_lock   * - * mark->lock protects 2 things, mark->group and mark->inode.  You must hold - * that lock to dereference either of these things (they could be NULL even with - * the lock) - * - * group->mark_lock protects the marks_list anchored inside a given group - * and each mark is hooked via the g_list.  It also sorta protects the - * free_g_list, which when used is anchored by a private list on the stack of the - * task which held the group->mark_lock. + * group->mark_mutex protects the marks_list anchored inside a given group and + * each mark is hooked via the g_list.  It also protects the groups private + * data (i.e group limits). + + * mark->lock protects the marks attributes like its masks and flags. + * Furthermore it protects the access to a reference of the group that the mark + * is assigned to as well as the access to a reference of the inode/vfsmount + * that is being watched by the mark.   *   * inode->i_lock protects the i_fsnotify_marks list anchored inside a   * given inode and each mark is hooked via the i_list. (and sorta the @@ -64,18 +65,11 @@   * inode.  We take i_lock and walk the i_fsnotify_marks safely.  For each   * mark on the list we take a reference (so the mark can't disappear under us).   * We remove that mark form the inode's list of marks and we add this mark to a - * private list anchored on the stack using i_free_list;  At this point we no - * longer fear anything finding the mark using the inode's list of marks. - * - * We can safely and locklessly run the private list on the stack of everything - * we just unattached from the original inode.  For each mark on the private list - * we grab the mark-> and can thus dereference mark->group and mark->inode.  If - * we see the group and inode are not NULL we take those locks.  Now holding all - * 3 locks we can completely remove the mark from other tasks finding it in the - * future.  Remember, 10 things might already be referencing this mark, but they - * better be holding a ref.  We drop our reference we took before we unhooked it - * from the inode.  When the ref hits 0 we can free the mark. - * + * private list anchored on the stack using i_free_list; we walk i_free_list + * and before we destroy the mark we make sure that we dont race with a + * concurrent destroy_group by getting a ref to the marks group and taking the + * groups mutex. +   * Very similarly for freeing by group, except we use free_g_list.   *   * This has the very interesting property of being able to run concurrently with @@ -91,9 +85,8 @@  #include <linux/slab.h>  #include <linux/spinlock.h>  #include <linux/srcu.h> -#include <linux/writeback.h> /* for inode_lock */ -#include <asm/atomic.h> +#include <linux/atomic.h>  #include <linux/fsnotify_backend.h>  #include "fsnotify.h" @@ -110,8 +103,11 @@ void fsnotify_get_mark(struct fsnotify_mark *mark)  void fsnotify_put_mark(struct fsnotify_mark *mark)  { -	if (atomic_dec_and_test(&mark->refcnt)) +	if (atomic_dec_and_test(&mark->refcnt)) { +		if (mark->group) +			fsnotify_put_group(mark->group);  		mark->free_mark(mark); +	}  }  /* @@ -119,14 +115,14 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)   * The caller had better be holding a reference to this mark so we don't actually   * do the final put under the mark->lock   */ -void fsnotify_destroy_mark(struct fsnotify_mark *mark) +void fsnotify_destroy_mark_locked(struct fsnotify_mark *mark, +				  struct fsnotify_group *group)  { -	struct fsnotify_group *group;  	struct inode *inode = NULL; -	spin_lock(&mark->lock); +	BUG_ON(!mutex_is_locked(&group->mark_mutex)); -	group = mark->group; +	spin_lock(&mark->lock);  	/* something else already called this function on this mark */  	if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) { @@ -136,11 +132,6 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark)  	mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE; -	/* 1 from caller and 1 for being on i_list/g_list */ -	BUG_ON(atomic_read(&mark->refcnt) < 2); - -	spin_lock(&group->mark_lock); -  	if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) {  		inode = mark->i.inode;  		fsnotify_destroy_inode_mark(mark); @@ -151,13 +142,22 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark)  	list_del_init(&mark->g_list); -	spin_unlock(&group->mark_lock);  	spin_unlock(&mark->lock); +	if (inode && (mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED)) +		iput(inode); +	/* release lock temporarily */ +	mutex_unlock(&group->mark_mutex); +  	spin_lock(&destroy_lock);  	list_add(&mark->destroy_list, &destroy_list);  	spin_unlock(&destroy_lock);  	wake_up(&destroy_waitq); +	/* +	 * We don't necessarily have a ref on mark from caller so the above destroy +	 * may have actually freed it, unless this group provides a 'freeing_mark' +	 * function which must be holding a reference. +	 */  	/*  	 * Some groups like to know that marks are being freed.  This is a @@ -179,16 +179,17 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark)  	 * is just a lazy update (and could be a perf win...)  	 */ -	if (inode && (mark->flags & FSNOTIFY_MARK_FLAG_OBJECT_PINNED)) -		iput(inode); +	atomic_dec(&group->num_marks); -	/* -	 * it's possible that this group tried to destroy itself, but this -	 * this mark was simultaneously being freed by inode.  If that's the -	 * case, we finish freeing the group here. -	 */ -	if (unlikely(atomic_dec_and_test(&group->num_marks))) -		fsnotify_final_destroy_group(group); +	mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); +} + +void fsnotify_destroy_mark(struct fsnotify_mark *mark, +			   struct fsnotify_group *group) +{ +	mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING); +	fsnotify_destroy_mark_locked(mark, group); +	mutex_unlock(&group->mark_mutex);  }  void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask) @@ -213,26 +214,26 @@ void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mas   * These marks may be used for the fsnotify backend to determine which   * event types should be delivered to which group.   */ -int fsnotify_add_mark(struct fsnotify_mark *mark, -		      struct fsnotify_group *group, struct inode *inode, -		      struct vfsmount *mnt, int allow_dups) +int fsnotify_add_mark_locked(struct fsnotify_mark *mark, +			     struct fsnotify_group *group, struct inode *inode, +			     struct vfsmount *mnt, int allow_dups)  {  	int ret = 0;  	BUG_ON(inode && mnt);  	BUG_ON(!inode && !mnt); +	BUG_ON(!mutex_is_locked(&group->mark_mutex));  	/*  	 * LOCKING ORDER!!!! +	 * group->mark_mutex  	 * mark->lock -	 * group->mark_lock  	 * inode->i_lock  	 */  	spin_lock(&mark->lock); -	spin_lock(&group->mark_lock); -  	mark->flags |= FSNOTIFY_MARK_FLAG_ALIVE; +	fsnotify_get_group(group);  	mark->group = group;  	list_add(&mark->g_list, &group->marks_list);  	atomic_inc(&group->num_marks); @@ -250,11 +251,8 @@ int fsnotify_add_mark(struct fsnotify_mark *mark,  		BUG();  	} -	spin_unlock(&group->mark_lock); -  	/* this will pin the object if appropriate */  	fsnotify_set_mark_mask_locked(mark, mark->mask); -  	spin_unlock(&mark->lock);  	if (inode) @@ -264,10 +262,10 @@ int fsnotify_add_mark(struct fsnotify_mark *mark,  err:  	mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;  	list_del_init(&mark->g_list); +	fsnotify_put_group(group);  	mark->group = NULL;  	atomic_dec(&group->num_marks); -	spin_unlock(&group->mark_lock);  	spin_unlock(&mark->lock);  	spin_lock(&destroy_lock); @@ -278,6 +276,16 @@ err:  	return ret;  } +int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group, +		      struct inode *inode, struct vfsmount *mnt, int allow_dups) +{ +	int ret; +	mutex_lock(&group->mark_mutex); +	ret = fsnotify_add_mark_locked(mark, group, inode, mnt, allow_dups); +	mutex_unlock(&group->mark_mutex); +	return ret; +} +  /*   * clear any marks in a group in which mark->flags & flags is true   */ @@ -285,22 +293,16 @@ void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group,  					 unsigned int flags)  {  	struct fsnotify_mark *lmark, *mark; -	LIST_HEAD(free_list); -	spin_lock(&group->mark_lock); +	mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);  	list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) {  		if (mark->flags & flags) { -			list_add(&mark->free_g_list, &free_list); -			list_del_init(&mark->g_list);  			fsnotify_get_mark(mark); +			fsnotify_destroy_mark_locked(mark, group); +			fsnotify_put_mark(mark);  		}  	} -	spin_unlock(&group->mark_lock); - -	list_for_each_entry_safe(mark, lmark, &free_list, free_g_list) { -		fsnotify_destroy_mark(mark); -		fsnotify_put_mark(mark); -	} +	mutex_unlock(&group->mark_mutex);  }  /* @@ -316,6 +318,8 @@ void fsnotify_duplicate_mark(struct fsnotify_mark *new, struct fsnotify_mark *ol  	assert_spin_locked(&old->lock);  	new->i.inode = old->i.inode;  	new->m.mnt = old->m.mnt; +	if (old->group) +		fsnotify_get_group(old->group);  	new->group = old->group;  	new->mask = old->mask;  	new->free_mark = old->free_mark; @@ -336,7 +340,7 @@ void fsnotify_init_mark(struct fsnotify_mark *mark,  static int fsnotify_mark_destroy(void *ignored)  {  	struct fsnotify_mark *mark, *next; -	LIST_HEAD(private_destroy_list); +	struct list_head private_destroy_list;  	for (;;) {  		spin_lock(&destroy_lock);  | 
