Diffstat (limited to 'fs/locks.c')
-rw-r--r--  fs/locks.c  507
1 file changed, 336 insertions, 171 deletions
diff --git a/fs/locks.c b/fs/locks.c index b27a3005d78..717fbc404e6 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -130,11 +130,15 @@  #include <linux/percpu.h>  #include <linux/lglock.h> +#define CREATE_TRACE_POINTS +#include <trace/events/filelock.h> +  #include <asm/uaccess.h>  #define IS_POSIX(fl)	(fl->fl_flags & FL_POSIX)  #define IS_FLOCK(fl)	(fl->fl_flags & FL_FLOCK) -#define IS_LEASE(fl)	(fl->fl_flags & FL_LEASE) +#define IS_LEASE(fl)	(fl->fl_flags & (FL_LEASE|FL_DELEG)) +#define IS_OFDLCK(fl)	(fl->fl_flags & FL_OFDLCK)  static bool lease_breaking(struct file_lock *fl)  { @@ -321,6 +325,7 @@ static int flock_make_lock(struct file *filp, struct file_lock **lock,  		return -ENOMEM;  	fl->fl_file = filp; +	fl->fl_owner = (fl_owner_t)filp;  	fl->fl_pid = current->tgid;  	fl->fl_flags = FL_FLOCK;  	fl->fl_type = type; @@ -344,48 +349,43 @@ static int assign_type(struct file_lock *fl, long type)  	return 0;  } -/* Verify a "struct flock" and copy it to a "struct file_lock" as a POSIX - * style lock. - */ -static int flock_to_posix_lock(struct file *filp, struct file_lock *fl, -			       struct flock *l) +static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl, +				 struct flock64 *l)  { -	off_t start, end; -  	switch (l->l_whence) {  	case SEEK_SET: -		start = 0; +		fl->fl_start = 0;  		break;  	case SEEK_CUR: -		start = filp->f_pos; +		fl->fl_start = filp->f_pos;  		break;  	case SEEK_END: -		start = i_size_read(file_inode(filp)); +		fl->fl_start = i_size_read(file_inode(filp));  		break;  	default:  		return -EINVAL;  	} +	if (l->l_start > OFFSET_MAX - fl->fl_start) +		return -EOVERFLOW; +	fl->fl_start += l->l_start; +	if (fl->fl_start < 0) +		return -EINVAL;  	/* POSIX-1996 leaves the case l->l_len < 0 undefined;  	   POSIX-2001 defines it. */ -	start += l->l_start; -	if (start < 0) -		return -EINVAL; -	fl->fl_end = OFFSET_MAX;  	if (l->l_len > 0) { -		end = start + l->l_len - 1; -		fl->fl_end = end; +		if (l->l_len - 1 > OFFSET_MAX - fl->fl_start) +			return -EOVERFLOW; +		fl->fl_end = fl->fl_start + l->l_len - 1; +  	} else if (l->l_len < 0) { -		end = start - 1; -		fl->fl_end = end; -		start += l->l_len; -		if (start < 0) +		if (fl->fl_start + l->l_len < 0)  			return -EINVAL; -	} -	fl->fl_start = start;	/* we record the absolute position */ -	if (fl->fl_end < fl->fl_start) -		return -EOVERFLOW; -	 +		fl->fl_end = fl->fl_start - 1; +		fl->fl_start += l->l_len; +	} else +		fl->fl_end = OFFSET_MAX; +  	fl->fl_owner = current->files;  	fl->fl_pid = current->tgid;  	fl->fl_file = filp; @@ -396,52 +396,21 @@ static int flock_to_posix_lock(struct file *filp, struct file_lock *fl,  	return assign_type(fl, l->l_type);  } -#if BITS_PER_LONG == 32 -static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl, -				 struct flock64 *l) +/* Verify a "struct flock" and copy it to a "struct file_lock" as a POSIX + * style lock. 
+ */ +static int flock_to_posix_lock(struct file *filp, struct file_lock *fl, +			       struct flock *l)  { -	loff_t start; - -	switch (l->l_whence) { -	case SEEK_SET: -		start = 0; -		break; -	case SEEK_CUR: -		start = filp->f_pos; -		break; -	case SEEK_END: -		start = i_size_read(file_inode(filp)); -		break; -	default: -		return -EINVAL; -	} - -	start += l->l_start; -	if (start < 0) -		return -EINVAL; -	fl->fl_end = OFFSET_MAX; -	if (l->l_len > 0) { -		fl->fl_end = start + l->l_len - 1; -	} else if (l->l_len < 0) { -		fl->fl_end = start - 1; -		start += l->l_len; -		if (start < 0) -			return -EINVAL; -	} -	fl->fl_start = start;	/* we record the absolute position */ -	if (fl->fl_end < fl->fl_start) -		return -EOVERFLOW; -	 -	fl->fl_owner = current->files; -	fl->fl_pid = current->tgid; -	fl->fl_file = filp; -	fl->fl_flags = FL_POSIX; -	fl->fl_ops = NULL; -	fl->fl_lmops = NULL; +	struct flock64 ll = { +		.l_type = l->l_type, +		.l_whence = l->l_whence, +		.l_start = l->l_start, +		.l_len = l->l_len, +	}; -	return assign_type(fl, l->l_type); +	return flock64_to_posix_lock(filp, fl, &ll);  } -#endif  /* default lease lock manager operations */  static void lease_break_callback(struct file_lock *fl) @@ -462,7 +431,7 @@ static int lease_init(struct file *filp, long type, struct file_lock *fl)  	if (assign_type(fl, type) != 0)  		return -EINVAL; -	fl->fl_owner = current->files; +	fl->fl_owner = (fl_owner_t)current->files;  	fl->fl_pid = current->tgid;  	fl->fl_file = filp; @@ -511,8 +480,7 @@ static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2)  }  /* Must be called with the i_lock held! */ -static inline void -locks_insert_global_locks(struct file_lock *fl) +static void locks_insert_global_locks(struct file_lock *fl)  {  	lg_local_lock(&file_lock_lglock);  	fl->fl_link_cpu = smp_processor_id(); @@ -521,8 +489,7 @@ locks_insert_global_locks(struct file_lock *fl)  }  /* Must be called with the i_lock held! */ -static inline void -locks_delete_global_locks(struct file_lock *fl) +static void locks_delete_global_locks(struct file_lock *fl)  {  	/*  	 * Avoid taking lock if already unhashed. This is safe since this check @@ -544,14 +511,12 @@ posix_owner_key(struct file_lock *fl)  	return (unsigned long)fl->fl_owner;  } -static inline void -locks_insert_global_blocked(struct file_lock *waiter) +static void locks_insert_global_blocked(struct file_lock *waiter)  {  	hash_add(blocked_hash, &waiter->fl_link, posix_owner_key(waiter));  } -static inline void -locks_delete_global_blocked(struct file_lock *waiter) +static void locks_delete_global_blocked(struct file_lock *waiter)  {  	hash_del(&waiter->fl_link);  } @@ -581,7 +546,7 @@ static void locks_delete_block(struct file_lock *waiter)   * it seems like the reasonable thing to do.   *   * Must be called with both the i_lock and blocked_lock_lock held. The fl_block - * list itself is protected by the file_lock_list, but by ensuring that the + * list itself is protected by the blocked_lock_lock, but by ensuring that the   * i_lock is also held on insertions we can avoid taking the blocked_lock_lock   * in some cases when we see that the fl_block list is empty.   
*/ @@ -591,7 +556,7 @@ static void __locks_insert_block(struct file_lock *blocker,  	BUG_ON(!list_empty(&waiter->fl_block));  	waiter->fl_next = blocker;  	list_add_tail(&waiter->fl_block, &blocker->fl_block); -	if (IS_POSIX(blocker)) +	if (IS_POSIX(blocker) && !IS_OFDLCK(blocker))  		locks_insert_global_blocked(waiter);  } @@ -652,15 +617,18 @@ static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl)  	locks_insert_global_locks(fl);  } -/* - * Delete a lock and then free it. - * Wake up processes that are blocked waiting for this lock, - * notify the FS that the lock has been cleared and - * finally free the lock. +/** + * locks_delete_lock - Delete a lock and then free it. + * @thisfl_p: pointer that points to the fl_next field of the previous + * 	      inode->i_flock list entry + * + * Unlink a lock from all lists and free the namespace reference, but don't + * free it yet. Wake up processes that are blocked waiting for this lock and + * notify the FS that the lock has been cleared.   *   * Must be called with the i_lock held!   */ -static void locks_delete_lock(struct file_lock **thisfl_p) +static void locks_unlink_lock(struct file_lock **thisfl_p)  {  	struct file_lock *fl = *thisfl_p; @@ -675,6 +643,18 @@ static void locks_delete_lock(struct file_lock **thisfl_p)  	}  	locks_wake_up_blocks(fl); +} + +/* + * Unlink a lock from all lists and free it. + * + * Must be called with i_lock held! + */ +static void locks_delete_lock(struct file_lock **thisfl_p) +{ +	struct file_lock *fl = *thisfl_p; + +	locks_unlink_lock(thisfl_p);  	locks_free_lock(fl);  } @@ -769,8 +749,16 @@ EXPORT_SYMBOL(posix_test_lock);   * Note: the above assumption may not be true when handling lock   * requests from a broken NFS client. It may also fail in the presence   * of tasks (such as posix threads) sharing the same open file table. - *   * To handle those cases, we just bail out after a few iterations. + * + * For FL_OFDLCK locks, the owner is the filp, not the files_struct. + * Because the owner is not even nominally tied to a thread of + * execution, the deadlock detection below can't reasonably work well. Just + * skip it for those. + * + * In principle, we could do a more limited deadlock detection on FL_OFDLCK + * locks that just checks for the case where two tasks are attempting to + * upgrade from read to write locks on the same inode.   */  #define MAX_DEADLK_ITERATIONS 10 @@ -793,6 +781,13 @@ static int posix_locks_deadlock(struct file_lock *caller_fl,  {  	int i = 0; +	/* +	 * This deadlock detector can't reasonably detect deadlocks with +	 * FL_OFDLCK locks, since they aren't owned by a process, per-se. +	 */ +	if (IS_OFDLCK(caller_fl)) +		return 0; +  	while ((block_fl = what_owner_is_waiting_for(block_fl))) {  		if (i++ > MAX_DEADLK_ITERATIONS)  			return 0; @@ -1152,13 +1147,14 @@ EXPORT_SYMBOL(posix_lock_file_wait);  /**   * locks_mandatory_locked - Check for an active lock - * @inode: the file to check + * @file: the file to check   *   * Searches the inode's list of locks to find any POSIX locks which conflict.   * This function is called from locks_verify_locked() only.   
*/ -int locks_mandatory_locked(struct inode *inode) +int locks_mandatory_locked(struct file *file)  { +	struct inode *inode = file_inode(file);  	fl_owner_t owner = current->files;  	struct file_lock *fl; @@ -1169,7 +1165,7 @@ int locks_mandatory_locked(struct inode *inode)  	for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) {  		if (!IS_POSIX(fl))  			continue; -		if (fl->fl_owner != owner) +		if (fl->fl_owner != owner && fl->fl_owner != (fl_owner_t)file)  			break;  	}  	spin_unlock(&inode->i_lock); @@ -1195,19 +1191,30 @@ int locks_mandatory_area(int read_write, struct inode *inode,  {  	struct file_lock fl;  	int error; +	bool sleep = false;  	locks_init_lock(&fl); -	fl.fl_owner = current->files;  	fl.fl_pid = current->tgid;  	fl.fl_file = filp;  	fl.fl_flags = FL_POSIX | FL_ACCESS;  	if (filp && !(filp->f_flags & O_NONBLOCK)) -		fl.fl_flags |= FL_SLEEP; +		sleep = true;  	fl.fl_type = (read_write == FLOCK_VERIFY_WRITE) ? F_WRLCK : F_RDLCK;  	fl.fl_start = offset;  	fl.fl_end = offset + count - 1;  	for (;;) { +		if (filp) { +			fl.fl_owner = (fl_owner_t)filp; +			fl.fl_flags &= ~FL_SLEEP; +			error = __posix_lock_file(inode, &fl, NULL); +			if (!error) +				break; +		} + +		if (sleep) +			fl.fl_flags |= FL_SLEEP; +		fl.fl_owner = current->files;  		error = __posix_lock_file(inode, &fl, NULL);  		if (error != FILE_LOCK_DEFERRED)  			break; @@ -1283,6 +1290,7 @@ static void time_out_leases(struct inode *inode)  	before = &inode->i_flock;  	while ((fl = *before) && IS_LEASE(fl) && lease_breaking(fl)) { +		trace_time_out_leases(inode, fl);  		if (past_time(fl->fl_downgrade_time))  			lease_modify(before, F_RDLCK);  		if (past_time(fl->fl_break_time)) @@ -1292,28 +1300,40 @@ static void time_out_leases(struct inode *inode)  	}  } +static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker) +{ +	if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE)) +		return false; +	return locks_conflict(breaker, lease); +} +  /**   *	__break_lease	-	revoke all outstanding leases on file   *	@inode: the inode of the file to return - *	@mode: the open mode (read or write) + *	@mode: O_RDONLY: break only write leases; O_WRONLY or O_RDWR: + *	    break all leases + *	@type: FL_LEASE: break leases and delegations; FL_DELEG: break + *	    only delegations   *   *	break_lease (inlined for speed) has checked there already is at least   *	some kind of lock (maybe a lease) on this file.  Leases are broken on   *	a call to open() or truncate().  This function can sleep unless you   *	specified %O_NONBLOCK to your open().   */ -int __break_lease(struct inode *inode, unsigned int mode) +int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)  {  	int error = 0;  	struct file_lock *new_fl, *flock;  	struct file_lock *fl;  	unsigned long break_time;  	int i_have_this_lease = 0; +	bool lease_conflict = false;  	int want_write = (mode & O_ACCMODE) != O_RDONLY;  	new_fl = lease_alloc(NULL, want_write ? 
F_WRLCK : F_RDLCK);  	if (IS_ERR(new_fl))  		return PTR_ERR(new_fl); +	new_fl->fl_flags = type;  	spin_lock(&inode->i_lock); @@ -1323,13 +1343,16 @@ int __break_lease(struct inode *inode, unsigned int mode)  	if ((flock == NULL) || !IS_LEASE(flock))  		goto out; -	if (!locks_conflict(flock, new_fl)) +	for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) { +		if (leases_conflict(fl, new_fl)) { +			lease_conflict = true; +			if (fl->fl_owner == current->files) +				i_have_this_lease = 1; +		} +	} +	if (!lease_conflict)  		goto out; -	for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) -		if (fl->fl_owner == current->files) -			i_have_this_lease = 1; -  	break_time = 0;  	if (lease_break_time > 0) {  		break_time = jiffies + lease_break_time * HZ; @@ -1338,6 +1361,8 @@ int __break_lease(struct inode *inode, unsigned int mode)  	}  	for (fl = flock; fl && IS_LEASE(fl); fl = fl->fl_next) { +		if (!leases_conflict(fl, new_fl)) +			continue;  		if (want_write) {  			if (fl->fl_flags & FL_UNLOCK_PENDING)  				continue; @@ -1353,22 +1378,24 @@ int __break_lease(struct inode *inode, unsigned int mode)  	}  	if (i_have_this_lease || (mode & O_NONBLOCK)) { +		trace_break_lease_noblock(inode, new_fl);  		error = -EWOULDBLOCK;  		goto out;  	}  restart:  	break_time = flock->fl_break_time; -	if (break_time != 0) { +	if (break_time != 0)  		break_time -= jiffies; -		if (break_time == 0) -			break_time++; -	} +	if (break_time == 0) +		break_time++;  	locks_insert_block(flock, new_fl); +	trace_break_lease_block(inode, new_fl);  	spin_unlock(&inode->i_lock);  	error = wait_event_interruptible_timeout(new_fl->fl_wait,  						!new_fl->fl_next, break_time);  	spin_lock(&inode->i_lock); +	trace_break_lease_unblock(inode, new_fl);  	locks_delete_block(new_fl);  	if (error >= 0) {  		if (error == 0) @@ -1379,7 +1406,7 @@ restart:  		 */  		for (flock = inode->i_flock; flock && IS_LEASE(flock);  				flock = flock->fl_next) { -			if (locks_conflict(new_fl, flock)) +			if (leases_conflict(new_fl, flock))  				goto restart;  		}  		error = 0; @@ -1455,21 +1482,63 @@ int fcntl_getlease(struct file *filp)  	return type;  } +/** + * check_conflicting_open - see if the given dentry points to a file that has + * 			    an existing open that would conflict with the + * 			    desired lease. + * @dentry:	dentry to check + * @arg:	type of lease that we're trying to acquire + * + * Check to see if there's an existing open fd on this file that would + * conflict with the lease we're trying to set. + */ +static int +check_conflicting_open(const struct dentry *dentry, const long arg) +{ +	int ret = 0; +	struct inode *inode = dentry->d_inode; + +	if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) +		return -EAGAIN; + +	if ((arg == F_WRLCK) && ((d_count(dentry) > 1) || +	    (atomic_read(&inode->i_count) > 1))) +		ret = -EAGAIN; + +	return ret; +} +  static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp)  {  	struct file_lock *fl, **before, **my_before = NULL, *lease;  	struct dentry *dentry = filp->f_path.dentry;  	struct inode *inode = dentry->d_inode; +	bool is_deleg = (*flp)->fl_flags & FL_DELEG;  	int error;  	lease = *flp; +	trace_generic_add_lease(inode, lease); -	error = -EAGAIN; -	if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) -		goto out; -	if ((arg == F_WRLCK) -	    && ((d_count(dentry) > 1) -		|| (atomic_read(&inode->i_count) > 1))) +	/* +	 * In the delegation case we need mutual exclusion with +	 * a number of operations that take the i_mutex.  
We trylock +	 * because delegations are an optional optimization, and if +	 * there's some chance of a conflict--we'd rather not +	 * bother, maybe that's a sign this just isn't a good file to +	 * hand out a delegation on. +	 */ +	if (is_deleg && !mutex_trylock(&inode->i_mutex)) +		return -EAGAIN; + +	if (is_deleg && arg == F_WRLCK) { +		/* Write delegations are not currently supported: */ +		mutex_unlock(&inode->i_mutex); +		WARN_ON_ONCE(1); +		return -EINVAL; +	} + +	error = check_conflicting_open(dentry, arg); +	if (error)  		goto out;  	/* @@ -1514,9 +1583,22 @@ static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp  		goto out;  	locks_insert_lock(before, lease); -	return 0; - +	/* +	 * The check in break_lease() is lockless. It's possible for another +	 * open to race in after we did the earlier check for a conflicting +	 * open but before the lease was inserted. Check again for a +	 * conflicting open and cancel the lease if there is one. +	 * +	 * We also add a barrier here to ensure that the insertion of the lock +	 * precedes these checks. +	 */ +	smp_mb(); +	error = check_conflicting_open(dentry, arg); +	if (error) +		locks_unlink_lock(flp);  out: +	if (is_deleg) +		mutex_unlock(&inode->i_mutex);  	return error;  } @@ -1526,6 +1608,8 @@ static int generic_delete_lease(struct file *filp, struct file_lock **flp)  	struct dentry *dentry = filp->f_path.dentry;  	struct inode *inode = dentry->d_inode; +	trace_generic_delete_lease(inode, *flp); +  	for (before = &inode->i_flock;  			((fl = *before) != NULL) && IS_LEASE(fl);  			before = &fl->fl_next) { @@ -1579,7 +1663,7 @@ EXPORT_SYMBOL(generic_setlease);  static int __vfs_setlease(struct file *filp, long arg, struct file_lock **lease)  { -	if (filp->f_op && filp->f_op->setlease) +	if (filp->f_op->setlease)  		return filp->f_op->setlease(filp, arg, lease);  	else  		return generic_setlease(filp, arg, lease); @@ -1771,7 +1855,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)  	if (error)  		goto out_free; -	if (f.file->f_op && f.file->f_op->flock) +	if (f.file->f_op->flock)  		error = f.file->f_op->flock(f.file,  					  (can_sleep) ? F_SETLKW : F_SETLK,  					  lock); @@ -1797,7 +1881,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)   */  int vfs_test_lock(struct file *filp, struct file_lock *fl)  { -	if (filp->f_op && filp->f_op->lock) +	if (filp->f_op->lock)  		return filp->f_op->lock(filp, F_GETLK, fl);  	posix_test_lock(filp, fl);  	return 0; @@ -1806,7 +1890,7 @@ EXPORT_SYMBOL_GPL(vfs_test_lock);  static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl)  { -	flock->l_pid = fl->fl_pid; +	flock->l_pid = IS_OFDLCK(fl) ? -1 : fl->fl_pid;  #if BITS_PER_LONG == 32  	/*  	 * Make sure we can represent the posix lock via @@ -1828,7 +1912,7 @@ static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl)  #if BITS_PER_LONG == 32  static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl)  { -	flock->l_pid = fl->fl_pid; +	flock->l_pid = IS_OFDLCK(fl) ? -1 : fl->fl_pid;  	flock->l_start = fl->fl_start;  	flock->l_len = fl->fl_end == OFFSET_MAX ? 0 :  		fl->fl_end - fl->fl_start + 1; @@ -1840,7 +1924,7 @@ static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl)  /* Report the first existing lock that would conflict with l.   * This implements the F_GETLK command of fcntl().   
*/ -int fcntl_getlk(struct file *filp, struct flock __user *l) +int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l)  {  	struct file_lock file_lock;  	struct flock flock; @@ -1857,6 +1941,16 @@ int fcntl_getlk(struct file *filp, struct flock __user *l)  	if (error)  		goto out; +	if (cmd == F_OFD_GETLK) { +		error = -EINVAL; +		if (flock.l_pid != 0) +			goto out; + +		cmd = F_GETLK; +		file_lock.fl_flags |= FL_OFDLCK; +		file_lock.fl_owner = (fl_owner_t)filp; +	} +  	error = vfs_test_lock(filp, &file_lock);  	if (error)  		goto out; @@ -1909,7 +2003,7 @@ out:   */  int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf)  { -	if (filp->f_op && filp->f_op->lock) +	if (filp->f_op->lock)  		return filp->f_op->lock(filp, cmd, fl);  	else  		return posix_lock_file(filp, fl, conf); @@ -1940,6 +2034,22 @@ static int do_lock_file_wait(struct file *filp, unsigned int cmd,  	return error;  } +/* Ensure that fl->fl_filp has compatible f_mode for F_SETLK calls */ +static int +check_fmode_for_setlk(struct file_lock *fl) +{ +	switch (fl->fl_type) { +	case F_RDLCK: +		if (!(fl->fl_file->f_mode & FMODE_READ)) +			return -EBADF; +		break; +	case F_WRLCK: +		if (!(fl->fl_file->f_mode & FMODE_WRITE)) +			return -EBADF; +	} +	return 0; +} +  /* Apply the lock described by l to an open file descriptor.   * This implements both the F_SETLK and F_SETLKW commands of fcntl().   */ @@ -1976,25 +2086,36 @@ again:  	error = flock_to_posix_lock(filp, file_lock, &flock);  	if (error)  		goto out; -	if (cmd == F_SETLKW) { -		file_lock->fl_flags |= FL_SLEEP; -	} -	 -	error = -EBADF; -	switch (flock.l_type) { -	case F_RDLCK: -		if (!(filp->f_mode & FMODE_READ)) -			goto out; -		break; -	case F_WRLCK: -		if (!(filp->f_mode & FMODE_WRITE)) + +	error = check_fmode_for_setlk(file_lock); +	if (error) +		goto out; + +	/* +	 * If the cmd is requesting file-private locks, then set the +	 * FL_OFDLCK flag and override the owner. +	 */ +	switch (cmd) { +	case F_OFD_SETLK: +		error = -EINVAL; +		if (flock.l_pid != 0)  			goto out; + +		cmd = F_SETLK; +		file_lock->fl_flags |= FL_OFDLCK; +		file_lock->fl_owner = (fl_owner_t)filp;  		break; -	case F_UNLCK: -		break; -	default: +	case F_OFD_SETLKW:  		error = -EINVAL; -		goto out; +		if (flock.l_pid != 0) +			goto out; + +		cmd = F_SETLKW; +		file_lock->fl_flags |= FL_OFDLCK; +		file_lock->fl_owner = (fl_owner_t)filp; +		/* Fallthrough */ +	case F_SETLKW: +		file_lock->fl_flags |= FL_SLEEP;  	}  	error = do_lock_file_wait(filp, cmd, file_lock); @@ -2025,7 +2146,7 @@ out:  /* Report the first existing lock that would conflict with l.   * This implements the F_GETLK command of fcntl().   
*/ -int fcntl_getlk64(struct file *filp, struct flock64 __user *l) +int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l)  {  	struct file_lock file_lock;  	struct flock64 flock; @@ -2042,6 +2163,16 @@ int fcntl_getlk64(struct file *filp, struct flock64 __user *l)  	if (error)  		goto out; +	if (cmd == F_OFD_GETLK) { +		error = -EINVAL; +		if (flock.l_pid != 0) +			goto out; + +		cmd = F_GETLK64; +		file_lock.fl_flags |= FL_OFDLCK; +		file_lock.fl_owner = (fl_owner_t)filp; +	} +  	error = vfs_test_lock(filp, &file_lock);  	if (error)  		goto out; @@ -2094,25 +2225,36 @@ again:  	error = flock64_to_posix_lock(filp, file_lock, &flock);  	if (error)  		goto out; -	if (cmd == F_SETLKW64) { -		file_lock->fl_flags |= FL_SLEEP; -	} -	 -	error = -EBADF; -	switch (flock.l_type) { -	case F_RDLCK: -		if (!(filp->f_mode & FMODE_READ)) -			goto out; -		break; -	case F_WRLCK: -		if (!(filp->f_mode & FMODE_WRITE)) + +	error = check_fmode_for_setlk(file_lock); +	if (error) +		goto out; + +	/* +	 * If the cmd is requesting file-private locks, then set the +	 * FL_OFDLCK flag and override the owner. +	 */ +	switch (cmd) { +	case F_OFD_SETLK: +		error = -EINVAL; +		if (flock.l_pid != 0)  			goto out; + +		cmd = F_SETLK64; +		file_lock->fl_flags |= FL_OFDLCK; +		file_lock->fl_owner = (fl_owner_t)filp;  		break; -	case F_UNLCK: -		break; -	default: +	case F_OFD_SETLKW:  		error = -EINVAL; -		goto out; +		if (flock.l_pid != 0) +			goto out; + +		cmd = F_SETLKW64; +		file_lock->fl_flags |= FL_OFDLCK; +		file_lock->fl_owner = (fl_owner_t)filp; +		/* Fallthrough */ +	case F_SETLKW64: +		file_lock->fl_flags |= FL_SLEEP;  	}  	error = do_lock_file_wait(filp, cmd, file_lock); @@ -2173,7 +2315,7 @@ EXPORT_SYMBOL(locks_remove_posix);  /*   * This function is called on the last close of an open file.   */ -void locks_remove_flock(struct file *filp) +void locks_remove_file(struct file *filp)  {  	struct inode * inode = file_inode(filp);  	struct file_lock *fl; @@ -2182,8 +2324,11 @@ void locks_remove_flock(struct file *filp)  	if (!inode->i_flock)  		return; -	if (filp->f_op && filp->f_op->flock) { +	locks_remove_posix(filp, (fl_owner_t)filp); + +	if (filp->f_op->flock) {  		struct file_lock fl = { +			.fl_owner = (fl_owner_t)filp,  			.fl_pid = current->tgid,  			.fl_file = filp,  			.fl_flags = FL_FLOCK, @@ -2200,16 +2345,28 @@ void locks_remove_flock(struct file *filp)  	while ((fl = *before) != NULL) {  		if (fl->fl_file == filp) { -			if (IS_FLOCK(fl)) { -				locks_delete_lock(before); -				continue; -			}  			if (IS_LEASE(fl)) {  				lease_modify(before, F_UNLCK);  				continue;  			} -			/* What? */ -			BUG(); + +			/* +			 * There's a leftover lock on the list of a type that +			 * we didn't expect to see. Most likely a classic +			 * POSIX lock that ended up not getting released +			 * properly, or that raced onto the list somehow. Log +			 * some info about it and then just remove it from +			 * the list. 
+			 */ +			WARN(!IS_FLOCK(fl), +				"leftover lock: dev=%u:%u ino=%lu type=%hhd flags=0x%x start=%lld end=%lld\n", +				MAJOR(inode->i_sb->s_dev), +				MINOR(inode->i_sb->s_dev), inode->i_ino, +				fl->fl_type, fl->fl_flags, +				fl->fl_start, fl->fl_end); + +			locks_delete_lock(before); +			continue;   		}  		before = &fl->fl_next;  	} @@ -2246,7 +2403,7 @@ EXPORT_SYMBOL(posix_unblock_lock);   */  int vfs_cancel_lock(struct file *filp, struct file_lock *fl)  { -	if (filp->f_op && filp->f_op->lock) +	if (filp->f_op->lock)  		return filp->f_op->lock(filp, F_CANCELLK, fl);  	return 0;  } @@ -2278,26 +2435,32 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,  	seq_printf(f, "%lld:%s ", id, pfx);  	if (IS_POSIX(fl)) { -		seq_printf(f, "%6s %s ", -			     (fl->fl_flags & FL_ACCESS) ? "ACCESS" : "POSIX ", +		if (fl->fl_flags & FL_ACCESS) +			seq_puts(f, "ACCESS"); +		else if (IS_OFDLCK(fl)) +			seq_puts(f, "OFDLCK"); +		else +			seq_puts(f, "POSIX "); + +		seq_printf(f, " %s ",  			     (inode == NULL) ? "*NOINODE*" :  			     mandatory_lock(inode) ? "MANDATORY" : "ADVISORY ");  	} else if (IS_FLOCK(fl)) {  		if (fl->fl_type & LOCK_MAND) { -			seq_printf(f, "FLOCK  MSNFS     "); +			seq_puts(f, "FLOCK  MSNFS     ");  		} else { -			seq_printf(f, "FLOCK  ADVISORY  "); +			seq_puts(f, "FLOCK  ADVISORY  ");  		}  	} else if (IS_LEASE(fl)) { -		seq_printf(f, "LEASE  "); +		seq_puts(f, "LEASE  ");  		if (lease_breaking(fl)) -			seq_printf(f, "BREAKING  "); +			seq_puts(f, "BREAKING  ");  		else if (fl->fl_file) -			seq_printf(f, "ACTIVE    "); +			seq_puts(f, "ACTIVE    ");  		else -			seq_printf(f, "BREAKER   "); +			seq_puts(f, "BREAKER   ");  	} else { -		seq_printf(f, "UNKNOWN UNKNOWN  "); +		seq_puts(f, "UNKNOWN UNKNOWN  ");  	}  	if (fl->fl_type & LOCK_MAND) {  		seq_printf(f, "%s ", @@ -2329,7 +2492,7 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,  		else  			seq_printf(f, "%Ld %Ld\n", fl->fl_start, fl->fl_end);  	} else { -		seq_printf(f, "0 EOF\n"); +		seq_puts(f, "0 EOF\n");  	}  } @@ -2349,6 +2512,7 @@ static int locks_show(struct seq_file *f, void *v)  }  static void *locks_start(struct seq_file *f, loff_t *pos) +	__acquires(&blocked_lock_lock)  {  	struct locks_iterator *iter = f->private; @@ -2367,6 +2531,7 @@ static void *locks_next(struct seq_file *f, void *v, loff_t *pos)  }  static void locks_stop(struct seq_file *f, void *v) +	__releases(&blocked_lock_lock)  {  	spin_unlock(&blocked_lock_lock);  	lg_global_unlock(&file_lock_lglock);  | 
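The F_OFD_GETLK/F_OFD_SETLK/F_OFD_SETLKW commands handled above use the open file description, not the process, as the lock owner (fl_owner is set to the struct file), and they require l_pid to be zero. A minimal userspace sketch of acquiring such a lock follows; the file path is made up for illustration, and the F_OFD_* constants are assumed to be visible (newer glibc exposes them with _GNU_SOURCE; on older libcs they would have to be defined by hand).

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	struct flock fl;
	int fd;

	fd = open("/tmp/ofd-demo", O_RDWR | O_CREAT, 0644);	/* hypothetical path */
	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(&fl, 0, sizeof(fl));
	fl.l_type = F_WRLCK;		/* a write lock needs FMODE_WRITE, hence O_RDWR above */
	fl.l_whence = SEEK_SET;
	fl.l_start = 0;
	fl.l_len = 0;			/* 0 means "to EOF", i.e. the whole file */
	fl.l_pid = 0;			/* must be 0 for the F_OFD_* commands */

	if (fcntl(fd, F_OFD_SETLK, &fl) == -1)
		perror("F_OFD_SETLK");	/* EINVAL on kernels without OFD locks or if l_pid != 0 */
	else
		puts("holding an OFD write lock");

	close(fd);			/* last close of this open file description drops the lock */
	return 0;
}

Because ownership follows the open file description, the lock is released only when the last descriptor referring to that description is closed, and F_OFD_GETLK reports the conflicting owner's pid as -1, rather than the per-process semantics of classic POSIX locks.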
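Much of the rest of the diff reworks the lease paths (__break_lease(), generic_add_lease(), the new leases_conflict() and check_conflicting_open() helpers). Those paths are driven from userspace through the long-standing F_SETLEASE interface; for context, here is a minimal sketch of holding a read lease and reacting to a break notification, again with a hypothetical file path.

#define _GNU_SOURCE
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static volatile sig_atomic_t lease_broken;

static void on_sigio(int sig)
{
	(void)sig;
	lease_broken = 1;
}

int main(void)
{
	int fd;

	/* A read lease requires that nobody (including us) has the file
	 * open for writing, and that we own the file or have CAP_LEASE. */
	fd = open("/tmp/lease-demo", O_RDONLY);	/* hypothetical path */
	if (fd < 0) {
		perror("open");
		return 1;
	}

	signal(SIGIO, on_sigio);	/* SIGIO is the default lease-break signal */

	if (fcntl(fd, F_SETLEASE, F_RDLCK) == -1) {
		perror("F_SETLEASE");	/* e.g. EAGAIN if a writer already has the file open */
		return 1;
	}

	/* Once a conflicting open() or truncate() breaks the lease, we have
	 * lease_break_time seconds (45 by default) to give it back. */
	while (!lease_broken)
		pause();

	fcntl(fd, F_SETLEASE, F_UNLCK);	/* surrender the lease */
	close(fd);
	return 0;
}

The check_conflicting_open()/smp_mb() change above targets exactly this setup: an open() racing with lease insertion must either observe the newly inserted lease or be caught by the re-check performed after the insertion, in which case the lease is unlinked again and -EAGAIN is returned.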
