diff options
Diffstat (limited to 'Documentation/filesystems/Locking')
| -rw-r--r-- | Documentation/filesystems/Locking | 97 |
1 files changed, 65 insertions, 32 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index e0cce2a5f82..b18dd177902 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -10,10 +10,9 @@ be able to use diff(1). --------------------------- dentry_operations -------------------------- prototypes: int (*d_revalidate)(struct dentry *, unsigned int); - int (*d_hash)(const struct dentry *, const struct inode *, - struct qstr *); - int (*d_compare)(const struct dentry *, const struct inode *, - const struct dentry *, const struct inode *, + int (*d_weak_revalidate)(struct dentry *, unsigned int); + int (*d_hash)(const struct dentry *, struct qstr *); + int (*d_compare)(const struct dentry *, const struct dentry *, unsigned int, const char *, const struct qstr *); int (*d_delete)(struct dentry *); void (*d_release)(struct dentry *); @@ -25,6 +24,7 @@ prototypes: locking rules: rename_lock ->d_lock may block rcu-walk d_revalidate: no no yes (ref-walk) maybe +d_weak_revalidate:no no yes no d_hash no no no maybe d_compare: yes no no maybe d_delete: no yes no no @@ -47,6 +47,8 @@ prototypes: int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t); int (*rename) (struct inode *, struct dentry *, struct inode *, struct dentry *); + int (*rename2) (struct inode *, struct dentry *, + struct inode *, struct dentry *, unsigned int); int (*readlink) (struct dentry *, char __user *,int); void * (*follow_link) (struct dentry *, struct nameidata *); void (*put_link) (struct dentry *, struct nameidata *, void *); @@ -64,6 +66,7 @@ prototypes: int (*atomic_open)(struct inode *, struct dentry *, struct file *, unsigned open_flag, umode_t create_mode, int *opened); + int (*tmpfile) (struct inode *, struct dentry *, umode_t); locking rules: all may block @@ -77,10 +80,10 @@ mkdir: yes unlink: yes (both) rmdir: yes (both) (see below) rename: yes (all) (see below) +rename2: yes (all) (see below) readlink: no follow_link: no put_link: no -truncate: yes (see below) setattr: yes permission: no (may not block if called in rcu-walk mode) get_acl: no @@ -92,15 +95,12 @@ removexattr: yes fiemap: no update_time: no atomic_open: yes +tmpfile: no Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on victim. - cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem. - ->truncate() is never called directly - it's a callback, not a -method. It's called by vmtruncate() - deprecated library function used by -->setattr(). Locking information above applies to that call (i.e. is -inherited from ->setattr() - vmtruncate() is used when ATTR_SIZE had been -passed). + cross-directory ->rename() and rename2() has (per-superblock) +->s_vfs_rename_sem. See Documentation/filesystems/directory-locking for more detailed discussion of the locking scheme for directory operations. @@ -114,7 +114,6 @@ prototypes: int (*drop_inode) (struct inode *); void (*evict_inode) (struct inode *); void (*put_super) (struct super_block *); - void (*write_super) (struct super_block *); int (*sync_fs)(struct super_block *sb, int wait); int (*freeze_fs) (struct super_block *); int (*unfreeze_fs) (struct super_block *); @@ -136,10 +135,9 @@ write_inode: drop_inode: !!!inode->i_lock!!! evict_inode: put_super: write -write_super: read sync_fs: read -freeze_fs: read -unfreeze_fs: read +freeze_fs: write +unfreeze_fs: write statfs: maybe(read) (see below) remount_fs: write umount_begin: no @@ -195,17 +193,18 @@ prototypes: loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata); sector_t (*bmap)(struct address_space *, sector_t); - int (*invalidatepage) (struct page *, unsigned long); + void (*invalidatepage) (struct page *, unsigned int, unsigned int); int (*releasepage) (struct page *, int); void (*freepage)(struct page *); - int (*direct_IO)(int, struct kiocb *, const struct iovec *iov, - loff_t offset, unsigned long nr_segs); + int (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset); int (*get_xip_mem)(struct address_space *, pgoff_t, int, void **, unsigned long *); int (*migratepage)(struct address_space *, struct page *, struct page *); int (*launder_page)(struct page *); - int (*is_partially_uptodate)(struct page *, read_descriptor_t *, unsigned long); + int (*is_partially_uptodate)(struct page *, unsigned long, unsigned long); int (*error_remove_page)(struct address_space *, struct page *); + int (*swap_activate)(struct file *); + int (*swap_deactivate)(struct file *); locking rules: All except set_page_dirty and freepage may block @@ -229,6 +228,8 @@ migratepage: yes (both) launder_page: yes is_partially_uptodate: yes error_remove_page: yes +swap_activate: no +swap_deactivate: no ->write_begin(), ->write_end(), ->sync_page() and ->readpage() may be called from the request handler (/dev/loop). @@ -312,8 +313,8 @@ filesystems and by the swapper. The latter will eventually go away. Please, keep it that way and don't breed new callers. ->invalidatepage() is called when the filesystem must attempt to drop -some or all of the buffers from the page when it is being truncated. It -returns zero on success. If ->invalidatepage is zero, the kernel uses +some or all of the buffers from the page when it is being truncated. It +returns zero on success. If ->invalidatepage is zero, the kernel uses block_invalidatepage() instead. ->releasepage() is called when the kernel is about to try to drop the @@ -330,6 +331,15 @@ cleaned, or an error value if not. Note that in order to prevent the page getting mapped back in and redirtied, it needs to be kept locked across the entire operation. + ->swap_activate will be called with a non-zero argument on +files backing (non block device backed) swapfiles. A return value +of zero indicates success, in which case this file can be used for +backing swapspace. The swapspace operations will be proxied to the +address space operations. + + ->swap_deactivate() will be called in the sys_swapoff() +path after ->swap_activate() returned success. + ----------------------- file_lock_operations ------------------------------ prototypes: void (*fl_copy_lock)(struct file_lock *, struct file_lock *); @@ -337,27 +347,38 @@ prototypes: locking rules: - file_lock_lock may block + inode->i_lock may block fl_copy_lock: yes no fl_release_private: maybe no ----------------------- lock_manager_operations --------------------------- prototypes: int (*lm_compare_owner)(struct file_lock *, struct file_lock *); + unsigned long (*lm_owner_key)(struct file_lock *); void (*lm_notify)(struct file_lock *); /* unblock callback */ int (*lm_grant)(struct file_lock *, struct file_lock *, int); - void (*lm_release_private)(struct file_lock *); void (*lm_break)(struct file_lock *); /* break_lease callback */ int (*lm_change)(struct file_lock **, int); locking rules: - file_lock_lock may block -lm_compare_owner: yes no -lm_notify: yes no -lm_grant: no no -lm_release_private: maybe no -lm_break: yes no -lm_change yes no + + inode->i_lock blocked_lock_lock may block +lm_compare_owner: yes[1] maybe no +lm_owner_key yes[1] yes no +lm_notify: yes yes no +lm_grant: no no no +lm_break: yes no no +lm_change yes no no + +[1]: ->lm_compare_owner and ->lm_owner_key are generally called with +*an* inode->i_lock held. It may not be the i_lock of the inode +associated with either file_lock argument! This is the case with deadlock +detection, since the code has to chase down the owners of locks that may +be entirely unrelated to the one on which the lock is being acquired. +For deadlock detection however, the blocked_lock_lock is also held. The +fact that these locks are held ensures that the file_locks do not +disappear out from under you while doing the comparison or generating an +owner key. --------------------------- buffer_head ----------------------------------- prototypes: @@ -409,7 +430,9 @@ prototypes: ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t); ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); - int (*readdir) (struct file *, void *, filldir_t); + ssize_t (*read_iter) (struct kiocb *, struct iov_iter *); + ssize_t (*write_iter) (struct kiocb *, struct iov_iter *); + int (*iterate) (struct file *, struct dir_context *); unsigned int (*poll) (struct file *, struct poll_table_struct *); long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); long (*compat_ioctl) (struct file *, unsigned int, unsigned long); @@ -507,6 +530,7 @@ locking rules: open: yes close: yes fault: yes can return with page locked +map_pages: yes page_mkwrite: yes can return with page locked access: yes @@ -518,6 +542,15 @@ the page, then ensure it is not already truncated (the page lock will block subsequent truncate), and then return with VM_FAULT_LOCKED, and the page locked. The VM will unlock the page. + ->map_pages() is called when VM asks to map easy accessible pages. +Filesystem should find and map pages associated with offsets from "pgoff" +till "max_pgoff". ->map_pages() is called with page table locked and must +not block. If it's not possible to reach a page without blocking, +filesystem should skip it. Filesystem should use do_set_pte() to setup +page table entry. Pointer to entry associated with offset "pgoff" is +passed in "pte" field in vm_fault structure. Pointers to entries for other +offsets should be calculated relative to "pte". + ->page_mkwrite() is called when a previously read-only pte is about to become writeable. The filesystem again must ensure that there are no truncate/invalidate races, and then return with the page locked. If @@ -526,7 +559,7 @@ like the ->fault() handler, but simply return with VM_FAULT_NOPAGE, which will cause the VM to retry the fault. ->access() is called when get_user_pages() fails in -acces_process_vm(), typically used to debug a process through +access_process_vm(), typically used to debug a process through /proc/pid/mem or ptrace. This function is needed only for VM_IO | VM_PFNMAP VMAs. |
