diff options
Diffstat (limited to 'Documentation/filesystems')
| -rw-r--r-- | Documentation/filesystems/Locking | 23 | ||||
| -rw-r--r-- | Documentation/filesystems/affs.txt | 9 | ||||
| -rw-r--r-- | Documentation/filesystems/f2fs.txt | 29 | ||||
| -rw-r--r-- | Documentation/filesystems/nfs/nfs41-server.txt | 2 | ||||
| -rw-r--r-- | Documentation/filesystems/nilfs2.txt | 12 | ||||
| -rw-r--r-- | Documentation/filesystems/ntfs.txt | 2 | ||||
| -rw-r--r-- | Documentation/filesystems/porting | 6 | ||||
| -rw-r--r-- | Documentation/filesystems/proc.txt | 29 | ||||
| -rw-r--r-- | Documentation/filesystems/seq_file.txt | 9 | ||||
| -rw-r--r-- | Documentation/filesystems/sharedsubtree.txt | 2 | ||||
| -rw-r--r-- | Documentation/filesystems/vfat.txt | 5 | ||||
| -rw-r--r-- | Documentation/filesystems/vfs.txt | 31 |
12 files changed, 121 insertions, 38 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 5b0c083d7c0..b18dd177902 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -47,6 +47,8 @@ prototypes: int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t); int (*rename) (struct inode *, struct dentry *, struct inode *, struct dentry *); + int (*rename2) (struct inode *, struct dentry *, + struct inode *, struct dentry *, unsigned int); int (*readlink) (struct dentry *, char __user *,int); void * (*follow_link) (struct dentry *, struct nameidata *); void (*put_link) (struct dentry *, struct nameidata *, void *); @@ -78,6 +80,7 @@ mkdir: yes unlink: yes (both) rmdir: yes (both) (see below) rename: yes (all) (see below) +rename2: yes (all) (see below) readlink: no follow_link: no put_link: no @@ -96,7 +99,8 @@ tmpfile: no Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on victim. - cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem. + cross-directory ->rename() and rename2() has (per-superblock) +->s_vfs_rename_sem. See Documentation/filesystems/directory-locking for more detailed discussion of the locking scheme for directory operations. @@ -192,13 +196,12 @@ prototypes: void (*invalidatepage) (struct page *, unsigned int, unsigned int); int (*releasepage) (struct page *, int); void (*freepage)(struct page *); - int (*direct_IO)(int, struct kiocb *, const struct iovec *iov, - loff_t offset, unsigned long nr_segs); + int (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset); int (*get_xip_mem)(struct address_space *, pgoff_t, int, void **, unsigned long *); int (*migratepage)(struct address_space *, struct page *, struct page *); int (*launder_page)(struct page *); - int (*is_partially_uptodate)(struct page *, read_descriptor_t *, unsigned long); + int (*is_partially_uptodate)(struct page *, unsigned long, unsigned long); int (*error_remove_page)(struct address_space *, struct page *); int (*swap_activate)(struct file *); int (*swap_deactivate)(struct file *); @@ -427,6 +430,8 @@ prototypes: ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t); ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); + ssize_t (*read_iter) (struct kiocb *, struct iov_iter *); + ssize_t (*write_iter) (struct kiocb *, struct iov_iter *); int (*iterate) (struct file *, struct dir_context *); unsigned int (*poll) (struct file *, struct poll_table_struct *); long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); @@ -525,6 +530,7 @@ locking rules: open: yes close: yes fault: yes can return with page locked +map_pages: yes page_mkwrite: yes can return with page locked access: yes @@ -536,6 +542,15 @@ the page, then ensure it is not already truncated (the page lock will block subsequent truncate), and then return with VM_FAULT_LOCKED, and the page locked. The VM will unlock the page. + ->map_pages() is called when VM asks to map easy accessible pages. +Filesystem should find and map pages associated with offsets from "pgoff" +till "max_pgoff". ->map_pages() is called with page table locked and must +not block. If it's not possible to reach a page without blocking, +filesystem should skip it. Filesystem should use do_set_pte() to setup +page table entry. Pointer to entry associated with offset "pgoff" is +passed in "pte" field in vm_fault structure. Pointers to entries for other +offsets should be calculated relative to "pte". + ->page_mkwrite() is called when a previously read-only pte is about to become writeable. The filesystem again must ensure that there are no truncate/invalidate races, and then return with the page locked. If diff --git a/Documentation/filesystems/affs.txt b/Documentation/filesystems/affs.txt index 81ac488e375..71b63c2b984 100644 --- a/Documentation/filesystems/affs.txt +++ b/Documentation/filesystems/affs.txt @@ -49,6 +49,10 @@ mode=mode Sets the mode flags to the given (octal) value, regardless This is useful since most of the plain AmigaOS files will map to 600. +nofilenametruncate + The file system will return an error when filename exceeds + standard maximum filename length (30 characters). + reserved=num Sets the number of reserved blocks at the start of the partition to num. You should never need this option. Default is 2. @@ -181,9 +185,8 @@ tested, though several hundred MB have been read and written using this fs. For a most up-to-date list of bugs please consult fs/affs/Changes. -Filenames are truncated to 30 characters without warning (this -can be changed by setting the compile-time option AFFS_NO_TRUNCATE -in include/linux/amigaffs.h). +By default, filenames are truncated to 30 characters without warning. +'nofilenametruncate' mount option can change that behavior. Case is ignored by the affs in filename matching, but Linux shells do care about the case. Example (with /wb being an affs mounted fs): diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index b8d284975f0..51afba17bba 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -122,6 +122,10 @@ disable_ext_identify Disable the extension list configured by mkfs, so f2fs inline_xattr Enable the inline xattrs feature. inline_data Enable the inline data feature: New created small(<~3.4k) files can be written into inode block. +flush_merge Merge concurrent cache_flush commands as much as possible + to eliminate redundant command issues. If the underlying + device handles the cache_flush command relatively slowly, + recommend to enable this option. ================================================================================ DEBUGFS ENTRIES @@ -169,9 +173,11 @@ Files in /sys/fs/f2fs/<devname> reclaim_segments This parameter controls the number of prefree segments to be reclaimed. If the number of prefree - segments is larger than this number, f2fs tries to - conduct checkpoint to reclaim the prefree segments - to free segments. By default, 100 segments, 200MB. + segments is larger than the number of segments + in the proportion to the percentage over total + volume size, f2fs tries to conduct checkpoint to + reclaim the prefree segments to free segments. + By default, 5% over total # of segments. max_small_discards This parameter controls the number of discard commands that consist small blocks less than 2MB. @@ -195,6 +201,17 @@ Files in /sys/fs/f2fs/<devname> cleaning operations. The default value is 4096 which covers 8GB block address range. + dir_level This parameter controls the directory level to + support large directory. If a directory has a + number of files, it can reduce the file lookup + latency by increasing this dir_level value. + Otherwise, it needs to decrease this value to + reduce the space overhead. The default value is 0. + + ram_thresh This parameter controls the memory footprint used + by free nids and cached nat entries. By default, + 10 is set, which indicates 10 MB / 1 GB RAM. + ================================================================================ USAGE ================================================================================ @@ -444,9 +461,11 @@ The number of blocks and buckets are determined by, # of blocks in level #n = | `- 4, Otherwise - ,- 2^n, if n < MAX_DIR_HASH_DEPTH / 2, + ,- 2^(n + dir_level), + | if n + dir_level < MAX_DIR_HASH_DEPTH / 2, # of buckets in level #n = | - `- 2^((MAX_DIR_HASH_DEPTH / 2) - 1), Otherwise + `- 2^((MAX_DIR_HASH_DEPTH / 2) - 1), + Otherwise When F2FS finds a file name in a directory, at first a hash value of the file name is calculated. Then, F2FS scans the hash table in level #0 to find the diff --git a/Documentation/filesystems/nfs/nfs41-server.txt b/Documentation/filesystems/nfs/nfs41-server.txt index b930ad08778..c49cd7e796e 100644 --- a/Documentation/filesystems/nfs/nfs41-server.txt +++ b/Documentation/filesystems/nfs/nfs41-server.txt @@ -176,7 +176,5 @@ Nonstandard compound limitations: ca_maxrequestsize request and a ca_maxresponsesize reply, so we may fail to live up to the promise we made in CREATE_SESSION fore channel negotiation. -* No more than one read-like operation allowed per compound; encoding - replies that cross page boundaries (except for read data) not handled. See also http://wiki.linux-nfs.org/wiki/index.php/Server_4.0_and_4.1_issues. diff --git a/Documentation/filesystems/nilfs2.txt b/Documentation/filesystems/nilfs2.txt index 06887d46ccf..41c3d332acc 100644 --- a/Documentation/filesystems/nilfs2.txt +++ b/Documentation/filesystems/nilfs2.txt @@ -25,9 +25,8 @@ available from the following download page. At least "mkfs.nilfs2", cleaner or garbage collector) are required. Details on the tools are described in the man pages included in the package. -Project web page: http://www.nilfs.org/en/ -Download page: http://www.nilfs.org/en/download.html -Git tree web page: http://www.nilfs.org/git/ +Project web page: http://nilfs.sourceforge.net/ +Download page: http://nilfs.sourceforge.net/en/download.html List info: http://vger.kernel.org/vger-lists.html#linux-nilfs Caveats @@ -111,6 +110,13 @@ Table of NILFS2 specific ioctls nilfs_resize utilities and by nilfs_cleanerd daemon. + NILFS_IOCTL_SET_SUINFO Modify segment usage info of requested + segments. This ioctl is used by + nilfs_cleanerd daemon to skip unnecessary + cleaning operation of segments and reduce + performance penalty or wear of flash device + due to redundant move of in-use blocks. + NILFS_IOCTL_GET_SUSTAT Return segment usage statistics. This ioctl is used in lssu, nilfs_resize utilities and by nilfs_cleanerd daemon. diff --git a/Documentation/filesystems/ntfs.txt b/Documentation/filesystems/ntfs.txt index 791af8dac06..61947facfc0 100644 --- a/Documentation/filesystems/ntfs.txt +++ b/Documentation/filesystems/ntfs.txt @@ -455,8 +455,6 @@ not have this problem with odd numbers of sectors. ChangeLog ========= -Note, a technical ChangeLog aimed at kernel hackers is in fs/ntfs/ChangeLog. - 2.1.30: - Fix writev() (it kept writing the first segment over and over again instead of moving onto subsequent segments). diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index fe2b7ae6f96..0f3a1390bf0 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -295,9 +295,9 @@ in the beginning of ->setattr unconditionally. ->clear_inode() and ->delete_inode() are gone; ->evict_inode() should be used instead. It gets called whenever the inode is evicted, whether it has remaining links or not. Caller does *not* evict the pagecache or inode-associated -metadata buffers; getting rid of those is responsibility of method, as it had -been for ->delete_inode(). Caller makes sure async writeback cannot be running -for the inode while (or after) ->evict_inode() is called. +metadata buffers; the method has to use truncate_inode_pages_final() to get rid +of those. Caller makes sure async writeback cannot be running for the inode while +(or after) ->evict_inode() is called. ->drop_inode() returns int now; it's called on final iput() with inode->i_lock held and it returns true if filesystems wants the inode to be diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index f00bee144ad..ddc531a74d0 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -234,7 +234,7 @@ Table 1-2: Contents of the status files (as of 2.6.30-rc7) ShdPnd bitmap of shared pending signals for the process SigBlk bitmap of blocked signals SigIgn bitmap of ignored signals - SigCgt bitmap of catched signals + SigCgt bitmap of caught signals CapInh bitmap of inheritable capabilities CapPrm bitmap of permitted capabilities CapEff bitmap of effective capabilities @@ -300,7 +300,7 @@ Table 1-4: Contents of the stat files (as of 2.6.30-rc7) pending bitmap of pending signals blocked bitmap of blocked signals sigign bitmap of ignored signals - sigcatch bitmap of catched signals + sigcatch bitmap of caught signals wchan address where process went to sleep 0 (place holder) 0 (place holder) @@ -854,7 +854,8 @@ WritebackTmp: Memory used by FUSE for temporary writeback buffers if strict overcommit accounting is enabled (mode 2 in 'vm.overcommit_memory'). The CommitLimit is calculated with the following formula: - CommitLimit = ('vm.overcommit_ratio' * Physical RAM) + Swap + CommitLimit = ([total RAM pages] - [total huge TLB pages]) * + overcommit_ratio / 100 + [total swap pages] For example, on a system with 1G of physical RAM and 7G of swap with a `vm.overcommit_ratio` of 30 it would yield a CommitLimit of 7.3G. @@ -1245,8 +1246,9 @@ second). The meanings of the columns are as follows, from left to right: The "intr" line gives counts of interrupts serviced since boot time, for each of the possible system interrupts. The first column is the total of all -interrupts serviced; each subsequent column is the total for that particular -interrupt. +interrupts serviced including unnumbered architecture specific interrupts; +each subsequent column is the total for that particular numbered interrupt. +Unnumbered interrupts are not shown, only summed into the total. The "ctxt" line gives the total number of context switches across all CPUs. @@ -1648,18 +1650,21 @@ pids, so one need to either stop or freeze processes being inspected if precise results are needed. -3.7 /proc/<pid>/fdinfo/<fd> - Information about opened file +3.8 /proc/<pid>/fdinfo/<fd> - Information about opened file --------------------------------------------------------------- This file provides information associated with an opened file. The regular -files have at least two fields -- 'pos' and 'flags'. The 'pos' represents -the current offset of the opened file in decimal form [see lseek(2) for -details] and 'flags' denotes the octal O_xxx mask the file has been -created with [see open(2) for details]. +files have at least three fields -- 'pos', 'flags' and mnt_id. The 'pos' +represents the current offset of the opened file in decimal form [see lseek(2) +for details], 'flags' denotes the octal O_xxx mask the file has been +created with [see open(2) for details] and 'mnt_id' represents mount ID of +the file system containing the opened file [see 3.5 /proc/<pid>/mountinfo +for details]. A typical output is pos: 0 flags: 0100002 + mnt_id: 19 The files such as eventfd, fsnotify, signalfd, epoll among the regular pos/flags pair provide additional information particular to the objects they represent. @@ -1668,6 +1673,7 @@ pair provide additional information particular to the objects they represent. ~~~~~~~~~~~~~ pos: 0 flags: 04002 + mnt_id: 9 eventfd-count: 5a where 'eventfd-count' is hex value of a counter. @@ -1676,6 +1682,7 @@ pair provide additional information particular to the objects they represent. ~~~~~~~~~~~~~~ pos: 0 flags: 04002 + mnt_id: 9 sigmask: 0000000000000200 where 'sigmask' is hex value of the signal mask associated @@ -1685,6 +1692,7 @@ pair provide additional information particular to the objects they represent. ~~~~~~~~~~~ pos: 0 flags: 02 + mnt_id: 9 tfd: 5 events: 1d data: ffffffffffffffff where 'tfd' is a target file descriptor number in decimal form, @@ -1718,6 +1726,7 @@ pair provide additional information particular to the objects they represent. pos: 0 flags: 02 + mnt_id: 9 fanotify flags:10 event-flags:0 fanotify mnt_id:12 mflags:40 mask:38 ignored_mask:40000003 fanotify ino:4f969 sdev:800013 mflags:0 mask:3b ignored_mask:40000000 fhandle-bytes:8 fhandle-type:1 f_handle:69f90400c275b5b4 diff --git a/Documentation/filesystems/seq_file.txt b/Documentation/filesystems/seq_file.txt index a1e2e0dda90..1fe0ccb1af5 100644 --- a/Documentation/filesystems/seq_file.txt +++ b/Documentation/filesystems/seq_file.txt @@ -54,6 +54,15 @@ how the mechanism works without getting lost in other details. (Those wanting to see the full source for this module can find it at http://lwn.net/Articles/22359/). +Deprecated create_proc_entry + +Note that the above article uses create_proc_entry which was removed in +kernel 3.10. Current versions require the following update + +- entry = create_proc_entry("sequence", 0, NULL); +- if (entry) +- entry->proc_fops = &ct_file_ops; ++ entry = proc_create("sequence", 0, NULL, &ct_file_ops); The iterator interface diff --git a/Documentation/filesystems/sharedsubtree.txt b/Documentation/filesystems/sharedsubtree.txt index 4ede421c968..32a173dd315 100644 --- a/Documentation/filesystems/sharedsubtree.txt +++ b/Documentation/filesystems/sharedsubtree.txt @@ -727,7 +727,7 @@ replicas continue to be exactly same. mkdir -p /tmp/m3 mount --rbind /root /tmp/m3 - I wont' draw the tree..but it has 24 vfsmounts + I won't draw the tree..but it has 24 vfsmounts at step i the number of vfsmounts is V[i] = i*V[i-1]. diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt index 4a93e98b290..ce1126aceed 100644 --- a/Documentation/filesystems/vfat.txt +++ b/Documentation/filesystems/vfat.txt @@ -172,6 +172,11 @@ nfs=stale_rw|nostale_ro To maintain backward compatibility, '-o nfs' is also accepted, defaulting to stale_rw +dos1xfloppy -- If set, use a fallback default BIOS Parameter Block + configuration, determined by backing device size. These static + parameters match defaults assumed by DOS 1.x for 160 kiB, + 180 kiB, 320 kiB, and 360 kiB floppies and floppy images. + <bool>: 0,1,yes,no,true,false diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index c53784c119c..a1d0d7a3016 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -347,6 +347,8 @@ struct inode_operations { int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t); int (*rename) (struct inode *, struct dentry *, struct inode *, struct dentry *); + int (*rename2) (struct inode *, struct dentry *, + struct inode *, struct dentry *, unsigned int); int (*readlink) (struct dentry *, char __user *,int); void * (*follow_link) (struct dentry *, struct nameidata *); void (*put_link) (struct dentry *, struct nameidata *, void *); @@ -414,6 +416,20 @@ otherwise noted. rename: called by the rename(2) system call to rename the object to have the parent and name given by the second inode and dentry. + rename2: this has an additional flags argument compared to rename. + If no flags are supported by the filesystem then this method + need not be implemented. If some flags are supported then the + filesystem must return -EINVAL for any unsupported or unknown + flags. Currently the following flags are implemented: + (1) RENAME_NOREPLACE: this flag indicates that if the target + of the rename exists the rename should fail with -EEXIST + instead of replacing the target. The VFS already checks for + existence, so for local filesystems the RENAME_NOREPLACE + implementation is equivalent to plain rename. + (2) RENAME_EXCHANGE: exchange source and target. Both must + exist; this is checked by the VFS. Unlike plain rename, + source and target may be of different type. + readlink: called by the readlink(2) system call. Only required if you want to support reading symbolic links @@ -573,14 +589,13 @@ struct address_space_operations { void (*invalidatepage) (struct page *, unsigned int, unsigned int); int (*releasepage) (struct page *, int); void (*freepage)(struct page *); - ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov, - loff_t offset, unsigned long nr_segs); + ssize_t (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset); struct page* (*get_xip_page)(struct address_space *, sector_t, int); /* migrate the contents of a page to the specified target */ int (*migratepage) (struct page *, struct page *); int (*launder_page) (struct page *); - int (*is_partially_uptodate) (struct page *, read_descriptor_t *, + int (*is_partially_uptodate) (struct page *, unsigned long, unsigned long); void (*is_dirty_writeback) (struct page *, bool *, bool *); int (*error_remove_page) (struct mapping *mapping, struct page *page); @@ -791,6 +806,8 @@ struct file_operations { ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t); ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); + ssize_t (*read_iter) (struct kiocb *, struct iov_iter *); + ssize_t (*write_iter) (struct kiocb *, struct iov_iter *); int (*iterate) (struct file *, struct dir_context *); unsigned int (*poll) (struct file *, struct poll_table_struct *); long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); @@ -821,11 +838,15 @@ otherwise noted. read: called by read(2) and related system calls - aio_read: called by io_submit(2) and other asynchronous I/O operations + aio_read: vectored, possibly asynchronous read + + read_iter: possibly asynchronous read with iov_iter as destination write: called by write(2) and related system calls - aio_write: called by io_submit(2) and other asynchronous I/O operations + aio_write: vectored, possibly asynchronous write + + write_iter: possibly asynchronous write with iov_iter as source iterate: called when the VFS needs to read the directory contents |
