aboutsummaryrefslogtreecommitdiff
path: root/Documentation/filesystems
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation/filesystems')
-rw-r--r--Documentation/filesystems/00-INDEX60
-rw-r--r--Documentation/filesystems/9p.txt17
-rw-r--r--Documentation/filesystems/Locking25
-rw-r--r--Documentation/filesystems/affs.txt9
-rw-r--r--Documentation/filesystems/autofs4-mount-control.txt2
-rw-r--r--Documentation/filesystems/btrfs.txt81
-rw-r--r--Documentation/filesystems/caching/netfs-api.txt73
-rw-r--r--Documentation/filesystems/directory-locking31
-rw-r--r--Documentation/filesystems/f2fs.txt54
-rw-r--r--Documentation/filesystems/hfsplus.txt2
-rw-r--r--Documentation/filesystems/nfs/00-INDEX4
-rw-r--r--Documentation/filesystems/nfs/nfs41-server.txt44
-rw-r--r--Documentation/filesystems/nilfs2.txt68
-rw-r--r--Documentation/filesystems/ntfs.txt2
-rw-r--r--Documentation/filesystems/porting14
-rw-r--r--Documentation/filesystems/proc.txt45
-rw-r--r--Documentation/filesystems/seq_file.txt9
-rw-r--r--Documentation/filesystems/sharedsubtree.txt2
-rw-r--r--Documentation/filesystems/sysfs.txt6
-rw-r--r--Documentation/filesystems/vfat.txt7
-rw-r--r--Documentation/filesystems/vfs.txt57
21 files changed, 441 insertions, 171 deletions
diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX
index 8042050eb26..ac28149aede 100644
--- a/Documentation/filesystems/00-INDEX
+++ b/Documentation/filesystems/00-INDEX
@@ -2,6 +2,8 @@
- this file (info on some of the filesystems supported by linux).
Locking
- info on locking rules as they pertain to Linux VFS.
+Makefile
+ - Makefile for building the filsystems-part of DocBook.
9p.txt
- 9p (v9fs) is an implementation of the Plan 9 remote fs protocol.
adfs.txt
@@ -10,24 +12,32 @@ afs.txt
- info and examples for the distributed AFS (Andrew File System) fs.
affs.txt
- info and mount options for the Amiga Fast File System.
+autofs4-mount-control.txt
+ - info on device control operations for autofs4 module.
automount-support.txt
- information about filesystem automount support.
befs.txt
- information about the BeOS filesystem for Linux.
bfs.txt
- info for the SCO UnixWare Boot Filesystem (BFS).
+btrfs.txt
+ - info for the BTRFS filesystem.
+caching/
+ - directory containing filesystem cache documentation.
ceph.txt
- - info for the Ceph Distributed File System
-cifs.txt
- - description of the CIFS filesystem.
+ - info for the Ceph Distributed File System.
+cifs/
+ - directory containing CIFS filesystem documentation and example code.
coda.txt
- description of the CODA filesystem.
configfs/
- directory containing configfs documentation and example code.
cramfs.txt
- info on the cram filesystem for small storage (ROMs etc).
-dentry-locking.txt
- - info on the RCU-based dcache locking model.
+debugfs.txt
+ - info on the debugfs filesystem.
+devpts.txt
+ - info on the devpts filesystem.
directory-locking
- info about the locking scheme used for directory operations.
dlmfs.txt
@@ -35,7 +45,7 @@ dlmfs.txt
dnotify.txt
- info about directory notification in Linux.
dnotify_test.c
- - example program for dnotify
+ - example program for dnotify.
ecryptfs.txt
- docs on eCryptfs: stacked cryptographic filesystem for Linux.
efivarfs.txt
@@ -48,12 +58,18 @@ ext3.txt
- info, mount options and specifications for the Ext3 filesystem.
ext4.txt
- info, mount options and specifications for the Ext4 filesystem.
-files.txt
- - info on file management in the Linux kernel.
f2fs.txt
- info and mount options for the F2FS filesystem.
+fiemap.txt
+ - info on fiemap ioctl.
+files.txt
+ - info on file management in the Linux kernel.
fuse.txt
- info on the Filesystem in User SpacE including mount options.
+gfs2-glocks.txt
+ - info on the Global File System 2 - Glock internal locking rules.
+gfs2-uevents.txt
+ - info on the Global File System 2 - uevents.
gfs2.txt
- info on the Global File System 2.
hfs.txt
@@ -84,40 +100,58 @@ ntfs.txt
- info and mount options for the NTFS filesystem (Windows NT).
ocfs2.txt
- info and mount options for the OCFS2 clustered filesystem.
+omfs.txt
+ - info on the Optimized MPEG FileSystem.
+path-lookup.txt
+ - info on path walking and name lookup locking.
+pohmelfs/
+ - directory containing pohmelfs filesystem documentation.
porting
- various information on filesystem porting.
proc.txt
- info on Linux's /proc filesystem.
+qnx6.txt
+ - info on the QNX6 filesystem.
+quota.txt
+ - info on Quota subsystem.
ramfs-rootfs-initramfs.txt
- info on the 'in memory' filesystems ramfs, rootfs and initramfs.
-reiser4.txt
- - info on the Reiser4 filesystem based on dancing tree algorithms.
relay.txt
- info on relay, for efficient streaming from kernel to user space.
romfs.txt
- description of the ROMFS filesystem.
seq_file.txt
- - how to use the seq_file API
+ - how to use the seq_file API.
sharedsubtree.txt
- a description of shared subtrees for namespaces.
spufs.txt
- info and mount options for the SPU filesystem used on Cell.
+squashfs.txt
+ - info on the squashfs filesystem.
sysfs-pci.txt
- info on accessing PCI device resources through sysfs.
+sysfs-tagging.txt
+ - info on sysfs tagging to avoid duplicates.
sysfs.txt
- info on sysfs, a ram-based filesystem for exporting kernel objects.
sysv-fs.txt
- info on the SystemV/V7/Xenix/Coherent filesystem.
tmpfs.txt
- info on tmpfs, a filesystem that holds all files in virtual memory.
+ubifs.txt
+ - info on the Unsorted Block Images FileSystem.
udf.txt
- info and mount options for the UDF filesystem.
ufs.txt
- info on the ufs filesystem.
vfat.txt
- - info on using the VFAT filesystem used in Windows NT and Windows 95
+ - info on using the VFAT filesystem used in Windows NT and Windows 95.
vfs.txt
- - overview of the Virtual File System
+ - overview of the Virtual File System.
+xfs-delayed-logging-design.txt
+ - info on the XFS Delayed Logging Design.
+xfs-self-describing-metadata.txt
+ - info on XFS Self Describing Metadata.
xfs.txt
- info and mount options for the XFS filesystem.
xip.txt
diff --git a/Documentation/filesystems/9p.txt b/Documentation/filesystems/9p.txt
index 2c032144284..fec7144e817 100644
--- a/Documentation/filesystems/9p.txt
+++ b/Documentation/filesystems/9p.txt
@@ -69,10 +69,14 @@ OPTIONS
offering several exported file systems.
cache=mode specifies a caching policy. By default, no caches are used.
+ none = default no cache policy, metadata and data
+ alike are synchronous.
loose = no attempts are made at consistency,
intended for exclusive, read-only mounts
- fscache = use FS-Cache for a persistent, read-only
+ fscache = use FS-Cache for a persistent, read-only
cache backend.
+ mmap = minimal cache that is only used for read-write
+ mmap. Northing else is cached, like cache=none
debug=n specifies debug level. The debug level is a bitmask.
0x01 = display verbose error messages
@@ -147,8 +151,7 @@ on sourceforge (http://sourceforge.net/projects/v9fs).
News and other information is maintained on a Wiki.
(http://sf.net/apps/mediawiki/v9fs/index.php).
-Bug reports may be issued through the kernel.org bugzilla
-(http://bugzilla.kernel.org)
+Bug reports are best issued via the mailing list.
For more information on the Plan 9 Operating System check out
http://plan9.bell-labs.com/plan9
@@ -156,11 +159,3 @@ http://plan9.bell-labs.com/plan9
For information on Plan 9 from User Space (Plan 9 applications and libraries
ported to Linux/BSD/OSX/etc) check out http://swtch.com/plan9
-
-STATUS
-======
-
-The 2.6 kernel support is working on PPC and x86.
-
-PLEASE USE THE KERNEL BUGZILLA TO REPORT PROBLEMS. (http://bugzilla.kernel.org)
-
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index fe7afe22538..b18dd177902 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -47,6 +47,8 @@ prototypes:
int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t);
int (*rename) (struct inode *, struct dentry *,
struct inode *, struct dentry *);
+ int (*rename2) (struct inode *, struct dentry *,
+ struct inode *, struct dentry *, unsigned int);
int (*readlink) (struct dentry *, char __user *,int);
void * (*follow_link) (struct dentry *, struct nameidata *);
void (*put_link) (struct dentry *, struct nameidata *, void *);
@@ -78,6 +80,7 @@ mkdir: yes
unlink: yes (both)
rmdir: yes (both) (see below)
rename: yes (all) (see below)
+rename2: yes (all) (see below)
readlink: no
follow_link: no
put_link: no
@@ -96,7 +99,8 @@ tmpfile: no
Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on
victim.
- cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem.
+ cross-directory ->rename() and rename2() has (per-superblock)
+->s_vfs_rename_sem.
See Documentation/filesystems/directory-locking for more detailed discussion
of the locking scheme for directory operations.
@@ -192,13 +196,12 @@ prototypes:
void (*invalidatepage) (struct page *, unsigned int, unsigned int);
int (*releasepage) (struct page *, int);
void (*freepage)(struct page *);
- int (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
- loff_t offset, unsigned long nr_segs);
+ int (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset);
int (*get_xip_mem)(struct address_space *, pgoff_t, int, void **,
unsigned long *);
int (*migratepage)(struct address_space *, struct page *, struct page *);
int (*launder_page)(struct page *);
- int (*is_partially_uptodate)(struct page *, read_descriptor_t *, unsigned long);
+ int (*is_partially_uptodate)(struct page *, unsigned long, unsigned long);
int (*error_remove_page)(struct address_space *, struct page *);
int (*swap_activate)(struct file *);
int (*swap_deactivate)(struct file *);
@@ -427,6 +430,8 @@ prototypes:
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
+ ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
+ ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
int (*iterate) (struct file *, struct dir_context *);
unsigned int (*poll) (struct file *, struct poll_table_struct *);
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
@@ -525,6 +530,7 @@ locking rules:
open: yes
close: yes
fault: yes can return with page locked
+map_pages: yes
page_mkwrite: yes can return with page locked
access: yes
@@ -536,6 +542,15 @@ the page, then ensure it is not already truncated (the page lock will block
subsequent truncate), and then return with VM_FAULT_LOCKED, and the page
locked. The VM will unlock the page.
+ ->map_pages() is called when VM asks to map easy accessible pages.
+Filesystem should find and map pages associated with offsets from "pgoff"
+till "max_pgoff". ->map_pages() is called with page table locked and must
+not block. If it's not possible to reach a page without blocking,
+filesystem should skip it. Filesystem should use do_set_pte() to setup
+page table entry. Pointer to entry associated with offset "pgoff" is
+passed in "pte" field in vm_fault structure. Pointers to entries for other
+offsets should be calculated relative to "pte".
+
->page_mkwrite() is called when a previously read-only pte is
about to become writeable. The filesystem again must ensure that there are
no truncate/invalidate races, and then return with the page locked. If
@@ -544,7 +559,7 @@ like the ->fault() handler, but simply return with VM_FAULT_NOPAGE, which
will cause the VM to retry the fault.
->access() is called when get_user_pages() fails in
-acces_process_vm(), typically used to debug a process through
+access_process_vm(), typically used to debug a process through
/proc/pid/mem or ptrace. This function is needed only for
VM_IO | VM_PFNMAP VMAs.
diff --git a/Documentation/filesystems/affs.txt b/Documentation/filesystems/affs.txt
index 81ac488e375..71b63c2b984 100644
--- a/Documentation/filesystems/affs.txt
+++ b/Documentation/filesystems/affs.txt
@@ -49,6 +49,10 @@ mode=mode Sets the mode flags to the given (octal) value, regardless
This is useful since most of the plain AmigaOS files
will map to 600.
+nofilenametruncate
+ The file system will return an error when filename exceeds
+ standard maximum filename length (30 characters).
+
reserved=num Sets the number of reserved blocks at the start of the
partition to num. You should never need this option.
Default is 2.
@@ -181,9 +185,8 @@ tested, though several hundred MB have been read and written using
this fs. For a most up-to-date list of bugs please consult
fs/affs/Changes.
-Filenames are truncated to 30 characters without warning (this
-can be changed by setting the compile-time option AFFS_NO_TRUNCATE
-in include/linux/amigaffs.h).
+By default, filenames are truncated to 30 characters without warning.
+'nofilenametruncate' mount option can change that behavior.
Case is ignored by the affs in filename matching, but Linux shells
do care about the case. Example (with /wb being an affs mounted fs):
diff --git a/Documentation/filesystems/autofs4-mount-control.txt b/Documentation/filesystems/autofs4-mount-control.txt
index 4c95935cbcf..aff22113a98 100644
--- a/Documentation/filesystems/autofs4-mount-control.txt
+++ b/Documentation/filesystems/autofs4-mount-control.txt
@@ -255,7 +255,7 @@ AUTOFS_DEV_IOCTL_OPENMOUNT and AUTOFS_DEV_IOCTL_CLOSEMOUNT
Obtain and release a file descriptor for an autofs managed mount point
path. The open call requires an initialized struct autofs_dev_ioctl with
-the the path field set and the size field adjusted appropriately as well
+the path field set and the size field adjusted appropriately as well
as the arg1 field set to the device number of the autofs mount. The
device number can be obtained from the mount options shown in
/proc/mounts. The close call requires an initialized struct
diff --git a/Documentation/filesystems/btrfs.txt b/Documentation/filesystems/btrfs.txt
index 9dae5940743..d11cc2f8077 100644
--- a/Documentation/filesystems/btrfs.txt
+++ b/Documentation/filesystems/btrfs.txt
@@ -38,7 +38,7 @@ Mount Options
=============
When mounting a btrfs filesystem, the following option are accepted.
-Unless otherwise specified, all options default to off.
+Options with (*) are default options and will not show in the mount options.
alloc_start=<bytes>
Debugging option to force all block allocations above a certain
@@ -46,10 +46,12 @@ Unless otherwise specified, all options default to off.
bytes, optionally with a K, M, or G suffix, case insensitive.
Default is 1MB.
+ noautodefrag(*)
autodefrag
- Detect small random writes into files and queue them up for the
- defrag process. Works best for small files; Not well suited for
- large database workloads.
+ Disable/enable auto defragmentation.
+ Auto defragmentation detects small random writes into files and queue
+ them up for the defrag process. Works best for small files;
+ Not well suited for large database workloads.
check_int
check_int_data
@@ -70,6 +72,12 @@ Unless otherwise specified, all options default to off.
See comments at the top of fs/btrfs/check-integrity.c for more info.
+ commit=<seconds>
+ Set the interval of periodic commit, 30 seconds by default. Higher
+ values defer data being synced to permanent storage with obvious
+ consequences when the system crashes. The upper bound is not forced,
+ but a warning is printed if it's more than 300 seconds (5 minutes).
+
compress
compress=<type>
compress-force
@@ -90,21 +98,26 @@ Unless otherwise specified, all options default to off.
can be avoided. Especially useful when trying to mount a multi-device
setup as root. May be specified multiple times for multiple devices.
+ nodiscard(*)
discard
- Issue frequent commands to let the block device reclaim space freed by
- the filesystem. This is useful for SSD devices, thinly provisioned
+ Disable/enable discard mount option.
+ Discard issues frequent commands to let the block device reclaim space
+ freed by the filesystem.
+ This is useful for SSD devices, thinly provisioned
LUNs and virtual machine images, but may have a significant
performance impact. (The fstrim command is also available to
initiate batch trims from userspace).
+ noenospc_debug(*)
enospc_debug
- Debugging option to be more verbose in some ENOSPC conditions.
+ Disable/enable debugging option to be more verbose in some ENOSPC conditions.
fatal_errors=<action>
Action to take when encountering a fatal error:
"bug" - BUG() on a fatal error. This is the default.
"panic" - panic() on a fatal error.
+ noflushoncommit(*)
flushoncommit
The 'flushoncommit' mount option forces any data dirtied by a write in a
prior transaction to commit as part of the current commit. This makes
@@ -128,33 +141,43 @@ Unless otherwise specified, all options default to off.
Specify that 1 metadata chunk should be allocated after every <value>
data chunks. Off by default.
+ acl(*)
noacl
- Disable support for Posix Access Control Lists (ACLs). See the
+ Enable/disable support for Posix Access Control Lists (ACLs). See the
acl(5) manual page for more information about ACLs.
+ barrier(*)
nobarrier
- Disables the use of block layer write barriers. Write barriers ensure
- that certain IOs make it through the device cache and are on persistent
- storage. If used on a device with a volatile (non-battery-backed)
- write-back cache, this option will lead to filesystem corruption on a
- system crash or power loss.
+ Enable/disable the use of block layer write barriers. Write barriers
+ ensure that certain IOs make it through the device cache and are on
+ persistent storage. If disabled on a device with a volatile
+ (non-battery-backed) write-back cache, nobarrier option will lead to
+ filesystem corruption on a system crash or power loss.
+ datacow(*)
nodatacow
- Disable data copy-on-write for newly created files. Implies nodatasum,
- and disables all compression.
+ Enable/disable data copy-on-write for newly created files.
+ Nodatacow implies nodatasum, and disables all compression.
+ datasum(*)
nodatasum
- Disable data checksumming for newly created files.
+ Enable/disable data checksumming for newly created files.
+ Datasum implies datacow.
+ treelog(*)
notreelog
- Disable the tree logging used for fsync and O_SYNC writes.
+ Enable/disable the tree logging used for fsync and O_SYNC writes.
recovery
Enable autorecovery attempts if a bad tree root is found at mount time.
Currently this scans a list of several previous tree roots and tries to
use the first readable.
- skip_balance
+ rescan_uuid_tree
+ Force check and rebuild procedure of the UUID tree. This should not
+ normally be needed.
+
+ skip_balance
Skip automatic resume of interrupted balance operation after mount.
May be resumed with "btrfs balance resume."
@@ -234,24 +257,14 @@ available from the git repository at the following location:
These include the following tools:
-mkfs.btrfs: create a filesystem
-
-btrfsctl: control program to create snapshots and subvolumes:
+* mkfs.btrfs: create a filesystem
- mount /dev/sda2 /mnt
- btrfsctl -s new_subvol_name /mnt
- btrfsctl -s snapshot_of_default /mnt/default
- btrfsctl -s snapshot_of_new_subvol /mnt/new_subvol_name
- btrfsctl -s snapshot_of_a_snapshot /mnt/snapshot_of_new_subvol
- ls /mnt
- default snapshot_of_a_snapshot snapshot_of_new_subvol
- new_subvol_name snapshot_of_default
+* btrfs: a single tool to manage the filesystems, refer to the manpage for more details
- Snapshots and subvolumes cannot be deleted right now, but you can
- rm -rf all the files and directories inside them.
+* 'btrfsck' or 'btrfs check': do a consistency check of the filesystem
-btrfsck: do a limited check of the FS extent trees.
+Other tools for specific tasks:
-btrfs-debug-tree: print all of the FS metadata in text form. Example:
+* btrfs-convert: in-place conversion from ext2/3/4 filesystems
- btrfs-debug-tree /dev/sda2 >& big_output_file
+* btrfs-image: dump filesystem metadata for debugging
diff --git a/Documentation/filesystems/caching/netfs-api.txt b/Documentation/filesystems/caching/netfs-api.txt
index 11a0a40ce44..aed6b94160b 100644
--- a/Documentation/filesystems/caching/netfs-api.txt
+++ b/Documentation/filesystems/caching/netfs-api.txt
@@ -29,15 +29,16 @@ This document contains the following sections:
(6) Index registration
(7) Data file registration
(8) Miscellaneous object registration
- (9) Setting the data file size
+ (9) Setting the data file size
(10) Page alloc/read/write
(11) Page uncaching
(12) Index and data file consistency
- (13) Miscellaneous cookie operations
- (14) Cookie unregistration
- (15) Index invalidation
- (16) Data file invalidation
- (17) FS-Cache specific page flags.
+ (13) Cookie enablement
+ (14) Miscellaneous cookie operations
+ (15) Cookie unregistration
+ (16) Index invalidation
+ (17) Data file invalidation
+ (18) FS-Cache specific page flags.
=============================
@@ -334,7 +335,8 @@ the path to the file:
struct fscache_cookie *
fscache_acquire_cookie(struct fscache_cookie *parent,
const struct fscache_object_def *def,
- void *netfs_data);
+ void *netfs_data,
+ bool enable);
This function creates an index entry in the index represented by parent,
filling in the index entry by calling the operations pointed to by def.
@@ -350,6 +352,10 @@ object needs to be created somewhere down the hierarchy. Furthermore, an index
may be created in several different caches independently at different times.
This is all handled transparently, and the netfs doesn't see any of it.
+A cookie will be created in the disabled state if enabled is false. A cookie
+must be enabled to do anything with it. A disabled cookie can be enabled by
+calling fscache_enable_cookie() (see below).
+
For example, with AFS, a cell would be added to the primary index. This index
entry would have a dependent inode containing a volume location index for the
volume mappings within this cell:
@@ -357,7 +363,7 @@ volume mappings within this cell:
cell->cache =
fscache_acquire_cookie(afs_cache_netfs.primary_index,
&afs_cell_cache_index_def,
- cell);
+ cell, true);
Then when a volume location was accessed, it would be entered into the cell's
index and an inode would be allocated that acts as a volume type and hash chain
@@ -366,7 +372,7 @@ combination:
vlocation->cache =
fscache_acquire_cookie(cell->cache,
&afs_vlocation_cache_index_def,
- vlocation);
+ vlocation, true);
And then a particular flavour of volume (R/O for example) could be added to
that index, creating another index for vnodes (AFS inode equivalents):
@@ -374,7 +380,7 @@ that index, creating another index for vnodes (AFS inode equivalents):
volume->cache =
fscache_acquire_cookie(vlocation->cache,
&afs_volume_cache_index_def,
- volume);
+ volume, true);
======================
@@ -388,7 +394,7 @@ the object definition should be something other than index type.
vnode->cache =
fscache_acquire_cookie(volume->cache,
&afs_vnode_cache_object_def,
- vnode);
+ vnode, true);
=================================
@@ -404,7 +410,7 @@ it would be some other type of object such as a data file.
xattr->cache =
fscache_acquire_cookie(vnode->cache,
&afs_xattr_cache_object_def,
- xattr);
+ xattr, true);
Miscellaneous objects might be used to store extended attributes or directory
entries for example.
@@ -733,6 +739,47 @@ Note that partial updates may happen automatically at other times, such as when
data blocks are added to a data file object.
+=================
+COOKIE ENABLEMENT
+=================
+
+Cookies exist in one of two states: enabled and disabled. If a cookie is
+disabled, it ignores all attempts to acquire child cookies; check, update or
+invalidate its state; allocate, read or write backing pages - though it is
+still possible to uncache pages and relinquish the cookie.
+
+The initial enablement state is set by fscache_acquire_cookie(), but the cookie
+can be enabled or disabled later. To disable a cookie, call:
+
+ void fscache_disable_cookie(struct fscache_cookie *cookie,
+ bool invalidate);
+
+If the cookie is not already disabled, this locks the cookie against other
+enable and disable ops, marks the cookie as being disabled, discards or
+invalidates any backing objects and waits for cessation of activity on any
+associated object before unlocking the cookie.
+
+All possible failures are handled internally. The caller should consider
+calling fscache_uncache_all_inode_pages() afterwards to make sure all page
+markings are cleared up.
+
+Cookies can be enabled or reenabled with:
+
+ void fscache_enable_cookie(struct fscache_cookie *cookie,
+ bool (*can_enable)(void *data),
+ void *data)
+
+If the cookie is not already enabled, this locks the cookie against other
+enable and disable ops, invokes can_enable() and, if the cookie is not an index
+cookie, will begin the procedure of acquiring backing objects.
+
+The optional can_enable() function is passed the data argument and returns a
+ruling as to whether or not enablement should actually be permitted to begin.
+
+All possible failures are handled internally. The cookie will only be marked
+as enabled if provisional backing objects are allocated.
+
+
===============================
MISCELLANEOUS COOKIE OPERATIONS
===============================
@@ -778,7 +825,7 @@ COOKIE UNREGISTRATION
To get rid of a cookie, this function should be called.
void fscache_relinquish_cookie(struct fscache_cookie *cookie,
- int retire);
+ bool retire);
If retire is non-zero, then the object will be marked for recycling, and all
copies of it will be removed from all active caches in which it is present.
diff --git a/Documentation/filesystems/directory-locking b/Documentation/filesystems/directory-locking
index ff7b611abf3..09bbf9a54f8 100644
--- a/Documentation/filesystems/directory-locking
+++ b/Documentation/filesystems/directory-locking
@@ -2,6 +2,10 @@
kinds of locks - per-inode (->i_mutex) and per-filesystem
(->s_vfs_rename_mutex).
+ When taking the i_mutex on multiple non-directory objects, we
+always acquire the locks in order by increasing address. We'll call
+that "inode pointer" order in the following.
+
For our purposes all operations fall in 5 classes:
1) read access. Locking rules: caller locks directory we are accessing.
@@ -12,8 +16,9 @@ kinds of locks - per-inode (->i_mutex) and per-filesystem
locks victim and calls the method.
4) rename() that is _not_ cross-directory. Locking rules: caller locks
-the parent, finds source and target, if target already exists - locks it
-and then calls the method.
+the parent and finds source and target. If target already exists, lock
+it. If source is a non-directory, lock it. If that means we need to
+lock both, lock them in inode pointer order.
5) link creation. Locking rules:
* lock parent
@@ -30,7 +35,9 @@ rules:
fail with -ENOTEMPTY
* if new parent is equal to or is a descendent of source
fail with -ELOOP
- * if target exists - lock it.
+ * If target exists, lock it. If source is a non-directory, lock
+ it. In case that means we need to lock both source and target,
+ do so in inode pointer order.
* call the method.
@@ -56,9 +63,11 @@ objects - A < B iff A is an ancestor of B.
renames will be blocked on filesystem lock and we don't start changing
the order until we had acquired all locks).
-(3) any operation holds at most one lock on non-directory object and
- that lock is acquired after all other locks. (Proof: see descriptions
- of operations).
+(3) locks on non-directory objects are acquired only after locks on
+ directory objects, and are acquired in inode pointer order.
+ (Proof: all operations but renames take lock on at most one
+ non-directory object, except renames, which take locks on source and
+ target in inode pointer order in the case they are not directories.)
Now consider the minimal deadlock. Each process is blocked on
attempt to acquire some lock and already holds at least one lock. Let's
@@ -66,9 +75,13 @@ consider the set of contended locks. First of all, filesystem lock is
not contended, since any process blocked on it is not holding any locks.
Thus all processes are blocked on ->i_mutex.
- Non-directory objects are not contended due to (3). Thus link
-creation can't be a part of deadlock - it can't be blocked on source
-and it means that it doesn't hold any locks.
+ By (3), any process holding a non-directory lock can only be
+waiting on another non-directory lock with a larger address. Therefore
+the process holding the "largest" such lock can always make progress, and
+non-directory objects are not included in the set of contended locks.
+
+ Thus link creation can't be a part of deadlock - it can't be
+blocked on source and it means that it doesn't hold any locks.
Any contended object is either held by cross-directory rename or
has a child that is also contended. Indeed, suppose that it is held by
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index 3cd27bed634..51afba17bba 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -119,6 +119,13 @@ active_logs=%u Support configuring the number of active logs. In the
Default number is 6.
disable_ext_identify Disable the extension list configured by mkfs, so f2fs
does not aware of cold files such as media files.
+inline_xattr Enable the inline xattrs feature.
+inline_data Enable the inline data feature: New created small(<~3.4k)
+ files can be written into inode block.
+flush_merge Merge concurrent cache_flush commands as much as possible
+ to eliminate redundant command issues. If the underlying
+ device handles the cache_flush command relatively slowly,
+ recommend to enable this option.
================================================================================
DEBUGFS ENTRIES
@@ -164,6 +171,47 @@ Files in /sys/fs/f2fs/<devname>
gc_idle = 1 will select the Cost Benefit approach
& setting gc_idle = 2 will select the greedy aproach.
+ reclaim_segments This parameter controls the number of prefree
+ segments to be reclaimed. If the number of prefree
+ segments is larger than the number of segments
+ in the proportion to the percentage over total
+ volume size, f2fs tries to conduct checkpoint to
+ reclaim the prefree segments to free segments.
+ By default, 5% over total # of segments.
+
+ max_small_discards This parameter controls the number of discard
+ commands that consist small blocks less than 2MB.
+ The candidates to be discarded are cached until
+ checkpoint is triggered, and issued during the
+ checkpoint. By default, it is disabled with 0.
+
+ ipu_policy This parameter controls the policy of in-place
+ updates in f2fs. There are five policies:
+ 0: F2FS_IPU_FORCE, 1: F2FS_IPU_SSR,
+ 2: F2FS_IPU_UTIL, 3: F2FS_IPU_SSR_UTIL,
+ 4: F2FS_IPU_DISABLE.
+
+ min_ipu_util This parameter controls the threshold to trigger
+ in-place-updates. The number indicates percentage
+ of the filesystem utilization, and used by
+ F2FS_IPU_UTIL and F2FS_IPU_SSR_UTIL policies.
+
+ max_victim_search This parameter controls the number of trials to
+ find a victim segment when conducting SSR and
+ cleaning operations. The default value is 4096
+ which covers 8GB block address range.
+
+ dir_level This parameter controls the directory level to
+ support large directory. If a directory has a
+ number of files, it can reduce the file lookup
+ latency by increasing this dir_level value.
+ Otherwise, it needs to decrease this value to
+ reduce the space overhead. The default value is 0.
+
+ ram_thresh This parameter controls the memory footprint used
+ by free nids and cached nat entries. By default,
+ 10 is set, which indicates 10 MB / 1 GB RAM.
+
================================================================================
USAGE
================================================================================
@@ -413,9 +461,11 @@ The number of blocks and buckets are determined by,
# of blocks in level #n = |
`- 4, Otherwise
- ,- 2^n, if n < MAX_DIR_HASH_DEPTH / 2,
+ ,- 2^(n + dir_level),
+ | if n + dir_level < MAX_DIR_HASH_DEPTH / 2,
# of buckets in level #n = |
- `- 2^((MAX_DIR_HASH_DEPTH / 2) - 1), Otherwise
+ `- 2^((MAX_DIR_HASH_DEPTH / 2) - 1),
+ Otherwise
When F2FS finds a file name in a directory, at first a hash value of the file
name is calculated. Then, F2FS scans the hash table in level #0 to find the
diff --git a/Documentation/filesystems/hfsplus.txt b/Documentation/filesystems/hfsplus.txt
index af1628a1061..59f7569fc9e 100644
--- a/Documentation/filesystems/hfsplus.txt
+++ b/Documentation/filesystems/hfsplus.txt
@@ -56,4 +56,4 @@ References
kernel source: <file:fs/hfsplus>
-Apple Technote 1150 http://developer.apple.com/technotes/tn/tn1150.html
+Apple Technote 1150 https://developer.apple.com/legacy/library/technotes/tn/tn1150.html
diff --git a/Documentation/filesystems/nfs/00-INDEX b/Documentation/filesystems/nfs/00-INDEX
index 66eb6c8c533..53f3b596ac0 100644
--- a/Documentation/filesystems/nfs/00-INDEX
+++ b/Documentation/filesystems/nfs/00-INDEX
@@ -12,6 +12,8 @@ nfs41-server.txt
- info on the Linux server implementation of NFSv4 minor version 1.
nfs-rdma.txt
- how to install and setup the Linux NFS/RDMA client and server software
+nfsd-admin-interfaces.txt
+ - Administrative interfaces for nfsd.
nfsroot.txt
- short guide on setting up a diskless box with NFS root filesystem.
pnfs.txt
@@ -20,5 +22,5 @@ rpc-cache.txt
- introduction to the caching mechanisms in the sunrpc layer.
idmapper.txt
- information for configuring request-keys to be used by idmapper
-knfsd-rpcgss.txt
+rpc-server-gss.txt
- Information on GSS authentication support in the NFS Server
diff --git a/Documentation/filesystems/nfs/nfs41-server.txt b/Documentation/filesystems/nfs/nfs41-server.txt
index 01c2db76979..c49cd7e796e 100644
--- a/Documentation/filesystems/nfs/nfs41-server.txt
+++ b/Documentation/filesystems/nfs/nfs41-server.txt
@@ -5,11 +5,11 @@ Server support for minorversion 1 can be controlled using the
by reading this file will contain either "+4.1" or "-4.1"
correspondingly.
-Currently, server support for minorversion 1 is disabled by default.
-It can be enabled at run time by writing the string "+4.1" to
+Currently, server support for minorversion 1 is enabled by default.
+It can be disabled at run time by writing the string "-4.1" to
the /proc/fs/nfsd/versions control file. Note that to write this
-control file, the nfsd service must be taken down. Use your user-mode
-nfs-utils to set this up; see rpc.nfsd(8)
+control file, the nfsd service must be taken down. You can use rpc.nfsd
+for this; see rpc.nfsd(8).
(Warning: older servers will interpret "+4.1" and "-4.1" as "+4" and
"-4", respectively. Therefore, code meant to work on both new and old
@@ -29,29 +29,6 @@ are still under development out of tree.
See http://wiki.linux-nfs.org/wiki/index.php/PNFS_prototype_design
for more information.
-The current implementation is intended for developers only: while it
-does support ordinary file operations on clients we have tested against
-(including the linux client), it is incomplete in ways which may limit
-features unexpectedly, cause known bugs in rare cases, or cause
-interoperability problems with future clients. Known issues:
-
- - gss support is questionable: currently mounts with kerberos
- from a linux client are possible, but we aren't really
- conformant with the spec (for example, we don't use kerberos
- on the backchannel correctly).
- - We do not support SSV, which provides security for shared
- client-server state (thus preventing unauthorized tampering
- with locks and opens, for example). It is mandatory for
- servers to support this, though no clients use it yet.
-
-In addition, some limitations are inherited from the current NFSv4
-implementation:
-
- - Incomplete delegation enforcement: if a file is renamed or
- unlinked by a local process, a client holding a delegation may
- continue to indefinitely allow opens of the file under the old
- name.
-
The table below, taken from the NFSv4.1 document, lists
the operations that are mandatory to implement (REQ), optional
(OPT), and NFSv4.0 operations that are required not to implement (MNI)
@@ -169,6 +146,16 @@ NS*| CB_WANTS_CANCELLED | OPT | FDELG, | Section 20.10 |
Implementation notes:
+SSV:
+* The spec claims this is mandatory, but we don't actually know of any
+ implementations, so we're ignoring it for now. The server returns
+ NFS4ERR_ENCR_ALG_UNSUPP on EXCHANGE_ID, which should be future-proof.
+
+GSS on the backchannel:
+* Again, theoretically required but not widely implemented (in
+ particular, the current Linux client doesn't request it). We return
+ NFS4ERR_ENCR_ALG_UNSUPP on CREATE_SESSION.
+
DELEGPURGE:
* mandatory only for servers that support CLAIM_DELEGATE_PREV and/or
CLAIM_DELEG_PREV_FH (which allows clients to keep delegations that
@@ -176,7 +163,6 @@ DELEGPURGE:
now.
EXCHANGE_ID:
-* only SP4_NONE state protection supported
* implementation ids are ignored
CREATE_SESSION:
@@ -190,7 +176,5 @@ Nonstandard compound limitations:
ca_maxrequestsize request and a ca_maxresponsesize reply, so we may
fail to live up to the promise we made in CREATE_SESSION fore channel
negotiation.
-* No more than one read-like operation allowed per compound; encoding
- replies that cross page boundaries (except for read data) not handled.
See also http://wiki.linux-nfs.org/wiki/index.php/Server_4.0_and_4.1_issues.
diff --git a/Documentation/filesystems/nilfs2.txt b/Documentation/filesystems/nilfs2.txt
index 873a2ab2e9f..41c3d332acc 100644
--- a/Documentation/filesystems/nilfs2.txt
+++ b/Documentation/filesystems/nilfs2.txt
@@ -25,9 +25,8 @@ available from the following download page. At least "mkfs.nilfs2",
cleaner or garbage collector) are required. Details on the tools are
described in the man pages included in the package.
-Project web page: http://www.nilfs.org/en/
-Download page: http://www.nilfs.org/en/download.html
-Git tree web page: http://www.nilfs.org/git/
+Project web page: http://nilfs.sourceforge.net/
+Download page: http://nilfs.sourceforge.net/en/download.html
List info: http://vger.kernel.org/vger-lists.html#linux-nilfs
Caveats
@@ -81,6 +80,69 @@ nodiscard(*) The discard/TRIM commands are sent to the underlying
block device when blocks are freed. This is useful
for SSD devices and sparse/thinly-provisioned LUNs.
+Ioctls
+======
+
+There is some NILFS2 specific functionality which can be accessed by applications
+through the system call interfaces. The list of all NILFS2 specific ioctls are
+shown in the table below.
+
+Table of NILFS2 specific ioctls
+..............................................................................
+ Ioctl Description
+ NILFS_IOCTL_CHANGE_CPMODE Change mode of given checkpoint between
+ checkpoint and snapshot state. This ioctl is
+ used in chcp and mkcp utilities.
+
+ NILFS_IOCTL_DELETE_CHECKPOINT Remove checkpoint from NILFS2 file system.
+ This ioctl is used in rmcp utility.
+
+ NILFS_IOCTL_GET_CPINFO Return info about requested checkpoints. This
+ ioctl is used in lscp utility and by
+ nilfs_cleanerd daemon.
+
+ NILFS_IOCTL_GET_CPSTAT Return checkpoints statistics. This ioctl is
+ used by lscp, rmcp utilities and by
+ nilfs_cleanerd daemon.
+
+ NILFS_IOCTL_GET_SUINFO Return segment usage info about requested
+ segments. This ioctl is used in lssu,
+ nilfs_resize utilities and by nilfs_cleanerd
+ daemon.
+
+ NILFS_IOCTL_SET_SUINFO Modify segment usage info of requested
+ segments. This ioctl is used by
+ nilfs_cleanerd daemon to skip unnecessary
+ cleaning operation of segments and reduce
+ performance penalty or wear of flash device
+ due to redundant move of in-use blocks.
+
+ NILFS_IOCTL_GET_SUSTAT Return segment usage statistics. This ioctl
+ is used in lssu, nilfs_resize utilities and
+ by nilfs_cleanerd daemon.
+
+ NILFS_IOCTL_GET_VINFO Return information on virtual block addresses.
+ This ioctl is used by nilfs_cleanerd daemon.
+
+ NILFS_IOCTL_GET_BDESCS Return information about descriptors of disk
+ block numbers. This ioctl is used by
+ nilfs_cleanerd daemon.
+
+ NILFS_IOCTL_CLEAN_SEGMENTS Do garbage collection operation in the
+ environment of requested parameters from
+ userspace. This ioctl is used by
+ nilfs_cleanerd daemon.
+
+ NILFS_IOCTL_SYNC Make a checkpoint. This ioctl is used in
+ mkcp utility.
+
+ NILFS_IOCTL_RESIZE Resize NILFS2 volume. This ioctl is used
+ by nilfs_resize utility.
+
+ NILFS_IOCTL_SET_ALLOC_RANGE Define lower limit of segments in bytes and
+ upper limit of segments in bytes. This ioctl
+ is used by nilfs_resize utility.
+
NILFS2 usage
============
diff --git a/Documentation/filesystems/ntfs.txt b/Documentation/filesystems/ntfs.txt
index 791af8dac06..61947facfc0 100644
--- a/Documentation/filesystems/ntfs.txt
+++ b/Documentation/filesystems/ntfs.txt
@@ -455,8 +455,6 @@ not have this problem with odd numbers of sectors.
ChangeLog
=========
-Note, a technical ChangeLog aimed at kernel hackers is in fs/ntfs/ChangeLog.
-
2.1.30:
- Fix writev() (it kept writing the first segment over and over again
instead of moving onto subsequent segments).
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index f0890581f7f..0f3a1390bf0 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -295,9 +295,9 @@ in the beginning of ->setattr unconditionally.
->clear_inode() and ->delete_inode() are gone; ->evict_inode() should
be used instead. It gets called whenever the inode is evicted, whether it has
remaining links or not. Caller does *not* evict the pagecache or inode-associated
-metadata buffers; getting rid of those is responsibility of method, as it had
-been for ->delete_inode(). Caller makes sure async writeback cannot be running
-for the inode while (or after) ->evict_inode() is called.
+metadata buffers; the method has to use truncate_inode_pages_final() to get rid
+of those. Caller makes sure async writeback cannot be running for the inode while
+(or after) ->evict_inode() is called.
->drop_inode() returns int now; it's called on final iput() with
inode->i_lock held and it returns true if filesystems wants the inode to be
@@ -455,3 +455,11 @@ in your dentry operations instead.
vfs_follow_link has been removed. Filesystems must use nd_set_link
from ->follow_link for normal symlinks, or nd_jump_link for magic
/proc/<pid> style links.
+--
+[mandatory]
+ iget5_locked()/ilookup5()/ilookup5_nowait() test() callback used to be
+ called with both ->i_lock and inode_hash_lock held; the former is *not*
+ taken anymore, so verify that your callbacks do not rely on it (none
+ of the in-tree instances did). inode_hash_lock is still held,
+ of course, so they are still serialized wrt removal from inode hash,
+ as well as wrt set() callback of iget5_locked().
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 823c95faebd..ddc531a74d0 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -234,7 +234,7 @@ Table 1-2: Contents of the status files (as of 2.6.30-rc7)
ShdPnd bitmap of shared pending signals for the process
SigBlk bitmap of blocked signals
SigIgn bitmap of ignored signals
- SigCgt bitmap of catched signals
+ SigCgt bitmap of caught signals
CapInh bitmap of inheritable capabilities
CapPrm bitmap of permitted capabilities
CapEff bitmap of effective capabilities
@@ -300,7 +300,7 @@ Table 1-4: Contents of the stat files (as of 2.6.30-rc7)
pending bitmap of pending signals
blocked bitmap of blocked signals
sigign bitmap of ignored signals
- sigcatch bitmap of catched signals
+ sigcatch bitmap of caught signals
wchan address where process went to sleep
0 (place holder)
0 (place holder)
@@ -460,6 +460,7 @@ manner. The codes are the following:
nl - non-linear mapping
ar - architecture specific flag
dd - do not include area into core dump
+ sd - soft-dirty flag
mm - mixed map area
hg - huge page advise flag
nh - no-huge page advise flag
@@ -546,7 +547,7 @@ Table 1-5: Kernel info in /proc
sys See chapter 2
sysvipc Info of SysVIPC Resources (msg, sem, shm) (2.4)
tty Info of tty drivers
- uptime System uptime
+ uptime Wall clock since boot, combined idle time of all cpus
version Kernel version
video bttv info of video resources (2.4)
vmallocinfo Show vmalloced areas
@@ -766,6 +767,7 @@ The "Locked" indicates whether the mapping is locked in memory or not.
MemTotal: 16344972 kB
MemFree: 13634064 kB
+MemAvailable: 14836172 kB
Buffers: 3656 kB
Cached: 1195708 kB
SwapCached: 0 kB
@@ -798,6 +800,14 @@ AnonHugePages: 49152 kB
MemTotal: Total usable ram (i.e. physical ram minus a few reserved
bits and the kernel binary code)
MemFree: The sum of LowFree+HighFree
+MemAvailable: An estimate of how much memory is available for starting new
+ applications, without swapping. Calculated from MemFree,
+ SReclaimable, the size of the file LRU lists, and the low
+ watermarks in each zone.
+ The estimate takes into account that the system needs some
+ page cache to function well, and that not all reclaimable
+ slab will be reclaimable, due to items being in use. The
+ impact of those factors will vary from system to system.
Buffers: Relatively temporary storage for raw disk blocks
shouldn't get tremendously large (20MB or so)
Cached: in-memory cache for files read from the disk (the
@@ -844,7 +854,8 @@ WritebackTmp: Memory used by FUSE for temporary writeback buffers
if strict overcommit accounting is enabled (mode 2 in
'vm.overcommit_memory').
The CommitLimit is calculated with the following formula:
- CommitLimit = ('vm.overcommit_ratio' * Physical RAM) + Swap
+ CommitLimit = ([total RAM pages] - [total huge TLB pages]) *
+ overcommit_ratio / 100 + [total swap pages]
For example, on a system with 1G of physical RAM and 7G
of swap with a `vm.overcommit_ratio` of 30 it would
yield a CommitLimit of 7.3G.
@@ -1235,8 +1246,9 @@ second). The meanings of the columns are as follows, from left to right:
The "intr" line gives counts of interrupts serviced since boot time, for each
of the possible system interrupts. The first column is the total of all
-interrupts serviced; each subsequent column is the total for that particular
-interrupt.
+interrupts serviced including unnumbered architecture specific interrupts;
+each subsequent column is the total for that particular numbered interrupt.
+Unnumbered interrupts are not shown, only summed into the total.
The "ctxt" line gives the total number of context switches across all CPUs.
@@ -1376,8 +1388,8 @@ may allocate from based on an estimation of its current memory and swap use.
For example, if a task is using all allowed memory, its badness score will be
1000. If it is using half of its allowed memory, its score will be 500.
-There is an additional factor included in the badness score: root
-processes are given 3% extra memory over other tasks.
+There is an additional factor included in the badness score: the current memory
+and swap usage is discounted by 3% for root processes.
The amount of "allowed" memory depends on the context in which the oom killer
was called. If it is due to the memory assigned to the allocating task's cpuset
@@ -1638,18 +1650,21 @@ pids, so one need to either stop or freeze processes being inspected
if precise results are needed.
-3.7 /proc/<pid>/fdinfo/<fd> - Information about opened file
+3.8 /proc/<pid>/fdinfo/<fd> - Information about opened file
---------------------------------------------------------------
This file provides information associated with an opened file. The regular
-files have at least two fields -- 'pos' and 'flags'. The 'pos' represents
-the current offset of the opened file in decimal form [see lseek(2) for
-details] and 'flags' denotes the octal O_xxx mask the file has been
-created with [see open(2) for details].
+files have at least three fields -- 'pos', 'flags' and mnt_id. The 'pos'
+represents the current offset of the opened file in decimal form [see lseek(2)
+for details], 'flags' denotes the octal O_xxx mask the file has been
+created with [see open(2) for details] and 'mnt_id' represents mount ID of
+the file system containing the opened file [see 3.5 /proc/<pid>/mountinfo
+for details].
A typical output is
pos: 0
flags: 0100002
+ mnt_id: 19
The files such as eventfd, fsnotify, signalfd, epoll among the regular pos/flags
pair provide additional information particular to the objects they represent.
@@ -1658,6 +1673,7 @@ pair provide additional information particular to the objects they represent.
~~~~~~~~~~~~~
pos: 0
flags: 04002
+ mnt_id: 9
eventfd-count: 5a
where 'eventfd-count' is hex value of a counter.
@@ -1666,6 +1682,7 @@ pair provide additional information particular to the objects they represent.
~~~~~~~~~~~~~~
pos: 0
flags: 04002
+ mnt_id: 9
sigmask: 0000000000000200
where 'sigmask' is hex value of the signal mask associated
@@ -1675,6 +1692,7 @@ pair provide additional information particular to the objects they represent.
~~~~~~~~~~~
pos: 0
flags: 02
+ mnt_id: 9
tfd: 5 events: 1d data: ffffffffffffffff
where 'tfd' is a target file descriptor number in decimal form,
@@ -1708,6 +1726,7 @@ pair provide additional information particular to the objects they represent.
pos: 0
flags: 02
+ mnt_id: 9
fanotify flags:10 event-flags:0
fanotify mnt_id:12 mflags:40 mask:38 ignored_mask:40000003
fanotify ino:4f969 sdev:800013 mflags:0 mask:3b ignored_mask:40000000 fhandle-bytes:8 fhandle-type:1 f_handle:69f90400c275b5b4
diff --git a/Documentation/filesystems/seq_file.txt b/Documentation/filesystems/seq_file.txt
index a1e2e0dda90..1fe0ccb1af5 100644
--- a/Documentation/filesystems/seq_file.txt
+++ b/Documentation/filesystems/seq_file.txt
@@ -54,6 +54,15 @@ how the mechanism works without getting lost in other details. (Those
wanting to see the full source for this module can find it at
http://lwn.net/Articles/22359/).
+Deprecated create_proc_entry
+
+Note that the above article uses create_proc_entry which was removed in
+kernel 3.10. Current versions require the following update
+
+- entry = create_proc_entry("sequence", 0, NULL);
+- if (entry)
+- entry->proc_fops = &ct_file_ops;
++ entry = proc_create("sequence", 0, NULL, &ct_file_ops);
The iterator interface
diff --git a/Documentation/filesystems/sharedsubtree.txt b/Documentation/filesystems/sharedsubtree.txt
index 4ede421c968..32a173dd315 100644
--- a/Documentation/filesystems/sharedsubtree.txt
+++ b/Documentation/filesystems/sharedsubtree.txt
@@ -727,7 +727,7 @@ replicas continue to be exactly same.
mkdir -p /tmp/m3
mount --rbind /root /tmp/m3
- I wont' draw the tree..but it has 24 vfsmounts
+ I won't draw the tree..but it has 24 vfsmounts
at step i the number of vfsmounts is V[i] = i*V[i-1].
diff --git a/Documentation/filesystems/sysfs.txt b/Documentation/filesystems/sysfs.txt
index a6619b7064b..b35a64b82f9 100644
--- a/Documentation/filesystems/sysfs.txt
+++ b/Documentation/filesystems/sysfs.txt
@@ -108,12 +108,12 @@ static DEVICE_ATTR(foo, S_IWUSR | S_IRUGO, show_foo, store_foo);
is equivalent to doing:
static struct device_attribute dev_attr_foo = {
- .attr = {
+ .attr = {
.name = "foo",
.mode = S_IWUSR | S_IRUGO,
- .show = show_foo,
- .store = store_foo,
},
+ .show = show_foo,
+ .store = store_foo,
};
diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt
index aa1f459fa6c..ce1126aceed 100644
--- a/Documentation/filesystems/vfat.txt
+++ b/Documentation/filesystems/vfat.txt
@@ -172,6 +172,11 @@ nfs=stale_rw|nostale_ro
To maintain backward compatibility, '-o nfs' is also accepted,
defaulting to stale_rw
+dos1xfloppy -- If set, use a fallback default BIOS Parameter Block
+ configuration, determined by backing device size. These static
+ parameters match defaults assumed by DOS 1.x for 160 kiB,
+ 180 kiB, 320 kiB, and 360 kiB floppies and floppy images.
+
<bool>: 0,1,yes,no,true,false
@@ -307,7 +312,7 @@ the following:
<proceeding files...>
<slot #3, id = 0x43, characters = "h is long">
- <slot #2, id = 0x02, characters = "xtension which">
+ <slot #2, id = 0x02, characters = "xtension whic">
<slot #1, id = 0x01, characters = "My Big File.E">
<directory entry, name = "MYBIGFIL.EXT">
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index f93a88250a4..a1d0d7a3016 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -347,6 +347,8 @@ struct inode_operations {
int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t);
int (*rename) (struct inode *, struct dentry *,
struct inode *, struct dentry *);
+ int (*rename2) (struct inode *, struct dentry *,
+ struct inode *, struct dentry *, unsigned int);
int (*readlink) (struct dentry *, char __user *,int);
void * (*follow_link) (struct dentry *, struct nameidata *);
void (*put_link) (struct dentry *, struct nameidata *, void *);
@@ -359,11 +361,9 @@ struct inode_operations {
ssize_t (*listxattr) (struct dentry *, char *, size_t);
int (*removexattr) (struct dentry *, const char *);
void (*update_time)(struct inode *, struct timespec *, int);
- int (*atomic_open)(struct inode *, struct dentry *,
+ int (*atomic_open)(struct inode *, struct dentry *, struct file *,
+ unsigned open_flag, umode_t create_mode, int *opened);
int (*tmpfile) (struct inode *, struct dentry *, umode_t);
-} ____cacheline_aligned;
- struct file *, unsigned open_flag,
- umode_t create_mode, int *opened);
};
Again, all methods are called without any locks being held, unless
@@ -416,6 +416,20 @@ otherwise noted.
rename: called by the rename(2) system call to rename the object to
have the parent and name given by the second inode and dentry.
+ rename2: this has an additional flags argument compared to rename.
+ If no flags are supported by the filesystem then this method
+ need not be implemented. If some flags are supported then the
+ filesystem must return -EINVAL for any unsupported or unknown
+ flags. Currently the following flags are implemented:
+ (1) RENAME_NOREPLACE: this flag indicates that if the target
+ of the rename exists the rename should fail with -EEXIST
+ instead of replacing the target. The VFS already checks for
+ existence, so for local filesystems the RENAME_NOREPLACE
+ implementation is equivalent to plain rename.
+ (2) RENAME_EXCHANGE: exchange source and target. Both must
+ exist; this is checked by the VFS. Unlike plain rename,
+ source and target may be of different type.
+
readlink: called by the readlink(2) system call. Only required if
you want to support reading symbolic links
@@ -470,9 +484,11 @@ otherwise noted.
method the filesystem can look up, possibly create and open the file in
one atomic operation. If it cannot perform this (e.g. the file type
turned out to be wrong) it may signal this by returning 1 instead of
- usual 0 or -ve . This method is only called if the last
- component is negative or needs lookup. Cached positive dentries are
- still handled by f_op->open().
+ usual 0 or -ve . This method is only called if the last component is
+ negative or needs lookup. Cached positive dentries are still handled by
+ f_op->open(). If the file was created, the FILE_CREATED flag should be
+ set in "opened". In case of O_EXCL the method must only succeed if the
+ file didn't exist and hence FILE_CREATED shall always be set on success.
tmpfile: called in the end of O_TMPFILE open(). Optional, equivalent to
atomically creating, opening and unlinking a file in given directory.
@@ -573,14 +589,13 @@ struct address_space_operations {
void (*invalidatepage) (struct page *, unsigned int, unsigned int);
int (*releasepage) (struct page *, int);
void (*freepage)(struct page *);
- ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
- loff_t offset, unsigned long nr_segs);
+ ssize_t (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset);
struct page* (*get_xip_page)(struct address_space *, sector_t,
int);
/* migrate the contents of a page to the specified target */
int (*migratepage) (struct page *, struct page *);
int (*launder_page) (struct page *);
- int (*is_partially_uptodate) (struct page *, read_descriptor_t *,
+ int (*is_partially_uptodate) (struct page *, unsigned long,
unsigned long);
void (*is_dirty_writeback) (struct page *, bool *, bool *);
int (*error_remove_page) (struct mapping *mapping, struct page *page);
@@ -782,7 +797,7 @@ struct file_operations
----------------------
This describes how the VFS can manipulate an open file. As of kernel
-3.5, the following members are defined:
+3.12, the following members are defined:
struct file_operations {
struct module *owner;
@@ -791,6 +806,8 @@ struct file_operations {
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
+ ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
+ ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
int (*iterate) (struct file *, struct dir_context *);
unsigned int (*poll) (struct file *, struct poll_table_struct *);
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
@@ -803,9 +820,6 @@ struct file_operations {
int (*aio_fsync) (struct kiocb *, int datasync);
int (*fasync) (int, struct file *, int);
int (*lock) (struct file *, int, struct file_lock *);
- ssize_t (*readv) (struct file *, const struct iovec *, unsigned long, loff_t *);
- ssize_t (*writev) (struct file *, const struct iovec *, unsigned long, loff_t *);
- ssize_t (*sendfile) (struct file *, loff_t *, size_t, read_actor_t, void *);
ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
int (*check_flags)(int);
@@ -814,6 +828,7 @@ struct file_operations {
ssize_t (*splice_read)(struct file *, struct pipe_inode_info *, size_t, unsigned int);
int (*setlease)(struct file *, long arg, struct file_lock **);
long (*fallocate)(struct file *, int mode, loff_t offset, loff_t len);
+ int (*show_fdinfo)(struct seq_file *m, struct file *f);
};
Again, all methods are called without any locks being held, unless
@@ -823,11 +838,15 @@ otherwise noted.
read: called by read(2) and related system calls
- aio_read: called by io_submit(2) and other asynchronous I/O operations
+ aio_read: vectored, possibly asynchronous read
+
+ read_iter: possibly asynchronous read with iov_iter as destination
write: called by write(2) and related system calls
- aio_write: called by io_submit(2) and other asynchronous I/O operations
+ aio_write: vectored, possibly asynchronous write
+
+ write_iter: possibly asynchronous write with iov_iter as source
iterate: called when the VFS needs to read the directory contents
@@ -864,12 +883,6 @@ otherwise noted.
lock: called by the fcntl(2) system call for F_GETLK, F_SETLK, and F_SETLKW
commands
- readv: called by the readv(2) system call
-
- writev: called by the writev(2) system call
-
- sendfile: called by the sendfile(2) system call
-
get_unmapped_area: called by the mmap(2) system call
check_flags: called by the fcntl(2) system call for F_SETFL command