aboutsummaryrefslogtreecommitdiff
path: root/Documentation/filesystems/vfs.txt
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation/filesystems/vfs.txt')
-rw-r--r--Documentation/filesystems/vfs.txt308
1 files changed, 195 insertions, 113 deletions
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 94cf97b901d..a1d0d7a3016 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -95,10 +95,11 @@ functions:
extern int unregister_filesystem(struct file_system_type *);
The passed struct file_system_type describes your filesystem. When a
-request is made to mount a device onto a directory in your filespace,
-the VFS will call the appropriate get_sb() method for the specific
-filesystem. The dentry for the mount point will then be updated to
-point to the root inode for the new filesystem.
+request is made to mount a filesystem onto a directory in your namespace,
+the VFS will call the appropriate mount() method for the specific
+filesystem. New vfsmount referring to the tree returned by ->mount()
+will be attached to the mountpoint, so that when pathname resolution
+reaches the mountpoint it will jump into the root of that vfsmount.
You can see all filesystems that are registered to the kernel in the
file /proc/filesystems.
@@ -107,14 +108,14 @@ file /proc/filesystems.
struct file_system_type
-----------------------
-This describes the filesystem. As of kernel 2.6.22, the following
+This describes the filesystem. As of kernel 2.6.39, the following
members are defined:
struct file_system_type {
const char *name;
int fs_flags;
- int (*get_sb) (struct file_system_type *, int,
- const char *, void *, struct vfsmount *);
+ struct dentry *(*mount) (struct file_system_type *, int,
+ const char *, void *);
void (*kill_sb) (struct super_block *);
struct module *owner;
struct file_system_type * next;
@@ -128,11 +129,11 @@ struct file_system_type {
fs_flags: various flags (i.e. FS_REQUIRES_DEV, FS_NO_DCACHE, etc.)
- get_sb: the method to call when a new instance of this
+ mount: the method to call when a new instance of this
filesystem should be mounted
kill_sb: the method to call when an instance of this filesystem
- should be unmounted
+ should be shut down
owner: for internal VFS use: you should initialize this to THIS_MODULE in
most cases.
@@ -141,7 +142,7 @@ struct file_system_type {
s_lock_key, s_umount_key: lockdep-specific
-The get_sb() method has the following arguments:
+The mount() method has the following arguments:
struct file_system_type *fs_type: describes the filesystem, partly initialized
by the specific filesystem code
@@ -153,32 +154,39 @@ The get_sb() method has the following arguments:
void *data: arbitrary mount options, usually comes as an ASCII
string (see "Mount Options" section)
- struct vfsmount *mnt: a vfs-internal representation of a mount point
+The mount() method must return the root dentry of the tree requested by
+caller. An active reference to its superblock must be grabbed and the
+superblock must be locked. On failure it should return ERR_PTR(error).
-The get_sb() method must determine if the block device specified
-in the dev_name and fs_type contains a filesystem of the type the method
-supports. If it succeeds in opening the named block device, it initializes a
-struct super_block descriptor for the filesystem contained by the block device.
-On failure it returns an error.
+The arguments match those of mount(2) and their interpretation
+depends on filesystem type. E.g. for block filesystems, dev_name is
+interpreted as block device name, that device is opened and if it
+contains a suitable filesystem image the method creates and initializes
+struct super_block accordingly, returning its root dentry to caller.
+
+->mount() may choose to return a subtree of existing filesystem - it
+doesn't have to create a new one. The main result from the caller's
+point of view is a reference to dentry at the root of (sub)tree to
+be attached; creation of new superblock is a common side effect.
The most interesting member of the superblock structure that the
-get_sb() method fills in is the "s_op" field. This is a pointer to
+mount() method fills in is the "s_op" field. This is a pointer to
a "struct super_operations" which describes the next level of the
filesystem implementation.
-Usually, a filesystem uses one of the generic get_sb() implementations
-and provides a fill_super() method instead. The generic methods are:
+Usually, a filesystem uses one of the generic mount() implementations
+and provides a fill_super() callback instead. The generic variants are:
- get_sb_bdev: mount a filesystem residing on a block device
+ mount_bdev: mount a filesystem residing on a block device
- get_sb_nodev: mount a filesystem that is not backed by a device
+ mount_nodev: mount a filesystem that is not backed by a device
- get_sb_single: mount a filesystem which shares the instance between
+ mount_single: mount a filesystem which shares the instance between
all mounts
-A fill_super() method implementation has the following arguments:
+A fill_super() callback implementation has the following arguments:
- struct super_block *sb: the superblock structure. The method fill_super()
+ struct super_block *sb: the superblock structure. The callback
must initialize this properly.
void *data: arbitrary mount options, usually comes as an ASCII
@@ -203,12 +211,11 @@ struct super_operations {
struct inode *(*alloc_inode)(struct super_block *sb);
void (*destroy_inode)(struct inode *);
- void (*dirty_inode) (struct inode *);
+ void (*dirty_inode) (struct inode *, int flags);
int (*write_inode) (struct inode *, int);
void (*drop_inode) (struct inode *);
void (*delete_inode) (struct inode *);
void (*put_super) (struct super_block *);
- void (*write_super) (struct super_block *);
int (*sync_fs)(struct super_block *sb, int wait);
int (*freeze_fs) (struct super_block *);
int (*unfreeze_fs) (struct super_block *);
@@ -217,10 +224,12 @@ struct super_operations {
void (*clear_inode) (struct inode *);
void (*umount_begin) (struct super_block *);
- int (*show_options)(struct seq_file *, struct vfsmount *);
+ int (*show_options)(struct seq_file *, struct dentry *);
ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
+ int (*nr_cached_objects)(struct super_block *);
+ void (*free_cached_objects)(struct super_block *, int);
};
All methods are called without any locks being held, unless otherwise
@@ -246,7 +255,7 @@ or bottom half).
should be synchronous or not, not all filesystems check this flag.
drop_inode: called when the last access to the inode is dropped,
- with the inode_lock spinlock held.
+ with the inode->i_lock spinlock held.
This method should be either NULL (normal UNIX filesystem
semantics) or "generic_delete_inode" (for filesystems that do not
@@ -263,9 +272,6 @@ or bottom half).
put_super: called when the VFS wishes to free the superblock
(i.e. unmount). This is called with the superblock lock held
- write_super: called when the VFS superblock needs to be written to
- disc. This method is optional
-
sync_fs: called when VFS is writing out all dirty data associated with
a superblock. The second parameter indicates whether the method
should wait until the write out has been completed. Optional.
@@ -293,6 +299,26 @@ or bottom half).
quota_write: called by the VFS to write to filesystem quota file.
+ nr_cached_objects: called by the sb cache shrinking function for the
+ filesystem to return the number of freeable cached objects it contains.
+ Optional.
+
+ free_cache_objects: called by the sb cache shrinking function for the
+ filesystem to scan the number of objects indicated to try to free them.
+ Optional, but any filesystem implementing this method needs to also
+ implement ->nr_cached_objects for it to be called correctly.
+
+ We can't do anything with any errors that the filesystem might
+ encountered, hence the void return type. This will never be called if
+ the VM is trying to reclaim under GFP_NOFS conditions, hence this
+ method does not need to handle that situation itself.
+
+ Implementations must include conditional reschedule calls inside any
+ scanning loop that is done. This allows the VFS to determine
+ appropriate scan batch sizes without having to worry about whether
+ implementations will cause holdoff problems due to large scan batch
+ sizes.
+
Whoever sets up the inode is responsible for filling in the "i_op" field. This
is a pointer to a "struct inode_operations" which describes the methods that
can be performed on individual inodes.
@@ -311,29 +337,33 @@ This describes how the VFS can manipulate an inode in your
filesystem. As of kernel 2.6.22, the following members are defined:
struct inode_operations {
- int (*create) (struct inode *,struct dentry *,int, struct nameidata *);
- struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *);
+ int (*create) (struct inode *,struct dentry *, umode_t, bool);
+ struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
int (*link) (struct dentry *,struct inode *,struct dentry *);
int (*unlink) (struct inode *,struct dentry *);
int (*symlink) (struct inode *,struct dentry *,const char *);
- int (*mkdir) (struct inode *,struct dentry *,int);
+ int (*mkdir) (struct inode *,struct dentry *,umode_t);
int (*rmdir) (struct inode *,struct dentry *);
- int (*mknod) (struct inode *,struct dentry *,int,dev_t);
+ int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t);
int (*rename) (struct inode *, struct dentry *,
struct inode *, struct dentry *);
+ int (*rename2) (struct inode *, struct dentry *,
+ struct inode *, struct dentry *, unsigned int);
int (*readlink) (struct dentry *, char __user *,int);
void * (*follow_link) (struct dentry *, struct nameidata *);
void (*put_link) (struct dentry *, struct nameidata *, void *);
- void (*truncate) (struct inode *);
- int (*permission) (struct inode *, int, unsigned int);
- int (*check_acl)(struct inode *, int, unsigned int);
+ int (*permission) (struct inode *, int);
+ int (*get_acl)(struct inode *, int);
int (*setattr) (struct dentry *, struct iattr *);
int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
ssize_t (*listxattr) (struct dentry *, char *, size_t);
int (*removexattr) (struct dentry *, const char *);
- void (*truncate_range)(struct inode *, loff_t, loff_t);
+ void (*update_time)(struct inode *, struct timespec *, int);
+ int (*atomic_open)(struct inode *, struct dentry *, struct file *,
+ unsigned open_flag, umode_t create_mode, int *opened);
+ int (*tmpfile) (struct inode *, struct dentry *, umode_t);
};
Again, all methods are called without any locks being held, unless
@@ -386,6 +416,20 @@ otherwise noted.
rename: called by the rename(2) system call to rename the object to
have the parent and name given by the second inode and dentry.
+ rename2: this has an additional flags argument compared to rename.
+ If no flags are supported by the filesystem then this method
+ need not be implemented. If some flags are supported then the
+ filesystem must return -EINVAL for any unsupported or unknown
+ flags. Currently the following flags are implemented:
+ (1) RENAME_NOREPLACE: this flag indicates that if the target
+ of the rename exists the rename should fail with -EEXIST
+ instead of replacing the target. The VFS already checks for
+ existence, so for local filesystems the RENAME_NOREPLACE
+ implementation is equivalent to plain rename.
+ (2) RENAME_EXCHANGE: exchange source and target. Both must
+ exist; this is checked by the VFS. Unlike plain rename,
+ source and target may be of different type.
+
readlink: called by the readlink(2) system call. Only required if
you want to support reading symbolic links
@@ -402,20 +446,10 @@ otherwise noted.
started might not be in the page cache at the end of the
walk).
- truncate: Deprecated. This will not be called if ->setsize is defined.
- Called by the VFS to change the size of a file. The
- i_size field of the inode is set to the desired size by the
- VFS before this method is called. This method is called by
- the truncate(2) system call and related functionality.
-
- Note: ->truncate and vmtruncate are deprecated. Do not add new
- instances/calls of these. Filesystems should be converted to do their
- truncate sequence via ->setattr().
-
permission: called by the VFS to check for access rights on a POSIX-like
filesystem.
- May be called in rcu-walk mode (flags & IPERM_FLAG_RCU). If in rcu-walk
+ May be called in rcu-walk mode (mask & MAY_NOT_BLOCK). If in rcu-walk
mode, the filesystem must check the permission without blocking or
storing to the inode.
@@ -442,9 +476,22 @@ otherwise noted.
removexattr: called by the VFS to remove an extended attribute from
a file. This method is called by removexattr(2) system call.
- truncate_range: a method provided by the underlying filesystem to truncate a
- range of blocks , i.e. punch a hole somewhere in a file.
+ update_time: called by the VFS to update a specific time or the i_version of
+ an inode. If this is not defined the VFS will update the inode itself
+ and call mark_inode_dirty_sync.
+
+ atomic_open: called on the last component of an open. Using this optional
+ method the filesystem can look up, possibly create and open the file in
+ one atomic operation. If it cannot perform this (e.g. the file type
+ turned out to be wrong) it may signal this by returning 1 instead of
+ usual 0 or -ve . This method is only called if the last component is
+ negative or needs lookup. Cached positive dentries are still handled by
+ f_op->open(). If the file was created, the FILE_CREATED flag should be
+ set in "opened". In case of O_EXCL the method must only succeed if the
+ file didn't exist and hence FILE_CREATED shall always be set on success.
+ tmpfile: called in the end of O_TMPFILE open(). Optional, equivalent to
+ atomically creating, opening and unlinking a file in given directory.
The Address Space Object
========================
@@ -523,12 +570,11 @@ struct address_space_operations
-------------------------------
This describes how the VFS can manipulate mapping of a file to page cache in
-your filesystem. As of kernel 2.6.22, the following members are defined:
+your filesystem. The following members are defined:
struct address_space_operations {
int (*writepage)(struct page *page, struct writeback_control *wbc);
int (*readpage)(struct file *, struct page *);
- int (*sync_page)(struct page *);
int (*writepages)(struct address_space *, struct writeback_control *);
int (*set_page_dirty)(struct page *page);
int (*readpages)(struct file *filp, struct address_space *mapping,
@@ -540,17 +586,21 @@ struct address_space_operations {
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata);
sector_t (*bmap)(struct address_space *, sector_t);
- int (*invalidatepage) (struct page *, unsigned long);
+ void (*invalidatepage) (struct page *, unsigned int, unsigned int);
int (*releasepage) (struct page *, int);
void (*freepage)(struct page *);
- ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
- loff_t offset, unsigned long nr_segs);
+ ssize_t (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset);
struct page* (*get_xip_page)(struct address_space *, sector_t,
int);
/* migrate the contents of a page to the specified target */
int (*migratepage) (struct page *, struct page *);
int (*launder_page) (struct page *);
+ int (*is_partially_uptodate) (struct page *, unsigned long,
+ unsigned long);
+ void (*is_dirty_writeback) (struct page *, bool *, bool *);
int (*error_remove_page) (struct mapping *mapping, struct page *page);
+ int (*swap_activate)(struct file *);
+ int (*swap_deactivate)(struct file *);
};
writepage: called by the VM to write a dirty page to backing store.
@@ -579,13 +629,6 @@ struct address_space_operations {
In this case, the page will be relocated, relocked and if
that all succeeds, ->readpage will be called again.
- sync_page: called by the VM to notify the backing store to perform all
- queued I/O operations for a page. I/O operations for other pages
- associated with this address_space object may also be performed.
-
- This function is optional and is called only for pages with
- PG_Writeback set while waiting for the writeback to complete.
-
writepages: called by the VM to write out pages associated with the
address_space object. If wbc->sync_mode is WBC_SYNC_ALL, then
the writeback_control will specify a range of pages that must be
@@ -657,14 +700,14 @@ struct address_space_operations {
invalidatepage: If a page has PagePrivate set, then invalidatepage
will be called when part or all of the page is to be removed
from the address space. This generally corresponds to either a
- truncation or a complete invalidation of the address space
- (in the latter case 'offset' will always be 0).
- Any private data associated with the page should be updated
- to reflect this truncation. If offset is 0, then
- the private data should be released, because the page
- must be able to be completely discarded. This may be done by
- calling the ->releasepage function, but in this case the
- release MUST succeed.
+ truncation, punch hole or a complete invalidation of the address
+ space (in the latter case 'offset' will always be 0 and 'length'
+ will be PAGE_CACHE_SIZE). Any private data associated with the page
+ should be updated to reflect this truncation. If offset is 0 and
+ length is PAGE_CACHE_SIZE, then the private data should be released,
+ because the page must be able to be completely discarded. This may
+ be done by calling the ->releasepage function, but in this case the
+ release MUST succeed.
releasepage: releasepage is called on PagePrivate pages to indicate
that the page should be freed if possible. ->releasepage
@@ -714,11 +757,35 @@ struct address_space_operations {
prevent redirtying the page, it is kept locked during the whole
operation.
+ is_partially_uptodate: Called by the VM when reading a file through the
+ pagecache when the underlying blocksize != pagesize. If the required
+ block is up to date then the read can complete without needing the IO
+ to bring the whole page up to date.
+
+ is_dirty_writeback: Called by the VM when attempting to reclaim a page.
+ The VM uses dirty and writeback information to determine if it needs
+ to stall to allow flushers a chance to complete some IO. Ordinarily
+ it can use PageDirty and PageWriteback but some filesystems have
+ more complex state (unstable pages in NFS prevent reclaim) or
+ do not set those flags due to locking problems (jbd). This callback
+ allows a filesystem to indicate to the VM if a page should be
+ treated as dirty or writeback for the purposes of stalling.
+
error_remove_page: normally set to generic_error_remove_page if truncation
is ok for this address space. Used for memory failure handling.
Setting this implies you deal with pages going away under you,
unless you have them locked or reference counts increased.
+ swap_activate: Called when swapon is used on a file to allocate
+ space if necessary and pin the block lookup information in
+ memory. A return value of zero indicates success,
+ in which case this file can be used to back swapspace. The
+ swapspace operations will be proxied to this address space's
+ ->swap_{out,in} methods.
+
+ swap_deactivate: Called during swapoff on files where swap_activate
+ was successful.
+
The File Object
===============
@@ -730,7 +797,7 @@ struct file_operations
----------------------
This describes how the VFS can manipulate an open file. As of kernel
-2.6.22, the following members are defined:
+3.12, the following members are defined:
struct file_operations {
struct module *owner;
@@ -739,7 +806,9 @@ struct file_operations {
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
- int (*readdir) (struct file *, void *, filldir_t);
+ ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
+ ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
+ int (*iterate) (struct file *, struct dir_context *);
unsigned int (*poll) (struct file *, struct poll_table_struct *);
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
@@ -747,19 +816,19 @@ struct file_operations {
int (*open) (struct inode *, struct file *);
int (*flush) (struct file *);
int (*release) (struct inode *, struct file *);
- int (*fsync) (struct file *, int datasync);
+ int (*fsync) (struct file *, loff_t, loff_t, int datasync);
int (*aio_fsync) (struct kiocb *, int datasync);
int (*fasync) (int, struct file *, int);
int (*lock) (struct file *, int, struct file_lock *);
- ssize_t (*readv) (struct file *, const struct iovec *, unsigned long, loff_t *);
- ssize_t (*writev) (struct file *, const struct iovec *, unsigned long, loff_t *);
- ssize_t (*sendfile) (struct file *, loff_t *, size_t, read_actor_t, void *);
ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
int (*check_flags)(int);
int (*flock) (struct file *, int, struct file_lock *);
ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, size_t, unsigned int);
ssize_t (*splice_read)(struct file *, struct pipe_inode_info *, size_t, unsigned int);
+ int (*setlease)(struct file *, long arg, struct file_lock **);
+ long (*fallocate)(struct file *, int mode, loff_t offset, loff_t len);
+ int (*show_fdinfo)(struct seq_file *m, struct file *f);
};
Again, all methods are called without any locks being held, unless
@@ -769,13 +838,17 @@ otherwise noted.
read: called by read(2) and related system calls
- aio_read: called by io_submit(2) and other asynchronous I/O operations
+ aio_read: vectored, possibly asynchronous read
+
+ read_iter: possibly asynchronous read with iov_iter as destination
write: called by write(2) and related system calls
- aio_write: called by io_submit(2) and other asynchronous I/O operations
+ aio_write: vectored, possibly asynchronous write
- readdir: called when the VFS needs to read the directory contents
+ write_iter: possibly asynchronous write with iov_iter as source
+
+ iterate: called when the VFS needs to read the directory contents
poll: called by the VFS when a process wants to check if there is
activity on this file and (optionally) go to sleep until there
@@ -810,12 +883,6 @@ otherwise noted.
lock: called by the fcntl(2) system call for F_GETLK, F_SETLK, and F_SETLKW
commands
- readv: called by the readv(2) system call
-
- writev: called by the writev(2) system call
-
- sendfile: called by the sendfile(2) system call
-
get_unmapped_area: called by the mmap(2) system call
check_flags: called by the fcntl(2) system call for F_SETFL command
@@ -828,6 +895,11 @@ otherwise noted.
splice_read: called by the VFS to splice data from file to a pipe. This
method is used by the splice(2) system call
+ setlease: called by the VFS to set or release a file lock lease.
+ setlease has the file_lock_lock held and must not sleep.
+
+ fallocate: called by the VFS to preallocate blocks or punch a hole.
+
Note that the file operations are implemented by the specific
filesystem in which the inode resides. When opening a device node
(character or block special) most filesystems will call special
@@ -854,55 +926,72 @@ the VFS uses a default. As of kernel 2.6.22, the following members are
defined:
struct dentry_operations {
- int (*d_revalidate)(struct dentry *, struct nameidata *);
- int (*d_hash)(const struct dentry *, const struct inode *,
- struct qstr *);
- int (*d_compare)(const struct dentry *, const struct inode *,
- const struct dentry *, const struct inode *,
+ int (*d_revalidate)(struct dentry *, unsigned int);
+ int (*d_weak_revalidate)(struct dentry *, unsigned int);
+ int (*d_hash)(const struct dentry *, struct qstr *);
+ int (*d_compare)(const struct dentry *, const struct dentry *,
unsigned int, const char *, const struct qstr *);
int (*d_delete)(const struct dentry *);
void (*d_release)(struct dentry *);
void (*d_iput)(struct dentry *, struct inode *);
char *(*d_dname)(struct dentry *, char *, int);
struct vfsmount *(*d_automount)(struct path *);
- int (*d_manage)(struct dentry *, bool, bool);
+ int (*d_manage)(struct dentry *, bool);
};
d_revalidate: called when the VFS needs to revalidate a dentry. This
is called whenever a name look-up finds a dentry in the
- dcache. Most filesystems leave this as NULL, because all their
- dentries in the dcache are valid
+ dcache. Most local filesystems leave this as NULL, because all their
+ dentries in the dcache are valid. Network filesystems are different
+ since things can change on the server without the client necessarily
+ being aware of it.
- d_revalidate may be called in rcu-walk mode (nd->flags & LOOKUP_RCU).
+ This function should return a positive value if the dentry is still
+ valid, and zero or a negative error code if it isn't.
+
+ d_revalidate may be called in rcu-walk mode (flags & LOOKUP_RCU).
If in rcu-walk mode, the filesystem must revalidate the dentry without
blocking or storing to the dentry, d_parent and d_inode should not be
- used without care (because they can go NULL), instead nd->inode should
- be used.
+ used without care (because they can change and, in d_inode case, even
+ become NULL under us).
If a situation is encountered that rcu-walk cannot handle, return
-ECHILD and it will be called again in ref-walk mode.
+ d_weak_revalidate: called when the VFS needs to revalidate a "jumped" dentry.
+ This is called when a path-walk ends at dentry that was not acquired by
+ doing a lookup in the parent directory. This includes "/", "." and "..",
+ as well as procfs-style symlinks and mountpoint traversal.
+
+ In this case, we are less concerned with whether the dentry is still
+ fully correct, but rather that the inode is still valid. As with
+ d_revalidate, most local filesystems will set this to NULL since their
+ dcache entries are always valid.
+
+ This function has the same return code semantics as d_revalidate.
+
+ d_weak_revalidate is only called after leaving rcu-walk mode.
+
d_hash: called when the VFS adds a dentry to the hash table. The first
dentry passed to d_hash is the parent directory that the name is
- to be hashed into. The inode is the dentry's inode.
+ to be hashed into.
Same locking and synchronisation rules as d_compare regarding
what is safe to dereference etc.
d_compare: called to compare a dentry name with a given name. The first
dentry is the parent of the dentry to be compared, the second is
- the parent's inode, then the dentry and inode (may be NULL) of the
- child dentry. len and name string are properties of the dentry to be
- compared. qstr is the name to compare it with.
+ the child dentry. len and name string are properties of the dentry
+ to be compared. qstr is the name to compare it with.
Must be constant and idempotent, and should not take locks if
- possible, and should not or store into the dentry or inodes.
- Should not dereference pointers outside the dentry or inodes without
+ possible, and should not or store into the dentry.
+ Should not dereference pointers outside the dentry without
lots of care (eg. d_parent, d_inode, d_name should not be used).
However, our vfsmount is pinned, and RCU held, so the dentries and
inodes won't disappear, neither will our sb or filesystem module.
- ->i_sb and ->d_sb may be used.
+ ->d_sb may be used.
It is a tricky calling convention because it needs to be called under
"rcu-walk", ie. without any locks or references on things.
@@ -961,13 +1050,9 @@ struct dentry_operations {
mounted on it and not to check the automount flag. Any other error
code will abort pathwalk completely.
- If the 'mounting_here' parameter is true, then namespace_sem is being
- held by the caller and the function should not initiate any mounts or
- unmounts that it will then wait for.
-
If the 'rcu_walk' parameter is true, then the caller is doing a
pathwalk in RCU-walk mode. Sleeping is not permitted in this mode,
- and the caller can be asked to leave it and call again by returing
+ and the caller can be asked to leave it and call again by returning
-ECHILD.
This function is only used if DCACHE_MANAGE_TRANSIT is set on the
@@ -1027,9 +1112,6 @@ manipulate dentries:
and the dentry is returned. The caller must use dput()
to free the dentry when it finishes using it.
-For further information on dentry locking, please refer to the document
-Documentation/filesystems/dentry-locking.txt.
-
Mount Options
=============