From a0612b1f0b3d851458dafe5886e33d58c1967440 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Mon, 12 May 2008 14:01:49 -0700 Subject: uml: hppfs fixes hppfs tidying and fixes noticed during hch's get_inode work - style fixes a copy_to_user got its return value checked hppfs_write no longer fiddles file->f_pos because it gets and returns pos in its arguments hppfs_delete_inode dputs the underlyng procfs dentry stored in its private data and mntputs the vfsmnt stashed in s_fs_info hppfs_put_super no longer needs to mntput the s_fs_info, so it no longer needs to exist hppfs_readlink and hppfs_follow_link were doing a bunch of stuff with a struct file which they didn't use there is now a ->permission which calls generic_permission get_inode was always returning 0 for some reason - it now returns an inode if nothing bad happened Signed-off-by: Jeff Dike Cc: WANG Cong Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hppfs/hppfs_kern.c | 82 +++++++++++++++++++-------------------------------- 1 file changed, 30 insertions(+), 52 deletions(-) (limited to 'fs') diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c index 8601d8ef3b5..65077aa90f0 100644 --- a/fs/hppfs/hppfs_kern.c +++ b/fs/hppfs/hppfs_kern.c @@ -33,7 +33,7 @@ struct hppfs_private { }; struct hppfs_inode_info { - struct dentry *proc_dentry; + struct dentry *proc_dentry; struct inode vfs_inode; }; @@ -52,7 +52,7 @@ static int is_pid(struct dentry *dentry) int i; sb = dentry->d_sb; - if ((sb->s_op != &hppfs_sbops) || (dentry->d_parent != sb->s_root)) + if (dentry->d_parent != sb->s_root) return 0; for (i = 0; i < dentry->d_name.len; i++) { @@ -136,7 +136,7 @@ static int file_removed(struct dentry *dentry, const char *file) } static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry, - struct nameidata *nd) + struct nameidata *nd) { struct dentry *proc_dentry, *new, *parent; struct inode *inode; @@ -254,6 +254,8 @@ static ssize_t hppfs_read(struct file *file, char __user *buf, size_t count, int err; if (hppfs->contents != NULL) { + int rem; + if (*ppos >= hppfs->len) return 0; @@ -267,8 +269,10 @@ static ssize_t hppfs_read(struct file *file, char __user *buf, size_t count, if (off + count > hppfs->len) count = hppfs->len - off; - copy_to_user(buf, &data->contents[off], count); - *ppos += count; + rem = copy_to_user(buf, &data->contents[off], count); + *ppos += count - rem; + if (rem > 0) + return -EFAULT; } else if (hppfs->host_fd != -1) { err = os_seek_file(hppfs->host_fd, *ppos); if (err) { @@ -285,21 +289,15 @@ static ssize_t hppfs_read(struct file *file, char __user *buf, size_t count, return count; } -static ssize_t hppfs_write(struct file *file, const char __user *buf, size_t len, - loff_t *ppos) +static ssize_t hppfs_write(struct file *file, const char __user *buf, + size_t len, loff_t *ppos) { struct hppfs_private *data = file->private_data; struct file *proc_file = data->proc_file; ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *); - int err; write = proc_file->f_path.dentry->d_inode->i_fop->write; - - proc_file->f_pos = file->f_pos; - err = (*write)(proc_file, buf, len, &proc_file->f_pos); - file->f_pos = proc_file->f_pos; - - return err; + return (*write)(proc_file, buf, len, ppos); } static int open_host_sock(char *host_file, int *filter_out) @@ -357,7 +355,7 @@ static struct hppfs_data *hppfs_get_data(int fd, int filter, if (filter) { while ((n = read_proc(proc_file, data->contents, - sizeof(data->contents), NULL, 0)) > 0) + sizeof(data->contents), NULL, 0)) > 0) os_write_file(fd, data->contents, n); err = os_shutdown_socket(fd, 0, 1); if (err) { @@ -429,8 +427,8 @@ static int file_mode(int fmode) static int hppfs_open(struct inode *inode, struct file *file) { struct hppfs_private *data; - struct dentry *proc_dentry; struct vfsmount *proc_mnt; + struct dentry *proc_dentry; char *host_file; int err, fd, type, filter; @@ -492,8 +490,8 @@ static int hppfs_open(struct inode *inode, struct file *file) static int hppfs_dir_open(struct inode *inode, struct file *file) { struct hppfs_private *data; - struct dentry *proc_dentry; struct vfsmount *proc_mnt; + struct dentry *proc_dentry; int err; err = -ENOMEM; @@ -620,6 +618,9 @@ static struct inode *hppfs_alloc_inode(struct super_block *sb) void hppfs_delete_inode(struct inode *ino) { + dput(HPPFS_I(ino)->proc_dentry); + mntput(ino->i_sb->s_fs_info); + clear_inode(ino); } @@ -628,69 +629,46 @@ static void hppfs_destroy_inode(struct inode *inode) kfree(HPPFS_I(inode)); } -static void hppfs_put_super(struct super_block *sb) -{ - mntput(sb->s_fs_info); -} - static const struct super_operations hppfs_sbops = { .alloc_inode = hppfs_alloc_inode, .destroy_inode = hppfs_destroy_inode, .delete_inode = hppfs_delete_inode, .statfs = hppfs_statfs, - .put_super = hppfs_put_super, }; static int hppfs_readlink(struct dentry *dentry, char __user *buffer, int buflen) { - struct file *proc_file; struct dentry *proc_dentry; - struct vfsmount *proc_mnt; - int ret; proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry; - proc_mnt = dentry->d_sb->s_fs_info; - - proc_file = dentry_open(dget(proc_dentry), mntget(proc_mnt), O_RDONLY); - if (IS_ERR(proc_file)) - return PTR_ERR(proc_file); - - ret = proc_dentry->d_inode->i_op->readlink(proc_dentry, buffer, buflen); - - fput(proc_file); - - return ret; + return proc_dentry->d_inode->i_op->readlink(proc_dentry, buffer, + buflen); } -static void* hppfs_follow_link(struct dentry *dentry, struct nameidata *nd) +static void *hppfs_follow_link(struct dentry *dentry, struct nameidata *nd) { - struct file *proc_file; struct dentry *proc_dentry; - struct vfsmount *proc_mnt; - void *ret; proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry; - proc_mnt = dentry->d_sb->s_fs_info; - - proc_file = dentry_open(dget(proc_dentry), mntget(proc_mnt), O_RDONLY); - if (IS_ERR(proc_file)) - return proc_file; - - ret = proc_dentry->d_inode->i_op->follow_link(proc_dentry, nd); - fput(proc_file); + return proc_dentry->d_inode->i_op->follow_link(proc_dentry, nd); +} - return ret; +int hppfs_permission(struct inode *inode, int mask, struct nameidata *nd) +{ + return generic_permission(inode, mask, NULL); } static const struct inode_operations hppfs_dir_iops = { .lookup = hppfs_lookup, + .permission = hppfs_permission, }; static const struct inode_operations hppfs_link_iops = { .readlink = hppfs_readlink, .follow_link = hppfs_follow_link, + .permission = hppfs_permission, }; static struct inode *get_inode(struct super_block *sb, struct dentry *dentry) @@ -712,7 +690,7 @@ static struct inode *get_inode(struct super_block *sb, struct dentry *dentry) inode->i_fop = &hppfs_file_fops; } - HPPFS_I(inode)->proc_dentry = dentry; + HPPFS_I(inode)->proc_dentry = dget(dentry); inode->i_uid = proc_ino->i_uid; inode->i_gid = proc_ino->i_gid; @@ -725,7 +703,7 @@ static struct inode *get_inode(struct super_block *sb, struct dentry *dentry) inode->i_size = proc_ino->i_size; inode->i_blocks = proc_ino->i_blocks; - return 0; + return inode; } static int hppfs_fill_super(struct super_block *sb, void *d, int silent) -- cgit v1.2.3-70-g09d2 From 46d7b522ebf486edbd096965d534cc6465e9e309 Mon Sep 17 00:00:00 2001 From: Jeff Dike Date: Mon, 12 May 2008 14:01:50 -0700 Subject: uml: move hppfs_kern.c to hppfs.c There's no reason for the _kern in hppfs_kern.c, so move it to hppfs.c. Signed-off-by: Jeff Dike Cc: WANG Cong Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hppfs/Makefile | 6 +- fs/hppfs/hppfs.c | 771 ++++++++++++++++++++++++++++++++++++++++++++++++++ fs/hppfs/hppfs_kern.c | 771 -------------------------------------------------- 3 files changed, 774 insertions(+), 774 deletions(-) create mode 100644 fs/hppfs/hppfs.c delete mode 100644 fs/hppfs/hppfs_kern.c (limited to 'fs') diff --git a/fs/hppfs/Makefile b/fs/hppfs/Makefile index 6890433f759..8a1f5034436 100644 --- a/fs/hppfs/Makefile +++ b/fs/hppfs/Makefile @@ -1,9 +1,9 @@ # -# Copyright (C) 2002, 2003 Jeff Dike (jdike@karaya.com) +# Copyright (C) 2002 - 2008 Jeff Dike (jdike@{addtoit,linux.intel}.com) # Licensed under the GPL # -hppfs-objs := hppfs_kern.o +hppfs-objs := hppfs.o obj-y = -obj-$(CONFIG_HPPFS) += hppfs.o +obj-$(CONFIG_HPPFS) += $(hppfs-objs) diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c new file mode 100644 index 00000000000..65077aa90f0 --- /dev/null +++ b/fs/hppfs/hppfs.c @@ -0,0 +1,771 @@ +/* + * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) + * Licensed under the GPL + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "os.h" + +static struct inode *get_inode(struct super_block *, struct dentry *); + +struct hppfs_data { + struct list_head list; + char contents[PAGE_SIZE - sizeof(struct list_head)]; +}; + +struct hppfs_private { + struct file *proc_file; + int host_fd; + loff_t len; + struct hppfs_data *contents; +}; + +struct hppfs_inode_info { + struct dentry *proc_dentry; + struct inode vfs_inode; +}; + +static inline struct hppfs_inode_info *HPPFS_I(struct inode *inode) +{ + return container_of(inode, struct hppfs_inode_info, vfs_inode); +} + +#define HPPFS_SUPER_MAGIC 0xb00000ee + +static const struct super_operations hppfs_sbops; + +static int is_pid(struct dentry *dentry) +{ + struct super_block *sb; + int i; + + sb = dentry->d_sb; + if (dentry->d_parent != sb->s_root) + return 0; + + for (i = 0; i < dentry->d_name.len; i++) { + if (!isdigit(dentry->d_name.name[i])) + return 0; + } + return 1; +} + +static char *dentry_name(struct dentry *dentry, int extra) +{ + struct dentry *parent; + char *root, *name; + const char *seg_name; + int len, seg_len; + + len = 0; + parent = dentry; + while (parent->d_parent != parent) { + if (is_pid(parent)) + len += strlen("pid") + 1; + else len += parent->d_name.len + 1; + parent = parent->d_parent; + } + + root = "proc"; + len += strlen(root); + name = kmalloc(len + extra + 1, GFP_KERNEL); + if (name == NULL) + return NULL; + + name[len] = '\0'; + parent = dentry; + while (parent->d_parent != parent) { + if (is_pid(parent)) { + seg_name = "pid"; + seg_len = strlen("pid"); + } + else { + seg_name = parent->d_name.name; + seg_len = parent->d_name.len; + } + + len -= seg_len + 1; + name[len] = '/'; + strncpy(&name[len + 1], seg_name, seg_len); + parent = parent->d_parent; + } + strncpy(name, root, strlen(root)); + return name; +} + +static int file_removed(struct dentry *dentry, const char *file) +{ + char *host_file; + int extra, fd; + + extra = 0; + if (file != NULL) + extra += strlen(file) + 1; + + host_file = dentry_name(dentry, extra + strlen("/remove")); + if (host_file == NULL) { + printk(KERN_ERR "file_removed : allocation failed\n"); + return -ENOMEM; + } + + if (file != NULL) { + strcat(host_file, "/"); + strcat(host_file, file); + } + strcat(host_file, "/remove"); + + fd = os_open_file(host_file, of_read(OPENFLAGS()), 0); + kfree(host_file); + if (fd > 0) { + os_close_file(fd); + return 1; + } + return 0; +} + +static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry, + struct nameidata *nd) +{ + struct dentry *proc_dentry, *new, *parent; + struct inode *inode; + int err, deleted; + + deleted = file_removed(dentry, NULL); + if (deleted < 0) + return ERR_PTR(deleted); + else if (deleted) + return ERR_PTR(-ENOENT); + + err = -ENOMEM; + parent = HPPFS_I(ino)->proc_dentry; + mutex_lock(&parent->d_inode->i_mutex); + proc_dentry = d_lookup(parent, &dentry->d_name); + if (proc_dentry == NULL) { + proc_dentry = d_alloc(parent, &dentry->d_name); + if (proc_dentry == NULL) { + mutex_unlock(&parent->d_inode->i_mutex); + goto out; + } + new = (*parent->d_inode->i_op->lookup)(parent->d_inode, + proc_dentry, NULL); + if (new) { + dput(proc_dentry); + proc_dentry = new; + } + } + mutex_unlock(&parent->d_inode->i_mutex); + + if (IS_ERR(proc_dentry)) + return proc_dentry; + + err = -ENOMEM; + inode = get_inode(ino->i_sb, proc_dentry); + if (!inode) + goto out_dput; + + d_add(dentry, inode); + return NULL; + + out_dput: + dput(proc_dentry); + out: + return ERR_PTR(err); +} + +static const struct inode_operations hppfs_file_iops = { +}; + +static ssize_t read_proc(struct file *file, char __user *buf, ssize_t count, + loff_t *ppos, int is_user) +{ + ssize_t (*read)(struct file *, char __user *, size_t, loff_t *); + ssize_t n; + + read = file->f_path.dentry->d_inode->i_fop->read; + + if (!is_user) + set_fs(KERNEL_DS); + + n = (*read)(file, buf, count, &file->f_pos); + + if (!is_user) + set_fs(USER_DS); + + if (ppos) + *ppos = file->f_pos; + return n; +} + +static ssize_t hppfs_read_file(int fd, char __user *buf, ssize_t count) +{ + ssize_t n; + int cur, err; + char *new_buf; + + n = -ENOMEM; + new_buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (new_buf == NULL) { + printk(KERN_ERR "hppfs_read_file : kmalloc failed\n"); + goto out; + } + n = 0; + while (count > 0) { + cur = min_t(ssize_t, count, PAGE_SIZE); + err = os_read_file(fd, new_buf, cur); + if (err < 0) { + printk(KERN_ERR "hppfs_read : read failed, " + "errno = %d\n", err); + n = err; + goto out_free; + } else if (err == 0) + break; + + if (copy_to_user(buf, new_buf, err)) { + n = -EFAULT; + goto out_free; + } + n += err; + count -= err; + } + out_free: + kfree(new_buf); + out: + return n; +} + +static ssize_t hppfs_read(struct file *file, char __user *buf, size_t count, + loff_t *ppos) +{ + struct hppfs_private *hppfs = file->private_data; + struct hppfs_data *data; + loff_t off; + int err; + + if (hppfs->contents != NULL) { + int rem; + + if (*ppos >= hppfs->len) + return 0; + + data = hppfs->contents; + off = *ppos; + while (off >= sizeof(data->contents)) { + data = list_entry(data->list.next, struct hppfs_data, + list); + off -= sizeof(data->contents); + } + + if (off + count > hppfs->len) + count = hppfs->len - off; + rem = copy_to_user(buf, &data->contents[off], count); + *ppos += count - rem; + if (rem > 0) + return -EFAULT; + } else if (hppfs->host_fd != -1) { + err = os_seek_file(hppfs->host_fd, *ppos); + if (err) { + printk(KERN_ERR "hppfs_read : seek failed, " + "errno = %d\n", err); + return err; + } + count = hppfs_read_file(hppfs->host_fd, buf, count); + if (count > 0) + *ppos += count; + } + else count = read_proc(hppfs->proc_file, buf, count, ppos, 1); + + return count; +} + +static ssize_t hppfs_write(struct file *file, const char __user *buf, + size_t len, loff_t *ppos) +{ + struct hppfs_private *data = file->private_data; + struct file *proc_file = data->proc_file; + ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *); + + write = proc_file->f_path.dentry->d_inode->i_fop->write; + return (*write)(proc_file, buf, len, ppos); +} + +static int open_host_sock(char *host_file, int *filter_out) +{ + char *end; + int fd; + + end = &host_file[strlen(host_file)]; + strcpy(end, "/rw"); + *filter_out = 1; + fd = os_connect_socket(host_file); + if (fd > 0) + return fd; + + strcpy(end, "/r"); + *filter_out = 0; + fd = os_connect_socket(host_file); + return fd; +} + +static void free_contents(struct hppfs_data *head) +{ + struct hppfs_data *data; + struct list_head *ele, *next; + + if (head == NULL) + return; + + list_for_each_safe(ele, next, &head->list) { + data = list_entry(ele, struct hppfs_data, list); + kfree(data); + } + kfree(head); +} + +static struct hppfs_data *hppfs_get_data(int fd, int filter, + struct file *proc_file, + struct file *hppfs_file, + loff_t *size_out) +{ + struct hppfs_data *data, *new, *head; + int n, err; + + err = -ENOMEM; + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (data == NULL) { + printk(KERN_ERR "hppfs_get_data : head allocation failed\n"); + goto failed; + } + + INIT_LIST_HEAD(&data->list); + + head = data; + *size_out = 0; + + if (filter) { + while ((n = read_proc(proc_file, data->contents, + sizeof(data->contents), NULL, 0)) > 0) + os_write_file(fd, data->contents, n); + err = os_shutdown_socket(fd, 0, 1); + if (err) { + printk(KERN_ERR "hppfs_get_data : failed to shut down " + "socket\n"); + goto failed_free; + } + } + while (1) { + n = os_read_file(fd, data->contents, sizeof(data->contents)); + if (n < 0) { + err = n; + printk(KERN_ERR "hppfs_get_data : read failed, " + "errno = %d\n", err); + goto failed_free; + } else if (n == 0) + break; + + *size_out += n; + + if (n < sizeof(data->contents)) + break; + + new = kmalloc(sizeof(*data), GFP_KERNEL); + if (new == 0) { + printk(KERN_ERR "hppfs_get_data : data allocation " + "failed\n"); + err = -ENOMEM; + goto failed_free; + } + + INIT_LIST_HEAD(&new->list); + list_add(&new->list, &data->list); + data = new; + } + return head; + + failed_free: + free_contents(head); + failed: + return ERR_PTR(err); +} + +static struct hppfs_private *hppfs_data(void) +{ + struct hppfs_private *data; + + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (data == NULL) + return data; + + *data = ((struct hppfs_private ) { .host_fd = -1, + .len = -1, + .contents = NULL } ); + return data; +} + +static int file_mode(int fmode) +{ + if (fmode == (FMODE_READ | FMODE_WRITE)) + return O_RDWR; + if (fmode == FMODE_READ) + return O_RDONLY; + if (fmode == FMODE_WRITE) + return O_WRONLY; + return 0; +} + +static int hppfs_open(struct inode *inode, struct file *file) +{ + struct hppfs_private *data; + struct vfsmount *proc_mnt; + struct dentry *proc_dentry; + char *host_file; + int err, fd, type, filter; + + err = -ENOMEM; + data = hppfs_data(); + if (data == NULL) + goto out; + + host_file = dentry_name(file->f_path.dentry, strlen("/rw")); + if (host_file == NULL) + goto out_free2; + + proc_dentry = HPPFS_I(inode)->proc_dentry; + proc_mnt = inode->i_sb->s_fs_info; + + /* XXX This isn't closed anywhere */ + data->proc_file = dentry_open(dget(proc_dentry), mntget(proc_mnt), + file_mode(file->f_mode)); + err = PTR_ERR(data->proc_file); + if (IS_ERR(data->proc_file)) + goto out_free1; + + type = os_file_type(host_file); + if (type == OS_TYPE_FILE) { + fd = os_open_file(host_file, of_read(OPENFLAGS()), 0); + if (fd >= 0) + data->host_fd = fd; + else + printk(KERN_ERR "hppfs_open : failed to open '%s', " + "errno = %d\n", host_file, -fd); + + data->contents = NULL; + } else if (type == OS_TYPE_DIR) { + fd = open_host_sock(host_file, &filter); + if (fd > 0) { + data->contents = hppfs_get_data(fd, filter, + data->proc_file, + file, &data->len); + if (!IS_ERR(data->contents)) + data->host_fd = fd; + } else + printk(KERN_ERR "hppfs_open : failed to open a socket " + "in '%s', errno = %d\n", host_file, -fd); + } + kfree(host_file); + + file->private_data = data; + return 0; + + out_free1: + kfree(host_file); + out_free2: + free_contents(data->contents); + kfree(data); + out: + return err; +} + +static int hppfs_dir_open(struct inode *inode, struct file *file) +{ + struct hppfs_private *data; + struct vfsmount *proc_mnt; + struct dentry *proc_dentry; + int err; + + err = -ENOMEM; + data = hppfs_data(); + if (data == NULL) + goto out; + + proc_dentry = HPPFS_I(inode)->proc_dentry; + proc_mnt = inode->i_sb->s_fs_info; + data->proc_file = dentry_open(dget(proc_dentry), mntget(proc_mnt), + file_mode(file->f_mode)); + err = PTR_ERR(data->proc_file); + if (IS_ERR(data->proc_file)) + goto out_free; + + file->private_data = data; + return 0; + + out_free: + kfree(data); + out: + return err; +} + +static loff_t hppfs_llseek(struct file *file, loff_t off, int where) +{ + struct hppfs_private *data = file->private_data; + struct file *proc_file = data->proc_file; + loff_t (*llseek)(struct file *, loff_t, int); + loff_t ret; + + llseek = proc_file->f_path.dentry->d_inode->i_fop->llseek; + if (llseek != NULL) { + ret = (*llseek)(proc_file, off, where); + if (ret < 0) + return ret; + } + + return default_llseek(file, off, where); +} + +static const struct file_operations hppfs_file_fops = { + .owner = NULL, + .llseek = hppfs_llseek, + .read = hppfs_read, + .write = hppfs_write, + .open = hppfs_open, +}; + +struct hppfs_dirent { + void *vfs_dirent; + filldir_t filldir; + struct dentry *dentry; +}; + +static int hppfs_filldir(void *d, const char *name, int size, + loff_t offset, u64 inode, unsigned int type) +{ + struct hppfs_dirent *dirent = d; + + if (file_removed(dirent->dentry, name)) + return 0; + + return (*dirent->filldir)(dirent->vfs_dirent, name, size, offset, + inode, type); +} + +static int hppfs_readdir(struct file *file, void *ent, filldir_t filldir) +{ + struct hppfs_private *data = file->private_data; + struct file *proc_file = data->proc_file; + int (*readdir)(struct file *, void *, filldir_t); + struct hppfs_dirent dirent = ((struct hppfs_dirent) + { .vfs_dirent = ent, + .filldir = filldir, + .dentry = file->f_path.dentry + }); + int err; + + readdir = proc_file->f_path.dentry->d_inode->i_fop->readdir; + + proc_file->f_pos = file->f_pos; + err = (*readdir)(proc_file, &dirent, hppfs_filldir); + file->f_pos = proc_file->f_pos; + + return err; +} + +static int hppfs_fsync(struct file *file, struct dentry *dentry, int datasync) +{ + return 0; +} + +static const struct file_operations hppfs_dir_fops = { + .owner = NULL, + .readdir = hppfs_readdir, + .open = hppfs_dir_open, + .fsync = hppfs_fsync, +}; + +static int hppfs_statfs(struct dentry *dentry, struct kstatfs *sf) +{ + sf->f_blocks = 0; + sf->f_bfree = 0; + sf->f_bavail = 0; + sf->f_files = 0; + sf->f_ffree = 0; + sf->f_type = HPPFS_SUPER_MAGIC; + return 0; +} + +static struct inode *hppfs_alloc_inode(struct super_block *sb) +{ + struct hppfs_inode_info *hi; + + hi = kmalloc(sizeof(*hi), GFP_KERNEL); + if (!hi) + return NULL; + + hi->proc_dentry = NULL; + inode_init_once(&hi->vfs_inode); + return &hi->vfs_inode; +} + +void hppfs_delete_inode(struct inode *ino) +{ + dput(HPPFS_I(ino)->proc_dentry); + mntput(ino->i_sb->s_fs_info); + + clear_inode(ino); +} + +static void hppfs_destroy_inode(struct inode *inode) +{ + kfree(HPPFS_I(inode)); +} + +static const struct super_operations hppfs_sbops = { + .alloc_inode = hppfs_alloc_inode, + .destroy_inode = hppfs_destroy_inode, + .delete_inode = hppfs_delete_inode, + .statfs = hppfs_statfs, +}; + +static int hppfs_readlink(struct dentry *dentry, char __user *buffer, + int buflen) +{ + struct dentry *proc_dentry; + + proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry; + return proc_dentry->d_inode->i_op->readlink(proc_dentry, buffer, + buflen); +} + +static void *hppfs_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + struct dentry *proc_dentry; + + proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry; + + return proc_dentry->d_inode->i_op->follow_link(proc_dentry, nd); +} + +int hppfs_permission(struct inode *inode, int mask, struct nameidata *nd) +{ + return generic_permission(inode, mask, NULL); +} + +static const struct inode_operations hppfs_dir_iops = { + .lookup = hppfs_lookup, + .permission = hppfs_permission, +}; + +static const struct inode_operations hppfs_link_iops = { + .readlink = hppfs_readlink, + .follow_link = hppfs_follow_link, + .permission = hppfs_permission, +}; + +static struct inode *get_inode(struct super_block *sb, struct dentry *dentry) +{ + struct inode *proc_ino = dentry->d_inode; + struct inode *inode = new_inode(sb); + + if (!inode) + return ERR_PTR(-ENOMEM); + + if (S_ISDIR(dentry->d_inode->i_mode)) { + inode->i_op = &hppfs_dir_iops; + inode->i_fop = &hppfs_dir_fops; + } else if (S_ISLNK(dentry->d_inode->i_mode)) { + inode->i_op = &hppfs_link_iops; + inode->i_fop = &hppfs_file_fops; + } else { + inode->i_op = &hppfs_file_iops; + inode->i_fop = &hppfs_file_fops; + } + + HPPFS_I(inode)->proc_dentry = dget(dentry); + + inode->i_uid = proc_ino->i_uid; + inode->i_gid = proc_ino->i_gid; + inode->i_atime = proc_ino->i_atime; + inode->i_mtime = proc_ino->i_mtime; + inode->i_ctime = proc_ino->i_ctime; + inode->i_ino = proc_ino->i_ino; + inode->i_mode = proc_ino->i_mode; + inode->i_nlink = proc_ino->i_nlink; + inode->i_size = proc_ino->i_size; + inode->i_blocks = proc_ino->i_blocks; + + return inode; +} + +static int hppfs_fill_super(struct super_block *sb, void *d, int silent) +{ + struct inode *root_inode; + struct vfsmount *proc_mnt; + int err = -ENOENT; + + proc_mnt = do_kern_mount("proc", 0, "proc", NULL); + if (IS_ERR(proc_mnt)) + goto out; + + sb->s_blocksize = 1024; + sb->s_blocksize_bits = 10; + sb->s_magic = HPPFS_SUPER_MAGIC; + sb->s_op = &hppfs_sbops; + sb->s_fs_info = proc_mnt; + + err = -ENOMEM; + root_inode = get_inode(sb, proc_mnt->mnt_sb->s_root); + if (!root_inode) + goto out_mntput; + + sb->s_root = d_alloc_root(root_inode); + if (!sb->s_root) + goto out_iput; + + return 0; + + out_iput: + iput(root_inode); + out_mntput: + mntput(proc_mnt); + out: + return(err); +} + +static int hppfs_read_super(struct file_system_type *type, + int flags, const char *dev_name, + void *data, struct vfsmount *mnt) +{ + return get_sb_nodev(type, flags, data, hppfs_fill_super, mnt); +} + +static struct file_system_type hppfs_type = { + .owner = THIS_MODULE, + .name = "hppfs", + .get_sb = hppfs_read_super, + .kill_sb = kill_anon_super, + .fs_flags = 0, +}; + +static int __init init_hppfs(void) +{ + return register_filesystem(&hppfs_type); +} + +static void __exit exit_hppfs(void) +{ + unregister_filesystem(&hppfs_type); +} + +module_init(init_hppfs) +module_exit(exit_hppfs) +MODULE_LICENSE("GPL"); diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c deleted file mode 100644 index 65077aa90f0..00000000000 --- a/fs/hppfs/hppfs_kern.c +++ /dev/null @@ -1,771 +0,0 @@ -/* - * Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * Licensed under the GPL - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "os.h" - -static struct inode *get_inode(struct super_block *, struct dentry *); - -struct hppfs_data { - struct list_head list; - char contents[PAGE_SIZE - sizeof(struct list_head)]; -}; - -struct hppfs_private { - struct file *proc_file; - int host_fd; - loff_t len; - struct hppfs_data *contents; -}; - -struct hppfs_inode_info { - struct dentry *proc_dentry; - struct inode vfs_inode; -}; - -static inline struct hppfs_inode_info *HPPFS_I(struct inode *inode) -{ - return container_of(inode, struct hppfs_inode_info, vfs_inode); -} - -#define HPPFS_SUPER_MAGIC 0xb00000ee - -static const struct super_operations hppfs_sbops; - -static int is_pid(struct dentry *dentry) -{ - struct super_block *sb; - int i; - - sb = dentry->d_sb; - if (dentry->d_parent != sb->s_root) - return 0; - - for (i = 0; i < dentry->d_name.len; i++) { - if (!isdigit(dentry->d_name.name[i])) - return 0; - } - return 1; -} - -static char *dentry_name(struct dentry *dentry, int extra) -{ - struct dentry *parent; - char *root, *name; - const char *seg_name; - int len, seg_len; - - len = 0; - parent = dentry; - while (parent->d_parent != parent) { - if (is_pid(parent)) - len += strlen("pid") + 1; - else len += parent->d_name.len + 1; - parent = parent->d_parent; - } - - root = "proc"; - len += strlen(root); - name = kmalloc(len + extra + 1, GFP_KERNEL); - if (name == NULL) - return NULL; - - name[len] = '\0'; - parent = dentry; - while (parent->d_parent != parent) { - if (is_pid(parent)) { - seg_name = "pid"; - seg_len = strlen("pid"); - } - else { - seg_name = parent->d_name.name; - seg_len = parent->d_name.len; - } - - len -= seg_len + 1; - name[len] = '/'; - strncpy(&name[len + 1], seg_name, seg_len); - parent = parent->d_parent; - } - strncpy(name, root, strlen(root)); - return name; -} - -static int file_removed(struct dentry *dentry, const char *file) -{ - char *host_file; - int extra, fd; - - extra = 0; - if (file != NULL) - extra += strlen(file) + 1; - - host_file = dentry_name(dentry, extra + strlen("/remove")); - if (host_file == NULL) { - printk(KERN_ERR "file_removed : allocation failed\n"); - return -ENOMEM; - } - - if (file != NULL) { - strcat(host_file, "/"); - strcat(host_file, file); - } - strcat(host_file, "/remove"); - - fd = os_open_file(host_file, of_read(OPENFLAGS()), 0); - kfree(host_file); - if (fd > 0) { - os_close_file(fd); - return 1; - } - return 0; -} - -static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry, - struct nameidata *nd) -{ - struct dentry *proc_dentry, *new, *parent; - struct inode *inode; - int err, deleted; - - deleted = file_removed(dentry, NULL); - if (deleted < 0) - return ERR_PTR(deleted); - else if (deleted) - return ERR_PTR(-ENOENT); - - err = -ENOMEM; - parent = HPPFS_I(ino)->proc_dentry; - mutex_lock(&parent->d_inode->i_mutex); - proc_dentry = d_lookup(parent, &dentry->d_name); - if (proc_dentry == NULL) { - proc_dentry = d_alloc(parent, &dentry->d_name); - if (proc_dentry == NULL) { - mutex_unlock(&parent->d_inode->i_mutex); - goto out; - } - new = (*parent->d_inode->i_op->lookup)(parent->d_inode, - proc_dentry, NULL); - if (new) { - dput(proc_dentry); - proc_dentry = new; - } - } - mutex_unlock(&parent->d_inode->i_mutex); - - if (IS_ERR(proc_dentry)) - return proc_dentry; - - err = -ENOMEM; - inode = get_inode(ino->i_sb, proc_dentry); - if (!inode) - goto out_dput; - - d_add(dentry, inode); - return NULL; - - out_dput: - dput(proc_dentry); - out: - return ERR_PTR(err); -} - -static const struct inode_operations hppfs_file_iops = { -}; - -static ssize_t read_proc(struct file *file, char __user *buf, ssize_t count, - loff_t *ppos, int is_user) -{ - ssize_t (*read)(struct file *, char __user *, size_t, loff_t *); - ssize_t n; - - read = file->f_path.dentry->d_inode->i_fop->read; - - if (!is_user) - set_fs(KERNEL_DS); - - n = (*read)(file, buf, count, &file->f_pos); - - if (!is_user) - set_fs(USER_DS); - - if (ppos) - *ppos = file->f_pos; - return n; -} - -static ssize_t hppfs_read_file(int fd, char __user *buf, ssize_t count) -{ - ssize_t n; - int cur, err; - char *new_buf; - - n = -ENOMEM; - new_buf = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (new_buf == NULL) { - printk(KERN_ERR "hppfs_read_file : kmalloc failed\n"); - goto out; - } - n = 0; - while (count > 0) { - cur = min_t(ssize_t, count, PAGE_SIZE); - err = os_read_file(fd, new_buf, cur); - if (err < 0) { - printk(KERN_ERR "hppfs_read : read failed, " - "errno = %d\n", err); - n = err; - goto out_free; - } else if (err == 0) - break; - - if (copy_to_user(buf, new_buf, err)) { - n = -EFAULT; - goto out_free; - } - n += err; - count -= err; - } - out_free: - kfree(new_buf); - out: - return n; -} - -static ssize_t hppfs_read(struct file *file, char __user *buf, size_t count, - loff_t *ppos) -{ - struct hppfs_private *hppfs = file->private_data; - struct hppfs_data *data; - loff_t off; - int err; - - if (hppfs->contents != NULL) { - int rem; - - if (*ppos >= hppfs->len) - return 0; - - data = hppfs->contents; - off = *ppos; - while (off >= sizeof(data->contents)) { - data = list_entry(data->list.next, struct hppfs_data, - list); - off -= sizeof(data->contents); - } - - if (off + count > hppfs->len) - count = hppfs->len - off; - rem = copy_to_user(buf, &data->contents[off], count); - *ppos += count - rem; - if (rem > 0) - return -EFAULT; - } else if (hppfs->host_fd != -1) { - err = os_seek_file(hppfs->host_fd, *ppos); - if (err) { - printk(KERN_ERR "hppfs_read : seek failed, " - "errno = %d\n", err); - return err; - } - count = hppfs_read_file(hppfs->host_fd, buf, count); - if (count > 0) - *ppos += count; - } - else count = read_proc(hppfs->proc_file, buf, count, ppos, 1); - - return count; -} - -static ssize_t hppfs_write(struct file *file, const char __user *buf, - size_t len, loff_t *ppos) -{ - struct hppfs_private *data = file->private_data; - struct file *proc_file = data->proc_file; - ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *); - - write = proc_file->f_path.dentry->d_inode->i_fop->write; - return (*write)(proc_file, buf, len, ppos); -} - -static int open_host_sock(char *host_file, int *filter_out) -{ - char *end; - int fd; - - end = &host_file[strlen(host_file)]; - strcpy(end, "/rw"); - *filter_out = 1; - fd = os_connect_socket(host_file); - if (fd > 0) - return fd; - - strcpy(end, "/r"); - *filter_out = 0; - fd = os_connect_socket(host_file); - return fd; -} - -static void free_contents(struct hppfs_data *head) -{ - struct hppfs_data *data; - struct list_head *ele, *next; - - if (head == NULL) - return; - - list_for_each_safe(ele, next, &head->list) { - data = list_entry(ele, struct hppfs_data, list); - kfree(data); - } - kfree(head); -} - -static struct hppfs_data *hppfs_get_data(int fd, int filter, - struct file *proc_file, - struct file *hppfs_file, - loff_t *size_out) -{ - struct hppfs_data *data, *new, *head; - int n, err; - - err = -ENOMEM; - data = kmalloc(sizeof(*data), GFP_KERNEL); - if (data == NULL) { - printk(KERN_ERR "hppfs_get_data : head allocation failed\n"); - goto failed; - } - - INIT_LIST_HEAD(&data->list); - - head = data; - *size_out = 0; - - if (filter) { - while ((n = read_proc(proc_file, data->contents, - sizeof(data->contents), NULL, 0)) > 0) - os_write_file(fd, data->contents, n); - err = os_shutdown_socket(fd, 0, 1); - if (err) { - printk(KERN_ERR "hppfs_get_data : failed to shut down " - "socket\n"); - goto failed_free; - } - } - while (1) { - n = os_read_file(fd, data->contents, sizeof(data->contents)); - if (n < 0) { - err = n; - printk(KERN_ERR "hppfs_get_data : read failed, " - "errno = %d\n", err); - goto failed_free; - } else if (n == 0) - break; - - *size_out += n; - - if (n < sizeof(data->contents)) - break; - - new = kmalloc(sizeof(*data), GFP_KERNEL); - if (new == 0) { - printk(KERN_ERR "hppfs_get_data : data allocation " - "failed\n"); - err = -ENOMEM; - goto failed_free; - } - - INIT_LIST_HEAD(&new->list); - list_add(&new->list, &data->list); - data = new; - } - return head; - - failed_free: - free_contents(head); - failed: - return ERR_PTR(err); -} - -static struct hppfs_private *hppfs_data(void) -{ - struct hppfs_private *data; - - data = kmalloc(sizeof(*data), GFP_KERNEL); - if (data == NULL) - return data; - - *data = ((struct hppfs_private ) { .host_fd = -1, - .len = -1, - .contents = NULL } ); - return data; -} - -static int file_mode(int fmode) -{ - if (fmode == (FMODE_READ | FMODE_WRITE)) - return O_RDWR; - if (fmode == FMODE_READ) - return O_RDONLY; - if (fmode == FMODE_WRITE) - return O_WRONLY; - return 0; -} - -static int hppfs_open(struct inode *inode, struct file *file) -{ - struct hppfs_private *data; - struct vfsmount *proc_mnt; - struct dentry *proc_dentry; - char *host_file; - int err, fd, type, filter; - - err = -ENOMEM; - data = hppfs_data(); - if (data == NULL) - goto out; - - host_file = dentry_name(file->f_path.dentry, strlen("/rw")); - if (host_file == NULL) - goto out_free2; - - proc_dentry = HPPFS_I(inode)->proc_dentry; - proc_mnt = inode->i_sb->s_fs_info; - - /* XXX This isn't closed anywhere */ - data->proc_file = dentry_open(dget(proc_dentry), mntget(proc_mnt), - file_mode(file->f_mode)); - err = PTR_ERR(data->proc_file); - if (IS_ERR(data->proc_file)) - goto out_free1; - - type = os_file_type(host_file); - if (type == OS_TYPE_FILE) { - fd = os_open_file(host_file, of_read(OPENFLAGS()), 0); - if (fd >= 0) - data->host_fd = fd; - else - printk(KERN_ERR "hppfs_open : failed to open '%s', " - "errno = %d\n", host_file, -fd); - - data->contents = NULL; - } else if (type == OS_TYPE_DIR) { - fd = open_host_sock(host_file, &filter); - if (fd > 0) { - data->contents = hppfs_get_data(fd, filter, - data->proc_file, - file, &data->len); - if (!IS_ERR(data->contents)) - data->host_fd = fd; - } else - printk(KERN_ERR "hppfs_open : failed to open a socket " - "in '%s', errno = %d\n", host_file, -fd); - } - kfree(host_file); - - file->private_data = data; - return 0; - - out_free1: - kfree(host_file); - out_free2: - free_contents(data->contents); - kfree(data); - out: - return err; -} - -static int hppfs_dir_open(struct inode *inode, struct file *file) -{ - struct hppfs_private *data; - struct vfsmount *proc_mnt; - struct dentry *proc_dentry; - int err; - - err = -ENOMEM; - data = hppfs_data(); - if (data == NULL) - goto out; - - proc_dentry = HPPFS_I(inode)->proc_dentry; - proc_mnt = inode->i_sb->s_fs_info; - data->proc_file = dentry_open(dget(proc_dentry), mntget(proc_mnt), - file_mode(file->f_mode)); - err = PTR_ERR(data->proc_file); - if (IS_ERR(data->proc_file)) - goto out_free; - - file->private_data = data; - return 0; - - out_free: - kfree(data); - out: - return err; -} - -static loff_t hppfs_llseek(struct file *file, loff_t off, int where) -{ - struct hppfs_private *data = file->private_data; - struct file *proc_file = data->proc_file; - loff_t (*llseek)(struct file *, loff_t, int); - loff_t ret; - - llseek = proc_file->f_path.dentry->d_inode->i_fop->llseek; - if (llseek != NULL) { - ret = (*llseek)(proc_file, off, where); - if (ret < 0) - return ret; - } - - return default_llseek(file, off, where); -} - -static const struct file_operations hppfs_file_fops = { - .owner = NULL, - .llseek = hppfs_llseek, - .read = hppfs_read, - .write = hppfs_write, - .open = hppfs_open, -}; - -struct hppfs_dirent { - void *vfs_dirent; - filldir_t filldir; - struct dentry *dentry; -}; - -static int hppfs_filldir(void *d, const char *name, int size, - loff_t offset, u64 inode, unsigned int type) -{ - struct hppfs_dirent *dirent = d; - - if (file_removed(dirent->dentry, name)) - return 0; - - return (*dirent->filldir)(dirent->vfs_dirent, name, size, offset, - inode, type); -} - -static int hppfs_readdir(struct file *file, void *ent, filldir_t filldir) -{ - struct hppfs_private *data = file->private_data; - struct file *proc_file = data->proc_file; - int (*readdir)(struct file *, void *, filldir_t); - struct hppfs_dirent dirent = ((struct hppfs_dirent) - { .vfs_dirent = ent, - .filldir = filldir, - .dentry = file->f_path.dentry - }); - int err; - - readdir = proc_file->f_path.dentry->d_inode->i_fop->readdir; - - proc_file->f_pos = file->f_pos; - err = (*readdir)(proc_file, &dirent, hppfs_filldir); - file->f_pos = proc_file->f_pos; - - return err; -} - -static int hppfs_fsync(struct file *file, struct dentry *dentry, int datasync) -{ - return 0; -} - -static const struct file_operations hppfs_dir_fops = { - .owner = NULL, - .readdir = hppfs_readdir, - .open = hppfs_dir_open, - .fsync = hppfs_fsync, -}; - -static int hppfs_statfs(struct dentry *dentry, struct kstatfs *sf) -{ - sf->f_blocks = 0; - sf->f_bfree = 0; - sf->f_bavail = 0; - sf->f_files = 0; - sf->f_ffree = 0; - sf->f_type = HPPFS_SUPER_MAGIC; - return 0; -} - -static struct inode *hppfs_alloc_inode(struct super_block *sb) -{ - struct hppfs_inode_info *hi; - - hi = kmalloc(sizeof(*hi), GFP_KERNEL); - if (!hi) - return NULL; - - hi->proc_dentry = NULL; - inode_init_once(&hi->vfs_inode); - return &hi->vfs_inode; -} - -void hppfs_delete_inode(struct inode *ino) -{ - dput(HPPFS_I(ino)->proc_dentry); - mntput(ino->i_sb->s_fs_info); - - clear_inode(ino); -} - -static void hppfs_destroy_inode(struct inode *inode) -{ - kfree(HPPFS_I(inode)); -} - -static const struct super_operations hppfs_sbops = { - .alloc_inode = hppfs_alloc_inode, - .destroy_inode = hppfs_destroy_inode, - .delete_inode = hppfs_delete_inode, - .statfs = hppfs_statfs, -}; - -static int hppfs_readlink(struct dentry *dentry, char __user *buffer, - int buflen) -{ - struct dentry *proc_dentry; - - proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry; - return proc_dentry->d_inode->i_op->readlink(proc_dentry, buffer, - buflen); -} - -static void *hppfs_follow_link(struct dentry *dentry, struct nameidata *nd) -{ - struct dentry *proc_dentry; - - proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry; - - return proc_dentry->d_inode->i_op->follow_link(proc_dentry, nd); -} - -int hppfs_permission(struct inode *inode, int mask, struct nameidata *nd) -{ - return generic_permission(inode, mask, NULL); -} - -static const struct inode_operations hppfs_dir_iops = { - .lookup = hppfs_lookup, - .permission = hppfs_permission, -}; - -static const struct inode_operations hppfs_link_iops = { - .readlink = hppfs_readlink, - .follow_link = hppfs_follow_link, - .permission = hppfs_permission, -}; - -static struct inode *get_inode(struct super_block *sb, struct dentry *dentry) -{ - struct inode *proc_ino = dentry->d_inode; - struct inode *inode = new_inode(sb); - - if (!inode) - return ERR_PTR(-ENOMEM); - - if (S_ISDIR(dentry->d_inode->i_mode)) { - inode->i_op = &hppfs_dir_iops; - inode->i_fop = &hppfs_dir_fops; - } else if (S_ISLNK(dentry->d_inode->i_mode)) { - inode->i_op = &hppfs_link_iops; - inode->i_fop = &hppfs_file_fops; - } else { - inode->i_op = &hppfs_file_iops; - inode->i_fop = &hppfs_file_fops; - } - - HPPFS_I(inode)->proc_dentry = dget(dentry); - - inode->i_uid = proc_ino->i_uid; - inode->i_gid = proc_ino->i_gid; - inode->i_atime = proc_ino->i_atime; - inode->i_mtime = proc_ino->i_mtime; - inode->i_ctime = proc_ino->i_ctime; - inode->i_ino = proc_ino->i_ino; - inode->i_mode = proc_ino->i_mode; - inode->i_nlink = proc_ino->i_nlink; - inode->i_size = proc_ino->i_size; - inode->i_blocks = proc_ino->i_blocks; - - return inode; -} - -static int hppfs_fill_super(struct super_block *sb, void *d, int silent) -{ - struct inode *root_inode; - struct vfsmount *proc_mnt; - int err = -ENOENT; - - proc_mnt = do_kern_mount("proc", 0, "proc", NULL); - if (IS_ERR(proc_mnt)) - goto out; - - sb->s_blocksize = 1024; - sb->s_blocksize_bits = 10; - sb->s_magic = HPPFS_SUPER_MAGIC; - sb->s_op = &hppfs_sbops; - sb->s_fs_info = proc_mnt; - - err = -ENOMEM; - root_inode = get_inode(sb, proc_mnt->mnt_sb->s_root); - if (!root_inode) - goto out_mntput; - - sb->s_root = d_alloc_root(root_inode); - if (!sb->s_root) - goto out_iput; - - return 0; - - out_iput: - iput(root_inode); - out_mntput: - mntput(proc_mnt); - out: - return(err); -} - -static int hppfs_read_super(struct file_system_type *type, - int flags, const char *dev_name, - void *data, struct vfsmount *mnt) -{ - return get_sb_nodev(type, flags, data, hppfs_fill_super, mnt); -} - -static struct file_system_type hppfs_type = { - .owner = THIS_MODULE, - .name = "hppfs", - .get_sb = hppfs_read_super, - .kill_sb = kill_anon_super, - .fs_flags = 0, -}; - -static int __init init_hppfs(void) -{ - return register_filesystem(&hppfs_type); -} - -static void __exit exit_hppfs(void) -{ - unregister_filesystem(&hppfs_type); -} - -module_init(init_hppfs) -module_exit(exit_hppfs) -MODULE_LICENSE("GPL"); -- cgit v1.2.3-70-g09d2 From 8dc4e37362a5dc910d704d52ac6542bfd49ddc2f Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 12 May 2008 14:02:04 -0700 Subject: ecryptfs: clean up (un)lock_parent dget(dentry->d_parent) --> dget_parent(dentry) unlock_parent() is racy and unnecessary. Replace single caller with unlock_dir(). There are several other suspect uses of ->d_parent in ecryptfs... Signed-off-by: Miklos Szeredi Cc: Michael Halcrow Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ecryptfs/inode.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 0a1397335a8..c92cc1c00aa 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -37,17 +37,11 @@ static struct dentry *lock_parent(struct dentry *dentry) { struct dentry *dir; - dir = dget(dentry->d_parent); + dir = dget_parent(dentry); mutex_lock_nested(&(dir->d_inode->i_mutex), I_MUTEX_PARENT); return dir; } -static void unlock_parent(struct dentry *dentry) -{ - mutex_unlock(&(dentry->d_parent->d_inode->i_mutex)); - dput(dentry->d_parent); -} - static void unlock_dir(struct dentry *dir) { mutex_unlock(&dir->d_inode->i_mutex); @@ -426,8 +420,9 @@ static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry) int rc = 0; struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); struct inode *lower_dir_inode = ecryptfs_inode_to_lower(dir); + struct dentry *lower_dir_dentry; - lock_parent(lower_dentry); + lower_dir_dentry = lock_parent(lower_dentry); rc = vfs_unlink(lower_dir_inode, lower_dentry); if (rc) { printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc); @@ -439,7 +434,7 @@ static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry) dentry->d_inode->i_ctime = dir->i_ctime; d_drop(dentry); out_unlock: - unlock_parent(lower_dentry); + unlock_dir(lower_dir_dentry); return rc; } -- cgit v1.2.3-70-g09d2 From bb45d64224e5cafe8c8e0d18a20da998e5a7dc93 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 12 May 2008 14:02:06 -0700 Subject: ufs: remove unneeded ufs_put_inode prototype Signed-off-by: Christoph Hellwig Acked-by: Evgeniy Dushistov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ufs/ufs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h index 244a1aaa940..11c035168ea 100644 --- a/fs/ufs/ufs.h +++ b/fs/ufs/ufs.h @@ -107,7 +107,6 @@ extern struct inode * ufs_new_inode (struct inode *, int); /* inode.c */ extern struct inode *ufs_iget(struct super_block *, unsigned long); -extern void ufs_put_inode (struct inode *); extern int ufs_write_inode (struct inode *, int); extern int ufs_sync_inode (struct inode *); extern void ufs_delete_inode (struct inode *); -- cgit v1.2.3-70-g09d2 From 9377abd026bf9bde7db90dac09170034bf6d1cbf Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 12 May 2008 14:02:08 -0700 Subject: quota: don't call sync_fs() from vfs_quota_off() when there's no quota turn off Sometimes, vfs_quota_off() is called on a partially set up super block (for example when fill_super() fails for some reason). In such cases we cannot call ->sync_fs() because it can Oops because of not properly filled in super block. So in case we find there's not quota to turn off, we just skip everything and return which fixes the above problem. [akpm@linux-foundation.org: fxi tpyo] Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/dquot.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'fs') diff --git a/fs/dquot.c b/fs/dquot.c index dfba1623ccc..5ac77da1995 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -1491,6 +1491,16 @@ int vfs_quota_off(struct super_block *sb, int type, int remount) /* We need to serialize quota_off() for device */ mutex_lock(&dqopt->dqonoff_mutex); + + /* + * Skip everything if there's nothing to do. We have to do this because + * sometimes we are called when fill_super() failed and calling + * sync_fs() in such cases does no good. + */ + if (!sb_any_quota_enabled(sb) && !sb_any_quota_suspended(sb)) { + mutex_unlock(&dqopt->dqonoff_mutex); + return 0; + } for (cnt = 0; cnt < MAXQUOTAS; cnt++) { toputinode[cnt] = NULL; if (type != -1 && cnt != type) -- cgit v1.2.3-70-g09d2 From 289f8e27ed435dcbefad132def06f4e84351e94f Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Mon, 12 May 2008 14:02:13 -0700 Subject: capabilities: add bounding set to /proc/self/status There is currently no way to query the bounding set of another task. As there appears to be no security reason not to, and as Michael Kerrisk points out the following valid reasons to do so exist: * consistency (I can see all of the other per-thread/process sets in /proc/.../status) * debugging -- I could imagine that it would make the job of debugging an application that uses capabilities a little simpler. this patch adds the bounding set to /proc/self/status right after the effective set. Signed-off-by: Serge E. Hallyn Acked-by: Michael Kerrisk Acked-by: Andrew G. Morgan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/array.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/proc/array.c b/fs/proc/array.c index dca997a93bf..9e3b8c33c24 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -298,6 +298,7 @@ static inline void task_cap(struct seq_file *m, struct task_struct *p) render_cap_t(m, "CapInh:\t", &p->cap_inheritable); render_cap_t(m, "CapPrm:\t", &p->cap_permitted); render_cap_t(m, "CapEff:\t", &p->cap_effective); + render_cap_t(m, "CapBnd:\t", &p->cap_bset); } static inline void task_context_switch_counts(struct seq_file *m, -- cgit v1.2.3-70-g09d2 From 706322496b3a58af3cf258db2b553d6933656eef Mon Sep 17 00:00:00 2001 From: Eric Sesterhenn Date: Mon, 12 May 2008 14:02:21 -0700 Subject: Fix hfsplus oops on image without extents Fix an oops with a corrupted hfs+ image. See http://bugzilla.kernel.org/show_bug.cgi?id=10548 for details. Problem is that we call hfs_btree_open() from hfsplus_fill_super() to set HFSPLUS_SB(sb).[ext_tree|cat_tree] Both trees are still NULL at this moment. If hfs_btree_open() fails for any reason it calls iput() on the page, which gets to hfsplus_releasepage() which tries to access HFSPLUS_SB(sb).* which is still NULL and oopses while dereferencing it. [akpm@linux-foundation.org: build fix] Signed-off-by: Eric Sesterhenn Cc: Roman Zippel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hfsplus/inode.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index d53b2af91c2..67e1c8b467c 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -65,6 +65,8 @@ static int hfsplus_releasepage(struct page *page, gfp_t mask) BUG(); return 0; } + if (!tree) + return 0; if (tree->node_size >= PAGE_CACHE_SIZE) { nidx = page->index >> (tree->node_size_shift - PAGE_CACHE_SHIFT); spin_lock(&tree->hash_lock); -- cgit v1.2.3-70-g09d2 From 4cd1a8fc3d3cd740416b14ece2693dbb5d065eaf Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Mon, 12 May 2008 14:02:31 -0700 Subject: memcg: fix possible panic when CONFIG_MM_OWNER=y When mm destruction happens, we should pass mm_update_next_owner() the old mm. But unfortunately new mm is passed in exec_mmap(). Thus, kernel panic is possible when a multi-threaded process uses exec(). Also, the owner member comment description is wrong. mm->owner does not necessarily point to the thread group leader. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: KOSAKI Motohiro Acked-by: Balbir Singh Cc: "Paul Menage" Cc: "KAMEZAWA Hiroyuki" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 2 +- include/linux/mm_types.h | 13 +++++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index aeaa9791d8b..1f8a24aa1f8 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -736,7 +736,7 @@ static int exec_mmap(struct mm_struct *mm) tsk->active_mm = mm; activate_mm(active_mm, mm); task_unlock(tsk); - mm_update_next_owner(mm); + mm_update_next_owner(old_mm); arch_pick_mmap_layout(mm); if (old_mm) { up_read(&old_mm->mmap_sem); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index eb7c16cc955..02a27ae7853 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -226,8 +226,17 @@ struct mm_struct { rwlock_t ioctx_list_lock; /* aio lock */ struct kioctx *ioctx_list; #ifdef CONFIG_MM_OWNER - struct task_struct *owner; /* The thread group leader that */ - /* owns the mm_struct. */ + /* + * "owner" points to a task that is regarded as the canonical + * user/owner of this mm. All of the following must be true in + * order for it to be changed: + * + * current == mm->owner + * current->mm != mm + * new_owner->mm == mm + * new_owner->alloc_lock is held + */ + struct task_struct *owner; #endif #ifdef CONFIG_PROC_FS -- cgit v1.2.3-70-g09d2 From 78bb6cb9a890d3d50ca3b02fce9223d3e734ab9b Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 12 May 2008 14:02:32 -0700 Subject: fuse: add flag to turn on big writes Prior to 2.6.26 fuse only supported single page write requests. In theory all fuse filesystem should be able support bigger than 4k writes, as there's nothing in the API to prevent it. Unfortunately there's a known case in NTFS-3G where big writes cause filesystem corruption. There could also be other filesystems, where the lack of testing with big write requests would result in bugs. To prevent such problems on a kernel upgrade, disable big writes by default, but let filesystems set a flag to turn it on. Signed-off-by: Miklos Szeredi Cc: Szabolcs Szakacsits Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fuse/file.c | 2 ++ fs/fuse/fuse_i.h | 3 +++ fs/fuse/inode.c | 5 ++++- include/linux/fuse.h | 1 + 4 files changed, 10 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index f28cf8b46f8..8092f0d9fd1 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -804,6 +804,8 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req, if (offset == PAGE_CACHE_SIZE) offset = 0; + if (!fc->big_writes) + break; } while (iov_iter_count(ii) && count < fc->max_write && req->num_pages < FUSE_MAX_PAGES_PER_REQ && offset == 0); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index dadffa21a20..bae948657c4 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -404,6 +404,9 @@ struct fuse_conn { /** Is bmap not implemented by fs? */ unsigned no_bmap : 1; + /** Do multi-page cached writes */ + unsigned big_writes : 1; + /** The number of requests waiting for completion */ atomic_t num_waiting; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 79b61587383..fb77e096213 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -576,6 +576,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->no_lock = 1; if (arg->flags & FUSE_ATOMIC_O_TRUNC) fc->atomic_o_trunc = 1; + if (arg->flags & FUSE_BIG_WRITES) + fc->big_writes = 1; } else { ra_pages = fc->max_read / PAGE_CACHE_SIZE; fc->no_lock = 1; @@ -599,7 +601,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) arg->major = FUSE_KERNEL_VERSION; arg->minor = FUSE_KERNEL_MINOR_VERSION; arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE; - arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC; + arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | + FUSE_BIG_WRITES; req->in.h.opcode = FUSE_INIT; req->in.numargs = 1; req->in.args[0].size = sizeof(*arg); diff --git a/include/linux/fuse.h b/include/linux/fuse.h index 5c86f1196c3..d4828219769 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -109,6 +109,7 @@ struct fuse_file_lock { #define FUSE_POSIX_LOCKS (1 << 1) #define FUSE_FILE_OPS (1 << 2) #define FUSE_ATOMIC_O_TRUNC (1 << 3) +#define FUSE_BIG_WRITES (1 << 5) /** * Release flags -- cgit v1.2.3-70-g09d2 From f36f21ecca9ee688301174e5f2e0827827a7a7ff Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 12 May 2008 14:02:33 -0700 Subject: Fix misuses of bdevname() bdevname() fills the buffer that it is given as a parameter, so calling strcpy() or snprintf() on the returned value is redundant (and probably not guaranteed to work - I don't think strcpy and snprintf support overlapping buffers.) Signed-off-by: Jean Delvare Cc: Stephen Tweedie Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- block/blktrace.c | 2 +- block/compat_ioctl.c | 2 +- fs/ext4/mballoc.c | 6 ++---- fs/jbd2/journal.c | 4 ++-- 4 files changed, 6 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/block/blktrace.c b/block/blktrace.c index 568588cd16b..b2cbb4e5d76 100644 --- a/block/blktrace.c +++ b/block/blktrace.c @@ -476,7 +476,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) switch (cmd) { case BLKTRACESETUP: - strcpy(b, bdevname(bdev, b)); + bdevname(bdev, b); ret = blk_trace_setup(q, b, bdev->bd_dev, arg); break; case BLKTRACESTART: diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index c70d0b6f666..c23177e4623 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -555,7 +555,7 @@ static int compat_blk_trace_setup(struct block_device *bdev, char __user *arg) if (copy_from_user(&cbuts, arg, sizeof(cbuts))) return -EFAULT; - strcpy(b, bdevname(bdev, b)); + bdevname(bdev, b); buts = (struct blk_user_trace_setup) { .act_mask = cbuts.act_mask, diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index fbec2ef9379..b128bdc0f55 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2639,8 +2639,7 @@ static int ext4_mb_init_per_dev_proc(struct super_block *sb) struct proc_dir_entry *proc; char devname[64]; - snprintf(devname, sizeof(devname) - 1, "%s", - bdevname(sb->s_bdev, devname)); + bdevname(sb->s_bdev, devname); sbi->s_mb_proc = proc_mkdir(devname, proc_root_ext4); MB_PROC_HANDLER(EXT4_MB_STATS_NAME, stats); @@ -2674,8 +2673,7 @@ static int ext4_mb_destroy_per_dev_proc(struct super_block *sb) if (sbi->s_mb_proc == NULL) return -EINVAL; - snprintf(devname, sizeof(devname) - 1, "%s", - bdevname(sb->s_bdev, devname)); + bdevname(sb->s_bdev, devname); remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc); remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc); remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 53632e3e845..2e24567c4a7 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -901,7 +901,7 @@ static void jbd2_stats_proc_init(journal_t *journal) { char name[BDEVNAME_SIZE]; - snprintf(name, sizeof(name) - 1, "%s", bdevname(journal->j_dev, name)); + bdevname(journal->j_dev, name); journal->j_proc_entry = proc_mkdir(name, proc_jbd2_stats); if (journal->j_proc_entry) { proc_create_data("history", S_IRUGO, journal->j_proc_entry, @@ -915,7 +915,7 @@ static void jbd2_stats_proc_exit(journal_t *journal) { char name[BDEVNAME_SIZE]; - snprintf(name, sizeof(name) - 1, "%s", bdevname(journal->j_dev, name)); + bdevname(journal->j_dev, name); remove_proc_entry("info", journal->j_proc_entry); remove_proc_entry("history", journal->j_proc_entry); remove_proc_entry(name, proc_jbd2_stats); -- cgit v1.2.3-70-g09d2 From 43f14d856f013a4cc63da2c765617c665274338c Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 12 May 2008 14:02:40 -0700 Subject: eCryptFS: fix imbalanced mutex locking Fix imbalanced calls for mutex lock/unlock on ecryptfs_daemon_hash_mux Revealed by Ingo Molnar: http://lkml.org/lkml/2008/5/7/260 Signed-off-by: Cyrill Gorcunov Cc: Michael Halcrow Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ecryptfs/miscdev.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c index 788995efd1d..6560da1a58c 100644 --- a/fs/ecryptfs/miscdev.c +++ b/fs/ecryptfs/miscdev.c @@ -257,12 +257,14 @@ ecryptfs_miscdev_read(struct file *file, char __user *buf, size_t count, mutex_lock(&daemon->mux); if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) { rc = 0; + mutex_unlock(&ecryptfs_daemon_hash_mux); printk(KERN_WARNING "%s: Attempt to read from zombified " "daemon\n", __func__); goto out_unlock_daemon; } if (daemon->flags & ECRYPTFS_DAEMON_IN_READ) { rc = 0; + mutex_unlock(&ecryptfs_daemon_hash_mux); goto out_unlock_daemon; } /* This daemon will not go away so long as this flag is set */ -- cgit v1.2.3-70-g09d2 From b32a09db4fb9a87246ba4e7726a979ac4709ad97 Mon Sep 17 00:00:00 2001 From: Markus Armbruster Date: Tue, 26 Feb 2008 09:57:11 -0600 Subject: add match_strlcpy() us it to make v9fs make uname and remotename parsing more robust match_strcpy() is a somewhat creepy function: the caller needs to make sure that the destination buffer is big enough, and when he screws up or forgets, match_strcpy() happily overruns the buffer. There's exactly one customer: v9fs_parse_options(). I believe it currently can't overflow its buffer, but that's not exactly obvious. The source string is a substing of the mount options. The kernel silently truncates those to PAGE_SIZE bytes, including the terminating zero. See compat_sys_mount() and do_mount(). The destination buffer is obtained from __getname(), which allocates from name_cachep, which is initialized by vfs_caches_init() for size PATH_MAX. We're safe as long as PATH_MAX <= PAGE_SIZE. PATH_MAX is 4096. As far as I know, the smallest PAGE_SIZE is also 4096. Here's a patch that makes the code a bit more obviously correct. It doesn't depend on PATH_MAX <= PAGE_SIZE. Signed-off-by: Markus Armbruster Cc: Latchesar Ionkov Cc: Jim Meyering Cc: "Randy.Dunlap" Signed-off-by: Andrew Morton Signed-off-by: Eric Van Hensbergen --- fs/9p/v9fs.c | 4 ++-- include/linux/parser.h | 2 +- lib/parser.c | 32 ++++++++++++++++++++------------ 3 files changed, 23 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 9b0f0222e8b..e307fbd34fa 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -125,10 +125,10 @@ static void v9fs_parse_options(struct v9fs_session_info *v9ses) v9ses->afid = option; break; case Opt_uname: - match_strcpy(v9ses->uname, &args[0]); + match_strlcpy(v9ses->uname, &args[0], PATH_MAX); break; case Opt_remotename: - match_strcpy(v9ses->aname, &args[0]); + match_strlcpy(v9ses->aname, &args[0], PATH_MAX); break; case Opt_nodevmap: v9ses->nodev = 1; diff --git a/include/linux/parser.h b/include/linux/parser.h index 26b2bdfcaf0..7dcd0507575 100644 --- a/include/linux/parser.h +++ b/include/linux/parser.h @@ -29,5 +29,5 @@ int match_token(char *, match_table_t table, substring_t args[]); int match_int(substring_t *, int *result); int match_octal(substring_t *, int *result); int match_hex(substring_t *, int *result); -void match_strcpy(char *, const substring_t *); +size_t match_strlcpy(char *, const substring_t *, size_t); char *match_strdup(const substring_t *); diff --git a/lib/parser.c b/lib/parser.c index 703c8c13b34..4f0cbc03e0e 100644 --- a/lib/parser.c +++ b/lib/parser.c @@ -182,18 +182,25 @@ int match_hex(substring_t *s, int *result) } /** - * match_strcpy: - copies the characters from a substring_t to a string - * @to: string to copy characters to. - * @s: &substring_t to copy + * match_strlcpy: - Copy the characters from a substring_t to a sized buffer + * @dest: where to copy to + * @src: &substring_t to copy + * @size: size of destination buffer * - * Description: Copies the set of characters represented by the given - * &substring_t @s to the c-style string @to. Caller guarantees that @to is - * large enough to hold the characters of @s. + * Description: Copy the characters in &substring_t @src to the + * c-style string @dest. Copy no more than @size - 1 characters, plus + * the terminating NUL. Return length of @src. */ -void match_strcpy(char *to, const substring_t *s) +size_t match_strlcpy(char *dest, const substring_t *src, size_t size) { - memcpy(to, s->from, s->to - s->from); - to[s->to - s->from] = '\0'; + size_t ret = src->to - src->from; + + if (size) { + size_t len = ret >= size ? size - 1 : ret; + memcpy(dest, src->from, len); + dest[len] = '\0'; + } + return ret; } /** @@ -206,9 +213,10 @@ void match_strcpy(char *to, const substring_t *s) */ char *match_strdup(const substring_t *s) { - char *p = kmalloc(s->to - s->from + 1, GFP_KERNEL); + size_t sz = s->to - s->from + 1; + char *p = kmalloc(sz, GFP_KERNEL); if (p) - match_strcpy(p, s); + match_strlcpy(p, s, sz); return p; } @@ -216,5 +224,5 @@ EXPORT_SYMBOL(match_token); EXPORT_SYMBOL(match_int); EXPORT_SYMBOL(match_octal); EXPORT_SYMBOL(match_hex); -EXPORT_SYMBOL(match_strcpy); +EXPORT_SYMBOL(match_strlcpy); EXPORT_SYMBOL(match_strdup); -- cgit v1.2.3-70-g09d2 From ee443996a35c1e04f210cafd43d5a98d41e46085 Mon Sep 17 00:00:00 2001 From: Eric Van Hensbergen Date: Wed, 5 Mar 2008 07:08:09 -0600 Subject: 9p: Documentation updates The kernel-doc comments of much of the 9p system have been in disarray since reorganization. This patch fixes those problems, adds additional documentation and a template book which collects the 9p information. Signed-off-by: Eric Van Hensbergen --- fs/9p/fid.h | 15 +++ fs/9p/v9fs.c | 8 +- fs/9p/v9fs.h | 85 +++++++++++----- fs/9p/vfs_addr.c | 2 +- fs/9p/vfs_dir.c | 2 +- fs/9p/vfs_file.c | 11 ++- fs/9p/vfs_inode.c | 50 +++++++--- fs/9p/vfs_super.c | 1 + include/net/9p/9p.h | 239 +++++++++++++++++++++++++++++++++++++++------ include/net/9p/client.h | 35 +++++++ include/net/9p/transport.h | 43 ++++++++ net/9p/conv.c | 128 +++++++++++++++++++++++- net/9p/error.c | 11 ++- net/9p/fcprint.c | 8 ++ net/9p/mod.c | 7 +- net/9p/trans_fd.c | 146 ++++++++++++++++++++++----- net/9p/trans_virtio.c | 155 +++++++++++++++++++++++++++-- net/9p/util.c | 32 ++++-- 18 files changed, 854 insertions(+), 124 deletions(-) (limited to 'fs') diff --git a/fs/9p/fid.h b/fs/9p/fid.h index 26e07df783b..c3bbd6af996 100644 --- a/fs/9p/fid.h +++ b/fs/9p/fid.h @@ -22,6 +22,21 @@ #include +/** + * struct v9fs_dentry - 9p private data stored in dentry d_fsdata + * @lock: protects the fidlist + * @fidlist: list of FIDs currently associated with this dentry + * + * This structure defines the 9p private data associated with + * a particular dentry. In particular, this private data is used + * to lookup which 9P FID handle should be used for a particular VFS + * operation. FID handles are associated with dentries instead of + * inodes in order to more closely map functionality to the Plan 9 + * expected behavior for FID reclaimation and tracking. + * + * See Also: Mapping FIDs to Linux VFS model in + * Design and Implementation of the Linux 9P File System documentation + */ struct v9fs_dentry { spinlock_t lock; /* protect fidlist */ struct list_head fidlist; diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index e307fbd34fa..79d310c0018 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -71,7 +71,6 @@ static match_table_t tokens = { /** * v9fs_parse_options - parse mount options into session structure - * @options: options string passed from mount * @v9ses: existing v9fs session information * */ @@ -256,9 +255,12 @@ void v9fs_session_close(struct v9fs_session_info *v9ses) } /** - * v9fs_session_cancel - mark transport as disconnected - * and cancel all pending requests. + * v9fs_session_cancel - terminate a session + * @v9ses: session to terminate + * + * mark transport as disconnected and cancel all pending requests. */ + void v9fs_session_cancel(struct v9fs_session_info *v9ses) { P9_DPRINTK(P9_DEBUG_ERROR, "cancel session %p\n", v9ses); p9_client_disconnect(v9ses->clnt); diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index 7d3a1018db5..a7d56719299 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h @@ -21,18 +21,69 @@ * */ -/* - * Session structure provides information for an opened session - * - */ +/** + * enum p9_session_flags - option flags for each 9P session + * @V9FS_EXTENDED: whether or not to use 9P2000.u extensions + * @V9FS_ACCESS_SINGLE: only the mounting user can access the hierarchy + * @V9FS_ACCESS_USER: a new attach will be issued for every user (default) + * @V9FS_ACCESS_ANY: use a single attach for all users + * @V9FS_ACCESS_MASK: bit mask of different ACCESS options + * + * Session flags reflect options selected by users at mount time + */ +enum p9_session_flags { + V9FS_EXTENDED = 0x01, + V9FS_ACCESS_SINGLE = 0x02, + V9FS_ACCESS_USER = 0x04, + V9FS_ACCESS_ANY = 0x06, + V9FS_ACCESS_MASK = 0x06, +}; + +/* possible values of ->cache */ +/** + * enum p9_cache_modes - user specified cache preferences + * @CACHE_NONE: do not cache data, dentries, or directory contents (default) + * @CACHE_LOOSE: cache data, dentries, and directory contents w/no consistency + * + * eventually support loose, tight, time, session, default always none + */ + +enum p9_cache_modes { + CACHE_NONE, + CACHE_LOOSE, +}; + +/** + * struct v9fs_session_info - per-instance session information + * @flags: session options of type &p9_session_flags + * @nodev: set to 1 to disable device mapping + * @debug: debug level + * @afid: authentication handle + * @cache: cache mode of type &p9_cache_modes + * @options: copy of options string given by user + * @uname: string user name to mount hierarchy as + * @aname: mount specifier for remote hierarchy + * @maxdata: maximum data to be sent/recvd per protocol message + * @dfltuid: default numeric userid to mount hierarchy as + * @dfltgid: default numeric groupid to mount hierarchy as + * @uid: if %V9FS_ACCESS_SINGLE, the numeric uid which mounted the hierarchy + * @clnt: reference to 9P network client instantiated for this session + * @debugfs_dir: reference to debugfs_dir which can be used for add'l debug + * + * This structure holds state for each session instance established during + * a sys_mount() . + * + * Bugs: there seems to be a lot of state which could be condensed and/or + * removed. + */ struct v9fs_session_info { /* options */ - unsigned char flags; /* session flags */ - unsigned char nodev; /* set to 1 if no disable device mapping */ - unsigned short debug; /* debug level */ - unsigned int afid; /* authentication fid */ - unsigned int cache; /* cache mode */ + unsigned char flags; + unsigned char nodev; + unsigned short debug; + unsigned int afid; + unsigned int cache; char *options; /* copy of mount options */ char *uname; /* user name to mount as */ @@ -45,22 +96,6 @@ struct v9fs_session_info { struct dentry *debugfs_dir; }; -/* session flags */ -enum { - V9FS_EXTENDED = 0x01, /* 9P2000.u */ - V9FS_ACCESS_MASK = 0x06, /* access mask */ - V9FS_ACCESS_SINGLE = 0x02, /* only one user can access the files */ - V9FS_ACCESS_USER = 0x04, /* attache per user */ - V9FS_ACCESS_ANY = 0x06, /* use the same attach for all users */ -}; - -/* possible values of ->cache */ -/* eventually support loose, tight, time, session, default always none */ -enum { - CACHE_NONE, /* default */ - CACHE_LOOSE, /* no consistency */ -}; - extern struct dentry *v9fs_debugfs_root; struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *, diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index 6248f0e727a..97d3aed5798 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -43,7 +43,7 @@ /** * v9fs_vfs_readpage - read an entire page in from 9P * - * @file: file being read + * @filp: file being read * @page: structure to page * */ diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index 0924d4477da..88e3787c6ea 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -60,7 +60,7 @@ static inline int dt_type(struct p9_stat *mistat) /** * v9fs_dir_readdir - read a directory - * @filep: opened file structure + * @filp: opened file structure * @dirent: directory structure ??? * @filldir: function to populate directory structure ??? * diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index a616fff8906..0d55affe37d 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -90,10 +90,11 @@ int v9fs_file_open(struct inode *inode, struct file *file) /** * v9fs_file_lock - lock a file (or directory) - * @inode: inode to be opened - * @file: file being opened + * @filp: file to be locked + * @cmd: lock command + * @fl: file lock structure * - * XXX - this looks like a local only lock, we should extend into 9P + * Bugs: this looks like a local only lock, we should extend into 9P * by using open exclusive */ @@ -118,7 +119,7 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl) /** * v9fs_file_read - read from a file - * @filep: file pointer to read + * @filp: file pointer to read * @data: data buffer to read data into * @count: size of buffer * @offset: offset at which to read data @@ -142,7 +143,7 @@ v9fs_file_read(struct file *filp, char __user * data, size_t count, /** * v9fs_file_write - write to a file - * @filep: file pointer to write + * @filp: file pointer to write * @data: data buffer to write data from * @count: size of buffer * @offset: offset at which to write data diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 6a28842052e..40fa807bd92 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -129,6 +129,12 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode) return res; } +/** + * v9fs_uflags2omode- convert posix open flags to plan 9 mode bits + * @uflags: flags to convert + * + */ + int v9fs_uflags2omode(int uflags) { int ret; @@ -312,6 +318,14 @@ error: } */ +/** + * v9fs_inode_from_fid - populate an inode by issuing a attribute request + * @v9ses: session information + * @fid: fid to issue attribute request for + * @sb: superblock on which to create inode + * + */ + static struct inode * v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, struct super_block *sb) @@ -384,9 +398,12 @@ v9fs_open_created(struct inode *inode, struct file *file) /** * v9fs_create - Create a file + * @v9ses: session information + * @dir: directory that dentry is being created in * @dentry: dentry that is being created * @perm: create permissions * @mode: open mode + * @extension: 9p2000.u extension string to support devices, etc. * */ static struct p9_fid * @@ -461,7 +478,7 @@ error: /** * v9fs_vfs_create - VFS hook to create files - * @inode: directory inode that is being created + * @dir: directory inode that is being created * @dentry: dentry that is being deleted * @mode: create permissions * @nd: path information @@ -519,7 +536,7 @@ error: /** * v9fs_vfs_mkdir - VFS mkdir hook to create a directory - * @inode: inode that is being unlinked + * @dir: inode that is being unlinked * @dentry: dentry that is being unlinked * @mode: mode for new directory * @@ -703,9 +720,9 @@ done: /** * v9fs_vfs_getattr - retrieve file metadata - * @mnt - mount information - * @dentry - file to get attributes on - * @stat - metadata structure to populate + * @mnt: mount information + * @dentry: file to get attributes on + * @stat: metadata structure to populate * */ @@ -928,7 +945,7 @@ done: /** * v9fs_vfs_readlink - read a symlink's location * @dentry: dentry for symlink - * @buf: buffer to load symlink location into + * @buffer: buffer to load symlink location into * @buflen: length of buffer * */ @@ -996,10 +1013,12 @@ static void *v9fs_vfs_follow_link(struct dentry *dentry, struct nameidata *nd) * v9fs_vfs_put_link - release a symlink path * @dentry: dentry for symlink * @nd: nameidata + * @p: unused * */ -static void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p) +static void +v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p) { char *s = nd_get_link(nd); @@ -1008,6 +1027,15 @@ static void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void __putname(s); } +/** + * v9fs_vfs_mkspecial - create a special file + * @dir: inode to create special file in + * @dentry: dentry to create + * @mode: mode to create special file + * @extension: 9p2000.u format extension string representing special file + * + */ + static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry, int mode, const char *extension) { @@ -1037,7 +1065,7 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry, * @dentry: dentry for symlink * @symname: symlink data * - * See 9P2000.u RFC for more information + * See Also: 9P2000.u RFC for more information * */ @@ -1058,10 +1086,6 @@ v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) * */ -/* XXX - lots of code dup'd from symlink and creates, - * figure out a better reuse strategy - */ - static int v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) @@ -1098,7 +1122,7 @@ clunk_fid: * @dir: inode destination for new link * @dentry: dentry for file * @mode: mode for creation - * @dev_t: device associated with special file + * @rdev: device associated with special file * */ diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index a452ac67fc9..ba10f172626 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -75,6 +75,7 @@ static int v9fs_set_super(struct super_block *s, void *data) * v9fs_fill_super - populate superblock with info * @sb: superblock * @v9ses: session information + * @flags: flags propagated from v9fs_get_sb() * */ diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index 585eb449699..7bfb2f2e423 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -29,14 +29,31 @@ #ifdef CONFIG_NET_9P_DEBUG -#define P9_DEBUG_ERROR (1<<0) -#define P9_DEBUG_9P (1<<2) -#define P9_DEBUG_VFS (1<<3) -#define P9_DEBUG_CONV (1<<4) -#define P9_DEBUG_MUX (1<<5) -#define P9_DEBUG_TRANS (1<<6) -#define P9_DEBUG_SLABS (1<<7) -#define P9_DEBUG_FCALL (1<<8) +/** + * enum p9_debug_flags - bits for mount time debug parameter + * @P9_DEBUG_ERROR: more verbose error messages including original error string + * @P9_DEBUG_9P: 9P protocol tracing + * @P9_DEBUG_VFS: VFS API tracing + * @P9_DEBUG_CONV: protocol conversion tracing + * @P9_DEBUG_MUX: trace management of concurrent transactions + * @P9_DEBUG_TRANS: transport tracing + * @P9_DEBUG_SLABS: memory management tracing + * @P9_DEBUG_FCALL: verbose dump of protocol messages + * + * These flags are passed at mount time to turn on various levels of + * verbosity and tracing which will be output to the system logs. + */ + +enum p9_debug_flags { + P9_DEBUG_ERROR = (1<<0), + P9_DEBUG_9P = (1<<2), + P9_DEBUG_VFS = (1<<3), + P9_DEBUG_CONV = (1<<4), + P9_DEBUG_MUX = (1<<5), + P9_DEBUG_TRANS = (1<<6), + P9_DEBUG_SLABS = (1<<7), + P9_DEBUG_FCALL = (1<<8), +}; extern unsigned int p9_debug_level; @@ -62,9 +79,47 @@ do { \ format , __FUNCTION__, task_pid_nr(current), ## arg); \ } while (0) +/** + * enum p9_msg_t - 9P message types + * @P9_TVERSION: version handshake request + * @P9_RVERSION: version handshake response + * @P9_TAUTH: request to establish authentication channel + * @P9_RAUTH: response with authentication information + * @P9_TATTACH: establish user access to file service + * @P9_RATTACH: response with top level handle to file hierarchy + * @P9_TERROR: not used + * @P9_RERROR: response for any failed request + * @P9_TFLUSH: request to abort a previous request + * @P9_RFLUSH: response when previous request has been cancelled + * @P9_TWALK: descend a directory hierarchy + * @P9_RWALK: response with new handle for position within hierarchy + * @P9_TOPEN: prepare a handle for I/O on an existing file + * @P9_ROPEN: response with file access information + * @P9_TCREATE: prepare a handle for I/O on a new file + * @P9_RCREATE: response with file access information + * @P9_TREAD: request to transfer data from a file or directory + * @P9_RREAD: response with data requested + * @P9_TWRITE: reuqest to transfer data to a file + * @P9_RWRITE: response with out much data was transfered to file + * @P9_TCLUNK: forget about a handle to an entity within the file system + * @P9_RCLUNK: response when server has forgotten about the handle + * @P9_TREMOVE: request to remove an entity from the hierarchy + * @P9_RREMOVE: response when server has removed the entity + * @P9_TSTAT: request file entity attributes + * @P9_RSTAT: response with file entity attributes + * @P9_TWSTAT: request to update file entity attributes + * @P9_RWSTAT: response when file entity attributes are updated + * + * There are 14 basic operations in 9P2000, paired as + * requests and responses. The one special case is ERROR + * as there is no @P9_TERROR request for clients to transmit to + * the server, but the server may respond to any other request + * with an @P9_RERROR. + * + * See Also: http://plan9.bell-labs.com/sys/man/5/INDEX.html + */ -/* Message Types */ -enum { +enum p9_msg_t { P9_TVERSION = 100, P9_RVERSION, P9_TAUTH = 102, @@ -95,30 +150,71 @@ enum { P9_RWSTAT, }; -/* open modes */ -enum { +/** + * enum p9_open_mode_t - 9P open modes + * @P9_OREAD: open file for reading only + * @P9_OWRITE: open file for writing only + * @P9_ORDWR: open file for reading or writing + * @P9_OEXEC: open file for execution + * @P9_OTRUNC: truncate file to zero-length before opening it + * @P9_OREXEC: close the file when an exec(2) system call is made + * @P9_ORCLOSE: remove the file when the file is closed + * @P9_OAPPEND: open the file and seek to the end + * @P9_OEXCL: only create a file, do not open it + * + * 9P open modes differ slightly from Posix standard modes. + * In particular, there are extra modes which specify different + * semantic behaviors than may be available on standard Posix + * systems. For example, @P9_OREXEC and @P9_ORCLOSE are modes that + * most likely will not be issued from the Linux VFS client, but may + * be supported by servers. + * + * See Also: http://plan9.bell-labs.com/magic/man2html/2/open + */ + +enum p9_open_mode_t { P9_OREAD = 0x00, P9_OWRITE = 0x01, P9_ORDWR = 0x02, P9_OEXEC = 0x03, - P9_OEXCL = 0x04, P9_OTRUNC = 0x10, P9_OREXEC = 0x20, P9_ORCLOSE = 0x40, P9_OAPPEND = 0x80, -}; - -/* permissions */ -enum { + P9_OEXCL = 0x1000, +}; + +/** + * enum p9_perm_t - 9P permissions + * @P9_DMDIR: mode bite for directories + * @P9_DMAPPEND: mode bit for is append-only + * @P9_DMEXCL: mode bit for excluse use (only one open handle allowed) + * @P9_DMMOUNT: mode bite for mount points + * @P9_DMAUTH: mode bit for authentication file + * @P9_DMTMP: mode bit for non-backed-up files + * @P9_DMSYMLINK: mode bit for symbolic links (9P2000.u) + * @P9_DMLINK: mode bit for hard-link (9P2000.u) + * @P9_DMDEVICE: mode bit for device files (9P2000.u) + * @P9_DMNAMEDPIPE: mode bit for named pipe (9P2000.u) + * @P9_DMSOCKET: mode bit for socket (9P2000.u) + * @P9_DMSETUID: mode bit for setuid (9P2000.u) + * @P9_DMSETGID: mode bit for setgid (9P2000.u) + * @P9_DMSETVTX: mode bit for sticky bit (9P2000.u) + * + * 9P permissions differ slightly from Posix standard modes. + * + * See Also: http://plan9.bell-labs.com/magic/man2html/2/stat + */ +enum p9_perm_t { P9_DMDIR = 0x80000000, P9_DMAPPEND = 0x40000000, P9_DMEXCL = 0x20000000, P9_DMMOUNT = 0x10000000, P9_DMAUTH = 0x08000000, P9_DMTMP = 0x04000000, +/* 9P2000.u extensions */ P9_DMSYMLINK = 0x02000000, P9_DMLINK = 0x01000000, - /* 9P2000.u extensions */ P9_DMDEVICE = 0x00800000, P9_DMNAMEDPIPE = 0x00200000, P9_DMSOCKET = 0x00100000, @@ -127,8 +223,26 @@ enum { P9_DMSETVTX = 0x00010000, }; -/* qid.types */ -enum { +/** + * enum p9_qid_t - QID types + * @P9_QTDIR: directory + * @P9_QTAPPEND: append-only + * @P9_QTEXCL: excluse use (only one open handle allowed) + * @P9_QTMOUNT: mount points + * @P9_QTAUTH: authentication file + * @P9_QTTMP: non-backed-up files + * @P9_QTSYMLINK: symbolic links (9P2000.u) + * @P9_QTLINK: hard-link (9P2000.u) + * @P9_QTFILE: normal files + * + * QID types are a subset of permissions - they are primarily + * used to differentiate semantics for a file system entity via + * a jump-table. Their value is also the most signifigant 16 bits + * of the permission_t + * + * See Also: http://plan9.bell-labs.com/magic/man2html/2/stat + */ +enum p9_qid_t { P9_QTDIR = 0x80, P9_QTAPPEND = 0x40, P9_QTEXCL = 0x20, @@ -140,6 +254,7 @@ enum { P9_QTFILE = 0x00, }; +/* 9P Magic Numbers */ #define P9_NOTAG (u16)(~0) #define P9_NOFID (u32)(~0) #define P9_MAXWELEM 16 @@ -147,19 +262,69 @@ enum { /* ample room for Twrite/Rread header */ #define P9_IOHDRSZ 24 +/** + * struct p9_str - length prefixed string type + * @len: length of the string + * @str: the string + * + * The protocol uses length prefixed strings for all + * string data, so we replicate that for our internal + * string members. + */ + struct p9_str { u16 len; char *str; }; -/* qids are the unique ID for a file (like an inode */ +/** + * struct p9_qid - file system entity information + * @type: 8-bit type &p9_qid_t + * @version: 16-bit monotonically incrementing version number + * @path: 64-bit per-server-unique ID for a file system element + * + * qids are identifiers used by 9P servers to track file system + * entities. The type is used to differentiate semantics for operations + * on the entity (ie. read means something different on a directory than + * on a file). The path provides a server unique index for an entity + * (roughly analogous to an inode number), while the version is updated + * every time a file is modified and can be used to maintain cache + * coherency between clients and serves. + * Servers will often differentiate purely synthetic entities by setting + * their version to 0, signaling that they should never be cached and + * should be accessed synchronously. + * + * See Also://plan9.bell-labs.com/magic/man2html/2/stat + */ + struct p9_qid { u8 type; u32 version; u64 path; }; -/* Plan 9 file metadata (stat) structure */ +/** + * struct p9_stat - file system metadata information + * @size: length prefix for this stat structure instance + * @type: the type of the server (equivilent to a major number) + * @dev: the sub-type of the server (equivilent to a minor number) + * @qid: unique id from the server of type &p9_qid + * @mode: Plan 9 format permissions of type &p9_perm_t + * @atime: Last access/read time + * @mtime: Last modify/write time + * @length: file length + * @name: last element of path (aka filename) in type &p9_str + * @uid: owner name in type &p9_str + * @gid: group owner in type &p9_str + * @muid: last modifier in type &p9_str + * @extension: area used to encode extended UNIX support in type &p9_str + * @n_uid: numeric user id of owner (part of 9p2000.u extension) + * @n_gid: numeric group id (part of 9p2000.u extension) + * @n_muid: numeric user id of laster modifier (part of 9p2000.u extension) + * + * See Also: http://plan9.bell-labs.com/magic/man2html/2/stat + */ + struct p9_stat { u16 size; u16 type; @@ -179,10 +344,14 @@ struct p9_stat { u32 n_muid; /* 9p2000.u extensions */ }; -/* file metadata (stat) structure used to create Twstat message - The is similar to p9_stat, but the strings don't point to - the same memory block and should be freed separately -*/ +/* + * file metadata (stat) structure used to create Twstat message + * The is identical to &p9_stat, but the strings don't point to + * the same memory block and should be freed separately + * + * See Also: http://plan9.bell-labs.com/magic/man2html/2/stat + */ + struct p9_wstat { u16 size; u16 type; @@ -335,10 +504,20 @@ struct p9_twstat { struct p9_rwstat { }; -/* - * fcall is the primary packet structure - * - */ +/** + * struct p9_fcall - primary packet structure + * @size: prefixed length of the structure + * @id: protocol operating identifier of type &p9_msg_t + * @tag: transaction id of the request + * @sdata: payload + * @params: per-operation parameters + * + * &p9_fcall represents the structure for all 9P RPC + * transactions. Requests are packaged into fcalls, and reponses + * must be extracted from them. + * + * See Also: http://plan9.bell-labs.com/magic/man2html/2/fcall + */ struct p9_fcall { u32 size; diff --git a/include/net/9p/client.h b/include/net/9p/client.h index e52f93d9ac5..c936dd14de4 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -26,6 +26,23 @@ #ifndef NET_9P_CLIENT_H #define NET_9P_CLIENT_H +/** + * struct p9_client - per client instance state + * @lock: protect @fidlist + * @msize: maximum data size negotiated by protocol + * @dotu: extension flags negotiated by protocol + * @trans_mod: module API instantiated with this client + * @trans: tranport instance state and API + * @conn: connection state information used by trans_fd + * @fidpool: fid handle accounting for session + * @fidlist: List of active fid handles + * + * The client structure is used to keep track of various per-client + * state that has been instantiated. + * + * Bugs: duplicated data and potentially unnecessary elements. + */ + struct p9_client { spinlock_t lock; /* protect client structure */ int msize; @@ -38,6 +55,24 @@ struct p9_client { struct list_head fidlist; }; +/** + * struct p9_fid - file system entity handle + * @clnt: back pointer to instantiating &p9_client + * @fid: numeric identifier for this handle + * @mode: current mode of this fid (enum?) + * @qid: the &p9_qid server identifier this handle points to + * @iounit: the server reported maximum transaction size for this file + * @uid: the numeric uid of the local user who owns this handle + * @aux: transport specific information (unused?) + * @rdir_fpos: tracks offset of file position when reading directory contents + * @rdir_pos: (unused?) + * @rdir_fcall: holds response of last directory read request + * @flist: per-client-instance fid tracking + * @dlist: per-dentry fid tracking + * + * TODO: This needs lots of explanation. + */ + struct p9_fid { struct p9_client *clnt; u32 fid; diff --git a/include/net/9p/transport.h b/include/net/9p/transport.h index d2209ae9d18..240e0de888c 100644 --- a/include/net/9p/transport.h +++ b/include/net/9p/transport.h @@ -26,12 +26,40 @@ #ifndef NET_9P_TRANSPORT_H #define NET_9P_TRANSPORT_H +/** + * enum p9_trans_status - different states of underlying transports + * @Connected: transport is connected and healthy + * @Disconnected: transport has been disconnected + * @Hung: transport is connected by wedged + * + * This enumeration details the various states a transport + * instatiation can be in. + */ + enum p9_trans_status { Connected, Disconnected, Hung, }; +/** + * struct p9_trans - per-transport state and API + * @status: transport &p9_trans_status + * @msize: negotiated maximum packet size (duplicate from client) + * @extended: negotiated protocol extensions (duplicate from client) + * @priv: transport private data + * @close: member function to disconnect and close the transport + * @rpc: member function to issue a request to the transport + * + * This is the basic API for a transport instance. It is used as + * a handle by the client to issue requests. This interface is currently + * in flux during reorganization. + * + * Bugs: there is lots of duplicated data here and its not clear that + * the member functions need to be per-instance versus per transport + * module. + */ + struct p9_trans { enum p9_trans_status status; int msize; @@ -42,6 +70,21 @@ struct p9_trans { struct p9_fcall **rc); }; +/** + * struct p9_trans_module - transport module interface + * @list: used to maintain a list of currently available transports + * @name: the human-readable name of the transport + * @maxsize: transport provided maximum packet size + * @def: set if this transport should be considered the default + * @create: member function to create a new connection on this transport + * + * This is the basic API for a transport module which is registered by the + * transport module with the 9P core network module and used by the client + * to instantiate a new connection on a transport. + * + * Bugs: the transport module list isn't protected. + */ + struct p9_trans_module { struct list_head list; char *name; /* name of transport */ diff --git a/net/9p/conv.c b/net/9p/conv.c index 3fe35d532c8..44547201f5b 100644 --- a/net/9p/conv.c +++ b/net/9p/conv.c @@ -197,7 +197,7 @@ static void buf_get_qid(struct cbuf *bufp, struct p9_qid *qid) /** * p9_size_wstat - calculate the size of a variable length stat struct - * @stat: metadata (stat) structure + * @wstat: metadata (stat) structure * @dotu: non-zero if 9P2000.u * */ @@ -511,6 +511,12 @@ p9_create_common(struct cbuf *bufp, u32 size, u8 id) return fc; } +/** + * p9_set_tag - set the tag field of an &p9_fcall structure + * @fc: fcall structure to set tag within + * @tag: tag id to set + */ + void p9_set_tag(struct p9_fcall *fc, u16 tag) { fc->tag = tag; @@ -518,6 +524,12 @@ void p9_set_tag(struct p9_fcall *fc, u16 tag) } EXPORT_SYMBOL(p9_set_tag); +/** + * p9_create_tversion - allocates and creates a T_VERSION request + * @msize: requested maximum data size + * @version: version string to negotiate + * + */ struct p9_fcall *p9_create_tversion(u32 msize, char *version) { int size; @@ -542,6 +554,16 @@ error: } EXPORT_SYMBOL(p9_create_tversion); +/** + * p9_create_tauth - allocates and creates a T_AUTH request + * @afid: handle to use for authentication protocol + * @uname: user name attempting to authenticate + * @aname: mount specifier for remote server + * @n_uname: numeric id for user attempting to authneticate + * @dotu: 9P2000.u extension flag + * + */ + struct p9_fcall *p9_create_tauth(u32 afid, char *uname, char *aname, u32 n_uname, int dotu) { @@ -580,6 +602,18 @@ error: } EXPORT_SYMBOL(p9_create_tauth); +/** + * p9_create_tattach - allocates and creates a T_ATTACH request + * @fid: handle to use for the new mount point + * @afid: handle to use for authentication protocol + * @uname: user name attempting to attach + * @aname: mount specifier for remote server + * @n_uname: numeric id for user attempting to attach + * @n_uname: numeric id for user attempting to attach + * @dotu: 9P2000.u extension flag + * + */ + struct p9_fcall * p9_create_tattach(u32 fid, u32 afid, char *uname, char *aname, u32 n_uname, int dotu) @@ -616,6 +650,12 @@ error: } EXPORT_SYMBOL(p9_create_tattach); +/** + * p9_create_tflush - allocates and creates a T_FLUSH request + * @oldtag: tag id for the transaction we are attempting to cancel + * + */ + struct p9_fcall *p9_create_tflush(u16 oldtag) { int size; @@ -639,6 +679,15 @@ error: } EXPORT_SYMBOL(p9_create_tflush); +/** + * p9_create_twalk - allocates and creates a T_FLUSH request + * @fid: handle we are traversing from + * @newfid: a new handle for this transaction + * @nwname: number of path elements to traverse + * @wnames: array of path elements + * + */ + struct p9_fcall *p9_create_twalk(u32 fid, u32 newfid, u16 nwname, char **wnames) { @@ -677,6 +726,13 @@ error: } EXPORT_SYMBOL(p9_create_twalk); +/** + * p9_create_topen - allocates and creates a T_OPEN request + * @fid: handle we are trying to open + * @mode: what mode we are trying to open the file in + * + */ + struct p9_fcall *p9_create_topen(u32 fid, u8 mode) { int size; @@ -701,6 +757,19 @@ error: } EXPORT_SYMBOL(p9_create_topen); +/** + * p9_create_tcreate - allocates and creates a T_CREATE request + * @fid: handle of directory we are trying to create in + * @name: name of the file we are trying to create + * @perm: permissions for the file we are trying to create + * @mode: what mode we are trying to open the file in + * @extension: 9p2000.u extension string (for special files) + * @dotu: 9p2000.u enabled flag + * + * Note: Plan 9 create semantics include opening the resulting file + * which is why mode is included. + */ + struct p9_fcall *p9_create_tcreate(u32 fid, char *name, u32 perm, u8 mode, char *extension, int dotu) { @@ -736,6 +805,13 @@ error: } EXPORT_SYMBOL(p9_create_tcreate); +/** + * p9_create_tread - allocates and creates a T_READ request + * @fid: handle of the file we are trying to read + * @offset: offset to start reading from + * @count: how many bytes to read + */ + struct p9_fcall *p9_create_tread(u32 fid, u64 offset, u32 count) { int size; @@ -761,6 +837,17 @@ error: } EXPORT_SYMBOL(p9_create_tread); +/** + * p9_create_twrite - allocates and creates a T_WRITE request from the kernel + * @fid: handle of the file we are trying to write + * @offset: offset to start writing at + * @count: how many bytes to write + * @data: data to write + * + * This function will create a requst with data buffers from the kernel + * such as the page cache. + */ + struct p9_fcall *p9_create_twrite(u32 fid, u64 offset, u32 count, const char *data) { @@ -794,6 +881,16 @@ error: } EXPORT_SYMBOL(p9_create_twrite); +/** + * p9_create_twrite_u - allocates and creates a T_WRITE request from userspace + * @fid: handle of the file we are trying to write + * @offset: offset to start writing at + * @count: how many bytes to write + * @data: data to write + * + * This function will create a request with data buffers from userspace + */ + struct p9_fcall *p9_create_twrite_u(u32 fid, u64 offset, u32 count, const char __user *data) { @@ -827,6 +924,14 @@ error: } EXPORT_SYMBOL(p9_create_twrite_u); +/** + * p9_create_tclunk - allocate a request to forget about a file handle + * @fid: handle of the file we closing or forgetting about + * + * clunk is used both to close open files and to discard transient handles + * which may be created during meta-data operations and hierarchy traversal. + */ + struct p9_fcall *p9_create_tclunk(u32 fid) { int size; @@ -850,6 +955,12 @@ error: } EXPORT_SYMBOL(p9_create_tclunk); +/** + * p9_create_tremove - allocate and create a request to remove a file + * @fid: handle of the file or directory we are removing + * + */ + struct p9_fcall *p9_create_tremove(u32 fid) { int size; @@ -873,6 +984,12 @@ error: } EXPORT_SYMBOL(p9_create_tremove); +/** + * p9_create_tstat - allocate and populate a request for attributes + * @fid: handle of the file or directory we are trying to get the attributes of + * + */ + struct p9_fcall *p9_create_tstat(u32 fid) { int size; @@ -896,6 +1013,14 @@ error: } EXPORT_SYMBOL(p9_create_tstat); +/** + * p9_create_tstat - allocate and populate a request to change attributes + * @fid: handle of the file or directory we are trying to change + * @wstat: &p9_stat structure with attributes we wish to set + * @dotu: 9p2000.u enabled flag + * + */ + struct p9_fcall *p9_create_twstat(u32 fid, struct p9_wstat *wstat, int dotu) { @@ -922,3 +1047,4 @@ error: return fc; } EXPORT_SYMBOL(p9_create_twstat); + diff --git a/net/9p/error.c b/net/9p/error.c index 64104b9cb42..388770c3631 100644 --- a/net/9p/error.c +++ b/net/9p/error.c @@ -33,6 +33,13 @@ #include #include +/** + * struct errormap - map string errors from Plan 9 to Linux numeric ids + * @name: string sent over 9P + * @val: numeric id most closely representing @name + * @namelen: length of string + * @list: hash-table list for string lookup + */ struct errormap { char *name; int val; @@ -177,8 +184,7 @@ static struct errormap errmap[] = { }; /** - * p9_error_init - preload - * @errstr: error string + * p9_error_init - preload mappings into hash list * */ @@ -206,6 +212,7 @@ EXPORT_SYMBOL(p9_error_init); /** * errstr2errno - convert error string to error number * @errstr: error string + * @len: length of error string * */ diff --git a/net/9p/fcprint.c b/net/9p/fcprint.c index 40244fbd9b0..53dd8e28dd8 100644 --- a/net/9p/fcprint.c +++ b/net/9p/fcprint.c @@ -142,6 +142,14 @@ p9_printdata(char *buf, int buflen, u8 *data, int datalen) return p9_dumpdata(buf, buflen, data, datalen < 16?datalen:16); } +/** + * p9_printfcall - decode and print a protocol structure into a buffer + * @buf: buffer to deposit decoded structure into + * @buflen: available space in buffer + * @fc: protocol rpc structure of type &p9_fcall + * @extended: whether or not session is operating with extended protocol + */ + int p9_printfcall(char *buf, int buflen, struct p9_fcall *fc, int extended) { diff --git a/net/9p/mod.c b/net/9p/mod.c index c285aab2af0..c6d9695949e 100644 --- a/net/9p/mod.c +++ b/net/9p/mod.c @@ -39,9 +39,6 @@ module_param_named(debug, p9_debug_level, uint, 0); MODULE_PARM_DESC(debug, "9P debugging level"); #endif -extern int p9_mux_global_init(void); -extern void p9_mux_global_exit(void); - /* * Dynamic Transport Registration Routines * @@ -52,7 +49,7 @@ static struct p9_trans_module *v9fs_default_transport; /** * v9fs_register_trans - register a new transport with 9p - * @m - structure describing the transport module and entry points + * @m: structure describing the transport module and entry points * */ void v9fs_register_trans(struct p9_trans_module *m) @@ -65,7 +62,7 @@ EXPORT_SYMBOL(v9fs_register_trans); /** * v9fs_match_trans - match transport versus registered transports - * @arg: string identifying transport + * @name: string identifying transport * */ struct p9_trans_module *v9fs_match_trans(const substring_t *name) diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index f624dff7685..c6eda999fa7 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -47,12 +47,29 @@ #define SCHED_TIMEOUT 10 #define MAXPOLLWADDR 2 +/** + * struct p9_fd_opts - per-transport options + * @rfd: file descriptor for reading (trans=fd) + * @wfd: file descriptor for writing (trans=fd) + * @port: port to connect to (trans=tcp) + * + */ + struct p9_fd_opts { int rfd; int wfd; u16 port; }; + +/** + * struct p9_trans_fd - transport state + * @rd: reference to file to read from + * @wr: reference of file to write to + * @conn: connection state reference + * + */ + struct p9_trans_fd { struct file *rd; struct file *wr; @@ -90,10 +107,24 @@ enum { }; struct p9_req; - typedef void (*p9_conn_req_callback)(struct p9_req *req, void *a); + +/** + * struct p9_req - fd mux encoding of an rpc transaction + * @lock: protects req_list + * @tag: numeric tag for rpc transaction + * @tcall: request &p9_fcall structure + * @rcall: response &p9_fcall structure + * @err: error state + * @cb: callback for when response is received + * @cba: argument to pass to callback + * @flush: flag to indicate RPC has been flushed + * @req_list: list link for higher level objects to chain requests + * + */ + struct p9_req { - spinlock_t lock; /* protect request structure */ + spinlock_t lock; int tag; struct p9_fcall *tcall; struct p9_fcall *rcall; @@ -104,7 +135,39 @@ struct p9_req { struct list_head req_list; }; -struct p9_mux_poll_task; +struct p9_mux_poll_task { + struct task_struct *task; + struct list_head mux_list; + int muxnum; +}; + +/** + * struct p9_conn - fd mux connection state information + * @lock: protects mux_list (?) + * @mux_list: list link for mux to manage multiple connections (?) + * @poll_task: task polling on this connection + * @msize: maximum size for connection (dup) + * @extended: 9p2000.u flag (dup) + * @trans: reference to transport instance for this connection + * @tagpool: id accounting for transactions + * @err: error state + * @equeue: event wait_q (?) + * @req_list: accounting for requests which have been sent + * @unsent_req_list: accounting for requests that haven't been sent + * @rcall: current response &p9_fcall structure + * @rpos: read position in current frame + * @rbuf: current read buffer + * @wpos: write position for current frame + * @wsize: amount of data to write for current frame + * @wbuf: current write buffer + * @poll_wait: array of wait_q's for various worker threads + * @poll_waddr: ???? + * @pt: poll state + * @rq: current read work + * @wq: current write work + * @wsched: ???? + * + */ struct p9_conn { spinlock_t lock; /* protect lock structure */ @@ -132,11 +195,16 @@ struct p9_conn { unsigned long wsched; }; -struct p9_mux_poll_task { - struct task_struct *task; - struct list_head mux_list; - int muxnum; -}; +/** + * struct p9_mux_rpc - fd mux rpc accounting structure + * @m: connection this request was issued on + * @err: error state + * @tcall: request &p9_fcall + * @rcall: response &p9_fcall + * @wqueue: wait queue that client is blocked on for this rpc + * + * Bug: isn't this information duplicated elsewhere like &p9_req + */ struct p9_mux_rpc { struct p9_conn *m; @@ -207,10 +275,12 @@ static void p9_mux_put_tag(struct p9_conn *m, u16 tag) /** * p9_mux_calc_poll_procs - calculates the number of polling procs - * based on the number of mounted v9fs filesystems. + * @muxnum: number of mounts * + * Calculation is based on the number of mounted v9fs filesystems. * The current implementation returns sqrt of the number of mounts. */ + static int p9_mux_calc_poll_procs(int muxnum) { int n; @@ -331,12 +401,11 @@ static void p9_mux_poll_stop(struct p9_conn *m) /** * p9_conn_create - allocate and initialize the per-session mux data - * Creates the polling task if this is the first session. + * @trans: transport structure * - * @trans - transport structure - * @msize - maximum message size - * @extended - extended flag + * Note: Creates the polling task if this is the first session. */ + static struct p9_conn *p9_conn_create(struct p9_trans *trans) { int i, n; @@ -406,7 +475,10 @@ static struct p9_conn *p9_conn_create(struct p9_trans *trans) /** * p9_mux_destroy - cancels all pending requests and frees mux resources + * @m: mux to destroy + * */ + static void p9_conn_destroy(struct p9_conn *m) { P9_DPRINTK(P9_DEBUG_MUX, "mux %p prev %p next %p\n", m, @@ -429,9 +501,14 @@ static void p9_conn_destroy(struct p9_conn *m) } /** - * p9_pollwait - called by files poll operation to add v9fs-poll task - * to files wait queue + * p9_pollwait - add poll task to the wait queue + * @filp: file pointer being polled + * @wait_address: wait_q to block on + * @p: poll state + * + * called by files poll operation to add v9fs-poll task to files wait queue */ + static void p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p) { @@ -462,7 +539,10 @@ p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p) /** * p9_poll_mux - polls a mux and schedules read or write works if necessary + * @m: connection to poll + * */ + static void p9_poll_mux(struct p9_conn *m) { int n; @@ -499,9 +579,14 @@ static void p9_poll_mux(struct p9_conn *m) } /** - * p9_poll_proc - polls all v9fs transports for new events and queues - * the appropriate work to the work queue + * p9_poll_proc - poll worker thread + * @a: thread state and arguments + * + * polls all v9fs transports for new events and queues the appropriate + * work to the work queue + * */ + static int p9_poll_proc(void *a) { struct p9_conn *m, *mtmp; @@ -527,7 +612,10 @@ static int p9_poll_proc(void *a) /** * p9_write_work - called when a transport can send some data + * @work: container for work to be done + * */ + static void p9_write_work(struct work_struct *work) { int n, err; @@ -638,7 +726,10 @@ static void process_request(struct p9_conn *m, struct p9_req *req) /** * p9_read_work - called when there is some data to be read from a transport + * @work: container of work to be done + * */ + static void p9_read_work(struct work_struct *work) { int n, err; @@ -793,7 +884,9 @@ error: * @tc: request to be sent * @cb: callback function to call when response is received * @cba: parameter to pass to the callback function + * */ + static struct p9_req *p9_send_request(struct p9_conn *m, struct p9_fcall *tc, p9_conn_req_callback cb, void *cba) @@ -961,10 +1054,12 @@ p9_conn_rpc_cb(struct p9_req *req, void *a) /** * p9_fd_rpc- sends 9P request and waits until a response is available. * The function can be interrupted. - * @m: mux data + * @t: transport data * @tc: request to be sent * @rc: pointer where a pointer to the response is stored + * */ + int p9_fd_rpc(struct p9_trans *t, struct p9_fcall *tc, struct p9_fcall **rc) { @@ -1041,8 +1136,10 @@ p9_fd_rpc(struct p9_trans *t, struct p9_fcall *tc, struct p9_fcall **rc) * @m: mux data * @tc: request to be sent * @cb: callback function to be called when response arrives - * @cba: value to pass to the callback function + * @a: value to pass to the callback function + * */ + int p9_conn_rpcnb(struct p9_conn *m, struct p9_fcall *tc, p9_conn_req_callback cb, void *a) { @@ -1065,7 +1162,9 @@ int p9_conn_rpcnb(struct p9_conn *m, struct p9_fcall *tc, * p9_conn_cancel - cancel all pending requests with error * @m: mux data * @err: error code + * */ + void p9_conn_cancel(struct p9_conn *m, int err) { struct p9_req *req, *rtmp; @@ -1099,7 +1198,7 @@ void p9_conn_cancel(struct p9_conn *m, int err) /** * v9fs_parse_options - parse mount options into session structure * @options: options string passed from mount - * @v9ses: existing v9fs session information + * @opts: transport-specific structure to parse options into * */ @@ -1193,11 +1292,12 @@ static int p9_socket_open(struct p9_trans *trans, struct socket *csocket) /** * p9_fd_read- read from a fd - * @v9ses: session information + * @trans: transport instance state * @v: buffer to receive data into * @len: size of receive buffer * */ + static int p9_fd_read(struct p9_trans *trans, void *v, int len) { int ret; @@ -1220,11 +1320,12 @@ static int p9_fd_read(struct p9_trans *trans, void *v, int len) /** * p9_fd_write - write to a socket - * @v9ses: session information + * @trans: transport instance state * @v: buffer to send data from * @len: size of send buffer * */ + static int p9_fd_write(struct p9_trans *trans, void *v, int len) { int ret; @@ -1296,6 +1397,7 @@ end: * @trans: private socket structure * */ + static void p9_fd_close(struct p9_trans *trans) { struct p9_trans_fd *ts; diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index de7a9f532ed..0bab1f23590 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -55,23 +55,69 @@ static int chan_index; #define P9_INIT_MAXTAG 16 -#define REQ_STATUS_IDLE 0 -#define REQ_STATUS_SENT 1 -#define REQ_STATUS_RCVD 2 -#define REQ_STATUS_FLSH 3 + +/** + * enum p9_req_status_t - virtio request status + * @REQ_STATUS_IDLE: request slot unused + * @REQ_STATUS_SENT: request sent to server + * @REQ_STATUS_RCVD: response received from server + * @REQ_STATUS_FLSH: request has been flushed + * + * The @REQ_STATUS_IDLE state is used to mark a request slot as unused + * but use is actually tracked by the idpool structure which handles tag + * id allocation. + * + */ + +enum p9_req_status_t { + REQ_STATUS_IDLE, + REQ_STATUS_SENT, + REQ_STATUS_RCVD, + REQ_STATUS_FLSH, +}; + +/** + * struct p9_req_t - virtio request slots + * @status: status of this request slot + * @wq: wait_queue for the client to block on for this request + * + * The virtio transport uses an array to track outstanding requests + * instead of a list. While this may incurr overhead during initial + * allocation or expansion, it makes request lookup much easier as the + * tag id is a index into an array. (We use tag+1 so that we can accomodate + * the -1 tag for the T_VERSION request). + * This also has the nice effect of only having to allocate wait_queues + * once, instead of constantly allocating and freeing them. Its possible + * other resources could benefit from this scheme as well. + * + */ struct p9_req_t { int status; wait_queue_head_t *wq; }; -/* We keep all per-channel information in a structure. +/** + * struct virtio_chan - per-instance transport information + * @initialized: whether the channel is initialized + * @inuse: whether the channel is in use + * @lock: protects multiple elements within this structure + * @vdev: virtio dev associated with this channel + * @vq: virtio queue associated with this channel + * @tagpool: accounting for tag ids (and request slots) + * @reqs: array of request slots + * @max_tag: current number of request_slots allocated + * @sg: scatter gather list which is used to pack a request (protected?) + * + * We keep all per-channel information in a structure. * This structure is allocated within the devices dev->mem space. * A pointer to the structure will get put in the transport private. + * */ + static struct virtio_chan { - bool initialized; /* channel is initialized */ - bool inuse; /* channel is in use */ + bool initialized; + bool inuse; spinlock_t lock; @@ -86,7 +132,19 @@ static struct virtio_chan { struct scatterlist sg[VIRTQUEUE_NUM]; } channels[MAX_9P_CHAN]; -/* Lookup requests by tag */ +/** + * p9_lookup_tag - Lookup requests by tag + * @c: virtio channel to lookup tag within + * @tag: numeric id for transaction + * + * this is a simple array lookup, but will grow the + * request_slots as necessary to accomodate transaction + * ids which did not previously have a slot. + * + * Bugs: there is currently no upper limit on request slots set + * here, but that should be constrained by the id accounting. + */ + static struct p9_req_t *p9_lookup_tag(struct virtio_chan *c, u16 tag) { /* This looks up the original request by tag so we know which @@ -130,6 +188,15 @@ static unsigned int rest_of_page(void *data) return PAGE_SIZE - ((unsigned long)data % PAGE_SIZE); } +/** + * p9_virtio_close - reclaim resources of a channel + * @trans: transport state + * + * This reclaims a channel by freeing its resources and + * reseting its inuse flag. + * + */ + static void p9_virtio_close(struct p9_trans *trans) { struct virtio_chan *chan = trans->priv; @@ -151,6 +218,19 @@ static void p9_virtio_close(struct p9_trans *trans) kfree(trans); } +/** + * req_done - callback which signals activity from the server + * @vq: virtio queue activity was received on + * + * This notifies us that the server has triggered some activity + * on the virtio channel - most likely a response to request we + * sent. Figure out which requests now have responses and wake up + * those threads. + * + * Bugs: could do with some additional sanity checking, but appears to work. + * + */ + static void req_done(struct virtqueue *vq) { struct virtio_chan *chan = vq->vdev->priv; @@ -169,6 +249,20 @@ static void req_done(struct virtqueue *vq) spin_unlock_irqrestore(&chan->lock, flags); } +/** + * pack_sg_list - pack a scatter gather list from a linear buffer + * @sg: scatter/gather list to pack into + * @start: which segment of the sg_list to start at + * @limit: maximum segment to pack data to + * @data: data to pack into scatter/gather list + * @count: amount of data to pack into the scatter/gather list + * + * sg_lists have multiple segments of various sizes. This will pack + * arbitrary data into an existing scatter gather list, segmenting the + * data as necessary within constraints. + * + */ + static int pack_sg_list(struct scatterlist *sg, int start, int limit, char *data, int count) @@ -189,6 +283,14 @@ pack_sg_list(struct scatterlist *sg, int start, int limit, char *data, return index-start; } +/** + * p9_virtio_rpc - issue a request and wait for a response + * @t: transport state + * @tc: &p9_fcall request to transmit + * @rc: &p9_fcall to put reponse into + * + */ + static int p9_virtio_rpc(struct p9_trans *t, struct p9_fcall *tc, struct p9_fcall **rc) { @@ -263,6 +365,16 @@ p9_virtio_rpc(struct p9_trans *t, struct p9_fcall *tc, struct p9_fcall **rc) return 0; } +/** + * p9_virtio_probe - probe for existence of 9P virtio channels + * @vdev: virtio device to probe + * + * This probes for existing virtio channels. At present only + * a single channel is in use, so in the future more work may need + * to be done here. + * + */ + static int p9_virtio_probe(struct virtio_device *vdev) { int err; @@ -307,11 +419,28 @@ fail: return err; } -/* This sets up a transport channel for 9p communication. Right now + +/** + * p9_virtio_create - allocate a new virtio channel + * @devname: string identifying the channel to connect to (unused) + * @args: args passed from sys_mount() for per-transport options (unused) + * @msize: requested maximum packet size + * @extended: 9p2000.u enabled flag + * + * This sets up a transport channel for 9p communication. Right now * we only match the first available channel, but eventually we couldlook up * alternate channels by matching devname versus a virtio_config entry. * We use a simple reference count mechanism to ensure that only a single - * mount has a channel open at a time. */ + * mount has a channel open at a time. + * + * Bugs: doesn't allow identification of a specific channel + * to allocate, channels are allocated sequentially. This was + * a pragmatic decision to get things rolling, but ideally some + * way of identifying the channel to attach to would be nice + * if we are going to support multiple channels. + * + */ + static struct p9_trans * p9_virtio_create(const char *devname, char *args, int msize, unsigned char extended) @@ -360,6 +489,12 @@ p9_virtio_create(const char *devname, char *args, int msize, return trans; } +/** + * p9_virtio_remove - clean up resources associated with a virtio device + * @vdev: virtio device to remove + * + */ + static void p9_virtio_remove(struct virtio_device *vdev) { struct virtio_chan *chan = vdev->priv; diff --git a/net/9p/util.c b/net/9p/util.c index ef7215565d8..4d564604533 100644 --- a/net/9p/util.c +++ b/net/9p/util.c @@ -32,11 +32,23 @@ #include #include +/** + * struct p9_idpool - per-connection accounting for tag idpool + * @lock: protects the pool + * @pool: idr to allocate tag id from + * + */ + struct p9_idpool { spinlock_t lock; struct idr pool; }; +/** + * p9_idpool_create - create a new per-connection id pool + * + */ + struct p9_idpool *p9_idpool_create(void) { struct p9_idpool *p; @@ -52,6 +64,11 @@ struct p9_idpool *p9_idpool_create(void) } EXPORT_SYMBOL(p9_idpool_create); +/** + * p9_idpool_destroy - create a new per-connection id pool + * @p: idpool to destory + */ + void p9_idpool_destroy(struct p9_idpool *p) { idr_destroy(&p->pool); @@ -61,9 +78,9 @@ EXPORT_SYMBOL(p9_idpool_destroy); /** * p9_idpool_get - allocate numeric id from pool - * @p - pool to allocate from + * @p: pool to allocate from * - * XXX - This seems to be an awful generic function, should it be in idr.c with + * Bugs: This seems to be an awful generic function, should it be in idr.c with * the lock included in struct idr? */ @@ -94,9 +111,10 @@ EXPORT_SYMBOL(p9_idpool_get); /** * p9_idpool_put - release numeric id from pool - * @p - pool to allocate from + * @id: numeric id which is being released + * @p: pool to release id into * - * XXX - This seems to be an awful generic function, should it be in idr.c with + * Bugs: This seems to be an awful generic function, should it be in idr.c with * the lock included in struct idr? */ @@ -111,11 +129,13 @@ EXPORT_SYMBOL(p9_idpool_put); /** * p9_idpool_check - check if the specified id is available - * @id - id to check - * @p - pool + * @id: id to check + * @p: pool to check */ + int p9_idpool_check(int id, struct p9_idpool *p) { return idr_find(&p->pool, id) != NULL; } EXPORT_SYMBOL(p9_idpool_check); + -- cgit v1.2.3-70-g09d2 From ab31267dfeddf80b2e483f077c8b03905993722b Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Thu, 6 Mar 2008 17:10:28 -0600 Subject: fs/9p/v9fs.c (v9fs_parse_options): Handle kstrdup and match_strdup failure. Now that this function can fail, return an int, diagnose other option-parsing failures, and adjust the sole caller: (v9fs_session_init): Handle kstrdup failure. Propagate any new v9fs_parse_options failure "up". Signed-off-by: Jim Meyering Cc: Ron Minnich Cc: Latchesar Ionkov Signed-off-by: Andrew Morton Acked-by: Eric Van Hensbergen --- fs/9p/v9fs.c | 40 ++++++++++++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 79d310c0018..5c1ccaf0416 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -73,16 +73,17 @@ static match_table_t tokens = { * v9fs_parse_options - parse mount options into session structure * @v9ses: existing v9fs session information * + * Return 0 upon success, -ERRNO upon failure. */ -static void v9fs_parse_options(struct v9fs_session_info *v9ses) +static int v9fs_parse_options(struct v9fs_session_info *v9ses) { char *options; substring_t args[MAX_OPT_ARGS]; char *p; int option = 0; char *s, *e; - int ret; + int ret = 0; /* setup defaults */ v9ses->afid = ~0; @@ -90,19 +91,26 @@ static void v9fs_parse_options(struct v9fs_session_info *v9ses) v9ses->cache = 0; if (!v9ses->options) - return; + return 0; options = kstrdup(v9ses->options, GFP_KERNEL); + if (!options) { + P9_DPRINTK(P9_DEBUG_ERROR, + "failed to allocate copy of option string\n"); + return -ENOMEM; + } + while ((p = strsep(&options, ",")) != NULL) { int token; if (!*p) continue; token = match_token(p, tokens, args); if (token < Opt_uname) { - ret = match_int(&args[0], &option); - if (ret < 0) { + int r = match_int(&args[0], &option); + if (r < 0) { P9_DPRINTK(P9_DEBUG_ERROR, "integer field, but no integer?\n"); + ret = r; continue; } } @@ -138,6 +146,13 @@ static void v9fs_parse_options(struct v9fs_session_info *v9ses) case Opt_access: s = match_strdup(&args[0]); + if (!s) { + P9_DPRINTK(P9_DEBUG_ERROR, + "failed to allocate copy" + " of option argument\n"); + ret = -ENOMEM; + break; + } v9ses->flags &= ~V9FS_ACCESS_MASK; if (strcmp(s, "user") == 0) v9ses->flags |= V9FS_ACCESS_USER; @@ -157,6 +172,7 @@ static void v9fs_parse_options(struct v9fs_session_info *v9ses) } } kfree(options); + return ret; } /** @@ -172,6 +188,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, { int retval = -EINVAL; struct p9_fid *fid; + int rc; v9ses->uname = __getname(); if (!v9ses->uname) @@ -190,7 +207,18 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, v9ses->dfltuid = V9FS_DEFUID; v9ses->dfltgid = V9FS_DEFGID; v9ses->options = kstrdup(data, GFP_KERNEL); - v9fs_parse_options(v9ses); + if (!v9ses->options) { + P9_DPRINTK(P9_DEBUG_ERROR, + "failed to allocate copy of option string\n"); + retval = -ENOMEM; + goto error; + } + + rc = v9fs_parse_options(v9ses); + if (rc < 0) { + retval = rc; + goto error; + } v9ses->clnt = p9_client_create(dev_name, v9ses->options); -- cgit v1.2.3-70-g09d2 From 887b3ece65be7b643dfdae0d433c91a26a3f437d Mon Sep 17 00:00:00 2001 From: Eric Van Hensbergen Date: Thu, 8 May 2008 20:26:37 -0500 Subject: 9p: fix error path during early mount There was some cleanup issues during early mount which would trigger a kernel bug for certain types of failure. This patch reorganizes the cleanup to get rid of the bad behavior. This also merges the 9pnet and 9pnet_fd modules for the purpose of configuration and initialization. Keeping the fd transport separate from the core 9pnet code seemed like a good idea at the time, but in practice has caused more harm and confusion than good. Signed-off-by: Eric Van Hensbergen --- fs/9p/v9fs.c | 13 +++++++------ fs/9p/vfs_super.c | 34 ++++++++++++++++------------------ include/net/9p/9p.h | 1 + include/net/9p/transport.h | 1 - net/9p/Kconfig | 10 ---------- net/9p/Makefile | 3 --- net/9p/mod.c | 1 + net/9p/trans_fd.c | 29 ++++++++++++++++++++++------- 8 files changed, 47 insertions(+), 45 deletions(-) (limited to 'fs') diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 5c1ccaf0416..047c791427a 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -206,12 +206,14 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, v9ses->uid = ~0; v9ses->dfltuid = V9FS_DEFUID; v9ses->dfltgid = V9FS_DEFGID; - v9ses->options = kstrdup(data, GFP_KERNEL); - if (!v9ses->options) { - P9_DPRINTK(P9_DEBUG_ERROR, + if (data) { + v9ses->options = kstrdup(data, GFP_KERNEL); + if (!v9ses->options) { + P9_DPRINTK(P9_DEBUG_ERROR, "failed to allocate copy of option string\n"); - retval = -ENOMEM; - goto error; + retval = -ENOMEM; + goto error; + } } rc = v9fs_parse_options(v9ses); @@ -260,7 +262,6 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, return fid; error: - v9fs_session_close(v9ses); return ERR_PTR(retval); } diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index ba10f172626..bf59c396049 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -128,29 +128,26 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, fid = v9fs_session_init(v9ses, dev_name, data); if (IS_ERR(fid)) { retval = PTR_ERR(fid); - fid = NULL; - kfree(v9ses); - v9ses = NULL; - goto error; + goto close_session; } st = p9_client_stat(fid); if (IS_ERR(st)) { retval = PTR_ERR(st); - goto error; + goto clunk_fid; } sb = sget(fs_type, NULL, v9fs_set_super, v9ses); if (IS_ERR(sb)) { retval = PTR_ERR(sb); - goto error; + goto free_stat; } v9fs_fill_super(sb, v9ses, flags); inode = v9fs_get_inode(sb, S_IFDIR | mode); if (IS_ERR(inode)) { retval = PTR_ERR(inode); - goto error; + goto release_sb; } inode->i_uid = uid; @@ -159,7 +156,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, root = d_alloc_root(inode); if (!root) { retval = -ENOMEM; - goto error; + goto release_sb; } sb->s_root = root; @@ -170,21 +167,22 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags, return simple_set_mnt(mnt, sb); -error: - kfree(st); - if (fid) - p9_client_clunk(fid); - - if (v9ses) { - v9fs_session_close(v9ses); - kfree(v9ses); - } - +release_sb: if (sb) { up_write(&sb->s_umount); deactivate_super(sb); } +free_stat: + kfree(st); + +clunk_fid: + p9_client_clunk(fid); + +close_session: + v9fs_session_close(v9ses); + kfree(v9ses); + return retval; } diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h index 7bfb2f2e423..b3d3e27c629 100644 --- a/include/net/9p/9p.h +++ b/include/net/9p/9p.h @@ -595,4 +595,5 @@ int p9_idpool_check(int id, struct p9_idpool *p); int p9_error_init(void); int p9_errstr2errno(char *, int); +int p9_trans_fd_init(void); #endif /* NET_9P_H */ diff --git a/include/net/9p/transport.h b/include/net/9p/transport.h index 240e0de888c..0db3a4038dc 100644 --- a/include/net/9p/transport.h +++ b/include/net/9p/transport.h @@ -96,5 +96,4 @@ struct p9_trans_module { void v9fs_register_trans(struct p9_trans_module *m); struct p9_trans_module *v9fs_match_trans(const substring_t *name); struct p9_trans_module *v9fs_default_trans(void); - #endif /* NET_9P_TRANSPORT_H */ diff --git a/net/9p/Kconfig b/net/9p/Kconfig index bafc50c9e6f..ff34c5acc13 100644 --- a/net/9p/Kconfig +++ b/net/9p/Kconfig @@ -13,16 +13,6 @@ menuconfig NET_9P If unsure, say N. -config NET_9P_FD - depends on NET_9P - default y if NET_9P - tristate "9P File Descriptor Transports (Experimental)" - help - This builds support for file descriptor transports for 9p - which includes support for TCP/IP, named pipes, or passed - file descriptors. TCP/IP is the default transport for 9p, - so if you are going to use 9p, you'll likely want this. - config NET_9P_VIRTIO depends on NET_9P && EXPERIMENTAL && VIRTIO tristate "9P Virtio Transport (Experimental)" diff --git a/net/9p/Makefile b/net/9p/Makefile index 8a105110189..519219480db 100644 --- a/net/9p/Makefile +++ b/net/9p/Makefile @@ -1,5 +1,4 @@ obj-$(CONFIG_NET_9P) := 9pnet.o -obj-$(CONFIG_NET_9P_FD) += 9pnet_fd.o obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o 9pnet-objs := \ @@ -9,8 +8,6 @@ obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o error.o \ fcprint.o \ util.o \ - -9pnet_fd-objs := \ trans_fd.o \ 9pnet_virtio-objs := \ diff --git a/net/9p/mod.c b/net/9p/mod.c index c6d9695949e..bdee1fb7cc6 100644 --- a/net/9p/mod.c +++ b/net/9p/mod.c @@ -107,6 +107,7 @@ static int __init init_p9(void) p9_error_init(); printk(KERN_INFO "Installing 9P2000 support\n"); + p9_trans_fd_init(); return ret; } diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 97b103b7049..4507f744f44 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -1433,6 +1433,23 @@ static void p9_fd_close(struct p9_trans *trans) kfree(ts); } +/* + * stolen from NFS - maybe should be made a generic function? + */ +static inline int valid_ipaddr4(const char *buf) +{ + int rc, count, in[4]; + + rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]); + if (rc != 4) + return -EINVAL; + for (count = 0; count < 4; count++) { + if (in[count] > 255) + return -EINVAL; + } + return 0; +} + static struct p9_trans * p9_trans_create_tcp(const char *addr, char *args, int msize, unsigned char dotu) { @@ -1447,6 +1464,9 @@ p9_trans_create_tcp(const char *addr, char *args, int msize, unsigned char dotu) if (err < 0) return ERR_PTR(err); + if (valid_ipaddr4(addr) < 0) + return ERR_PTR(-EINVAL); + csocket = NULL; trans = kmalloc(sizeof(struct p9_trans), GFP_KERNEL); if (!trans) @@ -1625,7 +1645,7 @@ static struct p9_trans_module p9_fd_trans = { .create = p9_trans_create_fd, }; -static int __init p9_trans_fd_init(void) +int p9_trans_fd_init(void) { int ret = p9_mux_global_init(); if (ret) { @@ -1639,9 +1659,4 @@ static int __init p9_trans_fd_init(void) return 0; } - -module_init(p9_trans_fd_init); - -MODULE_AUTHOR("Latchesar Ionkov "); -MODULE_AUTHOR("Eric Van Hensbergen "); -MODULE_LICENSE("GPL"); +EXPORT_SYMBOL(p9_trans_fd_init); -- cgit v1.2.3-70-g09d2 From 772279c5f1dceb58d451dca94b557fd89b1ce890 Mon Sep 17 00:00:00 2001 From: Mingming Cao Date: Wed, 14 May 2008 16:05:41 -0700 Subject: jbd: need to hold j_state_lock to updates to transaction t_state to T_COMMIT Updating the current transaction's t_state is protected by j_state_lock. We need to do the same when updating the t_state to T_COMMIT. Signed-off-by: Mingming Cao Acked-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/jbd/commit.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index cd931ef1f00..5a8ca61498c 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -470,7 +470,9 @@ void journal_commit_transaction(journal_t *journal) * transaction! Now comes the tricky part: we need to write out * metadata. Loop over the transaction's entire buffer list: */ + spin_lock(&journal->j_state_lock); commit_transaction->t_state = T_COMMIT; + spin_unlock(&journal->j_state_lock); J_ASSERT(commit_transaction->t_nr_buffers <= commit_transaction->t_outstanding_credits); -- cgit v1.2.3-70-g09d2 From 7e01c8e5420b6c7f9d85d34c15d8c7a15c9fc720 Mon Sep 17 00:00:00 2001 From: Tiger Yang Date: Wed, 14 May 2008 16:05:47 -0700 Subject: ext3/4: fix uninitialized bs in ext3/4_xattr_set_handle() This fix the uninitialized bs when we try to replace a xattr entry in ibody with the new value which require more than free space. This situation only happens we format ext3/4 with inode size more than 128 and we have put xattr entries both in ibody and block. The consequences about this bug is we will lost the xattr block which pointed by i_file_acl with all xattr entires in it. We will alloc a new xattr block and put that large value entry in it. The old xattr block will become orphan block. Signed-off-by: Tiger Yang Cc: Cc: Andreas Gruenbacher Acked-by: Andreas Dilger Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/xattr.c | 5 +++++ fs/ext4/xattr.c | 5 +++++ 2 files changed, 10 insertions(+) (limited to 'fs') diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c index d4a4f0e9ff6..175414ac221 100644 --- a/fs/ext3/xattr.c +++ b/fs/ext3/xattr.c @@ -1000,6 +1000,11 @@ ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, i.value = NULL; error = ext3_xattr_block_set(handle, inode, &i, &bs); } else if (error == -ENOSPC) { + if (EXT3_I(inode)->i_file_acl && !bs.s.base) { + error = ext3_xattr_block_find(inode, &i, &bs); + if (error) + goto cleanup; + } error = ext3_xattr_block_set(handle, inode, &i, &bs); if (error) goto cleanup; diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 3fbc2c6c3d0..ff08633f398 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1009,6 +1009,11 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, i.value = NULL; error = ext4_xattr_block_set(handle, inode, &i, &bs); } else if (error == -ENOSPC) { + if (EXT4_I(inode)->i_file_acl && !bs.s.base) { + error = ext4_xattr_block_find(inode, &i, &bs); + if (error) + goto cleanup; + } error = ext4_xattr_block_set(handle, inode, &i, &bs); if (error) goto cleanup; -- cgit v1.2.3-70-g09d2 From dfc5d03f12e706c19ee37734184ea96582ef931d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 13 May 2008 19:11:51 -0400 Subject: ext4: correct mount option parsing to detect when quota options can be changed We should not allow user to change quota mount options when quota is just suspended. It would make mount options and internal quota state inconsistent. Also we should not allow user to change quota format when quota is turned on. On the other hand we can just silently ignore when some option is set to the value it already has (mount does this on remount). Cc: Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 52dd0679a4e..686ebcc2e6c 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -979,7 +979,7 @@ static int parse_options (char *options, struct super_block *sb, int data_opt = 0; int option; #ifdef CONFIG_QUOTA - int qtype; + int qtype, qfmt; char *qname; #endif @@ -1162,7 +1162,9 @@ static int parse_options (char *options, struct super_block *sb, case Opt_grpjquota: qtype = GRPQUOTA; set_qf_name: - if (sb_any_quota_enabled(sb)) { + if ((sb_any_quota_enabled(sb) || + sb_any_quota_suspended(sb)) && + !sbi->s_qf_names[qtype]) { printk(KERN_ERR "EXT4-fs: Cannot change journalled " "quota options when quota turned on.\n"); @@ -1200,7 +1202,9 @@ set_qf_name: case Opt_offgrpjquota: qtype = GRPQUOTA; clear_qf_name: - if (sb_any_quota_enabled(sb)) { + if ((sb_any_quota_enabled(sb) || + sb_any_quota_suspended(sb)) && + sbi->s_qf_names[qtype]) { printk(KERN_ERR "EXT4-fs: Cannot change " "journalled quota options when " "quota turned on.\n"); @@ -1213,10 +1217,20 @@ clear_qf_name: sbi->s_qf_names[qtype] = NULL; break; case Opt_jqfmt_vfsold: - sbi->s_jquota_fmt = QFMT_VFS_OLD; - break; + qfmt = QFMT_VFS_OLD; + goto set_qf_format; case Opt_jqfmt_vfsv0: - sbi->s_jquota_fmt = QFMT_VFS_V0; + qfmt = QFMT_VFS_V0; +set_qf_format: + if ((sb_any_quota_enabled(sb) || + sb_any_quota_suspended(sb)) && + sbi->s_jquota_fmt != qfmt) { + printk(KERN_ERR "EXT4-fs: Cannot change " + "journaled quota options when " + "quota turned on.\n"); + return 0; + } + sbi->s_jquota_fmt = qfmt; break; case Opt_quota: case Opt_usrquota: -- cgit v1.2.3-70-g09d2 From cd59e7b9781a35716b8a3e8c4aa2d48081d7daf7 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 13 May 2008 19:11:51 -0400 Subject: ext4: Fix mount messages when quota disabled When quota is disabled, we should not print 'journaled quota not supported' when user tried to mount non-journaled quota. Also fix typo in the message. Signed-off-by: Jan Kara Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 686ebcc2e6c..94a527261ca 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1255,6 +1255,9 @@ set_qf_format: case Opt_quota: case Opt_usrquota: case Opt_grpquota: + printk(KERN_ERR + "EXT4-fs: quota options not supported.\n"); + break; case Opt_usrjquota: case Opt_grpjquota: case Opt_offusrjquota: @@ -1262,7 +1265,7 @@ set_qf_format: case Opt_jqfmt_vfsold: case Opt_jqfmt_vfsv0: printk(KERN_ERR - "EXT4-fs: journalled quota options not " + "EXT4-fs: journaled quota options not " "supported.\n"); break; case Opt_noquota: -- cgit v1.2.3-70-g09d2 From 0623543b3335c8e439cacf21af99bbf45da42c5a Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 13 May 2008 19:11:51 -0400 Subject: ext4: fix synchronization of quota files in journal=data mode In journal=data mode, it is not enough to do write_inode_now as done in vfs_quota_on() to write all data to their final location (which is needed for quota_read to work correctly). Calling journal_flush() does its job. Cc: Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 94a527261ca..cddf7f0e0fd 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3170,23 +3170,42 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, if (!test_opt(sb, QUOTA)) return -EINVAL; - /* Not journalling quota? */ - if ((!EXT4_SB(sb)->s_qf_names[USRQUOTA] && - !EXT4_SB(sb)->s_qf_names[GRPQUOTA]) || remount) + /* When remounting, no checks are needed and in fact, path is NULL */ + if (remount) return vfs_quota_on(sb, type, format_id, path, remount); + err = path_lookup(path, LOOKUP_FOLLOW, &nd); if (err) return err; + /* Quotafile not on the same filesystem? */ if (nd.path.mnt->mnt_sb != sb) { path_put(&nd.path); return -EXDEV; } - /* Quotafile not of fs root? */ - if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) - printk(KERN_WARNING - "EXT4-fs: Quota file not on filesystem root. " - "Journalled quota will not work.\n"); + /* Journaling quota? */ + if (EXT4_SB(sb)->s_qf_names[type]) { + /* Quotafile not of fs root? */ + if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) + printk(KERN_WARNING + "EXT4-fs: Quota file not on filesystem root. " + "Journaled quota will not work.\n"); + } + + /* + * When we journal data on quota file, we have to flush journal to see + * all updates to the file when we bypass pagecache... + */ + if (ext4_should_journal_data(nd.path.dentry->d_inode)) { + /* + * We don't need to lock updates but journal_flush() could + * otherwise be livelocked... + */ + jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); + jbd2_journal_flush(EXT4_SB(sb)->s_journal); + jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); + } + path_put(&nd.path); return vfs_quota_on(sb, type, format_id, path, remount); } -- cgit v1.2.3-70-g09d2 From 2c8be6b222f76c332d9faeb00c047996d340632c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 13 May 2008 21:27:55 -0400 Subject: ext4: fix typos in messages and comments (journalled -> journaled) Cc: Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index cddf7f0e0fd..09d9359c805 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1166,7 +1166,7 @@ set_qf_name: sb_any_quota_suspended(sb)) && !sbi->s_qf_names[qtype]) { printk(KERN_ERR - "EXT4-fs: Cannot change journalled " + "EXT4-fs: Cannot change journaled " "quota options when quota turned on.\n"); return 0; } @@ -1206,7 +1206,7 @@ clear_qf_name: sb_any_quota_suspended(sb)) && sbi->s_qf_names[qtype]) { printk(KERN_ERR "EXT4-fs: Cannot change " - "journalled quota options when " + "journaled quota options when " "quota turned on.\n"); return 0; } @@ -1350,14 +1350,14 @@ set_qf_format: } if (!sbi->s_jquota_fmt) { - printk(KERN_ERR "EXT4-fs: journalled quota format " + printk(KERN_ERR "EXT4-fs: journaled quota format " "not specified.\n"); return 0; } } else { if (sbi->s_jquota_fmt) { - printk(KERN_ERR "EXT4-fs: journalled quota format " - "specified with no journalling " + printk(KERN_ERR "EXT4-fs: journaled quota format " + "specified with no journaling " "enabled.\n"); return 0; } @@ -1598,7 +1598,7 @@ static void ext4_orphan_cleanup (struct super_block * sb, int ret = ext4_quota_on_mount(sb, i); if (ret < 0) printk(KERN_ERR - "EXT4-fs: Cannot turn on journalled " + "EXT4-fs: Cannot turn on journaled " "quota: error %d\n", ret); } } @@ -3123,7 +3123,7 @@ static int ext4_release_dquot(struct dquot *dquot) static int ext4_mark_dquot_dirty(struct dquot *dquot) { - /* Are we journalling quotas? */ + /* Are we journaling quotas? */ if (EXT4_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || EXT4_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { dquot_mark_dquot_dirty(dquot); -- cgit v1.2.3-70-g09d2 From 1930479c4b6bbcb6f164a5b3498e0d98329967f4 Mon Sep 17 00:00:00 2001 From: Valerie Clement Date: Tue, 13 May 2008 19:31:14 -0400 Subject: ext4: mballoc fix mb_normalize_request algorithm for 1KB block size filesystems In case of inode preallocation, the number of blocks to allocate depends on the file size and it is calculated in ext4_mb_normalize_request(). Each group in the filesystem is then checked to find one that can be used for allocation; this is done in ext4_mb_good_group(). When a file bigger than 4MB is created, the requested number of blocks to preallocate, calculated by ext4_mb_normalize_request is 4096. However for a filesystem with 1KB block size, the maximum size of the block buddies used by the multiblock allocator is 2048, so none of groups in the filesystem satisfies the search criteria in ext4_mb_good_group(). Scanning all the filesystem groups impacts performance. This was demonstrated by using a freshly created, 70GB, 1k block filesystem, with caches dropped write before the test via /proc/sys/vm/drop_caches, and with the filesystem mounted with nodelalloc and nodealloc,nomballoc. The time to write an 8 megabyte file using "dd if=/dev/zero of=/mnt/test/fo bs=8k count=1k conv=fsync" took 35.5091 seconds (236kB/s) with nodellaloc, and 0.233754 seconds (35.9 MB/s) with the nodelloc,nomballoc options. With a 1TB partition, it took several minutes to write 8MB! This patch modifies the algorithm in ext4_mb_normalize_group_request to calculate the number of blocks to allocate by taking into account the maximum size of free blocks chunks handled by the multiblock allocator. It has also been tested for filesystems with 2KB and 4KB block sizes to ensure that those cases don't regress. Reviewed-by: Aneesh Kumar K.V Signed-off-by: Valerie Clement Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/mballoc.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index b128bdc0f55..1d7fde99452 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2880,12 +2880,11 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, if (size < i_size_read(ac->ac_inode)) size = i_size_read(ac->ac_inode); - /* max available blocks in a free group */ - max = EXT4_BLOCKS_PER_GROUP(ac->ac_sb) - 1 - 1 - - EXT4_SB(ac->ac_sb)->s_itb_per_group; + /* max size of free chunks */ + max = 2 << bsbits; -#define NRL_CHECK_SIZE(req, size, max,bits) \ - (req <= (size) || max <= ((size) >> bits)) +#define NRL_CHECK_SIZE(req, size, max, chunk_size) \ + (req <= (size) || max <= (chunk_size)) /* first, try to predict filesize */ /* XXX: should this table be tunable? */ @@ -2904,16 +2903,16 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac, size = 512 * 1024; } else if (size <= 1024 * 1024) { size = 1024 * 1024; - } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, bsbits)) { + } else if (NRL_CHECK_SIZE(size, 4 * 1024 * 1024, max, 2 * 1024)) { start_off = ((loff_t)ac->ac_o_ex.fe_logical >> - (20 - bsbits)) << 20; - size = 1024 * 1024; - } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, bsbits)) { + (21 - bsbits)) << 21; + size = 2 * 1024 * 1024; + } else if (NRL_CHECK_SIZE(size, 8 * 1024 * 1024, max, 4 * 1024)) { start_off = ((loff_t)ac->ac_o_ex.fe_logical >> (22 - bsbits)) << 22; size = 4 * 1024 * 1024; } else if (NRL_CHECK_SIZE(ac->ac_o_ex.fe_len, - (8<<20)>>bsbits, max, bsbits)) { + (8<<20)>>bsbits, max, 8 * 1024)) { start_off = ((loff_t)ac->ac_o_ex.fe_logical >> (23 - bsbits)) << 23; size = 8 * 1024 * 1024; -- cgit v1.2.3-70-g09d2 From 519deca0496a4df07d15acf3181ca5d573bffdec Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 15 May 2008 14:43:20 -0400 Subject: ext4: Retry block allocation if new blocks are allocated from system zone. If the block allocator gets blocks out of system zone ext4 calls ext4_error. But if the file system is mounted with errors=continue retry block allocation. We need to mark the system zone blocks as in use to make sure retry don't pick them again System zone is the block range mapping block bitmap, inode bitmap and inode table. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/balloc.c | 17 +++++++++++------ fs/ext4/mballoc.c | 47 ++++++++++++++++++++++++++++++++++++----------- 2 files changed, 47 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index da994374ec3..30494c5da84 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -287,11 +287,11 @@ read_block_bitmap(struct super_block *sb, ext4_group_t block_group) (int)block_group, (unsigned long long)bitmap_blk); return NULL; } - if (!ext4_valid_block_bitmap(sb, desc, block_group, bh)) { - put_bh(bh); - return NULL; - } - + ext4_valid_block_bitmap(sb, desc, block_group, bh); + /* + * file system mounted not to panic on error, + * continue with corrupt bitmap + */ return bh; } /* @@ -1770,7 +1770,12 @@ allocated: "Allocating block in system zone - " "blocks from %llu, length %lu", ret_block, num); - goto out; + /* + * claim_block marked the blocks we allocated + * as in use. So we may want to selectively + * mark some of the blocks as free + */ + goto retry_alloc; } performed_allocation = 1; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 1d7fde99452..873ad9b3418 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2736,7 +2736,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, struct ext4_sb_info *sbi; struct super_block *sb; ext4_fsblk_t block; - int err; + int err, len; BUG_ON(ac->ac_status != AC_STATUS_FOUND); BUG_ON(ac->ac_b_ex.fe_len <= 0); @@ -2770,14 +2770,27 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, + ac->ac_b_ex.fe_start + le32_to_cpu(es->s_first_data_block); - if (block == ext4_block_bitmap(sb, gdp) || - block == ext4_inode_bitmap(sb, gdp) || - in_range(block, ext4_inode_table(sb, gdp), - EXT4_SB(sb)->s_itb_per_group)) { - + len = ac->ac_b_ex.fe_len; + if (in_range(ext4_block_bitmap(sb, gdp), block, len) || + in_range(ext4_inode_bitmap(sb, gdp), block, len) || + in_range(block, ext4_inode_table(sb, gdp), + EXT4_SB(sb)->s_itb_per_group) || + in_range(block + len - 1, ext4_inode_table(sb, gdp), + EXT4_SB(sb)->s_itb_per_group)) { ext4_error(sb, __func__, "Allocating block in system zone - block = %llu", block); + /* File system mounted not to panic on error + * Fix the bitmap and repeat the block allocation + * We leak some of the blocks here. + */ + mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group), + bitmap_bh->b_data, ac->ac_b_ex.fe_start, + ac->ac_b_ex.fe_len); + err = ext4_journal_dirty_metadata(handle, bitmap_bh); + if (!err) + err = -EAGAIN; + goto out_err; } #ifdef AGGRESSIVE_CHECK { @@ -4032,7 +4045,6 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, ac->ac_op = EXT4_MB_HISTORY_ALLOC; ext4_mb_normalize_request(ac, ar); - repeat: /* allocate space in core */ ext4_mb_regular_allocator(ac); @@ -4046,10 +4058,21 @@ repeat: } if (likely(ac->ac_status == AC_STATUS_FOUND)) { - ext4_mb_mark_diskspace_used(ac, handle); - *errp = 0; - block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex); - ar->len = ac->ac_b_ex.fe_len; + *errp = ext4_mb_mark_diskspace_used(ac, handle); + if (*errp == -EAGAIN) { + ac->ac_b_ex.fe_group = 0; + ac->ac_b_ex.fe_start = 0; + ac->ac_b_ex.fe_len = 0; + ac->ac_status = AC_STATUS_CONTINUE; + goto repeat; + } else if (*errp) { + ac->ac_b_ex.fe_len = 0; + ar->len = 0; + ext4_mb_show_ac(ac); + } else { + block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex); + ar->len = ac->ac_b_ex.fe_len; + } } else { freed = ext4_mb_discard_preallocations(sb, ac->ac_o_ex.fe_len); if (freed) @@ -4236,6 +4259,8 @@ do_more: ext4_error(sb, __func__, "Freeing blocks in system zone - " "Block = %lu, count = %lu", block, count); + /* err = 0. ext4_std_error should be a no op */ + goto error_return; } BUFFER_TRACE(bitmap_bh, "getting write access"); -- cgit v1.2.3-70-g09d2 From 02c471cb17203c748e9bc87003052c1f46e5df69 Mon Sep 17 00:00:00 2001 From: Mingming Cao Date: Thu, 15 May 2008 14:46:17 -0400 Subject: jbd2: update transaction t_state to T_COMMIT fix Updating the current transaction's t_state is protected by j_state_lock. We need to do the same when updating the t_state to T_COMMIT. Acked-by: Jan Kara Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" Signed-off-by: Andrew Morton --- fs/jbd2/commit.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index e0139786f71..4d99685fdce 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -560,7 +560,9 @@ void jbd2_journal_commit_transaction(journal_t *journal) * transaction! Now comes the tricky part: we need to write out * metadata. Loop over the transaction's entire buffer list: */ + spin_lock(&journal->j_state_lock); commit_transaction->t_state = T_COMMIT; + spin_unlock(&journal->j_state_lock); stats.u.run.rs_logging = jiffies; stats.u.run.rs_flushing = jbd2_time_diff(stats.u.run.rs_flushing, -- cgit v1.2.3-70-g09d2