aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-11-13 15:45:43 +0900
committerLinus Torvalds <torvalds@linux-foundation.org>2013-11-13 15:45:43 +0900
commit5cbb3d216e2041700231bcfc383ee5f8b7fc8b74 (patch)
treea738fa82dbcefa9bd283c08bc67f38827be63937 /fs
parent9bc9ccd7db1c9f043f75380b5a5b94912046a60e (diff)
parent4e9b45a19241354daec281d7a785739829b52359 (diff)
Merge branch 'akpm' (patches from Andrew Morton)
Merge first patch-bomb from Andrew Morton: "Quite a lot of other stuff is banked up awaiting further next->mainline merging, but this batch contains: - Lots of random misc patches - OCFS2 - Most of MM - backlight updates - lib/ updates - printk updates - checkpatch updates - epoll tweaking - rtc updates - hfs - hfsplus - documentation - procfs - update gcov to gcc-4.7 format - IPC" * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (269 commits) ipc, msg: fix message length check for negative values ipc/util.c: remove unnecessary work pending test devpts: plug the memory leak in kill_sb ./Makefile: export initial ramdisk compression config option init/Kconfig: add option to disable kernel compression drivers: w1: make w1_slave::flags long to avoid memory corruption drivers/w1/masters/ds1wm.cuse dev_get_platdata() drivers/memstick/core/ms_block.c: fix unreachable state in h_msb_read_page() drivers/memstick/core/mspro_block.c: fix attributes array allocation drivers/pps/clients/pps-gpio.c: remove redundant of_match_ptr kernel/panic.c: reduce 1 byte usage for print tainted buffer gcov: reuse kbasename helper kernel/gcov/fs.c: use pr_warn() kernel/module.c: use pr_foo() gcov: compile specific gcov implementation based on gcc version gcov: add support for gcc 4.7 gcov format gcov: move gcov structs definitions to a gcc version specific file kernel/taskstats.c: return -ENOMEM when alloc memory fails in add_del_listener() kernel/taskstats.c: add nla_nest_cancel() for failure processing between nla_nest_start() and nla_nest_end() kernel/sysctl_binary.c: use scnprintf() instead of snprintf() ...
Diffstat (limited to 'fs')
-rw-r--r--fs/cramfs/Kconfig5
-rw-r--r--fs/debugfs/inode.c3
-rw-r--r--fs/devpts/inode.c1
-rw-r--r--fs/eventpoll.c145
-rw-r--r--fs/exec.c6
-rw-r--r--fs/fs-writeback.c33
-rw-r--r--fs/hfs/btree.h5
-rw-r--r--fs/hfsplus/btree.c112
-rw-r--r--fs/hfsplus/hfsplus_fs.h10
-rw-r--r--fs/hfsplus/hfsplus_raw.h11
-rw-r--r--fs/hfsplus/super.c2
-rw-r--r--fs/hfsplus/xattr.c207
-rw-r--r--fs/ocfs2/alloc.c2
-rw-r--r--fs/ocfs2/aops.c32
-rw-r--r--fs/ocfs2/buffer_head_io.c4
-rw-r--r--fs/ocfs2/cluster/heartbeat.c40
-rw-r--r--fs/ocfs2/cluster/masklog.h3
-rw-r--r--fs/ocfs2/dir.c12
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c8
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c7
-rw-r--r--fs/ocfs2/file.c5
-rw-r--r--fs/ocfs2/journal.h3
-rw-r--r--fs/ocfs2/move_extents.c11
-rw-r--r--fs/ocfs2/namei.c2
-rw-r--r--fs/ocfs2/refcounttree.c20
-rw-r--r--fs/ocfs2/resize.c12
-rw-r--r--fs/ocfs2/stackglue.c8
-rw-r--r--fs/ocfs2/suballoc.c4
-rw-r--r--fs/ocfs2/super.c4
-rw-r--r--fs/ocfs2/xattr.c28
-rw-r--r--fs/proc/Kconfig4
-rw-r--r--fs/proc/inode.c16
-rw-r--r--fs/proc/kcore.c3
-rw-r--r--fs/proc/meminfo.c5
-rw-r--r--fs/proc/task_mmu.c17
-rw-r--r--fs/sync.c15
-rw-r--r--fs/xfs/xfs_super.c2
37 files changed, 601 insertions, 206 deletions
diff --git a/fs/cramfs/Kconfig b/fs/cramfs/Kconfig
index cd06466f365..11b29d491b7 100644
--- a/fs/cramfs/Kconfig
+++ b/fs/cramfs/Kconfig
@@ -1,5 +1,5 @@
config CRAMFS
- tristate "Compressed ROM file system support (cramfs)"
+ tristate "Compressed ROM file system support (cramfs) (OBSOLETE)"
depends on BLOCK
select ZLIB_INFLATE
help
@@ -16,4 +16,7 @@ config CRAMFS
cramfs. Note that the root file system (the one containing the
directory /) cannot be compiled as a module.
+ This filesystem is obsoleted by SquashFS, which is much better
+ in terms of performance and features.
+
If unsure, say N.
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index c7c83ff0f75..9c0444cccbe 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -566,8 +566,7 @@ void debugfs_remove_recursive(struct dentry *dentry)
mutex_lock(&parent->d_inode->i_mutex);
if (child != dentry) {
- next = list_entry(child->d_u.d_child.next, struct dentry,
- d_u.d_child);
+ next = list_next_entry(child, d_u.d_child);
goto up;
}
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 073d30b9d1a..a726b9f29cb 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -498,6 +498,7 @@ static void devpts_kill_sb(struct super_block *sb)
{
struct pts_fs_info *fsi = DEVPTS_SB(sb);
+ ida_destroy(&fsi->allocated_ptys);
kfree(fsi);
kill_litter_super(sb);
}
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 983e3960abf..79b65c3b9e8 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -41,6 +41,7 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/compat.h>
+#include <linux/rculist.h>
/*
* LOCKING:
@@ -133,8 +134,12 @@ struct nested_calls {
* of these on a server and we do not want this to take another cache line.
*/
struct epitem {
- /* RB tree node used to link this structure to the eventpoll RB tree */
- struct rb_node rbn;
+ union {
+ /* RB tree node links this structure to the eventpoll RB tree */
+ struct rb_node rbn;
+ /* Used to free the struct epitem */
+ struct rcu_head rcu;
+ };
/* List header used to link this structure to the eventpoll ready list */
struct list_head rdllink;
@@ -580,14 +585,14 @@ static inline void ep_pm_stay_awake_rcu(struct epitem *epi)
* @sproc: Pointer to the scan callback.
* @priv: Private opaque data passed to the @sproc callback.
* @depth: The current depth of recursive f_op->poll calls.
+ * @ep_locked: caller already holds ep->mtx
*
* Returns: The same integer error code returned by the @sproc callback.
*/
static int ep_scan_ready_list(struct eventpoll *ep,
int (*sproc)(struct eventpoll *,
struct list_head *, void *),
- void *priv,
- int depth)
+ void *priv, int depth, bool ep_locked)
{
int error, pwake = 0;
unsigned long flags;
@@ -598,7 +603,9 @@ static int ep_scan_ready_list(struct eventpoll *ep,
* We need to lock this because we could be hit by
* eventpoll_release_file() and epoll_ctl().
*/
- mutex_lock_nested(&ep->mtx, depth);
+
+ if (!ep_locked)
+ mutex_lock_nested(&ep->mtx, depth);
/*
* Steal the ready list, and re-init the original one to the
@@ -662,7 +669,8 @@ static int ep_scan_ready_list(struct eventpoll *ep,
}
spin_unlock_irqrestore(&ep->lock, flags);
- mutex_unlock(&ep->mtx);
+ if (!ep_locked)
+ mutex_unlock(&ep->mtx);
/* We have to call this outside the lock */
if (pwake)
@@ -671,6 +679,12 @@ static int ep_scan_ready_list(struct eventpoll *ep,
return error;
}
+static void epi_rcu_free(struct rcu_head *head)
+{
+ struct epitem *epi = container_of(head, struct epitem, rcu);
+ kmem_cache_free(epi_cache, epi);
+}
+
/*
* Removes a "struct epitem" from the eventpoll RB tree and deallocates
* all the associated resources. Must be called with "mtx" held.
@@ -692,8 +706,7 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
/* Remove the current item from the list of epoll hooks */
spin_lock(&file->f_lock);
- if (ep_is_linked(&epi->fllink))
- list_del_init(&epi->fllink);
+ list_del_rcu(&epi->fllink);
spin_unlock(&file->f_lock);
rb_erase(&epi->rbn, &ep->rbr);
@@ -704,9 +717,14 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
spin_unlock_irqrestore(&ep->lock, flags);
wakeup_source_unregister(ep_wakeup_source(epi));
-
- /* At this point it is safe to free the eventpoll item */
- kmem_cache_free(epi_cache, epi);
+ /*
+ * At this point it is safe to free the eventpoll item. Use the union
+ * field epi->rcu, since we are trying to minimize the size of
+ * 'struct epitem'. The 'rbn' field is no longer in use. Protected by
+ * ep->mtx. The rcu read side, reverse_path_check_proc(), does not make
+ * use of the rbn field.
+ */
+ call_rcu(&epi->rcu, epi_rcu_free);
atomic_long_dec(&ep->user->epoll_watches);
@@ -807,15 +825,34 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
return 0;
}
+static void ep_ptable_queue_proc(struct file *file, wait_queue_head_t *whead,
+ poll_table *pt);
+
+struct readyevents_arg {
+ struct eventpoll *ep;
+ bool locked;
+};
+
static int ep_poll_readyevents_proc(void *priv, void *cookie, int call_nests)
{
- return ep_scan_ready_list(priv, ep_read_events_proc, NULL, call_nests + 1);
+ struct readyevents_arg *arg = priv;
+
+ return ep_scan_ready_list(arg->ep, ep_read_events_proc, NULL,
+ call_nests + 1, arg->locked);
}
static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
{
int pollflags;
struct eventpoll *ep = file->private_data;
+ struct readyevents_arg arg;
+
+ /*
+ * During ep_insert() we already hold the ep->mtx for the tfile.
+ * Prevent re-aquisition.
+ */
+ arg.locked = wait && (wait->_qproc == ep_ptable_queue_proc);
+ arg.ep = ep;
/* Insert inside our poll wait queue */
poll_wait(file, &ep->poll_wait, wait);
@@ -827,7 +864,7 @@ static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait)
* could re-enter here.
*/
pollflags = ep_call_nested(&poll_readywalk_ncalls, EP_MAX_NESTS,
- ep_poll_readyevents_proc, ep, ep, current);
+ ep_poll_readyevents_proc, &arg, ep, current);
return pollflags != -1 ? pollflags : 0;
}
@@ -872,7 +909,6 @@ static const struct file_operations eventpoll_fops = {
*/
void eventpoll_release_file(struct file *file)
{
- struct list_head *lsthead = &file->f_ep_links;
struct eventpoll *ep;
struct epitem *epi;
@@ -890,17 +926,12 @@ void eventpoll_release_file(struct file *file)
* Besides, ep_remove() acquires the lock, so we can't hold it here.
*/
mutex_lock(&epmutex);
-
- while (!list_empty(lsthead)) {
- epi = list_first_entry(lsthead, struct epitem, fllink);
-
+ list_for_each_entry_rcu(epi, &file->f_ep_links, fllink) {
ep = epi->ep;
- list_del_init(&epi->fllink);
mutex_lock_nested(&ep->mtx, 0);
ep_remove(ep, epi);
mutex_unlock(&ep->mtx);
}
-
mutex_unlock(&epmutex);
}
@@ -1138,7 +1169,9 @@ static int reverse_path_check_proc(void *priv, void *cookie, int call_nests)
struct file *child_file;
struct epitem *epi;
- list_for_each_entry(epi, &file->f_ep_links, fllink) {
+ /* CTL_DEL can remove links here, but that can't increase our count */
+ rcu_read_lock();
+ list_for_each_entry_rcu(epi, &file->f_ep_links, fllink) {
child_file = epi->ep->file;
if (is_file_epoll(child_file)) {
if (list_empty(&child_file->f_ep_links)) {
@@ -1160,6 +1193,7 @@ static int reverse_path_check_proc(void *priv, void *cookie, int call_nests)
"file is not an ep!\n");
}
}
+ rcu_read_unlock();
return error;
}
@@ -1231,7 +1265,7 @@ static noinline void ep_destroy_wakeup_source(struct epitem *epi)
* Must be called with "mtx" held.
*/
static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
- struct file *tfile, int fd)
+ struct file *tfile, int fd, int full_check)
{
int error, revents, pwake = 0;
unsigned long flags;
@@ -1286,7 +1320,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
/* Add the current item to the list of active epoll hook for this file */
spin_lock(&tfile->f_lock);
- list_add_tail(&epi->fllink, &tfile->f_ep_links);
+ list_add_tail_rcu(&epi->fllink, &tfile->f_ep_links);
spin_unlock(&tfile->f_lock);
/*
@@ -1297,7 +1331,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
/* now check if we've created too many backpaths */
error = -EINVAL;
- if (reverse_path_check())
+ if (full_check && reverse_path_check())
goto error_remove_epi;
/* We have to drop the new item inside our item list to keep track of it */
@@ -1327,8 +1361,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
error_remove_epi:
spin_lock(&tfile->f_lock);
- if (ep_is_linked(&epi->fllink))
- list_del_init(&epi->fllink);
+ list_del_rcu(&epi->fllink);
spin_unlock(&tfile->f_lock);
rb_erase(&epi->rbn, &ep->rbr);
@@ -1521,7 +1554,7 @@ static int ep_send_events(struct eventpoll *ep,
esed.maxevents = maxevents;
esed.events = events;
- return ep_scan_ready_list(ep, ep_send_events_proc, &esed, 0);
+ return ep_scan_ready_list(ep, ep_send_events_proc, &esed, 0, false);
}
static inline struct timespec ep_set_mstimeout(long ms)
@@ -1791,11 +1824,12 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
struct epoll_event __user *, event)
{
int error;
- int did_lock_epmutex = 0;
+ int full_check = 0;
struct fd f, tf;
struct eventpoll *ep;
struct epitem *epi;
struct epoll_event epds;
+ struct eventpoll *tep = NULL;
error = -EFAULT;
if (ep_op_has_event(op) &&
@@ -1844,26 +1878,40 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
* and hang them on the tfile_check_list, so we can check that we
* haven't created too many possible wakeup paths.
*
- * We need to hold the epmutex across both ep_insert and ep_remove
- * b/c we want to make sure we are looking at a coherent view of
- * epoll network.
+ * We do not need to take the global 'epumutex' on EPOLL_CTL_ADD when
+ * the epoll file descriptor is attaching directly to a wakeup source,
+ * unless the epoll file descriptor is nested. The purpose of taking the
+ * 'epmutex' on add is to prevent complex toplogies such as loops and
+ * deep wakeup paths from forming in parallel through multiple
+ * EPOLL_CTL_ADD operations.
*/
- if (op == EPOLL_CTL_ADD || op == EPOLL_CTL_DEL) {
- mutex_lock(&epmutex);
- did_lock_epmutex = 1;
- }
+ mutex_lock_nested(&ep->mtx, 0);
if (op == EPOLL_CTL_ADD) {
- if (is_file_epoll(tf.file)) {
- error = -ELOOP;
- if (ep_loop_check(ep, tf.file) != 0) {
- clear_tfile_check_list();
- goto error_tgt_fput;
+ if (!list_empty(&f.file->f_ep_links) ||
+ is_file_epoll(tf.file)) {
+ full_check = 1;
+ mutex_unlock(&ep->mtx);
+ mutex_lock(&epmutex);
+ if (is_file_epoll(tf.file)) {
+ error = -ELOOP;
+ if (ep_loop_check(ep, tf.file) != 0) {
+ clear_tfile_check_list();
+ goto error_tgt_fput;
+ }
+ } else
+ list_add(&tf.file->f_tfile_llink,
+ &tfile_check_list);
+ mutex_lock_nested(&ep->mtx, 0);
+ if (is_file_epoll(tf.file)) {
+ tep = tf.file->private_data;
+ mutex_lock_nested(&tep->mtx, 1);
}
- } else
- list_add(&tf.file->f_tfile_llink, &tfile_check_list);
+ }
+ }
+ if (op == EPOLL_CTL_DEL && is_file_epoll(tf.file)) {
+ tep = tf.file->private_data;
+ mutex_lock_nested(&tep->mtx, 1);
}
-
- mutex_lock_nested(&ep->mtx, 0);
/*
* Try to lookup the file inside our RB tree, Since we grabbed "mtx"
@@ -1877,10 +1925,11 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
case EPOLL_CTL_ADD:
if (!epi) {
epds.events |= POLLERR | POLLHUP;
- error = ep_insert(ep, &epds, tf.file, fd);
+ error = ep_insert(ep, &epds, tf.file, fd, full_check);
} else
error = -EEXIST;
- clear_tfile_check_list();
+ if (full_check)
+ clear_tfile_check_list();
break;
case EPOLL_CTL_DEL:
if (epi)
@@ -1896,10 +1945,12 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
error = -ENOENT;
break;
}
+ if (tep != NULL)
+ mutex_unlock(&tep->mtx);
mutex_unlock(&ep->mtx);
error_tgt_fput:
- if (did_lock_epmutex)
+ if (full_check)
mutex_unlock(&epmutex);
fdput(tf);
diff --git a/fs/exec.c b/fs/exec.c
index be4c81c7251..977319fd77f 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1664,6 +1664,12 @@ int __get_dumpable(unsigned long mm_flags)
return (ret > SUID_DUMP_USER) ? SUID_DUMP_ROOT : ret;
}
+/*
+ * This returns the actual value of the suid_dumpable flag. For things
+ * that are using this for checking for privilege transitions, it must
+ * test against SUID_DUMP_USER rather than treating it as a boolean
+ * value.
+ */
int get_dumpable(struct mm_struct *mm)
{
return __get_dumpable(mm->flags);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 09c11329a17..1f4a10ece2f 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -40,13 +40,18 @@
struct wb_writeback_work {
long nr_pages;
struct super_block *sb;
- unsigned long *older_than_this;
+ /*
+ * Write only inodes dirtied before this time. Don't forget to set
+ * older_than_this_is_set when you set this.
+ */
+ unsigned long older_than_this;
enum writeback_sync_modes sync_mode;
unsigned int tagged_writepages:1;
unsigned int for_kupdate:1;
unsigned int range_cyclic:1;
unsigned int for_background:1;
unsigned int for_sync:1; /* sync(2) WB_SYNC_ALL writeback */
+ unsigned int older_than_this_is_set:1;
enum wb_reason reason; /* why was writeback initiated? */
struct list_head list; /* pending work list */
@@ -247,10 +252,10 @@ static int move_expired_inodes(struct list_head *delaying_queue,
int do_sb_sort = 0;
int moved = 0;
+ WARN_ON_ONCE(!work->older_than_this_is_set);
while (!list_empty(delaying_queue)) {
inode = wb_inode(delaying_queue->prev);
- if (work->older_than_this &&
- inode_dirtied_after(inode, *work->older_than_this))
+ if (inode_dirtied_after(inode, work->older_than_this))
break;
list_move(&inode->i_wb_list, &tmp);
moved++;
@@ -734,6 +739,8 @@ static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
.sync_mode = WB_SYNC_NONE,
.range_cyclic = 1,
.reason = reason,
+ .older_than_this = jiffies,
+ .older_than_this_is_set = 1,
};
spin_lock(&wb->list_lock);
@@ -792,12 +799,13 @@ static long wb_writeback(struct bdi_writeback *wb,
{
unsigned long wb_start = jiffies;
long nr_pages = work->nr_pages;
- unsigned long oldest_jif;
struct inode *inode;
long progress;
- oldest_jif = jiffies;
- work->older_than_this = &oldest_jif;
+ if (!work->older_than_this_is_set) {
+ work->older_than_this = jiffies;
+ work->older_than_this_is_set = 1;
+ }
spin_lock(&wb->list_lock);
for (;;) {
@@ -831,10 +839,10 @@ static long wb_writeback(struct bdi_writeback *wb,
* safe.
*/
if (work->for_kupdate) {
- oldest_jif = jiffies -
+ work->older_than_this = jiffies -
msecs_to_jiffies(dirty_expire_interval * 10);
} else if (work->for_background)
- oldest_jif = jiffies;
+ work->older_than_this = jiffies;
trace_writeback_start(wb->bdi, work);
if (list_empty(&wb->b_io))
@@ -1346,18 +1354,21 @@ EXPORT_SYMBOL(try_to_writeback_inodes_sb);
/**
* sync_inodes_sb - sync sb inode pages
- * @sb: the superblock
+ * @sb: the superblock
+ * @older_than_this: timestamp
*
* This function writes and waits on any dirty inode belonging to this
- * super_block.
+ * superblock that has been dirtied before given timestamp.
*/
-void sync_inodes_sb(struct super_block *sb)
+void sync_inodes_sb(struct super_block *sb, unsigned long older_than_this)
{
DECLARE_COMPLETION_ONSTACK(done);
struct wb_writeback_work work = {
.sb = sb,
.sync_mode = WB_SYNC_ALL,
.nr_pages = LONG_MAX,
+ .older_than_this = older_than_this,
+ .older_than_this_is_set = 1,
.range_cyclic = 0,
.done = &done,
.reason = WB_REASON_SYNC,
diff --git a/fs/hfs/btree.h b/fs/hfs/btree.h
index 2a1d712f85d..f6bd266d70b 100644
--- a/fs/hfs/btree.h
+++ b/fs/hfs/btree.h
@@ -153,11 +153,6 @@ struct hfs_btree_header_rec {
u32 reserved3[16];
} __packed;
-#define HFS_NODE_INDEX 0x00 /* An internal (index) node */
-#define HFS_NODE_HEADER 0x01 /* The tree header node (node 0) */
-#define HFS_NODE_MAP 0x02 /* Holds part of the bitmap of used nodes */
-#define HFS_NODE_LEAF 0xFF /* A leaf (ndNHeight==1) node */
-
#define BTREE_ATTR_BADCLOSE 0x00000001 /* b-tree not closed properly. not
used by hfsplus. */
#define HFS_TREE_BIGKEYS 0x00000002 /* key length is u16 instead of u8.
diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c
index 0c6540c9116..0fcec8b2a90 100644
--- a/fs/hfsplus/btree.c
+++ b/fs/hfsplus/btree.c
@@ -15,6 +15,118 @@
#include "hfsplus_fs.h"
#include "hfsplus_raw.h"
+/*
+ * Initial source code of clump size calculation is gotten
+ * from http://opensource.apple.com/tarballs/diskdev_cmds/
+ */
+#define CLUMP_ENTRIES 15
+
+static short clumptbl[CLUMP_ENTRIES * 3] = {
+/*
+ * Volume Attributes Catalog Extents
+ * Size Clump (MB) Clump (MB) Clump (MB)
+ */
+ /* 1GB */ 4, 4, 4,
+ /* 2GB */ 6, 6, 4,
+ /* 4GB */ 8, 8, 4,
+ /* 8GB */ 11, 11, 5,
+ /*
+ * For volumes 16GB and larger, we want to make sure that a full OS
+ * install won't require fragmentation of the Catalog or Attributes
+ * B-trees. We do this by making the clump sizes sufficiently large,
+ * and by leaving a gap after the B-trees for them to grow into.
+ *
+ * For SnowLeopard 10A298, a FullNetInstall with all packages selected
+ * results in:
+ * Catalog B-tree Header
+ * nodeSize: 8192
+ * totalNodes: 31616
+ * freeNodes: 1978
+ * (used = 231.55 MB)
+ * Attributes B-tree Header
+ * nodeSize: 8192
+ * totalNodes: 63232
+ * freeNodes: 958
+ * (used = 486.52 MB)
+ *
+ * We also want Time Machine backup volumes to have a sufficiently
+ * large clump size to reduce fragmentation.
+ *
+ * The series of numbers for Catalog and Attribute form a geometric
+ * series. For Catalog (16GB to 512GB), each term is 8**(1/5) times
+ * the previous term. For Attributes (16GB to 512GB), each term is
+ * 4**(1/5) times the previous term. For 1TB to 16TB, each term is
+ * 2**(1/5) times the previous term.
+ */
+ /* 16GB */ 64, 32, 5,
+ /* 32GB */ 84, 49, 6,
+ /* 64GB */ 111, 74, 7,
+ /* 128GB */ 147, 111, 8,
+ /* 256GB */ 194, 169, 9,
+ /* 512GB */ 256, 256, 11,
+ /* 1TB */ 294, 294, 14,
+ /* 2TB */ 338, 338, 16,
+ /* 4TB */ 388, 388, 20,
+ /* 8TB */ 446, 446, 25,
+ /* 16TB */ 512, 512, 32
+};
+
+u32 hfsplus_calc_btree_clump_size(u32 block_size, u32 node_size,
+ u64 sectors, int file_id)
+{
+ u32 mod = max(node_size, block_size);
+ u32 clump_size;
+ int column;
+ int i;
+
+ /* Figure out which column of the above table to use for this file. */
+ switch (file_id) {
+ case HFSPLUS_ATTR_CNID:
+ column = 0;
+ break;
+ case HFSPLUS_CAT_CNID:
+ column = 1;
+ break;
+ default:
+ column = 2;
+ break;
+ }
+
+ /*
+ * The default clump size is 0.8% of the volume size. And
+ * it must also be a multiple of the node and block size.
+ */
+ if (sectors < 0x200000) {
+ clump_size = sectors << 2; /* 0.8 % */
+ if (clump_size < (8 * node_size))
+ clump_size = 8 * node_size;
+ } else {
+ /* turn exponent into table index... */
+ for (i = 0, sectors = sectors >> 22;
+ sectors && (i < CLUMP_ENTRIES - 1);
+ ++i, sectors = sectors >> 1) {
+ /* empty body */
+ }
+
+ clump_size = clumptbl[column + (i) * 3] * 1024 * 1024;
+ }
+
+ /*
+ * Round the clump size to a multiple of node and block size.
+ * NOTE: This rounds down.
+ */
+ clump_size /= mod;
+ clump_size *= mod;
+
+ /*
+ * Rounding down could have rounded down to 0 if the block size was
+ * greater than the clump size. If so, just use one block or node.
+ */
+ if (clump_size == 0)
+ clump_size = mod;
+
+ return clump_size;
+}
/* Get a reference to a B*Tree and do some initial checks */
struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id)
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 2b9cd01696e..08846425b67 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -127,6 +127,14 @@ struct hfs_bnode {
#define HFS_BNODE_DELETED 4
/*
+ * Attributes file states
+ */
+#define HFSPLUS_EMPTY_ATTR_TREE 0
+#define HFSPLUS_CREATING_ATTR_TREE 1
+#define HFSPLUS_VALID_ATTR_TREE 2
+#define HFSPLUS_FAILED_ATTR_TREE 3
+
+/*
* HFS+ superblock info (built from Volume Header on disk)
*/
@@ -141,6 +149,7 @@ struct hfsplus_sb_info {
struct hfs_btree *ext_tree;
struct hfs_btree *cat_tree;
struct hfs_btree *attr_tree;
+ atomic_t attr_tree_state;
struct inode *alloc_file;
struct inode *hidden_dir;
struct nls_table *nls;
@@ -380,6 +389,7 @@ int hfsplus_block_allocate(struct super_block *, u32, u32, u32 *);
int hfsplus_block_free(struct super_block *, u32, u32);
/* btree.c */
+u32 hfsplus_calc_btree_clump_size(u32, u32, u64, int);
struct hfs_btree *hfs_btree_open(struct super_block *, u32);
void hfs_btree_close(struct hfs_btree *);
int hfs_btree_write(struct hfs_btree *);
diff --git a/fs/hfsplus/hfsplus_raw.h b/fs/hfsplus/hfsplus_raw.h
index 452ede01b03..8ffb3a8ffe7 100644
--- a/fs/hfsplus/hfsplus_raw.h
+++ b/fs/hfsplus/hfsplus_raw.h
@@ -156,10 +156,10 @@ struct hfs_bnode_desc {
} __packed;
/* HFS+ BTree node types */
-#define HFS_NODE_INDEX 0x00
-#define HFS_NODE_HEADER 0x01
-#define HFS_NODE_MAP 0x02
-#define HFS_NODE_LEAF 0xFF
+#define HFS_NODE_INDEX 0x00 /* An internal (index) node */
+#define HFS_NODE_HEADER 0x01 /* The tree header node (node 0) */
+#define HFS_NODE_MAP 0x02 /* Holds part of the bitmap of used nodes */
+#define HFS_NODE_LEAF 0xFF /* A leaf (ndNHeight==1) node */
/* HFS+ BTree header */
struct hfs_btree_header_rec {
@@ -187,6 +187,9 @@ struct hfs_btree_header_rec {
/* HFS+ BTree misc info */
#define HFSPLUS_TREE_HEAD 0
#define HFSPLUS_NODE_MXSZ 32768
+#define HFSPLUS_ATTR_TREE_NODE_SIZE 8192
+#define HFSPLUS_BTREE_HDR_NODE_RECS_COUNT 3
+#define HFSPLUS_BTREE_HDR_USER_BYTES 128
/* Some special File ID numbers (stolen from hfs.h) */
#define HFSPLUS_POR_CNID 1 /* Parent Of the Root */
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 4c4d142cf89..80875aa640e 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -474,12 +474,14 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
pr_err("failed to load catalog file\n");
goto out_close_ext_tree;
}
+ atomic_set(&sbi->attr_tree_state, HFSPLUS_EMPTY_ATTR_TREE);
if (vhdr->attr_file.total_blocks != 0) {
sbi->attr_tree = hfs_btree_open(sb, HFSPLUS_ATTR_CNID);
if (!sbi->attr_tree) {
pr_err("failed to load attributes file\n");
goto out_close_cat_tree;
}
+ atomic_set(&sbi->attr_tree_state, HFSPLUS_VALID_ATTR_TREE);
}
sb->s_xattr = hfsplus_xattr_handlers;
diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c
index bd8471fb9a6..efc85b1377c 100644
--- a/fs/hfsplus/xattr.c
+++ b/fs/hfsplus/xattr.c
@@ -127,6 +127,208 @@ static int can_set_xattr(struct inode *inode, const char *name,
return 0;
}
+static void hfsplus_init_header_node(struct inode *attr_file,
+ u32 clump_size,
+ char *buf, size_t node_size)
+{
+ struct hfs_bnode_desc *desc;
+ struct hfs_btree_header_rec *head;
+ u16 offset;
+ __be16 *rec_offsets;
+ u32 hdr_node_map_rec_bits;
+ char *bmp;
+ u32 used_nodes;