diff options
author | Tejun Heo <tj@kernel.org> | 2011-11-28 09:46:22 -0800 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2011-11-28 09:46:22 -0800 |
commit | d4bbf7e7759afc172e2bfbc5c416324590049cdd (patch) | |
tree | 7eab5ee5481cd3dcf1162329fec827177640018a /mm/backing-dev.c | |
parent | a150439c4a97db379f0ed6faa46fbbb6e7bf3cb2 (diff) | |
parent | 401d0069cb344f401bc9d264c31db55876ff78c0 (diff) |
Merge branch 'master' into x86/memblock
Conflicts & resolutions:
* arch/x86/xen/setup.c
dc91c728fd "xen: allow extra memory to be in multiple regions"
24aa07882b "memblock, x86: Replace memblock_x86_reserve/free..."
conflicted on xen_add_extra_mem() updates. The resolution is
trivial as the latter just want to replace
memblock_x86_reserve_range() with memblock_reserve().
* drivers/pci/intel-iommu.c
166e9278a3f "x86/ia64: intel-iommu: move to drivers/iommu/"
5dfe8660a3d "bootmem: Replace work_with_active_regions() with..."
conflicted as the former moved the file under drivers/iommu/.
Resolved by applying the chnages from the latter on the moved
file.
* mm/Kconfig
6661672053a "memblock: add NO_BOOTMEM config symbol"
c378ddd53f9 "memblock, x86: Make ARCH_DISCARD_MEMBLOCK a config option"
conflicted trivially. Both added config options. Just
letting both add their own options resolves the conflict.
* mm/memblock.c
d1f0ece6cdc "mm/memblock.c: small function definition fixes"
ed7b56a799c "memblock: Remove memblock_memory_can_coalesce()"
confliected. The former updates function removed by the
latter. Resolution is trivial.
Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'mm/backing-dev.c')
-rw-r--r-- | mm/backing-dev.c | 127 |
1 files changed, 87 insertions, 40 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c index f032e6e1e09..71034f41a2b 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -45,6 +45,17 @@ static struct timer_list sync_supers_timer; static int bdi_sync_supers(void *); static void sync_supers_timer_fn(unsigned long); +void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2) +{ + if (wb1 < wb2) { + spin_lock(&wb1->list_lock); + spin_lock_nested(&wb2->list_lock, 1); + } else { + spin_lock(&wb2->list_lock); + spin_lock_nested(&wb1->list_lock, 1); + } +} + #ifdef CONFIG_DEBUG_FS #include <linux/debugfs.h> #include <linux/seq_file.h> @@ -67,34 +78,44 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v) struct inode *inode; nr_dirty = nr_io = nr_more_io = 0; - spin_lock(&inode_wb_list_lock); + spin_lock(&wb->list_lock); list_for_each_entry(inode, &wb->b_dirty, i_wb_list) nr_dirty++; list_for_each_entry(inode, &wb->b_io, i_wb_list) nr_io++; list_for_each_entry(inode, &wb->b_more_io, i_wb_list) nr_more_io++; - spin_unlock(&inode_wb_list_lock); + spin_unlock(&wb->list_lock); global_dirty_limits(&background_thresh, &dirty_thresh); bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh); #define K(x) ((x) << (PAGE_SHIFT - 10)) seq_printf(m, - "BdiWriteback: %8lu kB\n" - "BdiReclaimable: %8lu kB\n" - "BdiDirtyThresh: %8lu kB\n" - "DirtyThresh: %8lu kB\n" - "BackgroundThresh: %8lu kB\n" - "b_dirty: %8lu\n" - "b_io: %8lu\n" - "b_more_io: %8lu\n" - "bdi_list: %8u\n" - "state: %8lx\n", + "BdiWriteback: %10lu kB\n" + "BdiReclaimable: %10lu kB\n" + "BdiDirtyThresh: %10lu kB\n" + "DirtyThresh: %10lu kB\n" + "BackgroundThresh: %10lu kB\n" + "BdiDirtied: %10lu kB\n" + "BdiWritten: %10lu kB\n" + "BdiWriteBandwidth: %10lu kBps\n" + "b_dirty: %10lu\n" + "b_io: %10lu\n" + "b_more_io: %10lu\n" + "bdi_list: %10u\n" + "state: %10lx\n", (unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)), (unsigned long) K(bdi_stat(bdi, BDI_RECLAIMABLE)), - K(bdi_thresh), K(dirty_thresh), - K(background_thresh), nr_dirty, nr_io, nr_more_io, + K(bdi_thresh), + K(dirty_thresh), + K(background_thresh), + (unsigned long) K(bdi_stat(bdi, BDI_DIRTIED)), + (unsigned long) K(bdi_stat(bdi, BDI_WRITTEN)), + (unsigned long) K(bdi->write_bandwidth), + nr_dirty, + nr_io, + nr_more_io, !list_empty(&bdi->bdi_list), bdi->state); #undef K @@ -249,18 +270,6 @@ int bdi_has_dirty_io(struct backing_dev_info *bdi) return wb_has_dirty_io(&bdi->wb); } -static void bdi_flush_io(struct backing_dev_info *bdi) -{ - struct writeback_control wbc = { - .sync_mode = WB_SYNC_NONE, - .older_than_this = NULL, - .range_cyclic = 1, - .nr_to_write = 1024, - }; - - writeback_inodes_wb(&bdi->wb, &wbc); -} - /* * kupdated() used to do this. We cannot do it from the bdi_forker_thread() * or we risk deadlocking on ->s_umount. The longer term solution would be @@ -352,6 +361,17 @@ static unsigned long bdi_longest_inactive(void) return max(5UL * 60 * HZ, interval); } +/* + * Clear pending bit and wakeup anybody waiting for flusher thread creation or + * shutdown + */ +static void bdi_clear_pending(struct backing_dev_info *bdi) +{ + clear_bit(BDI_pending, &bdi->state); + smp_mb__after_clear_bit(); + wake_up_bit(&bdi->state, BDI_pending); +} + static int bdi_forker_thread(void *ptr) { struct bdi_writeback *me = ptr; @@ -383,6 +403,12 @@ static int bdi_forker_thread(void *ptr) } spin_lock_bh(&bdi_lock); + /* + * In the following loop we are going to check whether we have + * some work to do without any synchronization with tasks + * waking us up to do work for them. Set the task state here + * so that we don't miss wakeups after verifying conditions. + */ set_current_state(TASK_INTERRUPTIBLE); list_for_each_entry(bdi, &bdi_list, bdi_list) { @@ -446,9 +472,11 @@ static int bdi_forker_thread(void *ptr) if (IS_ERR(task)) { /* * If thread creation fails, force writeout of - * the bdi from the thread. + * the bdi from the thread. Hopefully 1024 is + * large enough for efficient IO. */ - bdi_flush_io(bdi); + writeback_inodes_wb(&bdi->wb, 1024, + WB_REASON_FORKER_THREAD); } else { /* * The spinlock makes sure we do not lose @@ -461,11 +489,13 @@ static int bdi_forker_thread(void *ptr) spin_unlock_bh(&bdi->wb_lock); wake_up_process(task); } + bdi_clear_pending(bdi); break; case KILL_THREAD: __set_current_state(TASK_RUNNING); kthread_stop(task); + bdi_clear_pending(bdi); break; case NO_ACTION: @@ -481,16 +511,8 @@ static int bdi_forker_thread(void *ptr) else schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10)); try_to_freeze(); - /* Back to the main loop */ - continue; + break; } - - /* - * Clear pending bit and wakeup anybody waiting to tear us down. - */ - clear_bit(BDI_pending, &bdi->state); - smp_mb__after_clear_bit(); - wake_up_bit(&bdi->state, BDI_pending); } return 0; @@ -505,7 +527,7 @@ static void bdi_remove_from_list(struct backing_dev_info *bdi) list_del_rcu(&bdi->bdi_list); spin_unlock_bh(&bdi_lock); - synchronize_rcu(); + synchronize_rcu_expedited(); } int bdi_register(struct backing_dev_info *bdi, struct device *parent, @@ -606,6 +628,7 @@ static void bdi_prune_sb(struct backing_dev_info *bdi) void bdi_unregister(struct backing_dev_info *bdi) { if (bdi->dev) { + bdi_set_min_ratio(bdi, 0); trace_writeback_bdi_unregister(bdi); bdi_prune_sb(bdi); del_timer_sync(&bdi->wb.wakeup_timer); @@ -628,9 +651,15 @@ static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi) INIT_LIST_HEAD(&wb->b_dirty); INIT_LIST_HEAD(&wb->b_io); INIT_LIST_HEAD(&wb->b_more_io); + spin_lock_init(&wb->list_lock); setup_timer(&wb->wakeup_timer, wakeup_timer_fn, (unsigned long)bdi); } +/* + * Initial write bandwidth: 100 MB/s + */ +#define INIT_BW (100 << (20 - PAGE_SHIFT)) + int bdi_init(struct backing_dev_info *bdi) { int i, err; @@ -653,6 +682,15 @@ int bdi_init(struct backing_dev_info *bdi) } bdi->dirty_exceeded = 0; + + bdi->bw_time_stamp = jiffies; + bdi->written_stamp = 0; + + bdi->balanced_dirty_ratelimit = INIT_BW; + bdi->dirty_ratelimit = INIT_BW; + bdi->write_bandwidth = INIT_BW; + bdi->avg_write_bandwidth = INIT_BW; + err = prop_local_init_percpu(&bdi->completions); if (err) { @@ -676,15 +714,24 @@ void bdi_destroy(struct backing_dev_info *bdi) if (bdi_has_dirty_io(bdi)) { struct bdi_writeback *dst = &default_backing_dev_info.wb; - spin_lock(&inode_wb_list_lock); + bdi_lock_two(&bdi->wb, dst); list_splice(&bdi->wb.b_dirty, &dst->b_dirty); list_splice(&bdi->wb.b_io, &dst->b_io); list_splice(&bdi->wb.b_more_io, &dst->b_more_io); - spin_unlock(&inode_wb_list_lock); + spin_unlock(&bdi->wb.list_lock); + spin_unlock(&dst->list_lock); } bdi_unregister(bdi); + /* + * If bdi_unregister() had already been called earlier, the + * wakeup_timer could still be armed because bdi_prune_sb() + * can race with the bdi_wakeup_thread_delayed() calls from + * __mark_inode_dirty(). + */ + del_timer_sync(&bdi->wb.wakeup_timer); + for (i = 0; i < NR_BDI_STAT_ITEMS; i++) percpu_counter_destroy(&bdi->bdi_stat[i]); |