diff options
Diffstat (limited to 'mm/backing-dev.c')
| -rw-r--r-- | mm/backing-dev.c | 259 | 
1 file changed, 29 insertions, 230 deletions
| diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 41733c5dc82..50251749225 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -31,13 +31,14 @@ EXPORT_SYMBOL_GPL(noop_backing_dev_info);  static struct class *bdi_class;  /* - * bdi_lock protects updates to bdi_list and bdi_pending_list, as well as - * reader side protection for bdi_pending_list. bdi_list has RCU reader side + * bdi_lock protects updates to bdi_list. bdi_list has RCU reader side   * locking.   */  DEFINE_SPINLOCK(bdi_lock);  LIST_HEAD(bdi_list); -LIST_HEAD(bdi_pending_list); + +/* bdi_wq serves all asynchronous writeback tasks */ +struct workqueue_struct *bdi_wq;  void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2)  { @@ -257,6 +258,11 @@ static int __init default_bdi_init(void)  {  	int err; +	bdi_wq = alloc_workqueue("writeback", WQ_MEM_RECLAIM | WQ_FREEZABLE | +					      WQ_UNBOUND | WQ_SYSFS, 0); +	if (!bdi_wq) +		return -ENOMEM; +  	err = bdi_init(&default_backing_dev_info);  	if (!err)  		bdi_register(&default_backing_dev_info, NULL, "default"); @@ -271,26 +277,6 @@ int bdi_has_dirty_io(struct backing_dev_info *bdi)  	return wb_has_dirty_io(&bdi->wb);  } -static void wakeup_timer_fn(unsigned long data) -{ -	struct backing_dev_info *bdi = (struct backing_dev_info *)data; - -	spin_lock_bh(&bdi->wb_lock); -	if (bdi->wb.task) { -		trace_writeback_wake_thread(bdi); -		wake_up_process(bdi->wb.task); -	} else if (bdi->dev) { -		/* -		 * When bdi tasks are inactive for long time, they are killed. -		 * In this case we have to wake-up the forker thread which -		 * should create and run the bdi thread. -		 */ -		trace_writeback_wake_forker_thread(bdi); -		wake_up_process(default_backing_dev_info.wb.task); -	} -	spin_unlock_bh(&bdi->wb_lock); -} -  /*   * This function is used when the first inode for this bdi is marked dirty. 
It   * wakes-up the corresponding bdi thread which should then take care of the @@ -307,176 +293,7 @@ void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi)  	unsigned long timeout;  	timeout = msecs_to_jiffies(dirty_writeback_interval * 10); -	mod_timer(&bdi->wb.wakeup_timer, jiffies + timeout); -} - -/* - * Calculate the longest interval (jiffies) bdi threads are allowed to be - * inactive. - */ -static unsigned long bdi_longest_inactive(void) -{ -	unsigned long interval; - -	interval = msecs_to_jiffies(dirty_writeback_interval * 10); -	return max(5UL * 60 * HZ, interval); -} - -/* - * Clear pending bit and wakeup anybody waiting for flusher thread creation or - * shutdown - */ -static void bdi_clear_pending(struct backing_dev_info *bdi) -{ -	clear_bit(BDI_pending, &bdi->state); -	smp_mb__after_clear_bit(); -	wake_up_bit(&bdi->state, BDI_pending); -} - -static int bdi_forker_thread(void *ptr) -{ -	struct bdi_writeback *me = ptr; - -	current->flags |= PF_SWAPWRITE; -	set_freezable(); - -	/* -	 * Our parent may run at a different priority, just set us to normal -	 */ -	set_user_nice(current, 0); - -	for (;;) { -		struct task_struct *task = NULL; -		struct backing_dev_info *bdi; -		enum { -			NO_ACTION,   /* Nothing to do */ -			FORK_THREAD, /* Fork bdi thread */ -			KILL_THREAD, /* Kill inactive bdi thread */ -		} action = NO_ACTION; - -		/* -		 * Temporary measure, we want to make sure we don't see -		 * dirty data on the default backing_dev_info -		 */ -		if (wb_has_dirty_io(me) || !list_empty(&me->bdi->work_list)) { -			del_timer(&me->wakeup_timer); -			wb_do_writeback(me, 0); -		} - -		spin_lock_bh(&bdi_lock); -		/* -		 * In the following loop we are going to check whether we have -		 * some work to do without any synchronization with tasks -		 * waking us up to do work for them. Set the task state here -		 * so that we don't miss wakeups after verifying conditions. 
-		 */ -		set_current_state(TASK_INTERRUPTIBLE); - -		list_for_each_entry(bdi, &bdi_list, bdi_list) { -			bool have_dirty_io; - -			if (!bdi_cap_writeback_dirty(bdi) || -			     bdi_cap_flush_forker(bdi)) -				continue; - -			WARN(!test_bit(BDI_registered, &bdi->state), -			     "bdi %p/%s is not registered!\n", bdi, bdi->name); - -			have_dirty_io = !list_empty(&bdi->work_list) || -					wb_has_dirty_io(&bdi->wb); - -			/* -			 * If the bdi has work to do, but the thread does not -			 * exist - create it. -			 */ -			if (!bdi->wb.task && have_dirty_io) { -				/* -				 * Set the pending bit - if someone will try to -				 * unregister this bdi - it'll wait on this bit. -				 */ -				set_bit(BDI_pending, &bdi->state); -				action = FORK_THREAD; -				break; -			} - -			spin_lock(&bdi->wb_lock); - -			/* -			 * If there is no work to do and the bdi thread was -			 * inactive long enough - kill it. The wb_lock is taken -			 * to make sure no-one adds more work to this bdi and -			 * wakes the bdi thread up. -			 */ -			if (bdi->wb.task && !have_dirty_io && -			    time_after(jiffies, bdi->wb.last_active + -						bdi_longest_inactive())) { -				task = bdi->wb.task; -				bdi->wb.task = NULL; -				spin_unlock(&bdi->wb_lock); -				set_bit(BDI_pending, &bdi->state); -				action = KILL_THREAD; -				break; -			} -			spin_unlock(&bdi->wb_lock); -		} -		spin_unlock_bh(&bdi_lock); - -		/* Keep working if default bdi still has things to do */ -		if (!list_empty(&me->bdi->work_list)) -			__set_current_state(TASK_RUNNING); - -		switch (action) { -		case FORK_THREAD: -			__set_current_state(TASK_RUNNING); -			task = kthread_create(bdi_writeback_thread, &bdi->wb, -					      "flush-%s", dev_name(bdi->dev)); -			if (IS_ERR(task)) { -				/* -				 * If thread creation fails, force writeout of -				 * the bdi from the thread. Hopefully 1024 is -				 * large enough for efficient IO. 
-				 */ -				writeback_inodes_wb(&bdi->wb, 1024, -						    WB_REASON_FORKER_THREAD); -			} else { -				/* -				 * The spinlock makes sure we do not lose -				 * wake-ups when racing with 'bdi_queue_work()'. -				 * And as soon as the bdi thread is visible, we -				 * can start it. -				 */ -				spin_lock_bh(&bdi->wb_lock); -				bdi->wb.task = task; -				spin_unlock_bh(&bdi->wb_lock); -				wake_up_process(task); -			} -			bdi_clear_pending(bdi); -			break; - -		case KILL_THREAD: -			__set_current_state(TASK_RUNNING); -			kthread_stop(task); -			bdi_clear_pending(bdi); -			break; - -		case NO_ACTION: -			if (!wb_has_dirty_io(me) || !dirty_writeback_interval) -				/* -				 * There are no dirty data. The only thing we -				 * should now care about is checking for -				 * inactive bdi threads and killing them. Thus, -				 * let's sleep for longer time, save energy and -				 * be friendly for battery-driven devices. -				 */ -				schedule_timeout(bdi_longest_inactive()); -			else -				schedule_timeout(msecs_to_jiffies(dirty_writeback_interval * 10)); -			try_to_freeze(); -			break; -		} -	} - -	return 0; +	mod_delayed_work(bdi_wq, &bdi->wb.dwork, timeout);  }  /* @@ -489,6 +306,9 @@ static void bdi_remove_from_list(struct backing_dev_info *bdi)  	spin_unlock_bh(&bdi_lock);  	synchronize_rcu_expedited(); + +	/* bdi_list is now unused, clear it to mark @bdi dying */ +	INIT_LIST_HEAD(&bdi->bdi_list);  }  int bdi_register(struct backing_dev_info *bdi, struct device *parent, @@ -508,20 +328,6 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent,  	bdi->dev = dev; -	/* -	 * Just start the forker thread for our default backing_dev_info, -	 * and add other bdi's to the list. They will get a thread created -	 * on-demand when they need it. 
-	 */ -	if (bdi_cap_flush_forker(bdi)) { -		struct bdi_writeback *wb = &bdi->wb; - -		wb->task = kthread_run(bdi_forker_thread, wb, "bdi-%s", -						dev_name(dev)); -		if (IS_ERR(wb->task)) -			return PTR_ERR(wb->task); -	} -  	bdi_debug_register(bdi, dev_name(dev));  	set_bit(BDI_registered, &bdi->state); @@ -545,8 +351,6 @@ EXPORT_SYMBOL(bdi_register_dev);   */  static void bdi_wb_shutdown(struct backing_dev_info *bdi)  { -	struct task_struct *task; -  	if (!bdi_cap_writeback_dirty(bdi))  		return; @@ -556,22 +360,20 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi)  	bdi_remove_from_list(bdi);  	/* -	 * If setup is pending, wait for that to complete first +	 * Drain work list and shutdown the delayed_work.  At this point, +	 * @bdi->bdi_list is empty telling bdi_writeback_workfn() that @bdi +	 * is dying and its work_list needs to be drained no matter what.  	 */ -	wait_on_bit(&bdi->state, BDI_pending, bdi_sched_wait, -			TASK_UNINTERRUPTIBLE); +	mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0); +	flush_delayed_work(&bdi->wb.dwork); +	WARN_ON(!list_empty(&bdi->work_list));  	/* -	 * Finally, kill the kernel thread. We don't need to be RCU -	 * safe anymore, since the bdi is gone from visibility. +	 * This shouldn't be necessary unless @bdi for some reason has +	 * unflushed dirty IO after work_list is drained.  Do it anyway +	 * just in case.  	 
*/ -	spin_lock_bh(&bdi->wb_lock); -	task = bdi->wb.task; -	bdi->wb.task = NULL; -	spin_unlock_bh(&bdi->wb_lock); - -	if (task) -		kthread_stop(task); +	cancel_delayed_work_sync(&bdi->wb.dwork);  }  /* @@ -597,10 +399,8 @@ void bdi_unregister(struct backing_dev_info *bdi)  		bdi_set_min_ratio(bdi, 0);  		trace_writeback_bdi_unregister(bdi);  		bdi_prune_sb(bdi); -		del_timer_sync(&bdi->wb.wakeup_timer); -		if (!bdi_cap_flush_forker(bdi)) -			bdi_wb_shutdown(bdi); +		bdi_wb_shutdown(bdi);  		bdi_debug_unregister(bdi);  		spin_lock_bh(&bdi->wb_lock); @@ -622,7 +422,7 @@ static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)  	INIT_LIST_HEAD(&wb->b_io);  	INIT_LIST_HEAD(&wb->b_more_io);  	spin_lock_init(&wb->list_lock); -	setup_timer(&wb->wakeup_timer, wakeup_timer_fn, (unsigned long)bdi); +	INIT_DELAYED_WORK(&wb->dwork, bdi_writeback_workfn);  }  /* @@ -695,12 +495,11 @@ void bdi_destroy(struct backing_dev_info *bdi)  	bdi_unregister(bdi);  	/* -	 * If bdi_unregister() had already been called earlier, the -	 * wakeup_timer could still be armed because bdi_prune_sb() -	 * can race with the bdi_wakeup_thread_delayed() calls from -	 * __mark_inode_dirty(). +	 * If bdi_unregister() had already been called earlier, the dwork +	 * could still be pending because bdi_prune_sb() can race with the +	 * bdi_wakeup_thread_delayed() calls from __mark_inode_dirty().  	 */ -	del_timer_sync(&bdi->wb.wakeup_timer); +	cancel_delayed_work_sync(&bdi->wb.dwork);  	for (i = 0; i < NR_BDI_STAT_ITEMS; i++)  		percpu_counter_destroy(&bdi->bdi_stat[i]); | 
