From a0a1a5fd4fb15ec61117c759fe9f5c16c53d9e9c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 29 Jun 2010 10:07:12 +0200 Subject: workqueue: reimplement workqueue freeze using max_active Currently, workqueue freezing is implemented by marking the worker freezable and calling try_to_freeze() from the dispatch loop. Reimplement it using cwq->max_active so that the workqueue is frozen instead of the worker. * workqueue_struct->saved_max_active is added which stores the specified max_active on initialization. * On freeze, all cwq->max_active's are quenched to zero. Freezing is complete when nr_active on all cwqs reaches zero. * On thaw, all cwq->max_active's are restored to wq->saved_max_active and the worklist is repopulated. This new implementation allows having a single shared pool of workers per cpu. Signed-off-by: Tejun Heo --- kernel/power/process.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'kernel/power') diff --git a/kernel/power/process.c b/kernel/power/process.c index 71ae29052ab..028a99598f4 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -15,6 +15,7 @@ #include #include #include +#include /* * Timeout for stopping processes @@ -35,6 +36,7 @@ static int try_to_freeze_tasks(bool sig_only) struct task_struct *g, *p; unsigned long end_time; unsigned int todo; + bool wq_busy = false; struct timeval start, end; u64 elapsed_csecs64; unsigned int elapsed_csecs; @@ -42,6 +44,10 @@ static int try_to_freeze_tasks(bool sig_only) do_gettimeofday(&start); end_time = jiffies + TIMEOUT; + + if (!sig_only) + freeze_workqueues_begin(); + while (true) { todo = 0; read_lock(&tasklist_lock); @@ -63,6 +69,12 @@ static int try_to_freeze_tasks(bool sig_only) todo++; } while_each_thread(g, p); read_unlock(&tasklist_lock); + + if (!sig_only) { + wq_busy = freeze_workqueues_busy(); + todo += wq_busy; + } + if (!todo || time_after(jiffies, end_time)) break; @@ -86,8 +98,12 @@ static int try_to_freeze_tasks(bool sig_only) */ 
printk("\n"); printk(KERN_ERR "Freezing of tasks failed after %d.%02d seconds " - "(%d tasks refusing to freeze):\n", - elapsed_csecs / 100, elapsed_csecs % 100, todo); + "(%d tasks refusing to freeze, wq_busy=%d):\n", + elapsed_csecs / 100, elapsed_csecs % 100, + todo - wq_busy, wq_busy); + + thaw_workqueues(); + read_lock(&tasklist_lock); do_each_thread(g, p) { task_lock(p); @@ -157,6 +173,7 @@ void thaw_processes(void) oom_killer_enable(); printk("Restarting tasks ... "); + thaw_workqueues(); thaw_tasks(true); thaw_tasks(false); schedule(); -- cgit v1.2.3-70-g09d2 From 90133673395849c9d4e66a563f2d0d91d92aa461 Mon Sep 17 00:00:00 2001 From: Cesar Eduardo Barros Date: Mon, 7 Jun 2010 22:23:12 +0200 Subject: PM / Hibernate: Fix typos in comments in kernel/power/swap.c There are a few typos in kernel/power/swap.c. Fix them. Signed-off-by: Cesar Eduardo Barros Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki --- kernel/power/swap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/power') diff --git a/kernel/power/swap.c b/kernel/power/swap.c index b0bb2177839..7c3ae83e41d 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -32,7 +32,7 @@ /* * The swap map is a data structure used for keeping track of each page * written to a swap partition. It consists of many swap_map_page - * structures that contain each an array of MAP_PAGE_SIZE swap entries. + * structures that contain each an array of MAP_PAGE_ENTRIES swap entries. * These structures are stored on the swap and linked together with the * help of the .next_swap member. * @@ -148,7 +148,7 @@ sector_t alloc_swapdev_block(int swap) /** * free_all_swap_pages - free swap pages allocated for saving image data. - * It also frees the extents used to register which swap entres had been + * It also frees the extents used to register which swap entries had been * allocated. 
*/ -- cgit v1.2.3-70-g09d2 From c125e96f044427f38d106fab7bc5e4a5e6a18262 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 5 Jul 2010 22:43:53 +0200 Subject: PM: Make it possible to avoid races between wakeup and system sleep One of the arguments during the suspend blockers discussion was that the mainline kernel didn't contain any mechanisms making it possible to avoid races between wakeup and system suspend. Generally, there are two problems in that area. First, if a wakeup event occurs exactly when /sys/power/state is being written to, it may be delivered to user space right before the freezer kicks in, so the user space consumer of the event may not be able to process it before the system is suspended. Second, if a wakeup event occurs after user space has been frozen, it is not generally guaranteed that the ongoing transition of the system into a sleep state will be aborted. To address these issues introduce a new global sysfs attribute, /sys/power/wakeup_count, associated with a running counter of wakeup events and three helper functions, pm_stay_awake(), pm_relax(), and pm_wakeup_event(), that may be used by kernel subsystems to control the behavior of this attribute and to request the PM core to abort system transitions into a sleep state already in progress. The /sys/power/wakeup_count file may be read from or written to by user space. Reads will always succeed (unless interrupted by a signal) and return the current value of the wakeup events counter. Writes, however, will only succeed if the written number is equal to the current value of the wakeup events counter. If a write is successful, it will cause the kernel to save the current value of the wakeup events counter and to abort the subsequent system transition into a sleep state if any wakeup events are reported after the write has returned. [The assumption is that before writing to /sys/power/state user space will first read from /sys/power/wakeup_count. 
Next, user space consumers of wakeup events will have a chance to acknowledge or veto the upcoming system transition to a sleep state. Finally, if the transition is allowed to proceed, /sys/power/wakeup_count will be written to and if that succeeds, /sys/power/state will be written to as well. Still, if any wakeup events are reported to the PM core by kernel subsystems after that point, the transition will be aborted.] Additionally, put a wakeup events counter into struct dev_pm_info and make these per-device wakeup event counters available via sysfs, so that it's possible to check the activity of various wakeup event sources within the kernel. To illustrate how subsystems can use pm_wakeup_event(), make the low-level PCI runtime PM wakeup-handling code use it. Signed-off-by: Rafael J. Wysocki Acked-by: Jesse Barnes Acked-by: Greg Kroah-Hartman Acked-by: markgross Reviewed-by: Alan Stern --- Documentation/ABI/testing/sysfs-power | 15 +++ drivers/base/power/Makefile | 2 +- drivers/base/power/main.c | 1 + drivers/base/power/sysfs.c | 15 +++ drivers/base/power/wakeup.c | 229 ++++++++++++++++++++++++++++++++++ drivers/pci/pci-acpi.c | 1 + drivers/pci/pci.c | 20 ++- drivers/pci/pci.h | 1 + drivers/pci/pcie/pme/pcie_pme.c | 5 +- include/linux/pm.h | 10 ++ include/linux/suspend.h | 7 ++ kernel/power/hibernate.c | 20 ++- kernel/power/main.c | 55 ++++++++ kernel/power/suspend.c | 4 +- 14 files changed, 375 insertions(+), 10 deletions(-) create mode 100644 drivers/base/power/wakeup.c (limited to 'kernel/power') diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power index d6a801f45b4..2875f1f74a0 100644 --- a/Documentation/ABI/testing/sysfs-power +++ b/Documentation/ABI/testing/sysfs-power @@ -114,3 +114,18 @@ Description: if this file contains "1", which is the default. It may be disabled by writing "0" to this file, in which case all devices will be suspended and resumed synchronously. 
+ +What: /sys/power/wakeup_count +Date: July 2010 +Contact: Rafael J. Wysocki +Description: + The /sys/power/wakeup_count file allows user space to put the + system into a sleep state while taking into account the + concurrent arrival of wakeup events. Reading from it returns + the current number of registered wakeup events and it blocks if + some wakeup events are being processed at the time the file is + read from. Writing to it will only succeed if the current + number of wakeup events is equal to the written value and, if + successful, will make the kernel abort a subsequent transition + to a sleep state if any wakeup events are reported after the + write has returned. diff --git a/drivers/base/power/Makefile b/drivers/base/power/Makefile index 89de75325ce..cbccf9a3cee 100644 --- a/drivers/base/power/Makefile +++ b/drivers/base/power/Makefile @@ -1,5 +1,5 @@ obj-$(CONFIG_PM) += sysfs.o -obj-$(CONFIG_PM_SLEEP) += main.o +obj-$(CONFIG_PM_SLEEP) += main.o wakeup.o obj-$(CONFIG_PM_RUNTIME) += runtime.o obj-$(CONFIG_PM_OPS) += generic_ops.o obj-$(CONFIG_PM_TRACE_RTC) += trace.o diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 941fcb87e52..5419a49ff13 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -59,6 +59,7 @@ void device_pm_init(struct device *dev) { dev->power.status = DPM_ON; init_completion(&dev->power.completion); + dev->power.wakeup_count = 0; pm_runtime_init(dev); } diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index a4c33bc5125..81d344e0e95 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -73,6 +73,8 @@ * device are known to the PM core. However, for some devices this * attribute is set to "enabled" by bus type code or device drivers and in * that cases it should be safe to leave the default value. 
+ * + * wakeup_count - Report the number of wakeup events related to the device */ static const char enabled[] = "enabled"; @@ -144,6 +146,16 @@ wake_store(struct device * dev, struct device_attribute *attr, static DEVICE_ATTR(wakeup, 0644, wake_show, wake_store); +#ifdef CONFIG_PM_SLEEP +static ssize_t wakeup_count_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "%lu\n", dev->power.wakeup_count); +} + +static DEVICE_ATTR(wakeup_count, 0444, wakeup_count_show, NULL); +#endif + #ifdef CONFIG_PM_ADVANCED_DEBUG #ifdef CONFIG_PM_RUNTIME @@ -230,6 +242,9 @@ static struct attribute * power_attrs[] = { &dev_attr_control.attr, #endif &dev_attr_wakeup.attr, +#ifdef CONFIG_PM_SLEEP + &dev_attr_wakeup_count.attr, +#endif #ifdef CONFIG_PM_ADVANCED_DEBUG &dev_attr_async.attr, #ifdef CONFIG_PM_RUNTIME diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c new file mode 100644 index 00000000000..25599077c39 --- /dev/null +++ b/drivers/base/power/wakeup.c @@ -0,0 +1,229 @@ +/* + * drivers/base/power/wakeup.c - System wakeup events framework + * + * Copyright (c) 2010 Rafael J. Wysocki , Novell Inc. + * + * This file is released under the GPLv2. + */ + +#include +#include +#include +#include +#include +#include + +/* + * If set, the suspend/hibernate code will abort transitions to a sleep state + * if wakeup events are registered during or immediately before the transition. + */ +bool events_check_enabled; + +/* The counter of registered wakeup events. */ +static unsigned long event_count; +/* A preserved old value of event_count. */ +static unsigned long saved_event_count; +/* The counter of wakeup events being processed. */ +static unsigned long events_in_progress; + +static DEFINE_SPINLOCK(events_lock); + +/* + * The functions below use the observation that each wakeup event starts a + * period in which the system should not be suspended. 
The moment this period + * will end depends on how the wakeup event is going to be processed after being + * detected and all of the possible cases can be divided into two distinct + * groups. + * + * First, a wakeup event may be detected by the same functional unit that will + * carry out the entire processing of it and possibly will pass it to user space + * for further processing. In that case the functional unit that has detected + * the event may later "close" the "no suspend" period associated with it + * directly as soon as it has been dealt with. The pair of pm_stay_awake() and + * pm_relax(), balanced with each other, is supposed to be used in such + * situations. + * + * Second, a wakeup event may be detected by one functional unit and processed + * by another one. In that case the unit that has detected it cannot really + * "close" the "no suspend" period associated with it, unless it knows in + * advance what's going to happen to the event during processing. This + * knowledge, however, may not be available to it, so it can simply specify time + * to wait before the system can be suspended and pass it as the second + * argument of pm_wakeup_event(). + */ + +/** + * pm_stay_awake - Notify the PM core that a wakeup event is being processed. + * @dev: Device the wakeup event is related to. + * + * Notify the PM core of a wakeup event (signaled by @dev) by incrementing the + * counter of wakeup events being processed. If @dev is not NULL, the counter + * of wakeup events related to @dev is incremented too. + * + * Call this function after detecting a wakeup event if pm_relax() is going + * to be called directly after processing the event (and possibly passing it to + * user space for further processing). + * + * It is safe to call this function from interrupt context. 
+ */ +void pm_stay_awake(struct device *dev) +{ + unsigned long flags; + + spin_lock_irqsave(&events_lock, flags); + if (dev) + dev->power.wakeup_count++; + + events_in_progress++; + spin_unlock_irqrestore(&events_lock, flags); +} + +/** + * pm_relax - Notify the PM core that processing of a wakeup event has ended. + * + * Notify the PM core that a wakeup event has been processed by decrementing + * the counter of wakeup events being processed and incrementing the counter + * of registered wakeup events. + * + * Call this function for wakeup events whose processing started with calling + * pm_stay_awake(). + * + * It is safe to call it from interrupt context. + */ +void pm_relax(void) +{ + unsigned long flags; + + spin_lock_irqsave(&events_lock, flags); + if (events_in_progress) { + events_in_progress--; + event_count++; + } + spin_unlock_irqrestore(&events_lock, flags); +} + +/** + * pm_wakeup_work_fn - Deferred closing of a wakeup event. + * + * Execute pm_relax() for a wakeup event detected in the past and free the + * work item object used for queuing up the work. + */ +static void pm_wakeup_work_fn(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + + pm_relax(); + kfree(dwork); +} + +/** + * pm_wakeup_event - Notify the PM core of a wakeup event. + * @dev: Device the wakeup event is related to. + * @msec: Anticipated event processing time (in milliseconds). + * + * Notify the PM core of a wakeup event (signaled by @dev) that will take + * approximately @msec milliseconds to be processed by the kernel. Increment + * the counter of wakeup events being processed and queue up a work item + * that will execute pm_relax() for the event after @msec milliseconds. If @dev + * is not NULL, the counter of wakeup events related to @dev is incremented too. + * + * It is safe to call this function from interrupt context. 
+ */ +void pm_wakeup_event(struct device *dev, unsigned int msec) +{ + unsigned long flags; + struct delayed_work *dwork; + + dwork = msec ? kzalloc(sizeof(*dwork), GFP_ATOMIC) : NULL; + + spin_lock_irqsave(&events_lock, flags); + if (dev) + dev->power.wakeup_count++; + + if (dwork) { + INIT_DELAYED_WORK(dwork, pm_wakeup_work_fn); + schedule_delayed_work(dwork, msecs_to_jiffies(msec)); + + events_in_progress++; + } else { + event_count++; + } + spin_unlock_irqrestore(&events_lock, flags); +} + +/** + * pm_check_wakeup_events - Check for new wakeup events. + * + * Compare the current number of registered wakeup events with its preserved + * value from the past to check if new wakeup events have been registered since + * the old value was stored. Check if the current number of wakeup events being + * processed is zero. + */ +bool pm_check_wakeup_events(void) +{ + unsigned long flags; + bool ret = true; + + spin_lock_irqsave(&events_lock, flags); + if (events_check_enabled) { + ret = (event_count == saved_event_count) && !events_in_progress; + events_check_enabled = ret; + } + spin_unlock_irqrestore(&events_lock, flags); + return ret; +} + +/** + * pm_get_wakeup_count - Read the number of registered wakeup events. + * @count: Address to store the value at. + * + * Store the number of registered wakeup events at the address in @count. Block + * if the current number of wakeup events being processed is nonzero. + * + * Return false if the wait for the number of wakeup events being processed to + * drop down to zero has been interrupted by a signal (and the current number + * of wakeup events being processed is still nonzero). Otherwise return true. 
+ */ +bool pm_get_wakeup_count(unsigned long *count) +{ + bool ret; + + spin_lock_irq(&events_lock); + if (capable(CAP_SYS_ADMIN)) + events_check_enabled = false; + + while (events_in_progress && !signal_pending(current)) { + spin_unlock_irq(&events_lock); + + schedule_timeout_interruptible(msecs_to_jiffies(100)); + + spin_lock_irq(&events_lock); + } + *count = event_count; + ret = !events_in_progress; + spin_unlock_irq(&events_lock); + return ret; +} + +/** + * pm_save_wakeup_count - Save the current number of registered wakeup events. + * @count: Value to compare with the current number of registered wakeup events. + * + * If @count is equal to the current number of registered wakeup events and the + * current number of wakeup events being processed is zero, store @count as the + * old number of registered wakeup events to be used by pm_check_wakeup_events() + * and return true. Otherwise return false. + */ +bool pm_save_wakeup_count(unsigned long count) +{ + bool ret = false; + + spin_lock_irq(&events_lock); + if (count == event_count && !events_in_progress) { + saved_event_count = count; + events_check_enabled = true; + ret = true; + } + spin_unlock_irq(&events_lock); + return ret; +} diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 2e7a3bf1382..1ab98bbe58d 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -48,6 +48,7 @@ static void pci_acpi_wake_dev(acpi_handle handle, u32 event, void *context) if (event == ACPI_NOTIFY_DEVICE_WAKE && pci_dev) { pci_check_pme_status(pci_dev); pm_runtime_resume(&pci_dev->dev); + pci_wakeup_event(pci_dev); if (pci_dev->subordinate) pci_pme_wakeup_bus(pci_dev->subordinate); } diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 740fb4ea966..130ed1daf0f 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1275,6 +1275,22 @@ bool pci_check_pme_status(struct pci_dev *dev) return ret; } +/* + * Time to wait before the system can be put into a sleep state after reporting + * a wakeup event 
signaled by a PCI device. + */ +#define PCI_WAKEUP_COOLDOWN 100 + +/** + * pci_wakeup_event - Report a wakeup event related to a given PCI device. + * @dev: Device to report the wakeup event for. + */ +void pci_wakeup_event(struct pci_dev *dev) +{ + if (device_may_wakeup(&dev->dev)) + pm_wakeup_event(&dev->dev, PCI_WAKEUP_COOLDOWN); +} + /** * pci_pme_wakeup - Wake up a PCI device if its PME Status bit is set. * @dev: Device to handle. @@ -1285,8 +1301,10 @@ bool pci_check_pme_status(struct pci_dev *dev) */ static int pci_pme_wakeup(struct pci_dev *dev, void *ign) { - if (pci_check_pme_status(dev)) + if (pci_check_pme_status(dev)) { pm_request_resume(&dev->dev); + pci_wakeup_event(dev); + } return 0; } diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index f8077b3c8c8..c8b7fd056cc 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -56,6 +56,7 @@ extern void pci_update_current_state(struct pci_dev *dev, pci_power_t state); extern void pci_disable_enabled_device(struct pci_dev *dev); extern bool pci_check_pme_status(struct pci_dev *dev); extern int pci_finish_runtime_suspend(struct pci_dev *dev); +extern void pci_wakeup_event(struct pci_dev *dev); extern int __pci_pme_wakeup(struct pci_dev *dev, void *ign); extern void pci_pme_wakeup_bus(struct pci_bus *bus); extern void pci_pm_init(struct pci_dev *dev); diff --git a/drivers/pci/pcie/pme/pcie_pme.c b/drivers/pci/pcie/pme/pcie_pme.c index d672a0a6381..bbdea18693d 100644 --- a/drivers/pci/pcie/pme/pcie_pme.c +++ b/drivers/pci/pcie/pme/pcie_pme.c @@ -154,6 +154,7 @@ static bool pcie_pme_walk_bus(struct pci_bus *bus) /* Skip PCIe devices in case we started from a root port. */ if (!pci_is_pcie(dev) && pci_check_pme_status(dev)) { pm_request_resume(&dev->dev); + pci_wakeup_event(dev); ret = true; } @@ -254,8 +255,10 @@ static void pcie_pme_handle_request(struct pci_dev *port, u16 req_id) if (found) { /* The device is there, but we have to check its PME status. 
*/ found = pci_check_pme_status(dev); - if (found) + if (found) { pm_request_resume(&dev->dev); + pci_wakeup_event(dev); + } pci_dev_put(dev); } else if (devfn) { /* diff --git a/include/linux/pm.h b/include/linux/pm.h index 8e258c72797..b417fc46f3f 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -457,6 +457,7 @@ struct dev_pm_info { #ifdef CONFIG_PM_SLEEP struct list_head entry; struct completion completion; + unsigned long wakeup_count; #endif #ifdef CONFIG_PM_RUNTIME struct timer_list suspend_timer; @@ -552,6 +553,11 @@ extern void __suspend_report_result(const char *function, void *fn, int ret); } while (0) extern void device_pm_wait_for_dev(struct device *sub, struct device *dev); + +/* drivers/base/power/wakeup.c */ +extern void pm_wakeup_event(struct device *dev, unsigned int msec); +extern void pm_stay_awake(struct device *dev); +extern void pm_relax(void); #else /* !CONFIG_PM_SLEEP */ #define device_pm_lock() do {} while (0) @@ -565,6 +571,10 @@ static inline int dpm_suspend_start(pm_message_t state) #define suspend_report_result(fn, ret) do {} while (0) static inline void device_pm_wait_for_dev(struct device *a, struct device *b) {} + +static inline void pm_wakeup_event(struct device *dev, unsigned int msec) {} +static inline void pm_stay_awake(struct device *dev) {} +static inline void pm_relax(void) {} #endif /* !CONFIG_PM_SLEEP */ /* How to reorder dpm_list after device_move() */ diff --git a/include/linux/suspend.h b/include/linux/suspend.h index bc7d6bb4cd8..bf1bab7b059 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -286,6 +286,13 @@ extern int unregister_pm_notifier(struct notifier_block *nb); { .notifier_call = fn, .priority = pri }; \ register_pm_notifier(&fn##_nb); \ } + +/* drivers/base/power/wakeup.c */ +extern bool events_check_enabled; + +extern bool pm_check_wakeup_events(void); +extern bool pm_get_wakeup_count(unsigned long *count); +extern bool pm_save_wakeup_count(unsigned long count); #else /* 
!CONFIG_PM_SLEEP */ static inline int register_pm_notifier(struct notifier_block *nb) diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index aa9e916da4d..f6120291663 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -277,7 +277,7 @@ static int create_image(int platform_mode) goto Enable_irqs; } - if (hibernation_test(TEST_CORE)) + if (hibernation_test(TEST_CORE) || !pm_check_wakeup_events()) goto Power_up; in_suspend = 1; @@ -288,8 +288,10 @@ static int create_image(int platform_mode) error); /* Restore control flow magically appears here */ restore_processor_state(); - if (!in_suspend) + if (!in_suspend) { + events_check_enabled = false; platform_leave(platform_mode); + } Power_up: sysdev_resume(); @@ -511,14 +513,20 @@ int hibernation_platform_enter(void) local_irq_disable(); sysdev_suspend(PMSG_HIBERNATE); + if (!pm_check_wakeup_events()) { + error = -EAGAIN; + goto Power_up; + } + hibernation_ops->enter(); /* We should never get here */ while (1); - /* - * We don't need to reenable the nonboot CPUs or resume consoles, since - * the system is going to be halted anyway. - */ + Power_up: + sysdev_resume(); + local_irq_enable(); + enable_nonboot_cpus(); + Platform_finish: hibernation_ops->finish(); diff --git a/kernel/power/main.c b/kernel/power/main.c index b58800b21fc..62b0bc6e498 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -204,6 +204,60 @@ static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr, power_attr(state); +#ifdef CONFIG_PM_SLEEP +/* + * The 'wakeup_count' attribute, along with the functions defined in + * drivers/base/power/wakeup.c, provides a means by which wakeup events can be + * handled in a non-racy way. + * + * If a wakeup event occurs when the system is in a sleep state, it simply is + * woken up. In turn, if an event that would wake the system up from a sleep + * state occurs when it is undergoing a transition to that sleep state, the + * transition should be aborted. 
Moreover, if such an event occurs when the + * system is in the working state, an attempt to start a transition to the + * given sleep state should fail during a certain period after the detection of + * the event. Using the 'state' attribute alone is not sufficient to satisfy + * these requirements, because a wakeup event may occur exactly when 'state' + * is being written to and may be delivered to user space right before it is + * frozen, so the event will remain only partially processed until the system is + * woken up by another event. In particular, it won't cause the transition to + * a sleep state to be aborted. + * + * This difficulty may be overcome if user space uses 'wakeup_count' before + * writing to 'state'. It first should read from 'wakeup_count' and store + * the read value. Then, after carrying out its own preparations for the system + * transition to a sleep state, it should write the stored value to + * 'wakeup_count'. If that fails, at least one wakeup event has occurred since + * 'wakeup_count' was read and 'state' should not be written to. Otherwise, it + * is allowed to write to 'state', but the transition will be aborted if there + * are any wakeup events detected after 'wakeup_count' was written to. + */ + +static ssize_t wakeup_count_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + unsigned long val; + + return pm_get_wakeup_count(&val) ? 
sprintf(buf, "%lu\n", val) : -EINTR; +} + +static ssize_t wakeup_count_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t n) +{ + unsigned long val; + + if (sscanf(buf, "%lu", &val) == 1) { + if (pm_save_wakeup_count(val)) + return n; + } + return -EINVAL; +} + +power_attr(wakeup_count); +#endif /* CONFIG_PM_SLEEP */ + #ifdef CONFIG_PM_TRACE int pm_trace_enabled; @@ -236,6 +290,7 @@ static struct attribute * g[] = { #endif #ifdef CONFIG_PM_SLEEP &pm_async_attr.attr, + &wakeup_count_attr.attr, #ifdef CONFIG_PM_DEBUG &pm_test_attr.attr, #endif diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index f37cb7dd440..5f8d09f9432 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -163,8 +163,10 @@ static int suspend_enter(suspend_state_t state) error = sysdev_suspend(PMSG_SUSPEND); if (!error) { - if (!suspend_test(TEST_CORE)) + if (!suspend_test(TEST_CORE) && pm_check_wakeup_events()) { error = suspend_ops->enter(state); + events_check_enabled = false; + } sysdev_resume(); } -- cgit v1.2.3-70-g09d2 From f6f71f187518477cecc01cd887933b5da19585e6 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 7 Jul 2010 23:43:18 +0200 Subject: PM / Hibernate: Fix hibernation_platform_enter() The hibernation_platform_enter() function calls dpm_suspend_noirq() instead of dpm_resume_noirq() by mistake. Fix this. Signed-off-by: Rafael J. 
Wysocki Acked-by: Len Brown --- kernel/power/hibernate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/power') diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index f6120291663..d97ba8615c3 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -530,7 +530,7 @@ int hibernation_platform_enter(void) Platform_finish: hibernation_ops->finish(); - dpm_suspend_noirq(PMSG_RESTORE); + dpm_resume_noirq(PMSG_RESTORE); Resume_devices: entering_platform_hibernation = false; -- cgit v1.2.3-70-g09d2 From d074ee023fa3a4681b64223c5e636102c39628c4 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 7 Jul 2010 23:43:35 +0200 Subject: PM / Hibernate: Fix snapshot error code path There is an inconsistency between hibernation_platform_enter() and hibernation_snapshot(), because the latter calls hibernation_ops->end() after failing hibernation_ops->begin(), while the former doesn't do that. Make hibernation_snapshot() behave in the same way as hibernation_platform_enter() in that respect. Signed-off-by: Rafael J. Wysocki Acked-by: Len Brown --- kernel/power/hibernate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/power') diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index d97ba8615c3..d26f04e9274 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -330,7 +330,7 @@ int hibernation_snapshot(int platform_mode) error = platform_begin(platform_mode); if (error) - return error; + goto Close; /* Preallocate image memory before shutting down devices. */ error = hibernate_preallocate_memory(); -- cgit v1.2.3-70-g09d2 From ce4410116c5debfb0e049f5db4b5cd6211e05b80 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Wed, 7 Jul 2010 23:43:45 +0200 Subject: PM / Suspend: Fix ordering of calls in suspend error paths The ACPI suspend code calls suspend_nvs_free() at a wrong place, which may lead to a memory leak if there's an error executing acpi_pm_prepare(), because acpi_pm_finish() will not be called in that case. However, the root cause of this problem is the apparently confusing ordering of calls in suspend error paths that needs to be fixed. In addition to that, fix a typo in a label name in suspend.c. Signed-off-by: Rafael J. Wysocki Acked-by: Len Brown --- include/linux/suspend.h | 10 ++++++---- kernel/power/suspend.c | 9 ++++----- 2 files changed, 10 insertions(+), 9 deletions(-) (limited to 'kernel/power') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index bf1bab7b059..4af270ec220 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -61,14 +61,15 @@ typedef int __bitwise suspend_state_t; * before device drivers' late suspend callbacks are executed. It returns * 0 on success or a negative error code otherwise, in which case the * system cannot enter the desired sleep state (@prepare_late(), @enter(), - * @wake(), and @finish() will not be called in that case). + * and @wake() will not be called in that case). * * @prepare_late: Finish preparing the platform for entering the system sleep * state indicated by @begin(). * @prepare_late is called before disabling nonboot CPUs and after * device drivers' late suspend callbacks have been executed. It returns * 0 on success or a negative error code otherwise, in which case the - * system cannot enter the desired sleep state (@enter() and @wake()). + * system cannot enter the desired sleep state (@enter() will not be + * executed). * * @enter: Enter the system sleep state indicated by @begin() or represented by * the argument if @begin() is not implemented. @@ -81,14 +82,15 @@ typedef int __bitwise suspend_state_t; * resume callbacks are executed. 
* This callback is optional, but should be implemented by the platforms * that implement @prepare_late(). If implemented, it is always called - * after @enter(), even if @enter() fails. + * after @prepare_late and @enter(), even if one of them fails. * * @finish: Finish wake-up of the platform. * @finish is called right prior to calling device drivers' regular suspend * callbacks. * This callback is optional, but should be implemented by the platforms * that implement @prepare(). If implemented, it is always called after - * @enter() and @wake(), if implemented, even if any of them fails. + * @enter() and @wake(), even if any of them fails. It is executed after + * a failing @prepare. * * @end: Called by the PM core right after resuming devices, to indicate to * the platform that the system has returned to the working state or diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 5f8d09f9432..7335952ee47 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -136,19 +136,19 @@ static int suspend_enter(suspend_state_t state) if (suspend_ops->prepare) { error = suspend_ops->prepare(); if (error) - return error; + goto Platform_finish; } error = dpm_suspend_noirq(PMSG_SUSPEND); if (error) { printk(KERN_ERR "PM: Some devices failed to power down\n"); - goto Platfrom_finish; + goto Platform_finish; } if (suspend_ops->prepare_late) { error = suspend_ops->prepare_late(); if (error) - goto Power_up_devices; + goto Platform_wake; } if (suspend_test(TEST_PLATFORM)) @@ -180,10 +180,9 @@ static int suspend_enter(suspend_state_t state) if (suspend_ops->wake) suspend_ops->wake(); - Power_up_devices: dpm_resume_noirq(PMSG_RESUME); - Platfrom_finish: + Platform_finish: if (suspend_ops->finish) suspend_ops->finish(); -- cgit v1.2.3-70-g09d2 From a2531293dbb7608fa672ff28efe3ab4027917a2f Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Sun, 18 Jul 2010 14:27:13 +0200 Subject: update email address pavel@suse.cz no longer works, replace it with working 
address. Signed-off-by: Pavel Machek Signed-off-by: Jiri Kosina --- Documentation/feature-removal-schedule.txt | 2 +- Documentation/hwmon/hpfall.c | 2 +- Documentation/power/tricks.txt | 2 +- Documentation/sparse.txt | 2 +- Documentation/zh_CN/sparse.txt | 2 +- arch/arm/mach-sa1100/collie.c | 2 +- arch/powerpc/kernel/suspend.c | 2 +- arch/x86/kernel/acpi/sleep.c | 2 +- arch/x86/kernel/apm_32.c | 2 +- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 2 +- arch/x86/mm/init_64.c | 2 +- arch/x86/power/cpu.c | 2 +- arch/x86/power/hibernate_64.c | 2 +- drivers/block/nbd.c | 2 +- drivers/media/video/usbvideo/vicam.c | 2 +- drivers/media/video/v4l2-compat-ioctl32.c | 2 +- drivers/staging/winbond/wbusb.c | 2 +- drivers/usb/class/cdc-acm.c | 2 +- drivers/usb/class/usblp.c | 2 +- drivers/video/backlight/locomolcd.c | 4 ++-- fs/compat.c | 2 +- fs/compat_ioctl.c | 2 +- kernel/debug/debug_core.c | 2 +- kernel/debug/gdbstub.c | 2 +- kernel/power/hibernate.c | 2 +- kernel/power/snapshot.c | 2 +- kernel/power/swap.c | 2 +- 27 files changed, 28 insertions(+), 28 deletions(-) (limited to 'kernel/power') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index c268783bc4e..1a0fc32bc20 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -93,7 +93,7 @@ Why: Broken design for runtime control over driver power states, confusing inputs. This framework was never widely used, and most attempts to use it were broken. Drivers should instead be exposing domain-specific interfaces either to kernel or to userspace. -Who: Pavel Machek <pavel@suse.cz> +Who: Pavel Machek <pavel@ucw.cz> --------------------------- diff --git a/Documentation/hwmon/hpfall.c b/Documentation/hwmon/hpfall.c index 681ec22b9d0..a4a8fc5d05d 100644 --- a/Documentation/hwmon/hpfall.c +++ b/Documentation/hwmon/hpfall.c @@ -1,7 +1,7 @@ /* Disk protection for HP machines.
* * Copyright 2008 Eric Piel - * Copyright 2009 Pavel Machek <pavel@suse.cz> + * Copyright 2009 Pavel Machek <pavel@ucw.cz> * * GPLv2. */ diff --git a/Documentation/power/tricks.txt b/Documentation/power/tricks.txt index 3b26bb502a4..a1b8f7249f4 100644 --- a/Documentation/power/tricks.txt +++ b/Documentation/power/tricks.txt @@ -1,6 +1,6 @@ swsusp/S3 tricks ~~~~~~~~~~~~~~~~ -Pavel Machek <pavel@suse.cz> +Pavel Machek <pavel@ucw.cz> If you want to trick swsusp/S3 into working, you might want to try: diff --git a/Documentation/sparse.txt b/Documentation/sparse.txt index 9b659c79a54..4909d411635 100644 --- a/Documentation/sparse.txt +++ b/Documentation/sparse.txt @@ -1,5 +1,5 @@ Copyright 2004 Linus Torvalds -Copyright 2004 Pavel Machek <pavel@suse.cz> +Copyright 2004 Pavel Machek <pavel@ucw.cz> Copyright 2006 Bob Copeland Using sparse for typechecking diff --git a/Documentation/zh_CN/sparse.txt b/Documentation/zh_CN/sparse.txt index 75992a603ae..cc144e58151 100644 --- a/Documentation/zh_CN/sparse.txt +++ b/Documentation/zh_CN/sparse.txt @@ -22,7 +22,7 @@ Documentation/sparse.txt 的中文翻译 --------------------------------------------------------------------- Copyright 2004 Linus Torvalds -Copyright 2004 Pavel Machek <pavel@suse.cz> +Copyright 2004 Pavel Machek <pavel@ucw.cz> Copyright 2006 Bob Copeland 使用 sparse 工具做类型检查 diff --git a/arch/arm/mach-sa1100/collie.c b/arch/arm/mach-sa1100/collie.c index 5d5f330c5d9..16e682d5dbb 100644 --- a/arch/arm/mach-sa1100/collie.c +++ b/arch/arm/mach-sa1100/collie.c @@ -11,7 +11,7 @@ * published by the Free Software Foundation. * * ChangeLog: - * 2006 Pavel Machek <pavel@suse.cz> + * 2006 Pavel Machek <pavel@ucw.cz> * 03-06-2004 John Lenz * 06-04-2002 Chris Larson * 04-16-2001 Lineo Japan,Inc. ...
diff --git a/arch/powerpc/kernel/suspend.c b/arch/powerpc/kernel/suspend.c index 6fc6328dc62..0167d53da30 100644 --- a/arch/powerpc/kernel/suspend.c +++ b/arch/powerpc/kernel/suspend.c @@ -3,7 +3,7 @@ * * Distribute under GPLv2 * - * Copyright (c) 2002 Pavel Machek <pavel@suse.cz> + * Copyright (c) 2002 Pavel Machek <pavel@ucw.cz> * Copyright (c) 2001 Patrick Mochel */ diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 82e508677b9..f51cc55aced 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -2,7 +2,7 @@ * sleep.c - x86-specific ACPI sleep support. * * Copyright (C) 2001-2003 Patrick Mochel - * Copyright (C) 2001-2003 Pavel Machek <pavel@suse.cz> + * Copyright (C) 2001-2003 Pavel Machek <pavel@ucw.cz> */ #include diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index c4f9182ca3a..4c9c67bf09b 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -140,7 +140,7 @@ * is now the way life works). * Fix thinko in suspend() (wrong return). * Notify drivers on critical suspend. - * Make kapmd absorb more idle time (Pavel Machek <pavel@suse.cz> + * Make kapmd absorb more idle time (Pavel Machek <pavel@ucw.cz> * modified by sfr). * Disable interrupts while we are suspended (Andy Henroid * fixed by sfr). diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 7ec2123838e..0af9aa20fce 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -9,7 +9,7 @@ * Based on the powernow-k7.c module written by Dave Jones. * (C) 2003 Dave Jones on behalf of SuSE Labs * (C) 2004 Dominik Brodowski - * (C) 2004 Pavel Machek <pavel@suse.cz> + * (C) 2004 Pavel Machek <pavel@ucw.cz> * Licensed under the terms of the GNU GPL License version 2. * Based upon datasheets & sample CPUs kindly provided by AMD.
* diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index ee41bba315d..9a6674689a2 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -2,7 +2,7 @@ * linux/arch/x86_64/mm/init.c * * Copyright (C) 1995 Linus Torvalds - * Copyright (C) 2000 Pavel Machek <pavel@suse.cz> + * Copyright (C) 2000 Pavel Machek <pavel@ucw.cz> * Copyright (C) 2002,2003 Andi Kleen */ diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 1290ba54b35..e7e8c5f5495 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -4,7 +4,7 @@ * Distribute under GPLv2 * * Copyright (c) 2007 Rafael J. Wysocki - * Copyright (c) 2002 Pavel Machek <pavel@suse.cz> + * Copyright (c) 2002 Pavel Machek <pavel@ucw.cz> * Copyright (c) 2001 Patrick Mochel */ diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c index d24f983ba1e..460f314d13e 100644 --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c @@ -4,7 +4,7 @@ * Distribute under GPLv2 * * Copyright (c) 2007 Rafael J. Wysocki - * Copyright (c) 2002 Pavel Machek <pavel@suse.cz> + * Copyright (c) 2002 Pavel Machek <pavel@ucw.cz> * Copyright (c) 2001 Patrick Mochel */ diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 218d091f3c5..16c3c8613cd 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -4,7 +4,7 @@ * Note that you can not swap over this thing, yet. Seems to work but * deadlocks sometimes - you can not swap over TCP in general. * - * Copyright 1997-2000, 2008 Pavel Machek <pavel@suse.cz> + * Copyright 1997-2000, 2008 Pavel Machek <pavel@ucw.cz> * Parts copyright 2001 Steven Whitehouse * * This file is released under GPLv2 or later.
diff --git a/drivers/media/video/usbvideo/vicam.c b/drivers/media/video/usbvideo/vicam.c index 6030410c667..5d6fd01f918 100644 --- a/drivers/media/video/usbvideo/vicam.c +++ b/drivers/media/video/usbvideo/vicam.c @@ -2,7 +2,7 @@ * USB ViCam WebCam driver * Copyright (c) 2002 Joe Burks (jburks@wavicle.org), * Christopher L Cheney (ccheney@cheney.cx), - * Pavel Machek (pavel@suse.cz), + * Pavel Machek (pavel@ucw.cz), * John Tyner (jtyner@cs.ucr.edu), * Monroe Williams (monroe@pobox.com) * diff --git a/drivers/media/video/v4l2-compat-ioctl32.c b/drivers/media/video/v4l2-compat-ioctl32.c index 9004a5fe764..d2f20c2acae 100644 --- a/drivers/media/video/v4l2-compat-ioctl32.c +++ b/drivers/media/video/v4l2-compat-ioctl32.c @@ -5,7 +5,7 @@ * Copyright (C) 1997-2000 Jakub Jelinek (jakub@redhat.com) * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be) * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs - * Copyright (C) 2003 Pavel Machek (pavel@suse.cz) + * Copyright (C) 2003 Pavel Machek (pavel@ucw.cz) * Copyright (C) 2005 Philippe De Muyter (phdm@macqel.be) * Copyright (C) 2008 Hans Verkuil * diff --git a/drivers/staging/winbond/wbusb.c b/drivers/staging/winbond/wbusb.c index 681419d6856..251caa052ee 100644 --- a/drivers/staging/winbond/wbusb.c +++ b/drivers/staging/winbond/wbusb.c @@ -1,5 +1,5 @@ /* - * Copyright 2008 Pavel Machek <pavel@suse.cz> + * Copyright 2008 Pavel Machek <pavel@ucw.cz> * * Distribute under GPLv2.
* diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 61d75507d5d..8413a567c12 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -2,7 +2,7 @@ * cdc-acm.c * * Copyright (c) 1999 Armin Fuerst - * Copyright (c) 1999 Pavel Machek <pavel@suse.cz> + * Copyright (c) 1999 Pavel Machek <pavel@ucw.cz> * Copyright (c) 1999 Johannes Erdfelt * Copyright (c) 2000 Vojtech Pavlik * Copyright (c) 2004 Oliver Neukum diff --git a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c index 2250095db0a..84f9e52327f 100644 --- a/drivers/usb/class/usblp.c +++ b/drivers/usb/class/usblp.c @@ -2,7 +2,7 @@ * usblp.c * * Copyright (c) 1999 Michael Gee - * Copyright (c) 1999 Pavel Machek <pavel@suse.cz> + * Copyright (c) 1999 Pavel Machek <pavel@ucw.cz> * Copyright (c) 2000 Randy Dunlap * Copyright (c) 2000 Vojtech Pavlik # Copyright (c) 2001 Pete Zaitcev diff --git a/drivers/video/backlight/locomolcd.c b/drivers/video/backlight/locomolcd.c index 7571bc26071..d2f59015d51 100644 --- a/drivers/video/backlight/locomolcd.c +++ b/drivers/video/backlight/locomolcd.c @@ -2,7 +2,7 @@ * Backlight control code for Sharp Zaurus SL-5500 * * Copyright 2005 John Lenz - * Maintainer: Pavel Machek <pavel@suse.cz> (unless John wants to :-) + * Maintainer: Pavel Machek <pavel@ucw.cz> (unless John wants to :-) * GPL v2 * * This driver assumes single CPU. That's okay, because collie is @@ -246,6 +246,6 @@ static void __exit locomolcd_exit(void) module_init(locomolcd_init); module_exit(locomolcd_exit); -MODULE_AUTHOR("John Lenz , Pavel Machek <pavel@suse.cz>"); +MODULE_AUTHOR("John Lenz , Pavel Machek <pavel@ucw.cz>"); MODULE_DESCRIPTION("Collie LCD driver"); MODULE_LICENSE("GPL"); diff --git a/fs/compat.c b/fs/compat.c index 6490d2134ff..c6fda9aeb86 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -8,7 +8,7 @@ * Copyright (C) 1997-2000 Jakub Jelinek (jakub@redhat.com) * Copyright (C) 1998 Eddie C.
Dost (ecd@skynet.be) * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs - * Copyright (C) 2003 Pavel Machek (pavel@suse.cz) + * Copyright (C) 2003 Pavel Machek (pavel@ucw.cz) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 641640dc7ae..5ead3763bba 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -4,7 +4,7 @@ * Copyright (C) 1997-2000 Jakub Jelinek (jakub@redhat.com) * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be) * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs - * Copyright (C) 2003 Pavel Machek (pavel@suse.cz) + * Copyright (C) 2003 Pavel Machek (pavel@ucw.cz) * * These routines maintain argument size conversion between 32bit and 64bit * ioctls. diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index 5cb7cd1de10..568efbce80f 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -6,7 +6,7 @@ * Copyright (C) 2000-2001 VERITAS Software Corporation. * Copyright (C) 2002-2004 Timesys Corporation * Copyright (C) 2003-2004 Amit S. Kale - * Copyright (C) 2004 Pavel Machek <pavel@suse.cz> + * Copyright (C) 2004 Pavel Machek <pavel@ucw.cz> * Copyright (C) 2004-2006 Tom Rini * Copyright (C) 2004-2006 LinSysSoft Technologies Pvt. Ltd. * Copyright (C) 2005-2009 Wind River Systems, Inc. diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c index 4b17b326952..4e584721bcb 100644 --- a/kernel/debug/gdbstub.c +++ b/kernel/debug/gdbstub.c @@ -6,7 +6,7 @@ * Copyright (C) 2000-2001 VERITAS Software Corporation. * Copyright (C) 2002-2004 Timesys Corporation * Copyright (C) 2003-2004 Amit S. Kale - * Copyright (C) 2004 Pavel Machek <pavel@suse.cz> + * Copyright (C) 2004 Pavel Machek <pavel@ucw.cz> * Copyright (C) 2004-2006 Tom Rini * Copyright (C) 2004-2006 LinSysSoft Technologies Pvt. Ltd. * Copyright (C) 2005-2009 Wind River Systems, Inc.
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index aa9e916da4d..6b202e7f8b5 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -3,7 +3,7 @@ * * Copyright (c) 2003 Patrick Mochel * Copyright (c) 2003 Open Source Development Lab - * Copyright (c) 2004 Pavel Machek <pavel@suse.cz> + * Copyright (c) 2004 Pavel Machek <pavel@ucw.cz> * Copyright (c) 2009 Rafael J. Wysocki, Novell Inc. * * This file is released under the GPLv2. diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 25ce010e9f8..f6cd6faf84f 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -3,7 +3,7 @@ * * This file provides system snapshot/restore functionality for swsusp. * - * Copyright (C) 1998-2005 Pavel Machek <pavel@suse.cz> + * Copyright (C) 1998-2005 Pavel Machek <pavel@ucw.cz> * Copyright (C) 2006 Rafael J. Wysocki * * This file is released under the GPLv2. diff --git a/kernel/power/swap.c b/kernel/power/swap.c index b0bb2177839..48a0aa9da16 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -4,7 +4,7 @@ * This file provides functions for reading the suspend image from * and writing it to a swap partition. * - * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@suse.cz> + * Copyright (C) 1998,2001-2005 Pavel Machek <pavel@ucw.cz> * Copyright (C) 2006 Rafael J. Wysocki * * This file is released under the GPLv2. -- cgit v1.2.3-70-g09d2 From 7b6d91daee5cac6402186ff224c3af39d79f4a0e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 7 Aug 2010 18:20:39 +0200 Subject: block: unify flags for struct bio and struct request Remove the current bio flags and reuse the request flags for the bio, too. This makes it easier to trace the type of I/O from the filesystem down to the block driver. There were two flags in the bio that were missing in the requests: BIO_RW_UNPLUG and BIO_RW_AHEAD. Also I've renamed two request flags that had a superfluous RW in them. Note that the flags are in bio.h despite having the REQ_ name - as blkdev.h includes bio.h that is the only way to go for now.
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-barrier.c | 2 +- block/blk-core.c | 37 +++-------- block/blk-map.c | 2 +- block/blk-merge.c | 2 +- block/cfq-iosched.c | 14 ++--- block/elevator.c | 3 +- drivers/ata/libata-scsi.c | 2 +- drivers/block/aoe/aoeblk.c | 2 +- drivers/block/brd.c | 2 +- drivers/block/drbd/drbd_actlog.c | 8 +-- drivers/block/drbd/drbd_main.c | 6 +- drivers/block/drbd/drbd_receiver.c | 22 +++---- drivers/block/drbd/drbd_req.c | 2 +- drivers/block/loop.c | 2 +- drivers/block/pktcdvd.c | 2 +- drivers/block/umem.c | 2 +- drivers/ide/ide-cd_ioctl.c | 2 +- drivers/ide/ide-floppy.c | 2 +- drivers/md/dm-io.c | 12 ++-- drivers/md/dm-kcopyd.c | 2 +- drivers/md/dm-raid1.c | 2 +- drivers/md/dm-stripe.c | 2 +- drivers/md/dm.c | 14 ++--- drivers/md/linear.c | 2 +- drivers/md/md.c | 10 +-- drivers/md/md.h | 4 +- drivers/md/multipath.c | 8 +-- drivers/md/raid0.c | 2 +- drivers/md/raid1.c | 22 +++---- drivers/md/raid10.c | 12 ++-- drivers/md/raid5.c | 2 +- drivers/scsi/osd/osd_initiator.c | 8 +-- fs/bio.c | 5 +- fs/btrfs/disk-io.c | 8 +-- fs/btrfs/inode.c | 6 +- fs/btrfs/volumes.c | 18 +++--- fs/exofs/ios.c | 2 +- fs/gfs2/log.c | 4 +- fs/gfs2/meta_io.c | 8 +-- fs/gfs2/ops_fstype.c | 2 +- fs/nilfs2/segbuf.c | 2 +- include/linux/bio.h | 125 +++++++++++++++++++++++-------------- include/linux/blkdev.h | 66 +------------------- include/linux/fs.h | 38 +++++------ kernel/power/block_io.c | 2 +- kernel/trace/blktrace.c | 27 ++++---- mm/page_io.c | 2 +- 47 files changed, 242 insertions(+), 289 deletions(-) (limited to 'kernel/power') diff --git a/block/blk-barrier.c b/block/blk-barrier.c index 74e40439317..7c6f4a71468 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -203,7 +203,7 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp) /* initialize proxy request and queue it */ blk_rq_init(q, rq); if (bio_data_dir(q->orig_bar_rq->bio) == WRITE) - rq->cmd_flags |= REQ_RW; + rq->cmd_flags |= REQ_WRITE; 
if (q->ordered & QUEUE_ORDERED_DO_FUA) rq->cmd_flags |= REQ_FUA; init_request_from_bio(rq, q->orig_bar_rq->bio); diff --git a/block/blk-core.c b/block/blk-core.c index dca43a31e72..66c3cfe94d0 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1140,25 +1140,9 @@ void init_request_from_bio(struct request *req, struct bio *bio) req->cpu = bio->bi_comp_cpu; req->cmd_type = REQ_TYPE_FS; - /* - * Inherit FAILFAST from bio (for read-ahead, and explicit - * FAILFAST). FAILFAST flags are identical for req and bio. - */ - if (bio_rw_flagged(bio, BIO_RW_AHEAD)) + req->cmd_flags |= bio->bi_rw & REQ_COMMON_MASK; + if (bio->bi_rw & REQ_RAHEAD) req->cmd_flags |= REQ_FAILFAST_MASK; - else - req->cmd_flags |= bio->bi_rw & REQ_FAILFAST_MASK; - - if (bio_rw_flagged(bio, BIO_RW_DISCARD)) - req->cmd_flags |= REQ_DISCARD; - if (bio_rw_flagged(bio, BIO_RW_BARRIER)) - req->cmd_flags |= REQ_HARDBARRIER; - if (bio_rw_flagged(bio, BIO_RW_SYNCIO)) - req->cmd_flags |= REQ_RW_SYNC; - if (bio_rw_flagged(bio, BIO_RW_META)) - req->cmd_flags |= REQ_RW_META; - if (bio_rw_flagged(bio, BIO_RW_NOIDLE)) - req->cmd_flags |= REQ_NOIDLE; req->errors = 0; req->__sector = bio->bi_sector; @@ -1181,12 +1165,12 @@ static int __make_request(struct request_queue *q, struct bio *bio) int el_ret; unsigned int bytes = bio->bi_size; const unsigned short prio = bio_prio(bio); - const bool sync = bio_rw_flagged(bio, BIO_RW_SYNCIO); - const bool unplug = bio_rw_flagged(bio, BIO_RW_UNPLUG); + const bool sync = (bio->bi_rw & REQ_SYNC); + const bool unplug = (bio->bi_rw & REQ_UNPLUG); const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK; int rw_flags; - if (bio_rw_flagged(bio, BIO_RW_BARRIER) && + if ((bio->bi_rw & REQ_HARDBARRIER) && (q->next_ordered == QUEUE_ORDERED_NONE)) { bio_endio(bio, -EOPNOTSUPP); return 0; @@ -1200,7 +1184,7 @@ static int __make_request(struct request_queue *q, struct bio *bio) spin_lock_irq(q->queue_lock); - if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER)) || elv_queue_empty(q)) + if 
(unlikely((bio->bi_rw & REQ_HARDBARRIER)) || elv_queue_empty(q)) goto get_rq; el_ret = elv_merge(q, &req, bio); @@ -1275,7 +1259,7 @@ get_rq: */ rw_flags = bio_data_dir(bio); if (sync) - rw_flags |= REQ_RW_SYNC; + rw_flags |= REQ_SYNC; /* * Grab a free request. This is might sleep but can not fail. @@ -1464,7 +1448,7 @@ static inline void __generic_make_request(struct bio *bio) goto end_io; } - if (unlikely(!bio_rw_flagged(bio, BIO_RW_DISCARD) && + if (unlikely(!(bio->bi_rw & REQ_DISCARD) && nr_sectors > queue_max_hw_sectors(q))) { printk(KERN_ERR "bio too big device %s (%u > %u)\n", bdevname(bio->bi_bdev, b), @@ -1497,8 +1481,7 @@ static inline void __generic_make_request(struct bio *bio) if (bio_check_eod(bio, nr_sectors)) goto end_io; - if (bio_rw_flagged(bio, BIO_RW_DISCARD) && - !blk_queue_discard(q)) { + if ((bio->bi_rw & REQ_DISCARD) && !blk_queue_discard(q)) { err = -EOPNOTSUPP; goto end_io; } @@ -2365,7 +2348,7 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq, struct bio *bio) { /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */ - rq->cmd_flags |= bio->bi_rw & REQ_RW; + rq->cmd_flags |= bio->bi_rw & REQ_WRITE; if (bio_has_data(bio)) { rq->nr_phys_segments = bio_phys_segments(q, bio); diff --git a/block/blk-map.c b/block/blk-map.c index 9083cf0180c..c65d7593f7f 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -307,7 +307,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, return PTR_ERR(bio); if (rq_data_dir(rq) == WRITE) - bio->bi_rw |= (1 << BIO_RW); + bio->bi_rw |= (1 << REQ_WRITE); if (do_copy) rq->cmd_flags |= REQ_COPY_USER; diff --git a/block/blk-merge.c b/block/blk-merge.c index 87e4fb7d0e9..4852475521e 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -180,7 +180,7 @@ new_segment: } if (q->dma_drain_size && q->dma_drain_needed(rq)) { - if (rq->cmd_flags & REQ_RW) + if (rq->cmd_flags & REQ_WRITE) memset(q->dma_drain_buffer, 0, q->dma_drain_size); sg->page_link &= ~0x02; diff 
--git a/block/cfq-iosched.c b/block/cfq-iosched.c index d4edeb8fceb..eb4086f7dfe 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -458,7 +458,7 @@ static inline struct cfq_data *cic_to_cfqd(struct cfq_io_context *cic) */ static inline bool cfq_bio_sync(struct bio *bio) { - return bio_data_dir(bio) == READ || bio_rw_flagged(bio, BIO_RW_SYNCIO); + return bio_data_dir(bio) == READ || (bio->bi_rw & REQ_SYNC); } /* @@ -646,10 +646,10 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2, return rq1; else if (rq_is_sync(rq2) && !rq_is_sync(rq1)) return rq2; - if ((rq1->cmd_flags & REQ_RW_META) && !(rq2->cmd_flags & REQ_RW_META)) + if ((rq1->cmd_flags & REQ_META) && !(rq2->cmd_flags & REQ_META)) return rq1; - else if ((rq2->cmd_flags & REQ_RW_META) && - !(rq1->cmd_flags & REQ_RW_META)) + else if ((rq2->cmd_flags & REQ_META) && + !(rq1->cmd_flags & REQ_META)) return rq2; s1 = blk_rq_pos(rq1); @@ -1485,7 +1485,7 @@ static void cfq_remove_request(struct request *rq) cfqq->cfqd->rq_queued--; cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg, rq_data_dir(rq), rq_is_sync(rq)); - if (rq->cmd_flags & REQ_RW_META) { + if (rq->cmd_flags & REQ_META) { WARN_ON(!cfqq->meta_pending); cfqq->meta_pending--; } @@ -3177,7 +3177,7 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, * So both queues are sync. Let the new request get disk time if * it's a metadata request and the current queue is doing regular IO. 
*/ - if ((rq->cmd_flags & REQ_RW_META) && !cfqq->meta_pending) + if ((rq->cmd_flags & REQ_META) && !cfqq->meta_pending) return true; /* @@ -3231,7 +3231,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, struct cfq_io_context *cic = RQ_CIC(rq); cfqd->rq_queued++; - if (rq->cmd_flags & REQ_RW_META) + if (rq->cmd_flags & REQ_META) cfqq->meta_pending++; cfq_update_io_thinktime(cfqd, cic); diff --git a/block/elevator.c b/block/elevator.c index aa99b59c03d..816a7c8d639 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -79,8 +79,7 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio) /* * Don't merge file system requests and discard requests */ - if (bio_rw_flagged(bio, BIO_RW_DISCARD) != - bio_rw_flagged(rq->bio, BIO_RW_DISCARD)) + if ((bio->bi_rw & REQ_DISCARD) != (rq->bio->bi_rw & REQ_DISCARD)) return 0; /* diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index a5c08b082ed..0a8cd348479 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -1114,7 +1114,7 @@ static int atapi_drain_needed(struct request *rq) if (likely(rq->cmd_type != REQ_TYPE_BLOCK_PC)) return 0; - if (!blk_rq_bytes(rq) || (rq->cmd_flags & REQ_RW)) + if (!blk_rq_bytes(rq) || (rq->cmd_flags & REQ_WRITE)) return 0; return atapi_cmd_type(rq->cmd[0]) == ATAPI_MISC; diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 035cefe4045..65deffde60a 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -173,7 +173,7 @@ aoeblk_make_request(struct request_queue *q, struct bio *bio) BUG(); bio_endio(bio, -ENXIO); return 0; - } else if (bio_rw_flagged(bio, BIO_RW_BARRIER)) { + } else if (bio->bi_rw & REQ_HARDBARRIER) { bio_endio(bio, -EOPNOTSUPP); return 0; } else if (bio->bi_io_vec == NULL) { diff --git a/drivers/block/brd.c b/drivers/block/brd.c index f1bf79d9bc0..1b218c6b682 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -340,7 +340,7 @@ static int brd_make_request(struct request_queue *q, 
struct bio *bio) get_capacity(bdev->bd_disk)) goto out; - if (unlikely(bio_rw_flagged(bio, BIO_RW_DISCARD))) { + if (unlikely(bio->bi_rw & REQ_DISCARD)) { err = 0; discard_from_brd(brd, sector, bio->bi_size); goto out; diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index df018990c42..9400845d602 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -79,8 +79,8 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev, md_io.error = 0; if ((rw & WRITE) && !test_bit(MD_NO_BARRIER, &mdev->flags)) - rw |= (1 << BIO_RW_BARRIER); - rw |= ((1<bi_rw & REQ_HARDBARRIER) && !ok)) { /* Try again with no barrier */ dev_warn(DEV, "Barriers not supported on meta data device - disabling\n"); set_bit(MD_NO_BARRIER, &mdev->flags); - rw &= ~(1 << BIO_RW_BARRIER); + rw &= ~REQ_HARDBARRIER; bio_put(bio); goto retry; } diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 7258c95e895..e2ab13d99d6 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2425,15 +2425,15 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) /* NOTE: no need to check if barriers supported here as we would * not pass the test in make_request_common in that case */ - if (bio_rw_flagged(req->master_bio, BIO_RW_BARRIER)) { + if (req->master_bio->bi_rw & REQ_HARDBARRIER) { dev_err(DEV, "ASSERT FAILED would have set DP_HARDBARRIER\n"); /* dp_flags |= DP_HARDBARRIER; */ } - if (bio_rw_flagged(req->master_bio, BIO_RW_SYNCIO)) + if (req->master_bio->bi_rw & REQ_SYNC) dp_flags |= DP_RW_SYNC; /* for now handle SYNCIO and UNPLUG * as if they still were one and the same flag */ - if (bio_rw_flagged(req->master_bio, BIO_RW_UNPLUG)) + if (req->master_bio->bi_rw & REQ_UNPLUG) dp_flags |= DP_RW_SYNC; if (mdev->state.conn >= C_SYNC_SOURCE && mdev->state.conn <= C_PAUSED_SYNC_T) diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 
dff48701b84..cba1deb7b27 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1180,7 +1180,7 @@ next_bio: bio->bi_sector = sector; bio->bi_bdev = mdev->ldev->backing_bdev; /* we special case some flags in the multi-bio case, see below - * (BIO_RW_UNPLUG, BIO_RW_BARRIER) */ + * (REQ_UNPLUG, REQ_HARDBARRIER) */ bio->bi_rw = rw; bio->bi_private = e; bio->bi_end_io = drbd_endio_sec; @@ -1209,16 +1209,16 @@ next_bio: bios = bios->bi_next; bio->bi_next = NULL; - /* strip off BIO_RW_UNPLUG unless it is the last bio */ + /* strip off REQ_UNPLUG unless it is the last bio */ if (bios) - bio->bi_rw &= ~(1<bi_rw &= ~REQ_UNPLUG; drbd_generic_make_request(mdev, fault_type, bio); - /* strip off BIO_RW_BARRIER, + /* strip off REQ_HARDBARRIER, * unless it is the first or last bio */ if (bios && bios->bi_next) - bios->bi_rw &= ~(1<bi_rw &= ~REQ_HARDBARRIER; } while (bios); maybe_kick_lo(mdev); return 0; @@ -1233,7 +1233,7 @@ fail: } /** - * w_e_reissue() - Worker callback; Resubmit a bio, without BIO_RW_BARRIER set + * w_e_reissue() - Worker callback; Resubmit a bio, without REQ_HARDBARRIER set * @mdev: DRBD device. * @w: work object. * @cancel: The connection will be closed anyways (unused in this callback) @@ -1245,7 +1245,7 @@ int w_e_reissue(struct drbd_conf *mdev, struct drbd_work *w, int cancel) __relea (and DE_BARRIER_IN_NEXT_EPOCH_ISSUED in the previous Epoch) so that we can finish that epoch in drbd_may_finish_epoch(). That is necessary if we already have a long chain of Epochs, before - we realize that BIO_RW_BARRIER is actually not supported */ + we realize that REQ_HARDBARRIER is actually not supported */ /* As long as the -ENOTSUPP on the barrier is reported immediately that will never trigger. 
If it is reported late, we will just @@ -1824,14 +1824,14 @@ static int receive_Data(struct drbd_conf *mdev, struct p_header *h) epoch = list_entry(e->epoch->list.prev, struct drbd_epoch, list); if (epoch == e->epoch) { set_bit(DE_CONTAINS_A_BARRIER, &e->epoch->flags); - rw |= (1<flags |= EE_IS_BARRIER; } else { if (atomic_read(&epoch->epoch_size) > 1 || !test_bit(DE_CONTAINS_A_BARRIER, &epoch->flags)) { set_bit(DE_BARRIER_IN_NEXT_EPOCH_ISSUED, &epoch->flags); set_bit(DE_CONTAINS_A_BARRIER, &e->epoch->flags); - rw |= (1<flags |= EE_IS_BARRIER; } } @@ -1841,10 +1841,10 @@ static int receive_Data(struct drbd_conf *mdev, struct p_header *h) dp_flags = be32_to_cpu(p->dp_flags); if (dp_flags & DP_HARDBARRIER) { dev_err(DEV, "ASSERT FAILED would have submitted barrier request\n"); - /* rw |= (1<flags |= EE_MAY_SET_IN_SYNC; diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 654f1ef5cbb..f761d98a4e9 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -997,7 +997,7 @@ int drbd_make_request_26(struct request_queue *q, struct bio *bio) * because of those XXX, this is not yet enabled, * i.e. in drbd_init_set_defaults we set the NO_BARRIER_SUPP bit. 
*/ - if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER) && test_bit(NO_BARRIER_SUPP, &mdev->flags))) { + if (unlikely(bio->bi_rw & REQ_HARDBARRIER) && test_bit(NO_BARRIER_SUPP, &mdev->flags)) { /* dev_warn(DEV, "Rejecting barrier request as underlying device does not support\n"); */ bio_endio(bio, -EOPNOTSUPP); return 0; diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 6120922f459..fedfdb7d3cd 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -476,7 +476,7 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio) pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset; if (bio_rw(bio) == WRITE) { - bool barrier = bio_rw_flagged(bio, BIO_RW_BARRIER); + bool barrier = (bio->bi_rw & REQ_HARDBARRIER); struct file *file = lo->lo_backing_file; if (barrier) { diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 8a549db2aa7..9f3e4454274 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -1221,7 +1221,7 @@ static int pkt_start_recovery(struct packet_data *pkt) pkt->bio->bi_flags = 1 << BIO_UPTODATE; pkt->bio->bi_idx = 0; - BUG_ON(pkt->bio->bi_rw != (1 << BIO_RW)); + BUG_ON(pkt->bio->bi_rw != REQ_WRITE); BUG_ON(pkt->bio->bi_vcnt != pkt->frames); BUG_ON(pkt->bio->bi_size != pkt->frames * CD_FRAMESIZE); BUG_ON(pkt->bio->bi_end_io != pkt_end_io_packet_write); diff --git a/drivers/block/umem.c b/drivers/block/umem.c index 2f9470ff8f7..8be57151f5d 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -478,7 +478,7 @@ static void process_page(unsigned long data) le32_to_cpu(desc->local_addr)>>9, le32_to_cpu(desc->transfer_size)); dump_dmastat(card, control); - } else if (test_bit(BIO_RW, &bio->bi_rw) && + } else if ((bio->bi_rw & REQ_WRITE) && le32_to_cpu(desc->local_addr) >> 9 == card->init_size) { card->init_size += le32_to_cpu(desc->transfer_size) >> 9; diff --git a/drivers/ide/ide-cd_ioctl.c b/drivers/ide/ide-cd_ioctl.c index 02712bf045c..766b3deeb23 100644 --- a/drivers/ide/ide-cd_ioctl.c +++ 
b/drivers/ide/ide-cd_ioctl.c @@ -454,7 +454,7 @@ int ide_cdrom_packet(struct cdrom_device_info *cdi, touch it at all. */ if (cgc->data_direction == CGC_DATA_WRITE) - flags |= REQ_RW; + flags |= REQ_WRITE; if (cgc->sense) memset(cgc->sense, 0, sizeof(struct request_sense)); diff --git a/drivers/ide/ide-floppy.c b/drivers/ide/ide-floppy.c index c7d0737bb18..5406b6ea3ad 100644 --- a/drivers/ide/ide-floppy.c +++ b/drivers/ide/ide-floppy.c @@ -207,7 +207,7 @@ static void idefloppy_create_rw_cmd(ide_drive_t *drive, memcpy(rq->cmd, pc->c, 12); pc->rq = rq; - if (rq->cmd_flags & REQ_RW) + if (rq->cmd_flags & REQ_WRITE) pc->flags |= PC_FLAG_WRITING; pc->flags |= PC_FLAG_DMA_OK; diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 10f457ca6af..0590c75b0ab 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -356,7 +356,7 @@ static void dispatch_io(int rw, unsigned int num_regions, BUG_ON(num_regions > DM_IO_MAX_REGIONS); if (sync) - rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG); + rw |= REQ_SYNC | REQ_UNPLUG; /* * For multiple regions we need to be careful to rewind @@ -364,7 +364,7 @@ static void dispatch_io(int rw, unsigned int num_regions, */ for (i = 0; i < num_regions; i++) { *dp = old_pages; - if (where[i].count || (rw & (1 << BIO_RW_BARRIER))) + if (where[i].count || (rw & REQ_HARDBARRIER)) do_region(rw, i, where + i, dp, io); } @@ -412,8 +412,8 @@ retry: } set_current_state(TASK_RUNNING); - if (io->eopnotsupp_bits && (rw & (1 << BIO_RW_BARRIER))) { - rw &= ~(1 << BIO_RW_BARRIER); + if (io->eopnotsupp_bits && (rw & REQ_HARDBARRIER)) { + rw &= ~REQ_HARDBARRIER; goto retry; } @@ -479,8 +479,8 @@ static int dp_init(struct dm_io_request *io_req, struct dpages *dp) * New collapsed (a)synchronous interface. * * If the IO is asynchronous (i.e. it has notify.fn), you must either unplug - * the queue with blk_unplug() some time later or set the BIO_RW_SYNC bit in - * io_req->bi_rw. 
If you fail to do one of these, the IO will be submitted to + * the queue with blk_unplug() some time later or set REQ_SYNC in +io_req->bi_rw. If you fail to do one of these, the IO will be submitted to * the disk after q->unplug_delay, which defaults to 3ms in blk-settings.c. */ int dm_io(struct dm_io_request *io_req, unsigned num_regions, diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index addf8347504..d8587bac568 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -345,7 +345,7 @@ static int run_io_job(struct kcopyd_job *job) { int r; struct dm_io_request io_req = { - .bi_rw = job->rw | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG), + .bi_rw = job->rw | REQ_SYNC | REQ_UNPLUG, .mem.type = DM_IO_PAGE_LIST, .mem.ptr.pl = job->pages, .mem.offset = job->offset, diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index ddda531723d..74136262d65 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -1211,7 +1211,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, if (error == -EOPNOTSUPP) goto out; - if ((error == -EWOULDBLOCK) && bio_rw_flagged(bio, BIO_RW_AHEAD)) + if ((error == -EWOULDBLOCK) && (bio->bi_rw & REQ_RAHEAD)) goto out; if (unlikely(error)) { diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index e610725db76..d6e28d732b4 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -284,7 +284,7 @@ static int stripe_end_io(struct dm_target *ti, struct bio *bio, if (!error) return 0; /* I/O complete */ - if ((error == -EWOULDBLOCK) && bio_rw_flagged(bio, BIO_RW_AHEAD)) + if ((error == -EWOULDBLOCK) && (bio->bi_rw & REQ_RAHEAD)) return error; if (error == -EOPNOTSUPP) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 1e0e6dd5150..d6f77baeafd 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -614,7 +614,7 @@ static void dec_pending(struct dm_io *io, int error) */ spin_lock_irqsave(&md->deferred_lock, flags); if (__noflush_suspending(md)) { - if (!bio_rw_flagged(io->bio, 
BIO_RW_BARRIER)) + if (!(io->bio->bi_rw & REQ_HARDBARRIER)) bio_list_add_head(&md->deferred, io->bio); } else @@ -626,7 +626,7 @@ static void dec_pending(struct dm_io *io, int error) io_error = io->error; bio = io->bio; - if (bio_rw_flagged(bio, BIO_RW_BARRIER)) { + if (bio->bi_rw & REQ_HARDBARRIER) { /* * There can be just one barrier request so we use * a per-device variable for error reporting. @@ -1106,7 +1106,7 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector, clone->bi_sector = sector; clone->bi_bdev = bio->bi_bdev; - clone->bi_rw = bio->bi_rw & ~(1 << BIO_RW_BARRIER); + clone->bi_rw = bio->bi_rw & ~REQ_HARDBARRIER; clone->bi_vcnt = 1; clone->bi_size = to_bytes(len); clone->bi_io_vec->bv_offset = offset; @@ -1133,7 +1133,7 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector, clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs); __bio_clone(clone, bio); - clone->bi_rw &= ~(1 << BIO_RW_BARRIER); + clone->bi_rw &= ~REQ_HARDBARRIER; clone->bi_destructor = dm_bio_destructor; clone->bi_sector = sector; clone->bi_idx = idx; @@ -1301,7 +1301,7 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio) ci.map = dm_get_live_table(md); if (unlikely(!ci.map)) { - if (!bio_rw_flagged(bio, BIO_RW_BARRIER)) + if (!(bio->bi_rw & REQ_HARDBARRIER)) bio_io_error(bio); else if (!md->barrier_error) @@ -1414,7 +1414,7 @@ static int _dm_request(struct request_queue *q, struct bio *bio) * we have to queue this io for later. 
*/ if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) || - unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) { + unlikely(bio->bi_rw & REQ_HARDBARRIER)) { up_read(&md->io_lock); if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) && @@ -2296,7 +2296,7 @@ static void dm_wq_work(struct work_struct *work) if (dm_request_based(md)) generic_make_request(c); else { - if (bio_rw_flagged(c, BIO_RW_BARRIER)) + if (c->bi_rw & REQ_HARDBARRIER) process_barrier(md, c); else __split_and_process_bio(md, c); diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 7e0e057db9a..ba19060bcf3 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -294,7 +294,7 @@ static int linear_make_request (mddev_t *mddev, struct bio *bio) dev_info_t *tmp_dev; sector_t start_sector; - if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) { + if (unlikely(bio->bi_rw & REQ_HARDBARRIER)) { md_barrier_request(mddev, bio); return 0; } diff --git a/drivers/md/md.c b/drivers/md/md.c index cb20d0b0555..1893af67877 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -353,7 +353,7 @@ static void md_submit_barrier(struct work_struct *ws) /* an empty barrier - all done */ bio_endio(bio, 0); else { - bio->bi_rw &= ~(1<<BIO_RW_BARRIER); + bio->bi_rw &= ~REQ_HARDBARRIER; if (mddev->pers->make_request(mddev, bio)) generic_make_request(bio); mddev->barrier = POST_REQUEST_BARRIER; @@ -675,11 +675,11 @@ void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, * if zero is reached. * If an error occurred, call md_error * - * As we might need to resubmit the request if BIO_RW_BARRIER + * As we might need to resubmit the request if REQ_HARDBARRIER * causes ENOTSUPP, we allocate a spare bio... 
*/ struct bio *bio = bio_alloc(GFP_NOIO, 1); - int rw = (1<<BIO_RW) | (1<<BIO_RW_SYNCIO) | (1<<BIO_RW_UNPLUG); + int rw = REQ_WRITE | REQ_SYNC | REQ_UNPLUG; bio->bi_bdev = rdev->bdev; bio->bi_sector = sector; @@ -691,7 +691,7 @@ void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, atomic_inc(&mddev->pending_writes); if (!test_bit(BarriersNotsupp, &rdev->flags)) { struct bio *rbio; - rw |= (1<<BIO_RW_BARRIER); + rw |= REQ_HARDBARRIER; rbio = bio_clone(bio, GFP_NOIO); rbio->bi_private = bio; rbio->bi_end_io = super_written_barrier; @@ -736,7 +736,7 @@ int sync_page_io(struct block_device *bdev, sector_t sector, int size, struct completion event; int ret; - rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG); + rw |= REQ_SYNC | REQ_UNPLUG; bio->bi_bdev = bdev; bio->bi_sector = sector; diff --git a/drivers/md/md.h b/drivers/md/md.h index 10597bfec00..fc56e0f21c8 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -67,7 +67,7 @@ struct mdk_rdev_s #define Faulty 1 /* device is known to have a fault */ #define In_sync 2 /* device is in_sync with rest of array */ #define WriteMostly 4 /* Avoid reading if at all possible */ -#define BarriersNotsupp 5 /* BIO_RW_BARRIER is not supported */ +#define BarriersNotsupp 5 /* REQ_HARDBARRIER is not supported */ #define AllReserved 6 /* If whole device is reserved for * one array */ #define AutoDetected 7 /* added by auto-detect */ @@ -254,7 +254,7 @@ struct mddev_s * fails. 
Only supported */ struct bio *biolist; /* bios that need to be retried - * because BIO_RW_BARRIER is not supported + * because REQ_HARDBARRIER is not supported */ atomic_t recovery_active; /* blocks scheduled, but not written */ diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 410fb60699a..0307d217e7a 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -91,7 +91,7 @@ static void multipath_end_request(struct bio *bio, int error) if (uptodate) multipath_end_bh_io(mp_bh, 0); - else if (!bio_rw_flagged(bio, BIO_RW_AHEAD)) { + else if (!(bio->bi_rw & REQ_RAHEAD)) { /* * oops, IO error: */ @@ -142,7 +142,7 @@ static int multipath_make_request(mddev_t *mddev, struct bio * bio) struct multipath_bh * mp_bh; struct multipath_info *multipath; - if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) { + if (unlikely(bio->bi_rw & REQ_HARDBARRIER)) { md_barrier_request(mddev, bio); return 0; } @@ -163,7 +163,7 @@ static int multipath_make_request(mddev_t *mddev, struct bio * bio) mp_bh->bio = *bio; mp_bh->bio.bi_sector += multipath->rdev->data_offset; mp_bh->bio.bi_bdev = multipath->rdev->bdev; - mp_bh->bio.bi_rw |= (1 << BIO_RW_FAILFAST_TRANSPORT); + mp_bh->bio.bi_rw |= REQ_FAILFAST_TRANSPORT; mp_bh->bio.bi_end_io = multipath_end_request; mp_bh->bio.bi_private = mp_bh; generic_make_request(&mp_bh->bio); @@ -398,7 +398,7 @@ static void multipathd (mddev_t *mddev) *bio = *(mp_bh->master_bio); bio->bi_sector += conf->multipaths[mp_bh->path].rdev->data_offset; bio->bi_bdev = conf->multipaths[mp_bh->path].rdev->bdev; - bio->bi_rw |= (1 << BIO_RW_FAILFAST_TRANSPORT); + bio->bi_rw |= REQ_FAILFAST_TRANSPORT; bio->bi_end_io = multipath_end_request; bio->bi_private = mp_bh; generic_make_request(bio); diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 563abed5a2c..6f7af46d623 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -483,7 +483,7 @@ static int raid0_make_request(mddev_t *mddev, struct bio *bio) struct strip_zone *zone; mdk_rdev_t 
*tmp_dev; - if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) { + if (unlikely(bio->bi_rw & REQ_HARDBARRIER)) { md_barrier_request(mddev, bio); return 0; } diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index a948da8012d..73cc74ffc26 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -787,7 +787,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) struct bio_list bl; struct page **behind_pages = NULL; const int rw = bio_data_dir(bio); - const bool do_sync = bio_rw_flagged(bio, BIO_RW_SYNCIO); + const bool do_sync = (bio->bi_rw & REQ_SYNC); bool do_barriers; mdk_rdev_t *blocked_rdev; @@ -822,7 +822,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) finish_wait(&conf->wait_barrier, &w); } if (unlikely(!mddev->barriers_work && - bio_rw_flagged(bio, BIO_RW_BARRIER))) { + (bio->bi_rw & REQ_HARDBARRIER))) { if (rw == WRITE) md_write_end(mddev); bio_endio(bio, -EOPNOTSUPP); @@ -877,7 +877,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) read_bio->bi_sector = r1_bio->sector + mirror->rdev->data_offset; read_bio->bi_bdev = mirror->rdev->bdev; read_bio->bi_end_io = raid1_end_read_request; - read_bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO); + read_bio->bi_rw = READ | do_sync; read_bio->bi_private = r1_bio; generic_make_request(read_bio); @@ -959,7 +959,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) atomic_set(&r1_bio->remaining, 0); atomic_set(&r1_bio->behind_remaining, 0); - do_barriers = bio_rw_flagged(bio, BIO_RW_BARRIER); + do_barriers = bio->bi_rw & REQ_HARDBARRIER; if (do_barriers) set_bit(R1BIO_Barrier, &r1_bio->state); @@ -975,8 +975,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) mbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset; mbio->bi_bdev = conf->mirrors[i].rdev->bdev; mbio->bi_end_io = raid1_end_write_request; - mbio->bi_rw = WRITE | (do_barriers << BIO_RW_BARRIER) | - (do_sync << BIO_RW_SYNCIO); + mbio->bi_rw = WRITE | do_barriers | do_sync; 
mbio->bi_private = r1_bio; if (behind_pages) { @@ -1633,7 +1632,7 @@ static void raid1d(mddev_t *mddev) sync_request_write(mddev, r1_bio); unplug = 1; } else if (test_bit(R1BIO_BarrierRetry, &r1_bio->state)) { - /* some requests in the r1bio were BIO_RW_BARRIER + /* some requests in the r1bio were REQ_HARDBARRIER * requests which failed with -EOPNOTSUPP. Hohumm.. * Better resubmit without the barrier. * We know which devices to resubmit for, because @@ -1641,7 +1640,7 @@ static void raid1d(mddev_t *mddev) * We already have a nr_pending reference on these rdevs. */ int i; - const bool do_sync = bio_rw_flagged(r1_bio->master_bio, BIO_RW_SYNCIO); + const bool do_sync = (r1_bio->master_bio->bi_rw & REQ_SYNC); clear_bit(R1BIO_BarrierRetry, &r1_bio->state); clear_bit(R1BIO_Barrier, &r1_bio->state); for (i=0; i < conf->raid_disks; i++) @@ -1662,8 +1661,7 @@ static void raid1d(mddev_t *mddev) conf->mirrors[i].rdev->data_offset; bio->bi_bdev = conf->mirrors[i].rdev->bdev; bio->bi_end_io = raid1_end_write_request; - bio->bi_rw = WRITE | - (do_sync << BIO_RW_SYNCIO); + bio->bi_rw = WRITE | do_sync; bio->bi_private = r1_bio; r1_bio->bios[i] = bio; generic_make_request(bio); @@ -1698,7 +1696,7 @@ static void raid1d(mddev_t *mddev) (unsigned long long)r1_bio->sector); raid_end_bio_io(r1_bio); } else { - const bool do_sync = bio_rw_flagged(r1_bio->master_bio, BIO_RW_SYNCIO); + const bool do_sync = r1_bio->master_bio->bi_rw & REQ_SYNC; r1_bio->bios[r1_bio->read_disk] = mddev->ro ? 
IO_BLOCKED : NULL; r1_bio->read_disk = disk; @@ -1715,7 +1713,7 @@ static void raid1d(mddev_t *mddev) bio->bi_sector = r1_bio->sector + rdev->data_offset; bio->bi_bdev = rdev->bdev; bio->bi_end_io = raid1_end_read_request; - bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO); + bio->bi_rw = READ | do_sync; bio->bi_private = r1_bio; unplug = 1; generic_make_request(bio); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 42e64e4e5e2..62ecb6650fd 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -799,12 +799,12 @@ static int make_request(mddev_t *mddev, struct bio * bio) int i; int chunk_sects = conf->chunk_mask + 1; const int rw = bio_data_dir(bio); - const bool do_sync = bio_rw_flagged(bio, BIO_RW_SYNCIO); + const bool do_sync = (bio->bi_rw & REQ_SYNC); struct bio_list bl; unsigned long flags; mdk_rdev_t *blocked_rdev; - if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) { + if (unlikely(bio->bi_rw & REQ_HARDBARRIER)) { md_barrier_request(mddev, bio); return 0; } @@ -879,7 +879,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) mirror->rdev->data_offset; read_bio->bi_bdev = mirror->rdev->bdev; read_bio->bi_end_io = raid10_end_read_request; - read_bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO); + read_bio->bi_rw = READ | do_sync; read_bio->bi_private = r10_bio; generic_make_request(read_bio); @@ -947,7 +947,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) conf->mirrors[d].rdev->data_offset; mbio->bi_bdev = conf->mirrors[d].rdev->bdev; mbio->bi_end_io = raid10_end_write_request; - mbio->bi_rw = WRITE | (do_sync << BIO_RW_SYNCIO); + mbio->bi_rw = WRITE | do_sync; mbio->bi_private = r10_bio; atomic_inc(&r10_bio->remaining); @@ -1716,7 +1716,7 @@ static void raid10d(mddev_t *mddev) raid_end_bio_io(r10_bio); bio_put(bio); } else { - const bool do_sync = bio_rw_flagged(r10_bio->master_bio, BIO_RW_SYNCIO); + const bool do_sync = (r10_bio->master_bio->bi_rw & REQ_SYNC); bio_put(bio); rdev = conf->mirrors[mirror].rdev; if 
(printk_ratelimit()) @@ -1730,7 +1730,7 @@ static void raid10d(mddev_t *mddev) bio->bi_sector = r10_bio->devs[r10_bio->read_slot].addr + rdev->data_offset; bio->bi_bdev = rdev->bdev; - bio->bi_rw = READ | (do_sync << BIO_RW_SYNCIO); + bio->bi_rw = READ | do_sync; bio->bi_private = r10_bio; bio->bi_end_io = raid10_end_read_request; unplug = 1; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 96c690279fc..20ac2f14376 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3958,7 +3958,7 @@ static int make_request(mddev_t *mddev, struct bio * bi) const int rw = bio_data_dir(bi); int remaining; - if (unlikely(bio_rw_flagged(bi, BIO_RW_BARRIER))) { + if (unlikely(bi->bi_rw & REQ_HARDBARRIER)) { /* Drain all pending writes. We only really need * to ensure they have been submitted, but this is * easier. diff --git a/drivers/scsi/osd/osd_initiator.c b/drivers/scsi/osd/osd_initiator.c index ee4b6914667..fda4de3440c 100644 --- a/drivers/scsi/osd/osd_initiator.c +++ b/drivers/scsi/osd/osd_initiator.c @@ -716,7 +716,7 @@ static int _osd_req_list_objects(struct osd_request *or, return PTR_ERR(bio); } - bio->bi_rw &= ~(1 << BIO_RW); + bio->bi_rw &= ~REQ_WRITE; or->in.bio = bio; or->in.total_bytes = bio->bi_size; return 0; @@ -814,7 +814,7 @@ void osd_req_write(struct osd_request *or, { _osd_req_encode_common(or, OSD_ACT_WRITE, obj, offset, len); WARN_ON(or->out.bio || or->out.total_bytes); - WARN_ON(0 == bio_rw_flagged(bio, BIO_RW)); + WARN_ON(0 == (bio->bi_rw & REQ_WRITE)); or->out.bio = bio; or->out.total_bytes = len; } @@ -829,7 +829,7 @@ int osd_req_write_kern(struct osd_request *or, if (IS_ERR(bio)) return PTR_ERR(bio); - bio->bi_rw |= (1 << BIO_RW); /* FIXME: bio_set_dir() */ + bio->bi_rw |= REQ_WRITE; /* FIXME: bio_set_dir() */ osd_req_write(or, obj, offset, bio, len); return 0; } @@ -865,7 +865,7 @@ void osd_req_read(struct osd_request *or, { _osd_req_encode_common(or, OSD_ACT_READ, obj, offset, len); WARN_ON(or->in.bio || or->in.total_bytes); - WARN_ON(1 
== bio_rw_flagged(bio, BIO_RW)); + WARN_ON(1 == (bio->bi_rw & REQ_WRITE)); or->in.bio = bio; or->in.total_bytes = len; } diff --git a/fs/bio.c b/fs/bio.c index e7bf6ca64dc..8abb2dfb2e7 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -843,7 +843,8 @@ struct bio *bio_copy_user_iov(struct request_queue *q, if (!bio) goto out_bmd; - bio->bi_rw |= (!write_to_vm << BIO_RW); + if (!write_to_vm) + bio->bi_rw |= REQ_WRITE; ret = 0; @@ -1024,7 +1025,7 @@ static struct bio *__bio_map_user_iov(struct request_queue *q, * set data direction, and check if mapped pages need bouncing */ if (!write_to_vm) - bio->bi_rw |= (1 << BIO_RW); + bio->bi_rw |= REQ_WRITE; bio->bi_bdev = bdev; bio->bi_flags |= (1 << BIO_USER_MAPPED); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 34f7c375567..64f10082f04 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -480,7 +480,7 @@ static void end_workqueue_bio(struct bio *bio, int err) end_io_wq->work.func = end_workqueue_fn; end_io_wq->work.flags = 0; - if (bio->bi_rw & (1 << BIO_RW)) { + if (bio->bi_rw & REQ_WRITE) { if (end_io_wq->metadata) btrfs_queue_worker(&fs_info->endio_meta_write_workers, &end_io_wq->work); @@ -604,7 +604,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, atomic_inc(&fs_info->nr_async_submits); - if (rw & (1 << BIO_RW_SYNCIO)) + if (rw & REQ_SYNC) btrfs_set_work_high_prio(&async->work); btrfs_queue_worker(&fs_info->workers, &async->work); @@ -668,7 +668,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, bio, 1); BUG_ON(ret); - if (!(rw & (1 << BIO_RW))) { + if (!(rw & REQ_WRITE)) { /* * called for a read, do the setup so that checksum validation * can happen in the async kernel threads @@ -1427,7 +1427,7 @@ static void end_workqueue_fn(struct btrfs_work *work) * ram and up to date before trying to verify things. 
For * blocksize <= pagesize, it is basically a noop */ - if (!(bio->bi_rw & (1 << BIO_RW)) && end_io_wq->metadata && + if (!(bio->bi_rw & REQ_WRITE) && end_io_wq->metadata && !bio_ready_for_csum(bio)) { btrfs_queue_worker(&fs_info->endio_meta_workers, &end_io_wq->work); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1bff92ad474..e975d7180a8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1429,7 +1429,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); BUG_ON(ret); - if (!(rw & (1 << BIO_RW))) { + if (!(rw & REQ_WRITE)) { if (bio_flags & EXTENT_BIO_COMPRESSED) { return btrfs_submit_compressed_read(inode, bio, mirror_num, bio_flags); @@ -1841,7 +1841,7 @@ static int btrfs_io_failed_hook(struct bio *failed_bio, bio->bi_size = 0; bio_add_page(bio, page, failrec->len, start - page_offset(page)); - if (failed_bio->bi_rw & (1 << BIO_RW)) + if (failed_bio->bi_rw & REQ_WRITE) rw = WRITE; else rw = READ; @@ -5642,7 +5642,7 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, struct bio_vec *bvec = bio->bi_io_vec; u64 start; int skip_sum; - int write = rw & (1 << BIO_RW); + int write = rw & REQ_WRITE; int ret = 0; skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index d6e3af8be95..dd318ff280b 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -258,7 +258,7 @@ loop_lock: BUG_ON(atomic_read(&cur->bi_cnt) == 0); - if (bio_rw_flagged(cur, BIO_RW_SYNCIO)) + if (cur->bi_rw & REQ_SYNC) num_sync_run++; submit_bio(cur->bi_rw, cur); @@ -2651,7 +2651,7 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, int max_errors = 0; struct btrfs_multi_bio *multi = NULL; - if (multi_ret && !(rw & (1 << BIO_RW))) + if (multi_ret && !(rw & REQ_WRITE)) stripes_allocated = 1; again: if (multi_ret) { @@ -2687,7 +2687,7 @@ again: mirror_num = 0; /* if our multi bio struct is too 
small, back off and try again */ - if (rw & (1 << BIO_RW)) { + if (rw & REQ_WRITE) { if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP)) { stripes_required = map->num_stripes; @@ -2697,7 +2697,7 @@ again: max_errors = 1; } } - if (multi_ret && (rw & (1 << BIO_RW)) && + if (multi_ret && (rw & REQ_WRITE) && stripes_allocated < stripes_required) { stripes_allocated = map->num_stripes; free_extent_map(em); @@ -2733,7 +2733,7 @@ again: num_stripes = 1; stripe_index = 0; if (map->type & BTRFS_BLOCK_GROUP_RAID1) { - if (unplug_page || (rw & (1 << BIO_RW))) + if (unplug_page || (rw & REQ_WRITE)) num_stripes = map->num_stripes; else if (mirror_num) stripe_index = mirror_num - 1; @@ -2744,7 +2744,7 @@ again: } } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { - if (rw & (1 << BIO_RW)) + if (rw & REQ_WRITE) num_stripes = map->num_stripes; else if (mirror_num) stripe_index = mirror_num - 1; @@ -2755,7 +2755,7 @@ again: stripe_index = do_div(stripe_nr, factor); stripe_index *= map->sub_stripes; - if (unplug_page || (rw & (1 << BIO_RW))) + if (unplug_page || (rw & REQ_WRITE)) num_stripes = map->sub_stripes; else if (mirror_num) stripe_index += mirror_num - 1; @@ -2945,7 +2945,7 @@ static noinline int schedule_bio(struct btrfs_root *root, struct btrfs_pending_bios *pending_bios; /* don't bother with additional async steps for reads, right now */ - if (!(rw & (1 << BIO_RW))) { + if (!(rw & REQ_WRITE)) { bio_get(bio); submit_bio(rw, bio); bio_put(bio); @@ -2964,7 +2964,7 @@ static noinline int schedule_bio(struct btrfs_root *root, bio->bi_rw |= rw; spin_lock(&device->io_lock); - if (bio_rw_flagged(bio, BIO_RW_SYNCIO)) + if (bio->bi_rw & REQ_SYNC) pending_bios = &device->pending_sync_bios; else pending_bios = &device->pending_bios; diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c index 4337cad7777..e2732203fa9 100644 --- a/fs/exofs/ios.c +++ b/fs/exofs/ios.c @@ -599,7 +599,7 @@ static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp) } else { bio = 
master_dev->bio; /* FIXME: bio_set_dir() */ - bio->bi_rw |= (1 << BIO_RW); + bio->bi_rw |= REQ_WRITE; } osd_req_write(or, &ios->obj, per_dev->offset, bio, diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index efc3539ac5a..cde1248a622 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -595,7 +595,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) goto skip_barrier; get_bh(bh); - submit_bh(WRITE_BARRIER | (1 << BIO_RW_META), bh); + submit_bh(WRITE_BARRIER | REQ_META, bh); wait_on_buffer(bh); if (buffer_eopnotsupp(bh)) { clear_buffer_eopnotsupp(bh); @@ -605,7 +605,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) lock_buffer(bh); skip_barrier: get_bh(bh); - submit_bh(WRITE_SYNC | (1 << BIO_RW_META), bh); + submit_bh(WRITE_SYNC | REQ_META, bh); wait_on_buffer(bh); } if (!buffer_uptodate(bh)) diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 18176d0b75d..f3b071f921a 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -36,8 +36,8 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb { struct buffer_head *bh, *head; int nr_underway = 0; - int write_op = (1 << BIO_RW_META) | ((wbc->sync_mode == WB_SYNC_ALL ? - WRITE_SYNC_PLUG : WRITE)); + int write_op = REQ_META | + (wbc->sync_mode == WB_SYNC_ALL ? 
WRITE_SYNC_PLUG : WRITE); BUG_ON(!PageLocked(page)); BUG_ON(!page_has_buffers(page)); @@ -225,7 +225,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, } bh->b_end_io = end_buffer_read_sync; get_bh(bh); - submit_bh(READ_SYNC | (1 << BIO_RW_META), bh); + submit_bh(READ_SYNC | REQ_META, bh); if (!(flags & DIO_WAIT)) return 0; @@ -432,7 +432,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen) if (buffer_uptodate(first_bh)) goto out; if (!buffer_locked(first_bh)) - ll_rw_block(READ_SYNC | (1 << BIO_RW_META), 1, &first_bh); + ll_rw_block(READ_SYNC | REQ_META, 1, &first_bh); dblock++; extlen--; diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 3593b3a7290..fd4f8946abf 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -275,7 +275,7 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector) bio->bi_end_io = end_bio_io_page; bio->bi_private = page; - submit_bio(READ_SYNC | (1 << BIO_RW_META), bio); + submit_bio(READ_SYNC | REQ_META, bio); wait_on_page_locked(page); bio_put(bio); if (!PageUptodate(page)) { diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index 2e6a2723b8f..4588fb9e93d 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -508,7 +508,7 @@ static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf, * Last BIO is always sent through the following * submission. */ - rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG); + rw |= REQ_SYNC | REQ_UNPLUG; res = nilfs_segbuf_submit_bio(segbuf, &wi, rw); } diff --git a/include/linux/bio.h b/include/linux/bio.h index 7fc5606e6ea..4d379c8250a 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -138,55 +138,83 @@ struct bio { #define BIO_POOL_IDX(bio) ((bio)->bi_flags >> BIO_POOL_OFFSET) /* - * bio bi_rw flags - * - * bit 0 -- data direction - * If not set, bio is a read from device. If set, it's a write to device. 
- * bit 1 -- fail fast device errors - * bit 2 -- fail fast transport errors - * bit 3 -- fail fast driver errors - * bit 4 -- rw-ahead when set - * bit 5 -- barrier - * Insert a serialization point in the IO queue, forcing previously - * submitted IO to be completed before this one is issued. - * bit 6 -- synchronous I/O hint. - * bit 7 -- Unplug the device immediately after submitting this bio. - * bit 8 -- metadata request - * Used for tracing to differentiate metadata and data IO. May also - * get some preferential treatment in the IO scheduler - * bit 9 -- discard sectors - * Informs the lower level device that this range of sectors is no longer - * used by the file system and may thus be freed by the device. Used - * for flash based storage. - * Don't want driver retries for any fast fail whatever the reason. - * bit 10 -- Tell the IO scheduler not to wait for more requests after this - one has been submitted, even if it is a SYNC request. + * Request flags. For use in the cmd_flags field of struct request, and in + * bi_rw of struct bio. Note that some flags are only valid in either one. */ -enum bio_rw_flags { - BIO_RW, - BIO_RW_FAILFAST_DEV, - BIO_RW_FAILFAST_TRANSPORT, - BIO_RW_FAILFAST_DRIVER, - /* above flags must match REQ_* */ - BIO_RW_AHEAD, - BIO_RW_BARRIER, - BIO_RW_SYNCIO, - BIO_RW_UNPLUG, - BIO_RW_META, - BIO_RW_DISCARD, - BIO_RW_NOIDLE, +enum rq_flag_bits { + /* common flags */ + __REQ_WRITE, /* not set, read. 
set, write */ + __REQ_FAILFAST_DEV, /* no driver retries of device errors */ + __REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */ + __REQ_FAILFAST_DRIVER, /* no driver retries of driver errors */ + + __REQ_HARDBARRIER, /* may not be passed by drive either */ + __REQ_SYNC, /* request is sync (sync write or read) */ + __REQ_META, /* metadata io request */ + __REQ_DISCARD, /* request to discard sectors */ + __REQ_NOIDLE, /* don't anticipate more IO after this one */ + + /* bio only flags */ + __REQ_UNPLUG, /* unplug the immediately after submission */ + __REQ_RAHEAD, /* read ahead, can fail anytime */ + + /* request only flags */ + __REQ_SORTED, /* elevator knows about this request */ + __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ + __REQ_FUA, /* forced unit access */ + __REQ_NOMERGE, /* don't touch this for merging */ + __REQ_STARTED, /* drive already may have started this one */ + __REQ_DONTPREP, /* don't call prep for this one */ + __REQ_QUEUED, /* uses queueing */ + __REQ_ELVPRIV, /* elevator private data attached */ + __REQ_FAILED, /* set if the request failed */ + __REQ_QUIET, /* don't worry about errors */ + __REQ_PREEMPT, /* set for "ide_preempt" requests */ + __REQ_ORDERED_COLOR, /* is before or after barrier */ + __REQ_ALLOCED, /* request came from our alloc pool */ + __REQ_COPY_USER, /* contains copies of user pages */ + __REQ_INTEGRITY, /* integrity metadata has been remapped */ + __REQ_IO_STAT, /* account I/O stat */ + __REQ_MIXED_MERGE, /* merge of different types, fail separately */ + __REQ_NR_BITS, /* stops here */ }; -/* - * First four bits must match between bio->bi_rw and rq->cmd_flags, make - * that explicit here. 
- */ -#define BIO_RW_RQ_MASK 0xf - -static inline bool bio_rw_flagged(struct bio *bio, enum bio_rw_flags flag) -{ - return (bio->bi_rw & (1 << flag)) != 0; -} +#define REQ_WRITE (1 << __REQ_WRITE) +#define REQ_FAILFAST_DEV (1 << __REQ_FAILFAST_DEV) +#define REQ_FAILFAST_TRANSPORT (1 << __REQ_FAILFAST_TRANSPORT) +#define REQ_FAILFAST_DRIVER (1 << __REQ_FAILFAST_DRIVER) +#define REQ_HARDBARRIER (1 << __REQ_HARDBARRIER) +#define REQ_SYNC (1 << __REQ_SYNC) +#define REQ_META (1 << __REQ_META) +#define REQ_DISCARD (1 << __REQ_DISCARD) +#define REQ_NOIDLE (1 << __REQ_NOIDLE) + +#define REQ_FAILFAST_MASK \ + (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) +#define REQ_COMMON_MASK \ + (REQ_WRITE | REQ_FAILFAST_MASK | REQ_HARDBARRIER | REQ_SYNC | \ + REQ_META| REQ_DISCARD | REQ_NOIDLE) + +#define REQ_UNPLUG (1 << __REQ_UNPLUG) +#define REQ_RAHEAD (1 << __REQ_RAHEAD) + +#define REQ_SORTED (1 << __REQ_SORTED) +#define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) +#define REQ_FUA (1 << __REQ_FUA) +#define REQ_NOMERGE (1 << __REQ_NOMERGE) +#define REQ_STARTED (1 << __REQ_STARTED) +#define REQ_DONTPREP (1 << __REQ_DONTPREP) +#define REQ_QUEUED (1 << __REQ_QUEUED) +#define REQ_ELVPRIV (1 << __REQ_ELVPRIV) +#define REQ_FAILED (1 << __REQ_FAILED) +#define REQ_QUIET (1 << __REQ_QUIET) +#define REQ_PREEMPT (1 << __REQ_PREEMPT) +#define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR) +#define REQ_ALLOCED (1 << __REQ_ALLOCED) +#define REQ_COPY_USER (1 << __REQ_COPY_USER) +#define REQ_INTEGRITY (1 << __REQ_INTEGRITY) +#define REQ_IO_STAT (1 << __REQ_IO_STAT) +#define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE) /* * upper 16 bits of bi_rw define the io priority of this bio @@ -211,7 +239,10 @@ static inline bool bio_rw_flagged(struct bio *bio, enum bio_rw_flags flag) #define bio_offset(bio) bio_iovec((bio))->bv_offset #define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_idx) #define bio_sectors(bio) ((bio)->bi_size >> 9) -#define bio_empty_barrier(bio) (bio_rw_flagged(bio, 
BIO_RW_BARRIER) && !bio_has_data(bio) && !bio_rw_flagged(bio, BIO_RW_DISCARD)) +#define bio_empty_barrier(bio) \ + ((bio->bi_rw & REQ_HARDBARRIER) && \ + !bio_has_data(bio) && \ + !(bio->bi_rw & REQ_DISCARD)) static inline unsigned int bio_cur_bytes(struct bio *bio) { diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 3ecd28ef9ba..3fc0f590861 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -84,70 +84,6 @@ enum { REQ_LB_OP_FLUSH = 0x41, /* flush request */ }; -/* - * request type modified bits. first four bits match BIO_RW* bits, important - */ -enum rq_flag_bits { - __REQ_RW, /* not set, read. set, write */ - __REQ_FAILFAST_DEV, /* no driver retries of device errors */ - __REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */ - __REQ_FAILFAST_DRIVER, /* no driver retries of driver errors */ - /* above flags must match BIO_RW_* */ - __REQ_DISCARD, /* request to discard sectors */ - __REQ_SORTED, /* elevator knows about this request */ - __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ - __REQ_HARDBARRIER, /* may not be passed by drive either */ - __REQ_FUA, /* forced unit access */ - __REQ_NOMERGE, /* don't touch this for merging */ - __REQ_STARTED, /* drive already may have started this one */ - __REQ_DONTPREP, /* don't call prep for this one */ - __REQ_QUEUED, /* uses queueing */ - __REQ_ELVPRIV, /* elevator private data attached */ - __REQ_FAILED, /* set if the request failed */ - __REQ_QUIET, /* don't worry about errors */ - __REQ_PREEMPT, /* set for "ide_preempt" requests */ - __REQ_ORDERED_COLOR, /* is before or after barrier */ - __REQ_RW_SYNC, /* request is sync (sync write or read) */ - __REQ_ALLOCED, /* request came from our alloc pool */ - __REQ_RW_META, /* metadata io request */ - __REQ_COPY_USER, /* contains copies of user pages */ - __REQ_INTEGRITY, /* integrity metadata has been remapped */ - __REQ_NOIDLE, /* Don't anticipate more IO after this one */ - __REQ_IO_STAT, /* account I/O stat */ - 
__REQ_MIXED_MERGE, /* merge of different types, fail separately */ - __REQ_NR_BITS, /* stops here */ -}; - -#define REQ_RW (1 << __REQ_RW) -#define REQ_FAILFAST_DEV (1 << __REQ_FAILFAST_DEV) -#define REQ_FAILFAST_TRANSPORT (1 << __REQ_FAILFAST_TRANSPORT) -#define REQ_FAILFAST_DRIVER (1 << __REQ_FAILFAST_DRIVER) -#define REQ_DISCARD (1 << __REQ_DISCARD) -#define REQ_SORTED (1 << __REQ_SORTED) -#define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) -#define REQ_HARDBARRIER (1 << __REQ_HARDBARRIER) -#define REQ_FUA (1 << __REQ_FUA) -#define REQ_NOMERGE (1 << __REQ_NOMERGE) -#define REQ_STARTED (1 << __REQ_STARTED) -#define REQ_DONTPREP (1 << __REQ_DONTPREP) -#define REQ_QUEUED (1 << __REQ_QUEUED) -#define REQ_ELVPRIV (1 << __REQ_ELVPRIV) -#define REQ_FAILED (1 << __REQ_FAILED) -#define REQ_QUIET (1 << __REQ_QUIET) -#define REQ_PREEMPT (1 << __REQ_PREEMPT) -#define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR) -#define REQ_RW_SYNC (1 << __REQ_RW_SYNC) -#define REQ_ALLOCED (1 << __REQ_ALLOCED) -#define REQ_RW_META (1 << __REQ_RW_META) -#define REQ_COPY_USER (1 << __REQ_COPY_USER) -#define REQ_INTEGRITY (1 << __REQ_INTEGRITY) -#define REQ_NOIDLE (1 << __REQ_NOIDLE) -#define REQ_IO_STAT (1 << __REQ_IO_STAT) -#define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE) - -#define REQ_FAILFAST_MASK (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | \ - REQ_FAILFAST_DRIVER) - #define BLK_MAX_CDB 16 /* @@ -631,7 +567,7 @@ enum { */ static inline bool rw_is_sync(unsigned int rw_flags) { - return !(rw_flags & REQ_RW) || (rw_flags & REQ_RW_SYNC); + return !(rw_flags & REQ_WRITE) || (rw_flags & REQ_SYNC); } static inline bool rq_is_sync(struct request *rq) diff --git a/include/linux/fs.h b/include/linux/fs.h index 59887883149..c5c92943c76 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -144,29 +144,31 @@ struct inodes_stat_t { * of this IO. 
* */ -#define RW_MASK 1 -#define RWA_MASK 2 -#define READ 0 -#define WRITE 1 -#define READA 2 /* read-ahead - don't block if no resources */ -#define SWRITE 3 /* for ll_rw_block() - wait for buffer lock */ -#define READ_SYNC (READ | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG)) -#define READ_META (READ | (1 << BIO_RW_META)) -#define WRITE_SYNC_PLUG (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE)) -#define WRITE_SYNC (WRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG)) -#define WRITE_ODIRECT_PLUG (WRITE | (1 << BIO_RW_SYNCIO)) -#define WRITE_META (WRITE | (1 << BIO_RW_META)) -#define SWRITE_SYNC_PLUG \ - (SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE)) -#define SWRITE_SYNC (SWRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG)) -#define WRITE_BARRIER (WRITE_SYNC | (1 << BIO_RW_BARRIER)) +#define RW_MASK 1 +#define RWA_MASK 2 + +#define READ 0 +#define WRITE 1 +#define READA 2 /* readahead - don't block if no resources */ +#define SWRITE 3 /* for ll_rw_block() - wait for buffer lock */ + +#define READ_SYNC (READ | REQ_SYNC | REQ_UNPLUG) +#define READ_META (READ | REQ_META) +#define WRITE_SYNC_PLUG (WRITE | REQ_SYNC | REQ_NOIDLE) +#define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG) +#define WRITE_ODIRECT_PLUG (WRITE | REQ_SYNC) +#define WRITE_META (WRITE | REQ_META) +#define WRITE_BARRIER (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ + REQ_HARDBARRIER) +#define SWRITE_SYNC_PLUG (SWRITE | REQ_SYNC | REQ_NOIDLE) +#define SWRITE_SYNC (SWRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG) /* * These aren't really reads or writes, they pass down information about * parts of device that are now unused by the file system. 
*/ -#define DISCARD_NOBARRIER (WRITE | (1 << BIO_RW_DISCARD)) -#define DISCARD_BARRIER (DISCARD_NOBARRIER | (1 << BIO_RW_BARRIER)) +#define DISCARD_NOBARRIER (WRITE | REQ_DISCARD) +#define DISCARD_BARRIER (WRITE | REQ_DISCARD | REQ_HARDBARRIER) #define SEL_IN 1 #define SEL_OUT 2 diff --git a/kernel/power/block_io.c b/kernel/power/block_io.c index 97024fd40cd..83bbc7c02df 100644 --- a/kernel/power/block_io.c +++ b/kernel/power/block_io.c @@ -28,7 +28,7 @@ static int submit(int rw, struct block_device *bdev, sector_t sector, struct page *page, struct bio **bio_chain) { - const int bio_rw = rw | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG); + const int bio_rw = rw | REQ_SYNC | REQ_UNPLUG; struct bio *bio; bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1); diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 4f149944cb8..3b4a695051b 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -169,9 +169,12 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector, static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK_TC_WRITE) }; +#define BLK_TC_HARDBARRIER BLK_TC_BARRIER +#define BLK_TC_RAHEAD BLK_TC_AHEAD + /* The ilog2() calls fall out because they're constant */ -#define MASK_TC_BIT(rw, __name) ((rw & (1 << BIO_RW_ ## __name)) << \ - (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - BIO_RW_ ## __name)) +#define MASK_TC_BIT(rw, __name) ((rw & REQ_ ## __name) << \ + (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - __REQ_ ## __name)) /* * The worker for the various blk_add_trace*() types. 
Fills out a @@ -194,9 +197,9 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, return; what |= ddir_act[rw & WRITE]; - what |= MASK_TC_BIT(rw, BARRIER); - what |= MASK_TC_BIT(rw, SYNCIO); - what |= MASK_TC_BIT(rw, AHEAD); + what |= MASK_TC_BIT(rw, HARDBARRIER); + what |= MASK_TC_BIT(rw, SYNC); + what |= MASK_TC_BIT(rw, RAHEAD); what |= MASK_TC_BIT(rw, META); what |= MASK_TC_BIT(rw, DISCARD); @@ -662,7 +665,7 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq, return; if (rq->cmd_flags & REQ_DISCARD) - rw |= (1 << BIO_RW_DISCARD); + rw |= REQ_DISCARD; if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { what |= BLK_TC_ACT(BLK_TC_PC); @@ -1755,20 +1758,20 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes) if (rw & WRITE) rwbs[i++] = 'W'; - else if (rw & 1 << BIO_RW_DISCARD) + else if (rw & REQ_DISCARD) rwbs[i++] = 'D'; else if (bytes) rwbs[i++] = 'R'; else rwbs[i++] = 'N'; - if (rw & 1 << BIO_RW_AHEAD) + if (rw & REQ_RAHEAD) rwbs[i++] = 'A'; - if (rw & 1 << BIO_RW_BARRIER) + if (rw & REQ_HARDBARRIER) rwbs[i++] = 'B'; - if (rw & 1 << BIO_RW_SYNCIO) + if (rw & REQ_SYNC) rwbs[i++] = 'S'; - if (rw & 1 << BIO_RW_META) + if (rw & REQ_META) rwbs[i++] = 'M'; rwbs[i] = '\0'; @@ -1780,7 +1783,7 @@ void blk_fill_rwbs_rq(char *rwbs, struct request *rq) int bytes; if (rq->cmd_flags & REQ_DISCARD) - rw |= (1 << BIO_RW_DISCARD); + rw |= REQ_DISCARD; bytes = blk_rq_bytes(rq); diff --git a/mm/page_io.c b/mm/page_io.c index 31a3b962230..2dee975bf46 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -106,7 +106,7 @@ int swap_writepage(struct page *page, struct writeback_control *wbc) goto out; } if (wbc->sync_mode == WB_SYNC_ALL) - rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG); + rw |= REQ_SYNC | REQ_UNPLUG; count_vm_event(PSWPOUT); set_page_writeback(page); unlock_page(page); -- cgit v1.2.3-70-g09d2 From d2997b1042ec150616c1963b5e5e919ffd0b0ebf Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Mon, 9 Aug 2010 17:20:11 -0700 Subject: 
hibernation: freeze swap at hibernation When taking a memory snapshot in hibernate_snapshot(), all (directly called) memory allocations use GFP_ATOMIC. Hence swap misuse during hibernation never occurs. But from a pessimistic point of view, there is no guarantee that no page allocation has __GFP_WAIT. It is better to have a global indication "we enter hibernation, don't use swap!". This patch tries to freeze new-swap-allocation during hibernation. (All user processes are frozen, so swapin is not a concern). This way, no updates will happen to swap_map[] between hibernate_snapshot() and save_image(). Swap is thawed when swsusp_free() is called. We can be assured that swap corruption will not occur. Signed-off-by: KAMEZAWA Hiroyuki Cc: "Rafael J. Wysocki" Cc: Hugh Dickins Cc: KOSAKI Motohiro Cc: Ondrej Zary Cc: Balbir Singh Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 8 ++++- kernel/power/hibernate.c | 1 + kernel/power/snapshot.c | 1 + kernel/power/swap.c | 6 ++-- mm/swapfile.c | 94 ++++++++++++++++++++++++++++++++++++------------ 5 files changed, 84 insertions(+), 26 deletions(-) (limited to 'kernel/power') diff --git a/include/linux/swap.h b/include/linux/swap.h index ff4acea9bbd..91c9d3fc851 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -316,7 +316,6 @@ extern long nr_swap_pages; extern long total_swap_pages; extern void si_swapinfo(struct sysinfo *); extern swp_entry_t get_swap_page(void); -extern swp_entry_t get_swap_page_of_type(int); extern int valid_swaphandles(swp_entry_t, unsigned long *); extern int add_swap_count_continuation(swp_entry_t, gfp_t); extern void swap_shmem_alloc(swp_entry_t); @@ -333,6 +332,13 @@ extern int reuse_swap_page(struct page *); extern int try_to_free_swap(struct page *); struct backing_dev_info; +#ifdef CONFIG_HIBERNATION +void hibernation_freeze_swap(void); +void hibernation_thaw_swap(void); +swp_entry_t get_swap_for_hibernation(int type); +void 
swap_free_for_hibernation(swp_entry_t val); +#endif + /* linux/mm/thrash.c */ extern struct mm_struct *swap_token_mm; extern void grab_swap_token(struct mm_struct *); diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 8dc31e02ae1..c77963938bc 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -338,6 +338,7 @@ int hibernation_snapshot(int platform_mode) goto Close; suspend_console(); + hibernation_freeze_swap(); saved_mask = clear_gfp_allowed_mask(GFP_IOFS); error = dpm_suspend_start(PMSG_FREEZE); if (error) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index f6cd6faf84f..5e7edfb05e6 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1086,6 +1086,7 @@ void swsusp_free(void) buffer = NULL; alloc_normal = 0; alloc_highmem = 0; + hibernation_thaw_swap(); } /* Helper functions used for the shrinking of memory. */ diff --git a/kernel/power/swap.c b/kernel/power/swap.c index e6a5bdf61a3..5d0059eed3e 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -136,10 +136,10 @@ sector_t alloc_swapdev_block(int swap) { unsigned long offset; - offset = swp_offset(get_swap_page_of_type(swap)); + offset = swp_offset(get_swap_for_hibernation(swap)); if (offset) { if (swsusp_extents_insert(offset)) - swap_free(swp_entry(swap, offset)); + swap_free_for_hibernation(swp_entry(swap, offset)); else return swapdev_block(swap, offset); } @@ -163,7 +163,7 @@ void free_all_swap_pages(int swap) ext = container_of(node, struct swsusp_extent, node); rb_erase(node, &swsusp_extents); for (offset = ext->start; offset <= ext->end; offset++) - swap_free(swp_entry(swap, offset)); + swap_free_for_hibernation(swp_entry(swap, offset)); kfree(ext); } diff --git a/mm/swapfile.c b/mm/swapfile.c index f08d165871b..1f3f9c59a73 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -47,6 +47,8 @@ long nr_swap_pages; long total_swap_pages; static int least_priority; +static bool swap_for_hibernation; + static const char Bad_file[] = 
"Bad swap file entry "; static const char Unused_file[] = "Unused swap file entry "; static const char Bad_offset[] = "Bad swap offset entry "; @@ -451,6 +453,8 @@ swp_entry_t get_swap_page(void) spin_lock(&swap_lock); if (nr_swap_pages <= 0) goto noswap; + if (swap_for_hibernation) + goto noswap; nr_swap_pages--; for (type = swap_list.next; type >= 0 && wrapped < 2; type = next) { @@ -483,28 +487,6 @@ noswap: return (swp_entry_t) {0}; } -/* The only caller of this function is now susupend routine */ -swp_entry_t get_swap_page_of_type(int type) -{ - struct swap_info_struct *si; - pgoff_t offset; - - spin_lock(&swap_lock); - si = swap_info[type]; - if (si && (si->flags & SWP_WRITEOK)) { - nr_swap_pages--; - /* This is called for allocating swap entry, not cache */ - offset = scan_swap_map(si, 1); - if (offset) { - spin_unlock(&swap_lock); - return swp_entry(type, offset); - } - nr_swap_pages++; - } - spin_unlock(&swap_lock); - return (swp_entry_t) {0}; -} - static struct swap_info_struct *swap_info_get(swp_entry_t entry) { struct swap_info_struct *p; @@ -764,6 +746,74 @@ int mem_cgroup_count_swap_user(swp_entry_t ent, struct page **pagep) #endif #ifdef CONFIG_HIBERNATION + +static pgoff_t hibernation_offset[MAX_SWAPFILES]; +/* + * Once hibernation starts to use swap, we freeze swap_map[]. Otherwise, + * saved swap_map[] image to the disk will be an incomplete because it's + * changing without synchronization with hibernation snap shot. + * At resume, we just make swap_for_hibernation=false. We can forget + * used maps easily. 
+ */ +void hibernation_freeze_swap(void) +{ + int i; + + spin_lock(&swap_lock); + + printk(KERN_INFO "PM: Freeze Swap\n"); + swap_for_hibernation = true; + for (i = 0; i < MAX_SWAPFILES; i++) + hibernation_offset[i] = 1; + spin_unlock(&swap_lock); +} + +void hibernation_thaw_swap(void) +{ + spin_lock(&swap_lock); + if (swap_for_hibernation) { + printk(KERN_INFO "PM: Thaw Swap\n"); + swap_for_hibernation = false; + } + spin_unlock(&swap_lock); +} + +/* + * Because updateing swap_map[] can make not-saved-status-change, + * we use our own easy allocator. + * Please see kernel/power/swap.c, Used swaps are recorded into + * RB-tree. + */ +swp_entry_t get_swap_for_hibernation(int type) +{ + pgoff_t off; + swp_entry_t val = {0}; + struct swap_info_struct *si; + + spin_lock(&swap_lock); + + si = swap_info[type]; + if (!si || !(si->flags & SWP_WRITEOK)) + goto done; + + for (off = hibernation_offset[type]; off < si->max; ++off) { + if (!si->swap_map[off]) + break; + } + if (off < si->max) { + val = swp_entry(type, off); + hibernation_offset[type] = off + 1; + } +done: + spin_unlock(&swap_lock); + return val; +} + +void swap_free_for_hibernation(swp_entry_t ent) +{ + /* Nothing to do */ +} + /* * Find the swap type that corresponds to given device (if any). * -- cgit v1.2.3-70-g09d2