diff options
58 files changed, 3506 insertions, 2941 deletions
diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt index a91e2e2095b..770267af5b3 100644 --- a/Documentation/filesystems/caching/fscache.txt +++ b/Documentation/filesystems/caching/fscache.txt @@ -343,8 +343,8 @@ This will look something like: [root@andromeda ~]# head /proc/fs/fscache/objects OBJECT PARENT STAT CHLDN OPS OOP IPR EX READS EM EV F S | NETFS_COOKIE_DEF TY FL NETFS_DATA OBJECT_KEY, AUX_DATA ======== ======== ==== ===== === === === == ===== == == = = | ================ == == ================ ================ - 17e4b 2 ACTV 0 0 0 0 0 0 7b 4 0 8 | NFS.fh DT 0 ffff88001dd82820 010006017edcf8bbc93b43298fdfbe71e50b57b13a172c0117f38472, e567634700000000000000000000000063f2404a000000000000000000000000c9030000000000000000000063f2404a - 1693a 2 ACTV 0 0 0 0 0 0 7b 4 0 8 | NFS.fh DT 0 ffff88002db23380 010006017edcf8bbc93b43298fdfbe71e50b57b1e0162c01a2df0ea6, 420ebc4a000000000000000000000000420ebc4a0000000000000000000000000e1801000000000000000000420ebc4a + 17e4b 2 ACTV 0 0 0 0 0 0 7b 4 0 0 | NFS.fh DT 0 ffff88001dd82820 010006017edcf8bbc93b43298fdfbe71e50b57b13a172c0117f38472, e567634700000000000000000000000063f2404a000000000000000000000000c9030000000000000000000063f2404a + 1693a 2 ACTV 0 0 0 0 0 0 7b 4 0 0 | NFS.fh DT 0 ffff88002db23380 010006017edcf8bbc93b43298fdfbe71e50b57b1e0162c01a2df0ea6, 420ebc4a000000000000000000000000420ebc4a0000000000000000000000000e1801000000000000000000420ebc4a where the first set of columns before the '|' describe the object: @@ -362,7 +362,7 @@ where the first set of columns before the '|' describe the object: EM Object's event mask EV Events raised on this object F Object flags - S Object slow-work work item flags + S Object work item busy state mask (1:pending 2:running) and the second set of columns describe the object's cookie, if present: @@ -395,8 +395,8 @@ and the following paired letters: w Show objects that don't have pending writes R Show objects that have outstanding reads r Show objects that don't have outstanding reads - S Show objects that have slow work queued - s Show objects that don't have slow work queued + S Show objects that have work queued + s Show objects that don't have work queued If neither side of a letter pair is given, then both are implied. For example: diff --git a/Documentation/slow-work.txt b/Documentation/slow-work.txt deleted file mode 100644 index 9dbf4470c7e..00000000000 --- a/Documentation/slow-work.txt +++ /dev/null @@ -1,322 +0,0 @@ - ==================================== - SLOW WORK ITEM EXECUTION THREAD POOL - ==================================== - -By: David Howells <dhowells@redhat.com> - -The slow work item execution thread pool is a pool of threads for performing -things that take a relatively long time, such as making mkdir calls. -Typically, when processing something, these items will spend a lot of time -blocking a thread on I/O, thus making that thread unavailable for doing other -work. - -The standard workqueue model is unsuitable for this class of work item as that -limits the owner to a single thread or a single thread per CPU. For some -tasks, however, more threads - or fewer - are required. - -There is just one pool per system. It contains no threads unless something -wants to use it - and that something must register its interest first. When -the pool is active, the number of threads it contains is dynamic, varying -between a maximum and minimum setting, depending on the load. - - -==================== -CLASSES OF WORK ITEM -==================== - -This pool support two classes of work items: - - (*) Slow work items. - - (*) Very slow work items. - -The former are expected to finish much quicker than the latter. - -An operation of the very slow class may do a batch combination of several -lookups, mkdirs, and a create for instance. - -An operation of the ordinarily slow class may, for example, write stuff or -expand files, provided the time taken to do so isn't too long. - -Operations of both types may sleep during execution, thus tying up the thread -loaned to it. - -A further class of work item is available, based on the slow work item class: - - (*) Delayed slow work items. - -These are slow work items that have a timer to defer queueing of the item for -a while. - - -THREAD-TO-CLASS ALLOCATION --------------------------- - -Not all the threads in the pool are available to work on very slow work items. -The number will be between one and one fewer than the number of active threads. -This is configurable (see the "Pool Configuration" section). - -All the threads are available to work on ordinarily slow work items, but a -percentage of the threads will prefer to work on very slow work items. - -The configuration ensures that at least one thread will be available to work on -very slow work items, and at least one thread will be available that won't work -on very slow work items at all. - - -===================== -USING SLOW WORK ITEMS -===================== - -Firstly, a module or subsystem wanting to make use of slow work items must -register its interest: - - int ret = slow_work_register_user(struct module *module); - -This will return 0 if successful, or a -ve error upon failure. The module -pointer should be the module interested in using this facility (almost -certainly THIS_MODULE). - - -Slow work items may then be set up by: - - (1) Declaring a slow_work struct type variable: - - #include <linux/slow-work.h> - - struct slow_work myitem; - - (2) Declaring the operations to be used for this item: - - struct slow_work_ops myitem_ops = { - .get_ref = myitem_get_ref, - .put_ref = myitem_put_ref, - .execute = myitem_execute, - }; - - [*] For a description of the ops, see section "Item Operations". - - (3) Initialising the item: - - slow_work_init(&myitem, &myitem_ops); - - or: - - delayed_slow_work_init(&myitem, &myitem_ops); - - or: - - vslow_work_init(&myitem, &myitem_ops); - - depending on its class. - -A suitably set up work item can then be enqueued for processing: - - int ret = slow_work_enqueue(&myitem); - -This will return a -ve error if the thread pool is unable to gain a reference -on the item, 0 otherwise, or (for delayed work): - - int ret = delayed_slow_work_enqueue(&myitem, my_jiffy_delay); - - -The items are reference counted, so there ought to be no need for a flush -operation. But as the reference counting is optional, means to cancel -existing work items are also included: - - cancel_slow_work(&myitem); - cancel_delayed_slow_work(&myitem); - -can be used to cancel pending work. The above cancel function waits for -existing work to have been executed (or prevent execution of them, depending -on timing). - - -When all a module's slow work items have been processed, and the -module has no further interest in the facility, it should unregister its -interest: - - slow_work_unregister_user(struct module *module); - -The module pointer is used to wait for all outstanding work items for that -module before completing the unregistration. This prevents the put_ref() code -from being taken away before it completes. module should almost certainly be -THIS_MODULE. - - -================ -HELPER FUNCTIONS -================ - -The slow-work facility provides a function by which it can be determined -whether or not an item is queued for later execution: - - bool queued = slow_work_is_queued(struct slow_work *work); - -If it returns false, then the item is not on the queue (it may be executing -with a requeue pending). This can be used to work out whether an item on which -another depends is on the queue, thus allowing a dependent item to be queued -after it. - -If the above shows an item on which another depends not to be queued, then the -owner of the dependent item might need to wait. However, to avoid locking up -the threads unnecessarily be sleeping in them, it can make sense under some -circumstances to return the work item to the queue, thus deferring it until -some other items have had a chance to make use of the yielded thread. - -To yield a thread and defer an item, the work function should simply enqueue -the work item again and return. However, this doesn't work if there's nothing -actually on the queue, as the thread just vacated will jump straight back into -the item's work function, thus busy waiting on a CPU. - -Instead, the item should use the thread to wait for the dependency to go away, -but rather than using schedule() or schedule_timeout() to sleep, it should use -the following function: - - bool requeue = slow_work_sleep_till_thread_needed( - struct slow_work *work, - signed long *_timeout); - -This will add a second wait and then sleep, such that it will be woken up if -either something appears on the queue that could usefully make use of the -thread - and behind which this item can be queued, or if the event the caller -set up to wait for happens. True will be returned if something else appeared -on the queue and this work function should perhaps return, of false if -something else woke it up. The timeout is as for schedule_timeout(). - -For example: - - wq = bit_waitqueue(&my_flags, MY_BIT); - init_wait(&wait); - requeue = false; - do { - prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE); - if (!test_bit(MY_BIT, &my_flags)) - break; - requeue = slow_work_sleep_till_thread_needed(&my_work, - &timeout); - } while (timeout > 0 && !requeue); - finish_wait(wq, &wait); - if (!test_bit(MY_BIT, &my_flags) - goto do_my_thing; - if (requeue) - return; // to slow_work - - -=============== -ITEM OPERATIONS -=============== - -Each work item requires a table of operations of type struct slow_work_ops. -Only ->execute() is required; the getting and putting of a reference and the -describing of an item are all optional. - - (*) Get a reference on an item: - - int (*get_ref)(struct slow_work *work); - - This allows the thread pool to attempt to pin an item by getting a - reference on it. This function should return 0 if the reference was - granted, or a -ve error otherwise. If an error is returned, - slow_work_enqueue() will fail. - - The reference is held whilst the item is queued and whilst it is being - executed. The item may then be requeued with the same reference held, or - the reference will be released. - - (*) Release a reference on an item: - - void (*put_ref)(struct slow_work *work); - - This allows the thread pool to unpin an item by releasing the reference on - it. The thread pool will not touch the item again once this has been - called. - - (*) Execute an item: - - void (*execute)(struct slow_work *work); - - This should perform the work required of the item. It may sleep, it may - perform disk I/O and it may wait for locks. - - (*) View an item through /proc: - - void (*desc)(struct slow_work *work, struct seq_file *m); - - If supplied, this should print to 'm' a small string describing the work - the item is to do. This should be no more than about 40 characters, and - shouldn't include a newline character. - - See the 'Viewing executing and queued items' section below. - - -================== -POOL CONFIGURATION -================== - -The slow-work thread pool has a number of configurables: - - (*) /proc/sys/kernel/slow-work/min-threads - - The minimum number of threads that should be in the pool whilst it is in - use. This may be anywhere between 2 and max-threads. - - (*) /proc/sys/kernel/slow-work/max-threads - - The maximum number of threads that should in the pool. This may be - anywhere between min-threads and 255 or NR_CPUS * 2, whichever is greater. - - (*) /proc/sys/kernel/slow-work/vslow-percentage - - The percentage of active threads in the pool that may be used to execute - very slow work items. This may be between 1 and 99. The resultant number - is bounded to between 1 and one fewer than the number of active threads. - This ensures there is always at least one thread that can process very - slow work items, and always at least one thread that won't. - - -================================== -VIEWING EXECUTING AND QUEUED ITEMS -================================== - -If CONFIG_SLOW_WORK_DEBUG is enabled, a debugfs file is made available: - - /sys/kernel/debug/slow_work/runqueue - -through which the list of work items being executed and the queues of items to -be executed may be viewed. The owner of a work item is given the chance to -add some information of its own. - -The contents look something like the following: - - THR PID ITEM ADDR FL MARK DESC - === ===== ================ == ===== ========== - 0 3005 ffff880023f52348 a 952ms FSC: OBJ17d3: LOOK - 1 3006 ffff880024e33668 2 160ms FSC: OBJ17e5 OP60d3b: Write1/Store fl=2 - 2 3165 ffff8800296dd180 a 424ms FSC: OBJ17e4: LOOK - 3 4089 ffff8800262c8d78 a 212ms FSC: OBJ17ea: CRTN - 4 4090 ffff88002792bed8 2 388ms FSC: OBJ17e8 OP60d36: Write1/Store fl=2 - 5 4092 ffff88002a0ef308 2 388ms FSC: OBJ17e7 OP60d2e: Write1/Store fl=2 - 6 4094 ffff88002abaf4b8 2 132ms FSC: OBJ17e2 OP60d4e: Write1/Store fl=2 - 7 4095 ffff88002bb188e0 a 388ms FSC: OBJ17e9: CRTN - vsq - ffff880023d99668 1 308ms FSC: OBJ17e0 OP60f91: Write1/EnQ fl=2 - vsq - ffff8800295d1740 1 212ms FSC: OBJ16be OP4d4b6: Write1/EnQ fl=2 - vsq - ffff880025ba3308 1 160ms FSC: OBJ179a OP58dec: Write1/EnQ fl=2 - vsq - ffff880024ec83e0 1 160ms FSC: OBJ17ae OP599f2: Write1/EnQ fl=2 - vsq - ffff880026618e00 1 160ms FSC: OBJ17e6 OP60d33: Write1/EnQ fl=2 - vsq - ffff880025a2a4b8 1 132ms FSC: OBJ16a2 OP4d583: Write1/EnQ fl=2 - vsq - ffff880023cbe6d8 9 212ms FSC: OBJ17eb: LOOK - vsq - ffff880024d37590 9 212ms FSC: OBJ17ec: LOOK - vsq - ffff880027746cb0 9 212ms FSC: OBJ17ed: LOOK - vsq - ffff880024d37ae8 9 212ms FSC: OBJ17ee: LOOK - vsq - ffff880024d37cb0 9 212ms FSC: OBJ17ef: LOOK - vsq - ffff880025036550 9 212ms FSC: OBJ17f0: LOOK - vsq - ffff8800250368e0 9 212ms FSC: OBJ17f1: LOOK - vsq - ffff880025036aa8 9 212ms FSC: OBJ17f2: LOOK - -In the 'THR' column, executing items show the thread they're occupying and -queued threads indicate which queue they're on. 'PID' shows the process ID of -a slow-work thread that's executing something. 'FL' shows the work item flags. -'MARK' indicates how long since an item was queued or began executing. Lastly, -the 'DESC' column permits the owner of an item to give some information. - diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 6a1380e90f8..99dcc85193c 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -519,7 +519,7 @@ do_boot_cpu (int sapicid, int cpu) /* * We can't use kernel_thread since we must avoid to reschedule the child. */ - if (!keventd_up() || current_is_keventd()) + if (!keventd_up()) c_idle.work.func(&c_idle.work); else { schedule_work(&c_idle.work); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 11015fd1abb..51620953b18 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -735,7 +735,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) goto do_rest; } - if (!keventd_up() || current_is_keventd()) + if (!keventd_up()) c_idle.work.func(&c_idle.work); else { schedule_work(&c_idle.work); diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 78418ce4fc7..46cce391fa4 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -191,36 +191,11 @@ acpi_status __init acpi_os_initialize(void) return AE_OK; } -static void bind_to_cpu0(struct work_struct *work) -{ - set_cpus_allowed_ptr(current, cpumask_of(0)); - kfree(work); -} - -static void bind_workqueue(struct workqueue_struct *wq) -{ - struct work_struct *work; - - work = kzalloc(sizeof(struct work_struct), GFP_KERNEL); - INIT_WORK(work, bind_to_cpu0); - queue_work(wq, work); -} - acpi_status acpi_os_initialize1(void) { - /* - * On some machines, a software-initiated SMI causes corruption unless - * the SMI runs on CPU 0. An SMI can be initiated by any AML, but - * typically it's done in GPE-related methods that are run via - * workqueues, so we can avoid the known corruption cases by binding - * the workqueues to CPU 0. - */ - kacpid_wq = create_singlethread_workqueue("kacpid"); - bind_workqueue(kacpid_wq); - kacpi_notify_wq = create_singlethread_workqueue("kacpi_notify"); - bind_workqueue(kacpi_notify_wq); - kacpi_hotplug_wq = create_singlethread_workqueue("kacpi_hotplug"); - bind_workqueue(kacpi_hotplug_wq); + kacpid_wq = create_workqueue("kacpid"); + kacpi_notify_wq = create_workqueue("kacpi_notify"); + kacpi_hotplug_wq = create_workqueue("kacpi_hotplug"); BUG_ON(!kacpid_wq); BUG_ON(!kacpi_notify_wq); BUG_ON(!kacpi_hotplug_wq); @@ -766,7 +741,14 @@ static acpi_status __acpi_os_execute(acpi_execute_type type, else INIT_WORK(&dpc->work, acpi_os_execute_deferred); - ret = queue_work(queue, &dpc->work); + /* + * On some machines, a software-initiated SMI causes corruption unless + * the SMI runs on CPU 0. An SMI can be initiated by any AML, but + * typically it's done in GPE-related methods that are run via + * workqueues, so we can avoid the known corruption cases by always + * queueing on CPU 0. + */ + ret = queue_work_on(0, queue, &dpc->work); if (!ret) { printk(KERN_ERR PREFIX diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index a0a4d696840..4972fdf4bd3 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -98,8 +98,6 @@ static unsigned long ata_dev_blacklisted(const struct ata_device *dev); unsigned int ata_print_id = 1; -struct workqueue_struct *ata_aux_wq; - struct ata_force_param { const char *name; unsigned int cbl; @@ -5594,6 +5592,7 @@ struct ata_port *ata_port_alloc(struct ata_host *host) ap->msg_enable = ATA_MSG_DRV | ATA_MSG_ERR | ATA_MSG_WARN; #endif + mutex_init(&ap->scsi_scan_mutex); INIT_DELAYED_WORK(&ap->hotplug_task, ata_scsi_hotplug); INIT_WORK(&ap->scsi_rescan_task, ata_scsi_dev_rescan); INIT_LIST_HEAD(&ap->eh_done_q); @@ -6532,29 +6531,20 @@ static int __init ata_init(void) ata_parse_force_param(); - ata_aux_wq = create_singlethread_workqueue("ata_aux"); - if (!ata_aux_wq) - goto fail; - rc = ata_sff_init(); - if (rc) - goto fail; + if (rc) { + kfree(ata_force_tbl); + return rc; + } printk(KERN_DEBUG "libata version " DRV_VERSION " loaded.\n"); return 0; - -fail: - kfree(ata_force_tbl); - if (ata_aux_wq) - destroy_workqueue(ata_aux_wq); - return rc; } static void __exit ata_exit(void) { ata_sff_exit(); kfree(ata_force_tbl); - destroy_workqueue(ata_aux_wq); } subsys_initcall(ata_init); diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 697474b625b..c9ae299b834 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -727,7 +727,7 @@ void ata_scsi_error(struct Scsi_Host *host) if (ap->pflags & ATA_PFLAG_LOADING) ap->pflags &= ~ATA_PFLAG_LOADING; else if (ap->pflags & ATA_PFLAG_SCSI_HOTPLUG) - queue_delayed_work(ata_aux_wq, &ap->hotplug_task, 0); + schedule_delayed_work(&ap->hotplug_task, 0); if (ap->pflags & ATA_PFLAG_RECOVERED) ata_port_printk(ap, KERN_INFO, "EH complete\n"); @@ -2945,7 +2945,7 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link, ehc->i.flags |= ATA_EHI_SETMODE; /* schedule the scsi_rescan_device() here */ - queue_work(ata_aux_wq, &(ap->scsi_rescan_task)); + schedule_work(&(ap->scsi_rescan_task)); } else if (dev->class == ATA_DEV_UNKNOWN && ehc->tries[dev->devno] && ata_class_enabled(ehc->classes[dev->devno])) { diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index a54273d2c3c..d75c9c479d1 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -3435,7 +3435,7 @@ void ata_scsi_scan_host(struct ata_port *ap, int sync) " switching to async\n"); } - queue_delayed_work(ata_aux_wq, &ap->hotplug_task, + queue_delayed_work(system_long_wq, &ap->hotplug_task, round_jiffies_relative(HZ)); } @@ -3582,6 +3582,7 @@ void ata_scsi_hotplug(struct work_struct *work) } DPRINTK("ENTER\n"); + mutex_lock(&ap->scsi_scan_mutex); /* Unplug detached devices. We cannot use link iterator here * because PMP links have to be scanned even if PMP is @@ -3595,6 +3596,7 @@ void ata_scsi_hotplug(struct work_struct *work) /* scan for new ones */ ata_scsi_scan_host(ap, 0); + mutex_unlock(&ap->scsi_scan_mutex); DPRINTK("EXIT\n"); } @@ -3673,9 +3675,7 @@ static int ata_scsi_user_scan(struct Scsi_Host *shost, unsigned int channel, * @work: Pointer to ATA port to perform scsi_rescan_device() * * After ATA pass thru (SAT) commands are executed successfully, - * libata need to propagate the changes to SCSI layer. This - * function must be executed from ata_aux_wq such that sdev - * attach/detach don't race with rescan. + * libata need to propagate the changes to SCSI layer. * * LOCKING: * Kernel thread context (may sleep). @@ -3688,6 +3688,7 @@ void ata_scsi_dev_rescan(struct work_struct *work) struct ata_device *dev; unsigned long flags; + mutex_lock(&ap->scsi_scan_mutex); spin_lock_irqsave(ap->lock, flags); ata_for_each_link(link, ap, EDGE) { @@ -3707,6 +3708,7 @@ void ata_scsi_dev_rescan(struct work_struct *work) } spin_unlock_irqrestore(ap->lock, flags); + mutex_unlock(&ap->scsi_scan_mutex); } /** diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index efa4a18cfb9..674c1436491 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -3318,14 +3318,7 @@ void ata_sff_port_init(struct ata_port *ap) int __init ata_sff_init(void) { - /* - * FIXME: In UP case, there is only one workqueue thread and if you - * have more than one PIO device, latency is bloody awful, with - * occasional multi-second "hiccups" as one PIO device waits for - * another. It's an ugly wart that users DO occasionally complain - * a |