299 files changed, 5126 insertions, 3185 deletions
diff --git a/Documentation/ABI/testing/debugfs-pktcdvd b/Documentation/ABI/testing/debugfs-pktcdvd index bf9c16b64c3..cf11736acb7 100644 --- a/Documentation/ABI/testing/debugfs-pktcdvd +++ b/Documentation/ABI/testing/debugfs-pktcdvd @@ -1,4 +1,4 @@ -What: /debug/pktcdvd/pktcdvd[0-7] +What: /sys/kernel/debug/pktcdvd/pktcdvd[0-7] Date: Oct. 2006 KernelVersion: 2.6.20 Contact: Thomas Maier <balagi@justmail.de> @@ -10,10 +10,10 @@ debugfs interface The pktcdvd module (packet writing driver) creates these files in debugfs: -/debug/pktcdvd/pktcdvd[0-7]/ +/sys/kernel/debug/pktcdvd/pktcdvd[0-7]/ info (0444) Lots of driver statistics and infos. Example: ------- -cat /debug/pktcdvd/pktcdvd0/info +cat /sys/kernel/debug/pktcdvd/pktcdvd0/info diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt index ecad6ee7570..6fab97ea7e6 100644 --- a/Documentation/block/biodoc.txt +++ b/Documentation/block/biodoc.txt @@ -1040,23 +1040,21 @@ Front merges are handled by the binary trees in AS and deadline schedulers. iii. Plugging the queue to batch requests in anticipation of opportunities for merge/sort optimizations -This is just the same as in 2.4 so far, though per-device unplugging -support is anticipated for 2.5. Also with a priority-based i/o scheduler, -such decisions could be based on request priorities. - Plugging is an approach that the current i/o scheduling algorithm resorts to so that it collects up enough requests in the queue to be able to take advantage of the sorting/merging logic in the elevator. If the queue is empty when a request comes in, then it plugs the request queue -(sort of like plugging the bottom of a vessel to get fluid to build up) +(sort of like plugging the drain of a bath tub to get fluid to build up) till it fills up with a few more requests, before starting to service the requests. This provides an opportunity to merge/sort the requests before passing them down to the device. There are various conditions when the queue is unplugged (to open up the flow again), either through a scheduled task or could be on demand. For example wait_on_buffer sets the unplugging going -(by running tq_disk) so the read gets satisfied soon. So in the read case, -the queue gets explicitly unplugged as part of waiting for completion, -in fact all queues get unplugged as a side-effect. +through sync_buffer() running blk_run_address_space(mapping). Or the caller +can do it explicitly through blk_unplug(bdev). So in the read case, +the queue gets explicitly unplugged as part of waiting for completion on that +buffer. For page driven IO, the address space ->sync_page() takes care of +doing the blk_run_address_space(). Aside: This is kind of controversial territory, as it's not clear if plugging is @@ -1067,11 +1065,6 @@ Aside: multi-page bios being queued in one shot, we may not need to wait to merge a big request from the broken up pieces coming by. - Per-queue granularity unplugging (still a Todo) may help reduce some of the - concerns with just a single tq_disk flush approach. Something like - blk_kick_queue() to unplug a specific queue (right away ?) - or optionally, all queues, is in the plan. - 4.4 I/O contexts I/O contexts provide a dynamically allocated per process data area.
They may be used in I/O schedulers, and in the block layer (could be used for IO stats, priorities for example). diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt index a98a7fe7aab..1a608877b14 100644 --- a/Documentation/cgroups/memory.txt +++ b/Documentation/cgroups/memory.txt @@ -6,15 +6,14 @@ used here with the memory controller that is used in hardware. Salient features -a. Enable control of both RSS (mapped) and Page Cache (unmapped) pages +a. Enable control of Anonymous, Page Cache (mapped and unmapped) and + Swap Cache memory pages. b. The infrastructure allows easy addition of other types of memory to control c. Provides *zero overhead* for non memory controller users d. Provides a double LRU: global memory pressure causes reclaim from the global LRU; a cgroup on hitting a limit, reclaims from the per cgroup LRU -NOTE: Swap Cache (unmapped) is not accounted now. - Benefits and Purpose of the memory controller The memory controller isolates the memory behaviour of a group of tasks @@ -290,34 +289,44 @@ will be charged as a new owner of it. moved to the parent. If you want to avoid that, force_empty will be useful. 5.2 stat file - memory.stat file includes following statistics (now) - cache - # of pages from page-cache and shmem. - rss - # of pages from anonymous memory. - pgpgin - # of event of charging - pgpgout - # of event of uncharging - active_anon - # of pages on active lru of anon, shmem. - inactive_anon - # of pages on active lru of anon, shmem - active_file - # of pages on active lru of file-cache - inactive_file - # of pages on inactive lru of file cache - unevictable - # of pages cannot be reclaimed.(mlocked etc) - - Below is depend on CONFIG_DEBUG_VM. - inactive_ratio - VM internal parameter. (see mm/page_alloc.c) - recent_rotated_anon - VM internal parameter. (see mm/vmscan.c) - recent_rotated_file - VM internal parameter. (see mm/vmscan.c) - recent_scanned_anon - VM internal parameter. (see mm/vmscan.c) - recent_scanned_file - VM internal parameter. (see mm/vmscan.c) - - Memo: + +memory.stat file includes the following statistics + +cache - # of bytes of page cache memory. +rss - # of bytes of anonymous and swap cache memory. +pgpgin - # of pages paged in (equivalent to # of charging events). +pgpgout - # of pages paged out (equivalent to # of uncharging events). +active_anon - # of bytes of anonymous and swap cache memory on active + lru list. +inactive_anon - # of bytes of anonymous memory and swap cache memory on + inactive lru list. +active_file - # of bytes of file-backed memory on active lru list. +inactive_file - # of bytes of file-backed memory on inactive lru list. +unevictable - # of bytes of memory that cannot be reclaimed (mlocked etc). + +The following additional stats are dependent on CONFIG_DEBUG_VM. + +inactive_ratio - VM internal parameter. (see mm/page_alloc.c) +recent_rotated_anon - VM internal parameter. (see mm/vmscan.c) +recent_rotated_file - VM internal parameter. (see mm/vmscan.c) +recent_scanned_anon - VM internal parameter. (see mm/vmscan.c) +recent_scanned_file - VM internal parameter. (see mm/vmscan.c) + +Memo: recent_rotated means recent frequency of lru rotation. recent_scanned means recent # of scans to lru. These are shown for debugging; please see the code for the exact meanings. +Note: + Only anonymous and swap cache memory is listed as part of 'rss' stat. + This should not be confused with the true 'resident set size' or the + amount of physical memory used by the cgroup. Per-cgroup rss + accounting is not done yet.
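The memory.stat layout above is a flat list of "<name> <value>" pairs, so it is straightforward to consume programmatically. Below is a minimal userspace sketch that dumps the counters; the path /cgroups/0/memory.stat is a hypothetical example of a mount point and group, substitute wherever the memory controller is actually mounted on your system.

	/* Minimal sketch: dump a cgroup's memory.stat counters.  The path
	 * below is a hypothetical example; substitute the directory where
	 * the memory controller is actually mounted. */
	#include <stdio.h>

	int main(void)
	{
		char key[64];
		unsigned long long val;
		FILE *f = fopen("/cgroups/0/memory.stat", "r");

		if (!f) {
			perror("memory.stat");
			return 1;
		}
		/* Each line is "<name> <value>": cache, rss etc. are byte
		 * counts, while pgpgin/pgpgout are event counts. */
		while (fscanf(f, "%63s %llu", key, &val) == 2)
			printf("%-20s %llu\n", key, val);
		fclose(f);
		return 0;
	}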
5.3 swappiness Similar to /proc/sys/vm/swappiness, but affecting a hierarchy of groups only. - Following cgroup's swapiness can't be changed. + Following cgroups' swappiness can't be changed. - root cgroup (uses /proc/sys/vm/swappiness). - a cgroup which uses hierarchy and it has child cgroup. - a cgroup which uses hierarchy and not the root of hierarchy. diff --git a/Documentation/cgroups/resource_counter.txt b/Documentation/cgroups/resource_counter.txt index f196ac1d7d2..95b24d766ea 100644 --- a/Documentation/cgroups/resource_counter.txt +++ b/Documentation/cgroups/resource_counter.txt @@ -47,13 +47,18 @@ to work with it. 2. Basic accounting routines - a. void res_counter_init(struct res_counter *rc) + a. void res_counter_init(struct res_counter *rc, + struct res_counter *rc_parent) Initializes the resource counter. As usual, should be the first routine called for a new counter. - b. int res_counter_charge[_locked] - (struct res_counter *rc, unsigned long val) + The struct res_counter *rc_parent can be used to define a hierarchical + child -> parent relationship directly in the res_counter structure, + NULL can be used to define no relationship. + + b. int res_counter_charge(struct res_counter *rc, unsigned long val, + struct res_counter **limit_fail_at) When a resource is about to be allocated it has to be accounted with the appropriate resource counter (controller should determine @@ -67,15 +72,25 @@ to work with it. * if the charging is performed first, then it should be uncharged on error path (if the one is called). - c. void res_counter_uncharge[_locked] + If the charging fails and a hierarchical dependency exists, the + limit_fail_at parameter is set to the particular res_counter element + where the charging failed. + + c. int res_counter_charge_locked + (struct res_counter *rc, unsigned long val) + + The same as res_counter_charge(), but it must not acquire/release the + res_counter->lock internally (it must be called with res_counter->lock + held). + + d. void res_counter_uncharge[_locked] (struct res_counter *rc, unsigned long val) When a resource is released (freed) it should be de-accounted from the resource counter it was accounted to. This is called "uncharging". - The _locked routines imply that the res_counter->lock is taken. - + The _locked routines imply that the res_counter->lock is taken. 2.1 Other accounting routines diff --git a/Documentation/powerpc/dts-bindings/fsl/i2c.txt b/Documentation/powerpc/dts-bindings/fsl/i2c.txt index d0ab33e21fe..b6d2e21474f 100644 --- a/Documentation/powerpc/dts-bindings/fsl/i2c.txt +++ b/Documentation/powerpc/dts-bindings/fsl/i2c.txt @@ -7,8 +7,10 @@ Required properties : Recommended properties : - - compatible : Should be "fsl-i2c" for parts compatible with - Freescale I2C specifications. + - compatible : compatibility list with 2 entries, the first should + be "fsl,CHIP-i2c" where CHIP is the name of a compatible processor, + e.g. mpc8313, mpc8543, mpc8544, mpc5200 or mpc5200b. The second one + should be "fsl-i2c". - interrupts : <a b> where a is the interrupt number and b is a field that represents an encoding of the sense and level information for the interrupt. This should be encoded based on @@ -16,17 +18,31 @@ Recommended properties : controller you have. - interrupt-parent : the phandle for the interrupt controller that services interrupts for this device.
- - dfsrr : boolean; if defined, indicates that this I2C device has - a digital filter sampling rate register - - fsl5200-clocking : boolean; if defined, indicated that this device - uses the FSL 5200 clocking mechanism. - -Example : - i2c@3000 { - interrupt-parent = <40000>; - interrupts = <1b 3>; - reg = <3000 18>; - device_type = "i2c"; - compatible = "fsl-i2c"; - dfsrr; + - fsl,preserve-clocking : boolean; if defined, the clock settings + from the bootloader are preserved (not touched). + - clock-frequency : desired I2C bus clock frequency in Hz. + +Examples : + + i2c@3d00 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,mpc5200b-i2c","fsl,mpc5200-i2c","fsl-i2c"; + cell-index = <0>; + reg = <0x3d00 0x40>; + interrupts = <2 15 0>; + interrupt-parent = <&mpc5200_pic>; + fsl,preserve-clocking; }; + + i2c@3100 { + #address-cells = <1>; + #size-cells = <0>; + cell-index = <1>; + compatible = "fsl,mpc8544-i2c", "fsl-i2c"; + reg = <0x3100 0x100>; + interrupts = <43 2>; + interrupt-parent = <&mpic>; + clock-frequency = <400000>; + }; + diff --git a/Documentation/sound/alsa/HD-Audio.txt b/Documentation/sound/alsa/HD-Audio.txt index c5948f2f9a2..88b7433d2f1 100644 --- a/Documentation/sound/alsa/HD-Audio.txt +++ b/Documentation/sound/alsa/HD-Audio.txt @@ -169,7 +169,7 @@ PCI SSID look-up. What `model` option values are available depends on the codec chip. Check your codec chip from the codec proc file (see "Codec Proc-File" section below). It will show the vendor/product name of your codec -chip. Then, see Documentation/sound/alsa/HD-Audio-Modelstxt file, +chip. Then, see Documentation/sound/alsa/HD-Audio-Models.txt file, the section of HD-audio driver. You can find a list of codecs and `model` options belonging to each codec. For example, for Realtek ALC262 codec chip, pass `model=ultra` for devices that are compatible @@ -177,7 +177,7 @@ with Samsung Q1 Ultra. Thus, the first thing you can do for any brand-new, unsupported and non-working HD-audio hardware is to check HD-audio codec and several -different `model` option values. If you have a luck, some of them +different `model` option values. If you have any luck, some of them might suit your device well. Some codecs such as ALC880 have a special model option `model=test`. diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt index a34d55b6544..df38ef046f8 100644 --- a/Documentation/sysctl/net.txt +++ b/Documentation/sysctl/net.txt @@ -95,7 +95,7 @@ of struct cmsghdr structures with appended data. There is only one file in this directory. unix_dgram_qlen limits the max number of datagrams queued in Unix domain -socket's buffer. It will not take effect unless PF_UNIX flag is spicified. +socket's buffer. It will not take effect unless PF_UNIX flag is specified. 3. /proc/sys/net/ipv4 - IPV4 settings diff --git a/Documentation/tomoyo.txt b/Documentation/tomoyo.txt new file mode 100644 index 00000000000..b3a232cae7f --- /dev/null +++ b/Documentation/tomoyo.txt @@ -0,0 +1,55 @@ +--- What is TOMOYO? --- + +TOMOYO is a name-based MAC extension (LSM module) for the Linux kernel. + +LiveCD-based tutorials are available at +http://tomoyo.sourceforge.jp/en/1.6.x/1st-step/ubuntu8.04-live/ +http://tomoyo.sourceforge.jp/en/1.6.x/1st-step/centos5-live/ . +Though these tutorials use the non-LSM version of TOMOYO, they are useful for +understanding what TOMOYO is. + +--- How to enable TOMOYO? --- + +Build the kernel with CONFIG_SECURITY_TOMOYO=y and pass "security=tomoyo" on +the kernel's command line.
+ +Please see http://tomoyo.sourceforge.jp/en/2.2.x/ for details. + +--- Where is documentation? --- + +User <-> Kernel interface documentation is available at +http://tomoyo.sourceforge.jp/en/2.2.x/policy-reference.html . + +Materials we prepared for seminars and symposiums are available at +http://sourceforge.jp/projects/tomoyo/docs/?category_id=532&language_id=1 . +The lists below are grouped by three aspects. + +What is TOMOYO? + TOMOYO Linux Overview + http://sourceforge.jp/projects/tomoyo/docs/lca2009-takeda.pdf + TOMOYO Linux: pragmatic and manageable security for Linux + http://sourceforge.jp/projects/tomoyo/docs/freedomhectaipei-tomoyo.pdf + TOMOYO Linux: A Practical Method to Understand and Protect Your Own Linux Box + http://sourceforge.jp/projects/tomoyo/docs/PacSec2007-en-no-demo.pdf + +What can TOMOYO do? + Deep inside TOMOYO Linux + http://sourceforge.jp/projects/tomoyo/docs/lca2009-kumaneko.pdf + The role of "pathname based access control" in security. + http://sourceforge.jp/projects/tomoyo/docs/lfj2008-bof.pdf + +History of TOMOYO? + Realities of Mainlining + http://sourceforge.jp/projects/tomoyo/docs/lfj2008.pdf + +--- What is the future plan? --- + +We believe that inode based security and name based security are complementary +and both should be used together. But unfortunately, so far, we cannot enable +multiple LSM modules at the same time. We feel sorry that you have to give up +SELinux/SMACK/AppArmor etc. when you want to use TOMOYO. + +We hope that LSM becomes stackable in the future. Meanwhile, you can use the +non-LSM version of TOMOYO, available at http://tomoyo.sourceforge.jp/en/1.6.x/ . +The LSM version of TOMOYO is a subset of the non-LSM version. We are planning +to port the non-LSM version's functionality to the LSM version. diff --git a/Documentation/vm/00-INDEX b/Documentation/vm/00-INDEX index 2131b00b63f..2f77ced35df 100644 --- a/Documentation/vm/00-INDEX +++ b/Documentation/vm/00-INDEX @@ -1,5 +1,7 @@ 00-INDEX - this file. +active_mm.txt + - An explanation from Linus about tsk->active_mm vs tsk->mm. balance - various information on memory balancing. hugetlbpage.txt diff --git a/Documentation/vm/active_mm.txt b/Documentation/vm/active_mm.txt new file mode 100644 index 00000000000..4ee1f643d89 --- /dev/null +++ b/Documentation/vm/active_mm.txt @@ -0,0 +1,83 @@ +List: linux-kernel +Subject: Re: active_mm +From: Linus Torvalds <torvalds () transmeta ! com> +Date: 1999-07-30 21:36:24 + +Cc'd to linux-kernel, because I don't write explanations all that often, +and when I do I feel better about more people reading them. + +On Fri, 30 Jul 1999, David Mosberger wrote: +> +> Is there a brief description someplace on how "mm" vs. "active_mm" in +> the task_struct are supposed to be used? (My apologies if this was +> discussed on the mailing lists---I just returned from vacation and +> wasn't able to follow linux-kernel for a while). + +Basically, the new setup is: + + - we have "real address spaces" and "anonymous address spaces". The + difference is that an anonymous address space doesn't care about the + user-level page tables at all, so when we do a context switch into an + anonymous address space we just leave the previous address space + active.
+ + The obvious use for a "anonymous address space" is any thread that + doesn't need any user mappings - all kernel threads basically fall into + this category, but even "real" threads can temporarily say that for + some amount of time they are not going to be interested in user space, + and that the scheduler might as well try to avoid wasting time on + switching the VM state around. Currently only the old-style bdflush + sync does that. + + - "tsk->mm" points to the "real address space". For an anonymous process, + tsk->mm will be NULL, for the logical reason that an anonymous process + really doesn't _have_ a real address space at all. + + - however, we obviously need to keep track of which address space we + "stole" for such an anonymous user. For that, we have "tsk->active_mm", + which shows what the currently active address space is. + + The rule is that for a process with a real address space (ie tsk->mm is + non-NULL) the active_mm obviously always has to be the same as the real + one. + + For a anonymous process, tsk->mm == NULL, and tsk->active_mm is the + "borrowed" mm while the anonymous process is running. When the + anonymous process gets scheduled away, the borrowed address space is + returned and cleared. + +To support all that, the "struct mm_struct" now has two counters: a +"mm_users" counter that is how many "real address space users" there are, +and a "mm_count" counter that is the number of "lazy" users (ie anonymous +users) plus one if there are any real users. + +Usually there is at least one real user, but it could be that the real +user exited on another CPU while a lazy user was still active, so you do +actually get cases where you have a address space that is _only_ used by +lazy users. That is often a short-lived state, because once that thread +gets scheduled away in favour of a real thread, the "zombie" mm gets +released because "mm_users" becomes zero. + +Also, a new rule is that _nobody_ ever has "init_mm" as a real MM any +more. "init_mm" should be considered just a "lazy context when no other +context is available", and in fact it is mainly used just at bootup when +no real VM has yet been created. So code that used to check + + if (current->mm == &init_mm) + +should generally just do + + if (!current->mm) + +instead (which makes more sense anyway - the test is basically one of "do +we have a user context", and is generally done by the page fault handler +and things like that). + +Anyway, I put a pre-patch-2.3.13-1 on ftp.kernel.org just a moment ago, +because it slightly changes the interfaces to accomodate the alpha (who +would have thought it, but the alpha actually ends up having one of the +ugliest context switch codes - unlike the other architectures where the MM +and register state is separate, the alpha PALcode joins the two, and you +need to switch both together). + +(From http://marc.info/?l=linux-kernel&m=93337278602211&w=2) diff --git a/Documentation/vm/unevictable-lru.txt b/Documentation/vm/unevictable-lru.txt index 0706a7282a8..2d70d0d9510 100644 --- a/Documentation/vm/unevictable-lru.txt +++ b/Documentation/vm/unevictable-lru.txt @@ -1,588 +1,691 @@ - -This document describes the Linux memory management "Unevictable LRU" -infrastructure and the use of this infrastructure to manage several types -of "unevictable" pages. The document attempts to provide the overall -rationale behind this mechanism and the rationale for some of the design -decisions that drove the implementation. 
The latter design rationale is -discussed in the context of an implementation description. Admittedly, one -can obtain the implementation details--the "what does it do?"--by reading the -code. One hopes that the descriptions below add value by provide the answer -to "why does it do that?". - -Unevictable LRU Infrastructure: - -The Unevictable LRU adds an additional LRU list to track unevictable pages -and to hide these pages from vmscan. This mechanism is based on a patch by -Larry Woodman of Red Hat to address several scalability problems with page + ============================== + UNEVICTABLE LRU INFRASTRUCTURE + ============================== + +======== +CONTENTS +======== + + (*) The Unevictable LRU + + - The unevictable page list. + - Memory control group interaction. + - Marking address spaces unevictable. + - Detecting unevictable pages. + - vmscan's handling of unevictable pages. + + (*) mlock()'d pages. + + - History. + - Basic management. + - mlock()/mlockall() system call handling. + - Filtering special vmas. + - munlock()/munlockall() system call handling. + - Migrating mlocked pages. + - mmap(MAP_LOCKED) system call handling. + - munmap()/exit()/exec() system call handling. + - try_to_unmap(). + - try_to_munlock() reverse map scan. + - Page reclaim in shrink_*_list(). + + +============ +INTRODUCTION +============ + +This document describes the Linux memory manager's "Unevictable LRU" +infrastructure and the use of this to manage several types of "unevictable" +pages. + +The document attempts to provide the overall rationale behind this mechanism +and the rationale for some of the design decisions that drove the +implementation. The latter design rationale is discussed in the context of an +implementation description. Admittedly, one can obtain the implementation +details - the "what does it do?" - by reading the code. One hopes that the +descriptions below add value by providing the answer to "why does it do that?". + + +=================== +THE UNEVICTABLE LRU +=================== + +The Unevictable LRU facility adds an additional LRU list to track unevictable +pages and to hide these pages from vmscan. This mechanism is based on a patch +by Larry Woodman of Red Hat to address several scalability problems with page reclaim in Linux. The problems have been observed at customer sites on large -memory x86_64 systems. For example, a non-numal x86_64 platform with 128GB -of main memory will have over 32 million 4k pages in a single zone. When a -large fraction of these pages are not evictable for any reason [see below], -vmscan will spend a lot of time scanning the LRU lists looking for the small -fraction of pages that are evictable. This can result in a situation where -all cpus are spending 100% of their time in vmscan for hours or days on end, -with the system completely unresponsive. - -The Unevictable LRU infrastructure addresses the following classes of -unevictable pages: - -+ page owned by ramfs -+ page mapped into SHM_LOCKed shared memory regions -+ page mapped into VM_LOCKED [mlock()ed] vmas - -The infrastructure might be able to handle other conditions that make pages +memory x86_64 systems. + +To illustrate this with an example, a non-NUMA x86_64 platform with 128GB of +main memory will have over 32 million 4k pages in a single zone. When a large +fraction of these pages are not evictable for any reason [see below], vmscan +will spend a lot of time scanning the LRU lists looking for the small fraction +of pages that are evictable.
This can result in a situation where all CPUs are +spending 100% of their time in vmscan for hours or days on end, with the system +completely unresponsive. + +The unevictable list addresses the following classes of unevictable pages: + + (*) Those owned by ramfs. + + (*) Those mapped into SHM_LOCK'd shared memory regions. + + (*) Those mapped into VM_LOCKED [mlock()ed] VMAs. + +The infrastructure may also be able to handle other conditions that make pages unevictable, either by definition or by circumstance, in the future. -The Unevictable LRU List +THE UNEVICTABLE PAGE LIST +------------------------- The Unevictable LRU infrastructure consists of an additional, per-zone, LRU list called the "unevictable" list and an associated page flag, PG_unevictable, to -indicate that the page is being managed on the unevictable list. The -PG_unevictable flag is analogous to, and mutually exclusive with, the PG_active -flag in that it indicates on which LRU list a page resides when PG_lru is set. -The unevictable LRU list is source configurable based on the UNEVICTABLE_LRU -Kconfig option. +indicate that the page is being managed on the unevictable list. + +The PG_unevictable flag is analogous to, and mutually exclusive with, the +PG_active flag in that it indicates on which LRU list a page resides when +PG_lru is set. The unevictable list is compile-time configurable based on the +UNEVICTABLE_LRU Kconfig option. The Unevictable LRU infrastructure maintains unevictable pages on an additional LRU list for a few reasons: -1) We get to "treat unevictable pages just like we treat other pages in the - system, which means we get to use the same code to manipulate them, the - same code to isolate them (for migrate, etc.), the same code to keep track - of the statistics, etc..." [Rik van Riel] + (1) We get to "treat unevictable pages just like we treat other pages in the + system - which means we get to use the same code to manipulate them, the + same code to isolate them (for migrate, etc.), the same code to keep track + of the statistics, etc..." [Rik van Riel] + + (2) We want to be able to migrate unevictable pages between nodes for memory + defragmentation, workload management and memory hotplug. The linux kernel + can only migrate pages that it can successfully isolate from the LRU + lists. If we were to maintain pages elsewhere than on an LRU-like list, + where they can be found by isolate_lru_page(), we would prevent their + migration, unless we reworked migration code to find the unevictable pages + itself. -2) We want to be able to migrate unevictable pages between nodes--for memory - defragmentation, workload management and memory hotplug. The linux kernel - can only migrate pages that it can successfully isolate from the lru lists. - If we were to maintain pages elsewise than on an lru-like list, where they - can be found by isolate_lru_page(), we would prevent their migration, unless - we reworked migration code to find the unevictable pages. +The unevictable list does not differentiate between file-backed and anonymous, +swap-backed pages. This differentiation is only important while the pages are, +in fact, evictable. -The unevictable LRU list does not differentiate between file backed and swap -backed [anon] pages. This differentiation is only important while the pages -are, in fact, evictable. +The unevictable list benefits from the "arrayification" of the per-zone LRU +lists and statistics originally proposed and posted by Christoph Lameter. 
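To make the per-zone "arrayification" referred to above concrete, the sketch below shows the shape of the resulting structures. The enum follows the kernel's lru_list of this era, but the definitions are simplified illustrations, not the verbatim kernel code.

	/* Simplified sketch of the "arrayified" per-zone LRU lists.  The
	 * enum mirrors the kernel's lru_list; the structures are
	 * illustrative stand-ins, not the real <linux/mmzone.h> ones. */
	struct list_head {
		struct list_head *next, *prev;
	};

	enum lru_list {
		LRU_INACTIVE_ANON,
		LRU_ACTIVE_ANON,
		LRU_INACTIVE_FILE,
		LRU_ACTIVE_FILE,
		LRU_UNEVICTABLE,	/* the additional list described above */
		NR_LRU_LISTS
	};

	struct zone_sketch {
		struct {
			struct list_head list;	/* one LRU per enum entry */
			unsigned long nr_pages;	/* per-list statistics */
		} lru[NR_LRU_LISTS];
	};

	/* The payoff: vmscan and the memory controller can treat all the
	 * lists, including the unevictable one, with the same loop. */
	#define for_each_lru(l) for ((l) = 0; (l) < NR_LRU_LISTS; (l)++)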
-The unevictable LRU list benefits from the "arrayification" of the per-zone -LRU lists and statistics originally proposed and posted by Christoph Lameter. +The unevictable list does not use the LRU pagevec mechanism. Rather, +unevictable pages are placed directly on the page's zone's unevictable list +under the zone lru_lock. This allows us to prevent the stranding of pages on +the unevictable list when one task has the page isolated from the LRU and other +tasks are changing the "evictability" state of the page. -The unevictable list does not use the lru pagevec mechanism. Rather, -unevictable pages are placed directly on the page's zone's unevictable -list under the zone lru_lock. The reason for this is to prevent stranding -of pages on the unevictable list when one task has the page isolated from the -lru and other tasks are changing the "evictability" state of the page. +MEMORY CONTROL GROUP INTERACTION +-------------------------------- -Unevictable LRU and Memory Controller Interaction +The unevictable LRU facility interacts with the memory control group [aka +memory controller; see Documentation/cgroups/memory.txt] by extending the +lru_list enum. + +The memory controller data structure automatically gets a per-zone unevictable +list as a result of the "arrayification" of the per-zone LRU lists (one per +lru_list enum element). The memory controller tracks the movement of pages to +and from the unevictable list. -The memory controller data structure automatically gets a per zone unevictable -lru list as a result of the "arrayification" of the per-zone LRU lists. The -memory controller tracks the movement of pages to and from the unevictable list. When a memory control group comes under memory pressure, the controller will not attempt to reclaim pages on the unevictable list. This has a couple of -effects. Because the pages are "hidden" from reclaim on the unevictable list, -the reclaim process can be more efficient, dealing only with pages that have -a chance of being reclaimed. On the other hand, if too many of the pages -charged to the control group are unevictable, the evictable portion of the -working set of the tasks in the control group may not fit into the available -memory. This can cause the control group to thrash or to oom-kill tasks. - - -Unevictable LRU: Detecting Unevictable Pages - -The function page_evictable(page, vma) in vmscan.c determines whether a -page is evictable or not. For ramfs pages and pages in SHM_LOCKed regions, -page_evictable() tests a new address space flag, AS_UNEVICTABLE, in the page's -address space using a wrapper function. Wrapper functions are used to set, -clear and test the flag to reduce the requirement for #ifdef's throughout the -source code. AS_UNEVICTABLE is set on ramfs inode/mapping when it is created. -This flag remains for the life of the inode. - -For shared memory regions, AS_UNEVICTABLE is set when an application -successfully SHM_LOCKs the region and is removed when the region is -SHM_UNLOCKed. Note that shmctl(SHM_LOCK, ...) does not populate the page -tables for the region as does, for example, mlock(). So, we make no special -effort to push any pages in the SHM_LOCKed region to the unevictable list. -Vmscan will do this when/if it encounters the pages during reclaim. On -SHM_UNLOCK, shmctl() scans the pages in the region and "rescues" them from the -unevictable list if no other condition keeps them unevictable. 
If a SHM_LOCKed -region is destroyed, the pages are also "rescued" from the unevictable list in -the process of freeing them. - -page_evictable() detects mlock()ed pages by testing an additional page flag, -PG_mlocked via the PageMlocked() wrapper. If the page is NOT mlocked, and a -non-NULL vma is supplied, page_evictable() will check whether the vma is +effects: + + (1) Because the pages are "hidden" from reclaim on the unevictable list, the + reclaim process can be more efficient, dealing only with pages that have a + chance of being reclaimed. + + (2) On the other hand, if too many of the pages charged to the control group + are unevictable, the evictable portion of the working set of the tasks in + the control group may not fit into the available memory. This can cause + the control group to thrash or to OOM-kill tasks. + + +MARKING ADDRESS SPACES UNEVICTABLE +---------------------------------- + +For facilities such as ramfs none of the pages attached to the address space +may be evicted. To prevent eviction of any such pages, the AS_UNEVICTABLE +address space flag is provided, and this can be manipulated by a filesystem +using a number of wrapper functions: + + (*) void mapping_set_unevictable(struct address_space *mapping); + + Mark the address space as being completely unevictable. + + (*) void mapping_clear_unevictable(struct address_space *mapping); + + Mark the address space as being evictable. + + (*) int mapping_unevictable(struct address_space *mapping); + + Query the address space, and return true if it is completely + unevictable. + +These are currently used in two places in the kernel: + + (1) By ramfs to mark the address spaces of its inodes when they are created, + and this mark remains for the life of the inode. + + (2) By SYSV SHM to mark SHM_LOCK'd address spaces until SHM_UNLOCK is called. + + Note that SHM_LOCK is not required to page in the locked pages if they're + swapped out; the application must touch the pages manually if it wants to + ensure they're in memory. + + +DETECTING UNEVICTABLE PAGES +--------------------------- + +The function page_evictable() in vmscan.c determines whether a page is +evictable or not using the query function outlined above [see section "Marking +address spaces unevictable"] to check the AS_UNEVICTABLE flag. + +For address spaces that are so marked after being populated (as SHM regions +might be), the lock action (eg: SHM_LOCK) can be lazy, and need not populate +the page tables for the region as does, for example, mlock(), nor need it make +any special effort to push any pages in the SHM_LOCK'd area to the unevictable +list. Instead, vmscan will do this if and when it encounters the pages during +a reclamation scan. + +On an unlock action (such as SHM_UNLOCK), the unlocker (eg: shmctl()) must scan +the pages in the region and "rescue" them from the unevictable list if no other +condition is keeping them unevictable. If an unevictable region is destroyed, +the pages are also "rescued" from the unevictable list in the process of +freeing them. + +page_evictable() also checks for mlocked pages by testing an additional page +flag, PG_mlocked (as wrapped by PageMlocked()). If the page is NOT mlocked, +and a non-NULL VMA is supplied, page_evictable() will check whether the VMA is VM_LOCKED via is_mlocked_vma(). is_mlocked_vma() will SetPageMlocked() and update the appropriate statistics if the vma is VM_LOCKED. This method allows efficient "culling" of pages in the fault path that are being faulted in to -VM_LOCKED vmas. 
+VM_LOCKED VMAs. -Unevictable Pages and Vmscan [shrink_*_list()] +VMSCAN'S HANDLING OF UNEVICTABLE PAGES +-------------------------------------- If unevictable pages are culled in the fault path, or moved to the unevictable -list at mlock() or mmap() time, vmscan will never encounter the pages until -they have become evictable again, for example, via munlock() and have been -"rescued" from the unevictable list. However, there may be situations where we -decide, for the sake of expediency, to leave a unevictable page on one of the -regular active/inactive LRU lists for vmscan to deal with. Vmscan checks for -such pages in all of the shrink_{active|inactive|page}_list() functions and -will "cull" such pages that it encounters--that is, it diverts those pages to -the unevictable list for the zone being scanned. - -There may be situations where a page is mapped into a VM_LOCKED vma, but the -page is not marked as PageMlocked. Such pages will make it all the way to +list at mlock() or mmap() time, vmscan will not encounter the pages until they +have become evictable again (via munlock() for example) and have been "rescued" +from the unevictable list. However, there may be situations where we decide, +for the sake of expediency, to leave an unevictable page on one of the regular +active/inactive LRU lists for vmscan to deal with. vmscan checks for such +pages in all of the shrink_{active|inactive|page}_list() functions and will +"cull" such pages that it encounters: that is, it diverts those pages to the +unevictable list for the zone being scanned. + +There may be situations where a page is mapped into a VM_LOCKED VMA, but the +page is not marked as PG_mlocked. Such pages will make it all the way to shrink_page_list() where they will be detected when vmscan walks the reverse -map in try_to_unmap(). If try_to_unmap() returns SWAP_MLOCK, shrink_page_list() -will cull the page at that point. +map in try_to_unmap(). If try_to_unmap() returns SWAP_MLOCK, +shrink_page_list() will cull the page at that point. -To "cull" an unevictable page, vmscan simply puts the page back on the lru -list using putback_lru_page()--the inverse operation to isolate_lru_page()-- -after dropping the page lock. Because the condition which makes the page -unevictable may change once the page is unlocked, putback_lru_page() will -recheck the unevictable state of a page that it places on the unevictable lru -list. If the page has become unevictable, putback_lru_page() removes it from -the list and retries, including the page_unevictable() test. Because such a -race is a rare event and movement of pages onto the unevictable list should be -rare, these extra evictabilty checks should not occur in the majority of calls -to putback_lru_page(). +To "cull" an unevictable page, vmscan simply puts the page back on the LRU list +using putback_lru_page() - the inverse operation to isolate_lru_page() - after +dropping the page lock. Because the condition which makes the page unevictable +may change once the page is unlocked, putback_lru_page() will recheck the +unevictable state of a page that it places on the unevictable list. If the +page has become evictable, putback_lru_page() removes it from the list and +retries, including the page_unevictable() test. Because such a race is a rare +event and movement of pages onto the unevictable list should be rare, these +extra evictability checks should not occur in the majority of calls to +putback_lru_page().
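The cull-and-recheck behaviour of putback_lru_page() described above can be sketched as follows. The helper names mirror the kernel's but are declared as stubs here: this illustrates the retry logic only, it is not a copy of mm/vmscan.c.

	/* Sketch of putback_lru_page()'s recheck-and-retry, as described
	 * above.  All helpers are stubs standing in for the real kernel
	 * functions. */
	#include <stddef.h>

	struct page;

	extern int page_evictable(struct page *page, void *vma);  /* stub */
	extern int isolate_lru_page(struct page *page);   /* 0 on success */
	extern void add_page_to_lru_list(struct page *page);      /* stub */
	extern void add_page_to_unevictable_list(struct page *page);

	static void putback_lru_page_sketch(struct page *page)
	{
	retry:
		if (page_evictable(page, NULL)) {
			add_page_to_lru_list(page); /* normal LRU list */
			return;
		}
		add_page_to_unevictable_list(page); /* hide from vmscan */

		/* The page lock was dropped before we were called, so the
		 * page may have become evictable while we added it: if so,
		 * pull it back off and place it again. */
		if (page_evictable(page, NULL) && isolate_lru_page(page) == 0)
			goto retry;
	}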
-Mlocked Page: Prior Work +============= +MLOCKED PAGES +============= -The "Unevictable Mlocked Pages" infrastructure is based on work originally +The unevictable page list is also useful for mlock(), in addition to ramfs and +SYSV SHM. Note that mlock() is only available in CONFIG_MMU=y situations; in +NOMMU situations, all mappings are effectively mlocked. + + +HISTORY +------- + +The "Unevictable mlocked Pages" infrastructure is based on work originally posted by Nick Piggin in an RFC patch entitled "mm: mlocked pages off LRU". -Nick posted his patch as an alternative to a patch posted by Christoph -Lameter to achieve the same objective--hiding mlocked pages from vmscan. -In Nick's patch, he used one of the struct page lru list link fields as a count -of VM_LOCKED vmas that map the page. This use of the link field for a count -prevented the management of the pages on an LRU list. Thus, mlocked pages were -not migratable as isolate_lru_page() could not find them and the lru list link -field was not available to the migration subsystem. Nick resolved this by -putting mlocked pages back on the lru list before attempting to isolate them, -thus abandoning the count of VM_LOCKED vmas. When Nick's patch was integrated -with the Unevictable LRU work, the count was replaced by walking the reverse -map to determine whether any VM_LOCKED vmas mapped the page. More on this -below. - - -Mlocked Pages: Basic Management - -Mlocked pages--pages mapped into a VM_LOCKED vma--represent one class of -unevictable pages. When such a page has been "noticed" by the memory -management subsystem, the page is marked with the PG_mlocked [PageMlocked()] -flag. A PageMlocked() page will be placed on the unevictable LRU list when -it is added to the LRU. Pages can be "noticed" by memory management in -several places: - -1) in the mlock()/mlockall() system call handlers. -2) in the mmap() system call handler when mmap()ing a region with the - MAP_LOCKED flag, or mmap()ing a region in a task that has called - mlockall() with the MCL_FUTURE flag. Both of these conditions result - in the VM_LOCKED flag being set for the vma. -3) in the fault path, if mlocked pages are "culled" in the fault path, - and when a VM_LOCKED stack segment is expanded. -4) as mentioned above, in vmscan:shrink_page_list() when attempting to - reclaim a page in a VM_LOCKED vma via try_to_unmap(). - -Mlocked pages become unlocked and rescued from the unevictable list when: - -1) mapped in a range unlocked via the munlock()/munlockall() system calls. -2) munmapped() out of the last VM_LOCKED vma that maps the page, including - unmapping at task exit. -3) when the page is truncated from the last VM_LOCKED vma of an mmap()ed file. -4) before a page is COWed in a VM_LOCKED vma. - - -Mlocked Pages: mlock()/mlockall() System Call Handling +Nick posted his patch as an alternative to a patch posted by Christoph Lameter +to achieve the same objective: hiding mlocked pages from vmscan. + +In Nick's patch, he used one of the struct page LRU list link fields as a count +of VM_LOCKED VMAs that map the page. This use of the link field for a count +prevented the management of the pages on an LRU list, and thus mlocked pages +were not migratable as isolate_lru_page() could not find them, and the LRU list +link field was not available to the migration subsystem. + +Nick resolved this by putting mlocked pages back on the lru list before +attempting to isolate them, thus abandoning the count of VM_LOCKED VMAs. 
When +Nick's patch was integrated with the Unevictable LRU work, the count was +replaced by walking the reverse map to determine whether any VM_LOCKED VMAs +mapped the page. More on this below. + + +BASIC MANAGEMENT +---------------- + +mlocked pages - pages mapped into a VM_LOCKED VMA - are a class of unevictable +pages. When such a page has been "noticed" by the memory management subsystem, +the page is marked with the PG_mlocked flag. This can be manipulated using the +PageMlocked() functions. + +A PG_mlocked page will be placed on the unevictable list when it is added to +the LRU. Such pages can be "noticed" by memory management in several places: + + (1) in the mlock()/mlockall() system call handlers; + + (2) in the mmap() system call handler when mmapping a region with the + MAP_LOCKED flag; + + (3) mmapping a region in a task that has called mlockall() with the MCL_FUTURE + flag + + (4) in the fault path, if mlocked pages are "culled" in the fault path, + and when a VM_LOCKED stack segment is expanded; or + + (5) as mentioned above, in vmscan:shrink_page_list() when attempting to + reclaim a page in a VM_LOCKED VMA via try_to_unmap() + +all of which result in the VM_LOCKED flag being set for the VMA if it doesn't +already have it set. + +mlocked pages become unlocked and rescued from the unevictable list when: + + (1) mapped in a range unlocked via the munlock()/munlockall() system calls; + + (2) munmap()'d out of the last VM_LOCKED VMA that maps the page, including + unmapping at task exit; + + (3) when the page is truncated from the last VM_LOCKED VMA of an mmapped file; + or + + (4) before a page is COW'd in a VM_LOCKED VMA. + + +mlock()/mlockall() SYSTEM CALL HANDLING +--------------------------------------- Both [do_]mlock() and [do_]mlockall() system call handlers call mlock_fixup() -for each vma in the range specified by the call. In the case of mlockall(), +for each VMA in the range specified by the call. In the case of mlockall(), this is the entire active address space of the task. Note that mlock_fixup() -is used for both mlock()ing and munlock()ing a range of memory. A call to -mlock() an already VM_LOCKED vma, or to munlock() a vma that is not VM_LOCKED -is treated as a no-op--mlock_fixup() simply returns. - -If the vma passes some filtering described in "Mlocked Pages: Filtering Vmas" -below, mlock_fixup() will attempt to merge the vma with its neighbors or split -off a subset of the vma if the range does not cover the entire vma. Once the -vma has been merged or split or neither, mlock_fixup() will call -__mlock_vma_pages_range() to fault in the pages via get_user_pages() and -to mark the pages as mlocked via mlock_vma_page(). - -Note that the vma being mlocked might be mapped with PROT_NONE. In this case, -get_user_pages() will be unable to fault in the pages. That's OK. If pages -do end up getting faulted into this VM_LOCKED vma, we'll handle them in the +is used for both mlocking and munlocking a range of memory. A call to mlock() +an already VM_LOCKED VMA, or to munlock() a VMA that is not VM_LOCKED is +treated as a no-op, and mlock_fixup() simply returns. + +If the VMA passes some filtering as described in "Filtering Special Vmas" +below, mlock_fixup() will attempt to merge the VMA with its neighbors or split +off a subset of the VMA if the range does not cover the entire VMA. 
Once the +VMA has been merged or split or neither, mlock_fixup() will call +__mlock_vma_pages_range() to fault in the pages via get_user_pages() and to +mark the pages as mlocked via mlock_vma_page(). + +Note that the VMA being mlocked might be mapped with PROT_NONE. In this case, +get_user_pages() will be unable to fault in the pages. That's okay. If pages +do end up getting faulted into this VM_LOCKED VMA, we'll handle them in the fault path or in vmscan. Also note that a page returned by get_user_pages() could be truncated or -migrated out from under us, while we're trying to mlock it. To detect -this, __mlock_vma_pages_range() tests the page_mapping after acquiring -the page lock. If the page is still associated with its mapping, we'll -go ahead and call mlock_vma_page(). If the mapping is gone, we just -unlock the page and move on. Worse case, this results in page mapped -in a VM_LOCKED vma remaining on a normal LRU list without being -PageMlocked(). Again, vmscan will detect and cull such pages. - -mlock_vma_page(), called with the page locked [N.B., not "mlocked"], will -TestSetPageMlocked() for each page returned by get_user_pages(). We use -TestSetPageMlocked() because the page might already be mlocked by another -task/vma and we don't want to do extra work. We especially do not want to -count an mlocked page more than once in the statistics. If the page was -already mlocked, mlock_vma_page() is done. +migrated out from under us, while we're trying to mlock it. To detect this, +__mlock_vma_pages_range() checks page_mapping() after acquiring the page lock. +If the page is still associated with its mapping, we'll go ahead and call +mlock_vma_page(). If the mapping is gone, we just unlock the page and move on. +In the worst case, this will result in a page mapped in a VM_LOCKED VMA +remaining on a normal LRU list without being PageMlocked(). Again, vmscan will +detect and cull such pages. + +mlock_vma_page() will call TestSetPageMlocked() for each page returned by +get_user_pages(). We use TestSetPageMlocked() because the page might already +be mlocked by another task/VMA and we don't want to do extra work. We +especially do not want to count an mlocked page more than once in the +statistics. If the page was already mlocked, mlock_vma_page() need do nothing +more. If the page was NOT already mlocked, mlock_vma_page() attempts to isolate the page from the LRU, as it is likely on the appropriate active or inactive list -at that time. If the isolate_lru_page() succeeds, mlock_vma_page() will -putback the page--putback_lru_page()--which will notice that the page is now -mlocked and divert the page to the zone's unevictable LRU list. If +at that time. If the isolate_lru_page() succeeds, mlock_vma_page() will put +back the page - by calling putback_lru_page() - which will notice that the page +is now mlocked and divert the page to the zone's unevictable list. If mlock_vma_page() is unable to isolate the page from the LRU, vmscan will handle -it later if/when it attempts to reclaim the page. +it later if and when it attempts to reclaim the page. -Mlocked Pages: Filtering Special Vmas +FILTERING SPECIAL VMAS +---------------------- -mlock_fixup() filters several classes of "special" vmas: +mlock_fixup() filters several classes of "special" VMAs: -1) vmas with VM_IO|VM_PFNMAP set are skipped entirely. The pages behind +1) VMAs with VM_IO or VM_PFNMAP set are skipped entirely. The pages behind these mappings are inherently pinned, so we don't need to mark them as - mlocked. 
In any case, most of the pages have no struct page in which to - so mark the page. Because of this, get_user_pages() will fail for these - vmas, so there is no sense in attempting to visit them. - -2) vmas mapping hugetlbfs page are already effectively pinned into memory. - We don't need nor want to mlock() these pages. However, to preserve the - prior behavior of mlock()--before the unevictable/mlock changes-- - mlock_fixup() will call make_pages_present() in the hugetlbfs vma range - to allocate the huge pages and populate the ptes. - -3) vmas with VM_DONTEXPAND|VM_RESERVED are generally user space mappings of - kernel pages, such as the vdso page, relay channel pages, etc. These pages + mlocked. In any case, most of the pages have no struct page in which to so + mark the page. Because of this, get_user_pages() will fail for these VMAs, + so there is no sense in attempting to visit them. + +2) VMAs mapping hugetlbfs page are already effectively pinned into memory. We + neither need nor want to mlock() these pages. However, to preserve the + prior behavior of mlock() - before the unevictable/mlock changes - + mlock_fixup() will call make_pages_present() in the hugetlbfs VMA range to + allocate the huge pages and populate the ptes. + +3) VMAs with VM_DONTEXPAND or VM_RESERVED are generally userspace mappings of + kernel pages, such as the VDSO page, relay channel pages, etc. These pages are inherently unevictable and are not managed on the LRU lists. - mlock_fixup() treats these vmas the same as hugetlbfs vmas. It calls + mlock_fixup() treats these VMAs the same as hugetlbfs VMAs. It calls make_pages_present() to populate the ptes. -Note that for all of these special vmas, mlock_fixup() does not set the +Note that for all of these special VMAs, mlock_fixup() does not set the VM_LOCKED flag. Therefore, we won't have to deal with them later during -munlock() or munmap()--for example, at task exit. Neither does mlock_fixup() -account these vmas against the task's "locked_vm". - -Mlocked Pages: Downgrading the Mmap Semaphore. - -mlock_fixup() must be called with the mmap semaphore held for write, because -it may have to merge or split vmas. However, mlocking a large region of -memory can take a long time--especially if vmscan must reclaim pages to -satisfy the regions requirements. Faulting in a large region with the mmap -semaphore held for write can hold off other faults on the address space, in -the case of a multi-threaded task. It can also hold off scans of the task's -address space via /proc. While testing under heavy load, it was observed that -the ps(1) command could be held off for many minutes while a large segment was -mlock()ed down. - -To address this issue, and to make the system more responsive during mlock()ing -of large segments, mlock_fixup() downgrades the mmap semaphore to read mode -during the call to __mlock_vma_pages_range(). This works fine. However, the -callers of mlock_fixup() expect the semaphore to be returned in write mode. -So, mlock_fixup() "upgrades" the semphore to write mode. Linux does not -support an atomic upgrade_sem() call, so mlock_fixup() must drop the semaphore -and reacquire it in write mode. In a multi-threaded task, it is possible for -the task memory map to change while the semaphore is dropped. Therefore, -mlock_fixup() looks up the vma at the range start address after reacquiring -the semaphore in write mode and verifies that it still covers the original -range. If not, mlock_fixup() returns an error [-EAGAIN]. 
All callers of -mlock_fixup() have been changed to deal with this new error condition. - -Note: when munlocking a region, all of the pages should already be resident-- -unless we have racing threads mlocking() and munlocking() regions. So, -unlocking should not have to wait for page allocations nor faults of any kind. -Therefore mlock_fixup() does not downgrade the semaphore for munlock(). - - -Mlocked Pages: munlock()/munlockall() System Call Handling - -The munlock() and munlockall() system calls are handled by the same functions-- -do_mlock[all]()--as the mlock() and mlockall() system calls with the unlock -vs lock operation indicated by an argument. So, these system calls are also -handled by mlock_fixup(). Again, if called for an already munlock()ed vma, -mlock_fixup() simply returns. Because of the vma filtering discussed above, -VM_LOCKED will not be set in any "special" vmas. So, these vmas will be +munlock(), munmap() or task exit. Neither does mlock_fixup() account these +VMAs against the task's "locked_vm". + + +munlock()/munlockall() SYSTEM CALL HANDLING +------------------------------------------- + +The munlock() and munlockall() system calls are handled by the same functions - +do_mlock[all]() - as the mlock() and mlockall() system calls with the unlock vs +lock operation indicated by an argument. So, these system calls are also +handled by mlock_fixup(). Again, if called for an already munlocked VMA, +mlock_fixup() simply returns. Because of the VMA filtering discussed above, +VM_LOCKED will not be set in any "special" VMAs. So, these VMAs will be ignored for munlock. -If the vma is VM_LOCKED, mlock_fixup() again attempts to merge or split off -the specified range. The range is then munlocked via the function -__mlock_vma_pages_range()--the same function used to mlock a vma range-- +If the VMA is VM_LOCKED, mlock_fixup() again attempts to merge or split off the +specified range. The range is then munlocked via the function +__mlock_vma_pages_range() - the same function used to mlock a VMA range - passing a flag to indicate that munlock() is being performed. -Because the vma access protections could have been changed to PROT_NONE after +Because the VMA access protections could have been changed to PROT_NONE after faulting in and mlocking pages, get_user_pages() was unreliable for visiting -these pages for munlocking. Because we don't want to leave pages mlocked(), +these pages for munlocking. Because we don't want to leave pages mlocked, get_user_pages() was enhanced to accept a flag to ignore the permissions when -fetching the pages--all of which should be resident as a result of previous -mlock()ing. +fetching the pages - all of which should be resident as a result of previous +mlocking. For munlock(), __mlock_vma_pages_range() unlocks individual pages by calling munlock_vma_page(). munlock_vma_page() unconditionally clears the PG_mlocked -flag using TestClearPageMlocked(). As with mlock_vma_page(), munlock_vma_page() -use the Test*PageMlocked() function to handle the case where the page might -have already been unlocked by another task. If the page was mlocked, -munlock_vma_page() updates that zone statistics for the number of mlocked -pages. Note, however, that at this point we haven't checked whether the page -is mapped by other VM_LOCKED vmas. - -We can't call try_to_munlock(), the function that walks the reverse map to check -for other VM_LOCKED vmas, without first isolating the page from the LRU. +flag using TestClearPageMlocked(). 
As with mlock_vma_page(), +munlock_vma_page() uses the Test*PageMlocked() function to handle the case where +the page might have already been unlocked by another task. If the page was +mlocked, munlock_vma_page() updates the zone statistics for the number of +mlocked pages. Note, however, that at this point we haven't checked whether +the page is mapped by other VM_LOCKED VMAs. + +We can't call try_to_munlock(), the function that walks the reverse map to +check for other VM_LOCKED VMAs, without first isolating the page from the LRU. try_to_munlock() is a variant of try_to_unmap() and thus requires that the page -not be on an lru list. [More on these below.] However, the call to -isolate_lru_page() could fail, in which case we couldn't try_to_munlock(). -So, we go ahead and clear PG_mlocked up front, as this might be the only chance -we have. If we can successfully isolate the page, we go ahead and +not be on an LRU list [more on these below]. However, the call to +isolate_lru_page() could fail, in which case we couldn't try_to_munlock(). So, +we go ahead and clear PG_mlocked up front, as this might be the only chance we +have. If we can successfully isolate the page, we go ahead and try_to_munlock(), which will restore the PG_mlocked flag and update the zone -page statistics if it finds another vma holding the page mlocked. If we fail +page statistics if it finds another VMA holding the page mlocked. If we fail to isolate the page, we'll have left a potentially mlocked page on the LRU. -This is fine, because we'll catch it later when/if vmscan tries to reclaim the -page. This should be relatively rare. - -Mlocked Pages: Migrating Them... - -A page that is being migrated has been isolated from the lru lists and is -held locked across unmapping of the page, updating the page's mapping -[address_space] entry and copying the contents and state, until the -page table entry has been replaced with an entry that refers to the new -page. Linux supports migration of mlocked pages and other unevictable -pages. This involves simply moving the PageMlocked and PageUnevictable states -from the old page to the new page. - -Note that page migration can race with mlocking or munlocking of the same -page. This has been discussed from the mlock/munlock perspective in the -respective sections above. Both processes [migration, m[un]locking], hold -the page locked. This provides the first level of synchronization. Page -migration zeros out the page_mapping of the old page before unlocking it, -so m[un]lock can skip these pages by testing the page mapping under page -lock. - -When completing page migration, we place the new and old pages back onto the -lru after dropping the page lock. The "unneeded" page--old page on success, -new page on failure--will be freed when the reference count held by the -migration process is released. To ensure that we don't strand pages on the -unevictable list because of a race between munlock and migration, page -migration uses the putback_lru_page() function to add migrated pages back to -the lru. - - -Mlocked Pages: mmap(MAP_LOCKED) System Call Handling +This is fine, because we'll catch it later if and when vmscan tries to reclaim +the page. This should be relatively rare.
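A rough sketch of the munlock path just described: PG_mlocked is cleared up front with a test-and-clear, and only when the page can be isolated does try_to_munlock() get the chance to restore the flag if another VM_LOCKED VMA still maps the page. The helpers are stubs; dec_mlock_stat() is a hypothetical stand-in for the zone statistics update.

	/* Sketch of munlock_vma_page() as described above; all helpers are
	 * stubs for the corresponding kernel functions. */
	struct page;

	extern int TestClearPageMlocked(struct page *page); /* old value */
	extern int isolate_lru_page(struct page *page);  /* 0 on success */
	extern int try_to_munlock(struct page *page);    /* rmap scan */
	extern void putback_lru_page(struct page *page);
	extern void dec_mlock_stat(struct page *page);   /* hypothetical */

	static void munlock_vma_page_sketch(struct page *page)
	{
		/* Clear PG_mlocked up front - this may be our only chance,
		 * and another task may already have munlocked the page,
		 * hence the test-and-clear. */
		if (!TestClearPageMlocked(page))
			return;

		dec_mlock_stat(page);
		if (isolate_lru_page(page) == 0) {
			/* Walk the reverse map: try_to_munlock() re-sets
			 * PG_mlocked and fixes the statistics if some other
			 * VM_LOCKED VMA still maps the page. */
			try_to_munlock(page);
			putback_lru_page(page);
		}
		/* On isolation failure a potentially mlocked page stays on
		 * a normal LRU; vmscan will cull it later if it tries to
		 * reclaim the page. */
	}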
+ + +MIGRATING MLOCKED PAGES +----------------------- + +A page that is being migrated has been isolated from the LRU lists and is held +locked across unmapping of the page, updating the page's address space entry +and copying the contents and state, until the page table entry has been +replaced with an entry that refers to the new page. Linux supports migration +of mlocked pages and other unevictable pages. This involves simply moving the +PG_mlocked and PG_unevictable states from the old page to the new page. + +Note that page migration can race with mlocking or munlocking of the same page. +This has been discussed from the mlock/munlock perspective in the respective +sections above. Both processes (migration and m[un]locking) hold the page +locked. This provides the first level of synchronization. Page migration +zeros out the page_mapping of the old page before unlocking it, so m[un]lock +can skip these pages by testing the page mapping under page lock. + +To complete page migration, we place the new and old pages back onto the LRU +after dropping the page lock. The "unneeded" page - old page on success, new +page on failure - will be freed when the reference count held by the migration +process is released. To ensure that we don't strand pages on the unevictable +list because of a race between munlock and migration, page migration uses the +putback_lru_page() function to add migrated pages back to the LRU. + + +mmap(MAP_LOCKED) SYSTEM CALL HANDLING +------------------------------------- In addition to the mlock()/mlockall() system calls, an application can request -that a region of memory be mlocked using the MAP_LOCKED flag with the mmap() +that a region of memory be mlocked by supplying the MAP_LOCKED flag to the mmap() call. Furthermore, any mmap() or brk() call that expands the heap, made by a task that has previously called mlockall() with the MCL_FUTURE flag, will result -in the newly mapped memory being mlocked. Before the unevictable/mlock changes, -the kernel simply called make_pages_present() to allocate pages and populate -the page table. +in the newly mapped memory being mlocked. Before the unevictable/mlock +changes, the kernel simply called make_pages_present() to allocate pages and +populate the page table. To mlock a range of memory under the unevictable/mlock infrastructure, the mmap() handler and task address space expansion functions call mlock_vma_pages_range() specifying the vma and the address range to mlock. -mlock_vma_pages_range() filters vmas like mlock_fixup(), as described above in -"Mlocked Pages: Filtering Vmas". It will clear the VM_LOCKED flag, which will -have already been set by the caller, in filtered vmas. Thus these vma's need -not be visited for munlock when the region is unmapped. +mlock_vma_pages_range() filters VMAs like mlock_fixup(), as described above in +"Filtering Special VMAs". It will clear the VM_LOCKED flag, which will have +already been set by the caller, in filtered VMAs. Thus these VMAs need not be +visited for munlock when the region is unmapped. -For "normal" vmas, mlock_vma_pages_range() calls __mlock_vma_pages_range() to +For "normal" VMAs, mlock_vma_pages_range() calls __mlock_vma_pages_range() to fault/allocate the pages and mlock them. Again, like mlock_fixup(), mlock_vma_pages_range() downgrades the mmap semaphore to read mode before -attempting to fault/allocate and mlock the pages; and "upgrades" the semaphore +attempting to fault/allocate and mlock the pages, and "upgrades" the semaphore back to write mode before returning.
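
The filtering and semaphore handling described above can be pictured roughly as
follows (a condensed sketch of mlock_vma_pages_range(); the exact filter
conditions, and the revalidation of the VMA after the semaphore is re-acquired,
are elided, and the trailing flag to __mlock_vma_pages_range() selects mlock
rather than the munlock case discussed earlier):

	long mlock_vma_pages_range(struct vm_area_struct *vma,
				   unsigned long start, unsigned long end)
	{
		struct mm_struct *mm = vma->vm_mm;
		long nr_pages = (end - start) >> PAGE_SHIFT;

		/* filter "special" VMAs, as in mlock_fixup() */
		if ((vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
		    is_vm_hugetlb_page(vma) || vma == get_gate_vma(current)) {
			vma->vm_flags &= ~VM_LOCKED;	/* never munlock these */
			return nr_pages;		/* all pages NOT mlocked */
		}

		downgrade_write(&mm->mmap_sem);
		nr_pages = __mlock_vma_pages_range(vma, start, end, 1);

		/* "upgrade" back to write mode before returning */
		up_read(&mm->mmap_sem);
		down_write(&mm->mmap_sem);
		return nr_pages;	/* pages NOT mlocked, for locked_vm accounting */
	}
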
-The callers of mlock_vma_pages_range() will have already added the memory -range to be mlocked to the task's "locked_vm". To account for filtered vmas, +The callers of mlock_vma_pages_range() will have already added the memory range +to be mlocked to the task's "locked_vm". To account for filtered VMAs, mlock_vma_pages_range() returns the number of pages NOT mlocked. All of the -callers then subtract a non-negative return value from the task's locked_vm. -A negative return value represent an error--for example, from get_user_pages() -attempting to fault in a vma with PROT_NONE access. In this case, we leave -the memory range accounted as locked_vm, as the protections could be changed -later and pages allocated into that region. +callers then subtract a non-negative return value from the task's locked_vm. A +negative return value represents an error - for example, from get_user_pages() +attempting to fault in a VMA with PROT_NONE access. In this case, we leave the +memory range accounted as locked_vm, as the protections could be changed later +and pages allocated into that region. -Mlocked Pages: munmap()/exit()/exec() System Call Handling +munmap()/exit()/exec() SYSTEM CALL HANDLING +------------------------------------------- When unmapping an mlocked region of memory, whether by an explicit call to munmap() or via an internal unmap from exit() or exec() processing, we must -munlock the pages if we're removing the last VM_LOCKED vma that maps the pages. +munlock the pages if we're removing the last VM_LOCKED VMA that maps the pages. Before the unevictable/mlock changes, mlocking did not mark the pages in any way, so unmapping them required no processing. To munlock a range of memory under the unevictable/mlock infrastructure, the -munmap() hander and task address space tear down function call +munmap() handler and task address space tear-down function call munlock_vma_pages_all(). The name reflects the observation that one always -specifies the entire vma range when munlock()ing during unmap of a region. -Because of the vma filtering when mlocking() regions, only "normal" vmas that +specifies the entire VMA range when munlock()ing during unmap of a region. +Because of the VMA filtering when mlock()ing regions, only "normal" VMAs that actually contain mlocked pages will be passed to munlock_vma_pages_all(). -munlock_vma_pages_all() clears the VM_LOCKED vma flag and, like mlock_fixup() +munlock_vma_pages_all() clears the VM_LOCKED VMA flag and, like mlock_fixup() for the munlock case, calls __munlock_vma_pages_range() to walk the page table -for the vma's memory range and munlock_vma_page() each resident page mapped by -the vma. This effectively munlocks the page, only if this is the last -VM_LOCKED vma that maps the page. - +for the VMA's memory range and munlock_vma_page() each resident page mapped by +the VMA. This effectively munlocks the page, but only if this is the last +VM_LOCKED VMA that maps the page. -Mlocked Page: try_to_unmap() -[Note: the code changes represented by this section are really quite small -compared to the text to describe what happening and why, and to discuss the -implications.] +try_to_unmap() +-------------- -Pages can, of course, be mapped into multiple vmas. Some of these vmas may +Pages can, of course, be mapped into multiple VMAs. Some of these VMAs may have the VM_LOCKED flag set. It is possible for a page mapped into one or more -VM_LOCKED vmas not to have the PG_mlocked flag set and therefore reside on one -of the active or inactive LRU lists.
This could happen if, for example, a -task in the process of munlock()ing the page could not isolate the page from -the LRU. As a result, vmscan/shrink_page_list() might encounter such a page -as described in "Unevictable Pages and Vmscan [shrink_*_list()]". To -handle this situation, try_to_unmap() has been enhanced to check for VM_LOCKED -vmas while it is walking a page's reverse map. +VM_LOCKED VMAs not to have the PG_mlocked flag set and therefore reside on one +of the active or inactive LRU lists. This could happen if, for example, a task +in the process of munlocking the page could not isolate the page from the LRU. +As a result, vmscan/shrink_page_list() might encounter such a page as described +in section "vmscan's handling of unevictable pages". To handle this situation, +try_to_unmap() checks for VM_LOCKED VMAs while it is walking a page's reverse +map. try_to_unmap() is always called, by either vmscan for reclaim or for page -migration, with the argument page locked and isolated from the LRU. BUG_ON() -assertions enforce this requirement. Separate functions handle anonymous and -mapped file pages, as these types of pages have different reverse map -mechanisms. - - try_to_unmap_anon() - -To unmap anonymous pages, each vma in the list anchored in the anon_vma must be -visited--at least until a VM_LOCKED vma is encountered. If the page is being -unmapped for migration, VM_LOCKED vmas do not stop the process because mlocked -pages are migratable. However, for reclaim, if the page is mapped into a -VM_LOCKED vma, the scan stops. try_to_unmap() attempts to acquire the mmap -semphore of the mm_struct to which the vma belongs in read mode. If this is -successful, try_to_unmap() will mlock the page via mlock_vma_page()--we -wouldn't have gotten to try_to_unmap() if the page were already mlocked--and -will return SWAP_MLOCK, indicating that the page is unevictable. If the -mmap semaphore cannot be acquired, we are not sure whether the page is really -unevictable or not. In this case, try_to_unmap() will return SWAP_AGAIN. - - try_to_unmap_file() -- linear mappings - -Unmapping of a mapped file page works the same, except that the scan visits -all vmas that maps the page's index/page offset in the page's mapping's -reverse map priority search tree. It must also visit each vma in the page's -mapping's non-linear list, if the list is non-empty. As for anonymous pages, -on encountering a VM_LOCKED vma for a mapped file page, try_to_unmap() will -attempt to acquire the associated mm_struct's mmap semaphore to mlock the page, -returning SWAP_MLOCK if this is successful, and SWAP_AGAIN, if not. - - try_to_unmap_file() -- non-linear mappings - -If a page's mapping contains a non-empty non-linear mapping vma list, then -try_to_un{map|lock}() must also visit each vma in that list to determine -whether the page is mapped in a VM_LOCKED vma. Again, the scan must visit -all vmas in the non-linear list to ensure that the pages is not/should not be -mlocked. If a VM_LOCKED vma is found in the list, the scan could terminate. -However, there is no easy way to determine whether the page is actually mapped -in a given vma--either for unmapping or testing whether the VM_LOCKED vma -actually pins the page. - -So, try_to_unmap_file() handles non-linear mappings by scanning a certain -number of pages--a "cluster"--in each non-linear vma associated with the page's -mapping, for each file mapped page that vmscan tries to unmap. 
If this happens -to unmap the page we're trying to unmap, try_to_unmap() will notice this on -return--(page_mapcount(page) == 0)--and return SWAP_SUCCESS. Otherwise, it -will return SWAP_AGAIN, causing vmscan to recirculate this page. We take -advantage of the cluster scan in try_to_unmap_cluster() as follows: - -For each non-linear vma, try_to_unmap_cluster() attempts to acquire the mmap -semaphore of the associated mm_struct for read without blocking. If this -attempt is successful and the vma is VM_LOCKED, try_to_unmap_cluster() will -retain the mmap semaphore for the scan; otherwise it drops it here. Then, -for each page in the cluster, if we're holding the mmap semaphore for a locked -vma, try_to_unmap_cluster() calls mlock_vma_page() to mlock the page. This -call is a no-op if the page is already locked, but will mlock any pages in -the non-linear mapping that happen to be unlocked. If one of the pages so -mlocked is the page passed in to try_to_unmap(), try_to_unmap_cluster() will -return SWAP_MLOCK, rather than the default SWAP_AGAIN. This will allow vmscan -to cull the page, rather than recirculating it on the inactive list. Again, -if try_to_unmap_cluster() cannot acquire the vma's mmap sem, it returns -SWAP_AGAIN, indicating that the page is mapped by a VM_LOCKED vma, but -couldn't be mlocked. - - -Mlocked pages: try_to_munlock() Reverse Map Scan - -TODO/FIXME: a better name might be page_mlocked()--analogous to the -page_referenced() reverse map walker. - -When munlock_vma_page()--see "Mlocked Pages: munlock()/munlockall() -System Call Handling" above--tries to munlock a page, it needs to -determine whether or not the page is mapped by any VM_LOCKED vma, without -actually attempting to unmap all ptes from the page. For this purpose, the -unevictable/mlock infrastructure introduced a variant of try_to_unmap() called -try_to_munlock(). +migration, with the argument page locked and isolated from the LRU. Separate +functions handle anonymous and mapped file pages, as these types of pages have +different reverse map mechanisms. + + (*) try_to_unmap_anon() + + To unmap anonymous pages, each VMA in the list anchored in the anon_vma + must be visited - at least until a VM_LOCKED VMA is encountered. If the + page is being unmapped for migration, VM_LOCKED VMAs do not stop the + process because mlocked pages are migratable. However, for reclaim, if + the page is mapped into a VM_LOCKED VMA, the scan stops. + + try_to_unmap_anon() attempts to acquire in read mode the mmap semaphore of + the mm_struct to which the VMA belongs. If this is successful, it will + mlock the page via mlock_vma_page() - we wouldn't have gotten to + try_to_unmap_anon() if the page were already mlocked - and will return + SWAP_MLOCK, indicating that the page is unevictable. + + If the mmap semaphore cannot be acquired, we are not sure whether the page + is really unevictable or not. In this case, try_to_unmap_anon() will + return SWAP_AGAIN. + + (*) try_to_unmap_file() - linear mappings + + Unmapping of a mapped file page works the same as for anonymous mappings, + except that the scan visits all VMAs that map the page's index/page offset + in the page's mapping's reverse map priority search tree. It also visits + each VMA in the page's mapping's non-linear list, if the list is + non-empty.
+ + As for anonymous pages, on encountering a VM_LOCKED VMA for a mapped file + page, try_to_unmap_file() will attempt to acquire the associated + mm_struct's mmap semaphore to mlock the page, returning SWAP_MLOCK if this + is successful, and SWAP_AGAIN if not. + + (*) try_to_unmap_file() - non-linear mappings + + If a page's mapping contains a non-empty non-linear mapping VMA list, then + try_to_un{map|lock}() must also visit each VMA in that list to determine + whether the page is mapped in a VM_LOCKED VMA. Again, the scan must visit + all VMAs in the non-linear list to ensure that the page is not/should not + be mlocked. + + If a VM_LOCKED VMA is found in the list, the scan could terminate. + However, there is no easy way to determine whether the page is actually + mapped in a given VMA - either for unmapping or testing whether the + VM_LOCKED VMA actually pins the page. + + try_to_unmap_file() handles non-linear mappings by scanning a certain + number of pages - a "cluster" - in each non-linear VMA associated with the + page's mapping, for each file mapped page that vmscan tries to unmap. If + this happens to unmap the page we're trying to unmap, try_to_unmap() will + notice this on return (page_mapcount(page) will be 0) and return + SWAP_SUCCESS. Otherwise, it will return SWAP_AGAIN, causing vmscan to + recirculate this page. We take advantage of the cluster scan in + try_to_unmap_cluster() as follows: + + For each non-linear VMA, try_to_unmap_cluster() attempts to acquire the + mmap semaphore of the associated mm_struct for read without blocking. + + If this attempt is successful and the VMA is VM_LOCKED, + try_to_unmap_cluster() will retain the mmap semaphore for the scan; + otherwise it drops it here. + + Then, for each page in the cluster, if we're holding the mmap semaphore + for a locked VMA, try_to_unmap_cluster() calls mlock_vma_page() to + mlock the page. This call is a no-op if the page is already locked, + but will mlock any pages in the non-linear mapping that happen to be + unlocked. + + If one of the pages so mlocked is the page passed in to try_to_unmap(), + try_to_unmap_cluster() will return SWAP_MLOCK, rather than the default + SWAP_AGAIN. This will allow vmscan to cull the page, rather than + recirculating it on the inactive list. + + Again, if try_to_unmap_cluster() cannot acquire the VMA's mmap sem, it + returns SWAP_AGAIN, indicating that the page is mapped by a VM_LOCKED + VMA, but couldn't be mlocked. + + +try_to_munlock() REVERSE MAP SCAN +--------------------------------- + + [!] TODO/FIXME: a better name might be page_mlocked() - analogous to the + page_referenced() reverse map walker. + +When munlock_vma_page() [see section "munlock()/munlockall() System Call +Handling" above] tries to munlock a page, it needs to determine whether or not +the page is mapped by any VM_LOCKED VMA without actually attempting to unmap +all PTEs from the page. For this purpose, the unevictable/mlock infrastructure +introduced a variant of try_to_unmap() called try_to_munlock(). try_to_munlock() calls the same functions as try_to_unmap() for anonymous and mapped file pages with an additional argument specifying unlock versus unmap processing. Again, these functions walk the respective reverse maps looking -for VM_LOCKED vmas.
When such a VMA is found for anonymous pages and file pages mapped in linear VMAs, as in the try_to_unmap() case, the functions attempt to acquire the associated mmap semaphore, mlock the page via mlock_vma_page() and return SWAP_MLOCK. This effectively undoes the pre-clearing of the page's PG_mlocked done by munlock_vma_page(). -If try_to_unmap() is unable to acquire a VM_LOCKED vma's associated mmap -semaphore, it will return SWAP_AGAIN. This will allow shrink_page_list() -to recycle the page on the inactive list and hope that it has better luck -with the page next time. - -For file pages mapped into non-linear vmas, the try_to_munlock() logic works -slightly differently. On encountering a VM_LOCKED non-linear vma that might -map the page, try_to_munlock() returns SWAP_AGAIN without actually mlocking -the page. munlock_vma_page() will just leave the page unlocked and let -vmscan deal with it--the usual fallback position. - -Note that try_to_munlock()'s reverse map walk must visit every vma in a pages' -reverse map to determine that a page is NOT mapped into any VM_LOCKED vma. -However, the scan can terminate when it encounters a VM_LOCKED vma and can -successfully acquire the vma's mmap semphore for read and mlock the page. -Although try_to_munlock() can be called many [very many!] times when -munlock()ing a large region or tearing down a large address space that has been -mlocked via mlockall(), overall this is a fairly rare event. - -Mlocked Page: Page Reclaim in shrink_*_list() - -shrink_active_list() culls any obviously unevictable pages--i.e., -!page_evictable(page, NULL)--diverting these to the unevictable lru -list. However, shrink_active_list() only sees unevictable pages that -made it onto the active/inactive lru lists. Note that these pages do not -have PageUnevictable set--otherwise, they would be on the unevictable list and -shrink_active_list would never see them. +If try_to_unmap() is unable to acquire a VM_LOCKED VMA's associated mmap +semaphore, it will return SWAP_AGAIN. This will allow shrink_page_list() to +recycle the page on the inactive list and hope that it has better luck with the +page next time. + +For file pages mapped into non-linear VMAs, the try_to_munlock() logic works +slightly differently. On encountering a VM_LOCKED non-linear VMA that might +map the page, try_to_munlock() returns SWAP_AGAIN without actually mlocking the +page. munlock_vma_page() will just leave the page unlocked and let vmscan deal +with it - the usual fallback position. + +Note that try_to_munlock()'s reverse map walk must visit every VMA in a page's +reverse map to determine that a page is NOT mapped into any VM_LOCKED VMA. +However, the scan can terminate when it encounters a VM_LOCKED VMA and can +successfully acquire the VMA's mmap semaphore for read and mlock the page. +Although try_to_munlock() might be called a great many times when munlocking a +large region or tearing down a large address space that has been mlocked via +mlockall(), overall this is a fairly rare event. + + +PAGE RECLAIM IN shrink_*_list() +------------------------------- + +shrink_active_list() culls any obviously unevictable pages - i.e. +!page_evictable(page, NULL) - diverting these to the unevictable list. +However, shrink_active_list() only sees unevictable pages that made it onto the +active/inactive LRU lists. Note that these pages do not have PageUnevictable +set - otherwise they would be on the unevictable list and shrink_active_list() +would never see them.
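
The test applied here amounts to roughly the following (a rough sketch of the
page_evictable() check referenced above; the mapping-level test is what covers
ramfs and SHM_LOCK'd segments, and is_mlocked_vma() stands for the VMA-level
check, so treat the helper names as approximate):

	int page_evictable(struct page *page, struct vm_area_struct *vma)
	{
		if (mapping_unevictable(page_mapping(page)))
			return 0;	/* e.g. ramfs or SHM_LOCK'd shm */

		if (PageMlocked(page) || (vma && is_mlocked_vma(vma, page)))
			return 0;	/* mlocked page */

		return 1;	/* evictable */
	}
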
Some examples of these unevictable pages on the LRU lists are: -1) ramfs pages that have been placed on the lru lists when first allocated. + (1) ramfs pages that have been placed on the LRU lists when first allocated. + + (2) SHM_LOCK'd shared memory pages. shmctl(SHM_LOCK) does not attempt to + allocate or fault in the pages in the shared memory region; that happens + only when an application accesses a page for the first time after + SHM_LOCK'ing the segment. -2) SHM_LOCKed shared memory pages. shmctl(SHM_LOCK) does not attempt to - allocate or fault in the pages in the shared memory region. This happens - when an application accesses the page the first time after SHM_LOCKing - the segment. + (3) mlocked pages that could not be isolated from the LRU and moved to the + unevictable list in mlock_vma_page(). -3) Mlocked pages that could not be isolated from the lru and moved to the - unevictable list in mlock_vma_page(). + (4) Pages mapped into multiple VM_LOCKED VMAs, but try_to_munlock() couldn't + acquire the VMA's mmap semaphore to test the flags and set PageMlocked. + munlock_vma_page() was forced to let the page back onto the normal LRU + list for vmscan to handle. -3) Pages mapped into multiple VM_LOCKED vmas, but try_to_munlock() couldn't - acquire the vma's mmap semaphore to test the flags and set PageMlocked. - munlock_vma_page() was forced to let the page back on to the normal - LRU list for vmscan to handle. +shrink_inactive_list() also diverts any unevictable pages that it finds on the +inactive lists to the appropriate zone's unevictable list. -shrink_inactive_list() also culls any unevictable pages that it finds on -the inactive lists, again diverting them to the appropriate zone's unevictable -lru list. shrink_inactive_list() should only see SHM_LOCKed pages that became -SHM_LOCKed after shrink_active_list() had moved them to the inactive list, or -pages mapped into VM_LOCKED vmas that munlock_vma_page() couldn't isolate from -the lru to recheck via try_to_munlock(). shrink_inactive_list() won't notice -the latter, but will pass on to shrink_page_list(). +shrink_inactive_list() should only see SHM_LOCK'd pages that became SHM_LOCK'd +after shrink_active_list() had moved them to the inactive list, or pages mapped +into VM_LOCKED VMAs that munlock_vma_page() couldn't isolate from the LRU to +recheck via try_to_munlock(). shrink_inactive_list() won't notice the latter, +but will pass them on to shrink_page_list(). shrink_page_list() again culls obviously unevictable pages that it could encounter for similar reasons to shrink_inactive_list(). Pages mapped into -VM_LOCKED vmas but without PG_mlocked set will make it all the way to +VM_LOCKED VMAs but without PG_mlocked set will make it all the way to try_to_unmap(). shrink_page_list() will divert them to the unevictable list when try_to_unmap() returns SWAP_MLOCK, as discussed above.
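
The divert-on-SWAP_MLOCK behaviour comes down to a dispatch of this shape
inside shrink_page_list() (schematic only: the flag argument to try_to_unmap()
and the bookkeeping behind the goto labels are elided, and the label names
follow the style of the reclaim code rather than being guaranteed verbatim):

	/* inside the main loop of shrink_page_list() */
	if (page_mapped(page) && mapping) {
		switch (try_to_unmap(page, 0)) {
		case SWAP_MLOCK:
			goto cull_mlocked;	/* divert to the unevictable list */
		case SWAP_AGAIN:
			goto keep_locked;	/* recirculate the page */
		case SWAP_FAIL:
			goto activate_locked;
		case SWAP_SUCCESS:
			;	/* fall through and try to free the page */
		}
	}
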
diff --git a/MAINTAINERS b/MAINTAINERS index abedca58d8b..abbedb658c2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -461,7 +461,7 @@ F: arch/x86/include/asm/amd_iommu*.h AMD MICROCODE UPDATE SUPPORT P: Andreas Herrmann -M: andeas.herrmann3@amd.com +M: andreas.herrmann3@amd.com L: amd64-microcode@amd64.org S: Supported F: arch/x86/kernel/microcode_amd.c @@ -1894,7 +1894,7 @@ F: fs/ecryptfs/ EDAC-CORE P: Doug Thompson M: dougthompson@xmission.com -L: bluesmoke-devel@lists.sourceforge.net +L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers) W: bluesmoke.sourceforge.net S: Supported F: Documentation/edac.txt @@ -1906,7 +1906,7 @@ P: Mark Gross P: Doug Thompson M: mark.gross@intel.com M: dougthompson@xmission.com -L: bluesmoke-devel@lists.sourceforge.net +L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers) W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/e752x_edac.c @@ -1914,7 +1914,7 @@ F: drivers/edac/e752x_edac.c EDAC-E7XXX P: Doug Thompson M: dougthompson@xmission.com -L: bluesmoke-devel@lists.sourceforge.net +L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers) W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/e7xxx_edac.c @@ -1922,7 +1922,7 @@ F: drivers/edac/e7xxx_edac.c EDAC-I82443BXGX P: Tim Small M: tim@buttersideup.com -L: bluesmoke-devel@lists.sourceforge.net +L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers) W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/i82443bxgx_edac.c @@ -1930,7 +1930,7 @@ F: drivers/edac/i82443bxgx_edac.c EDAC-I3000 P: Jason Uhlenkott M: juhlenko@akamai.com -L: bluesmoke-devel@lists.sourceforge.net +L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers) W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/i3000_edac.c @@ -1938,7 +1938,7 @@ F: drivers/edac/i3000_edac.c EDAC-I5000 P: Doug Thompson M: dougthompson@xmission.com -L: bluesmoke-devel@lists.sourceforge.net +L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers) W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/i5000_edac.c @@ -1946,7 +1946,7 @@ F: drivers/edac/i5000_edac.c EDAC-I5400 P: Mauro Carvalho Chehab M: mchehab@redhat.com -L: bluesmoke-devel@lists.sourceforge.net +L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers) W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/i5400_edac.c @@ -1956,7 +1956,7 @@ P: Ranganathan Desikan P: Arvind R. 
M: rdesikan@jetzbroadband.com M: arvind@acarlab.com -L: bluesmoke-devel@lists.sourceforge.net +L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers) W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/i82975x_edac.c @@ -1964,7 +1964,7 @@ F: drivers/edac/i82975x_edac.c EDAC-PASEMI P: Egor Martovetsky M: egor@pasemi.com -L: bluesmoke-devel@lists.sourceforge.net +L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers) W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/pasemi_edac.c @@ -1972,7 +1972,7 @@ F: drivers/edac/pasemi_edac.c EDAC-R82600 P: Tim Small M: tim@buttersideup.com -L: bluesmoke-devel@lists.sourceforge.net +L: bluesmoke-devel@lists.sourceforge.net (moderated for non-subscribers) W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/r82600_edac.c @@ -2592,8 +2592,8 @@ S: Maintained F: fs/hpfs/ HSO 3G MODEM DRIVER -P: Denis Joseph Barrow -M: d.barow@option.com +P: Jan Dumon +M: j.dumon@option.com W: http://www.pharscape.org S: Maintained F: drivers/net/usb/hso.c @@ -3680,6 +3680,7 @@ L: microblaze-uclinux@itee.uq.edu.au W: http://www.monstr.eu/fdt/ T: git git://git.monstr.eu/linux-2.6-microblaze.git S: Supported +F: arch/microblaze/ MICROTEK X6 SCANNER P: Oliver Neukum @@ -4971,8 +4972,8 @@ S: Maintained for 2.6. F: Documentation/sgi-visws.txt SGI XP/XPC/XPNET DRIVER -P: Dean Nelson -M: dcn@sgi.com +P: Robin Holt +M: holt@sgi.com S: Maintained F: drivers/misc/sgi-xp/ @@ -5306,7 +5307,9 @@ L: linux-sh@vger.kernel.org W: http://www.linux-sh.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/lethal/sh-2.6.git S: Supported +F: Documentation/sh/ F: arch/sh/ +F: drivers/sh/ SUSPEND TO RAM P: Len Brown @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 30 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Temporary Tasmanian Devil # *DOCUMENTATION* @@ -169,7 +169,7 @@ SUBARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \ -e s/arm.*/arm/ -e s/sa110/arm/ \ -e s/s390x/s390/ -e s/parisc64/parisc/ \ -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \ - -e s/sh.*/sh/ ) + -e s/sh[234].*/sh/ ) # Cross compiling and selecting different set of gcc/bin-utils # --------------------------------------------------------------------------- @@ -210,6 +210,11 @@ ifeq ($(ARCH),sparc64) SRCARCH := sparc endif +# Additional ARCH settings for sh +ifeq ($(ARCH),sh64) + SRCARCH := sh +endif + # Where to locate arch specific headers hdr-arch := $(SRCARCH) diff --git a/arch/arm/mach-omap2/usb-musb.c b/arch/arm/mach-omap2/usb-musb.c index fc74e913c41..34a56a136ef 100644 --- a/arch/arm/mach-omap2/usb-musb.c +++ b/arch/arm/mach-omap2/usb-musb.c @@ -131,14 +131,14 @@ static struct musb_hdrc_platform_data musb_plat = { .power = 50, /* up to 100 mA */ }; -static u64 musb_dmamask = DMA_32BIT_MASK; +static u64 musb_dmamask = DMA_BIT_MASK(32); static struct platform_device musb_device = { .name = "musb_hdrc", .id = -1, .dev = { .dma_mask = &musb_dmamask, - .coherent_dma_mask = DMA_32BIT_MASK, + .coherent_dma_mask = DMA_BIT_MASK(32), .platform_data = &musb_plat, }, .num_resources = ARRAY_SIZE(musb_resources), @@ -146,14 +146,14 @@ static struct platform_device musb_device = { }; #ifdef CONFIG_NOP_USB_XCEIV -static u64 nop_xceiv_dmamask = DMA_32BIT_MASK; +static u64 nop_xceiv_dmamask = DMA_BIT_MASK(32); static struct platform_device nop_xceiv_device = { .name = "nop_usb_xceiv", .id = -1, .dev = { .dma_mask = &nop_xceiv_dmamask, - .coherent_dma_mask = DMA_32BIT_MASK, + .coherent_dma_mask = DMA_BIT_MASK(32), .platform_data = NULL, }, }; diff --git 
a/arch/ia64/kernel/pci-swiotlb.c b/arch/ia64/kernel/pci-swiotlb.c index 573f02c39a0..285aae8431c 100644 --- a/arch/ia64/kernel/pci-swiotlb.c +++ b/arch/ia64/kernel/pci-swiotlb.c @@ -16,7 +16,7 @@ EXPORT_SYMBOL(swiotlb); static void *ia64_swiotlb_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { - if (dev->coherent_dma_mask != DMA_64BIT_MASK) + if (dev->coherent_dma_mask != DMA_BIT_MASK(64)) gfp |= GFP_DMA; return swiotlb_alloc_coherent(dev, size, dma_handle, gfp); } diff --git a/arch/microblaze/include/asm/auxvec.h b/arch/microblaze/include/asm/auxvec.h index e69de29bb2d..8b137891791 100644 --- a/arch/microblaze/include/asm/auxvec.h +++ b/arch/microblaze/include/asm/auxvec.h @@ -0,0 +1 @@ + diff --git a/arch/microblaze/include/asm/cputable.h b/arch/microblaze/include/asm/cputable.h index e69de29bb2d..8b137891791 100644 --- a/arch/microblaze/include/asm/cputable.h +++ b/arch/microblaze/include/asm/cputable.h @@ -0,0 +1 @@ + diff --git a/arch/microblaze/include/asm/ftrace.h b/arch/microblaze/include/asm/ftrace.h new file mode 100644 index 00000000000..8b137891791 --- /dev/null +++ b/arch/microblaze/include/asm/ftrace.h @@ -0,0 +1 @@ + diff --git a/arch/microblaze/include/asm/hw_irq.h b/arch/microblaze/include/asm/hw_irq.h index e69de29bb2d..8b137891791 100644 --- a/arch/microblaze/include/asm/hw_irq.h +++ b/arch/microblaze/include/asm/hw_irq.h @@ -0,0 +1 @@ + diff --git a/arch/microblaze/include/asm/io.h b/arch/microblaze/include/asm/io.h index cfab0342588..8b5853ee6b5 100644 --- a/arch/microblaze/include/asm/io.h +++ b/arch/microblaze/include/asm/io.h @@ -12,7 +12,6 @@ #include <asm/byteorder.h> #include <asm/page.h> #include <linux/types.h> -#include <asm/page.h> #define IO_SPACE_LIMIT (0xFFFFFFFF) diff --git a/arch/microblaze/include/asm/socket.h b/arch/microblaze/include/asm/socket.h index f919b6b540a..82593686031 100644 --- a/arch/microblaze/include/asm/socket.h +++ b/arch/microblaze/include/asm/socket.h @@ -63,4 +63,7 @@ #define SO_MARK 36 +#define SO_TIMESTAMPING 37 +#define SCM_TIMESTAMPING SO_TIMESTAMPING + #endif /* _ASM_MICROBLAZE_SOCKET_H */ diff --git a/arch/microblaze/include/asm/user.h b/arch/microblaze/include/asm/user.h index e69de29bb2d..8b137891791 100644 --- a/arch/microblaze/include/asm/user.h +++ b/arch/microblaze/include/asm/user.h @@ -0,0 +1 @@ + diff --git a/arch/microblaze/include/asm/vga.h b/arch/microblaze/include/asm/vga.h index e69de29bb2d..8b137891791 100644 --- a/arch/microblaze/include/asm/vga.h +++ b/arch/microblaze/include/asm/vga.h @@ -0,0 +1 @@ + diff --git a/arch/microblaze/kernel/of_device.c b/arch/microblaze/kernel/of_device.c index 717edf4ad0b..9a0f7632c47 100644 --- a/arch/microblaze/kernel/of_device.c +++ b/arch/microblaze/kernel/of_device.c @@ -13,7 +13,6 @@ void of_device_make_bus_id(struct of_device *dev) { static atomic_t bus_no_reg_magic; struct device_node *node = dev->node; - char *name = dev->dev.bus_id; const u32 *reg; u64 addr; int magic; @@ -25,9 +24,8 @@ void of_device_make_bus_id(struct of_device *dev) if (reg) { addr = of_translate_address(node, reg); if (addr != OF_BAD_ADDR) { - snprintf(name, BUS_ID_SIZE, - "%llx.%s", (unsigned long long)addr, - node->name); + dev_set_name(&dev->dev, "%llx.%s", + (unsigned long long)addr, node->name); return; } } @@ -37,7 +35,7 @@ void of_device_make_bus_id(struct of_device *dev) * counter (and pray...) 
*/ magic = atomic_add_return(1, &bus_no_reg_magic); - snprintf(name, BUS_ID_SIZE, "%s.%d", node->name, magic - 1); + dev_set_name(&dev->dev, "%s.%d", node->name, magic - 1); } EXPORT_SYMBOL(of_device_make_bus_id); @@ -58,7 +56,7 @@ struct of_device *of_device_alloc(struct device_node *np, dev->dev.archdata.of_node = np; if (bus_id) - strlcpy(dev->dev.bus_id, bus_id, BUS_ID_SIZE); + dev_set_name(&dev->dev, bus_id); else of_device_make_bus_id(dev); diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c index 60e9ed7d313..436f26ccbfa 100644 --- a/arch/microblaze/kernel/process.c +++ b/arch/microblaze/kernel/process.c @@ -115,8 +115,7 @@ void flush_thread(void) { } -/* FIXME - here will be a proposed change -> remove nr parameter */ -int copy_thread(int nr, unsigned long clone_flags, unsigned long usp, +int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long unused, struct task_struct *p, struct pt_regs *regs) { diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c index 475b1fac5cf..34c48718061 100644 --- a/arch/microblaze/kernel/prom.c +++ b/arch/microblaze/kernel/prom.c @@ -39,7 +39,6 @@ #include <asm/system.h> #include <asm/mmu.h> #include <asm/pgtable.h> -#include <linux/pci.h> #include <asm/sections.h> #include <asm/pci-bridge.h> diff --git a/arch/microblaze/kernel/ptrace.c b/arch/microblaze/kernel/ptrace.c index 3171e39e322..b86aa623e36 100644 --- a/arch/microblaze/kernel/ptrace.c +++ b/arch/microblaze/kernel/ptrace.c @@ -32,7 +32,6 @@ #include <linux/signal.h> #include <linux/errno.h> -#include <linux/ptrace.h> #include <asm/processor.h> #include <linux/uaccess.h> #include <asm/asm-offsets.h> diff --git a/arch/microblaze/kernel/signal.c b/arch/microblaze/kernel/signal.c index ff347b98863..3889cf45fa7 100644 --- a/arch/microblaze/kernel/signal.c +++ b/arch/microblaze/kernel/signal.c @@ -37,7 +37,6 @@ #include <linux/uaccess.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> -#include <linux/signal.h> #include <linux/syscalls.h> #include <asm/cacheflush.h> #include <asm/syscalls.h> diff --git a/arch/microblaze/kernel/sys_microblaze.c b/arch/microblaze/kernel/sys_microblaze.c index d90b548fb1b..ba0568c2cc1 100644 --- a/arch/microblaze/kernel/sys_microblaze.c +++ b/arch/microblaze/kernel/sys_microblaze.c @@ -29,9 +29,7 @@ #include <linux/module.h> #include <linux/err.h> #include <linux/fs.h> -#include <linux/ipc.h> #include <linux/semaphore.h> -#include <linux/syscalls.h> #include <linux/uaccess.h> #include <linux/unistd.h> diff --git a/arch/mn10300/include/asm/bug.h b/arch/mn10300/include/asm/bug.h index 4fcf3384e25..aa6a3888639 100644 --- a/arch/mn10300/include/asm/bug.h +++ b/arch/mn10300/include/asm/bug.h @@ -11,10 +11,12 @@ #ifndef _ASM_BUG_H #define _ASM_BUG_H +#ifdef CONFIG_BUG + /* * Tell the user there is some problem. 
*/ -#define _debug_bug_trap() \ +#define BUG() \ do { \ asm volatile( \ " syscall 15 \n" \ @@ -25,11 +27,11 @@ do { \ : \ : "i"(__FILE__), "i"(__LINE__) \ ); \ -} while (0) - -#define BUG() _debug_bug_trap() +} while (1) #define HAVE_ARCH_BUG +#endif /* CONFIG_BUG */ + #include <asm-generic/bug.h> #endif /* _ASM_BUG_H */ diff --git a/arch/mn10300/include/asm/unistd.h b/arch/mn10300/include/asm/unistd.h index 543a4f98695..fef5b434dad 100644 --- a/arch/mn10300/include/asm/unistd.h +++ b/arch/mn10300/include/asm/unistd.h @@ -344,6 +344,8 @@ #define __NR_dup3 331 #define __NR_pipe2 332 #define __NR_inotify_init1 333 +#define __NR_preadv 334 +#define __NR_pwritev 335 #ifdef __KERNEL__ diff --git a/arch/mn10300/kernel/entry.S b/arch/mn10300/kernel/entry.S index 34ab5a29315..3dc3e462f92 100644 --- a/arch/mn10300/kernel/entry.S +++ b/arch/mn10300/kernel/entry.S @@ -723,6 +723,8 @@ ENTRY(sys_call_table) .long sys_dup3 .long sys_pipe2 .long sys_inotify_init1 + .long sys_preadv + .long sys_pwritev /* 335 */ nr_syscalls=(.-sys_call_table)/4 diff --git a/arch/mn10300/kernel/setup.c b/arch/mn10300/kernel/setup.c index 71414e19fd1..79890edfd67 100644 --- a/arch/mn10300/kernel/setup.c +++ b/arch/mn10300/kernel/setup.c @@ -136,10 +136,6 @@ void __init setup_arch(char **cmdline_p) data_resource.start = virt_to_bus(&_etext); data_resource.end = virt_to_bus(&_edata)-1; -#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) -#define PFN_DOWN(x) ((x) >> PAGE_SHIFT) -#define PFN_PHYS(x) ((x) << PAGE_SHIFT) - start_pfn = (CONFIG_KERNEL_RAM_BASE_ADDRESS >> PAGE_SHIFT); kstart_pfn = PFN_UP(__pa(&_text)); free_pfn = PFN_UP(__pa(&_end)); diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 5b50e1ac617..4c780455136 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -462,7 +462,7 @@ config PPC_64K_PAGES config PPC_256K_PAGES bool "256k page size" if 44x - depends on !STDBINUTILS && (!SHMEM || BROKEN) + depends on !STDBINUTILS help Make the page size 256k. 
diff --git a/arch/powerpc/boot/dts/tqm8540.dts b/arch/powerpc/boot/dts/tqm8540.dts index 231bae75663..b6f1fc6eb96 100644 --- a/arch/powerpc/boot/dts/tqm8540.dts +++ b/arch/powerpc/boot/dts/tqm8540.dts @@ -84,9 +84,9 @@ interrupt-parent = <&mpic>; dfsrr; - dtt@50 { + dtt@48 { compatible = "national,lm75"; - reg = <0x50>; + reg = <0x48>; }; rtc@68 { diff --git a/arch/powerpc/boot/dts/tqm8541.dts b/arch/powerpc/boot/dts/tqm8541.dts index 4356a1f0829..fa6a3d54a8a 100644 --- a/arch/powerpc/boot/dts/tqm8541.dts +++ b/arch/powerpc/boot/dts/tqm8541.dts @@ -83,9 +83,9 @@ interrupt-parent = <&mpic>; dfsrr; - dtt@50 { + dtt@48 { compatible = "national,lm75"; - reg = <0x50>; + reg = <0x48>; }; rtc@68 { diff --git a/arch/powerpc/boot/dts/tqm8548-bigflash.dts b/arch/powerpc/boot/dts/tqm8548-bigflash.dts index 19aa72301c8..00f7ed7a245 100644 --- a/arch/powerpc/boot/dts/tqm8548-bigflash.dts +++ b/arch/powerpc/boot/dts/tqm8548-bigflash.dts @@ -85,9 +85,9 @@ interrupt-parent = <&mpic>; dfsrr; - dtt@50 { + dtt@48 { compatible = "national,lm75"; - reg = <0x50>; + reg = <0x48>; }; rtc@68 { @@ -247,7 +247,7 @@ interrupts = <31 2 32 2 33 2>; interrupt-parent = <&mpic>; tbi-handle = <&tbi2>; - phy-handle = <&phy3>; + phy-handle = <&phy4>; mdio@520 { #address-cells = <1>; @@ -275,7 +275,7 @@ interrupts = <37 2 38 2 39 2>; interrupt-parent = <&mpic>; tbi-handle = <&tbi3>; - phy-handle = <&phy4>; + phy-handle = <&phy5>; mdio@520 { #address-cells = <1>; diff --git a/arch/powerpc/boot/dts/tqm8548.dts b/arch/powerpc/boot/dts/tqm8548.dts index 49145a04fc6..673e4a778ac 100644 --- a/arch/powerpc/boot/dts/tqm8548.dts +++ b/arch/powerpc/boot/dts/tqm8548.dts @@ -85,9 +85,9 @@ interrupt-parent = <&mpic>; dfsrr; - dtt@50 { + dtt@48 { compatible = "national,lm75"; - reg = <0x50>; + reg = <0x48>; }; rtc@68 { @@ -247,7 +247,7 @@ interrupts = <31 2 32 2 33 2>; interrupt-parent = <&mpic>; tbi-handle = <&tbi2>; - phy-handle = <&phy3>; + phy-handle = <&phy4>; mdio@520 { #address-cells = <1>; @@ -275,7 +275,7 @@ interrupts = <37 2 38 2 39 2>; interrupt-parent = <&mpic>; tbi-handle = <&tbi3>; - phy-handle = <&phy4>; + phy-handle = <&phy5>; mdio@520 { #address-cells = <1>; diff --git a/arch/powerpc/boot/dts/tqm8555.dts b/arch/powerpc/boot/dts/tqm8555.dts index 06d366ebbda..6a99f1eef7a 100644 --- a/arch/powerpc/boot/dts/tqm8555.dts +++ b/arch/powerpc/boot/dts/tqm8555.dts @@ -83,9 +83,9 @@ interrupt-parent = <&mpic>; dfsrr; - dtt@50 { + dtt@48 { compatible = "national,lm75"; - reg = <0x50>; + reg = <0x48>; }; rtc@68 { diff --git a/arch/powerpc/boot/dts/tqm8560.dts b/arch/powerpc/boot/dts/tqm8560.dts index feff915e049..b6c2d71defd 100644 --- a/arch/powerpc/boot/dts/tqm8560.dts +++ b/arch/powerpc/boot/dts/tqm8560.dts @@ -85,9 +85,9 @@ interrupt-parent = <&mpic>; dfsrr; - dtt@50 { + dtt@48 { compatible = "national,lm75"; - reg = <0x50>; + reg = <0x48>; }; rtc@68 { diff --git a/arch/powerpc/configs/85xx/tqm8548_defconfig b/arch/powerpc/configs/85xx/tqm8548_defconfig index 0bc45975911..43030fea2ee 100644 --- a/arch/powerpc/configs/85xx/tqm8548_defconfig +++ b/arch/powerpc/configs/85xx/tqm8548_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.29-rc2 -# Mon Jan 26 15:36:20 2009 +# Linux kernel version: 2.6.29-rc7 +# Mon Mar 16 09:03:28 2009 # # CONFIG_PPC64 is not set @@ -22,6 +22,7 @@ CONFIG_FSL_EMB_PERFMON=y # CONFIG_PHYS_64BIT is not set CONFIG_SPE=y CONFIG_PPC_MMU_NOHASH=y +CONFIG_PPC_BOOK3E_MMU=y # CONFIG_PPC_MM_SLICES is not set # CONFIG_SMP is not set CONFIG_PPC32=y @@ -75,6 +76,15 
@@ CONFIG_SYSVIPC_SYSCTL=y # CONFIG_BSD_PROCESS_ACCT is not set # CONFIG_TASKSTATS is not set # CONFIG_AUDIT is not set + +# +# RCU Subsystem +# +CONFIG_CLASSIC_RCU=y +# CONFIG_TREE_RCU is not set +# CONFIG_PREEMPT_RCU is not set +# CONFIG_TREE_RCU_TRACE is not set +# CONFIG_PREEMPT_RCU_TRACE is not set # CONFIG_IKCONFIG is not set CONFIG_LOG_BUF_SHIFT=14 CONFIG_GROUP_SCHED=y @@ -152,11 +162,6 @@ CONFIG_DEFAULT_AS=y # CONFIG_DEFAULT_CFQ is not set # CONFIG_DEFAULT_NOOP is not set CONFIG_DEFAULT_IOSCHED="anticipatory" -CONFIG_CLASSIC_RCU=y -# CONFIG_TREE_RCU is not set -# CONFIG_PREEMPT_RCU is not set -# CONFIG_TREE_RCU_TRACE is not set -# CONFIG_PREEMPT_RCU_TRACE is not set # CONFIG_FREEZER is not set # @@ -202,7 +207,7 @@ CONFIG_MPIC=y # # Kernel options # -# CONFIG_HIGHMEM is not set +CONFIG_HIGHMEM=y CONFIG_TICK_ONESHOT=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y @@ -244,6 +249,7 @@ CONFIG_UNEVICTABLE_LRU=y CONFIG_PPC_4K_PAGES=y # CONFIG_PPC_16K_PAGES is not set # CONFIG_PPC_64K_PAGES is not set +# CONFIG_PPC_256K_PAGES is not set CONFIG_FORCE_MAX_ZONEORDER=11 CONFIG_PROC_DEVICETREE=y # CONFIG_CMDLINE_BOOL is not set @@ -259,6 +265,7 @@ CONFIG_ZONE_DMA=y CONFIG_PPC_INDIRECT_PCI=y CONFIG_FSL_SOC=y CONFIG_FSL_PCI=y +CONFIG_FSL_LBC=y CONFIG_PPC_PCI_CHOICE=y CONFIG_PCI=y CONFIG_PCI_DOMAINS=y @@ -284,10 +291,11 @@ CONFIG_ARCH_SUPPORTS_MSI=y # Default settings for advanced configuration options are used # CONFIG_LOWMEM_SIZE=0x30000000 +CONFIG_LOWMEM_CAM_NUM=3 CONFIG_PAGE_OFFSET=0xc0000000 CONFIG_KERNEL_START=0xc0000000 CONFIG_PHYSICAL_START=0x00000000 -CONFIG_PHYSICAL_ALIGN=0x10000000 +CONFIG_PHYSICAL_ALIGN=0x04000000 CONFIG_TASK_SIZE=0xc0000000 CONFIG_NET=y @@ -363,12 +371,7 @@ CONFIG_DEFAULT_TCP_CONG="cubic" # CONFIG_BT is not set # CONFIG_AF_RXRPC is not set # CONFIG_PHONET is not set -CONFIG_WIRELESS=y -# CONFIG_CFG80211 is not set -CONFIG_WIRELESS_OLD_REGULATORY=y -# CONFIG_WIRELESS_EXT is not set -# CONFIG_LIB80211 is not set -# CONFIG_MAC80211 is not set +# CONFIG_WIRELESS is not set # CONFIG_WIMAX is not set # CONFIG_RFKILL is not set # CONFIG_NET_9P is not set @@ -471,27 +474,18 @@ CONFIG_MTD_NAND_IDS=y # CONFIG_MTD_NAND_NANDSIM is not set # CONFIG_MTD_NAND_PLATFORM is not set # CONFIG_MTD_NAND_FSL_ELBC is not set -# CONFIG_MTD_NAND_FSL_UPM is not set +CONFIG_MTD_NAND_FSL_UPM=y # CONFIG_MTD_ONENAND is not set # # LPDDR flash memory drivers # # CONFIG_MTD_LPDDR is not set -# CONFIG_MTD_QINFO_PROBE is not set # # UBI - Unsorted block images # -CONFIG_MTD_UBI=m -CONFIG_MTD_UBI_WL_THRESHOLD=4096 -CONFIG_MTD_UBI_BEB_RESERVE=1 -# CONFIG_MTD_UBI_GLUEBI is not set - -# -# UBI debugging options -# -# CONFIG_MTD_UBI_DEBUG is not set +# CONFIG_MTD_UBI is not set CONFIG_OF_DEVICE=y CONFIG_OF_I2C=y # CONFIG_PARPORT is not set @@ -515,69 +509,21 @@ CONFIG_BLK_DEV_RAM_SIZE=32768 # CONFIG_BLK_DEV_HD is not set CONFIG_MISC_DEVICES=y # CONFIG_PHANTOM is not set -# CONFIG_EEPROM_93CX6 is not set # CONFIG_SGI_IOC4 is not set # CONFIG_TIFM_CORE is not set # CONFIG_ICS932S401 is not set # CONFIG_ENCLOSURE_SERVICES is not set # CONFIG_HP_ILO is not set # CONFIG_C2PORT is not set + +# +# EEPROM support +# +# CONFIG_EEPROM_AT24 is not set +# CONFIG_EEPROM_LEGACY is not set +# CONFIG_EEPROM_93CX6 is not set CONFIG_HAVE_IDE=y -CONFIG_IDE=y - -# -# Please see Documentation/ide/ide.txt for help/info on IDE drives -# -CONFIG_IDE_TIMINGS=y -# CONFIG_BLK_DEV_IDE_SATA is not set -CONFIG_IDE_GD=y -CONFIG_IDE_GD_ATA=y -# CONFIG_IDE_GD_ATAPI is not set -# CONFIG_BLK_DEV_IDECD is not set -# CONFIG_BLK_DEV_IDETAPE is not 
set -# CONFIG_IDE_TASK_IOCTL is not set -CONFIG_IDE_PROC_FS=y - -# -# IDE chipset support/bugfixes -# -# CONFIG_BLK_DEV_PLATFORM is not set -CONFIG_BLK_DEV_IDEDMA_SFF=y - -# -# PCI IDE chipsets support -# -CONFIG_BLK_DEV_IDEPCI=y -CONFIG_IDEPCI_PCIBUS_ORDER=y -# CONFIG_BLK_DEV_OFFBOARD is not set -CONFIG_BLK_DEV_GENERIC=y -# CONFIG_BLK_DEV_OPTI621 is not set -CONFIG_BLK_DEV_IDEDMA_PCI=y -# CONFIG_BLK_DEV_AEC62XX is not set -# CONFIG_BLK_DEV_ALI15X3 is not set -# CONFIG_BLK_DEV_AMD74XX is not set -# CONFIG_BLK_DEV_CMD64X is not set -# CONFIG_BLK_DEV_TRIFLEX is not set -# CONFIG_BLK_DEV_CS5520 is not set -# CONFIG_BLK_DEV_CS5530 is not set -# CONFIG_BLK_DEV_HPT366 is not set -# CONFIG_BLK_DEV_JMICRON is not set -# CONFIG_BLK_DEV_SC1200 is not set -# CONFIG_BLK_DEV_PIIX is not set -# CONFIG_BLK_DEV_IT8172 is not set -# CONFIG_BLK_DEV_IT8213 is not set -# CONFIG_BLK_DEV_IT821X is not set -# CONFIG_BLK_DEV_NS87415 is not set -# CONFIG_BLK_DEV_PDC202XX_OLD is not set -# CONFIG_BLK_DEV_PDC202XX_NEW is not set -# CONFIG_BLK_DEV_SVWKS is not set -# CONFIG_BLK_DEV_SIIMAGE is not set -# CONFIG_BLK_DEV_SL82C105 is not set -# CONFIG_BLK_DEV_SLC90E66 is not set -# CONFIG_BLK_DEV_TRM290 is not set -CONFIG_BLK_DEV_VIA82CXXX=y -# CONFIG_BLK_DEV_TC86C001 is not set -CONFIG_BLK_DEV_IDEDMA=y +# CONFIG_IDE is not set # # SCSI device support @@ -650,7 +596,7 @@ CONFIG_MII=y CONFIG_NETDEV_1000=y # CONFIG_ACENIC is not set # CONFIG_DL2K is not set -CONFIG_E1000=y +# CONFIG_E1000 is not set # CONFIG_E1000E is not set # CONFIG_IP1000 is not set # CONFIG_IGB is not set @@ -668,6 +614,7 @@ CONFIG_GIANFAR=y # CONFIG_QLA3XXX is not set # CONFIG_ATL1 is not set # CONFIG_ATL1E is not set +# CONFIG_ATL1C is not set # CONFIG_JME is not set CONFIG_NETDEV_10000=y # CONFIG_CHELSIO_T1 is not set @@ -835,8 +782,6 @@ CONFIG_I2C_MPC=y # Miscellaneous I2C Chip support # # CONFIG_DS1682 is not set -# CONFIG_EEPROM_AT24 is not set -# CONFIG_EEPROM_LEGACY is not set # CONFIG_SENSORS_PCF8574 is not set # CONFIG_PCF8575 is not set # CONFIG_SENSORS_PCA9539 is not set @@ -975,26 +920,7 @@ CONFIG_HID=y # Special HID drivers # CONFIG_HID_COMPAT=y -CONFIG_USB_SUPPORT=y -CONFIG_USB_ARCH_HAS_HCD=y -CONFIG_USB_ARCH_HAS_OHCI=y -CONFIG_USB_ARCH_HAS_EHCI=y -# CONFIG_USB is not set -# CONFIG_USB_OTG_WHITELIST is not set -# CONFIG_USB_OTG_BLACKLIST_HUB is not set - -# -# Enable Host or Gadget support to see Inventra options -# - -# -# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may also be needed; -# -# CONFIG_USB_GADGET is not set - -# -# OTG and related infrastructure -# +# CONFIG_USB_SUPPORT is not set # CONFIG_UWB is not set # CONFIG_MMC is not set # CONFIG_MEMSTICK is not set @@ -1064,16 +990,9 @@ CONFIG_RTC_DRV_DS1307=y # # File systems # -CONFIG_EXT2_FS=y -# CONFIG_EXT2_FS_XATTR is not set -# CONFIG_EXT2_FS_XIP is not set -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_XATTR=y -# CONFIG_EXT3_FS_POSIX_ACL is not set -# CONFIG_EXT3_FS_SECURITY is not set +# CONFIG_EXT2_FS is not set +# CONFIG_EXT3_FS is not set # CONFIG_EXT4_FS is not set -CONFIG_JBD=y -CONFIG_FS_MBCACHE=y # CONFIG_REISERFS_FS is not set # CONFIG_JFS_FS is not set # CONFIG_FS_POSIX_ACL is not set @@ -1122,8 +1041,17 @@ CONFIG_MISC_FILESYSTEMS=y # CONFIG_BEFS_FS is not set # CONFIG_BFS_FS is not set # CONFIG_EFS_FS is not set -# CONFIG_JFFS2_FS is not set -# CONFIG_UBIFS_FS is not set +CONFIG_JFFS2_FS=y +CONFIG_JFFS2_FS_DEBUG=0 +CONFIG_JFFS2_FS_WRITEBUFFER=y +# CONFIG_JFFS2_FS_WBUF_VERIFY is not set +# CONFIG_JFFS2_SUMMARY is not set +# CONFIG_JFFS2_FS_XATTR is not set +# 
CONFIG_JFFS2_COMPRESSION_OPTIONS is not set +CONFIG_JFFS2_ZLIB=y +# CONFIG_JFFS2_LZO is not set +CONFIG_JFFS2_RTIME=y +# CONFIG_JFFS2_RUBIN is not set # CONFIG_CRAMFS is not set # CONFIG_SQUASHFS is not set # CONFIG_VXFS_FS is not set @@ -1184,6 +1112,8 @@ CONFIG_GENERIC_FIND_LAST_BIT=y CONFIG_CRC32=y # CONFIG_CRC7 is not set # CONFIG_LIBCRC32C is not set +CONFIG_ZLIB_INFLATE=y +CONFIG_ZLIB_DEFLATE=y CONFIG_PLIST=y CONFIG_HAS_IOMEM=y CONFIG_HAS_IOPORT=y @@ -1219,6 +1149,7 @@ CONFIG_DEBUG_MUTEXES=y # CONFIG_DEBUG_SPINLOCK_SLEEP is not set # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set # CONFIG_DEBUG_KOBJECT is not set +# CONFIG_DEBUG_HIGHMEM is not set # CONFIG_DEBUG_BUGVERBOSE is not set # CONFIG_DEBUG_INFO is not set # CONFIG_DEBUG_VM is not set @@ -1236,6 +1167,7 @@ CONFIG_DEBUG_MUTEXES=y # CONFIG_LATENCYTOP is not set CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_HAVE_FUNCTION_TRACER=y +CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h index 6d406c5c5de..9696cc36d2d 100644 --- a/arch/powerpc/include/asm/futex.h +++ b/arch/powerpc/include/asm/futex.h @@ -27,7 +27,7 @@ PPC_LONG "1b,4b,2b,4b\n" \ ".previous" \ : "=&r" (oldval), "=&r" (ret) \ - : "b" (uaddr), "i" (-EFAULT), "1" (oparg) \ + : "b" (uaddr), "i" (-EFAULT), "r" (oparg) \ : "cr0", "memory") static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr) @@ -47,19 +47,19 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr) switch (op) { case FUTEX_OP_SET: - __futex_atomic_op("", ret, oldval, uaddr, oparg); + __futex_atomic_op("mr %1,%4\n", ret, oldval, uaddr, oparg); break; case FUTEX_OP_ADD: - __futex_atomic_op("add %1,%0,%1\n", ret, oldval, uaddr, oparg); + __futex_atomic_op("add %1,%0,%4\n", ret, oldval, uaddr, oparg); break; case FUTEX_OP_OR: - __futex_atomic_op("or %1,%0,%1\n", ret, oldval, uaddr, oparg); + __futex_atomic_op("or %1,%0,%4\n", ret, oldval, uaddr, oparg); break; case FUTEX_OP_ANDN: - __futex_atomic_op("andc %1,%0,%1\n", ret, oldval, uaddr, oparg); + __futex_atomic_op("andc %1,%0,%4\n", ret, oldval, uaddr, oparg); break; case FUTEX_OP_XOR: - __futex_atomic_op("xor %1,%0,%1\n", ret, oldval, uaddr, oparg); + __futex_atomic_op("xor %1,%0,%4\n", ret, oldval, uaddr, oparg); break; default: ret = -ENOSYS; diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index cbf15438709..86d2366ab6a 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -52,6 +52,12 @@ */ #define MMU_FTR_NEED_DTLB_SW_LRU ASM_CONST(0x00200000) +/* This indicates that the processor uses the wrong opcode for tlbilx + * instructions. 
During the ISA 2.06 development the opcode for tlbilx + * changed and some early implementations used to old opcode + */ +#define MMU_FTR_TLBILX_EARLY_OPCODE ASM_CONST(0x00400000) + #ifndef __ASSEMBLY__ #include <asm/cputable.h> diff --git a/arch/powerpc/include/asm/parport.h b/arch/powerpc/include/asm/parport.h index 414c50e2e88..94942d60ddf 100644 --- a/arch/powerpc/include/asm/parport.h +++ b/arch/powerpc/include/asm/parport.h @@ -29,7 +29,7 @@ static int __devinit parport_pc_find_nonpci_ports (int autoirq, int autodma) prop = of_get_property(np, "interrupts", NULL); if (!prop) continue; - if (parport_pc_probe_port(io1, io2, prop[0], autodma, NULL) != NULL) + if (parport_pc_probe_port(io1, io2, prop[0], autodma, NULL, 0) != NULL) count++; } return count; diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index f4a4db8d555..ef4da37f3c1 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -43,7 +43,8 @@ #define PPC_INST_STSWI 0x7c0005aa #define PPC_INST_STSWX 0x7c00052a -#define PPC_INST_TLBILX 0x7c000626 +#define PPC_INST_TLBILX 0x7c000024 +#define PPC_INST_TLBILX_EARLY 0x7c000626 #define PPC_INST_WAIT 0x7c00007c /* macros to insert fields into opcodes */ @@ -63,10 +64,18 @@ #define PPC_RFDI stringify_in_c(.long PPC_INST_RFDI) #define PPC_RFMCI stringify_in_c(.long PPC_INST_RFMCI) #define PPC_TLBILX(t, a, b) stringify_in_c(.long PPC_INST_TLBILX | \ - __PPC_T_TLB(t) | __PPC_RA(a) | __PPC_RB(b)) + __PPC_T_TLB(t) | \ + __PPC_RA(a) | __PPC_RB(b)) #define PPC_TLBILX_ALL(a, b) PPC_TLBILX(0, a, b) #define PPC_TLBILX_PID(a, b) PPC_TLBILX(1, a, b) #define PPC_TLBILX_VA(a, b) PPC_TLBILX(3, a, b) + +#define PPC_TLBILX_EARLY(t, a, b) stringify_in_c(.long PPC_INST_TLBILX_EARLY | \ + __PPC_T_TLB(t) | \ + __PPC_RA(a) | __PPC_RB(b)) +#define PPC_TLBILX_ALL_EARLY(a, b) PPC_TLBILX_EARLY(0, a, b) +#define PPC_TLBILX_PID_EARLY(a, b) PPC_TLBILX_EARLY(1, a, b) +#define PPC_TLBILX_VA_EARLY(a, b) PPC_TLBILX_EARLY(3, a, b) #define PPC_WAIT(w) stringify_in_c(.long PPC_INST_WAIT | \ __PPC_WC(w)) diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index cd1b687544f..57db50f4028 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -1766,7 +1766,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_features = CPU_FTRS_E500MC, .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS | - MMU_FTR_USE_TLBILX, + MMU_FTR_USE_TLBILX | MMU_FTR_TLBILX_EARLY_OPCODE, .icache_bsize = 64, .dcache_bsize = 64, .num_pmcs = 4, diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index 7af72970fae..ad2eb4d34dd 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -125,7 +125,6 @@ static void do_flush_tlb_page_ipi(void *param) void flush_tlb_mm(struct mm_struct *mm) { - cpumask_t cpu_mask; unsigned int pid; preempt_disable(); diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/tlb_nohash_low.S index 788b87c36f7..45fed369834 100644 --- a/arch/powerpc/mm/tlb_nohash_low.S +++ b/arch/powerpc/mm/tlb_nohash_low.S @@ -138,7 +138,11 @@ BEGIN_MMU_FTR_SECTION andi. 
r3,r3,MMUCSR0_TLBFI@l bne 1b MMU_FTR_SECTION_ELSE - PPC_TLBILX_ALL(0,0) + BEGIN_MMU_FTR_SECTION_NESTED(96) + PPC_TLBILX_ALL(0,r3) + MMU_FTR_SECTION_ELSE_NESTED(96) + PPC_TLBILX_ALL_EARLY(0,r3) + ALT_MMU_FTR_SECTION_END_NESTED_IFCLR(MMU_FTR_TLBILX_EARLY_OPCODE, 96) ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_TLBILX) msync isync @@ -151,7 +155,11 @@ BEGIN_MMU_FTR_SECTION wrteei 0 mfspr r4,SPRN_MAS6 /* save MAS6 */ mtspr SPRN_MAS6,r3 + BEGIN_MMU_FTR_SECTION_NESTED(96) PPC_TLBILX_PID(0,0) + MMU_FTR_SECTION_ELSE_NESTED(96) + PPC_TLBILX_PID_EARLY(0,0) + ALT_MMU_FTR_SECTION_END_NESTED_IFCLR(MMU_FTR_TLBILX_EARLY_OPCODE, 96) mtspr SPRN_MAS6,r4 /* restore MAS6 */ wrtee r10 MMU_FTR_SECTION_ELSE @@ -185,7 +193,11 @@ BEGIN_MMU_FTR_SECTION mtspr SPRN_MAS1,r4 tlbwe MMU_FTR_SECTION_ELSE + BEGIN_MMU_FTR_SECTION_NESTED(96) PPC_TLBILX_VA(0,r3) + MMU_FTR_SECTION_ELSE_NESTED(96) + PPC_TLBILX_VA_EARLY(0,r3) + ALT_MMU_FTR_SECTION_END_NESTED_IFCLR(MMU_FTR_TLBILX_EARLY_OPCODE, 96) ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_USE_TLBILX) msync isync diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c index fafcaa0e81e..ab69925d579 100644 --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -25,6 +25,7 @@ #include <asm/smp.h> #include <asm/system.h> #include <asm/uaccess.h> +#include <asm/firmware.h> #include "plpar_wrappers.h" diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c index 380420f8c40..9a2a6e32f00 100644 --- a/arch/powerpc/platforms/pseries/eeh_driver.c +++ b/arch/powerpc/platforms/pseries/eeh_driver.c @@ -182,6 +182,8 @@ static void eeh_report_reset(struct pci_dev *dev, void *userdata) if (!driver) return; + dev->error_state = pci_channel_io_normal; + eeh_enable_irq(dev); if (!driver->err_handler || diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index 27b70d8a359..aeb3cff95f6 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -176,7 +176,7 @@ static void __appldata_mod_vtimer_wrap(void *p) { struct vtimer_list *timer; u64 expires; } *args = p; - mod_virt_timer(args->timer, args->expires); + mod_virt_timer_periodic(args->timer, args->expires); } #define APPLDATA_ADD_TIMER 0 diff --git a/arch/s390/include/asm/cpuid.h b/arch/s390/include/asm/cpuid.h new file mode 100644 index 00000000000..07836a2e522 --- /dev/null +++ b/arch/s390/include/asm/cpuid.h @@ -0,0 +1,25 @@ +/* + * Copyright IBM Corp. 2000,2009 + * Author(s): Hartmut Penner <hp@de.ibm.com>, + * Martin Schwidefsky <schwidefsky@de.ibm.com> + * Christian Ehrhardt <ehrhardt@de.ibm.com> + */ + +#ifndef _ASM_S390_CPUID_H_ +#define _ASM_S390_CPUID_H_ + +/* + * CPU type and hardware bug flags. Kept separately for each CPU. + * Members of this structure are referenced in head.S, so think twice + * before touching them. 
[mj] + */ + +typedef struct +{ + unsigned int version : 8; + unsigned int ident : 24; + unsigned int machine : 16; + unsigned int unused : 16; +} __attribute__ ((packed)) cpuid_t; + +#endif /* _ASM_S390_CPUID_H_ */ diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index c6e674f5fca..54ea39f96ec 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -15,6 +15,7 @@ #define ASM_KVM_HOST_H #include <linux/kvm_host.h> #include <asm/debug.h> +#include <asm/cpuid.h> #define KVM_MAX_VCPUS 64 #define KVM_MEMORY_SLOTS 32 diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index b349f1c7fdf..3aeca492b14 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -66,6 +66,7 @@ #define __LC_USER_EXEC_ASCE 0x02ac #define __LC_CPUID 0x02b0 #define __LC_INT_CLOCK 0x02c8 +#define __LC_MACHINE_FLAGS 0x02d8 #define __LC_IRB 0x0300 #define __LC_PFAULT_INTPARM 0x0080 #define __LC_CPU_TIMER_SAVE_AREA 0x00d8 @@ -110,6 +111,7 @@ #define __LC_CPUID 0x0320 #define __LC_INT_CLOCK 0x0340 #define __LC_VDSO_PER_CPU 0x0350 +#define __LC_MACHINE_FLAGS 0x0358 #define __LC_IRB 0x0380 #define __LC_PASTE 0x03c0 #define __LC_PFAULT_INTPARM 0x11b8 @@ -127,9 +129,9 @@ #ifndef __ASSEMBLY__ -#include <asm/processor.h> +#include <asm/cpuid.h> +#include <asm/ptrace.h> #include <linux/types.h> -#include <asm/sigp.h> void restart_int_handler(void); void ext_int_handler(void); @@ -277,7 +279,8 @@ struct _lowcore __u32 ext_call_fast; /* 0x02c4 */ __u64 int_clock; /* 0x02c8 */ __u64 clock_comparator; /* 0x02d0 */ - __u8 pad_0x02d8[0x0300-0x02d8]; /* 0x02d8 */ + __u32 machine_flags; /* 0x02d8 */ + __u8 pad_0x02dc[0x0300-0x02dc]; /* 0x02dc */ /* Interrupt response block */ __u8 irb[64]; /* 0x0300 */ @@ -381,7 +384,8 @@ struct _lowcore __u64 int_clock; /* 0x0340 */ __u64 clock_comparator; /* 0x0348 */ __u64 vdso_per_cpu_data; /* 0x0350 */ - __u8 pad_0x0358[0x0380-0x0358]; /* 0x0358 */ + __u64 machine_flags; /* 0x0358 */ + __u8 pad_0x0360[0x0380-0x0360]; /* 0x0360 */ /* Interrupt response block. */ __u8 irb[64]; /* 0x0380 */ diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 61862b3ac79..c139fa7b8e8 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -14,7 +14,10 @@ #define __ASM_S390_PROCESSOR_H #include <linux/linkage.h> +#include <asm/cpuid.h> +#include <asm/page.h> #include <asm/ptrace.h> +#include <asm/setup.h> #ifdef __KERNEL__ /* @@ -23,20 +26,6 @@ */ #define current_text_addr() ({ void *pc; asm("basr %0,0" : "=a" (pc)); pc; }) -/* - * CPU type and hardware bug flags. Kept separately for each CPU. - * Members of this structure are referenced in head.S, so think twice - * before touching them. 
[mj] - */ - -typedef struct -{ - unsigned int version : 8; - unsigned int ident : 24; - unsigned int machine : 16; - unsigned int unused : 16; -} __attribute__ ((packed)) cpuid_t; - static inline void get_cpu_id(cpuid_t *ptr) { asm volatile("stidp 0(%1)" : "=m" (*ptr) : "a" (ptr)); diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index f1b051630c5..539263fc9ab 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -313,8 +313,6 @@ typedef struct #ifdef __KERNEL__ -#include <asm/setup.h> -#include <asm/page.h> /* * The pt_regs struct defines the way the registers are stored on diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index e8bd6ac22c9..38b0fc221ed 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -14,6 +14,7 @@ #ifdef __KERNEL__ +#include <asm/lowcore.h> #include <asm/types.h> #define PARMAREA 0x10400 @@ -63,7 +64,6 @@ extern unsigned int s390_noexec; /* * Machine features detected in head.S */ -extern unsigned long machine_flags; #define MACHINE_FLAG_VM (1UL << 0) #define MACHINE_FLAG_IEEE (1UL << 1) @@ -77,28 +77,28 @@ extern unsigned long machine_flags; #define MACHINE_FLAG_HPAGE (1UL << 10) #define MACHINE_FLAG_PFMF (1UL << 11) -#define MACHINE_IS_VM (machine_flags & MACHINE_FLAG_VM) -#define MACHINE_IS_KVM (machine_flags & MACHINE_FLAG_KVM) -#define MACHINE_HAS_DIAG9C (machine_flags & MACHINE_FLAG_DIAG9C) +#define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM) +#define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM) +#define MACHINE_HAS_DIAG9C (S390_lowcore.machine_flags & MACHINE_FLAG_DIAG9C) #ifndef __s390x__ -#define MACHINE_HAS_IEEE (machine_flags & MACHINE_FLAG_IEEE) -#define MACHINE_HAS_CSP (machine_flags & MACHINE_FLAG_CSP) +#define MACHINE_HAS_IEEE (S390_lowcore.machine_flags & MACHINE_FLAG_IEEE) +#define MACHINE_HAS_CSP (S390_lowcore.machine_flags & MACHINE_FLAG_CSP) #define MACHINE_HAS_IDTE (0) #define MACHINE_HAS_DIAG44 (1) -#define MACHINE_HAS_MVPG (machine_flags & MACHINE_FLAG_MVPG) +#define MACHINE_HAS_MVPG (S390_lowcore.machine_flags & MACHINE_FLAG_MVPG) #define MACHINE_HAS_MVCOS (0) #define MACHINE_HAS_HPAGE (0) #define MACHINE_HAS_PFMF (0) #else /* __s390x__ */ #define MACHINE_HAS_IEEE (1) #define MACHINE_HAS_CSP (1) -#define MACHINE_HAS_IDTE (machine_flags & MACHINE_FLAG_IDTE) -#define MACHINE_HAS_DIAG44 (machine_flags & MACHINE_FLAG_DIAG44) +#define MACHINE_HAS_IDTE (S390_lowcore.machine_flags & MACHINE_FLAG_IDTE) +#define MACHINE_HAS_DIAG44 (S390_lowcore.machine_flags & MACHINE_FLAG_DIAG44) #define MACHINE_HAS_MVPG (1) -#define MACHINE_HAS_MVCOS (machine_flags & MACHINE_FLAG_MVCOS) -#define MACHINE_HAS_HPAGE (machine_flags & MACHINE_FLAG_HPAGE) -#define MACHINE_HAS_PFMF (machine_flags & MACHINE_FLAG_PFMF) +#define MACHINE_HAS_MVCOS (S390_lowcore.machine_flags & MACHINE_FLAG_MVCOS) +#define MACHINE_HAS_HPAGE (S390_lowcore.machine_flags & MACHINE_FLAG_HPAGE) +#define MACHINE_HAS_PFMF (S390_lowcore.machine_flags & MACHINE_FLAG_PFMF) #endif /* __s390x__ */ #define ZFCPDUMP_HSA_SIZE (32UL<<20) diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index c544aa52453..461f2abd2e6 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -31,8 +31,9 @@ #define ASYNC_SIZE (PAGE_SIZE << ASYNC_ORDER) #ifndef __ASSEMBLY__ -#include <asm/processor.h> #include <asm/lowcore.h> +#include <asm/page.h> +#include <asm/processor.h> /* * low level task data that entry.S 
needs immediate access to diff --git a/arch/s390/include/asm/timer.h b/arch/s390/include/asm/timer.h index e4bcab739c1..814243cafdf 100644 --- a/arch/s390/include/asm/timer.h +++ b/arch/s390/include/asm/timer.h @@ -41,6 +41,7 @@ extern void init_virt_timer(struct vtimer_list *timer); extern void add_virt_timer(void *new); extern void add_virt_timer_periodic(void *new); extern int mod_virt_timer(struct vtimer_list *timer, __u64 expires); +extern int mod_virt_timer_periodic(struct vtimer_list *timer, __u64 expires); extern int del_virt_timer(struct vtimer_list *timer); extern void init_cpu_vtimer(void); diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index d744c3d62de..cc21e3e20fd 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -11,6 +11,9 @@ #ifndef _ASM_S390_TIMEX_H #define _ASM_S390_TIMEX_H +/* The value of the TOD clock for 1.1.1970. */ +#define TOD_UNIX_EPOCH 0x7d91048bca000000ULL + /* Inline functions for clock register access. */ static inline int set_clock(__u64 time) { @@ -85,4 +88,6 @@ int get_sync_clock(unsigned long long *clock); void init_cpu_timer(void); unsigned long long monotonic_clock(void); +extern u64 sched_clock_base_cc; + #endif diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h index c8ad350d144..f0f19e6ace6 100644 --- a/arch/s390/include/asm/unistd.h +++ b/arch/s390/include/asm/unistd.h @@ -265,7 +265,9 @@ #define __NR_pipe2 325 #define __NR_dup3 326 #define __NR_epoll_create1 327 -#define NR_syscalls 328 +#define __NR_preadv 328 +#define __NR_pwritev 329 +#define NR_syscalls 330 /* * There are some system calls that are not present on 64 bit, some diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 67a60016bab..fa9905ce7d0 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -27,6 +27,8 @@ int main(void) DEFINE(__TI_flags, offsetof(struct thread_info, flags)); DEFINE(__TI_cpu, offsetof(struct thread_info, cpu)); DEFINE(__TI_precount, offsetof(struct thread_info, preempt_count)); + DEFINE(__TI_user_timer, offsetof(struct thread_info, user_timer)); + DEFINE(__TI_system_timer, offsetof(struct thread_info, system_timer)); BLANK(); DEFINE(__PT_ARGS, offsetof(struct pt_regs, args)); DEFINE(__PT_PSW, offsetof(struct pt_regs, psw)); diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 87cf5a79a35..fb38af6316b 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -1805,3 +1805,21 @@ compat_sys_keyctl_wrapper: llgfr %r5,%r5 # u32 llgfr %r6,%r6 # u32 jg compat_sys_keyctl # branch to system call + + .globl compat_sys_preadv_wrapper +compat_sys_preadv_wrapper: + llgfr %r2,%r2 # unsigned long + llgtr %r3,%r3 # compat_iovec * + llgfr %r4,%r4 # unsigned long + llgfr %r5,%r5 # u32 + llgfr %r6,%r6 # u32 + jg compat_sys_preadv # branch to system call + + .globl compat_sys_pwritev_wrapper +compat_sys_pwritev_wrapper: + llgfr %r2,%r2 # unsigned long + llgtr %r3,%r3 # compat_iovec * + llgfr %r4,%r4 # unsigned long + llgfr %r5,%r5 # u32 + llgfr %r6,%r6 # u32 + jg compat_sys_pwritev # branch to system call diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 4d221c81c84..cf09948faad 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -34,8 +34,25 @@ char kernel_nss_name[NSS_NAME_SIZE + 1]; +static unsigned long machine_flags; + static void __init setup_boot_command_line(void); +/* + * Get the TOD clock running. 
+ */ +static void __init reset_tod_clock(void) +{ + u64 time; + + if (store_clock(&time) == 0) + return; + /* TOD clock not running. Set the clock to Unix Epoch. */ + if (set_clock(TOD_UNIX_EPOCH) != 0 || store_clock(&time) != 0) + disabled_wait(0); + + sched_clock_base_cc = TOD_UNIX_EPOCH; +} #ifdef CONFIG_SHARED_KERNEL int __init savesys_ipl_nss(char *cmd, const int cmdlen); @@ -370,6 +387,7 @@ static void __init setup_boot_command_line(void) */ void __init startup_init(void) { + reset_tod_clock(); ipl_save_parameters(); rescue_initrd(); clear_bss_section(); @@ -391,5 +409,6 @@ void __init startup_init(void) setup_hpage(); sclp_facilities_detect(); detect_memory_layout(memory_chunk); + S390_lowcore.machine_flags = machine_flags; lockdep_on(); } diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 1268aa2991b..f3e27593421 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -837,16 +837,29 @@ mcck_return: __CPUINIT .globl restart_int_handler restart_int_handler: + basr %r1,0 +restart_base: + spt restart_vtime-restart_base(%r1) + stck __LC_LAST_UPDATE_CLOCK + mvc __LC_LAST_UPDATE_TIMER(8),restart_vtime-restart_base(%r1) + mvc __LC_EXIT_TIMER(8),restart_vtime-restart_base(%r1) l %r15,__LC_SAVE_AREA+60 # load ksp lctl %c0,%c15,__LC_CREGS_SAVE_AREA # get new ctl regs lam %a0,%a15,__LC_AREGS_SAVE_AREA lm %r6,%r15,__SF_GPRS(%r15) # load registers from clone + l %r1,__LC_THREAD_INFO + mvc __LC_USER_TIMER(8),__TI_user_timer(%r1) + mvc __LC_SYSTEM_TIMER(8),__TI_system_timer(%r1) + xc __LC_STEAL_TIMER(8),__LC_STEAL_TIMER stosm __SF_EMPTY(%r15),0x04 # now we can turn dat on basr %r14,0 l %r14,restart_addr-.(%r14) br %r14 # branch to start_secondary restart_addr: .long start_secondary + .align 8 +restart_vtime: + .long 0x7fffffff,0xffffffff .previous #else /* diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index c6fbde13971..84a105838e0 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -831,14 +831,27 @@ mcck_return: __CPUINIT .globl restart_int_handler restart_int_handler: + basr %r1,0 +restart_base: + spt restart_vtime-restart_base(%r1) + stck __LC_LAST_UPDATE_CLOCK + mvc __LC_LAST_UPDATE_TIMER(8),restart_vtime-restart_base(%r1) + mvc __LC_EXIT_TIMER(8),restart_vtime-restart_base(%r1) lg %r15,__LC_SAVE_AREA+120 # load ksp lghi %r10,__LC_CREGS_SAVE_AREA lctlg %c0,%c15,0(%r10) # get new ctl regs lghi %r10,__LC_AREGS_SAVE_AREA lam %a0,%a15,0(%r10) lmg %r6,%r15,__SF_GPRS(%r15) # load registers from clone + lg %r1,__LC_THREAD_INFO + mvc __LC_USER_TIMER(8),__TI_user_timer(%r1) + mvc __LC_SYSTEM_TIMER(8),__TI_system_timer(%r1) + xc __LC_STEAL_TIMER(8),__LC_STEAL_TIMER stosm __SF_EMPTY(%r15),0x04 # now we can turn dat on jg start_secondary + .align 8 +restart_vtime: + .long 0x7fffffff,0xffffffff .previous #else /* diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index 1046c2c9f8d..bba14494ee0 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -471,7 +471,12 @@ startup:basr %r13,0 # get base .LPG0: xc 0x200(256),0x200 # partially clear lowcore xc 0x300(256),0x300 - + l %r1,5f-.LPG0(%r13) + stck 0(%r1) + spt 6f-.LPG0(%r13) + mvc __LC_LAST_UPDATE_CLOCK(8),0(%r1) + mvc __LC_LAST_UPDATE_TIMER(8),6f-.LPG0(%r13) + mvc __LC_EXIT_TIMER(8),5f-.LPG0(%r13) #ifndef CONFIG_MARCH_G5 # check processor version against MARCH_{G5,Z900,Z990,Z9_109,Z10} stidp __LC_CPUID # store cpuid @@ -496,9 +501,13 @@ startup:basr %r13,0 # get base brct %r0,0b #endif - l %r13,0f-.LPG0(%r13) + l %r13,4f-.LPG0(%r13) b 0(%r13) 
-0: .long startup_continue + .align 4 +4: .long startup_continue +5: .long sched_clock_base_cc + .align 8 +6: .long 0x7fffffff,0xffffffff # # params at 10400 (setup.h) diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 4bfdc421d7e..28cf196ba77 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -10,6 +10,7 @@ #include <linux/init.h> #include <linux/errno.h> +#include <linux/hardirq.h> #include <linux/time.h> #include <linux/module.h> #include <asm/lowcore.h> @@ -253,7 +254,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs) struct mci *mci; int umode; - lockdep_off(); + nmi_enter(); s390_idle_check(); mci = (struct mci *) &S390_lowcore.mcck_interruption_code; @@ -363,7 +364,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs) mcck->warning = 1; set_thread_flag(TIF_MCCK_PENDING); } - lockdep_on(); + nmi_exit(); } static int __init machine_check_init(void) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 06201b93cbb..7402b6a39ea 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -82,9 +82,6 @@ EXPORT_SYMBOL(console_devno); unsigned int console_irq = -1; EXPORT_SYMBOL(console_irq); -unsigned long machine_flags; -EXPORT_SYMBOL(machine_flags); - unsigned long elf_hwcap = 0; char elf_platform[ELF_PLATFORM_SIZE]; @@ -426,6 +423,7 @@ setup_lowcore(void) __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0) + PAGE_SIZE; lc->current_task = (unsigned long) init_thread_union.thread_info.task; lc->thread_info = (unsigned long) &init_thread_union; + lc->machine_flags = S390_lowcore.machine_flags; #ifndef CONFIG_64BIT if (MACHINE_HAS_IEEE) { lc->extended_save_area_addr = (__u32) @@ -436,6 +434,14 @@ setup_lowcore(void) #else lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0]; #endif + lc->sync_enter_timer = S390_lowcore.sync_enter_timer; + lc->async_enter_timer = S390_lowcore.async_enter_timer; + lc->exit_timer = S390_lowcore.exit_timer; + lc->user_timer = S390_lowcore.user_timer; + lc->system_timer = S390_lowcore.system_timer; + lc->steal_timer = S390_lowcore.steal_timer; + lc->last_update_timer = S390_lowcore.last_update_timer; + lc->last_update_clock = S390_lowcore.last_update_clock; set_prefix((u32)(unsigned long) lc); lowcore_ptr[0] = lc; } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 006ed5016eb..a985a3ba440 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -571,6 +571,7 @@ int __cpuinit __cpu_up(unsigned int cpu) cpu_lowcore->current_task = (unsigned long) idle; cpu_lowcore->cpu_nr = cpu; cpu_lowcore->kernel_asce = S390_lowcore.kernel_asce; + cpu_lowcore->machine_flags = S390_lowcore.machine_flags; eieio(); while (signal_processor(cpu, sigp_restart) == sigp_busy) @@ -590,7 +591,8 @@ static int __init setup_possible_cpus(char *s) int pcpus, cpu; pcpus = simple_strtoul(s, NULL, 0); - for (cpu = 0; cpu < pcpus && cpu < nr_cpu_ids; cpu++) + init_cpu_possible(cpumask_of(0)); + for (cpu = 1; cpu < pcpus && cpu < nr_cpu_ids; cpu++) set_cpu_possible(cpu, true); return 0; } diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index fe5b25a988a..2c7739fe70b 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -336,3 +336,5 @@ SYSCALL(sys_inotify_init1,sys_inotify_init1,sys_inotify_init1_wrapper) SYSCALL(sys_pipe2,sys_pipe2,sys_pipe2_wrapper) /* 325 */ SYSCALL(sys_dup3,sys_dup3,sys_dup3_wrapper) SYSCALL(sys_epoll_create1,sys_epoll_create1,sys_epoll_create1_wrapper) +SYSCALL(sys_preadv,sys_preadv,compat_sys_preadv_wrapper) 
+SYSCALL(sys_pwritev,sys_pwritev,compat_sys_pwritev_wrapper) diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index f72d41068dc..6ded50dfa75 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -52,9 +52,6 @@ #define USECS_PER_JIFFY ((unsigned long) 1000000/HZ) #define CLK_TICKS_PER_JIFFY ((unsigned long) USECS_PER_JIFFY << 12) -/* The value of the TOD clock for 1.1.1970. */ -#define TOD_UNIX_EPOCH 0x7d91048bca000000ULL - /* * Create a small time difference between the timer interrupts * on the different cpus to avoid lock contention. @@ -63,9 +60,10 @@ #define TICK_SIZE tick +u64 sched_clock_base_cc = -1; /* Force to data section. */ + static ext_int_info_t ext_int_info_cc; static ext_int_info_t ext_int_etr_cc; -static u64 sched_clock_base_cc; static DEFINE_PER_CPU(struct clock_event_device, comparators); @@ -195,22 +193,12 @@ static void timing_alert_interrupt(__u16 code) static void etr_reset(void); static void stp_reset(void); -/* - * Get the TOD clock running. - */ -static u64 __init reset_tod_clock(void) +unsigned long read_persistent_clock(void) { - u64 time; - - etr_reset(); - stp_reset(); - if (store_clock(&time) == 0) - return time; - /* TOD clock not running. Set the clock to Unix Epoch. */ - if (set_clock(TOD_UNIX_EPOCH) != 0 || store_clock(&time) != 0) - panic("TOD clock not operational."); + struct timespec ts; - return TOD_UNIX_EPOCH; + tod_to_timeval(get_clock() - TOD_UNIX_EPOCH, &ts); + return ts.tv_sec; } static cycle_t read_tod_clock(void) @@ -265,12 +253,13 @@ void update_vsyscall_tz(void) */ void __init time_init(void) { - sched_clock_base_cc = reset_tod_clock(); + struct timespec ts; + unsigned long flags; + cycle_t now; - /* set xtime */ - tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, &xtime); - set_normalized_timespec(&wall_to_monotonic, - -xtime.tv_sec, -xtime.tv_nsec); + /* Reset time synchronization interfaces. */ + etr_reset(); + stp_reset(); /* request the clock comparator external interrupt */ if (register_early_external_interrupt(0x1004, @@ -278,17 +267,38 @@ void __init time_init(void) &ext_int_info_cc) != 0) panic("Couldn't request external interrupt 0x1004"); - if (clocksource_register(&clocksource_tod) != 0) - panic("Could not register TOD clock source"); - /* request the timing alert external interrupt */ if (register_early_external_interrupt(0x1406, timing_alert_interrupt, &ext_int_etr_cc) != 0) panic("Couldn't request external interrupt 0x1406"); + if (clocksource_register(&clocksource_tod) != 0) + panic("Could not register TOD clock source"); + + /* + * The TOD clock is an accurate clock. The xtime should be + * initialized in a way that the difference between TOD and + * xtime is reasonably small. Too bad that timekeeping_init + * sets xtime.tv_nsec to zero. In addition the clock source + * change from the jiffies clock source to the TOD clock + * source adds another error of up to 1/HZ second. The same + * function sets wall_to_monotonic to a value that is too + * small for /proc/uptime to be accurate. + * Reset xtime and wall_to_monotonic to sane values. + */ + write_seqlock_irqsave(&xtime_lock, flags); + now = get_clock(); + tod_to_timeval(now - TOD_UNIX_EPOCH, &xtime); + clocksource_tod.cycle_last = now; + clocksource_tod.raw_time = xtime; + tod_to_timeval(sched_clock_base_cc - TOD_UNIX_EPOCH, &ts); + set_normalized_timespec(&wall_to_monotonic, -ts.tv_sec, -ts.tv_nsec); + write_sequnlock_irqrestore(&xtime_lock, flags); + /* Enable TOD clock interrupts on the boot cpu.
*/ init_cpu_timer(); + /* Enable cpu timer interrupts on the boot cpu. */ vtime_init(); } @@ -1423,6 +1433,7 @@ static void *stp_page; static void stp_work_fn(struct work_struct *work); static DEFINE_MUTEX(stp_work_mutex); static DECLARE_WORK(stp_work, stp_work_fn); +static struct timer_list stp_timer; static int __init early_parse_stp(char *p) { @@ -1454,10 +1465,16 @@ static void __init stp_reset(void) } } +static void stp_timeout(unsigned long dummy) +{ + queue_work(time_sync_wq, &stp_work); +} + static int __init stp_init(void) { if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags)) return 0; + setup_timer(&stp_timer, stp_timeout, 0UL); time_init_wq(); if (!stp_online) return 0; @@ -1565,6 +1582,7 @@ static void stp_work_fn(struct work_struct *work) if (!stp_online) { chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000); + del_timer_sync(&stp_timer); goto out_unlock; } @@ -1586,6 +1604,13 @@ static void stp_work_fn(struct work_struct *work) stop_machine(stp_sync_clock, &stp_sync, &cpu_online_map); put_online_cpus(); + if (!check_sync_clock()) + /* + * There is a usable clock but the synchronization failed. + * Retry after a second. + */ + mod_timer(&stp_timer, jiffies + HZ); + out_unlock: mutex_unlock(&stp_work_mutex); } diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index ecf0304e61c..38ea92ff04f 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -134,6 +134,8 @@ void vtime_start_cpu(void) /* Account time spent with enabled wait psw loaded as idle time. */ idle_time = S390_lowcore.int_clock - idle->idle_enter; account_idle_time(idle_time); + S390_lowcore.steal_timer += + idle->idle_enter - S390_lowcore.last_update_clock; S390_lowcore.last_update_clock = S390_lowcore.int_clock; /* Account system time spent going idle. */ @@ -425,17 +427,7 @@ void add_virt_timer_periodic(void *new) } EXPORT_SYMBOL(add_virt_timer_periodic); -/* - * If we change a pending timer the function must be called on the CPU - * where the timer is running on, e.g. by smp_call_function_single() - * - * The original mod_timer adds the timer if it is not pending. For - * compatibility we do the same. The timer will be added on the current - * CPU as a oneshot timer.
- * - * returns whether it has modified a pending timer (1) or not (0) - */ -int mod_virt_timer(struct vtimer_list *timer, __u64 expires) +int __mod_vtimer(struct vtimer_list *timer, __u64 expires, int periodic) { struct vtimer_queue *vq; unsigned long flags; @@ -444,39 +436,35 @@ int mod_virt_timer(struct vtimer_list *timer, __u64 expires) BUG_ON(!timer->function); BUG_ON(!expires || expires > VTIMER_MAX_SLICE); - /* - * This is a common optimization triggered by the - * networking code - if the timer is re-modified - * to be the same thing then just return: - */ if (timer->expires == expires && vtimer_pending(timer)) return 1; cpu = get_cpu(); vq = &per_cpu(virt_cpu_timer, cpu); - /* check if we run on the right CPU */ - BUG_ON(timer->cpu != cpu); - /* disable interrupts before test if timer is pending */ spin_lock_irqsave(&vq->lock, flags); /* if timer isn't pending add it on the current CPU */ if (!vtimer_pending(timer)) { spin_unlock_irqrestore(&vq->lock, flags); - /* we do not activate an interval timer with mod_virt_timer */ - timer->interval = 0; + + if (periodic) + timer->interval = expires; + else + timer->interval = 0; timer->expires = expires; timer->cpu = cpu; internal_add_vtimer(timer); return 0; } + /* check if we run on the right CPU */ + BUG_ON(timer->cpu != cpu); + list_del_init(&timer->entry); timer->expires = expires; - - /* also change the interval if we have an interval timer */ - if (timer->interval) + if (periodic) timer->interval = expires; /* the timer can't expire anymore so we can release the lock */ @@ -484,9 +472,32 @@ int mod_virt_timer(struct vtimer_list *timer, __u64 expires) internal_add_vtimer(timer); return 1; } + +/* + * If we change a pending timer the function must be called on the CPU + * where the timer is running. + * + * returns whether it has modified a pending timer (1) or not (0) + */ +int mod_virt_timer(struct vtimer_list *timer, __u64 expires) +{ + return __mod_vtimer(timer, expires, 0); +} EXPORT_SYMBOL(mod_virt_timer); /* + * If we change a pending timer the function must be called on the CPU + * where the timer is running. + * + * returns whether it has modified a pending timer (1) or not (0) + */ +int mod_virt_timer_periodic(struct vtimer_list *timer, __u64 expires) +{ + return __mod_vtimer(timer, expires, 1); +} +EXPORT_SYMBOL(mod_virt_timer_periodic); + +/* * delete a virtual timer * * returns whether the deleted timer was pending (1) or not (0) @@ -516,16 +527,8 @@ EXPORT_SYMBOL(del_virt_timer); */ void init_cpu_vtimer(void) { - struct thread_info *ti = current_thread_info(); struct vtimer_queue *vq; - S390_lowcore.user_timer = ti->user_timer; - S390_lowcore.system_timer = ti->system_timer; - - /* kick the virtual timer */ - asm volatile ("STCK %0" : "=m" (S390_lowcore.last_update_clock)); - asm volatile ("STPT %0" : "=m" (S390_lowcore.last_update_timer)); - /* initialize per cpu vtimer structure */ vq = &__get_cpu_var(virt_cpu_timer); INIT_LIST_HEAD(&vq->list); diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 5e4babecf93..e7390dd0283 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -14,6 +14,7 @@ config SUPERH select HAVE_GENERIC_DMA_COHERENT select HAVE_IOREMAP_PROT if MMU select HAVE_ARCH_TRACEHOOK + select HAVE_DMA_API_DEBUG help The SuperH is a RISC processor targeted for use in embedded systems and consumer electronics; it was also used in the Sega Dreamcast @@ -21,7 +22,7 @@ config SUPERH <http://www.linux-sh.org/>.
config SUPERH32 - def_bool !SUPERH64 + def_bool ARCH = "sh" select HAVE_KPROBES select HAVE_KRETPROBES select HAVE_FUNCTION_TRACER @@ -31,7 +32,7 @@ config SUPERH32 select ARCH_HIBERNATION_POSSIBLE if MMU config SUPERH64 - def_bool y if CPU_SH5 + def_bool ARCH = "sh64" config ARCH_DEFCONFIG string @@ -187,6 +188,8 @@ config ARCH_SHMOBILE bool select ARCH_SUSPEND_POSSIBLE +if SUPERH32 + choice prompt "Processor sub-type selection" @@ -408,6 +411,15 @@ config CPU_SUBTYPE_SH7366 select SYS_SUPPORTS_NUMA select SYS_SUPPORTS_CMT +endchoice + +endif + +if SUPERH64 + +choice + prompt "Processor sub-type selection" + # SH-5 Processor Support config CPU_SUBTYPE_SH5_101 @@ -420,6 +432,8 @@ config CPU_SUBTYPE_SH5_103 endchoice +endif + source "arch/sh/mm/Kconfig" source "arch/sh/Kconfig.cpu" diff --git a/arch/sh/boards/board-ap325rxa.c b/arch/sh/boards/board-ap325rxa.c index 912458f666e..39e46919df1 100644 --- a/arch/sh/boards/board-ap325rxa.c +++ b/arch/sh/boards/board-ap325rxa.c @@ -349,6 +349,7 @@ static int ov7725_power(struct device *dev, int mode) static struct ov772x_camera_info ov7725_info = { .buswidth = SOCAM_DATAWIDTH_8, .flags = OV772X_FLAG_VFLIP | OV772X_FLAG_HFLIP, + .edgectrl = OV772X_AUTO_EDGECTRL(0xf, 0), .link = { .power = ov7725_power, }, diff --git a/arch/sh/boards/board-urquell.c b/arch/sh/boards/board-urquell.c index 8367d1d789c..beb88c4da2c 100644 --- a/arch/sh/boards/board-urquell.c +++ b/arch/sh/boards/board-urquell.c @@ -2,6 +2,8 @@ * Renesas Technology Corp. SH7786 Urquell Support. * * Copyright (C) 2008 Kuninori Morimoto <morimoto.kuninori@renesas.com> + * + * Based on board-sh7785lcr.c * Copyright (C) 2008 Yoshihiro Shimoda * * This file is subject to the terms and conditions of the GNU General Public @@ -21,6 +23,32 @@ #include <asm/heartbeat.h> #include <asm/sizes.h> +/* + * bit 1234 5678 + *---------------------------- + * SW1 0101 0010 -> Pck 33MHz version + * (1101 0010) Pck 66MHz version + * SW2 0x1x xxxx -> little endian + * 29bit mode + * SW47 0001 1000 -> CS0 : on-board flash + * CS1 : SRAM, registers, LAN, PCMCIA + * 38400 bps for SCIF1 + * + * Address + * 0x00000000 - 0x04000000 (CS0) Nor Flash + * 0x04000000 - 0x04200000 (CS1) SRAM + * 0x05000000 - 0x05800000 (CS1) on board register + * 0x05800000 - 0x06000000 (CS1) LAN91C111 + * 0x06000000 - 0x06400000 (CS1) PCMCIA + * 0x08000000 - 0x10000000 (CS2-CS3) DDR3 + * 0x10000000 - 0x14000000 (CS4) PCIe + * 0x14000000 - 0x14800000 (CS5) Core0 LRAM/URAM + * 0x14800000 - 0x15000000 (CS5) Core1 LRAM/URAM + * 0x18000000 - 0x1C000000 (CS6) ATA/NAND-Flash + * 0x1C000000 - (CS7) SH7786 Control register + */ + +/* HeartBeat */ static struct resource heartbeat_resources[] = { [0] = { .start = BOARDREG(SLEDR), @@ -43,6 +71,7 @@ static struct platform_device heartbeat_device = { .resource = heartbeat_resources, }; +/* LAN91C111 */ static struct smc91x_platdata smc91x_info = { .flags = SMC91X_USE_16BIT | SMC91X_NOWAIT, }; @@ -69,6 +98,7 @@ static struct platform_device smc91x_eth_device = { }, }; +/* Nor Flash */ static struct mtd_partition nor_flash_partitions[] = { { .name = "loader", diff --git a/arch/sh/drivers/pci/ops-sh7785lcr.c b/arch/sh/drivers/pci/ops-sh7785lcr.c index e8b7446a7c2..fb0869f0bef 100644 --- a/arch/sh/drivers/pci/ops-sh7785lcr.c +++ b/arch/sh/drivers/pci/ops-sh7785lcr.c @@ -48,8 +48,13 @@ EXPORT_SYMBOL(board_pci_channels); static struct sh4_pci_address_map sh7785_pci_map = { .window0 = { +#if defined(CONFIG_32BIT) + .base = SH7780_32BIT_DDR_BASE_ADDR, + .size = 0x40000000, +#else .base = 
SH7780_CS0_BASE_ADDR, .size = 0x20000000, +#endif }, .flags = SH4_PCIC_NO_RESET, diff --git a/arch/sh/drivers/pci/pci-sh7780.h b/arch/sh/drivers/pci/pci-sh7780.h index 97b2c98f05c..93adc7119b7 100644 --- a/arch/sh/drivers/pci/pci-sh7780.h +++ b/arch/sh/drivers/pci/pci-sh7780.h @@ -104,6 +104,8 @@ #define SH7780_CS5_BASE_ADDR (SH7780_CS4_BASE_ADDR + SH7780_MEM_REGION_SIZE) #define SH7780_CS6_BASE_ADDR (SH7780_CS5_BASE_ADDR + SH7780_MEM_REGION_SIZE) +#define SH7780_32BIT_DDR_BASE_ADDR 0x40000000 + struct sh4_pci_address_map; /* arch/sh/drivers/pci/pci-sh7780.c */ diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c index e36c7b87086..0d6ac7a1db4 100644 --- a/arch/sh/drivers/pci/pci.c +++ b/arch/sh/drivers/pci/pci.c @@ -19,6 +19,7 @@ #include <linux/kernel.h> #include <linux/pci.h> #include <linux/init.h> +#include <linux/dma-debug.h> #include <asm/io.h> static int __init pcibios_init(void) @@ -43,6 +44,8 @@ static int __init pcibios_init(void) pci_fixup_irqs(pci_common_swizzle, pcibios_map_platform_irq); + dma_debug_add_bus(&pci_bus_type); + return 0; } subsys_initcall(pcibios_init); diff --git a/arch/sh/include/asm/dma-mapping.h b/arch/sh/include/asm/dma-mapping.h index 627315ecdb5..ea9d4f41c9d 100644 --- a/arch/sh/include/asm/dma-mapping.h +++ b/arch/sh/include/asm/dma-mapping.h @@ -3,6 +3,7 @@ #include <linux/mm.h> #include <linux/scatterlist.h> +#include <linux/dma-debug.h> #include <asm/cacheflush.h> #include <asm/io.h> #include <asm-generic/dma-coherent.h> @@ -38,16 +39,26 @@ static inline dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size, enum dma_data_direction dir) { + dma_addr_t addr = virt_to_phys(ptr); + #if defined(CONFIG_PCI) && !defined(CONFIG_SH_PCIDMA_NONCOHERENT) if (dev->bus == &pci_bus_type) - return virt_to_phys(ptr); + return addr; #endif dma_cache_sync(dev, ptr, size, dir); - return virt_to_phys(ptr); + debug_dma_map_page(dev, virt_to_page(ptr), + (unsigned long)ptr & ~PAGE_MASK, size, + dir, addr, true); + + return addr; } -#define dma_unmap_single(dev, addr, size, dir) do { } while (0) +static inline void dma_unmap_single(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir) +{ + debug_dma_unmap_page(dev, addr, size, dir, true); +} static inline int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir) @@ -59,12 +70,19 @@ static inline int dma_map_sg(struct device *dev, struct scatterlist *sg, dma_cache_sync(dev, sg_virt(&sg[i]), sg[i].length, dir); #endif sg[i].dma_address = sg_phys(&sg[i]); + sg[i].dma_length = sg[i].length; } + debug_dma_map_sg(dev, sg, nents, i, dir); + return nents; } -#define dma_unmap_sg(dev, sg, nents, dir) do { } while (0) +static inline void dma_unmap_sg(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir) +{ + debug_dma_unmap_sg(dev, sg, nents, dir); +} static inline dma_addr_t dma_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, @@ -111,6 +129,7 @@ static inline void dma_sync_sg(struct device *dev, struct scatterlist *sg, dma_cache_sync(dev, sg_virt(&sg[i]), sg[i].length, dir); #endif sg[i].dma_address = sg_phys(&sg[i]); + sg[i].dma_length = sg[i].length; } } @@ -119,6 +138,7 @@ static inline void dma_sync_single_for_cpu(struct device *dev, enum dma_data_direction dir) { dma_sync_single(dev, dma_handle, size, dir); + debug_dma_sync_single_for_cpu(dev, dma_handle, size, dir); } static inline void dma_sync_single_for_device(struct device *dev, @@ -127,6 +147,7 @@ static 
inline void dma_sync_single_for_device(struct device *dev, enum dma_data_direction dir) { dma_sync_single(dev, dma_handle, size, dir); + debug_dma_sync_single_for_device(dev, dma_handle, size, dir); } static inline void dma_sync_single_range_for_cpu(struct device *dev, @@ -136,6 +157,8 @@ static inline void dma_sync_single_range_for_cpu(struct device *dev, enum dma_data_direction direction) { dma_sync_single_for_cpu(dev, dma_handle+offset, size, direction); + debug_dma_sync_single_range_for_cpu(dev, dma_handle, + offset, size, direction); } static inline void dma_sync_single_range_for_device(struct device *dev, @@ -145,6 +168,8 @@ static inline void dma_sync_single_range_for_device(struct device *dev, enum dma_data_direction direction) { dma_sync_single_for_device(dev, dma_handle+offset, size, direction); + debug_dma_sync_single_range_for_device(dev, dma_handle, + offset, size, direction); } @@ -153,6 +178,7 @@ static inline void dma_sync_sg_for_cpu(struct device *dev, enum dma_data_direction dir) { dma_sync_sg(dev, sg, nelems, dir); + debug_dma_sync_sg_for_cpu(dev, sg, nelems, dir); } static inline void dma_sync_sg_for_device(struct device *dev, @@ -160,9 +186,9 @@ static inline void dma_sync_sg_for_device(struct device *dev, enum dma_data_direction dir) { dma_sync_sg(dev, sg, nelems, dir); + debug_dma_sync_sg_for_device(dev, sg, nelems, dir); } - static inline int dma_get_cache_alignment(void) { /* diff --git a/arch/sh/include/asm/scatterlist.h b/arch/sh/include/asm/scatterlist.h index 2084d037369..c693d268a41 100644 --- a/arch/sh/include/asm/scatterlist.h +++ b/arch/sh/include/asm/scatterlist.h @@ -5,12 +5,13 @@ struct scatterlist { #ifdef CONFIG_DEBUG_SG - unsigned long sg_magic; + unsigned long sg_magic; #endif - unsigned long page_link; - unsigned int offset;/* for highmem, page offset */ - dma_addr_t dma_address; - unsigned int length; + unsigned long page_link; + unsigned int offset; /* for highmem, page offset */ + unsigned int length; + dma_addr_t dma_address; + unsigned int dma_length; }; #define ISA_DMA_THRESHOLD PHYS_ADDR_MASK diff --git a/arch/sh/include/asm/topology.h b/arch/sh/include/asm/topology.h index a3f23954589..8489a0905a8 100644 --- a/arch/sh/include/asm/topology.h +++ b/arch/sh/include/asm/topology.h @@ -37,8 +37,11 @@ #define pcibus_to_node(bus) ((void)(bus), -1) #define pcibus_to_cpumask(bus) (pcibus_to_node(bus) == -1 ? \ CPU_MASK_ALL : \ - node_to_cpumask(pcibus_to_node(bus)) \ - ) + node_to_cpumask(pcibus_to_node(bus))) +#define cpumask_of_pcibus(bus) (pcibus_to_node(bus) == -1 ? 
\ + CPU_MASK_ALL_PTR : \ + cpumask_of_node(pcibus_to_node(bus))) + #endif #include <asm-generic/topology.h> diff --git a/arch/sh/include/asm/unistd_32.h b/arch/sh/include/asm/unistd_32.h index d52c000cf92..2efb819e2db 100644 --- a/arch/sh/include/asm/unistd_32.h +++ b/arch/sh/include/asm/unistd_32.h @@ -341,8 +341,10 @@ #define __NR_dup3 330 #define __NR_pipe2 331 #define __NR_inotify_init1 332 +#define __NR_preadv 333 +#define __NR_pwritev 334 -#define NR_syscalls 333 +#define NR_syscalls 335 #ifdef __KERNEL__ diff --git a/arch/sh/include/asm/unistd_64.h b/arch/sh/include/asm/unistd_64.h index 7c54e91753c..6eb9d2934c0 100644 --- a/arch/sh/include/asm/unistd_64.h +++ b/arch/sh/include/asm/unistd_64.h @@ -381,10 +381,12 @@ #define __NR_dup3 358 #define __NR_pipe2 359 #define __NR_inotify_init1 360 +#define __NR_preadv 361 +#define __NR_pwritev 362 #ifdef __KERNEL__ -#define NR_syscalls 361 +#define NR_syscalls 363 #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7786.c b/arch/sh/kernel/cpu/sh4a/setup-sh7786.c index 5a47e1cf442..90e8cfff55f 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7786.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7786.c @@ -143,14 +143,14 @@ static void __init sh7786_usb_setup(void) * Set the PHY and PLL enable bit */ __raw_writel(PHY_ENB | PLL_ENB, USBPCTL1); - while (i-- && - ((__raw_readl(USBST) & ACT_PLL_STATUS) != ACT_PLL_STATUS)) + while (i--) { + if (ACT_PLL_STATUS == (__raw_readl(USBST) & ACT_PLL_STATUS)) { + /* Set the PHY RST bit */ + __raw_writel(PHY_ENB | PLL_ENB | PHY_RST, USBPCTL1); + printk(KERN_INFO "sh7786 usb setup done\n"); + break; + } cpu_relax(); - - if (i) { - /* Set the PHY RST bit */ - __raw_writel(PHY_ENB | PLL_ENB | PHY_RST, USBPCTL1); - printk(KERN_INFO "sh7786 usb setup done\n"); } } diff --git a/arch/sh/kernel/syscalls_32.S b/arch/sh/kernel/syscalls_32.S index e67c1733e1b..05202edd8e2 100644 --- a/arch/sh/kernel/syscalls_32.S +++ b/arch/sh/kernel/syscalls_32.S @@ -349,3 +349,5 @@ ENTRY(sys_call_table) .long sys_dup3 /* 330 */ .long sys_pipe2 .long sys_inotify_init1 + .long sys_preadv + .long sys_pwritev diff --git a/arch/sh/kernel/syscalls_64.S b/arch/sh/kernel/syscalls_64.S index 557cb91f5ca..a083609f928 100644 --- a/arch/sh/kernel/syscalls_64.S +++ b/arch/sh/kernel/syscalls_64.S @@ -387,3 +387,5 @@ sys_call_table: .long sys_dup3 .long sys_pipe2 .long sys_inotify_init1 /* 360 */ + .long sys_preadv + .long sys_pwritev diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c index edcd5fbf965..e098ec158dd 100644 --- a/arch/sh/mm/consistent.c +++ b/arch/sh/mm/consistent.c @@ -10,11 +10,22 @@ * for more details.
*/ #include <linux/mm.h> +#include <linux/init.h> #include <linux/platform_device.h> #include <linux/dma-mapping.h> +#include <linux/dma-debug.h> +#include <linux/io.h> #include <asm/cacheflush.h> #include <asm/addrspace.h> -#include <asm/io.h> + +#define PREALLOC_DMA_DEBUG_ENTRIES 4096 + +static int __init dma_init(void) +{ + dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); + return 0; +} +fs_initcall(dma_init); void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) @@ -45,6 +56,9 @@ void *dma_alloc_coherent(struct device *dev, size_t size, split_page(pfn_to_page(virt_to_phys(ret) >> PAGE_SHIFT), order); *dma_handle = virt_to_phys(ret); + + debug_dma_alloc_coherent(dev, size, *dma_handle, ret_nocache); + return ret_nocache; } EXPORT_SYMBOL(dma_alloc_coherent); @@ -56,12 +70,15 @@ void dma_free_coherent(struct device *dev, size_t size, unsigned long pfn = dma_handle >> PAGE_SHIFT; int k; - if (!dma_release_from_coherent(dev, order, vaddr)) { - WARN_ON(irqs_disabled()); /* for portability */ - for (k = 0; k < (1 << order); k++) - __free_pages(pfn_to_page(pfn + k), 0); - iounmap(vaddr); - } + WARN_ON(irqs_disabled()); /* for portability */ + + if (dma_release_from_coherent(dev, order, vaddr)) + return; + + debug_dma_free_coherent(dev, size, vaddr, dma_handle); + for (k = 0; k < (1 << order); k++) + __free_pages(pfn_to_page(pfn + k), 0); + iounmap(vaddr); } EXPORT_SYMBOL(dma_free_coherent); diff --git a/arch/sparc/include/asm/atomic_32.h b/arch/sparc/include/asm/atomic_32.h index ce465975a6a..bb91b1248cd 100644 --- a/arch/sparc/include/asm/atomic_32.h +++ b/arch/sparc/include/asm/atomic_32.h @@ -15,6 +15,8 @@ #ifdef __KERNEL__ +#include <asm/system.h> + #define ATOMIC_INIT(i) { (i) } extern int __atomic_add_return(int, atomic_t *); diff --git a/arch/sparc/include/asm/parport.h b/arch/sparc/include/asm/parport.h index dff3f0253aa..ff9ead640c4 100644 --- a/arch/sparc/include/asm/parport.h +++ b/arch/sparc/include/asm/parport.h @@ -117,7 +117,7 @@ static int __devinit ecpp_probe(struct of_device *op, const struct of_device_id if (!strcmp(parent->name, "dma")) { p = parport_pc_probe_port(base, base + 0x400, op->irqs[0], PARPORT_DMA_NOFIFO, - op->dev.parent->parent); + op->dev.parent->parent, 0); if (!p) return -ENOMEM; dev_set_drvdata(&op->dev, p); @@ -168,7 +168,8 @@ static int __devinit ecpp_probe(struct of_device *op, const struct of_device_id p = parport_pc_probe_port(base, base + 0x400, op->irqs[0], slot, - op->dev.parent); + op->dev.parent, + 0); err = -ENOMEM; if (!p) goto out_disable_irq; diff --git a/arch/sparc/kernel/ldc.c b/arch/sparc/kernel/ldc.c index 6ce5d2598a0..adf5f273868 100644 --- a/arch/sparc/kernel/ldc.c +++ b/arch/sparc/kernel/ldc.c @@ -1183,8 +1183,7 @@ out_free_txq: free_queue(lp->tx_num_entries, lp->tx_base); out_free_mssbuf: - if (mssbuf) - kfree(mssbuf); + kfree(mssbuf); out_free_iommu: ldc_iommu_release(lp); @@ -1217,8 +1216,7 @@ void ldc_free(struct ldc_channel *lp) hlist_del(&lp->list); - if (lp->mssbuf) - kfree(lp->mssbuf); + kfree(lp->mssbuf); ldc_iommu_release(lp); diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 708e12a26b0..f7642e5a94d 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -118,9 +118,9 @@ void __cpuinit smp_callin(void) while (!cpu_isset(cpuid, smp_commenced_mask)) rmb(); - ipi_call_lock(); + ipi_call_lock_irq(); cpu_set(cpuid, cpu_online_map); - ipi_call_unlock(); + ipi_call_unlock_irq(); /* idle thread is expected to have preempt disabled */ preempt_disable(); 
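The sh dma-mapping.h and consistent.c changes above route every streaming map, unmap and sync through the generic dma-debug hooks, so a kernel built with CONFIG_DMA_API_DEBUG can flag drivers that leak mappings or unmap with a different size or direction than they mapped. Below is a minimal sketch of the driver-side pairing those hooks verify; the device pointer, buffer and function name are illustrative only, not taken from the patch:

#include <linux/device.h>
#include <linux/dma-mapping.h>

/*
 * Sketch only: dma-debug records the (dev, addr, size, dir) tuple at map
 * time and checks the matching unmap against it, once dma_debug_init()
 * has preallocated its tracking entries (the fs_initcall(dma_init)
 * added above).
 */
static int example_xfer(struct device *dev, void *buf, size_t len)
{
	dma_addr_t handle;

	handle = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
	/* ... program the device with 'handle', wait for completion ... */
	dma_unmap_single(dev, handle, len, DMA_TO_DEVICE); /* same size/dir */
	return 0;
}

Since the debug_dma_* calls compile to empty inline stubs when CONFIG_DMA_API_DEBUG is off, the wrappers above should cost nothing in production builds.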
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index d5cd6c58688..a4737dddfd5 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h @@ -50,7 +50,7 @@ #ifdef CONFIG_X86_64 #define NEED_PSE 0 #define NEED_MSR (1<<(X86_FEATURE_MSR & 31)) -#define NEED_PGE (1<<(X86_FEATURE_PGE & 31)) +#define NEED_PGE 0 #define NEED_FXSR (1<<(X86_FEATURE_FXSR & 31)) #define NEED_XMM (1<<(X86_FEATURE_XMM & 31)) #define NEED_XMM2 (1<<(X86_FEATURE_XMM2 & 31)) diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 1a918dde46b..018a0a40079 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -124,7 +124,8 @@ static inline unsigned long mfn_to_local_pfn(unsigned long mfn) /* VIRT <-> MACHINE conversion */ #define virt_to_machine(v) (phys_to_machine(XPADDR(__pa(v)))) -#define virt_to_mfn(v) (pfn_to_mfn(PFN_DOWN(__pa(v)))) +#define virt_to_pfn(v) (PFN_DOWN(__pa(v))) +#define virt_to_mfn(v) (pfn_to_mfn(virt_to_pfn(v))) #define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT)) static inline unsigned long pte_mfn(pte_t pte) diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 3e3cd3db7a0..ecdb682ab51 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -204,7 +204,13 @@ static void drv_read(struct drv_cmd *cmd) static void drv_write(struct drv_cmd *cmd) { + int this_cpu; + + this_cpu = get_cpu(); + if (cpumask_test_cpu(this_cpu, cmd->mask)) + do_drv_write(cmd); smp_call_function_many(cmd->mask, do_drv_write, cmd, 1); + put_cpu(); } static u32 get_cur_val(const struct cpumask *mask) @@ -277,7 +283,7 @@ static unsigned int get_measured_perf(struct cpufreq_policy *policy, unsigned int perf_percent; unsigned int retval; - if (smp_call_function_single(cpu, read_measured_perf_ctrs, &cur, 1)) + if (smp_call_function_single(cpu, read_measured_perf_ctrs, &readin, 1)) return 0; cur.aperf.whole = readin.aperf.whole - diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index a0f3851ef31..2e0eb414095 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -108,40 +108,29 @@ struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; EXPORT_SYMBOL_GPL(ucode_cpu_info); #ifdef CONFIG_MICROCODE_OLD_INTERFACE -struct update_for_cpu { - const void __user *buf; - size_t size; -}; - -static long update_for_cpu(void *_ufc) -{ - struct update_for_cpu *ufc = _ufc; - int error; - - error = microcode_ops->request_microcode_user(smp_processor_id(), - ufc->buf, ufc->size); - if (error < 0) - return error; - if (!error) - microcode_ops->apply_microcode(smp_processor_id()); - return error; -} - static int do_microcode_update(const void __user *buf, size_t size) { + cpumask_t old; int error = 0; int cpu; - struct update_for_cpu ufc = { .buf = buf, .size = size }; + + old = current->cpus_allowed; for_each_online_cpu(cpu) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; if (!uci->valid) continue; - error = work_on_cpu(cpu, update_for_cpu, &ufc); + + set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); + error = microcode_ops->request_microcode_user(cpu, buf, size); if (error < 0) - break; + goto out; + if (!error) + microcode_ops->apply_microcode(cpu); } +out: + set_cpus_allowed_ptr(current, &old); return error; } diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 82cd39a6cbd..f09e8c36ee8 100644 --- 
a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -42,6 +42,7 @@ #include <asm/xen/hypervisor.h> #include <asm/fixmap.h> #include <asm/processor.h> +#include <asm/proto.h> #include <asm/msr-index.h> #include <asm/setup.h> #include <asm/desc.h> @@ -168,21 +169,23 @@ static void __init xen_banner(void) xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); } +static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0; +static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0; + static void xen_cpuid(unsigned int *ax, unsigned int *bx, unsigned int *cx, unsigned int *dx) { + unsigned maskecx = ~0; unsigned maskedx = ~0; /* * Mask out inconvenient features, to try and disable as many * unsupported kernel subsystems as possible. */ - if (*ax == 1) - maskedx = ~((1 << X86_FEATURE_APIC) | /* disable APIC */ - (1 << X86_FEATURE_ACPI) | /* disable ACPI */ - (1 << X86_FEATURE_MCE) | /* disable MCE */ - (1 << X86_FEATURE_MCA) | /* disable MCA */ - (1 << X86_FEATURE_ACC)); /* thermal monitoring */ + if (*ax == 1) { + maskecx = cpuid_leaf1_ecx_mask; + maskedx = cpuid_leaf1_edx_mask; + } asm(XEN_EMULATE_PREFIX "cpuid" : "=a" (*ax), @@ -190,9 +193,43 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, "=c" (*cx), "=d" (*dx) : "0" (*ax), "2" (*cx)); + + *cx &= maskecx; *dx &= maskedx; } +static __init void xen_init_cpuid_mask(void) +{ + unsigned int ax, bx, cx, dx; + + cpuid_leaf1_edx_mask = + ~((1 << X86_FEATURE_MCE) | /* disable MCE */ + (1 << X86_FEATURE_MCA) | /* disable MCA */ + (1 << X86_FEATURE_ACC)); /* thermal monitoring */ + + if (!xen_initial_domain()) + cpuid_leaf1_edx_mask &= + ~((1 << X86_FEATURE_APIC) | /* disable local APIC */ + (1 << X86_FEATURE_ACPI)); /* disable ACPI */ + + ax = 1; + xen_cpuid(&ax, &bx, &cx, &dx); + + /* cpuid claims we support xsave; try enabling it to see what happens */ + if (cx & (1 << (X86_FEATURE_XSAVE % 32))) { + unsigned long cr4; + + set_in_cr4(X86_CR4_OSXSAVE); + + cr4 = read_cr4(); + + if ((cr4 & X86_CR4_OSXSAVE) == 0) + cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_XSAVE % 32)); + + clear_in_cr4(X86_CR4_OSXSAVE); + } +} + static void xen_set_debugreg(int reg, unsigned long val) { HYPERVISOR_set_debugreg(reg, val); @@ -284,12 +321,11 @@ static void xen_set_ldt(const void *addr, unsigned entries) static void xen_load_gdt(const struct desc_ptr *dtr) { - unsigned long *frames; unsigned long va = dtr->address; unsigned int size = dtr->size + 1; unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; + unsigned long frames[pages]; int f; - struct multicall_space mcs; /* A GDT can be up to 64k in size, which corresponds to 8192 8-byte entries, or 16 4k pages.. 
*/ @@ -297,19 +333,26 @@ static void xen_load_gdt(const struct desc_ptr *dtr) BUG_ON(size > 65536); BUG_ON(va & ~PAGE_MASK); - mcs = xen_mc_entry(sizeof(*frames) * pages); - frames = mcs.args; - for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) { - frames[f] = arbitrary_virt_to_mfn((void *)va); + int level; + pte_t *ptep = lookup_address(va, &level); + unsigned long pfn, mfn; + void *virt; + + BUG_ON(ptep == NULL); + + pfn = pte_pfn(*ptep); + mfn = pfn_to_mfn(pfn); + virt = __va(PFN_PHYS(pfn)); + + frames[f] = mfn; make_lowmem_page_readonly((void *)va); - make_lowmem_page_readonly(mfn_to_virt(frames[f])); + make_lowmem_page_readonly(virt); } - MULTI_set_gdt(mcs.mc, frames, size / sizeof(struct desc_struct)); - - xen_mc_issue(PARAVIRT_LAZY_CPU); + if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct))) + BUG(); } static void load_TLS_descriptor(struct thread_struct *t, @@ -385,7 +428,7 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, static int cvt_gate_to_trap(int vector, const gate_desc *val, struct trap_info *info) { - if (val->type != 0xf && val->type != 0xe) + if (val->type != GATE_TRAP && val->type != GATE_INTERRUPT) return 0; info->vector = vector; @@ -393,8 +436,8 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val, info->cs = gate_segment(*val); info->flags = val->dpl; /* interrupt gates clear IF */ - if (val->type == 0xe) - info->flags |= 4; + if (val->type == GATE_INTERRUPT) + info->flags |= 1 << 2; return 1; } @@ -872,7 +915,6 @@ static const struct machine_ops __initdata xen_machine_ops = { .emergency_restart = xen_emergency_restart, }; - /* First C function to be called on Xen boot */ asmlinkage void __init xen_start_kernel(void) { @@ -897,6 +939,8 @@ asmlinkage void __init xen_start_kernel(void) xen_init_irq_ops(); + xen_init_cpuid_mask(); + #ifdef CONFIG_X86_LOCAL_APIC /* * set up the basic apic ops. @@ -938,6 +982,11 @@ asmlinkage void __init xen_start_kernel(void) if (!xen_initial_domain()) __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); +#ifdef CONFIG_X86_64 + /* Work out if we support NX */ + check_efer(); +#endif + /* Don't do the full vcpu_info placement stuff until we have a possible map and a non-dummy shared_info. 
*/ per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 2a81838a9ab..9842b121240 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -184,7 +184,7 @@ static inline unsigned p2m_index(unsigned long pfn) } /* Build the parallel p2m_top_mfn structures */ -void xen_setup_mfn_list_list(void) +static void __init xen_build_mfn_list_list(void) { unsigned pfn, idx; @@ -198,7 +198,10 @@ void xen_setup_mfn_list_list(void) unsigned topidx = idx * P2M_ENTRIES_PER_PAGE; p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]); } +} +void xen_setup_mfn_list_list(void) +{ BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = @@ -218,6 +221,8 @@ void __init xen_build_dynamic_phys_to_machine(void) p2m_top[topidx] = &mfn_list[pfn]; } + + xen_build_mfn_list_list(); } unsigned long get_phys_to_machine(unsigned long pfn) @@ -233,47 +238,74 @@ unsigned long get_phys_to_machine(unsigned long pfn) } EXPORT_SYMBOL_GPL(get_phys_to_machine); -static void alloc_p2m(unsigned long **pp, unsigned long *mfnp) +/* install a new p2m_top page */ +bool install_p2mtop_page(unsigned long pfn, unsigned long *p) { - unsigned long *p; + unsigned topidx = p2m_top_index(pfn); + unsigned long **pfnp, *mfnp; unsigned i; - p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL); - BUG_ON(p == NULL); + pfnp = &p2m_top[topidx]; + mfnp = &p2m_top_mfn[topidx]; for (i = 0; i < P2M_ENTRIES_PER_PAGE; i++) p[i] = INVALID_P2M_ENTRY; - if (cmpxchg(pp, p2m_missing, p) != p2m_missing) - free_page((unsigned long)p); - else + if (cmpxchg(pfnp, p2m_missing, p) == p2m_missing) { *mfnp = virt_to_mfn(p); + return true; + } + + return false; } -void set_phys_to_machine(unsigned long pfn, unsigned long mfn) +static void alloc_p2m(unsigned long pfn) { - unsigned topidx, idx; + unsigned long *p; - if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { - BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); - return; - } + p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL); + BUG_ON(p == NULL); + + if (!install_p2mtop_page(pfn, p)) + free_page((unsigned long)p); +} + +/* Try to install p2m mapping; fail if intermediate bits missing */ +bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) +{ + unsigned topidx, idx; if (unlikely(pfn >= MAX_DOMAIN_PAGES)) { BUG_ON(mfn != INVALID_P2M_ENTRY); - return; + return true; } topidx = p2m_top_index(pfn); if (p2m_top[topidx] == p2m_missing) { - /* no need to allocate a page to store an invalid entry */ if (mfn == INVALID_P2M_ENTRY) - return; - alloc_p2m(&p2m_top[topidx], &p2m_top_mfn[topidx]); + return true; + return false; } idx = p2m_index(pfn); p2m_top[topidx][idx] = mfn; + + return true; +} + +void set_phys_to_machine(unsigned long pfn, unsigned long mfn) +{ + if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { + BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); + return; + } + + if (unlikely(!__set_phys_to_machine(pfn, mfn))) { + alloc_p2m(pfn); + + if (!__set_phys_to_machine(pfn, mfn)) + BUG(); + } } unsigned long arbitrary_virt_to_mfn(void *vaddr) @@ -987,7 +1019,7 @@ static __init int xen_mark_pinned(struct mm_struct *mm, struct page *page, return 0; } -void __init xen_mark_init_mm_pinned(void) +static void __init xen_mark_init_mm_pinned(void) { xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP); } @@ -1270,8 +1302,8 @@ static void xen_flush_tlb_others(const struct cpumask *cpus, } *args; struct multicall_space mcs; - BUG_ON(cpumask_empty(cpus)); - 
BUG_ON(!mm); + if (cpumask_empty(cpus)) + return; /* nothing to do */ mcs = xen_mc_entry(sizeof(*args)); args = mcs.args; @@ -1438,6 +1470,15 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) } #endif +static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) +{ + struct mmuext_op op; + op.cmd = cmd; + op.arg1.mfn = pfn_to_mfn(pfn); + if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) + BUG(); +} + /* Early in boot, while setting up the initial pagetable, assume everything is pinned. */ static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) @@ -1446,22 +1487,29 @@ static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) BUG_ON(mem_map); /* should only be used early */ #endif make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); + pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); +} + +/* Used for pmd and pud */ +static __init void xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn) +{ +#ifdef CONFIG_FLATMEM + BUG_ON(mem_map); /* should only be used early */ +#endif + make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); } /* Early release_pte assumes that all pts are pinned, since there's only init_mm and anything attached to that is pinned. */ -static void xen_release_pte_init(unsigned long pfn) +static __init void xen_release_pte_init(unsigned long pfn) { + pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); } -static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) +static __init void xen_release_pmd_init(unsigned long pfn) { - struct mmuext_op op; - op.cmd = cmd; - op.arg1.mfn = pfn_to_mfn(pfn); - if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) - BUG(); + make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); } /* This needs to make sure the new pte page is pinned iff its being @@ -1773,6 +1821,9 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) #ifdef CONFIG_X86_LOCAL_APIC case FIX_APIC_BASE: /* maps dummy local APIC */ #endif + case FIX_TEXT_POKE0: + case FIX_TEXT_POKE1: + /* All local page mappings */ pte = pfn_pte(phys, prot); break; @@ -1819,7 +1870,6 @@ __init void xen_post_allocator_init(void) xen_mark_init_mm_pinned(); } - const struct pv_mmu_ops xen_mmu_ops __initdata = { .pagetable_setup_start = xen_pagetable_setup_start, .pagetable_setup_done = xen_pagetable_setup_done, @@ -1843,9 +1893,9 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = { .alloc_pte = xen_alloc_pte_init, .release_pte = xen_release_pte_init, - .alloc_pmd = xen_alloc_pte_init, + .alloc_pmd = xen_alloc_pmd_init, .alloc_pmd_clone = paravirt_nop, - .release_pmd = xen_release_pte_init, + .release_pmd = xen_release_pmd_init, #ifdef CONFIG_HIGHPTE .kmap_atomic_pte = xen_kmap_atomic_pte, @@ -1883,8 +1933,8 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = { .make_pud = PV_CALLEE_SAVE(xen_make_pud), .set_pgd = xen_set_pgd_hyper, - .alloc_pud = xen_alloc_pte_init, - .release_pud = xen_release_pte_init, + .alloc_pud = xen_alloc_pmd_init, + .release_pud = xen_release_pmd_init, #endif /* PAGETABLE_LEVELS == 4 */ .activate_mm = xen_activate_mm, diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index 24d1b44a337..da730262489 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h @@ -11,6 +11,9 @@ enum pt_level { }; +bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); +bool install_p2mtop_page(unsigned long pfn, unsigned long *p); + void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 
585a6e33083..429834ec168 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -317,7 +317,7 @@ static int __cpuinit xen_cpu_up(unsigned int cpu) BUG_ON(rc); while(per_cpu(cpu_state, cpu) != CPU_ONLINE) { - HYPERVISOR_sched_op(SCHEDOP_yield, 0); + HYPERVISOR_sched_op(SCHEDOP_yield, NULL); barrier(); } @@ -422,7 +422,7 @@ static void xen_smp_send_call_function_ipi(const struct cpumask *mask) /* Make sure other vcpus get a chance to run if they need to. */ for_each_cpu(cpu, mask) { if (xen_vcpu_stolen(cpu)) { - HYPERVISOR_sched_op(SCHEDOP_yield, 0); + HYPERVISOR_sched_op(SCHEDOP_yield, NULL); break; } } diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 2f5ef2632ea..20139464943 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -57,8 +57,6 @@ irqreturn_t xen_debug_interrupt(int irq, void *dev_id); bool xen_vcpu_stolen(int vcpu); -void xen_mark_init_mm_pinned(void); - void xen_setup_vcpu_info_placement(void); #ifdef CONFIG_SMP diff --git a/block/as-iosched.c b/block/as-iosched.c index 631f6f44460..c48fa670d22 100644 --- a/block/as-iosched.c +++ b/block/as-iosched.c @@ -17,9 +17,6 @@ #include <linux/rbtree.h> #include <linux/interrupt.h> -#define REQ_SYNC 1 -#define REQ_ASYNC 0 - /* * See Documentation/block/as-iosched.txt */ @@ -93,7 +90,7 @@ struct as_data { struct list_head fifo_list[2]; struct request *next_rq[2]; /* next in sort order */ - sector_t last_sector[2]; /* last REQ_SYNC & REQ_ASYNC sectors */ + sector_t last_sector[2]; /* last SYNC & ASYNC sectors */ unsigned long exit_prob; /* probability a task will exit while being waited on */ @@ -109,7 +106,7 @@ struct as_data { unsigned long last_check_fifo[2]; int changed_batch; /* 1: waiting for old batch to end */ int new_batch; /* 1: waiting on first read complete */ - int batch_data_dir; /* current batch REQ_SYNC / REQ_ASYNC */ + int batch_data_dir; /* current batch SYNC / ASYNC */ int write_batch_count; /* max # of reqs in a write batch */ int current_write_count; /* how many requests left this batch */ int write_batch_idled; /* has the write batch gone idle? */ @@ -554,7 +551,7 @@ static void as_update_iohist(struct as_data *ad, struct as_io_context *aic, if (aic == NULL) return; - if (data_dir == REQ_SYNC) { + if (data_dir == BLK_RW_SYNC) { unsigned long in_flight = atomic_read(&aic->nr_queued) + atomic_read(&aic->nr_dispatched); spin_lock(&aic->lock); @@ -811,7 +808,7 @@ static void as_update_rq(struct as_data *ad, struct request *rq) */ static void update_write_batch(struct as_data *ad) { - unsigned long batch = ad->batch_expire[REQ_ASYNC]; + unsigned long batch = ad->batch_expire[BLK_RW_ASYNC]; long write_time; write_time = (jiffies - ad->current_batch_expires) + batch; @@ -855,7 +852,7 @@ static void as_completed_request(struct request_queue *q, struct request *rq) kblockd_schedule_work(q, &ad->antic_work); ad->changed_batch = 0; - if (ad->batch_data_dir == REQ_SYNC) + if (ad->batch_data_dir == BLK_RW_SYNC) ad->new_batch = 1; } WARN_ON(ad->nr_dispatched == 0); @@ -869,7 +866,7 @@ static void as_completed_request(struct request_queue *q, struct request *rq) if (ad->new_batch && ad->batch_data_dir == rq_is_sync(rq)) { update_write_batch(ad); ad->current_batch_expires = jiffies + - ad->batch_expire[REQ_SYNC]; + ad->batch_expire[BLK_RW_SYNC]; ad->new_batch = 0; } @@ -960,7 +957,7 @@ static inline int as_batch_expired(struct as_data *ad) if (ad->changed_batch || ad->new_batch) return 0; - if (ad->batch_data_dir == REQ_SYNC) + if (ad->batch_data_dir == BLK_RW_SYNC) /* TODO! 
add a check so a complete fifo gets written? */ return time_after(jiffies, ad->current_batch_expires); @@ -986,7 +983,7 @@ static void as_move_to_dispatch(struct as_data *ad, struct request *rq) */ ad->last_sector[data_dir] = rq->sector + rq->nr_sectors; - if (data_dir == REQ_SYNC) { + if (data_dir == BLK_RW_SYNC) { struct io_context *ioc = RQ_IOC(rq); /* In case we have to anticipate after this */ copy_io_context(&ad->io_context, &ioc); @@ -1025,41 +1022,41 @@ static void as_move_to_dispatch(struct as_data *ad, struct request *rq) static int as_dispatch_request(struct request_queue *q, int force) { struct as_data *ad = q->elevator->elevator_data; - const int reads = !list_empty(&ad->fifo_list[REQ_SYNC]); - const int writes = !list_empty(&ad->fifo_list[REQ_ASYNC]); + const int reads = !list_empty(&ad->fifo_list[BLK_RW_SYNC]); + const int writes = !list_empty(&ad->fifo_list[BLK_RW_ASYNC]); struct request *rq; if (unlikely(force)) { /* * Forced dispatch, accounting is useless. Reset * accounting states and dump fifo_lists. Note that - * batch_data_dir is reset to REQ_SYNC to avoid + * batch_data_dir is reset to BLK_RW_SYNC to avoid * screwing write batch accounting as write batch * accounting occurs on W->R transition. */ int dispatched = 0; - ad->batch_data_dir = REQ_SYNC; + ad->batch_data_dir = BLK_RW_SYNC; ad->changed_batch = 0; ad->new_batch = 0; - while (ad->next_rq[REQ_SYNC]) { - as_move_to_dispatch(ad, ad->next_rq[REQ_SYNC]); + while (ad->next_rq[BLK_RW_SYNC]) { + as_move_to_dispatch(ad, ad->next_rq[BLK_RW_SYNC]); dispatched++; } - ad->last_check_fifo[REQ_SYNC] = jiffies; + ad->last_check_fifo[BLK_RW_SYNC] = jiffies; - while (ad->next_rq[REQ_ASYNC]) { - as_move_to_dispatch(ad, ad->next_rq[REQ_ASYNC]); + while (ad->next_rq[BLK_RW_ASYNC]) { + as_move_to_dispatch(ad, ad->next_rq[BLK_RW_ASYNC]); dispatched++; } - ad->last_check_fifo[REQ_ASYNC] = jiffies; + ad->last_check_fifo[BLK_RW_ASYNC] = jiffies; return dispatched; } /* Signal that the write batch was uncontended, so we can't time it */ - if (ad->batch_data_dir == REQ_ASYNC && !reads) { + if (ad->batch_data_dir == BLK_RW_ASYNC && !reads) { if (ad->current_write_count == 0 || !writes) ad->write_batch_idled = 1; } @@ -1076,8 +1073,8 @@ static int as_dispatch_request(struct request_queue *q, int force) */ rq = ad->next_rq[ad->batch_data_dir]; - if (ad->batch_data_dir == REQ_SYNC && ad->antic_expire) { - if (as_fifo_expired(ad, REQ_SYNC)) + if (ad->batch_data_dir == BLK_RW_SYNC && ad->antic_expire) { + if (as_fifo_expired(ad, BLK_RW_SYNC)) goto fifo_expired; if (as_can_anticipate(ad, rq)) { @@ -1090,7 +1087,7 @@ static int as_dispatch_request(struct request_queue *q, int force) /* we have a "next request" */ if (reads && !writes) ad->current_batch_expires = - jiffies + ad->batch_expire[REQ_SYNC]; + jiffies + ad->batch_expire[BLK_RW_SYNC]; goto dispatch_request; } } @@ -1101,20 +1098,20 @@ static int as_dispatch_request(struct request_queue *q, int force) */ if (reads) { - BUG_ON(RB_EMPTY_ROOT(&ad->sort_list[REQ_SYNC])); + BUG_ON(RB_EMPTY_ROOT(&ad->sort_list[BLK_RW_SYNC])); - if (writes && ad->batch_data_dir == REQ_SYNC) + if (writes && ad->batch_data_dir == BLK_RW_SYNC) /* * Last batch was a read, switch to writes */ goto dispatch_writes; - if (ad->batch_data_dir == REQ_ASYNC) { + if (ad->batch_data_dir == BLK_RW_ASYNC) { WARN_ON(ad->new_batch); ad->changed_batch = 1; } - ad->batch_data_dir = REQ_SYNC; - rq = rq_entry_fifo(ad->fifo_list[REQ_SYNC].next); + ad->batch_data_dir = BLK_RW_SYNC; + rq = 
rq_entry_fifo(ad->fifo_list[BLK_RW_SYNC].next); ad->last_check_fifo[ad->batch_data_dir] = jiffies; goto dispatch_request; } @@ -1125,9 +1122,9 @@ static int as_dispatch_request(struct request_queue *q, int force) if (writes) { dispatch_writes: - BUG_ON(RB_EMPTY_ROOT(&ad->sort_list[REQ_ASYNC])); + BUG_ON(RB_EMPTY_ROOT(&ad->sort_list[BLK_RW_ASYNC])); - if (ad->batch_data_dir == REQ_SYNC) { + if (ad->batch_data_dir == BLK_RW_SYNC) { ad->changed_batch = 1; /* @@ -1137,11 +1134,11 @@ dispatch_writes: */ ad->new_batch = 0; } - ad->batch_data_dir = REQ_ASYNC; + ad->batch_data_dir = BLK_RW_ASYNC; ad->current_write_count = ad->write_batch_count; ad->write_batch_idled = 0; - rq = rq_entry_fifo(ad->fifo_list[REQ_ASYNC].next); - ad->last_check_fifo[REQ_ASYNC] = jiffies; + rq = rq_entry_fifo(ad->fifo_list[BLK_RW_ASYNC].next); + ad->last_check_fifo[BLK_RW_ASYNC] = jiffies; goto dispatch_request; } @@ -1164,9 +1161,9 @@ fifo_expired: if (ad->nr_dispatched) return 0; - if (ad->batch_data_dir == REQ_ASYNC) + if (ad->batch_data_dir == BLK_RW_ASYNC) ad->current_batch_expires = jiffies + - ad->batch_expire[REQ_ASYNC]; + ad->batch_expire[BLK_RW_ASYNC]; else ad->new_batch = 1; @@ -1238,8 +1235,8 @@ static int as_queue_empty(struct request_queue *q) { struct as_data *ad = q->elevator->elevator_data; - return list_empty(&ad->fifo_list[REQ_ASYNC]) - && list_empty(&ad->fifo_list[REQ_SYNC]); + return list_empty(&ad->fifo_list[BLK_RW_ASYNC]) + && list_empty(&ad->fifo_list[BLK_RW_SYNC]); } static int @@ -1346,8 +1343,8 @@ static void as_exit_queue(struct elevator_queue *e) del_timer_sync(&ad->antic_timer); cancel_work_sync(&ad->antic_work); - BUG_ON(!list_empty(&ad->fifo_list[REQ_SYNC])); - BUG_ON(!list_empty(&ad->fifo_list[REQ_ASYNC])); + BUG_ON(!list_empty(&ad->fifo_list[BLK_RW_SYNC])); + BUG_ON(!list_empty(&ad->fifo_list[BLK_RW_ASYNC])); put_io_context(ad->io_context); kfree(ad); @@ -1372,18 +1369,18 @@ static void *as_init_queue(struct request_queue *q) init_timer(&ad->antic_timer); INIT_WORK(&ad->antic_work, as_work_handler); - INIT_LIST_HEAD(&ad->fifo_list[REQ_SYNC]); - INIT_LIST_HEAD(&ad->fifo_list[REQ_ASYNC]); - ad->sort_list[REQ_SYNC] = RB_ROOT; - ad->sort_list[REQ_ASYNC] = RB_ROOT; - ad->fifo_expire[REQ_SYNC] = default_read_expire; - ad->fifo_expire[REQ_ASYNC] = default_write_expire; + INIT_LIST_HEAD(&ad->fifo_list[BLK_RW_SYNC]); + INIT_LIST_HEAD(&ad->fifo_list[BLK_RW_ASYNC]); + ad->sort_list[BLK_RW_SYNC] = RB_ROOT; + ad->sort_list[BLK_RW_ASYNC] = RB_ROOT; + ad->fifo_expire[BLK_RW_SYNC] = default_read_expire; + ad->fifo_expire[BLK_RW_ASYNC] = default_write_expire; ad->antic_expire = default_antic_expire; - ad->batch_expire[REQ_SYNC] = default_read_batch_expire; - ad->batch_expire[REQ_ASYNC] = default_write_batch_expire; + ad->batch_expire[BLK_RW_SYNC] = default_read_batch_expire; + ad->batch_expire[BLK_RW_ASYNC] = default_write_batch_expire; - ad->current_batch_expires = jiffies + ad->batch_expire[REQ_SYNC]; - ad->write_batch_count = ad->batch_expire[REQ_ASYNC] / 10; + ad->current_batch_expires = jiffies + ad->batch_expire[BLK_RW_SYNC]; + ad->write_batch_count = ad->batch_expire[BLK_RW_ASYNC] / 10; if (ad->write_batch_count < 2) ad->write_batch_count = 2; @@ -1432,11 +1429,11 @@ static ssize_t __FUNC(struct elevator_queue *e, char *page) \ struct as_data *ad = e->elevator_data; \ return as_var_show(jiffies_to_msecs((__VAR)), (page)); \ } -SHOW_FUNCTION(as_read_expire_show, ad->fifo_expire[REQ_SYNC]); -SHOW_FUNCTION(as_write_expire_show, ad->fifo_expire[REQ_ASYNC]); +SHOW_FUNCTION(as_read_expire_show, 
ad->fifo_expire[BLK_RW_SYNC]); +SHOW_FUNCTION(as_write_expire_show, ad->fifo_expire[BLK_RW_ASYNC]); SHOW_FUNCTION(as_antic_expire_show, ad->antic_expire); -SHOW_FUNCTION(as_read_batch_expire_show, ad->batch_expire[REQ_SYNC]); -SHOW_FUNCTION(as_write_batch_expire_show, ad->batch_expire[REQ_ASYNC]); +SHOW_FUNCTION(as_read_batch_expire_show, ad->batch_expire[BLK_RW_SYNC]); +SHOW_FUNCTION(as_write_batch_expire_show, ad->batch_expire[BLK_RW_ASYNC]); #undef SHOW_FUNCTION #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX) \ @@ -1451,13 +1448,14 @@ static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) *(__PTR) = msecs_to_jiffies(*(__PTR)); \ return ret; \ } -STORE_FUNCTION(as_read_expire_store, &ad->fifo_expire[REQ_SYNC], 0, INT_MAX); -STORE_FUNCTION(as_write_expire_store, &ad->fifo_expire[REQ_ASYNC], 0, INT_MAX); +STORE_FUNCTION(as_read_expire_store, &ad->fifo_expire[BLK_RW_SYNC], 0, INT_MAX); +STORE_FUNCTION(as_write_expire_store, + &ad->fifo_expire[BLK_RW_ASYNC], 0, INT_MAX); STORE_FUNCTION(as_antic_expire_store, &ad->antic_expire, 0, INT_MAX); STORE_FUNCTION(as_read_batch_expire_store, - &ad->batch_expire[REQ_SYNC], 0, INT_MAX); + &ad->batch_expire[BLK_RW_SYNC], 0, INT_MAX); STORE_FUNCTION(as_write_batch_expire_store, - &ad->batch_expire[REQ_ASYNC], 0, INT_MAX); + &ad->batch_expire[BLK_RW_ASYNC], 0, INT_MAX); #undef STORE_FUNCTION #define AS_ATTR(name) \ diff --git a/block/blk-barrier.c b/block/blk-barrier.c index f7dae57e6ca..20b4111fa05 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -319,9 +319,6 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector) return -ENXIO; bio = bio_alloc(GFP_KERNEL, 0); - if (!bio) - return -ENOMEM; - bio->bi_end_io = bio_end_empty_barrier; bio->bi_private = &wait; bio->bi_bdev = bdev; diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 73f36beff5c..cac4e9febe6 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -209,14 +209,14 @@ static ssize_t queue_iostats_store(struct request_queue *q, const char *page, ssize_t ret = queue_var_store(&stats, page, count); spin_lock_irq(q->queue_lock); - elv_quisce_start(q); + elv_quiesce_start(q); if (stats) queue_flag_set(QUEUE_FLAG_IO_STAT, q); else queue_flag_clear(QUEUE_FLAG_IO_STAT, q); - elv_quisce_end(q); + elv_quiesce_end(q); spin_unlock_irq(q->queue_lock); return ret; diff --git a/block/blk.h b/block/blk.h index 24fcaeeaf62..5dfc41267a0 100644 --- a/block/blk.h +++ b/block/blk.h @@ -70,8 +70,8 @@ void blk_queue_congestion_threshold(struct request_queue *q); int blk_dev_init(void); -void elv_quisce_start(struct request_queue *q); -void elv_quisce_end(struct request_queue *q); +void elv_quiesce_start(struct request_queue *q); +void elv_quiesce_end(struct request_queue *q); /* diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index a4809de6fea..0d3b70de3d8 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -56,9 +56,6 @@ static DEFINE_SPINLOCK(ioc_gone_lock); #define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE) #define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT) -#define ASYNC (0) -#define SYNC (1) - #define sample_valid(samples) ((samples) > 80) /* @@ -83,6 +80,14 @@ struct cfq_data { * rr list of queues with requests and the count of them */ struct cfq_rb_root service_tree; + + /* + * Each priority tree is sorted by next_request position. These + * trees are used when determining if two or more queues are + * interleaving requests (see cfq_close_cooperator). 
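+	 * Lookup is by the sector of each queue's next_rq, so the queue
+	 * issuing requests nearest a given disk position can be found
+	 * with an O(log n) rbtree walk.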
+ */ + struct rb_root prio_trees[CFQ_PRIO_LISTS]; + unsigned int busy_queues; /* * Used to track any pending rt requests so we can pre-empt current @@ -147,6 +152,8 @@ struct cfq_queue { struct rb_node rb_node; /* service_tree key */ unsigned long rb_key; + /* prio tree member */ + struct rb_node p_node; /* sorted list of pending requests */ struct rb_root sort_list; /* if fifo isn't expired, next request to serve */ @@ -185,6 +192,7 @@ enum cfqq_state_flags { CFQ_CFQQ_FLAG_prio_changed, /* task priority has changed */ CFQ_CFQQ_FLAG_slice_new, /* no requests dispatched in slice */ CFQ_CFQQ_FLAG_sync, /* synchronous queue */ + CFQ_CFQQ_FLAG_coop, /* has done a coop jump of the queue */ }; #define CFQ_CFQQ_FNS(name) \ @@ -211,6 +219,7 @@ CFQ_CFQQ_FNS(idle_window); CFQ_CFQQ_FNS(prio_changed); CFQ_CFQQ_FNS(slice_new); CFQ_CFQQ_FNS(sync); +CFQ_CFQQ_FNS(coop); #undef CFQ_CFQQ_FNS #define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \ @@ -419,13 +428,17 @@ static struct cfq_queue *cfq_rb_first(struct cfq_rb_root *root) return NULL; } +static void rb_erase_init(struct rb_node *n, struct rb_root *root) +{ + rb_erase(n, root); + RB_CLEAR_NODE(n); +} + static void cfq_rb_erase(struct rb_node *n, struct cfq_rb_root *root) { if (root->left == n) root->left = NULL; - - rb_erase(n, &root->rb); - RB_CLEAR_NODE(n); + rb_erase_init(n, &root->rb); } /* @@ -470,8 +483,8 @@ static unsigned long cfq_slice_offset(struct cfq_data *cfqd, * requests waiting to be processed. It is sorted in the order that * we will service the queues. */ -static void cfq_service_tree_add(struct cfq_data *cfqd, - struct cfq_queue *cfqq, int add_front) +static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, + int add_front) { struct rb_node **p, *parent; struct cfq_queue *__cfqq; @@ -544,6 +557,63 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, rb_insert_color(&cfqq->rb_node, &cfqd->service_tree.rb); } +static struct cfq_queue * +cfq_prio_tree_lookup(struct cfq_data *cfqd, int ioprio, sector_t sector, + struct rb_node **ret_parent, struct rb_node ***rb_link) +{ + struct rb_root *root = &cfqd->prio_trees[ioprio]; + struct rb_node **p, *parent; + struct cfq_queue *cfqq = NULL; + + parent = NULL; + p = &root->rb_node; + while (*p) { + struct rb_node **n; + + parent = *p; + cfqq = rb_entry(parent, struct cfq_queue, p_node); + + /* + * Sort strictly based on sector. Smallest to the left, + * largest to the right. + */ + if (sector > cfqq->next_rq->sector) + n = &(*p)->rb_right; + else if (sector < cfqq->next_rq->sector) + n = &(*p)->rb_left; + else + break; + p = n; + } + + *ret_parent = parent; + if (rb_link) + *rb_link = p; + return NULL; +} + +static void cfq_prio_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq) +{ + struct rb_root *root = &cfqd->prio_trees[cfqq->ioprio]; + struct rb_node **p, *parent; + struct cfq_queue *__cfqq; + + if (!RB_EMPTY_NODE(&cfqq->p_node)) + rb_erase_init(&cfqq->p_node, root); + + if (cfq_class_idle(cfqq)) + return; + if (!cfqq->next_rq) + return; + + __cfqq = cfq_prio_tree_lookup(cfqd, cfqq->ioprio, cfqq->next_rq->sector, + &parent, &p); + BUG_ON(__cfqq); + + rb_link_node(&cfqq->p_node, parent, p); + rb_insert_color(&cfqq->p_node, root); +} + /* * Update cfqq's position in the service tree. */ @@ -552,8 +622,10 @@ static void cfq_resort_rr_list(struct cfq_data *cfqd, struct cfq_queue *cfqq) /* * Resorting requires the cfqq to be on the RR list already. 
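+	 * Both the service tree and the priority tree positions are
+	 * refreshed.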
*/ - if (cfq_cfqq_on_rr(cfqq)) + if (cfq_cfqq_on_rr(cfqq)) { cfq_service_tree_add(cfqd, cfqq, 0); + cfq_prio_tree_add(cfqd, cfqq); + } } /* @@ -584,6 +656,8 @@ static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) if (!RB_EMPTY_NODE(&cfqq->rb_node)) cfq_rb_erase(&cfqq->rb_node, &cfqd->service_tree); + if (!RB_EMPTY_NODE(&cfqq->p_node)) + rb_erase_init(&cfqq->p_node, &cfqd->prio_trees[cfqq->ioprio]); BUG_ON(!cfqd->busy_queues); cfqd->busy_queues--; @@ -613,7 +687,7 @@ static void cfq_add_rq_rb(struct request *rq) { struct cfq_queue *cfqq = RQ_CFQQ(rq); struct cfq_data *cfqd = cfqq->cfqd; - struct request *__alias; + struct request *__alias, *prev; cfqq->queued[rq_is_sync(rq)]++; @@ -630,7 +704,15 @@ static void cfq_add_rq_rb(struct request *rq) /* * check if this request is a better next-serve candidate */ + prev = cfqq->next_rq; cfqq->next_rq = cfq_choose_req(cfqd, cfqq->next_rq, rq); + + /* + * adjust priority tree position, if ->next_rq changes + */ + if (prev != cfqq->next_rq) + cfq_prio_tree_add(cfqd, cfqq); + BUG_ON(!cfqq->next_rq); } @@ -843,11 +925,15 @@ static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd) /* * Get and set a new active queue for service. */ -static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd) +static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd, + struct cfq_queue *cfqq) { - struct cfq_queue *cfqq; + if (!cfqq) { + cfqq = cfq_get_next_queue(cfqd); + if (cfqq) + cfq_clear_cfqq_coop(cfqq); + } - cfqq = cfq_get_next_queue(cfqd); __cfq_set_active_queue(cfqd, cfqq); return cfqq; } @@ -871,17 +957,89 @@ static inline int cfq_rq_close(struct cfq_data *cfqd, struct request *rq) return cfq_dist_from_last(cfqd, rq) <= cic->seek_mean; } -static int cfq_close_cooperator(struct cfq_data *cfq_data, - struct cfq_queue *cfqq) +static struct cfq_queue *cfqq_close(struct cfq_data *cfqd, + struct cfq_queue *cur_cfqq) +{ + struct rb_root *root = &cfqd->prio_trees[cur_cfqq->ioprio]; + struct rb_node *parent, *node; + struct cfq_queue *__cfqq; + sector_t sector = cfqd->last_position; + + if (RB_EMPTY_ROOT(root)) + return NULL; + + /* + * First, if we find a request starting at the end of the last + * request, choose it. + */ + __cfqq = cfq_prio_tree_lookup(cfqd, cur_cfqq->ioprio, + sector, &parent, NULL); + if (__cfqq) + return __cfqq; + + /* + * If the exact sector wasn't found, the parent of the NULL leaf + * will contain the closest sector. + */ + __cfqq = rb_entry(parent, struct cfq_queue, p_node); + if (cfq_rq_close(cfqd, __cfqq->next_rq)) + return __cfqq; + + if (__cfqq->next_rq->sector < sector) + node = rb_next(&__cfqq->p_node); + else + node = rb_prev(&__cfqq->p_node); + if (!node) + return NULL; + + __cfqq = rb_entry(node, struct cfq_queue, p_node); + if (cfq_rq_close(cfqd, __cfqq->next_rq)) + return __cfqq; + + return NULL; +} + +/* + * cfqd - obvious + * cur_cfqq - passed in so that we don't decide that the current queue is + * closely cooperating with itself. + * + * So, basically we're assuming that cur_cfqq has dispatched at least + * one request, and that cfqd->last_position reflects a position on the disk + * associated with the I/O issued by cur_cfqq. I'm not sure this is a valid + * assumption. + */ +static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd, + struct cfq_queue *cur_cfqq, + int probe) { + struct cfq_queue *cfqq; + + /* + * A valid cfq_io_context is necessary to compare requests against + * the seek_mean of the current cfqq.
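+	 * Without it there are no seek statistics (cic->seek_mean), so the
+	 * closeness test in cfq_rq_close() has nothing to compare against.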
+ */ + if (!cfqd->active_cic) + return NULL; + /* * We should notice if some of the queues are cooperating, eg * working closely on the same area of the disk. In that case, * we can group them together and don't waste time idling. */ - return 0; + cfqq = cfqq_close(cfqd, cur_cfqq); + if (!cfqq) + return NULL; + + if (cfq_cfqq_coop(cfqq)) + return NULL; + + if (!probe) + cfq_mark_cfqq_coop(cfqq); + return cfqq; } + #define CIC_SEEKY(cic) ((cic)->seek_mean > (8 * 1024)) static void cfq_arm_slice_timer(struct cfq_data *cfqd) @@ -920,13 +1078,6 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) if (!cic || !atomic_read(&cic->ioc->nr_tasks)) return; - /* - * See if this prio level has a good candidate - */ - if (cfq_close_cooperator(cfqd, cfqq) && - (sample_valid(cic->ttime_samples) && cic->ttime_mean > 2)) - return; - cfq_mark_cfqq_wait_request(cfqq); /* @@ -939,7 +1090,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) sl = min(sl, msecs_to_jiffies(CFQ_MIN_TT)); mod_timer(&cfqd->idle_slice_timer, jiffies + sl); - cfq_log(cfqd, "arm_idle: %lu", sl); + cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu", sl); } /* @@ -1003,7 +1154,7 @@ cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq) */ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) { - struct cfq_queue *cfqq; + struct cfq_queue *cfqq, *new_cfqq = NULL; cfqq = cfqd->active_queue; if (!cfqq) @@ -1037,6 +1188,16 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) goto keep_queue; /* + * If another queue has a request waiting within our mean seek + * distance, let it run. The expire code will check for close + * cooperators and put the close queue at the front of the service + * tree. + */ + new_cfqq = cfq_close_cooperator(cfqd, cfqq, 0); + if (new_cfqq) + goto expire; + + /* * No requests pending. If the active queue still has requests in * flight or is idling for a new request, allow either of these * conditions to happen (or time out) before selecting a new queue. 
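The close-cooperator logic above reduces to a nearest-neighbour query: find the queue in the priority tree whose next request starts closest to cfqd->last_position, and accept it only if it lies within the mean seek distance. A standalone sketch of that query follows, using a sorted array in place of the rbtree; struct queue, closest(), and the sample sectors are illustrative stand-ins, not kernel code.

#include <stdio.h>
#include <stdlib.h>

/* Stand-in for a cfq_queue keyed by the sector of its next request. */
struct queue {
	unsigned long long next_sector;
	const char *name;
};

static int cmp(const void *a, const void *b)
{
	const struct queue *x = a, *y = b;
	return (x->next_sector > y->next_sector) -
	       (x->next_sector < y->next_sector);
}

/*
 * Binary search for an exact match, else pick the nearer neighbour;
 * the sorted array plays the role of the rbtree, which yields the
 * same ordering. The final distance test mirrors cfq_rq_close().
 */
static struct queue *closest(struct queue *v, int n,
			     unsigned long long pos,
			     unsigned long long seek_mean)
{
	int lo = 0, hi = n - 1, best = -1;
	unsigned long long dist;

	while (lo <= hi) {
		int mid = (lo + hi) / 2;

		if (v[mid].next_sector == pos)
			return &v[mid];	/* request right at the head */
		if (v[mid].next_sector < pos)
			lo = mid + 1;
		else
			hi = mid - 1;
	}
	if (hi >= 0)			/* nearest element below pos */
		best = hi;
	if (lo < n && (best < 0 ||
		       v[lo].next_sector - pos < pos - v[best].next_sector))
		best = lo;		/* element above pos is closer */
	if (best < 0)
		return NULL;

	dist = v[best].next_sector > pos ? v[best].next_sector - pos
					 : pos - v[best].next_sector;
	return dist <= seek_mean ? &v[best] : NULL;
}

int main(void)
{
	struct queue q[] = {
		{ 100, "A" }, { 5000, "B" }, { 5120, "C" }, { 90000, "D" },
	};
	struct queue *c;

	qsort(q, 4, sizeof(q[0]), cmp);
	/* head at sector 5100, mean seek distance 64 sectors */
	c = closest(q, 4, 5100, 64);
	printf("%s\n", c ? c->name : "none");	/* prints C: 20 sectors away */
	return 0;
}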
@@ -1050,7 +1211,7 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) expire: cfq_slice_expired(cfqd, 0); new_queue: - cfqq = cfq_set_active_queue(cfqd); + cfqq = cfq_set_active_queue(cfqd, new_cfqq); keep_queue: return cfqq; } @@ -1333,14 +1494,14 @@ static void __cfq_exit_single_io_context(struct cfq_data *cfqd, if (ioc->ioc_data == cic) rcu_assign_pointer(ioc->ioc_data, NULL); - if (cic->cfqq[ASYNC]) { - cfq_exit_cfqq(cfqd, cic->cfqq[ASYNC]); - cic->cfqq[ASYNC] = NULL; + if (cic->cfqq[BLK_RW_ASYNC]) { + cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_ASYNC]); + cic->cfqq[BLK_RW_ASYNC] = NULL; } - if (cic->cfqq[SYNC]) { - cfq_exit_cfqq(cfqd, cic->cfqq[SYNC]); - cic->cfqq[SYNC] = NULL; + if (cic->cfqq[BLK_RW_SYNC]) { + cfq_exit_cfqq(cfqd, cic->cfqq[BLK_RW_SYNC]); + cic->cfqq[BLK_RW_SYNC] = NULL; } } @@ -1449,17 +1610,18 @@ static void changed_ioprio(struct io_context *ioc, struct cfq_io_context *cic) spin_lock_irqsave(cfqd->queue->queue_lock, flags); - cfqq = cic->cfqq[ASYNC]; + cfqq = cic->cfqq[BLK_RW_ASYNC]; if (cfqq) { struct cfq_queue *new_cfqq; - new_cfqq = cfq_get_queue(cfqd, ASYNC, cic->ioc, GFP_ATOMIC); + new_cfqq = cfq_get_queue(cfqd, BLK_RW_ASYNC, cic->ioc, + GFP_ATOMIC); if (new_cfqq) { - cic->cfqq[ASYNC] = new_cfqq; + cic->cfqq[BLK_RW_ASYNC] = new_cfqq; cfq_put_queue(cfqq); } } - cfqq = cic->cfqq[SYNC]; + cfqq = cic->cfqq[BLK_RW_SYNC]; if (cfqq) cfq_mark_cfqq_prio_changed(cfqq); @@ -1510,6 +1672,7 @@ retry: } RB_CLEAR_NODE(&cfqq->rb_node); + RB_CLEAR_NODE(&cfqq->p_node); INIT_LIST_HEAD(&cfqq->fifo); atomic_set(&cfqq->ref, 0); @@ -1905,10 +2068,20 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, * Remember that we saw a request from this process, but * don't start queuing just yet. Otherwise we risk seeing lots * of tiny requests, because we disrupt the normal plugging - * and merging. + * and merging. If the request is already larger than a single + * page, let it rip immediately. For that case we assume that + * merging is already done. Ditto for a busy system that + * has other work pending, don't risk delaying until the + * idle timer unplug to continue working. */ - if (cfq_cfqq_wait_request(cfqq)) + if (cfq_cfqq_wait_request(cfqq)) { + if (blk_rq_bytes(rq) > PAGE_CACHE_SIZE || + cfqd->busy_queues > 1) { + del_timer(&cfqd->idle_slice_timer); + blk_start_queueing(cfqd->queue); + } cfq_mark_cfqq_must_dispatch(cfqq); + } } else if (cfq_should_preempt(cfqd, cfqq, rq)) { /* * not the active queue - expire current slice if it is @@ -1992,16 +2165,24 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) * or if we want to idle in case it has no pending requests. */ if (cfqd->active_queue == cfqq) { + const bool cfqq_empty = RB_EMPTY_ROOT(&cfqq->sort_list); + if (cfq_cfqq_slice_new(cfqq)) { cfq_set_prio_slice(cfqd, cfqq); cfq_clear_cfqq_slice_new(cfqq); } + /* + * If there are no requests waiting in this queue, and + * there are other queues ready to issue requests, AND + * those other queues are issuing requests within our + * mean seek distance, give them a chance to run instead + * of idling. 
+ */ if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq)) cfq_slice_expired(cfqd, 1); - else if (sync && !rq_noidle(rq) && - RB_EMPTY_ROOT(&cfqq->sort_list)) { + else if (cfqq_empty && !cfq_close_cooperator(cfqd, cfqq, 1) && + sync && !rq_noidle(rq)) cfq_arm_slice_timer(cfqd); - } } if (!cfqd->rq_in_driver) @@ -2062,7 +2243,7 @@ static int cfq_may_queue(struct request_queue *q, int rw) if (!cic) return ELV_MQUEUE_MAY; - cfqq = cic_to_cfqq(cic, rw & REQ_RW_SYNC); + cfqq = cic_to_cfqq(cic, rw_is_sync(rw)); if (cfqq) { cfq_init_prio_data(cfqq, cic->ioc); cfq_prio_boost(cfqq); @@ -2152,11 +2333,10 @@ static void cfq_kick_queue(struct work_struct *work) struct cfq_data *cfqd = container_of(work, struct cfq_data, unplug_work); struct request_queue *q = cfqd->queue; - unsigned long flags; - spin_lock_irqsave(q->queue_lock, flags); + spin_lock_irq(q->queue_lock); blk_start_queueing(q); - spin_unlock_irqrestore(q->queue_lock, flags); + spin_unlock_irq(q->queue_lock); } /* diff --git a/block/elevator.c b/block/elevator.c index fb81bcc14a8..7073a907257 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -590,7 +590,7 @@ void elv_drain_elevator(struct request_queue *q) /* * Call with queue lock held, interrupts disabled */ -void elv_quisce_start(struct request_queue *q) +void elv_quiesce_start(struct request_queue *q) { queue_flag_set(QUEUE_FLAG_ELVSWITCH, q); @@ -607,7 +607,7 @@ void elv_quisce_start(struct request_queue *q) } } -void elv_quisce_end(struct request_queue *q) +void elv_quiesce_end(struct request_queue *q) { queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q); } @@ -1126,7 +1126,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) * Turn on BYPASS and drain all requests w/ elevator private data */ spin_lock_irq(q->queue_lock); - elv_quisce_start(q); + elv_quiesce_start(q); /* * Remember old elevator. 
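The elv_quisce_* spelling fix lands on the bracket that both queue_iostats_store() above and elevator_switch() below use around scheduler changes. Condensed to its bare pattern (a fragment for orientation, not a compilable function):

	spin_lock_irq(q->queue_lock);
	elv_quiesce_start(q);	/* set QUEUE_FLAG_ELVSWITCH, drain requests */

	/* ... modify elevator/queue state while no in-flight request
	 * carries elevator private data ... */

	elv_quiesce_end(q);	/* clear QUEUE_FLAG_ELVSWITCH */
	spin_unlock_irq(q->queue_lock);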
@@ -1150,7 +1150,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) */ elevator_exit(old_elevator); spin_lock_irq(q->queue_lock); - elv_quisce_end(q); + elv_quiesce_end(q); spin_unlock_irq(q->queue_lock); blk_add_trace_msg(q, "elv switch: %s", e->elevator_type->elevator_name); diff --git a/block/ioctl.c b/block/ioctl.c index 0f22e629b13..ad474d4bbcc 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -146,8 +146,6 @@ static int blk_ioctl_discard(struct block_device *bdev, uint64_t start, struct bio *bio; bio = bio_alloc(GFP_KERNEL, 0); - if (!bio) - return -ENOMEM; bio->bi_end_io = blk_ioc_discard_endio; bio->bi_bdev = bdev; diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 626ee274c5c..84b7f8709f4 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -217,7 +217,7 @@ static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq, static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr, struct bio *bio) { - int ret = 0; + int r, ret = 0; /* * fill in all the output members @@ -242,7 +242,9 @@ static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr, ret = -EFAULT; } - blk_rq_unmap_user(bio); + r = blk_rq_unmap_user(bio); + if (!ret) + ret = r; blk_put_request(rq); return ret; diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c index be204308cc1..9359613addc 100644 --- a/drivers/atm/solos-pci.c +++ b/drivers/atm/solos-pci.c @@ -1059,7 +1059,7 @@ static int fpga_probe(struct pci_dev *dev, const struct pci_device_id *id) goto out; } - err = pci_set_dma_mask(dev, DMA_32BIT_MASK); + err = pci_set_dma_mask(dev, DMA_BIT_MASK(32)); if (err) { dev_warn(&dev->dev, "Failed to set 32-bit DMA mask\n"); goto out; diff --git a/drivers/block/brd.c b/drivers/block/brd.c index bdd4f5f4557..5f7e64ba87e 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -275,8 +275,10 @@ static int brd_do_bvec(struct brd_device *brd, struct page *page, if (rw == READ) { copy_from_brd(mem + off, brd, sector, len); flush_dcache_page(page); - } else + } else { + flush_dcache_page(page); copy_to_brd(brd, mem + off, sector, len); + } kunmap_atomic(mem, KM_USER0); out: @@ -436,6 +438,7 @@ static struct brd_device *brd_alloc(int i) if (!brd->brd_queue) goto out_free_dev; blk_queue_make_request(brd->brd_queue, brd_make_request); + blk_queue_ordered(brd->brd_queue, QUEUE_ORDERED_TAG, NULL); blk_queue_max_sectors(brd->brd_queue, 1024); blk_queue_bounce_limit(brd->brd_queue, BLK_BOUNCE_ANY); diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 0ef6f08aa6e..4d4d5e0d3fa 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -3505,7 +3505,7 @@ static __devinit int cciss_message(struct pci_dev *pdev, unsigned char opcode, u /* The Inbound Post Queue only accepts 32-bit physical addresses for the CCISS commands, so they must be allocated from the lower 4GiB of memory. 
*/ - err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK); + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); if (err) { iounmap(vaddr); return -ENOMEM; diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c index 9d9490e22e0..3686912427b 100644 --- a/drivers/char/agp/intel-agp.c +++ b/drivers/char/agp/intel-agp.c @@ -2131,6 +2131,8 @@ static const struct intel_driver_description { { PCI_DEVICE_ID_INTEL_82845G_HB, PCI_DEVICE_ID_INTEL_82845G_IG, 0, "830M", &intel_845_driver, &intel_830_driver }, { PCI_DEVICE_ID_INTEL_82850_HB, 0, 0, "i850", &intel_850_driver, NULL }, + { PCI_DEVICE_ID_INTEL_82854_HB, PCI_DEVICE_ID_INTEL_82854_IG, 0, "854", + &intel_845_driver, &intel_830_driver }, { PCI_DEVICE_ID_INTEL_82855PM_HB, 0, 0, "855PM", &intel_845_driver, NULL }, { PCI_DEVICE_ID_INTEL_82855GM_HB, PCI_DEVICE_ID_INTEL_82855GM_IG, 0, "855GM", &intel_845_driver, &intel_830_driver }, @@ -2355,6 +2357,7 @@ static struct pci_device_id agp_intel_pci_table[] = { ID(PCI_DEVICE_ID_INTEL_82845_HB), ID(PCI_DEVICE_ID_INTEL_82845G_HB), ID(PCI_DEVICE_ID_INTEL_82850_HB), + ID(PCI_DEVICE_ID_INTEL_82854_HB), ID(PCI_DEVICE_ID_INTEL_82855PM_HB), ID(PCI_DEVICE_ID_INTEL_82855GM_HB), ID(PCI_DEVICE_ID_INTEL_82860_HB), diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index 6de020d078e..b0a6a3e5192 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c @@ -35,7 +35,6 @@ #include <linux/vt_kern.h> #include <linux/workqueue.h> #include <linux/kexec.h> -#include <linux/interrupt.h> #include <linux/hrtimer.h> #include <linux/oom.h> diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h index 28f2c3f959b..6ad95c8d636 100644 --- a/drivers/edac/edac_core.h +++ b/drivers/edac/edac_core.h @@ -767,11 +767,19 @@ static inline void pci_write_bits16(struct pci_dev *pdev, int offset, pci_write_config_word(pdev, offset, value); } -/* write all or some bits in a dword-register*/ +/* + * pci_write_bits32 + * + * edac local routine to do pci_write_config_dword, but adds + * a mask parameter. If mask is all ones, ignore the mask. 
+ * Otherwise utilize the mask to isolate specified bits + * + * write all or some bits in a dword-register + */ static inline void pci_write_bits32(struct pci_dev *pdev, int offset, u32 value, u32 mask) { - if (mask != 0xffff) { + if (mask != 0xffffffff) { u32 buf; pci_read_config_dword(pdev, offset, &buf); diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c index ca9113e1c10..a7d2c717d03 100644 --- a/drivers/edac/edac_device.c +++ b/drivers/edac/edac_device.c @@ -389,7 +389,7 @@ static void del_edac_device_from_global_list(struct edac_device_ctl_info */ static void edac_device_workq_function(struct work_struct *work_req) { - struct delayed_work *d_work = (struct delayed_work *)work_req; + struct delayed_work *d_work = to_delayed_work(work_req); struct edac_device_ctl_info *edac_dev = to_edac_device_ctl_work(d_work); mutex_lock(&device_ctls_mutex); diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 25d66940b4f..335b7ebdb11 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -260,7 +260,7 @@ static int edac_mc_assert_error_check_and_clear(void) */ static void edac_mc_workq_function(struct work_struct *work_req) { - struct delayed_work *d_work = (struct delayed_work *)work_req; + struct delayed_work *d_work = to_delayed_work(work_req); struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work); mutex_lock(&mem_ctls_mutex); diff --git a/drivers/edac/edac_pci.c b/drivers/edac/edac_pci.c index 5b150aea703..30b585b1d60 100644 --- a/drivers/edac/edac_pci.c +++ b/drivers/edac/edac_pci.c @@ -233,7 +233,7 @@ EXPORT_SYMBOL_GPL(edac_pci_find); */ static void edac_pci_workq_function(struct work_struct *work_req) { - struct delayed_work *d_work = (struct delayed_work *)work_req; + struct delayed_work *d_work = to_delayed_work(work_req); struct edac_pci_ctl_info *pci = to_edac_pci_ctl_work(d_work); int msec; unsigned long delay; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 3750d800304..473a8f7fbdb 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -446,6 +446,9 @@ struct drm_i915_gem_object { uint32_t tiling_mode; uint32_t stride; + /** Record of address bit 17 of each page at last unbind. 
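+	 * Compared against the current page addresses when the pages are
+	 * reacquired; pages whose bit 17 flipped get their contents
+	 * re-swizzled.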
*/ + long *bit_17; + /** AGP mapping type (AGP_USER_MEMORY or AGP_USER_CACHED_MEMORY */ uint32_t agp_type; @@ -635,9 +638,13 @@ int i915_gem_attach_phys_object(struct drm_device *dev, void i915_gem_detach_phys_object(struct drm_device *dev, struct drm_gem_object *obj); void i915_gem_free_all_phys_object(struct drm_device *dev); +int i915_gem_object_get_pages(struct drm_gem_object *obj); +void i915_gem_object_put_pages(struct drm_gem_object *obj); /* i915_gem_tiling.c */ void i915_gem_detect_bit_6_swizzle(struct drm_device *dev); +void i915_gem_object_do_bit_17_swizzle(struct drm_gem_object *obj); +void i915_gem_object_save_bit_17_swizzle(struct drm_gem_object *obj); /* i915_gem_debug.c */ void i915_gem_dump_object(struct drm_gem_object *obj, int len, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1449b452cc6..4642115902d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -43,8 +43,6 @@ static int i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj, uint64_t offset, uint64_t size); static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj); -static int i915_gem_object_get_pages(struct drm_gem_object *obj); -static void i915_gem_object_put_pages(struct drm_gem_object *obj); static int i915_gem_object_wait_rendering(struct drm_gem_object *obj); static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment); @@ -143,15 +141,27 @@ fast_shmem_read(struct page **pages, int length) { char __iomem *vaddr; - int ret; + int unwritten; vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0); if (vaddr == NULL) return -ENOMEM; - ret = __copy_to_user_inatomic(data, vaddr + page_offset, length); + unwritten = __copy_to_user_inatomic(data, vaddr + page_offset, length); kunmap_atomic(vaddr, KM_USER0); - return ret; + if (unwritten) + return -EFAULT; + + return 0; +} + +static int i915_gem_object_needs_bit17_swizzle(struct drm_gem_object *obj) +{ + drm_i915_private_t *dev_priv = obj->dev->dev_private; + struct drm_i915_gem_object *obj_priv = obj->driver_private; + + return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 && + obj_priv->tiling_mode != I915_TILING_NONE; } static inline int @@ -181,6 +191,64 @@ slow_shmem_copy(struct page *dst_page, return 0; } +static inline int +slow_shmem_bit17_copy(struct page *gpu_page, + int gpu_offset, + struct page *cpu_page, + int cpu_offset, + int length, + int is_read) +{ + char *gpu_vaddr, *cpu_vaddr; + + /* Use the unswizzled path if this page isn't affected. */ + if ((page_to_phys(gpu_page) & (1 << 17)) == 0) { + if (is_read) + return slow_shmem_copy(cpu_page, cpu_offset, + gpu_page, gpu_offset, length); + else + return slow_shmem_copy(gpu_page, gpu_offset, + cpu_page, cpu_offset, length); + } + + gpu_vaddr = kmap_atomic(gpu_page, KM_USER0); + if (gpu_vaddr == NULL) + return -ENOMEM; + + cpu_vaddr = kmap_atomic(cpu_page, KM_USER1); + if (cpu_vaddr == NULL) { + kunmap_atomic(gpu_vaddr, KM_USER0); + return -ENOMEM; + } + + /* Copy the data, XORing A6 with A17 (1). 
The user already knows he's + * XORing with the other bits (A9 for Y, A9 and A10 for X) + */ + while (length > 0) { + int cacheline_end = ALIGN(gpu_offset + 1, 64); + int this_length = min(cacheline_end - gpu_offset, length); + int swizzled_gpu_offset = gpu_offset ^ 64; + + if (is_read) { + memcpy(cpu_vaddr + cpu_offset, + gpu_vaddr + swizzled_gpu_offset, + this_length); + } else { + memcpy(gpu_vaddr + swizzled_gpu_offset, + cpu_vaddr + cpu_offset, + this_length); + } + cpu_offset += this_length; + gpu_offset += this_length; + length -= this_length; + } + + kunmap_atomic(cpu_vaddr, KM_USER1); + kunmap_atomic(gpu_vaddr, KM_USER0); + + return 0; +} + /** * This is the fast shmem pread path, which attempts to copy_from_user directly * from the backing pages of the object to the user's address space. On a @@ -269,6 +337,7 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj, int page_length; int ret; uint64_t data_ptr = args->data_ptr; + int do_bit17_swizzling; remain = args->size; @@ -286,13 +355,15 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj, down_read(&mm->mmap_sem); pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr, - num_pages, 0, 0, user_pages, NULL); + num_pages, 1, 0, user_pages, NULL); up_read(&mm->mmap_sem); if (pinned_pages < num_pages) { ret = -EFAULT; goto fail_put_user_pages; } + do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); + mutex_lock(&dev->struct_mutex); ret = i915_gem_object_get_pages(obj); @@ -327,11 +398,20 @@ i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj, if ((data_page_offset + page_length) > PAGE_SIZE) page_length = PAGE_SIZE - data_page_offset; - ret = slow_shmem_copy(user_pages[data_page_index], - data_page_offset, - obj_priv->pages[shmem_page_index], - shmem_page_offset, - page_length); + if (do_bit17_swizzling) { + ret = slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index], + shmem_page_offset, + user_pages[data_page_index], + data_page_offset, + page_length, + 1); + } else { + ret = slow_shmem_copy(user_pages[data_page_index], + data_page_offset, + obj_priv->pages[shmem_page_index], + shmem_page_offset, + page_length); + } if (ret) goto fail_put_pages; @@ -383,9 +463,14 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, return -EINVAL; } - ret = i915_gem_shmem_pread_fast(dev, obj, args, file_priv); - if (ret != 0) + if (i915_gem_object_needs_bit17_swizzle(obj)) { ret = i915_gem_shmem_pread_slow(dev, obj, args, file_priv); + } else { + ret = i915_gem_shmem_pread_fast(dev, obj, args, file_priv); + if (ret != 0) + ret = i915_gem_shmem_pread_slow(dev, obj, args, + file_priv); + } drm_gem_object_unreference(obj); @@ -727,6 +812,7 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj, int page_length; int ret; uint64_t data_ptr = args->data_ptr; + int do_bit17_swizzling; remain = args->size; @@ -751,6 +837,8 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj, goto fail_put_user_pages; } + do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); + mutex_lock(&dev->struct_mutex); ret = i915_gem_object_get_pages(obj); @@ -785,11 +873,20 @@ i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj, if ((data_page_offset + page_length) > PAGE_SIZE) page_length = PAGE_SIZE - data_page_offset; - ret = slow_shmem_copy(obj_priv->pages[shmem_page_index], - shmem_page_offset, - user_pages[data_page_index], - data_page_offset, - page_length); + if 
(do_bit17_swizzling) { + ret = slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index], + shmem_page_offset, + user_pages[data_page_index], + data_page_offset, + page_length, + 0); + } else { + ret = slow_shmem_copy(obj_priv->pages[shmem_page_index], + shmem_page_offset, + user_pages[data_page_index], + data_page_offset, + page_length); + } if (ret) goto fail_put_pages; @@ -854,6 +951,8 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, ret = i915_gem_gtt_pwrite_slow(dev, obj, args, file_priv); } + } else if (i915_gem_object_needs_bit17_swizzle(obj)) { + ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file_priv); } else { ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file_priv); if (ret == -EFAULT) { @@ -1285,7 +1384,7 @@ i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data, return 0; } -static void +void i915_gem_object_put_pages(struct drm_gem_object *obj) { struct drm_i915_gem_object *obj_priv = obj->driver_private; @@ -1297,6 +1396,9 @@ i915_gem_object_put_pages(struct drm_gem_object *obj) if (--obj_priv->pages_refcount != 0) return; + if (obj_priv->tiling_mode != I915_TILING_NONE) + i915_gem_object_save_bit_17_swizzle(obj); + for (i = 0; i < page_count; i++) if (obj_priv->pages[i] != NULL) { if (obj_priv->dirty) @@ -1494,8 +1596,19 @@ i915_gem_retire_request(struct drm_device *dev, if (obj->write_domain != 0) i915_gem_object_move_to_flushing(obj); - else + else { + /* Take a reference on the object so it won't be + * freed while the spinlock is held. The list + * protection for this spinlock is safe when breaking + * the lock like this since the next thing we do + * is just get the head of the list again. + */ + drm_gem_object_reference(obj); i915_gem_object_move_to_inactive(obj); + spin_unlock(&dev_priv->mm.active_list_lock); + drm_gem_object_unreference(obj); + spin_lock(&dev_priv->mm.active_list_lock); + } } out: spin_unlock(&dev_priv->mm.active_list_lock); @@ -1884,7 +1997,7 @@ i915_gem_evict_everything(struct drm_device *dev) return ret; } -static int +int i915_gem_object_get_pages(struct drm_gem_object *obj) { struct drm_i915_gem_object *obj_priv = obj->driver_private; @@ -1922,6 +2035,10 @@ i915_gem_object_get_pages(struct drm_gem_object *obj) } obj_priv->pages[i] = page; } + + if (obj_priv->tiling_mode != I915_TILING_NONE) + i915_gem_object_do_bit_17_swizzle(obj); + return 0; } @@ -3002,13 +3119,13 @@ i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object *exec_list, drm_free(*relocs, reloc_count * sizeof(**relocs), DRM_MEM_DRIVER); *relocs = NULL; - return ret; + return -EFAULT; } reloc_index += exec_list[i].relocation_count; } - return ret; + return 0; } static int @@ -3017,23 +3134,28 @@ i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object *exec_list, struct drm_i915_gem_relocation_entry *relocs) { uint32_t reloc_count = 0, i; - int ret; + int ret = 0; for (i = 0; i < buffer_count; i++) { struct drm_i915_gem_relocation_entry __user *user_relocs; + int unwritten; user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr; - if (ret == 0) { - ret = copy_to_user(user_relocs, - &relocs[reloc_count], - exec_list[i].relocation_count * - sizeof(*relocs)); + unwritten = copy_to_user(user_relocs, + &relocs[reloc_count], + exec_list[i].relocation_count * + sizeof(*relocs)); + + if (unwritten) { + ret = -EFAULT; + goto err; } reloc_count += exec_list[i].relocation_count; } +err: drm_free(relocs, reloc_count * sizeof(*relocs), DRM_MEM_DRIVER); return ret; @@ -3243,7 +3365,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, exec_offset = 
exec_list[args->buffer_count - 1].offset; #if WATCH_EXEC - i915_gem_dump_object(object_list[args->buffer_count - 1], + i915_gem_dump_object(batch_obj, args->batch_len, __func__, ~0); @@ -3308,10 +3430,12 @@ err: (uintptr_t) args->buffers_ptr, exec_list, sizeof(*exec_list) * args->buffer_count); - if (ret) + if (ret) { + ret = -EFAULT; DRM_ERROR("failed to copy %d exec entries " "back to user (%d)\n", args->buffer_count, ret); + } } /* Copy the updated relocations out regardless of current error @@ -3593,6 +3717,7 @@ void i915_gem_free_object(struct drm_gem_object *obj) i915_gem_free_mmap_offset(obj); drm_free(obj_priv->page_cpu_valid, 1, DRM_MEM_DRIVER); + kfree(obj_priv->bit_17); drm_free(obj->driver_private, 1, DRM_MEM_DRIVER); } diff --git a/drivers/gpu/drm/i915/i915_gem_debugfs.c b/drivers/gpu/drm/i915/i915_gem_debugfs.c index a1ac0c5e730..986f1082c59 100644 --- a/drivers/gpu/drm/i915/i915_gem_debugfs.c +++ b/drivers/gpu/drm/i915/i915_gem_debugfs.c @@ -234,6 +234,96 @@ static int i915_hws_info(struct seq_file *m, void *data) return 0; } +static void i915_dump_pages(struct seq_file *m, struct page **pages, int page_count) +{ + int page, i; + uint32_t *mem; + + for (page = 0; page < page_count; page++) { + mem = kmap(pages[page]); + for (i = 0; i < PAGE_SIZE; i += 4) + seq_printf(m, "%08x : %08x\n", i, mem[i / 4]); + kunmap(pages[page]); + } +} + +static int i915_batchbuffer_info(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct drm_device *dev = node->minor->dev; + drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_gem_object *obj; + struct drm_i915_gem_object *obj_priv; + int ret; + + spin_lock(&dev_priv->mm.active_list_lock); + + list_for_each_entry(obj_priv, &dev_priv->mm.active_list, list) { + obj = obj_priv->obj; + if (obj->read_domains & I915_GEM_DOMAIN_COMMAND) { + ret = i915_gem_object_get_pages(obj); + if (ret) { + DRM_ERROR("Failed to get pages: %d\n", ret); + spin_unlock(&dev_priv->mm.active_list_lock); + return ret; + } + + seq_printf(m, "--- gtt_offset = 0x%08x\n", obj_priv->gtt_offset); + i915_dump_pages(m, obj_priv->pages, obj->size / PAGE_SIZE); + + i915_gem_object_put_pages(obj); + } + } + + spin_unlock(&dev_priv->mm.active_list_lock); + + return 0; +} + +static int i915_ringbuffer_data(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct drm_device *dev = node->minor->dev; + drm_i915_private_t *dev_priv = dev->dev_private; + u8 *virt; + uint32_t *ptr, off; + + if (!dev_priv->ring.ring_obj) { + seq_printf(m, "No ringbuffer setup\n"); + return 0; + } + + virt = dev_priv->ring.virtual_start; + + for (off = 0; off < dev_priv->ring.Size; off += 4) { + ptr = (uint32_t *)(virt + off); + seq_printf(m, "%08x : %08x\n", off, *ptr); + } + + return 0; +} + +static int i915_ringbuffer_info(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct drm_device *dev = node->minor->dev; + drm_i915_private_t *dev_priv = dev->dev_private; + unsigned int head, tail, mask; + + head = I915_READ(PRB0_HEAD) & HEAD_ADDR; + tail = I915_READ(PRB0_TAIL) & TAIL_ADDR; + mask = dev_priv->ring.tail_mask; + + seq_printf(m, "RingHead : %08x\n", head); + seq_printf(m, "RingTail : %08x\n", tail); + seq_printf(m, "RingMask : %08x\n", mask); + seq_printf(m, "RingSize : %08lx\n", dev_priv->ring.Size); + seq_printf(m, "Acthd : %08x\n", I915_READ(IS_I965G(dev) ? 
ACTHD_I965 : ACTHD)); + + return 0; +} + + static struct drm_info_list i915_gem_debugfs_list[] = { {"i915_gem_active", i915_gem_object_list_info, 0, (void *) ACTIVE_LIST}, {"i915_gem_flushing", i915_gem_object_list_info, 0, (void *) FLUSHING_LIST}, @@ -243,6 +333,9 @@ static struct drm_info_list i915_gem_debugfs_list[] = { {"i915_gem_fence_regs", i915_gem_fence_regs_info, 0}, {"i915_gem_interrupt", i915_interrupt_info, 0}, {"i915_gem_hws", i915_hws_info, 0}, + {"i915_ringbuffer_data", i915_ringbuffer_data, 0}, + {"i915_ringbuffer_info", i915_ringbuffer_info, 0}, + {"i915_batchbuffers", i915_batchbuffer_info, 0}, }; #define I915_GEM_DEBUGFS_ENTRIES ARRAY_SIZE(i915_gem_debugfs_list) diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 6be3f927c86..f27e523c764 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -25,6 +25,8 @@ * */ +#include "linux/string.h" +#include "linux/bitops.h" #include "drmP.h" #include "drm.h" #include "i915_drm.h" @@ -127,8 +129,8 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev) swizzle_y = I915_BIT_6_SWIZZLE_9_11; } else { /* Bit 17 swizzling by the CPU in addition. */ - swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; - swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; + swizzle_x = I915_BIT_6_SWIZZLE_9_10_17; + swizzle_y = I915_BIT_6_SWIZZLE_9_17; } break; } @@ -288,6 +290,19 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x; else args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y; + + /* Hide bit 17 swizzling from the user. This prevents old Mesa + * from aborting the application on sw fallbacks to bit 17, + * and we use the pread/pwrite bit17 paths to swizzle for it. + * If there was a user that was relying on the swizzle + * information for drm_intel_bo_map()ed reads/writes this would + * break it, but we don't have any of those. + */ + if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_17) + args->swizzle_mode = I915_BIT_6_SWIZZLE_9; + if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17) + args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10; + /* If we can't handle the swizzling, make it untiled. */ if (args->swizzle_mode == I915_BIT_6_SWIZZLE_UNKNOWN) { args->tiling_mode = I915_TILING_NONE; @@ -354,8 +369,100 @@ i915_gem_get_tiling(struct drm_device *dev, void *data, DRM_ERROR("unknown tiling mode\n"); } + /* Hide bit 17 from the user -- see comment in i915_gem_set_tiling */ + if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_17) + args->swizzle_mode = I915_BIT_6_SWIZZLE_9; + if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17) + args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10; + drm_gem_object_unreference(obj); mutex_unlock(&dev->struct_mutex); return 0; } + +/** + * Swap every 64 bytes of this page around, to account for it having a new + * bit 17 of its physical address and therefore being interpreted differently + * by the GPU. 
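+ * When bit 17 flips, the two 64-byte halves of every 128-byte block
+ * trade places in the GPU's swizzled view, so exchanging them in
+ * memory restores the layout the GPU expects.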
+ */ +static int +i915_gem_swizzle_page(struct page *page) +{ + char *vaddr; + int i; + char temp[64]; + + vaddr = kmap(page); + if (vaddr == NULL) + return -ENOMEM; + + for (i = 0; i < PAGE_SIZE; i += 128) { + memcpy(temp, &vaddr[i], 64); + memcpy(&vaddr[i], &vaddr[i + 64], 64); + memcpy(&vaddr[i + 64], temp, 64); + } + + kunmap(page); + + return 0; +} + +void +i915_gem_object_do_bit_17_swizzle(struct drm_gem_object *obj) +{ + struct drm_device *dev = obj->dev; + drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_i915_gem_object *obj_priv = obj->driver_private; + int page_count = obj->size >> PAGE_SHIFT; + int i; + + if (dev_priv->mm.bit_6_swizzle_x != I915_BIT_6_SWIZZLE_9_10_17) + return; + + if (obj_priv->bit_17 == NULL) + return; + + for (i = 0; i < page_count; i++) { + char new_bit_17 = page_to_phys(obj_priv->pages[i]) >> 17; + if ((new_bit_17 & 0x1) != + (test_bit(i, obj_priv->bit_17) != 0)) { + int ret = i915_gem_swizzle_page(obj_priv->pages[i]); + if (ret != 0) { + DRM_ERROR("Failed to swizzle page\n"); + return; + } + set_page_dirty(obj_priv->pages[i]); + } + } +} + +void +i915_gem_object_save_bit_17_swizzle(struct drm_gem_object *obj) +{ + struct drm_device *dev = obj->dev; + drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_i915_gem_object *obj_priv = obj->driver_private; + int page_count = obj->size >> PAGE_SHIFT; + int i; + + if (dev_priv->mm.bit_6_swizzle_x != I915_BIT_6_SWIZZLE_9_10_17) + return; + + if (obj_priv->bit_17 == NULL) { + obj_priv->bit_17 = kmalloc(BITS_TO_LONGS(page_count) * + sizeof(long), GFP_KERNEL); + if (obj_priv->bit_17 == NULL) { + DRM_ERROR("Failed to allocate memory for bit 17 " + "record\n"); + return; + } + } + + for (i = 0; i < page_count; i++) { + if (page_to_phys(obj_priv->pages[i]) & (1 << 17)) + __set_bit(i, obj_priv->bit_17); + else + __clear_bit(i, obj_priv->bit_17); + } +} diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 64773ce5296..c2c8e95ff14 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -367,6 +367,7 @@ static const intel_limit_t intel_limits[] = { .p1 = { .min = I9XX_P1_MIN, .max = I9XX_P1_MAX }, .p2 = { .dot_limit = I9XX_P2_SDVO_DAC_SLOW_LIMIT, .p2_slow = I9XX_P2_SDVO_DAC_SLOW, .p2_fast = I9XX_P2_SDVO_DAC_FAST }, + .find_pll = intel_find_best_PLL, }, { /* INTEL_LIMIT_IGD_LVDS */ .dot = { .min = I9XX_DOT_MIN, .max = I9XX_DOT_MAX }, @@ -380,6 +381,7 @@ static const intel_limit_t intel_limits[] = { /* IGD only supports single-channel mode. 
*/ .p2 = { .dot_limit = I9XX_P2_LVDS_SLOW_LIMIT, .p2_slow = I9XX_P2_LVDS_SLOW, .p2_fast = I9XX_P2_LVDS_SLOW }, + .find_pll = intel_find_best_PLL, }, }; diff --git a/drivers/gpu/drm/i915/intel_fb.c b/drivers/gpu/drm/i915/intel_fb.c index b7f0ebe9f81..3e094beecb9 100644 --- a/drivers/gpu/drm/i915/intel_fb.c +++ b/drivers/gpu/drm/i915/intel_fb.c @@ -864,8 +864,8 @@ static void intelfb_sysrq(int dummy1, struct tty_struct *dummy3) static struct sysrq_key_op sysrq_intelfb_restore_op = { .handler = intelfb_sysrq, - .help_msg = "force fb", - .action_msg = "force restore of fb console", + .help_msg = "force-fb(G)", + .action_msg = "Restore framebuffer console", }; int intelfb_probe(struct drm_device *dev) diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index b06a4a3ff08..55037422538 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -38,7 +38,7 @@ struct intel_hdmi_priv { u32 sdvox_reg; u32 save_SDVOX; - int has_hdmi_sink; + bool has_hdmi_sink; }; static void intel_hdmi_mode_set(struct drm_encoder *encoder, @@ -128,6 +128,22 @@ static bool intel_hdmi_mode_fixup(struct drm_encoder *encoder, return true; } +static void +intel_hdmi_sink_detect(struct drm_connector *connector) +{ + struct intel_output *intel_output = to_intel_output(connector); + struct intel_hdmi_priv *hdmi_priv = intel_output->dev_priv; + struct edid *edid = NULL; + + edid = drm_get_edid(&intel_output->base, + &intel_output->ddc_bus->adapter); + if (edid != NULL) { + hdmi_priv->has_hdmi_sink = drm_detect_hdmi_monitor(edid); + kfree(edid); + intel_output->base.display_info.raw_edid = NULL; + } +} + static enum drm_connector_status intel_hdmi_detect(struct drm_connector *connector) { @@ -158,9 +174,10 @@ intel_hdmi_detect(struct drm_connector *connector) return connector_status_unknown; } - if ((I915_READ(PORT_HOTPLUG_STAT) & bit) != 0) + if ((I915_READ(PORT_HOTPLUG_STAT) & bit) != 0) { + intel_hdmi_sink_detect(connector); return connector_status_connected; - else + } else return connector_status_disconnected; } diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 7b31f55f55c..9913651c1e1 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -1357,6 +1357,23 @@ void intel_sdvo_set_hotplug(struct drm_connector *connector, int on) intel_sdvo_read_response(intel_output, &response, 2); } +static void +intel_sdvo_hdmi_sink_detect(struct drm_connector *connector) +{ + struct intel_output *intel_output = to_intel_output(connector); + struct intel_sdvo_priv *sdvo_priv = intel_output->dev_priv; + struct edid *edid = NULL; + + intel_sdvo_set_control_bus_switch(intel_output, sdvo_priv->ddc_bus); + edid = drm_get_edid(&intel_output->base, + &intel_output->ddc_bus->adapter); + if (edid != NULL) { + sdvo_priv->is_hdmi = drm_detect_hdmi_monitor(edid); + kfree(edid); + intel_output->base.display_info.raw_edid = NULL; + } +} + static enum drm_connector_status intel_sdvo_detect(struct drm_connector *connector) { u8 response[2]; @@ -1371,9 +1388,10 @@ static enum drm_connector_status intel_sdvo_detect(struct drm_connector *connect if (status != SDVO_CMD_STATUS_SUCCESS) return connector_status_unknown; - if ((response[0] != 0) || (response[1] != 0)) + if ((response[0] != 0) || (response[1] != 0)) { + intel_sdvo_hdmi_sink_detect(connector); return connector_status_connected; - else + } else return connector_status_disconnected; } diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 
0e8a9185f67..d73f5f473e3 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -692,6 +692,16 @@ config SENSORS_PCF8591 These devices are hard to detect and rarely found on mainstream hardware. If unsure, say N. +config SENSORS_SHT15 + tristate "Sensirion humidity and temperature sensors. SHT15 and compat." + depends on GENERIC_GPIO + help + If you say yes here you get support for the Sensirion SHT10, SHT11, + SHT15, SHT71, SHT75 humidity and temperature sensors. + + This driver can also be built as a module. If so, the module + will be called sht15. + config SENSORS_SIS5595 tristate "Silicon Integrated Systems Corp. SiS5595" depends on PCI diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile index 1d3757837b4..0ae26984ba4 100644 --- a/drivers/hwmon/Makefile +++ b/drivers/hwmon/Makefile @@ -76,6 +76,7 @@ obj-$(CONFIG_SENSORS_MAX6650) += max6650.o obj-$(CONFIG_SENSORS_PC87360) += pc87360.o obj-$(CONFIG_SENSORS_PC87427) += pc87427.o obj-$(CONFIG_SENSORS_PCF8591) += pcf8591.o +obj-$(CONFIG_SENSORS_SHT15) += sht15.o obj-$(CONFIG_SENSORS_SIS5595) += sis5595.o obj-$(CONFIG_SENSORS_SMSC47B397)+= smsc47b397.o obj-$(CONFIG_SENSORS_SMSC47M1) += smsc47m1.o diff --git a/drivers/hwmon/hp_accel.c b/drivers/hwmon/hp_accel.c index 55d3dc565be..abca7e9f953 100644 --- a/drivers/hwmon/hp_accel.c +++ b/drivers/hwmon/hp_accel.c @@ -34,7 +34,6 @@ #include <linux/wait.h> #include <linux/poll.h> #include <linux/freezer.h> -#include <linux/version.h> #include <linux/uaccess.h> #include <linux/leds.h> #include <acpi/acpi_drivers.h> diff --git a/drivers/hwmon/sht15.c b/drivers/hwmon/sht15.c new file mode 100644 index 00000000000..6cbdc2fea73 --- /dev/null +++ b/drivers/hwmon/sht15.c @@ -0,0 +1,692 @@ +/* + * sht15.c - support for the SHT15 Temperature and Humidity Sensor + * + * Copyright (c) 2009 Jonathan Cameron + * + * Copyright (c) 2007 Wouter Horre + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Currently ignoring checksum on readings. + * Default resolution only (14bit temp, 12bit humidity) + * Ignoring battery status. + * Heater not enabled. + * Timings are all conservative. + * + * Data sheet available (1/2009) at + * http://www.sensirion.ch/en/pdf/product_information/Datasheet-humidity-sensor-SHT1x.pdf + * + * Regulator supply name = vcc + */ + +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/gpio.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/hwmon.h> +#include <linux/hwmon-sysfs.h> +#include <linux/mutex.h> +#include <linux/platform_device.h> +#include <linux/delay.h> +#include <linux/jiffies.h> +#include <linux/err.h> +#include <linux/sht15.h> +#include <linux/regulator/consumer.h> +#include <asm/atomic.h> + +#define SHT15_MEASURE_TEMP 3 +#define SHT15_MEASURE_RH 5 + +#define SHT15_READING_NOTHING 0 +#define SHT15_READING_TEMP 1 +#define SHT15_READING_HUMID 2 + +/* Min timings in nsecs */ +#define SHT15_TSCKL 100 /* clock low */ +#define SHT15_TSCKH 100 /* clock high */ +#define SHT15_TSU 150 /* data setup time */ + +/** + * struct sht15_temppair - elements of voltage dependent temp calc + * @vdd: supply voltage in microvolts + * @d1: see data sheet + */ +struct sht15_temppair { + int vdd; /* microvolts */ + int d1; +}; + +/* Table 9 from data sheet - relates temperature calculation + * to supply voltage.
+ */ +static const struct sht15_temppair temppoints[] = { + { 2500000, -39400 }, + { 3000000, -39600 }, + { 3500000, -39700 }, + { 4000000, -39800 }, + { 5000000, -40100 }, +}; + +/** + * struct sht15_data - device instance specific data + * @pdata: platform data (GPIOs etc) + * @read_work: bh of interrupt handler + * @wait_queue: wait queue for getting values from device + * @val_temp: last temperature value read from device + * @val_humid: last humidity value read from device + * @flag: status flag used to identify what the last request was + * @valid: are the current stored values valid (start condition) + * @last_updat: time of last update + * @read_lock: mutex to ensure only one read in progress + * at a time. + * @dev: associated device structure + * @hwmon_dev: device associated with hwmon subsystem + * @reg: associated regulator (if specified) + * @nb: notifier block to handle notifications of voltage changes + * @supply_uV: local copy of the supply voltage, refreshed via the + * regulator consumer when one is available + * @supply_uV_valid: cleared when an updated value has not yet been + * obtained from the regulator, in which case any calculations + * based upon supply_uV will be invalid. + * @update_supply_work: work struct that is used to update the supply_uV + * @interrupt_handled: flag used to indicate a handler has been scheduled + */ +struct sht15_data { + struct sht15_platform_data *pdata; + struct work_struct read_work; + wait_queue_head_t wait_queue; + uint16_t val_temp; + uint16_t val_humid; + u8 flag; + u8 valid; + unsigned long last_updat; + struct mutex read_lock; + struct device *dev; + struct device *hwmon_dev; + struct regulator *reg; + struct notifier_block nb; + int supply_uV; + int supply_uV_valid; + struct work_struct update_supply_work; + atomic_t interrupt_handled; +}; + +/** + * sht15_connection_reset() - reset the comms interface + * @data: sht15 specific data + * + * This implements section 3.4 of the data sheet + */ +static void sht15_connection_reset(struct sht15_data *data) +{ + int i; + gpio_direction_output(data->pdata->gpio_data, 1); + ndelay(SHT15_TSCKL); + gpio_set_value(data->pdata->gpio_sck, 0); + ndelay(SHT15_TSCKL); + for (i = 0; i < 9; ++i) { + gpio_set_value(data->pdata->gpio_sck, 1); + ndelay(SHT15_TSCKH); + gpio_set_value(data->pdata->gpio_sck, 0); + ndelay(SHT15_TSCKL); + } +} +/** + * sht15_send_bit() - send an individual bit to the device + * @data: device state data + * @val: value of bit to be sent + **/ +static inline void sht15_send_bit(struct sht15_data *data, int val) +{ + + gpio_set_value(data->pdata->gpio_data, val); + ndelay(SHT15_TSU); + gpio_set_value(data->pdata->gpio_sck, 1); + ndelay(SHT15_TSCKH); + gpio_set_value(data->pdata->gpio_sck, 0); + ndelay(SHT15_TSCKL); /* clock low time */ +} + +/** + * sht15_transmission_start() - specific sequence for new transmission + * + * @data: device state data + * Timings for this are not documented on the data sheet, so very + * conservative ones are used in this implementation. This implements + * figure 12 on the data sheet.
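+ *
+ * A rough sketch of the sequence implemented below (inferred from the
+ * code, not copied from the data sheet):
+ *
+ *           ____      ____
+ * SCK  ____|    |____|    |____
+ *      ______         _________
+ * DATA       |_______|
+ *
+ * i.e. DATA falls while SCK is high and rises again during the
+ * following SCK-high period.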
+ **/ +static void sht15_transmission_start(struct sht15_data *data) +{ + /* ensure data is high and output */ + gpio_direction_output(data->pdata->gpio_data, 1); + ndelay(SHT15_TSU); + gpio_set_value(data->pdata->gpio_sck, 0); + ndelay(SHT15_TSCKL); + gpio_set_value(data->pdata->gpio_sck, 1); + ndelay(SHT15_TSCKH); + gpio_set_value(data->pdata->gpio_data, 0); + ndelay(SHT15_TSU); + gpio_set_value(data->pdata->gpio_sck, 0); + ndelay(SHT15_TSCKL); + gpio_set_value(data->pdata->gpio_sck, 1); + ndelay(SHT15_TSCKH); + gpio_set_value(data->pdata->gpio_data, 1); + ndelay(SHT15_TSU); + gpio_set_value(data->pdata->gpio_sck, 0); + ndelay(SHT15_TSCKL); +} +/** + * sht15_send_byte() - send a single byte to the device + * @data: device state + * @byte: value to be sent + **/ +static void sht15_send_byte(struct sht15_data *data, u8 byte) +{ + int i; + for (i = 0; i < 8; i++) { + sht15_send_bit(data, !!(byte & 0x80)); + byte <<= 1; + } +} +/** + * sht15_wait_for_response() - checks for ack from device + * @data: device state + **/ +static int sht15_wait_for_response(struct sht15_data *data) +{ + gpio_direction_input(data->pdata->gpio_data); + gpio_set_value(data->pdata->gpio_sck, 1); + ndelay(SHT15_TSCKH); + if (gpio_get_value(data->pdata->gpio_data)) { + gpio_set_value(data->pdata->gpio_sck, 0); + dev_err(data->dev, "Command not acknowledged\n"); + sht15_connection_reset(data); + return -EIO; + } + gpio_set_value(data->pdata->gpio_sck, 0); + ndelay(SHT15_TSCKL); + return 0; +} + +/** + * sht15_send_cmd() - Sends a command to the device. + * @data: device state + * @cmd: command byte to be sent + * + * On entry, sck is output low, data is output and pulled high + * and the interrupt disabled. + **/ +static int sht15_send_cmd(struct sht15_data *data, u8 cmd) +{ + int ret = 0; + sht15_transmission_start(data); + sht15_send_byte(data, cmd); + ret = sht15_wait_for_response(data); + return ret; +} +/** + * sht15_update_single_val() - get a new value from device + * @data: device instance specific data + * @command: command sent to request value + * @timeout_msecs: timeout after which comms are assumed + * to have failed and are reset. + **/ +static inline int sht15_update_single_val(struct sht15_data *data, + int command, + int timeout_msecs) +{ + int ret; + ret = sht15_send_cmd(data, command); + if (ret) + return ret; + + gpio_direction_input(data->pdata->gpio_data); + atomic_set(&data->interrupt_handled, 0); + + enable_irq(gpio_to_irq(data->pdata->gpio_data)); + if (gpio_get_value(data->pdata->gpio_data) == 0) { + disable_irq_nosync(gpio_to_irq(data->pdata->gpio_data)); + /* Only relevant if the interrupt hasn't occurred.
*/ + if (!atomic_read(&data->interrupt_handled)) + schedule_work(&data->read_work); + } + ret = wait_event_timeout(data->wait_queue, + (data->flag == SHT15_READING_NOTHING), + msecs_to_jiffies(timeout_msecs)); + if (ret == 0) {/* timeout occurred */ + disable_irq_nosync(gpio_to_irq(data->pdata->gpio_data)); + sht15_connection_reset(data); + return -ETIME; + } + return 0; +} + +/** + * sht15_update_vals() - get updated readings from device if too old + * @data: device state + **/ +static int sht15_update_vals(struct sht15_data *data) +{ + int ret = 0; + int timeout = HZ; + + mutex_lock(&data->read_lock); + if (time_after(jiffies, data->last_updat + timeout) + || !data->valid) { + data->flag = SHT15_READING_HUMID; + ret = sht15_update_single_val(data, SHT15_MEASURE_RH, 160); + if (ret) + goto error_ret; + data->flag = SHT15_READING_TEMP; + ret = sht15_update_single_val(data, SHT15_MEASURE_TEMP, 400); + if (ret) + goto error_ret; + data->valid = 1; + data->last_updat = jiffies; + } +error_ret: + mutex_unlock(&data->read_lock); + + return ret; +} + +/** + * sht15_calc_temp() - convert the raw reading to a temperature + * @data: device state + * + * As per section 4.3 of the data sheet. + **/ +static inline int sht15_calc_temp(struct sht15_data *data) +{ + int d1 = 0; + int i; + + for (i = ARRAY_SIZE(temppoints) - 1; i > 0; i--) + /* Find the interval to interpolate over (table sorted by vdd); + note supply_uV and vdd are both in microvolts */ + if (data->supply_uV > temppoints[i - 1].vdd) { + d1 = (data->supply_uV - temppoints[i - 1].vdd) + * (temppoints[i].d1 - temppoints[i - 1].d1) + / (temppoints[i].vdd - temppoints[i - 1].vdd) + + temppoints[i - 1].d1; + break; + } + + return data->val_temp*10 + d1; +} + +/** + * sht15_calc_humid() - using last temperature convert raw to humid + * @data: device state + * + * This is the temperature compensated version as per section 4.2 of + * the data sheet. + **/ +static inline int sht15_calc_humid(struct sht15_data *data) +{ + int RHlinear; /* milli percent */ + int temp = sht15_calc_temp(data); + + const int c1 = -4; + const int c2 = 40500; /* x 10 ^ -6 */ + const int c3 = -28; /* x 10 ^ -7 */ + + RHlinear = c1*1000 + + c2 * data->val_humid/1000 + + (data->val_humid * data->val_humid * c3)/10000; + return (temp - 25000) * (10000 + 80 * data->val_humid) + / 1000000 + RHlinear; +} + +static ssize_t sht15_show_temp(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int ret; + struct sht15_data *data = dev_get_drvdata(dev); + + /* Technically no need to read humidity as well */ + ret = sht15_update_vals(data); + + return ret ? ret : sprintf(buf, "%d\n", + sht15_calc_temp(data)); +} + +static ssize_t sht15_show_humidity(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int ret; + struct sht15_data *data = dev_get_drvdata(dev); + + ret = sht15_update_vals(data); + + return ret ?
ret : sprintf(buf, "%d\n", sht15_calc_humid(data)); + +}; +static ssize_t show_name(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct platform_device *pdev = to_platform_device(dev); + return sprintf(buf, "%s\n", pdev->name); +} + +static SENSOR_DEVICE_ATTR(temp1_input, + S_IRUGO, sht15_show_temp, + NULL, 0); +static SENSOR_DEVICE_ATTR(humidity1_input, + S_IRUGO, sht15_show_humidity, + NULL, 0); +static DEVICE_ATTR(name, S_IRUGO, show_name, NULL); +static struct attribute *sht15_attrs[] = { + &sensor_dev_attr_temp1_input.dev_attr.attr, + &sensor_dev_attr_humidity1_input.dev_attr.attr, + &dev_attr_name.attr, + NULL, +}; + +static const struct attribute_group sht15_attr_group = { + .attrs = sht15_attrs, +}; + +static irqreturn_t sht15_interrupt_fired(int irq, void *d) +{ + struct sht15_data *data = d; + /* First disable the interrupt */ + disable_irq_nosync(irq); + atomic_inc(&data->interrupt_handled); + /* Then schedule a reading work struct */ + if (data->flag != SHT15_READING_NOTHING) + schedule_work(&data->read_work); + return IRQ_HANDLED; +} + +/* Each byte of data is acknowledged by pulling the data line + * low for one clock pulse. + */ +static void sht15_ack(struct sht15_data *data) +{ + gpio_direction_output(data->pdata->gpio_data, 0); + ndelay(SHT15_TSU); + gpio_set_value(data->pdata->gpio_sck, 1); + ndelay(SHT15_TSU); + gpio_set_value(data->pdata->gpio_sck, 0); + ndelay(SHT15_TSU); + gpio_set_value(data->pdata->gpio_data, 1); + + gpio_direction_input(data->pdata->gpio_data); +} +/** + * sht15_end_transmission() - notify device of end of transmission + * @data: device state + * + * This is basically a NAK. (single clock pulse, data high) + **/ +static void sht15_end_transmission(struct sht15_data *data) +{ + gpio_direction_output(data->pdata->gpio_data, 1); + ndelay(SHT15_TSU); + gpio_set_value(data->pdata->gpio_sck, 1); + ndelay(SHT15_TSCKH); + gpio_set_value(data->pdata->gpio_sck, 0); + ndelay(SHT15_TSCKL); +} + +static void sht15_bh_read_data(struct work_struct *work_s) +{ + int i; + uint16_t val = 0; + struct sht15_data *data + = container_of(work_s, struct sht15_data, + read_work); + /* Firstly, verify the line is low */ + if (gpio_get_value(data->pdata->gpio_data)) { + /* If not, then start the interrupt again - care + here as could have gone low in meantime so verify + it hasn't! 
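+ If the line is still high the re-enabled interrupt will fire when
+ it falls, and if the handler has already run this work has been
+ rescheduled; in either case the check below simply returns.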
+ */ + atomic_set(&data->interrupt_handled, 0); + enable_irq(gpio_to_irq(data->pdata->gpio_data)); + /* If still not occurred or another handler has been scheduled */ + if (gpio_get_value(data->pdata->gpio_data) + || atomic_read(&data->interrupt_handled)) + return; + } + /* Read the data back from the device */ + for (i = 0; i < 16; ++i) { + val <<= 1; + gpio_set_value(data->pdata->gpio_sck, 1); + ndelay(SHT15_TSCKH); + val |= !!gpio_get_value(data->pdata->gpio_data); + gpio_set_value(data->pdata->gpio_sck, 0); + ndelay(SHT15_TSCKL); + if (i == 7) + sht15_ack(data); + } + /* Tell the device we are done */ + sht15_end_transmission(data); + + switch (data->flag) { + case SHT15_READING_TEMP: + data->val_temp = val; + break; + case SHT15_READING_HUMID: + data->val_humid = val; + break; + } + + data->flag = SHT15_READING_NOTHING; + wake_up(&data->wait_queue); +} + +static void sht15_update_voltage(struct work_struct *work_s) +{ + struct sht15_data *data + = container_of(work_s, struct sht15_data, + update_supply_work); + data->supply_uV = regulator_get_voltage(data->reg); +} + +/** + * sht15_invalidate_voltage() - mark supply voltage invalid when notified by reg + * @nb: associated notification structure + * @event: voltage regulator state change event code + * @ignored: function parameter - ignored here + * + * Note that as the notification code holds the regulator lock, we have + * to schedule an update of the supply voltage rather than getting it directly. + **/ +static int sht15_invalidate_voltage(struct notifier_block *nb, + unsigned long event, + void *ignored) +{ + struct sht15_data *data = container_of(nb, struct sht15_data, nb); + + if (event == REGULATOR_EVENT_VOLTAGE_CHANGE) + data->supply_uV_valid = false; + schedule_work(&data->update_supply_work); + + return NOTIFY_OK; +} + +static int __devinit sht15_probe(struct platform_device *pdev) +{ + int ret = 0; + struct sht15_data *data = kzalloc(sizeof(*data), GFP_KERNEL); + + if (!data) { + ret = -ENOMEM; + dev_err(&pdev->dev, "kzalloc failed"); + goto error_ret; + } + + INIT_WORK(&data->read_work, sht15_bh_read_data); + INIT_WORK(&data->update_supply_work, sht15_update_voltage); + platform_set_drvdata(pdev, data); + mutex_init(&data->read_lock); + data->dev = &pdev->dev; + init_waitqueue_head(&data->wait_queue); + + if (pdev->dev.platform_data == NULL) { + ret = -EINVAL; + dev_err(&pdev->dev, "no platform data supplied"); + goto err_free_data; + } + data->pdata = pdev->dev.platform_data; + data->supply_uV = data->pdata->supply_mv*1000; + +/* If a regulator is available, query what the supply voltage actually is!*/ + data->reg = regulator_get(data->dev, "vcc"); + if (!IS_ERR(data->reg)) { + data->supply_uV = regulator_get_voltage(data->reg); + regulator_enable(data->reg); + /* setup a notifier block to update this if another device + * causes the voltage to change */ + data->nb.notifier_call = &sht15_invalidate_voltage; + ret = regulator_register_notifier(data->reg, &data->nb); + } +/* Try requesting the GPIOs */ + ret = gpio_request(data->pdata->gpio_sck, "SHT15 sck"); + if (ret) { + dev_err(&pdev->dev, "gpio request failed"); + goto err_free_data; + } + gpio_direction_output(data->pdata->gpio_sck, 0); + ret = gpio_request(data->pdata->gpio_data, "SHT15 data"); + if (ret) { + dev_err(&pdev->dev, "gpio request failed"); + goto err_release_gpio_sck; + } + ret = sysfs_create_group(&pdev->dev.kobj, &sht15_attr_group); + if (ret) { + dev_err(&pdev->dev, "sysfs create failed"); + goto err_release_gpio_data; + } + + ret =
request_irq(gpio_to_irq(data->pdata->gpio_data), + sht15_interrupt_fired, + IRQF_TRIGGER_FALLING, + "sht15 data", + data); + if (ret) { + dev_err(&pdev->dev, "failed to get irq for data line"); + goto err_release_gpio_data; + } + disable_irq_nosync(gpio_to_irq(data->pdata->gpio_data)); + sht15_connection_reset(data); + sht15_send_cmd(data, 0x1E); + + data->hwmon_dev = hwmon_device_register(data->dev); + if (IS_ERR(data->hwmon_dev)) { + ret = PTR_ERR(data->hwmon_dev); + goto err_release_irq; + } + return 0; + +err_release_irq: + free_irq(gpio_to_irq(data->pdata->gpio_data), data); +err_release_gpio_data: + gpio_free(data->pdata->gpio_data); +err_release_gpio_sck: + gpio_free(data->pdata->gpio_sck); +err_free_data: + kfree(data); +error_ret: + + return ret; +} + +static int __devexit sht15_remove(struct platform_device *pdev) +{ + struct sht15_data *data = platform_get_drvdata(pdev); + + /* Make sure any reads from the device are done and + * prevent new ones beginning */ + mutex_lock(&data->read_lock); + hwmon_device_unregister(data->hwmon_dev); + sysfs_remove_group(&pdev->dev.kobj, &sht15_attr_group); + if (!IS_ERR(data->reg)) { + regulator_unregister_notifier(data->reg, &data->nb); + regulator_disable(data->reg); + regulator_put(data->reg); + } + + free_irq(gpio_to_irq(data->pdata->gpio_data), data); + gpio_free(data->pdata->gpio_data); + gpio_free(data->pdata->gpio_sck); + mutex_unlock(&data->read_lock); + kfree(data); + return 0; +} + + +static struct platform_driver sht_drivers[] = { + { + .driver = { + .name = "sht10", + .owner = THIS_MODULE, + }, + .probe = sht15_probe, + .remove = sht15_remove, + }, { + .driver = { + .name = "sht11", + .owner = THIS_MODULE, + }, + .probe = sht15_probe, + .remove = sht15_remove, + }, { + .driver = { + .name = "sht15", + .owner = THIS_MODULE, + }, + .probe = sht15_probe, + .remove = sht15_remove, + }, { + .driver = { + .name = "sht71", + .owner = THIS_MODULE, + }, + .probe = sht15_probe, + .remove = sht15_remove, + }, { + .driver = { + .name = "sht75", + .owner = THIS_MODULE, + }, + .probe = sht15_probe, + .remove = sht15_remove, + }, +}; + + +static int __init sht15_init(void) +{ + int ret; + int i; + + for (i = 0; i < ARRAY_SIZE(sht_drivers); i++) { + ret = platform_driver_register(&sht_drivers[i]); + if (ret) + goto error_unreg; + } + + return 0; + +error_unreg: + while (--i >= 0) + platform_driver_unregister(&sht_drivers[i]); + + return ret; +} +module_init(sht15_init); + +static void __exit sht15_exit(void) +{ + int i; + for (i = ARRAY_SIZE(sht_drivers) - 1; i >= 0; i--) + platform_driver_unregister(&sht_drivers[i]); +} +module_exit(sht15_exit); + +MODULE_LICENSE("GPL"); diff --git a/drivers/md/dm-bio-list.h b/drivers/md/dm-bio-list.h deleted file mode 100644 index 345098b4ca7..00000000000 --- a/drivers/md/dm-bio-list.h +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright (C) 2004 Red Hat UK Ltd. - * - * This file is released under the GPL.
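The dm-bio-list.h helpers deleted here are used beyond device-mapper (the raid1/raid10 hunks below drop the same include), so the removal only makes sense because the helpers now live in a shared header, presumably linux/bio.h. The structure itself is a minimal head/tail FIFO threaded through each bio's bi_next pointer; the same pattern, reduced to a generic sketch with hypothetical names:

#include <stddef.h>

struct node { struct node *next; };
struct fifo { struct node *head, *tail; };

/* O(1) append: tail always points at the last element. */
static void fifo_add(struct fifo *f, struct node *n)
{
	n->next = NULL;
	if (f->tail)
		f->tail->next = n;
	else
		f->head = n;
	f->tail = n;
}

/* O(1) pop from the head; clears tail when the list empties. */
static struct node *fifo_pop(struct fifo *f)
{
	struct node *n = f->head;

	if (n) {
		f->head = n->next;
		if (!f->head)
			f->tail = NULL;
		n->next = NULL;
	}
	return n;
}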
- */ - -#ifndef DM_BIO_LIST_H -#define DM_BIO_LIST_H - -#include <linux/bio.h> - -#ifdef CONFIG_BLOCK - -struct bio_list { - struct bio *head; - struct bio *tail; -}; - -static inline int bio_list_empty(const struct bio_list *bl) -{ - return bl->head == NULL; -} - -static inline void bio_list_init(struct bio_list *bl) -{ - bl->head = bl->tail = NULL; -} - -#define bio_list_for_each(bio, bl) \ - for (bio = (bl)->head; bio; bio = bio->bi_next) - -static inline unsigned bio_list_size(const struct bio_list *bl) -{ - unsigned sz = 0; - struct bio *bio; - - bio_list_for_each(bio, bl) - sz++; - - return sz; -} - -static inline void bio_list_add(struct bio_list *bl, struct bio *bio) -{ - bio->bi_next = NULL; - - if (bl->tail) - bl->tail->bi_next = bio; - else - bl->head = bio; - - bl->tail = bio; -} - -static inline void bio_list_add_head(struct bio_list *bl, struct bio *bio) -{ - bio->bi_next = bl->head; - - bl->head = bio; - - if (!bl->tail) - bl->tail = bio; -} - -static inline void bio_list_merge(struct bio_list *bl, struct bio_list *bl2) -{ - if (!bl2->head) - return; - - if (bl->tail) - bl->tail->bi_next = bl2->head; - else - bl->head = bl2->head; - - bl->tail = bl2->tail; -} - -static inline void bio_list_merge_head(struct bio_list *bl, - struct bio_list *bl2) -{ - if (!bl2->head) - return; - - if (bl->head) - bl2->tail->bi_next = bl->head; - else - bl->tail = bl2->tail; - - bl->head = bl2->head; -} - -static inline struct bio *bio_list_pop(struct bio_list *bl) -{ - struct bio *bio = bl->head; - - if (bio) { - bl->head = bl->head->bi_next; - if (!bl->head) - bl->tail = NULL; - - bio->bi_next = NULL; - } - - return bio; -} - -static inline struct bio *bio_list_get(struct bio_list *bl) -{ - struct bio *bio = bl->head; - - bl->head = bl->tail = NULL; - - return bio; -} - -#endif /* CONFIG_BLOCK */ -#endif diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index 59ee1b015d2..559dbb52bc8 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -15,8 +15,6 @@ #include <linux/device-mapper.h> -#include "dm-bio-list.h" - #define DM_MSG_PREFIX "delay" struct delay_c { diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 095f77bf968..6a386ab4f7e 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -8,7 +8,6 @@ #include <linux/device-mapper.h> #include "dm-path-selector.h" -#include "dm-bio-list.h" #include "dm-bio-record.h" #include "dm-uevent.h" diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 536ef0bef15..076fbb4e967 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -5,7 +5,6 @@ * This file is released under the GPL. 
*/ -#include "dm-bio-list.h" #include "dm-bio-record.h" #include <linux/init.h> diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c index 59f8d9df9e1..7b899be0b08 100644 --- a/drivers/md/dm-region-hash.c +++ b/drivers/md/dm-region-hash.c @@ -14,7 +14,6 @@ #include <linux/vmalloc.h> #include "dm.h" -#include "dm-bio-list.h" #define DM_MSG_PREFIX "region hash" diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 981a0413068..d73f17fc777 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -22,7 +22,6 @@ #include <linux/workqueue.h> #include "dm-exception-store.h" -#include "dm-bio-list.h" #define DM_MSG_PREFIX "snapshots" diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 8a994be035b..424f7b048c3 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -6,7 +6,6 @@ */ #include "dm.h" -#include "dm-bio-list.h" #include "dm-uevent.h" #include <linux/init.h> diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 274b491a11c..36df9109cde 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -35,7 +35,6 @@ #include <linux/blkdev.h> #include <linux/seq_file.h> #include "md.h" -#include "dm-bio-list.h" #include "raid1.h" #include "bitmap.h" diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index e293d92641a..81a54f17417 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -22,7 +22,6 @@ #include <linux/blkdev.h> #include <linux/seq_file.h> #include "md.h" -#include "dm-bio-list.h" #include "raid10.h" #include "bitmap.h" diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index d184dfab963..db39f4a52f5 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -278,7 +278,7 @@ static ssize_t at24_bin_read(struct kobject *kobj, struct bin_attribute *attr, * We only use page mode writes; the alternative is sloooow. This routine * writes at most one page. 
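On typical parts of this kind a write that runs past a page boundary wraps within the page, so each transfer has to be clamped to the space left in the current page; a generic helper of the sort such drivers use internally (illustrative sketch, page size is device-specific):

#include <stddef.h>

/* Largest write starting at 'offset' that stays inside one page. */
static size_t page_chunk(unsigned int offset, size_t count, size_t page_size)
{
	size_t room = page_size - (offset % page_size);

	return count < room ? count : room;
}

With a 16-byte page, page_chunk(10, 20, 16) yields 6: bytes 10-15 go in one transfer and the caller loops for the remainder.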
*/ -static ssize_t at24_eeprom_write(struct at24_data *at24, char *buf, +static ssize_t at24_eeprom_write(struct at24_data *at24, const char *buf, unsigned offset, size_t count) { struct i2c_client *client; @@ -347,8 +347,8 @@ static ssize_t at24_eeprom_write(struct at24_data *at24, char *buf, return -ETIMEDOUT; } -static ssize_t at24_write(struct at24_data *at24, - char *buf, loff_t off, size_t count) +static ssize_t at24_write(struct at24_data *at24, const char *buf, loff_t off, + size_t count) { ssize_t retval = 0; @@ -406,7 +406,7 @@ static ssize_t at24_macc_read(struct memory_accessor *macc, char *buf, return at24_read(at24, buf, offset, count); } -static ssize_t at24_macc_write(struct memory_accessor *macc, char *buf, +static ssize_t at24_macc_write(struct memory_accessor *macc, const char *buf, off_t offset, size_t count) { struct at24_data *at24 = container_of(macc, struct at24_data, macc); diff --git a/drivers/misc/eeprom/at25.c b/drivers/misc/eeprom/at25.c index 6bc0dac5c1e..b34cb5f79ee 100644 --- a/drivers/misc/eeprom/at25.c +++ b/drivers/misc/eeprom/at25.c @@ -140,7 +140,8 @@ at25_bin_read(struct kobject *kobj, struct bin_attribute *bin_attr, static ssize_t -at25_ee_write(struct at25_data *at25, char *buf, loff_t off, size_t count) +at25_ee_write(struct at25_data *at25, const char *buf, loff_t off, + size_t count) { ssize_t status = 0; unsigned written = 0; @@ -276,7 +277,7 @@ static ssize_t at25_mem_read(struct memory_accessor *mem, char *buf, return at25_ee_read(at25, buf, offset, count); } -static ssize_t at25_mem_write(struct memory_accessor *mem, char *buf, +static ssize_t at25_mem_write(struct memory_accessor *mem, const char *buf, off_t offset, size_t count) { struct at25_data *at25 = container_of(mem, struct at25_data, mem); diff --git a/drivers/misc/sgi-xp/xpc.h b/drivers/misc/sgi-xp/xpc.h index 114444cfd49..b94d5f76770 100644 --- a/drivers/misc/sgi-xp/xpc.h +++ b/drivers/misc/sgi-xp/xpc.h @@ -90,18 +90,21 @@ struct xpc_rsvd_page { short max_npartitions; /* value of XPC_MAX_PARTITIONS */ u8 version; u8 pad1[3]; /* align to next u64 in 1st 64-byte cacheline */ + unsigned long ts_jiffies; /* timestamp when rsvd pg was setup by XPC */ union { - unsigned long vars_pa; /* phys address of struct xpc_vars */ - unsigned long activate_gru_mq_desc_gpa; /* phys addr of */ - /* activate mq's */ - /* gru mq descriptor */ + struct { + unsigned long vars_pa; /* phys addr */ + } sn2; + struct { + unsigned long heartbeat_gpa; /* phys addr */ + unsigned long activate_gru_mq_desc_gpa; /* phys addr */ + } uv; } sn; - unsigned long ts_jiffies; /* timestamp when rsvd pg was setup by XPC */ - u64 pad2[10]; /* align to last u64 in 2nd 64-byte cacheline */ + u64 pad2[9]; /* align to last u64 in 2nd 64-byte cacheline */ u64 SAL_nasids_size; /* SAL: size of each nasid mask in bytes */ }; -#define XPC_RP_VERSION _XPC_VERSION(2, 0) /* version 2.0 of the reserved page */ +#define XPC_RP_VERSION _XPC_VERSION(3, 0) /* version 3.0 of the reserved page */ /* * Define the structures by which XPC variables can be exported to other @@ -182,6 +185,17 @@ struct xpc_vars_part_sn2 { (XPC_RP_MACH_NASIDS(_rp) + \ xpc_nasid_mask_nlongs)) + +/* + * The following structure describes the partition's heartbeat info which + * will be periodically read by other partitions to determine whether this + * XPC is still 'alive'. 
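+ *
+ * A remote partition is judged alive as long as either @offline is
+ * nonzero or @value has advanced since it was last polled.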
+ */ +struct xpc_heartbeat_uv { + unsigned long value; + unsigned long offline; /* if 0, heartbeat should be changing */ +}; + /* * Info pertinent to a GRU message queue using a watch list for irq generation. */ @@ -198,7 +212,7 @@ struct xpc_gru_mq_uv { /* * The activate_mq is used to send/receive GRU messages that affect XPC's - * heartbeat, partition active state, and channel state. This is UV only. + * partition active state and channel state. This is uv only. */ struct xpc_activate_mq_msghdr_uv { unsigned int gru_msg_hdr; /* FOR GRU INTERNAL USE ONLY */ @@ -210,33 +224,27 @@ struct xpc_activate_mq_msghdr_uv { /* activate_mq defined message types */ #define XPC_ACTIVATE_MQ_MSG_SYNC_ACT_STATE_UV 0 -#define XPC_ACTIVATE_MQ_MSG_INC_HEARTBEAT_UV 1 -#define XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV 2 -#define XPC_ACTIVATE_MQ_MSG_ONLINE_HEARTBEAT_UV 3 -#define XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV 4 -#define XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV 5 +#define XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV 1 +#define XPC_ACTIVATE_MQ_MSG_DEACTIVATE_REQ_UV 2 -#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV 6 -#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV 7 -#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV 8 -#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV 9 +#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREQUEST_UV 3 +#define XPC_ACTIVATE_MQ_MSG_CHCTL_CLOSEREPLY_UV 4 +#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREQUEST_UV 5 +#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENREPLY_UV 6 +#define XPC_ACTIVATE_MQ_MSG_CHCTL_OPENCOMPLETE_UV 7 -#define XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV 10 -#define XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV 11 +#define XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV 8 +#define XPC_ACTIVATE_MQ_MSG_MARK_DISENGAGED_UV 9 struct xpc_activate_mq_msg_uv { struct xpc_activate_mq_msghdr_uv hdr; }; -struct xpc_activate_mq_msg_heartbeat_req_uv { - struct xpc_activate_mq_msghdr_uv hdr; - u64 heartbeat; -}; - struct xpc_activate_mq_msg_activate_req_uv { struct xpc_activate_mq_msghdr_uv hdr; unsigned long rp_gpa; + unsigned long heartbeat_gpa; unsigned long activate_gru_mq_desc_gpa; }; @@ -271,6 +279,11 @@ struct xpc_activate_mq_msg_chctl_openreply_uv { unsigned long notify_gru_mq_desc_gpa; }; +struct xpc_activate_mq_msg_chctl_opencomplete_uv { + struct xpc_activate_mq_msghdr_uv hdr; + short ch_number; +}; + /* * Functions registered by add_timer() or called by kernel_thread() only * allow for a single 64-bit argument. 
The following macros can be used to @@ -576,30 +589,32 @@ struct xpc_channel { #define XPC_C_WASCONNECTED 0x00000001 /* channel was connected */ -#define XPC_C_ROPENREPLY 0x00000002 /* remote open channel reply */ -#define XPC_C_OPENREPLY 0x00000004 /* local open channel reply */ -#define XPC_C_ROPENREQUEST 0x00000008 /* remote open channel request */ -#define XPC_C_OPENREQUEST 0x00000010 /* local open channel request */ +#define XPC_C_ROPENCOMPLETE 0x00000002 /* remote open channel complete */ +#define XPC_C_OPENCOMPLETE 0x00000004 /* local open channel complete */ +#define XPC_C_ROPENREPLY 0x00000008 /* remote open channel reply */ +#define XPC_C_OPENREPLY 0x00000010 /* local open channel reply */ +#define XPC_C_ROPENREQUEST 0x00000020 /* remote open channel request */ +#define XPC_C_OPENREQUEST 0x00000040 /* local open channel request */ -#define XPC_C_SETUP 0x00000020 /* channel's msgqueues are alloc'd */ -#define XPC_C_CONNECTEDCALLOUT 0x00000040 /* connected callout initiated */ +#define XPC_C_SETUP 0x00000080 /* channel's msgqueues are alloc'd */ +#define XPC_C_CONNECTEDCALLOUT 0x00000100 /* connected callout initiated */ #define XPC_C_CONNECTEDCALLOUT_MADE \ - 0x00000080 /* connected callout completed */ -#define XPC_C_CONNECTED 0x00000100 /* local channel is connected */ -#define XPC_C_CONNECTING 0x00000200 /* channel is being connected */ + 0x00000200 /* connected callout completed */ +#define XPC_C_CONNECTED 0x00000400 /* local channel is connected */ +#define XPC_C_CONNECTING 0x00000800 /* channel is being connected */ -#define XPC_C_RCLOSEREPLY 0x00000400 /* remote close channel reply */ -#define XPC_C_CLOSEREPLY 0x00000800 /* local close channel reply */ -#define XPC_C_RCLOSEREQUEST 0x00001000 /* remote close channel request */ -#define XPC_C_CLOSEREQUEST 0x00002000 /* local close channel request */ +#define XPC_C_RCLOSEREPLY 0x00001000 /* remote close channel reply */ +#define XPC_C_CLOSEREPLY 0x00002000 /* local close channel reply */ +#define XPC_C_RCLOSEREQUEST 0x00004000 /* remote close channel request */ +#define XPC_C_CLOSEREQUEST 0x00008000 /* local close channel request */ -#define XPC_C_DISCONNECTED 0x00004000 /* channel is disconnected */ -#define XPC_C_DISCONNECTING 0x00008000 /* channel is being disconnected */ +#define XPC_C_DISCONNECTED 0x00010000 /* channel is disconnected */ +#define XPC_C_DISCONNECTING 0x00020000 /* channel is being disconnected */ #define XPC_C_DISCONNECTINGCALLOUT \ - 0x00010000 /* disconnecting callout initiated */ + 0x00040000 /* disconnecting callout initiated */ #define XPC_C_DISCONNECTINGCALLOUT_MADE \ - 0x00020000 /* disconnecting callout completed */ -#define XPC_C_WDISCONNECT 0x00040000 /* waiting for channel disconnect */ + 0x00080000 /* disconnecting callout completed */ +#define XPC_C_WDISCONNECT 0x00100000 /* waiting for channel disconnect */ /* * The channel control flags (chctl) union consists of a 64-bit variable which @@ -618,11 +633,13 @@ union xpc_channel_ctl_flags { #define XPC_CHCTL_CLOSEREPLY 0x02 #define XPC_CHCTL_OPENREQUEST 0x04 #define XPC_CHCTL_OPENREPLY 0x08 -#define XPC_CHCTL_MSGREQUEST 0x10 +#define XPC_CHCTL_OPENCOMPLETE 0x10 +#define XPC_CHCTL_MSGREQUEST 0x20 #define XPC_OPENCLOSE_CHCTL_FLAGS \ (XPC_CHCTL_CLOSEREQUEST | XPC_CHCTL_CLOSEREPLY | \ - XPC_CHCTL_OPENREQUEST | XPC_CHCTL_OPENREPLY) + XPC_CHCTL_OPENREQUEST | XPC_CHCTL_OPENREPLY | \ + XPC_CHCTL_OPENCOMPLETE) #define XPC_MSG_CHCTL_FLAGS XPC_CHCTL_MSGREQUEST static inline int @@ -687,6 +704,9 @@ struct xpc_partition_sn2 { }; struct xpc_partition_uv { + 
unsigned long heartbeat_gpa; /* phys addr of partition's heartbeat */ + struct xpc_heartbeat_uv cached_heartbeat; /* cached copy of */ + /* partition's heartbeat */ unsigned long activate_gru_mq_desc_gpa; /* phys addr of partition's */ /* activate mq's gru mq */ /* descriptor */ @@ -698,14 +718,12 @@ struct xpc_partition_uv { u8 remote_act_state; /* remote partition's act_state */ u8 act_state_req; /* act_state request from remote partition */ enum xp_retval reason; /* reason for deactivate act_state request */ - u64 heartbeat; /* incremented by remote partition */ }; /* struct xpc_partition_uv flags */ -#define XPC_P_HEARTBEAT_OFFLINE_UV 0x00000001 +#define XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV 0x00000001 #define XPC_P_ENGAGED_UV 0x00000002 -#define XPC_P_CACHED_ACTIVATE_GRU_MQ_DESC_UV 0x00000004 /* struct xpc_partition_uv act_state change requests */ @@ -762,6 +780,62 @@ struct xpc_partition { } ____cacheline_aligned; +struct xpc_arch_operations { + int (*setup_partitions) (void); + void (*teardown_partitions) (void); + void (*process_activate_IRQ_rcvd) (void); + enum xp_retval (*get_partition_rsvd_page_pa) + (void *, u64 *, unsigned long *, size_t *); + int (*setup_rsvd_page) (struct xpc_rsvd_page *); + + void (*allow_hb) (short); + void (*disallow_hb) (short); + void (*disallow_all_hbs) (void); + void (*increment_heartbeat) (void); + void (*offline_heartbeat) (void); + void (*online_heartbeat) (void); + void (*heartbeat_init) (void); + void (*heartbeat_exit) (void); + enum xp_retval (*get_remote_heartbeat) (struct xpc_partition *); + + void (*request_partition_activation) (struct xpc_rsvd_page *, + unsigned long, int); + void (*request_partition_reactivation) (struct xpc_partition *); + void (*request_partition_deactivation) (struct xpc_partition *); + void (*cancel_partition_deactivation_request) (struct xpc_partition *); + enum xp_retval (*setup_ch_structures) (struct xpc_partition *); + void (*teardown_ch_structures) (struct xpc_partition *); + + enum xp_retval (*make_first_contact) (struct xpc_partition *); + + u64 (*get_chctl_all_flags) (struct xpc_partition *); + void (*send_chctl_closerequest) (struct xpc_channel *, unsigned long *); + void (*send_chctl_closereply) (struct xpc_channel *, unsigned long *); + void (*send_chctl_openrequest) (struct xpc_channel *, unsigned long *); + void (*send_chctl_openreply) (struct xpc_channel *, unsigned long *); + void (*send_chctl_opencomplete) (struct xpc_channel *, unsigned long *); + void (*process_msg_chctl_flags) (struct xpc_partition *, int); + + enum xp_retval (*save_remote_msgqueue_pa) (struct xpc_channel *, + unsigned long); + + enum xp_retval (*setup_msg_structures) (struct xpc_channel *); + void (*teardown_msg_structures) (struct xpc_channel *); + + void (*indicate_partition_engaged) (struct xpc_partition *); + void (*indicate_partition_disengaged) (struct xpc_partition *); + void (*assume_partition_disengaged) (short); + int (*partition_engaged) (short); + int (*any_partition_engaged) (void); + + int (*n_of_deliverable_payloads) (struct xpc_channel *); + enum xp_retval (*send_payload) (struct xpc_channel *, u32, void *, + u16, u8, xpc_notify_func, void *); + void *(*get_deliverable_payload) (struct xpc_channel *); + void (*received_payload) (struct xpc_channel *, void *); + void (*notify_senders_of_disconnect) (struct xpc_channel *); +}; + /* struct xpc_partition act_state values (for XPC HB) */ #define XPC_P_AS_INACTIVE 0x00 /* partition is not active */ @@ -802,67 +876,17 @@ extern struct xpc_registration xpc_registrations[]; /*
found in xpc_main.c */ extern struct device *xpc_part; extern struct device *xpc_chan; +extern struct xpc_arch_operations xpc_arch_ops; extern int xpc_disengage_timelimit; extern int xpc_disengage_timedout; extern int xpc_activate_IRQ_rcvd; extern spinlock_t xpc_activate_IRQ_rcvd_lock; extern wait_queue_head_t xpc_activate_IRQ_wq; -extern void *xpc_heartbeating_to_mask; extern void *xpc_kzalloc_cacheline_aligned(size_t, gfp_t, void **); extern void xpc_activate_partition(struct xpc_partition *); extern void xpc_activate_kthreads(struct xpc_channel *, int); extern void xpc_create_kthreads(struct xpc_channel *, int, int); extern void xpc_disconnect_wait(int); -extern int (*xpc_setup_partitions_sn) (void); -extern void (*xpc_teardown_partitions_sn) (void); -extern enum xp_retval (*xpc_get_partition_rsvd_page_pa) (void *, u64 *, - unsigned long *, - size_t *); -extern int (*xpc_setup_rsvd_page_sn) (struct xpc_rsvd_page *); -extern void (*xpc_heartbeat_init) (void); -extern void (*xpc_heartbeat_exit) (void); -extern void (*xpc_increment_heartbeat) (void); -extern void (*xpc_offline_heartbeat) (void); -extern void (*xpc_online_heartbeat) (void); -extern enum xp_retval (*xpc_get_remote_heartbeat) (struct xpc_partition *); -extern enum xp_retval (*xpc_make_first_contact) (struct xpc_partition *); -extern u64 (*xpc_get_chctl_all_flags) (struct xpc_partition *); -extern enum xp_retval (*xpc_setup_msg_structures) (struct xpc_channel *); -extern void (*xpc_teardown_msg_structures) (struct xpc_channel *); -extern void (*xpc_notify_senders_of_disconnect) (struct xpc_channel *); -extern void (*xpc_process_msg_chctl_flags) (struct xpc_partition *, int); -extern int (*xpc_n_of_deliverable_payloads) (struct xpc_channel *); -extern void *(*xpc_get_deliverable_payload) (struct xpc_channel *); -extern void (*xpc_request_partition_activation) (struct xpc_rsvd_page *, - unsigned long, int); -extern void (*xpc_request_partition_reactivation) (struct xpc_partition *); -extern void (*xpc_request_partition_deactivation) (struct xpc_partition *); -extern void (*xpc_cancel_partition_deactivation_request) ( - struct xpc_partition *); -extern void (*xpc_process_activate_IRQ_rcvd) (void); -extern enum xp_retval (*xpc_setup_ch_structures_sn) (struct xpc_partition *); -extern void (*xpc_teardown_ch_structures_sn) (struct xpc_partition *); - -extern void (*xpc_indicate_partition_engaged) (struct xpc_partition *); -extern int (*xpc_partition_engaged) (short); -extern int (*xpc_any_partition_engaged) (void); -extern void (*xpc_indicate_partition_disengaged) (struct xpc_partition *); -extern void (*xpc_assume_partition_disengaged) (short); - -extern void (*xpc_send_chctl_closerequest) (struct xpc_channel *, - unsigned long *); -extern void (*xpc_send_chctl_closereply) (struct xpc_channel *, - unsigned long *); -extern void (*xpc_send_chctl_openrequest) (struct xpc_channel *, - unsigned long *); -extern void (*xpc_send_chctl_openreply) (struct xpc_channel *, unsigned long *); - -extern enum xp_retval (*xpc_save_remote_msgqueue_pa) (struct xpc_channel *, - unsigned long); - -extern enum xp_retval (*xpc_send_payload) (struct xpc_channel *, u32, void *, - u16, u8, xpc_notify_func, void *); -extern void (*xpc_received_payload) (struct xpc_channel *, void *); /* found in xpc_sn2.c */ extern int xpc_init_sn2(void); @@ -909,40 +933,6 @@ extern void xpc_disconnect_channel(const int, struct xpc_channel *, extern void xpc_disconnect_callout(struct xpc_channel *, enum xp_retval); extern void xpc_partition_going_down(struct xpc_partition 
*, enum xp_retval); -static inline int -xpc_hb_allowed(short partid, void *heartbeating_to_mask) -{ - return test_bit(partid, heartbeating_to_mask); -} - -static inline int -xpc_any_hbs_allowed(void) -{ - DBUG_ON(xpc_heartbeating_to_mask == NULL); - return !bitmap_empty(xpc_heartbeating_to_mask, xp_max_npartitions); -} - -static inline void -xpc_allow_hb(short partid) -{ - DBUG_ON(xpc_heartbeating_to_mask == NULL); - set_bit(partid, xpc_heartbeating_to_mask); -} - -static inline void -xpc_disallow_hb(short partid) -{ - DBUG_ON(xpc_heartbeating_to_mask == NULL); - clear_bit(partid, xpc_heartbeating_to_mask); -} - -static inline void -xpc_disallow_all_hbs(void) -{ - DBUG_ON(xpc_heartbeating_to_mask == NULL); - bitmap_zero(xpc_heartbeating_to_mask, xp_max_npartitions); -} - static inline void xpc_wakeup_channel_mgr(struct xpc_partition *part) { diff --git a/drivers/misc/sgi-xp/xpc_channel.c b/drivers/misc/sgi-xp/xpc_channel.c index 99a2534c38a..652593fc486 100644 --- a/drivers/misc/sgi-xp/xpc_channel.c +++ b/drivers/misc/sgi-xp/xpc_channel.c @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2004-2009 Silicon Graphics, Inc. All Rights Reserved. */ /* @@ -39,34 +39,38 @@ xpc_process_connect(struct xpc_channel *ch, unsigned long *irq_flags) if (!(ch->flags & XPC_C_SETUP)) { spin_unlock_irqrestore(&ch->lock, *irq_flags); - ret = xpc_setup_msg_structures(ch); + ret = xpc_arch_ops.setup_msg_structures(ch); spin_lock_irqsave(&ch->lock, *irq_flags); if (ret != xpSuccess) XPC_DISCONNECT_CHANNEL(ch, ret, irq_flags); + else + ch->flags |= XPC_C_SETUP; - ch->flags |= XPC_C_SETUP; - - if (ch->flags & (XPC_C_CONNECTED | XPC_C_DISCONNECTING)) + if (ch->flags & XPC_C_DISCONNECTING) return; } if (!(ch->flags & XPC_C_OPENREPLY)) { ch->flags |= XPC_C_OPENREPLY; - xpc_send_chctl_openreply(ch, irq_flags); + xpc_arch_ops.send_chctl_openreply(ch, irq_flags); } if (!(ch->flags & XPC_C_ROPENREPLY)) return; - ch->flags = (XPC_C_CONNECTED | XPC_C_SETUP); /* clear all else */ + if (!(ch->flags & XPC_C_OPENCOMPLETE)) { + ch->flags |= (XPC_C_OPENCOMPLETE | XPC_C_CONNECTED); + xpc_arch_ops.send_chctl_opencomplete(ch, irq_flags); + } + + if (!(ch->flags & XPC_C_ROPENCOMPLETE)) + return; dev_info(xpc_chan, "channel %d to partition %d connected\n", ch->number, ch->partid); - spin_unlock_irqrestore(&ch->lock, *irq_flags); - xpc_create_kthreads(ch, 1, 0); - spin_lock_irqsave(&ch->lock, *irq_flags); + ch->flags = (XPC_C_CONNECTED | XPC_C_SETUP); /* clear all else */ } /* @@ -96,7 +100,7 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags) if (part->act_state == XPC_P_AS_DEACTIVATING) { /* can't proceed until the other side disengages from us */ - if (xpc_partition_engaged(ch->partid)) + if (xpc_arch_ops.partition_engaged(ch->partid)) return; } else { @@ -108,7 +112,7 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags) if (!(ch->flags & XPC_C_CLOSEREPLY)) { ch->flags |= XPC_C_CLOSEREPLY; - xpc_send_chctl_closereply(ch, irq_flags); + xpc_arch_ops.send_chctl_closereply(ch, irq_flags); } if (!(ch->flags & XPC_C_RCLOSEREPLY)) @@ -118,7 +122,7 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags) /* wake those waiting for notify completion */ if (atomic_read(&ch->n_to_notify) > 0) { /* we do callout while holding ch->lock, callout can't block */ - xpc_notify_senders_of_disconnect(ch); + 
xpc_arch_ops.notify_senders_of_disconnect(ch); } /* both sides are disconnected now */ @@ -132,7 +136,7 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags) DBUG_ON(atomic_read(&ch->n_to_notify) != 0); /* it's now safe to free the channel's message queues */ - xpc_teardown_msg_structures(ch); + xpc_arch_ops.teardown_msg_structures(ch); ch->func = NULL; ch->key = NULL; @@ -144,8 +148,9 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags) /* * Mark the channel disconnected and clear all other flags, including - * XPC_C_SETUP (because of call to xpc_teardown_msg_structures()) but - * not including XPC_C_WDISCONNECT (if it was set). + * XPC_C_SETUP (because of call to + * xpc_arch_ops.teardown_msg_structures()) but not including + * XPC_C_WDISCONNECT (if it was set). */ ch->flags = (XPC_C_DISCONNECTED | (ch->flags & XPC_C_WDISCONNECT)); @@ -184,6 +189,7 @@ xpc_process_openclose_chctl_flags(struct xpc_partition *part, int ch_number, struct xpc_channel *ch = &part->channels[ch_number]; enum xp_retval reason; enum xp_retval ret; + int create_kthread = 0; spin_lock_irqsave(&ch->lock, irq_flags); @@ -196,8 +202,7 @@ again: * has had a chance to see that the channel is disconnected. */ ch->delayed_chctl_flags |= chctl_flags; - spin_unlock_irqrestore(&ch->lock, irq_flags); - return; + goto out; } if (chctl_flags & XPC_CHCTL_CLOSEREQUEST) { @@ -239,8 +244,7 @@ again: XPC_CHCTL_CLOSEREQUEST; spin_unlock(&part->chctl_lock); } - spin_unlock_irqrestore(&ch->lock, irq_flags); - return; + goto out; } XPC_SET_REASON(ch, 0, 0); @@ -250,7 +254,8 @@ again: ch->flags |= (XPC_C_CONNECTING | XPC_C_ROPENREQUEST); } - chctl_flags &= ~(XPC_CHCTL_OPENREQUEST | XPC_CHCTL_OPENREPLY); + chctl_flags &= ~(XPC_CHCTL_OPENREQUEST | XPC_CHCTL_OPENREPLY | + XPC_CHCTL_OPENCOMPLETE); /* * The meaningful CLOSEREQUEST connection state fields are: @@ -269,8 +274,7 @@ again: XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags); DBUG_ON(chctl_flags & XPC_CHCTL_CLOSEREPLY); - spin_unlock_irqrestore(&ch->lock, irq_flags); - return; + goto out; } xpc_process_disconnect(ch, &irq_flags); @@ -283,8 +287,7 @@ again: if (ch->flags & XPC_C_DISCONNECTED) { DBUG_ON(part->act_state != XPC_P_AS_DEACTIVATING); - spin_unlock_irqrestore(&ch->lock, irq_flags); - return; + goto out; } DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST)); @@ -299,8 +302,7 @@ again: XPC_CHCTL_CLOSEREPLY; spin_unlock(&part->chctl_lock); } - spin_unlock_irqrestore(&ch->lock, irq_flags); - return; + goto out; } ch->flags |= XPC_C_RCLOSEREPLY; @@ -320,14 +322,12 @@ again: if (part->act_state == XPC_P_AS_DEACTIVATING || (ch->flags & XPC_C_ROPENREQUEST)) { - spin_unlock_irqrestore(&ch->lock, irq_flags); - return; + goto out; } if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_WDISCONNECT)) { ch->delayed_chctl_flags |= XPC_CHCTL_OPENREQUEST; - spin_unlock_irqrestore(&ch->lock, irq_flags); - return; + goto out; } DBUG_ON(!(ch->flags & (XPC_C_DISCONNECTED | XPC_C_OPENREQUEST))); @@ -341,8 +341,7 @@ again: */ if (args->entry_size == 0 || args->local_nentries == 0) { /* assume OPENREQUEST was delayed by mistake */ - spin_unlock_irqrestore(&ch->lock, irq_flags); - return; + goto out; } ch->flags |= (XPC_C_ROPENREQUEST | XPC_C_CONNECTING); @@ -352,8 +351,7 @@ again: if (args->entry_size != ch->entry_size) { XPC_DISCONNECT_CHANNEL(ch, xpUnequalMsgSizes, &irq_flags); - spin_unlock_irqrestore(&ch->lock, irq_flags); - return; + goto out; } } else { ch->entry_size = args->entry_size; @@ -375,15 +373,13 @@ again: args->local_msgqueue_pa, args->local_nentries, 
args->remote_nentries, ch->partid, ch->number); - if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED)) { - spin_unlock_irqrestore(&ch->lock, irq_flags); - return; - } + if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED)) + goto out; + if (!(ch->flags & XPC_C_OPENREQUEST)) { XPC_DISCONNECT_CHANNEL(ch, xpOpenCloseError, &irq_flags); - spin_unlock_irqrestore(&ch->lock, irq_flags); - return; + goto out; } DBUG_ON(!(ch->flags & XPC_C_ROPENREQUEST)); @@ -400,11 +396,11 @@ again: DBUG_ON(args->local_nentries == 0); DBUG_ON(args->remote_nentries == 0); - ret = xpc_save_remote_msgqueue_pa(ch, args->local_msgqueue_pa); + ret = xpc_arch_ops.save_remote_msgqueue_pa(ch, + args->local_msgqueue_pa); if (ret != xpSuccess) { XPC_DISCONNECT_CHANNEL(ch, ret, &irq_flags); - spin_unlock_irqrestore(&ch->lock, irq_flags); - return; + goto out; } ch->flags |= XPC_C_ROPENREPLY; @@ -430,7 +426,36 @@ again: xpc_process_connect(ch, &irq_flags); } + if (chctl_flags & XPC_CHCTL_OPENCOMPLETE) { + + dev_dbg(xpc_chan, "XPC_CHCTL_OPENCOMPLETE received from " + "partid=%d, channel=%d\n", ch->partid, ch->number); + + if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_DISCONNECTED)) + goto out; + + if (!(ch->flags & XPC_C_OPENREQUEST) || + !(ch->flags & XPC_C_OPENREPLY)) { + XPC_DISCONNECT_CHANNEL(ch, xpOpenCloseError, + &irq_flags); + goto out; + } + + DBUG_ON(!(ch->flags & XPC_C_ROPENREQUEST)); + DBUG_ON(!(ch->flags & XPC_C_ROPENREPLY)); + DBUG_ON(!(ch->flags & XPC_C_CONNECTED)); + + ch->flags |= XPC_C_ROPENCOMPLETE; + + xpc_process_connect(ch, &irq_flags); + create_kthread = 1; + } + +out: spin_unlock_irqrestore(&ch->lock, irq_flags); + + if (create_kthread) + xpc_create_kthreads(ch, 1, 0); } /* @@ -508,7 +533,7 @@ xpc_connect_channel(struct xpc_channel *ch) /* initiate the connection */ ch->flags |= (XPC_C_OPENREQUEST | XPC_C_CONNECTING); - xpc_send_chctl_openrequest(ch, &irq_flags); + xpc_arch_ops.send_chctl_openrequest(ch, &irq_flags); xpc_process_connect(ch, &irq_flags); @@ -526,7 +551,7 @@ xpc_process_sent_chctl_flags(struct xpc_partition *part) int ch_number; u32 ch_flags; - chctl.all_flags = xpc_get_chctl_all_flags(part); + chctl.all_flags = xpc_arch_ops.get_chctl_all_flags(part); /* * Initiate channel connections for registered channels. 
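The conversion above replaces every spin_unlock_irqrestore()/return pair in xpc_process_openclose_chctl_flags() with a goto to a single exit label, and uses a create_kthread flag so that xpc_create_kthreads() is only called once the channel lock has been dropped. The shape of that refactor, reduced to a standalone sketch (all names hypothetical):

#include <pthread.h>
#include <stdbool.h>

struct chan {
	pthread_mutex_t lock;
	bool open_complete;
};

void start_worker(struct chan *c);	/* must not be called under lock */

static void process_flags(struct chan *c)
{
	bool create_worker = false;

	pthread_mutex_lock(&c->lock);
	if (!c->open_complete)
		goto out;		/* every early exit funnels here */
	create_worker = true;		/* defer: can't start it under lock */
out:
	pthread_mutex_unlock(&c->lock);	/* single unlock site */
	if (create_worker)
		start_worker(c);
}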
@@ -564,10 +589,6 @@ xpc_process_sent_chctl_flags(struct xpc_partition *part) if (!(ch_flags & XPC_C_OPENREQUEST)) { DBUG_ON(ch_flags & XPC_C_SETUP); (void)xpc_connect_channel(ch); - } else { - spin_lock_irqsave(&ch->lock, irq_flags); - xpc_process_connect(ch, &irq_flags); - spin_unlock_irqrestore(&ch->lock, irq_flags); } continue; } @@ -579,7 +600,7 @@ xpc_process_sent_chctl_flags(struct xpc_partition *part) */ if (chctl.flags[ch_number] & XPC_MSG_CHCTL_FLAGS) - xpc_process_msg_chctl_flags(part, ch_number); + xpc_arch_ops.process_msg_chctl_flags(part, ch_number); } } @@ -755,7 +776,7 @@ xpc_disconnect_channel(const int line, struct xpc_channel *ch, XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY | XPC_C_CONNECTING | XPC_C_CONNECTED); - xpc_send_chctl_closerequest(ch, irq_flags); + xpc_arch_ops.send_chctl_closerequest(ch, irq_flags); if (channel_was_connected) ch->flags |= XPC_C_WASCONNECTED; @@ -862,8 +883,8 @@ xpc_initiate_send(short partid, int ch_number, u32 flags, void *payload, DBUG_ON(payload == NULL); if (xpc_part_ref(part)) { - ret = xpc_send_payload(&part->channels[ch_number], flags, - payload, payload_size, 0, NULL, NULL); + ret = xpc_arch_ops.send_payload(&part->channels[ch_number], + flags, payload, payload_size, 0, NULL, NULL); xpc_part_deref(part); } @@ -914,9 +935,8 @@ xpc_initiate_send_notify(short partid, int ch_number, u32 flags, void *payload, DBUG_ON(func == NULL); if (xpc_part_ref(part)) { - ret = xpc_send_payload(&part->channels[ch_number], flags, - payload, payload_size, XPC_N_CALL, func, - key); + ret = xpc_arch_ops.send_payload(&part->channels[ch_number], + flags, payload, payload_size, XPC_N_CALL, func, key); xpc_part_deref(part); } return ret; @@ -930,7 +950,7 @@ xpc_deliver_payload(struct xpc_channel *ch) { void *payload; - payload = xpc_get_deliverable_payload(ch); + payload = xpc_arch_ops.get_deliverable_payload(ch); if (payload != NULL) { /* @@ -984,7 +1004,7 @@ xpc_initiate_received(short partid, int ch_number, void *payload) DBUG_ON(ch_number < 0 || ch_number >= part->nchannels); ch = &part->channels[ch_number]; - xpc_received_payload(ch, payload); + xpc_arch_ops.received_payload(ch, payload); /* the call to xpc_msgqueue_ref() was done by xpc_deliver_payload() */ xpc_msgqueue_deref(ch); diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c index 1ab9fda87fa..fd3688a3e23 100644 --- a/drivers/misc/sgi-xp/xpc_main.c +++ b/drivers/misc/sgi-xp/xpc_main.c @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2004-2009 Silicon Graphics, Inc. All Rights Reserved. 
*/ /* @@ -150,7 +150,6 @@ DECLARE_WAIT_QUEUE_HEAD(xpc_activate_IRQ_wq); static unsigned long xpc_hb_check_timeout; static struct timer_list xpc_hb_timer; -void *xpc_heartbeating_to_mask; /* notification that the xpc_hb_checker thread has exited */ static DECLARE_COMPLETION(xpc_hb_checker_exited); @@ -170,62 +169,7 @@ static struct notifier_block xpc_die_notifier = { .notifier_call = xpc_system_die, }; -int (*xpc_setup_partitions_sn) (void); -void (*xpc_teardown_partitions_sn) (void); -enum xp_retval (*xpc_get_partition_rsvd_page_pa) (void *buf, u64 *cookie, - unsigned long *rp_pa, - size_t *len); -int (*xpc_setup_rsvd_page_sn) (struct xpc_rsvd_page *rp); -void (*xpc_heartbeat_init) (void); -void (*xpc_heartbeat_exit) (void); -void (*xpc_increment_heartbeat) (void); -void (*xpc_offline_heartbeat) (void); -void (*xpc_online_heartbeat) (void); -enum xp_retval (*xpc_get_remote_heartbeat) (struct xpc_partition *part); - -enum xp_retval (*xpc_make_first_contact) (struct xpc_partition *part); -void (*xpc_notify_senders_of_disconnect) (struct xpc_channel *ch); -u64 (*xpc_get_chctl_all_flags) (struct xpc_partition *part); -enum xp_retval (*xpc_setup_msg_structures) (struct xpc_channel *ch); -void (*xpc_teardown_msg_structures) (struct xpc_channel *ch); -void (*xpc_process_msg_chctl_flags) (struct xpc_partition *part, int ch_number); -int (*xpc_n_of_deliverable_payloads) (struct xpc_channel *ch); -void *(*xpc_get_deliverable_payload) (struct xpc_channel *ch); - -void (*xpc_request_partition_activation) (struct xpc_rsvd_page *remote_rp, - unsigned long remote_rp_pa, - int nasid); -void (*xpc_request_partition_reactivation) (struct xpc_partition *part); -void (*xpc_request_partition_deactivation) (struct xpc_partition *part); -void (*xpc_cancel_partition_deactivation_request) (struct xpc_partition *part); - -void (*xpc_process_activate_IRQ_rcvd) (void); -enum xp_retval (*xpc_setup_ch_structures_sn) (struct xpc_partition *part); -void (*xpc_teardown_ch_structures_sn) (struct xpc_partition *part); - -void (*xpc_indicate_partition_engaged) (struct xpc_partition *part); -int (*xpc_partition_engaged) (short partid); -int (*xpc_any_partition_engaged) (void); -void (*xpc_indicate_partition_disengaged) (struct xpc_partition *part); -void (*xpc_assume_partition_disengaged) (short partid); - -void (*xpc_send_chctl_closerequest) (struct xpc_channel *ch, - unsigned long *irq_flags); -void (*xpc_send_chctl_closereply) (struct xpc_channel *ch, - unsigned long *irq_flags); -void (*xpc_send_chctl_openrequest) (struct xpc_channel *ch, - unsigned long *irq_flags); -void (*xpc_send_chctl_openreply) (struct xpc_channel *ch, - unsigned long *irq_flags); - -enum xp_retval (*xpc_save_remote_msgqueue_pa) (struct xpc_channel *ch, - unsigned long msgqueue_pa); - -enum xp_retval (*xpc_send_payload) (struct xpc_channel *ch, u32 flags, - void *payload, u16 payload_size, - u8 notify_type, xpc_notify_func func, - void *key); -void (*xpc_received_payload) (struct xpc_channel *ch, void *payload); +struct xpc_arch_operations xpc_arch_ops; /* * Timer function to enforce the timelimit on the partition disengage. 
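The removals below complete the conversion started in xpc.h: instead of one exported function pointer per hook, xpc_main.c now keeps a single xpc_arch_operations instance (xpc_arch_ops) that the sn2 or uv init code populates. Reduced to a sketch with hypothetical names:

/* One ops table instead of dozens of global function pointers. */
struct arch_ops {
	void (*heartbeat_init)(void);
	int (*partition_engaged)(short partid);
};

static struct arch_ops arch_ops;

static void uv_heartbeat_init(void) { /* ... */ }
static int uv_partition_engaged(short partid) { return 0; /* ... */ }

/* Arch-specific init fills in the table once, at startup. */
static void init_uv(void)
{
	arch_ops.heartbeat_init = uv_heartbeat_init;
	arch_ops.partition_engaged = uv_partition_engaged;
}

Callers then invoke arch_ops.heartbeat_init() and so on. This keeps the whole arch interface visible in one struct and lets hot paths cache a member in a local variable, as the xpc_kthread_waitmsgs() hunk below now does with n_of_deliverable_payloads.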
@@ -240,7 +184,7 @@ xpc_timeout_partition_disengage(unsigned long data) (void)xpc_partition_disengaged(part); DBUG_ON(part->disengage_timeout != 0); - DBUG_ON(xpc_partition_engaged(XPC_PARTID(part))); + DBUG_ON(xpc_arch_ops.partition_engaged(XPC_PARTID(part))); } /* @@ -251,7 +195,7 @@ xpc_timeout_partition_disengage(unsigned long data) static void xpc_hb_beater(unsigned long dummy) { - xpc_increment_heartbeat(); + xpc_arch_ops.increment_heartbeat(); if (time_is_before_eq_jiffies(xpc_hb_check_timeout)) wake_up_interruptible(&xpc_activate_IRQ_wq); @@ -263,7 +207,7 @@ xpc_hb_beater(unsigned long dummy) static void xpc_start_hb_beater(void) { - xpc_heartbeat_init(); + xpc_arch_ops.heartbeat_init(); init_timer(&xpc_hb_timer); xpc_hb_timer.function = xpc_hb_beater; xpc_hb_beater(0); @@ -273,7 +217,7 @@ static void xpc_stop_hb_beater(void) { del_timer_sync(&xpc_hb_timer); - xpc_heartbeat_exit(); + xpc_arch_ops.heartbeat_exit(); } /* @@ -302,7 +246,7 @@ xpc_check_remote_hb(void) continue; } - ret = xpc_get_remote_heartbeat(part); + ret = xpc_arch_ops.get_remote_heartbeat(part); if (ret != xpSuccess) XPC_DEACTIVATE_PARTITION(part, ret); } @@ -353,7 +297,7 @@ xpc_hb_checker(void *ignore) force_IRQ = 0; dev_dbg(xpc_part, "processing activate IRQs " "received\n"); - xpc_process_activate_IRQ_rcvd(); + xpc_arch_ops.process_activate_IRQ_rcvd(); } /* wait for IRQ or timeout */ @@ -528,7 +472,7 @@ xpc_setup_ch_structures(struct xpc_partition *part) init_waitqueue_head(&ch->idle_wq); } - ret = xpc_setup_ch_structures_sn(part); + ret = xpc_arch_ops.setup_ch_structures(part); if (ret != xpSuccess) goto out_2; @@ -572,7 +516,7 @@ xpc_teardown_ch_structures(struct xpc_partition *part) /* now we can begin tearing down the infrastructure */ - xpc_teardown_ch_structures_sn(part); + xpc_arch_ops.teardown_ch_structures(part); kfree(part->remote_openclose_args_base); part->remote_openclose_args = NULL; @@ -620,12 +564,12 @@ xpc_activating(void *__partid) dev_dbg(xpc_part, "activating partition %d\n", partid); - xpc_allow_hb(partid); + xpc_arch_ops.allow_hb(partid); if (xpc_setup_ch_structures(part) == xpSuccess) { (void)xpc_part_ref(part); /* this will always succeed */ - if (xpc_make_first_contact(part) == xpSuccess) { + if (xpc_arch_ops.make_first_contact(part) == xpSuccess) { xpc_mark_partition_active(part); xpc_channel_mgr(part); /* won't return until partition is deactivating */ @@ -635,12 +579,12 @@ xpc_activating(void *__partid) xpc_teardown_ch_structures(part); } - xpc_disallow_hb(partid); + xpc_arch_ops.disallow_hb(partid); xpc_mark_partition_inactive(part); if (part->reason == xpReactivating) { /* interrupting ourselves results in activating partition */ - xpc_request_partition_reactivation(part); + xpc_arch_ops.request_partition_reactivation(part); } return 0; @@ -713,10 +657,13 @@ xpc_activate_kthreads(struct xpc_channel *ch, int needed) static void xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch) { + int (*n_of_deliverable_payloads) (struct xpc_channel *) = + xpc_arch_ops.n_of_deliverable_payloads; + do { /* deliver messages to their intended recipients */ - while (xpc_n_of_deliverable_payloads(ch) > 0 && + while (n_of_deliverable_payloads(ch) > 0 && !(ch->flags & XPC_C_DISCONNECTING)) { xpc_deliver_payload(ch); } @@ -732,7 +679,7 @@ xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch) "wait_event_interruptible_exclusive()\n"); (void)wait_event_interruptible_exclusive(ch->idle_wq, - (xpc_n_of_deliverable_payloads(ch) > 0 || + (n_of_deliverable_payloads(ch) > 0 
|| (ch->flags & XPC_C_DISCONNECTING))); atomic_dec(&ch->kthreads_idle); @@ -749,6 +696,8 @@ xpc_kthread_start(void *args) struct xpc_channel *ch; int n_needed; unsigned long irq_flags; + int (*n_of_deliverable_payloads) (struct xpc_channel *) = + xpc_arch_ops.n_of_deliverable_payloads; dev_dbg(xpc_chan, "kthread starting, partid=%d, channel=%d\n", partid, ch_number); @@ -777,7 +726,7 @@ xpc_kthread_start(void *args) * additional kthreads to help deliver them. We only * need one less than total #of messages to deliver. */ - n_needed = xpc_n_of_deliverable_payloads(ch) - 1; + n_needed = n_of_deliverable_payloads(ch) - 1; if (n_needed > 0 && !(ch->flags & XPC_C_DISCONNECTING)) xpc_activate_kthreads(ch, n_needed); @@ -805,7 +754,7 @@ xpc_kthread_start(void *args) if (atomic_dec_return(&ch->kthreads_assigned) == 0 && atomic_dec_return(&part->nchannels_engaged) == 0) { - xpc_indicate_partition_disengaged(part); + xpc_arch_ops.indicate_partition_disengaged(part); } xpc_msgqueue_deref(ch); @@ -837,6 +786,8 @@ xpc_create_kthreads(struct xpc_channel *ch, int needed, u64 args = XPC_PACK_ARGS(ch->partid, ch->number); struct xpc_partition *part = &xpc_partitions[ch->partid]; struct task_struct *kthread; + void (*indicate_partition_disengaged) (struct xpc_partition *) = + xpc_arch_ops.indicate_partition_disengaged; while (needed-- > 0) { @@ -858,7 +809,7 @@ xpc_create_kthreads(struct xpc_channel *ch, int needed, } else if (atomic_inc_return(&ch->kthreads_assigned) == 1 && atomic_inc_return(&part->nchannels_engaged) == 1) { - xpc_indicate_partition_engaged(part); + xpc_arch_ops.indicate_partition_engaged(part); } (void)xpc_part_ref(part); xpc_msgqueue_ref(ch); @@ -880,7 +831,7 @@ xpc_create_kthreads(struct xpc_channel *ch, int needed, if (atomic_dec_return(&ch->kthreads_assigned) == 0 && atomic_dec_return(&part->nchannels_engaged) == 0) { - xpc_indicate_partition_disengaged(part); + indicate_partition_disengaged(part); } xpc_msgqueue_deref(ch); xpc_part_deref(part); @@ -993,13 +944,13 @@ xpc_setup_partitions(void) atomic_set(&part->references, 0); } - return xpc_setup_partitions_sn(); + return xpc_arch_ops.setup_partitions(); } static void xpc_teardown_partitions(void) { - xpc_teardown_partitions_sn(); + xpc_arch_ops.teardown_partitions(); kfree(xpc_partitions); } @@ -1055,7 +1006,7 @@ xpc_do_exit(enum xp_retval reason) disengage_timeout = part->disengage_timeout; } - if (xpc_any_partition_engaged()) { + if (xpc_arch_ops.any_partition_engaged()) { if (time_is_before_jiffies(printmsg_time)) { dev_info(xpc_part, "waiting for remote " "partitions to deactivate, timeout in " @@ -1086,8 +1037,7 @@ xpc_do_exit(enum xp_retval reason) } while (1); - DBUG_ON(xpc_any_partition_engaged()); - DBUG_ON(xpc_any_hbs_allowed() != 0); + DBUG_ON(xpc_arch_ops.any_partition_engaged()); xpc_teardown_rsvd_page(); @@ -1152,15 +1102,15 @@ xpc_die_deactivate(void) /* keep xpc_hb_checker thread from doing anything (just in case) */ xpc_exiting = 1; - xpc_disallow_all_hbs(); /*indicate we're deactivated */ + xpc_arch_ops.disallow_all_hbs(); /*indicate we're deactivated */ for (partid = 0; partid < xp_max_npartitions; partid++) { part = &xpc_partitions[partid]; - if (xpc_partition_engaged(partid) || + if (xpc_arch_ops.partition_engaged(partid) || part->act_state != XPC_P_AS_INACTIVE) { - xpc_request_partition_deactivation(part); - xpc_indicate_partition_disengaged(part); + xpc_arch_ops.request_partition_deactivation(part); + xpc_arch_ops.indicate_partition_disengaged(part); } } @@ -1177,7 +1127,7 @@ xpc_die_deactivate(void) 
wait_to_print = XPC_DEACTIVATE_PRINTMSG_INTERVAL * 1000 * 5; while (1) { - any_engaged = xpc_any_partition_engaged(); + any_engaged = xpc_arch_ops.any_partition_engaged(); if (!any_engaged) { dev_info(xpc_part, "all partitions have deactivated\n"); break; @@ -1186,7 +1136,7 @@ xpc_die_deactivate(void) if (!keep_waiting--) { for (partid = 0; partid < xp_max_npartitions; partid++) { - if (xpc_partition_engaged(partid)) { + if (xpc_arch_ops.partition_engaged(partid)) { dev_info(xpc_part, "deactivate from " "remote partition %d timed " "out\n", partid); @@ -1233,7 +1183,7 @@ xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused) /* fall through */ case DIE_MCA_MONARCH_ENTER: case DIE_INIT_MONARCH_ENTER: - xpc_offline_heartbeat(); + xpc_arch_ops.offline_heartbeat(); break; case DIE_KDEBUG_LEAVE: @@ -1244,7 +1194,7 @@ xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused) /* fall through */ case DIE_MCA_MONARCH_LEAVE: case DIE_INIT_MONARCH_LEAVE: - xpc_online_heartbeat(); + xpc_arch_ops.online_heartbeat(); break; } #else diff --git a/drivers/misc/sgi-xp/xpc_partition.c b/drivers/misc/sgi-xp/xpc_partition.c index 6722f6fe4dc..65877bc5eda 100644 --- a/drivers/misc/sgi-xp/xpc_partition.c +++ b/drivers/misc/sgi-xp/xpc_partition.c @@ -70,6 +70,9 @@ xpc_get_rsvd_page_pa(int nasid) size_t buf_len = 0; void *buf = buf; void *buf_base = NULL; + enum xp_retval (*get_partition_rsvd_page_pa) + (void *, u64 *, unsigned long *, size_t *) = + xpc_arch_ops.get_partition_rsvd_page_pa; while (1) { @@ -79,8 +82,7 @@ xpc_get_rsvd_page_pa(int nasid) * ??? function or have two versions? Rename rp_pa for UV to * ??? rp_gpa? */ - ret = xpc_get_partition_rsvd_page_pa(buf, &cookie, &rp_pa, - &len); + ret = get_partition_rsvd_page_pa(buf, &cookie, &rp_pa, &len); dev_dbg(xpc_part, "SAL returned with ret=%d, cookie=0x%016lx, " "address=0x%016lx, len=0x%016lx\n", ret, @@ -172,7 +174,7 @@ xpc_setup_rsvd_page(void) xpc_part_nasids = XPC_RP_PART_NASIDS(rp); xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp); - ret = xpc_setup_rsvd_page_sn(rp); + ret = xpc_arch_ops.setup_rsvd_page(rp); if (ret != 0) return ret; @@ -264,7 +266,7 @@ xpc_partition_disengaged(struct xpc_partition *part) short partid = XPC_PARTID(part); int disengaged; - disengaged = !xpc_partition_engaged(partid); + disengaged = !xpc_arch_ops.partition_engaged(partid); if (part->disengage_timeout) { if (!disengaged) { if (time_is_after_jiffies(part->disengage_timeout)) { @@ -280,7 +282,7 @@ xpc_partition_disengaged(struct xpc_partition *part) dev_info(xpc_part, "deactivate request to remote " "partition %d timed out\n", partid); xpc_disengage_timedout = 1; - xpc_assume_partition_disengaged(partid); + xpc_arch_ops.assume_partition_disengaged(partid); disengaged = 1; } part->disengage_timeout = 0; @@ -294,7 +296,7 @@ xpc_partition_disengaged(struct xpc_partition *part) if (part->act_state != XPC_P_AS_INACTIVE) xpc_wakeup_channel_mgr(part); - xpc_cancel_partition_deactivation_request(part); + xpc_arch_ops.cancel_partition_deactivation_request(part); } return disengaged; } @@ -339,7 +341,7 @@ xpc_deactivate_partition(const int line, struct xpc_partition *part, spin_unlock_irqrestore(&part->act_lock, irq_flags); if (reason == xpReactivating) { /* we interrupt ourselves to reactivate partition */ - xpc_request_partition_reactivation(part); + xpc_arch_ops.request_partition_reactivation(part); } return; } @@ -358,7 +360,7 @@ xpc_deactivate_partition(const int line, struct xpc_partition *part, spin_unlock_irqrestore(&part->act_lock, 
irq_flags); /* ask remote partition to deactivate with regard to us */ - xpc_request_partition_deactivation(part); + xpc_arch_ops.request_partition_deactivation(part); /* set a timelimit on the disengage phase of the deactivation request */ part->disengage_timeout = jiffies + (xpc_disengage_timelimit * HZ); @@ -496,7 +498,7 @@ xpc_discovery(void) continue; } - xpc_request_partition_activation(remote_rp, + xpc_arch_ops.request_partition_activation(remote_rp, remote_rp_pa, nasid); } } diff --git a/drivers/misc/sgi-xp/xpc_sn2.c b/drivers/misc/sgi-xp/xpc_sn2.c index eaaa964942d..915a3b495da 100644 --- a/drivers/misc/sgi-xp/xpc_sn2.c +++ b/drivers/misc/sgi-xp/xpc_sn2.c @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved. */ /* @@ -60,14 +60,14 @@ static struct xpc_vars_sn2 *xpc_vars_sn2; static struct xpc_vars_part_sn2 *xpc_vars_part_sn2; static int -xpc_setup_partitions_sn_sn2(void) +xpc_setup_partitions_sn2(void) { /* nothing needs to be done */ return 0; } static void -xpc_teardown_partitions_sn_sn2(void) +xpc_teardown_partitions_sn2(void) { /* nothing needs to be done */ } @@ -431,6 +431,13 @@ xpc_send_chctl_openreply_sn2(struct xpc_channel *ch, unsigned long *irq_flags) } static void +xpc_send_chctl_opencomplete_sn2(struct xpc_channel *ch, + unsigned long *irq_flags) +{ + XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_OPENCOMPLETE, irq_flags); +} + +static void xpc_send_chctl_msgrequest_sn2(struct xpc_channel *ch) { XPC_SEND_NOTIFY_IRQ_SN2(ch, XPC_CHCTL_MSGREQUEST, NULL); @@ -621,7 +628,7 @@ xpc_get_partition_rsvd_page_pa_sn2(void *buf, u64 *cookie, unsigned long *rp_pa, static int -xpc_setup_rsvd_page_sn_sn2(struct xpc_rsvd_page *rp) +xpc_setup_rsvd_page_sn2(struct xpc_rsvd_page *rp) { struct amo *amos_page; int i; @@ -629,7 +636,7 @@ xpc_setup_rsvd_page_sn_sn2(struct xpc_rsvd_page *rp) xpc_vars_sn2 = XPC_RP_VARS(rp); - rp->sn.vars_pa = xp_pa(xpc_vars_sn2); + rp->sn.sn2.vars_pa = xp_pa(xpc_vars_sn2); /* vars_part array follows immediately after vars */ xpc_vars_part_sn2 = (struct xpc_vars_part_sn2 *)((u8 *)XPC_RP_VARS(rp) + @@ -693,6 +700,33 @@ xpc_setup_rsvd_page_sn_sn2(struct xpc_rsvd_page *rp) return 0; } +static int +xpc_hb_allowed_sn2(short partid, void *heartbeating_to_mask) +{ + return test_bit(partid, heartbeating_to_mask); +} + +static void +xpc_allow_hb_sn2(short partid) +{ + DBUG_ON(xpc_vars_sn2 == NULL); + set_bit(partid, xpc_vars_sn2->heartbeating_to_mask); +} + +static void +xpc_disallow_hb_sn2(short partid) +{ + DBUG_ON(xpc_vars_sn2 == NULL); + clear_bit(partid, xpc_vars_sn2->heartbeating_to_mask); +} + +static void +xpc_disallow_all_hbs_sn2(void) +{ + DBUG_ON(xpc_vars_sn2 == NULL); + bitmap_zero(xpc_vars_sn2->heartbeating_to_mask, xp_max_npartitions); +} + static void xpc_increment_heartbeat_sn2(void) { @@ -719,7 +753,6 @@ xpc_heartbeat_init_sn2(void) DBUG_ON(xpc_vars_sn2 == NULL); bitmap_zero(xpc_vars_sn2->heartbeating_to_mask, XP_MAX_NPARTITIONS_SN2); - xpc_heartbeating_to_mask = &xpc_vars_sn2->heartbeating_to_mask[0]; xpc_online_heartbeat_sn2(); } @@ -751,9 +784,9 @@ xpc_get_remote_heartbeat_sn2(struct xpc_partition *part) remote_vars->heartbeating_to_mask[0]); if ((remote_vars->heartbeat == part->last_heartbeat && - remote_vars->heartbeat_offline == 0) || - !xpc_hb_allowed(sn_partition_id, - &remote_vars->heartbeating_to_mask)) { + !remote_vars->heartbeat_offline) || + 
!xpc_hb_allowed_sn2(sn_partition_id, + remote_vars->heartbeating_to_mask)) { ret = xpNoHeartbeat; } else { part->last_heartbeat = remote_vars->heartbeat; @@ -972,7 +1005,7 @@ xpc_identify_activate_IRQ_req_sn2(int nasid) return; } - remote_vars_pa = remote_rp->sn.vars_pa; + remote_vars_pa = remote_rp->sn.sn2.vars_pa; remote_rp_version = remote_rp->version; remote_rp_ts_jiffies = remote_rp->ts_jiffies; @@ -1129,7 +1162,7 @@ xpc_process_activate_IRQ_rcvd_sn2(void) * Setup the channel structures that are sn2 specific. */ static enum xp_retval -xpc_setup_ch_structures_sn_sn2(struct xpc_partition *part) +xpc_setup_ch_structures_sn2(struct xpc_partition *part) { struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; struct xpc_channel_sn2 *ch_sn2; @@ -1251,7 +1284,7 @@ out_1: * Teardown the channel structures that are sn2 specific. */ static void -xpc_teardown_ch_structures_sn_sn2(struct xpc_partition *part) +xpc_teardown_ch_structures_sn2(struct xpc_partition *part) { struct xpc_partition_sn2 *part_sn2 = &part->sn.sn2; short partid = XPC_PARTID(part); @@ -2315,61 +2348,70 @@ xpc_received_payload_sn2(struct xpc_channel *ch, void *payload) xpc_acknowledge_msgs_sn2(ch, get, msg->flags); } +static struct xpc_arch_operations xpc_arch_ops_sn2 = { + .setup_partitions = xpc_setup_partitions_sn2, + .teardown_partitions = xpc_teardown_partitions_sn2, + .process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_sn2, + .get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_sn2, + .setup_rsvd_page = xpc_setup_rsvd_page_sn2, + + .allow_hb = xpc_allow_hb_sn2, + .disallow_hb = xpc_disallow_hb_sn2, + .disallow_all_hbs = xpc_disallow_all_hbs_sn2, + .increment_heartbeat = xpc_increment_heartbeat_sn2, + .offline_heartbeat = xpc_offline_heartbeat_sn2, + .online_heartbeat = xpc_online_heartbeat_sn2, + .heartbeat_init = xpc_heartbeat_init_sn2, + .heartbeat_exit = xpc_heartbeat_exit_sn2, + .get_remote_heartbeat = xpc_get_remote_heartbeat_sn2, + + .request_partition_activation = + xpc_request_partition_activation_sn2, + .request_partition_reactivation = + xpc_request_partition_reactivation_sn2, + .request_partition_deactivation = + xpc_request_partition_deactivation_sn2, + .cancel_partition_deactivation_request = + xpc_cancel_partition_deactivation_request_sn2, + + .setup_ch_structures = xpc_setup_ch_structures_sn2, + .teardown_ch_structures = xpc_teardown_ch_structures_sn2, + + .make_first_contact = xpc_make_first_contact_sn2, + + .get_chctl_all_flags = xpc_get_chctl_all_flags_sn2, + .send_chctl_closerequest = xpc_send_chctl_closerequest_sn2, + .send_chctl_closereply = xpc_send_chctl_closereply_sn2, + .send_chctl_openrequest = xpc_send_chctl_openrequest_sn2, + .send_chctl_openreply = xpc_send_chctl_openreply_sn2, + .send_chctl_opencomplete = xpc_send_chctl_opencomplete_sn2, + .process_msg_chctl_flags = xpc_process_msg_chctl_flags_sn2, + + .save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_sn2, + + .setup_msg_structures = xpc_setup_msg_structures_sn2, + .teardown_msg_structures = xpc_teardown_msg_structures_sn2, + + .indicate_partition_engaged = xpc_indicate_partition_engaged_sn2, + .indicate_partition_disengaged = xpc_indicate_partition_disengaged_sn2, + .partition_engaged = xpc_partition_engaged_sn2, + .any_partition_engaged = xpc_any_partition_engaged_sn2, + .assume_partition_disengaged = xpc_assume_partition_disengaged_sn2, + + .n_of_deliverable_payloads = xpc_n_of_deliverable_payloads_sn2, + .send_payload = xpc_send_payload_sn2, + .get_deliverable_payload = xpc_get_deliverable_payload_sn2, + 
.received_payload = xpc_received_payload_sn2, + .notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_sn2, +}; + int xpc_init_sn2(void) { int ret; size_t buf_size; - xpc_setup_partitions_sn = xpc_setup_partitions_sn_sn2; - xpc_teardown_partitions_sn = xpc_teardown_partitions_sn_sn2; - xpc_get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_sn2; - xpc_setup_rsvd_page_sn = xpc_setup_rsvd_page_sn_sn2; - xpc_increment_heartbeat = xpc_increment_heartbeat_sn2; - xpc_offline_heartbeat = xpc_offline_heartbeat_sn2; - xpc_online_heartbeat = xpc_online_heartbeat_sn2; - xpc_heartbeat_init = xpc_heartbeat_init_sn2; - xpc_heartbeat_exit = xpc_heartbeat_exit_sn2; - xpc_get_remote_heartbeat = xpc_get_remote_heartbeat_sn2; - - xpc_request_partition_activation = xpc_request_partition_activation_sn2; - xpc_request_partition_reactivation = - xpc_request_partition_reactivation_sn2; - xpc_request_partition_deactivation = - xpc_request_partition_deactivation_sn2; - xpc_cancel_partition_deactivation_request = - xpc_cancel_partition_deactivation_request_sn2; - - xpc_process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_sn2; - xpc_setup_ch_structures_sn = xpc_setup_ch_structures_sn_sn2; - xpc_teardown_ch_structures_sn = xpc_teardown_ch_structures_sn_sn2; - xpc_make_first_contact = xpc_make_first_contact_sn2; - - xpc_get_chctl_all_flags = xpc_get_chctl_all_flags_sn2; - xpc_send_chctl_closerequest = xpc_send_chctl_closerequest_sn2; - xpc_send_chctl_closereply = xpc_send_chctl_closereply_sn2; - xpc_send_chctl_openrequest = xpc_send_chctl_openrequest_sn2; - xpc_send_chctl_openreply = xpc_send_chctl_openreply_sn2; - - xpc_save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_sn2; - - xpc_setup_msg_structures = xpc_setup_msg_structures_sn2; - xpc_teardown_msg_structures = xpc_teardown_msg_structures_sn2; - - xpc_notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_sn2; - xpc_process_msg_chctl_flags = xpc_process_msg_chctl_flags_sn2; - xpc_n_of_deliverable_payloads = xpc_n_of_deliverable_payloads_sn2; - xpc_get_deliverable_payload = xpc_get_deliverable_payload_sn2; - - xpc_indicate_partition_engaged = xpc_indicate_partition_engaged_sn2; - xpc_indicate_partition_disengaged = - xpc_indicate_partition_disengaged_sn2; - xpc_partition_engaged = xpc_partition_engaged_sn2; - xpc_any_partition_engaged = xpc_any_partition_engaged_sn2; - xpc_assume_partition_disengaged = xpc_assume_partition_disengaged_sn2; - - xpc_send_payload = xpc_send_payload_sn2; - xpc_received_payload = xpc_received_payload_sn2; + xpc_arch_ops = xpc_arch_ops_sn2; if (offsetof(struct xpc_msg_sn2, payload) > XPC_MSG_HDR_MAX_SIZE) { dev_err(xpc_part, "header portion of struct xpc_msg_sn2 is " diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c index f7fff4727ed..9172fcdee4e 100644 --- a/drivers/misc/sgi-xp/xpc_uv.c +++ b/drivers/misc/sgi-xp/xpc_uv.c @@ -46,8 +46,7 @@ struct uv_IO_APIC_route_entry { }; #endif -static atomic64_t xpc_heartbeat_uv; -static DECLARE_BITMAP(xpc_heartbeating_to_mask_uv, XP_MAX_NPARTITIONS_UV); +static struct xpc_heartbeat_uv *xpc_heartbeat_uv; #define XPC_ACTIVATE_MSG_SIZE_UV (1 * GRU_CACHE_LINE_BYTES) #define XPC_ACTIVATE_MQ_SIZE_UV (4 * XP_MAX_NPARTITIONS_UV * \ @@ -63,7 +62,7 @@ static struct xpc_gru_mq_uv *xpc_activate_mq_uv; static struct xpc_gru_mq_uv *xpc_notify_mq_uv; static int -xpc_setup_partitions_sn_uv(void) +xpc_setup_partitions_uv(void) { short partid; struct xpc_partition_uv *part_uv; @@ -79,7 +78,7 @@ xpc_setup_partitions_sn_uv(void) } static void 
-xpc_teardown_partitions_sn_uv(void) +xpc_teardown_partitions_uv(void) { short partid; struct xpc_partition_uv *part_uv; @@ -423,41 +422,6 @@ xpc_handle_activate_mq_msg_uv(struct xpc_partition *part, /* syncing of remote_act_state was just done above */ break; - case XPC_ACTIVATE_MQ_MSG_INC_HEARTBEAT_UV: { - struct xpc_activate_mq_msg_heartbeat_req_uv *msg; - - msg = container_of(msg_hdr, - struct xpc_activate_mq_msg_heartbeat_req_uv, - hdr); - part_uv->heartbeat = msg->heartbeat; - break; - } - case XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV: { - struct xpc_activate_mq_msg_heartbeat_req_uv *msg; - - msg = container_of(msg_hdr, - struct xpc_activate_mq_msg_heartbeat_req_uv, - hdr); - part_uv->heartbeat = msg->heartbeat; - - spin_lock_irqsave(&part_uv->flags_lock, irq_flags); - part_uv->flags |= XPC_P_HEARTBEAT_OFFLINE_UV; - spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags); - break; - } - case XPC_ACTIVATE_MQ_MSG_ONLINE_HEARTBEAT_UV: { - struct xpc_activate_mq_msg_heartbeat_req_uv *msg; - - msg = container_of(msg_hdr, - struct xpc_activate_mq_msg_heartbeat_req_uv, - hdr); - part_uv->heartbeat = msg->heartbeat; - - spin_lock_irqsave(&part_uv->flags_lock, irq_flags); - part_uv->flags &= ~XPC_P_HEARTBEAT_OFFLINE_UV; - spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags); - break; - } case XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV: { struct xpc_activate_mq_msg_activate_req_uv *msg; @@ -475,6 +439,7 @@ xpc_handle_activate_mq_msg_uv(struct xpc_partition *part, part_uv->act_state_req = XPC_P_ASR_ACTIVATE_UV; part->remote_rp_pa = msg->rp_gpa; /* !!! _pa is _gpa */ part->remote_rp_ts_jiffies = msg_hdr->rp_ts_jiffies; + part_uv->heartbeat_gpa = msg->heartbeat_gpa; if (msg->activate_gru_mq_desc_gpa != part_uv->activate_gru_mq_desc_gpa) { @@ -569,6 +534,17 @@ xpc_handle_activate_mq_msg_uv(struct xpc_partition *part, xpc_wakeup_channel_mgr(part); break; } + case XPC_ACTIVATE_MQ_MSG_CHCTL_OPENCOMPLETE_UV: { + struct xpc_activate_mq_msg_chctl_opencomplete_uv *msg; + + msg = container_of(msg_hdr, struct + xpc_activate_mq_msg_chctl_opencomplete_uv, hdr); + spin_lock_irqsave(&part->chctl_lock, irq_flags); + part->chctl.flags[msg->ch_number] |= XPC_CHCTL_OPENCOMPLETE; + spin_unlock_irqrestore(&part->chctl_lock, irq_flags); + + xpc_wakeup_channel_mgr(part); + } case XPC_ACTIVATE_MQ_MSG_MARK_ENGAGED_UV: spin_lock_irqsave(&part_uv->flags_lock, irq_flags); part_uv->flags |= XPC_P_ENGAGED_UV; @@ -759,7 +735,7 @@ xpc_send_local_activate_IRQ_uv(struct xpc_partition *part, int act_state_req) /* * !!! Make our side think that the remote partition sent an activate - * !!! message our way by doing what the activate IRQ handler would + * !!! mq message our way by doing what the activate IRQ handler would * !!! do had one really been sent. */ @@ -806,90 +782,82 @@ xpc_get_partition_rsvd_page_pa_uv(void *buf, u64 *cookie, unsigned long *rp_pa, } static int -xpc_setup_rsvd_page_sn_uv(struct xpc_rsvd_page *rp) +xpc_setup_rsvd_page_uv(struct xpc_rsvd_page *rp) { - rp->sn.activate_gru_mq_desc_gpa = + xpc_heartbeat_uv = + &xpc_partitions[sn_partition_id].sn.uv.cached_heartbeat; + rp->sn.uv.heartbeat_gpa = uv_gpa(xpc_heartbeat_uv); + rp->sn.uv.activate_gru_mq_desc_gpa = uv_gpa(xpc_activate_mq_uv->gru_mq_desc); return 0; } static void -xpc_send_heartbeat_uv(int msg_type) +xpc_allow_hb_uv(short partid) { - short partid; - struct xpc_partition *part; - struct xpc_activate_mq_msg_heartbeat_req_uv msg; - - /* - * !!! On uv we're broadcasting a heartbeat message every 5 seconds. - * !!! 
Whereas on sn2 we're bte_copy'ng the heartbeat info every 20 - * !!! seconds. This is an increase in numalink traffic. - * ??? Is this good? - */ - - msg.heartbeat = atomic64_inc_return(&xpc_heartbeat_uv); - - partid = find_first_bit(xpc_heartbeating_to_mask_uv, - XP_MAX_NPARTITIONS_UV); - - while (partid < XP_MAX_NPARTITIONS_UV) { - part = &xpc_partitions[partid]; +} - xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg), - msg_type); +static void +xpc_disallow_hb_uv(short partid) +{ +} - partid = find_next_bit(xpc_heartbeating_to_mask_uv, - XP_MAX_NPARTITIONS_UV, partid + 1); - } +static void +xpc_disallow_all_hbs_uv(void) +{ } static void xpc_increment_heartbeat_uv(void) { - xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_INC_HEARTBEAT_UV); + xpc_heartbeat_uv->value++; } static void xpc_offline_heartbeat_uv(void) { - xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV); + xpc_increment_heartbeat_uv(); + xpc_heartbeat_uv->offline = 1; } static void xpc_online_heartbeat_uv(void) { - xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_ONLINE_HEARTBEAT_UV); + xpc_increment_heartbeat_uv(); + xpc_heartbeat_uv->offline = 0; } static void xpc_heartbeat_init_uv(void) { - atomic64_set(&xpc_heartbeat_uv, 0); - bitmap_zero(xpc_heartbeating_to_mask_uv, XP_MAX_NPARTITIONS_UV); - xpc_heartbeating_to_mask = &xpc_heartbeating_to_mask_uv[0]; + xpc_heartbeat_uv->value = 1; + xpc_heartbeat_uv->offline = 0; } static void xpc_heartbeat_exit_uv(void) { - xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV); + xpc_offline_heartbeat_uv(); } static enum xp_retval xpc_get_remote_heartbeat_uv(struct xpc_partition *part) { struct xpc_partition_uv *part_uv = &part->sn.uv; - enum xp_retval ret = xpNoHeartbeat; + enum xp_retval ret; - if (part_uv->remote_act_state != XPC_P_AS_INACTIVE && - part_uv->remote_act_state != XPC_P_AS_DEACTIVATING) { + ret = xp_remote_memcpy(uv_gpa(&part_uv->cached_heartbeat), + part_uv->heartbeat_gpa, + sizeof(struct xpc_heartbeat_uv)); + if (ret != xpSuccess) + return ret; - if (part_uv->heartbeat != part->last_heartbeat || - (part_uv->flags & XPC_P_HEARTBEAT_OFFLINE_UV)) { + if (part_uv->cached_heartbeat.value == part->last_heartbeat && + !part_uv->cached_heartbeat.offline) { - part->last_heartbeat = part_uv->heartbeat; - ret = xpSuccess; - } + ret = xpNoHeartbeat; + } else { + part->last_heartbeat = part_uv->cached_heartbeat.value; } return ret; } @@ -904,8 +872,9 @@ xpc_request_partition_activation_uv(struct xpc_rsvd_page *remote_rp, part->remote_rp_pa = remote_rp_gpa; /* !!! _pa here is really _gpa */ part->remote_rp_ts_jiffies = remote_rp->ts_jiffies; + part->sn.uv.heartbeat_gpa = remote_rp->sn.uv.heartbeat_gpa; part->sn.uv.activate_gru_mq_desc_gpa = - remote_rp->sn.activate_gru_mq_desc_gpa; + remote_rp->sn.uv.activate_gru_mq_desc_gpa; /* * ??? Is it a good idea to make this conditional on what is @@ -913,8 +882,9 @@ xpc_request_partition_activation_uv(struct xpc_rsvd_page *remote_rp, */ if (part->sn.uv.remote_act_state == XPC_P_AS_INACTIVE) { msg.rp_gpa = uv_gpa(xpc_rsvd_page); + msg.heartbeat_gpa = xpc_rsvd_page->sn.uv.heartbeat_gpa; msg.activate_gru_mq_desc_gpa = - xpc_rsvd_page->sn.activate_gru_mq_desc_gpa; + xpc_rsvd_page->sn.uv.activate_gru_mq_desc_gpa; xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg), XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV); } @@ -1010,7 +980,7 @@ xpc_n_of_fifo_entries_uv(struct xpc_fifo_head_uv *head) * Setup the channel structures that are uv specific. 
*/ static enum xp_retval -xpc_setup_ch_structures_sn_uv(struct xpc_partition *part) +xpc_setup_ch_structures_uv(struct xpc_partition *part) { struct xpc_channel_uv *ch_uv; int ch_number; @@ -1029,7 +999,7 @@ xpc_setup_ch_structures_sn_uv(struct xpc_partition *part) * Teardown the channel structures that are uv specific. */ static void -xpc_teardown_ch_structures_sn_uv(struct xpc_partition *part) +xpc_teardown_ch_structures_uv(struct xpc_partition *part) { /* nothing needs to be done */ return; @@ -1243,6 +1213,16 @@ xpc_send_chctl_openreply_uv(struct xpc_channel *ch, unsigned long *irq_flags) } static void +xpc_send_chctl_opencomplete_uv(struct xpc_channel *ch, unsigned long *irq_flags) +{ + struct xpc_activate_mq_msg_chctl_opencomplete_uv msg; + + msg.ch_number = ch->number; + xpc_send_activate_IRQ_ch_uv(ch, irq_flags, &msg, sizeof(msg), + XPC_ACTIVATE_MQ_MSG_CHCTL_OPENCOMPLETE_UV); +} + +static void xpc_send_chctl_local_msgrequest_uv(struct xpc_partition *part, int ch_number) { unsigned long irq_flags; @@ -1669,58 +1649,67 @@ xpc_received_payload_uv(struct xpc_channel *ch, void *payload) msg->hdr.msg_slot_number += ch->remote_nentries; } +static struct xpc_arch_operations xpc_arch_ops_uv = { + .setup_partitions = xpc_setup_partitions_uv, + .teardown_partitions = xpc_teardown_partitions_uv, + .process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_uv, + .get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_uv, + .setup_rsvd_page = xpc_setup_rsvd_page_uv, + + .allow_hb = xpc_allow_hb_uv, + .disallow_hb = xpc_disallow_hb_uv, + .disallow_all_hbs = xpc_disallow_all_hbs_uv, + .increment_heartbeat = xpc_increment_heartbeat_uv, + .offline_heartbeat = xpc_offline_heartbeat_uv, + .online_heartbeat = xpc_online_heartbeat_uv, + .heartbeat_init = xpc_heartbeat_init_uv, + .heartbeat_exit = xpc_heartbeat_exit_uv, + .get_remote_heartbeat = xpc_get_remote_heartbeat_uv, + + .request_partition_activation = + xpc_request_partition_activation_uv, + .request_partition_reactivation = + xpc_request_partition_reactivation_uv, + .request_partition_deactivation = + xpc_request_partition_deactivation_uv, + .cancel_partition_deactivation_request = + xpc_cancel_partition_deactivation_request_uv, + + .setup_ch_structures = xpc_setup_ch_structures_uv, + .teardown_ch_structures = xpc_teardown_ch_structures_uv, + + .make_first_contact = xpc_make_first_contact_uv, + + .get_chctl_all_flags = xpc_get_chctl_all_flags_uv, + .send_chctl_closerequest = xpc_send_chctl_closerequest_uv, + .send_chctl_closereply = xpc_send_chctl_closereply_uv, + .send_chctl_openrequest = xpc_send_chctl_openrequest_uv, + .send_chctl_openreply = xpc_send_chctl_openreply_uv, + .send_chctl_opencomplete = xpc_send_chctl_opencomplete_uv, + .process_msg_chctl_flags = xpc_process_msg_chctl_flags_uv, + + .save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_uv, + + .setup_msg_structures = xpc_setup_msg_structures_uv, + .teardown_msg_structures = xpc_teardown_msg_structures_uv, + + .indicate_partition_engaged = xpc_indicate_partition_engaged_uv, + .indicate_partition_disengaged = xpc_indicate_partition_disengaged_uv, + .assume_partition_disengaged = xpc_assume_partition_disengaged_uv, + .partition_engaged = xpc_partition_engaged_uv, + .any_partition_engaged = xpc_any_partition_engaged_uv, + + .n_of_deliverable_payloads = xpc_n_of_deliverable_payloads_uv, + .send_payload = xpc_send_payload_uv, + .get_deliverable_payload = xpc_get_deliverable_payload_uv, + .received_payload = xpc_received_payload_uv, + .notify_senders_of_disconnect = 
xpc_notify_senders_of_disconnect_uv, +}; + int xpc_init_uv(void) { - xpc_setup_partitions_sn = xpc_setup_partitions_sn_uv; - xpc_teardown_partitions_sn = xpc_teardown_partitions_sn_uv; - xpc_process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_uv; - xpc_get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_uv; - xpc_setup_rsvd_page_sn = xpc_setup_rsvd_page_sn_uv; - xpc_increment_heartbeat = xpc_increment_heartbeat_uv; - xpc_offline_heartbeat = xpc_offline_heartbeat_uv; - xpc_online_heartbeat = xpc_online_heartbeat_uv; - xpc_heartbeat_init = xpc_heartbeat_init_uv; - xpc_heartbeat_exit = xpc_heartbeat_exit_uv; - xpc_get_remote_heartbeat = xpc_get_remote_heartbeat_uv; - - xpc_request_partition_activation = xpc_request_partition_activation_uv; - xpc_request_partition_reactivation = - xpc_request_partition_reactivation_uv; - xpc_request_partition_deactivation = - xpc_request_partition_deactivation_uv; - xpc_cancel_partition_deactivation_request = - xpc_cancel_partition_deactivation_request_uv; - - xpc_setup_ch_structures_sn = xpc_setup_ch_structures_sn_uv; - xpc_teardown_ch_structures_sn = xpc_teardown_ch_structures_sn_uv; - - xpc_make_first_contact = xpc_make_first_contact_uv; - - xpc_get_chctl_all_flags = xpc_get_chctl_all_flags_uv; - xpc_send_chctl_closerequest = xpc_send_chctl_closerequest_uv; - xpc_send_chctl_closereply = xpc_send_chctl_closereply_uv; - xpc_send_chctl_openrequest = xpc_send_chctl_openrequest_uv; - xpc_send_chctl_openreply = xpc_send_chctl_openreply_uv; - - xpc_save_remote_msgqueue_pa = xpc_save_remote_msgqueue_pa_uv; - - xpc_setup_msg_structures = xpc_setup_msg_structures_uv; - xpc_teardown_msg_structures = xpc_teardown_msg_structures_uv; - - xpc_indicate_partition_engaged = xpc_indicate_partition_engaged_uv; - xpc_indicate_partition_disengaged = - xpc_indicate_partition_disengaged_uv; - xpc_assume_partition_disengaged = xpc_assume_partition_disengaged_uv; - xpc_partition_engaged = xpc_partition_engaged_uv; - xpc_any_partition_engaged = xpc_any_partition_engaged_uv; - - xpc_n_of_deliverable_payloads = xpc_n_of_deliverable_payloads_uv; - xpc_process_msg_chctl_flags = xpc_process_msg_chctl_flags_uv; - xpc_send_payload = xpc_send_payload_uv; - xpc_notify_senders_of_disconnect = xpc_notify_senders_of_disconnect_uv; - xpc_get_deliverable_payload = xpc_get_deliverable_payload_uv; - xpc_received_payload = xpc_received_payload_uv; + xpc_arch_ops = xpc_arch_ops_uv; if (sizeof(struct xpc_notify_mq_msghdr_uv) > XPC_MSG_HDR_MAX_SIZE) { dev_err(xpc_part, "xpc_notify_mq_msghdr_uv is larger than %d\n", diff --git a/drivers/net/atl1c/atl1c_main.c b/drivers/net/atl1c/atl1c_main.c index deb7b53167e..83a12125b94 100644 --- a/drivers/net/atl1c/atl1c_main.c +++ b/drivers/net/atl1c/atl1c_main.c @@ -2532,8 +2532,8 @@ static int __devinit atl1c_probe(struct pci_dev *pdev, * various kernel subsystems to support the mechanics required by a * fixed-high-32-bit system. 
*/ - if ((pci_set_dma_mask(pdev, DMA_32BIT_MASK) != 0) || - (pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK) != 0)) { + if ((pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) || + (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)) != 0)) { dev_err(&pdev->dev, "No usable DMA configuration,aborting\n"); goto err_dma; } diff --git a/drivers/net/benet/be_main.c b/drivers/net/benet/be_main.c index 9b75aa63006..30d0c81c989 100644 --- a/drivers/net/benet/be_main.c +++ b/drivers/net/benet/be_main.c @@ -1821,11 +1821,11 @@ static int __devinit be_probe(struct pci_dev *pdev, be_msix_enable(adapter); - status = pci_set_dma_mask(pdev, DMA_64BIT_MASK); + status = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); if (!status) { netdev->features |= NETIF_F_HIGHDMA; } else { - status = pci_set_dma_mask(pdev, DMA_32BIT_MASK); + status = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); if (status) { dev_err(&pdev->dev, "Could not set PCI DMA Mask\n"); goto free_netdev; diff --git a/drivers/net/jme.c b/drivers/net/jme.c index ece35040288..621a7c0c46b 100644 --- a/drivers/net/jme.c +++ b/drivers/net/jme.c @@ -2591,13 +2591,13 @@ static int jme_pci_dma64(struct pci_dev *pdev) { if (pdev->device == PCI_DEVICE_ID_JMICRON_JMC250 && - !pci_set_dma_mask(pdev, DMA_64BIT_MASK)) - if (!pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK)) + !pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) + if (!pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) return 1; if (pdev->device == PCI_DEVICE_ID_JMICRON_JMC250 && - !pci_set_dma_mask(pdev, DMA_40BIT_MASK)) - if (!pci_set_consistent_dma_mask(pdev, DMA_40BIT_MASK)) + !pci_set_dma_mask(pdev, DMA_BIT_MASK(40))) + if (!pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40))) return 1; if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) diff --git a/drivers/net/wireless/ath9k/pci.c b/drivers/net/wireless/ath9k/pci.c index 6dbc58580ab..168411d322a 100644 --- a/drivers/net/wireless/ath9k/pci.c +++ b/drivers/net/wireless/ath9k/pci.c @@ -93,14 +93,14 @@ static int ath_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (pci_enable_device(pdev)) return -EIO; - ret = pci_set_dma_mask(pdev, DMA_32BIT_MASK); + ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); if (ret) { printk(KERN_ERR "ath9k: 32-bit DMA not available\n"); goto bad; } - ret = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK); + ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); if (ret) { printk(KERN_ERR "ath9k: 32-bit DMA consistent " diff --git a/drivers/net/wireless/p54/p54pci.c b/drivers/net/wireless/p54/p54pci.c index e3569a0a952..b1610ea4bb3 100644 --- a/drivers/net/wireless/p54/p54pci.c +++ b/drivers/net/wireless/p54/p54pci.c @@ -492,8 +492,8 @@ static int __devinit p54p_probe(struct pci_dev *pdev, goto err_disable_dev; } - if (pci_set_dma_mask(pdev, DMA_32BIT_MASK) || - pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK)) { + if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) || + pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32))) { dev_err(&pdev->dev, "No suitable DMA available\n"); goto err_free_reg; } diff --git a/drivers/parisc/superio.c b/drivers/parisc/superio.c index 4fa3bb2ddfe..33e5ade774c 100644 --- a/drivers/parisc/superio.c +++ b/drivers/parisc/superio.c @@ -434,7 +434,8 @@ static void __init superio_parport_init(void) 0 /*base_hi*/, PAR_IRQ, PARPORT_DMA_NONE /* dma */, - NULL /*struct pci_dev* */) ) + NULL /*struct pci_dev* */), + 0 /* shared irq flags */ ) printk(KERN_WARNING PFX "Probing parallel port failed.\n"); #endif /* CONFIG_PARPORT_PC */ diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c 
index 0570794ccf1..d1815272c43 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -20,6 +20,7 @@ #include <linux/slab.h> #include <linux/buffer_head.h> #include <linux/hdreg.h> +#include <linux/async.h> #include <asm/ccwdev.h> #include <asm/ebcdic.h> @@ -480,8 +481,10 @@ static void dasd_change_state(struct dasd_device *device) if (rc && rc != -EAGAIN) device->target = device->state; - if (device->state == device->target) + if (device->state == device->target) { wake_up(&dasd_init_waitq); + dasd_put_device(device); + } /* let user-space know that the device status changed */ kobject_uevent(&device->cdev->dev.kobj, KOBJ_CHANGE); @@ -513,12 +516,15 @@ void dasd_kick_device(struct dasd_device *device) */ void dasd_set_target_state(struct dasd_device *device, int target) { + dasd_get_device(device); /* If we are in probeonly mode stop at DASD_STATE_READY. */ if (dasd_probeonly && target > DASD_STATE_READY) target = DASD_STATE_READY; if (device->target != target) { - if (device->state == target) + if (device->state == target) { wake_up(&dasd_init_waitq); + dasd_put_device(device); + } device->target = target; } if (device->state != device->target) @@ -2148,6 +2154,22 @@ dasd_exit(void) * SECTION: common functions for ccw_driver use */ +static void dasd_generic_auto_online(void *data, async_cookie_t cookie) +{ + struct ccw_device *cdev = data; + int ret; + + ret = ccw_device_set_online(cdev); + if (ret) + pr_warning("%s: Setting the DASD online failed with rc=%d\n", + dev_name(&cdev->dev), ret); + else { + struct dasd_device *device = dasd_device_from_cdev(cdev); + wait_event(dasd_init_waitq, _wait_for_device(device)); + dasd_put_device(device); + } +} + /* * Initial attempt at a probe function. this can be simplified once * the other detection code is gone. @@ -2180,10 +2202,7 @@ int dasd_generic_probe(struct ccw_device *cdev, */ if ((dasd_get_feature(cdev, DASD_FEATURE_INITIAL_ONLINE) > 0 ) || (dasd_autodetect && dasd_busid_known(dev_name(&cdev->dev)) != 0)) - ret = ccw_device_set_online(cdev); - if (ret) - pr_warning("%s: Setting the DASD online failed with rc=%d\n", - dev_name(&cdev->dev), ret); + async_schedule(dasd_generic_auto_online, cdev); return 0; } @@ -2290,13 +2309,7 @@ int dasd_generic_set_online(struct ccw_device *cdev, } else pr_debug("dasd_generic device %s found\n", dev_name(&cdev->dev)); - - /* FIXME: we have to wait for the root device but we don't want - * to wait for each single device but for all at once. */ - wait_event(dasd_init_waitq, _wait_for_device(device)); - dasd_put_device(device); - return rc; } diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 21254793c60..cb52da033f0 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -2019,15 +2019,23 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_track( ccw++; recid += count; new_track = 0; + /* first idaw for a ccw may start anywhere */ + if (!idaw_dst) + idaw_dst = dst; } - /* If we start a new idaw, everything is fine and the - * start of the new idaw is the start of this segment. + /* If we start a new idaw, we must make sure that it + * starts on an IDA_BLOCK_SIZE boundary. 
* If we continue an idaw, we must make sure that the * current segment begins where the so far accumulated * idaw ends */ - if (!idaw_dst) - idaw_dst = dst; + if (!idaw_dst) { + if (__pa(dst) & (IDA_BLOCK_SIZE-1)) { + dasd_sfree_request(cqr, startdev); + return ERR_PTR(-ERANGE); + } else + idaw_dst = dst; + } if ((idaw_dst + idaw_len) != dst) { dasd_sfree_request(cqr, startdev); return ERR_PTR(-ERANGE); diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 9e8a2914259..accd957454e 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -881,42 +881,6 @@ no_handler: qdio_set_state(irq_ptr, QDIO_IRQ_STATE_STOPPED); } -static void qdio_call_shutdown(struct work_struct *work) -{ - struct ccw_device_private *priv; - struct ccw_device *cdev; - - priv = container_of(work, struct ccw_device_private, kick_work); - cdev = priv->cdev; - qdio_shutdown(cdev, QDIO_FLAG_CLEANUP_USING_CLEAR); - put_device(&cdev->dev); -} - -static void qdio_int_error(struct ccw_device *cdev) -{ - struct qdio_irq *irq_ptr = cdev->private->qdio_data; - - switch (irq_ptr->state) { - case QDIO_IRQ_STATE_INACTIVE: - case QDIO_IRQ_STATE_CLEANUP: - qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR); - break; - case QDIO_IRQ_STATE_ESTABLISHED: - case QDIO_IRQ_STATE_ACTIVE: - qdio_set_state(irq_ptr, QDIO_IRQ_STATE_STOPPED); - if (get_device(&cdev->dev)) { - /* Can't call shutdown from interrupt context. */ - PREPARE_WORK(&cdev->private->kick_work, - qdio_call_shutdown); - queue_work(ccw_device_work, &cdev->private->kick_work); - } - break; - default: - WARN_ON(1); - } - wake_up(&cdev->private->wait_q); -} - static int qdio_establish_check_errors(struct ccw_device *cdev, int cstat, int dstat) { @@ -973,10 +937,8 @@ void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm, switch (PTR_ERR(irb)) { case -EIO: DBF_ERROR("%4x IO error", irq_ptr->schid.sch_no); - return; - case -ETIMEDOUT: - DBF_ERROR("%4x IO timeout", irq_ptr->schid.sch_no); - qdio_int_error(cdev); + qdio_set_state(irq_ptr, QDIO_IRQ_STATE_ERR); + wake_up(&cdev->private->wait_q); return; default: WARN_ON(1); @@ -1001,7 +963,6 @@ void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm, case QDIO_IRQ_STATE_ACTIVE: if (cstat & SCHN_STAT_PCI) { qdio_int_handler_pci(irq_ptr); - /* no state change so no need to wake up wait_q */ return; } if ((cstat & ~SCHN_STAT_PCI) || dstat) { diff --git a/drivers/sbus/char/jsflash.c b/drivers/sbus/char/jsflash.c index e6d1fc8c54f..a85ad05e854 100644 --- a/drivers/sbus/char/jsflash.c +++ b/drivers/sbus/char/jsflash.c @@ -383,18 +383,22 @@ static int jsf_ioctl_program(void __user *arg) return 0; } -static int jsf_ioctl(struct inode *inode, struct file *f, unsigned int cmd, - unsigned long arg) +static long jsf_ioctl(struct file *f, unsigned int cmd, unsigned long arg) { + lock_kernel(); int error = -ENOTTY; void __user *argp = (void __user *)arg; - if (!capable(CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN)) { + unlock_kernel(); return -EPERM; + } switch (cmd) { case JSFLASH_IDENT: - if (copy_to_user(argp, &jsf0.id, JSFIDSZ)) + if (copy_to_user(argp, &jsf0.id, JSFIDSZ)) { + unlock_kernel(); return -EFAULT; + } break; case JSFLASH_ERASE: error = jsf_ioctl_erase(arg); @@ -404,6 +408,7 @@ static int jsf_ioctl(struct inode *inode, struct file *f, unsigned int cmd, break; } + unlock_kernel(); return error; } @@ -439,7 +444,7 @@ static const struct file_operations jsf_fops = { .llseek = jsf_lseek, .read = jsf_read, .write = jsf_write, - .ioctl = jsf_ioctl, + .unlocked_ioctl = 
jsf_ioctl, .mmap = jsf_mmap, .open = jsf_open, .release = jsf_release, diff --git a/drivers/sbus/char/uctrl.c b/drivers/sbus/char/uctrl.c index 27993c37775..2c56fd56ec6 100644 --- a/drivers/sbus/char/uctrl.c +++ b/drivers/sbus/char/uctrl.c @@ -197,9 +197,8 @@ static struct uctrl_driver { static void uctrl_get_event_status(struct uctrl_driver *); static void uctrl_get_external_status(struct uctrl_driver *); -static int -uctrl_ioctl(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg) +static long +uctrl_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { switch (cmd) { default: @@ -226,7 +225,7 @@ static irqreturn_t uctrl_interrupt(int irq, void *dev_id) static const struct file_operations uctrl_fops = { .owner = THIS_MODULE, .llseek = no_llseek, - .ioctl = uctrl_ioctl, + .unlocked_ioctl = uctrl_ioctl, .open = uctrl_open, }; diff --git a/drivers/scsi/3w-9xxx.c b/drivers/scsi/3w-9xxx.c index fdb14ec4fd4..8b7983aba8f 100644 --- a/drivers/scsi/3w-9xxx.c +++ b/drivers/scsi/3w-9xxx.c @@ -2234,10 +2234,10 @@ static int twa_resume(struct pci_dev *pdev) pci_set_master(pdev); pci_try_set_mwi(pdev); - if (pci_set_dma_mask(pdev, DMA_64BIT_MASK) - || pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK)) - if (pci_set_dma_mask(pdev, DMA_32BIT_MASK) - || pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK)) { + if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) + || pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) + if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) + || pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32))) { TW_PRINTK(host, TW_DRIVER, 0x40, "Failed to set dma mask during resume"); retval = -ENODEV; goto out_disable_device; diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c index 280261c451d..2a889853a10 100644 --- a/drivers/scsi/aacraid/aachba.c +++ b/drivers/scsi/aacraid/aachba.c @@ -1378,7 +1378,7 @@ int aac_get_adapter_info(struct aac_dev* dev) if (dev->nondasd_support && !dev->in_reset) printk(KERN_INFO "%s%d: Non-DASD support enabled.\n",dev->name, dev->id); - if (dma_get_required_mask(&dev->pdev->dev) > DMA_32BIT_MASK) + if (dma_get_required_mask(&dev->pdev->dev) > DMA_BIT_MASK(32)) dev->needs_dac = 1; dev->dac_support = 0; if ((sizeof(dma_addr_t) > 4) && dev->needs_dac && diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index b1bd3fc7bae..36fd2e75da1 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -1394,7 +1394,7 @@ lpfc_parse_bg_err(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd, */ cmd->sense_buffer[8] = 0; /* Information */ cmd->sense_buffer[9] = 0xa; /* Add. 
length */ - do_div(bghm, cmd->device->sector_size); + bghm /= cmd->device->sector_size; failing_sector = scsi_get_lba(cmd); failing_sector += bghm; diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.c b/drivers/scsi/mpt2sas/mpt2sas_base.c index 52427a8324f..a91f5143cea 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_base.c +++ b/drivers/scsi/mpt2sas/mpt2sas_base.c @@ -855,9 +855,9 @@ _base_config_dma_addressing(struct MPT2SAS_ADAPTER *ioc, struct pci_dev *pdev) if (sizeof(dma_addr_t) > 4) { const uint64_t required_mask = dma_get_required_mask(&pdev->dev); - if ((required_mask > DMA_32BIT_MASK) && !pci_set_dma_mask(pdev, - DMA_64BIT_MASK) && !pci_set_consistent_dma_mask(pdev, - DMA_64BIT_MASK)) { + if ((required_mask > DMA_BIT_MASK(32)) && !pci_set_dma_mask(pdev, + DMA_BIT_MASK(64)) && !pci_set_consistent_dma_mask(pdev, + DMA_BIT_MASK(64))) { ioc->base_add_sg_single = &_base_add_sg_single_64; ioc->sge_size = sizeof(Mpi2SGESimple64_t); desc = "64"; @@ -865,8 +865,8 @@ _base_config_dma_addressing(struct MPT2SAS_ADAPTER *ioc, struct pci_dev *pdev) } } - if (!pci_set_dma_mask(pdev, DMA_32BIT_MASK) - && !pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK)) { + if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) + && !pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32))) { ioc->base_add_sg_single = &_base_add_sg_single_32; ioc->sge_size = sizeof(Mpi2SGESimple32_t); desc = "32"; diff --git a/drivers/sh/intc.c b/drivers/sh/intc.c index 7fb9b5c4669..12d13d99b6f 100644 --- a/drivers/sh/intc.c +++ b/drivers/sh/intc.c @@ -44,6 +44,7 @@ struct intc_handle_int { struct intc_desc_int { struct list_head list; struct sys_device sysdev; + pm_message_t state; unsigned long *reg; #ifdef CONFIG_SMP unsigned long *smp; @@ -786,18 +787,44 @@ static int intc_suspend(struct sys_device *dev, pm_message_t state) /* get intc controller associated with this sysdev */ d = container_of(dev, struct intc_desc_int, sysdev); - /* enable wakeup irqs belonging to this intc controller */ - for_each_irq_desc(irq, desc) { - if ((desc->status & IRQ_WAKEUP) && (desc->chip == &d->chip)) - intc_enable(irq); + switch (state.event) { + case PM_EVENT_ON: + if (d->state.event != PM_EVENT_FREEZE) + break; + for_each_irq_desc(irq, desc) { + if (desc->chip != &d->chip) + continue; + if (desc->status & IRQ_DISABLED) + intc_disable(irq); + else + intc_enable(irq); + } + break; + case PM_EVENT_FREEZE: + /* nothing has to be done */ + break; + case PM_EVENT_SUSPEND: + /* enable wakeup irqs belonging to this intc controller */ + for_each_irq_desc(irq, desc) { + if ((desc->status & IRQ_WAKEUP) && (desc->chip == &d->chip)) + intc_enable(irq); + } + break; } + d->state = state; return 0; } +static int intc_resume(struct sys_device *dev) +{ + return intc_suspend(dev, PMSG_ON); +} + static struct sysdev_class intc_sysdev_class = { .name = "intc", .suspend = intc_suspend, + .resume = intc_resume, }; /* register this intc as sysdev to allow suspend/resume */ diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 643908b74bc..8eba98c8ed1 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -658,7 +658,7 @@ int spi_write_then_read(struct spi_device *spi, int status; struct spi_message message; - struct spi_transfer x; + struct spi_transfer x[2]; u8 *local_buf; /* Use preallocated DMA-safe buffer. 
We can't avoid copying here, @@ -669,9 +669,15 @@ int spi_write_then_read(struct spi_device *spi, return -EINVAL; spi_message_init(&message); - memset(&x, 0, sizeof x); - x.len = n_tx + n_rx; - spi_message_add_tail(&x, &message); + memset(x, 0, sizeof x); + if (n_tx) { + x[0].len = n_tx; + spi_message_add_tail(&x[0], &message); + } + if (n_rx) { + x[1].len = n_rx; + spi_message_add_tail(&x[1], &message); + } /* ... unless someone else is using the pre-allocated buffer */ if (!mutex_trylock(&lock)) { @@ -682,15 +688,15 @@ int spi_write_then_read(struct spi_device *spi, local_buf = buf; memcpy(local_buf, txbuf, n_tx); - x.tx_buf = local_buf; - x.rx_buf = local_buf; + x[0].tx_buf = local_buf; + x[1].rx_buf = local_buf + n_tx; /* do the i/o */ status = spi_sync(spi, &message); if (status == 0) - memcpy(rxbuf, x.rx_buf + n_tx, n_rx); + memcpy(rxbuf, x[1].rx_buf, n_rx); - if (x.tx_buf == buf) + if (x[0].tx_buf == buf) mutex_unlock(&lock); else kfree(local_buf); diff --git a/drivers/staging/b3dfg/b3dfg.c b/drivers/staging/b3dfg/b3dfg.c index 0348072b3ab..75ebe338c6f 100644 --- a/drivers/staging/b3dfg/b3dfg.c +++ b/drivers/staging/b3dfg/b3dfg.c @@ -1000,7 +1000,7 @@ static int __devinit b3dfg_probe(struct pci_dev *pdev, pci_set_master(pdev); - r = pci_set_dma_mask(pdev, DMA_32BIT_MASK); + r = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); if (r) { dev_err(&pdev->dev, "no usable DMA configuration\n"); goto err_free_res; diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 869d47cb6db..0a69c0977e3 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -546,10 +546,6 @@ static int acm_tty_open(struct tty_struct *tty, struct file *filp) tty->driver_data = acm; acm->tty = tty; - /* force low_latency on so that our tty_push actually forces the data through, - otherwise it is scheduled, and with high data rates data can get lost. 
*/ - tty->low_latency = 1; - if (usb_autopm_get_interface(acm->control) < 0) goto early_bail; else diff --git a/drivers/usb/otg/nop-usb-xceiv.c b/drivers/usb/otg/nop-usb-xceiv.c index 4b933f646f2..c567168f89a 100644 --- a/drivers/usb/otg/nop-usb-xceiv.c +++ b/drivers/usb/otg/nop-usb-xceiv.c @@ -36,14 +36,14 @@ struct nop_usb_xceiv { struct device *dev; }; -static u64 nop_xceiv_dmamask = DMA_32BIT_MASK; +static u64 nop_xceiv_dmamask = DMA_BIT_MASK(32); static struct platform_device nop_xceiv_device = { .name = "nop_usb_xceiv", .id = -1, .dev = { .dma_mask = &nop_xceiv_dmamask, - .coherent_dma_mask = DMA_32BIT_MASK, + .coherent_dma_mask = DMA_BIT_MASK(32), .platform_data = NULL, }, }; diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c index 2620bf6fe5e..9c4c700c7cc 100644 --- a/drivers/usb/serial/ti_usb_3410_5052.c +++ b/drivers/usb/serial/ti_usb_3410_5052.c @@ -1215,20 +1215,22 @@ static void ti_bulk_in_callback(struct urb *urb) } tty = tty_port_tty_get(&port->port); - if (tty && urb->actual_length) { - usb_serial_debug_data(debug, dev, __func__, - urb->actual_length, urb->transfer_buffer); - - if (!tport->tp_is_open) - dbg("%s - port closed, dropping data", __func__); - else - ti_recv(&urb->dev->dev, tty, + if (tty) { + if (urb->actual_length) { + usb_serial_debug_data(debug, dev, __func__, + urb->actual_length, urb->transfer_buffer); + + if (!tport->tp_is_open) + dbg("%s - port closed, dropping data", + __func__); + else + ti_recv(&urb->dev->dev, tty, urb->transfer_buffer, urb->actual_length); - - spin_lock(&tport->tp_lock); - tport->tp_icount.rx += urb->actual_length; - spin_unlock(&tport->tp_lock); + spin_lock(&tport->tp_lock); + tport->tp_icount.rx += urb->actual_length; + spin_unlock(&tport->tp_lock); + } tty_kref_put(tty); } diff --git a/drivers/video/aty/radeon_base.c b/drivers/video/aty/radeon_base.c index 16bb7e3c031..6c37e8ee5ef 100644 --- a/drivers/video/aty/radeon_base.c +++ b/drivers/video/aty/radeon_base.c @@ -698,8 +698,8 @@ static void __devinit radeon_get_pllinfo(struct radeonfb_info *rinfo) found: /* * Some methods fail to retrieve SCLK and MCLK values, we apply default - * settings in this case (200Mhz). If that really happne often, we could - * fetch from registers instead... + * settings in this case (200Mhz). If that really happens often, we + * could fetch from registers instead... 
*/ if (rinfo->pll.mclk == 0) rinfo->pll.mclk = 20000; diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c index dd37cbcaf8c..157057c79ca 100644 --- a/drivers/video/backlight/backlight.c +++ b/drivers/video/backlight/backlight.c @@ -35,8 +35,6 @@ static int fb_notifier_callback(struct notifier_block *self, return 0; bd = container_of(self, struct backlight_device, fb_notif); - if (!lock_fb_info(evdata->info)) - return -ENODEV; mutex_lock(&bd->ops_lock); if (bd->ops) if (!bd->ops->check_fb || @@ -49,7 +47,6 @@ static int fb_notifier_callback(struct notifier_block *self, backlight_update_status(bd); } mutex_unlock(&bd->ops_lock); - unlock_fb_info(evdata->info); return 0; } diff --git a/drivers/video/backlight/lcd.c b/drivers/video/backlight/lcd.c index 0bb13df0fa8..b6449470106 100644 --- a/drivers/video/backlight/lcd.c +++ b/drivers/video/backlight/lcd.c @@ -40,8 +40,6 @@ static int fb_notifier_callback(struct notifier_block *self, if (!ld->ops) return 0; - if (!lock_fb_info(evdata->info)) - return -ENODEV; mutex_lock(&ld->ops_lock); if (!ld->ops->check_fb || ld->ops->check_fb(ld, evdata->info)) { if (event == FB_EVENT_BLANK) { @@ -53,7 +51,6 @@ static int fb_notifier_callback(struct notifier_block *self, } } mutex_unlock(&ld->ops_lock); - unlock_fb_info(evdata->info); return 0; } diff --git a/drivers/video/cirrusfb.c b/drivers/video/cirrusfb.c index d42e385f091..4c2bf923418 100644 --- a/drivers/video/cirrusfb.c +++ b/drivers/video/cirrusfb.c @@ -567,9 +567,7 @@ static int cirrusfb_check_var(struct fb_var_screeninfo *var, default: dev_dbg(info->device, "Unsupported bpp size: %d\n", var->bits_per_pixel); - assert(false); - /* should never occur */ - break; + return -EINVAL; } if (var->xres_virtual < var->xres) diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c index 2cd500a304f..471a9a60376 100644 --- a/drivers/video/console/fbcon.c +++ b/drivers/video/console/fbcon.c @@ -2263,9 +2263,12 @@ static void fbcon_generic_blank(struct vc_data *vc, struct fb_info *info, } + if (!lock_fb_info(info)) + return; event.info = info; event.data = &blank; fb_notifier_call_chain(FB_EVENT_CONBLANK, &event); + unlock_fb_info(info); } static int fbcon_blank(struct vc_data *vc, int blank, int mode_switch) @@ -2956,8 +2959,6 @@ static int fbcon_fb_unregistered(struct fb_info *info) { int i, idx; - if (!lock_fb_info(info)) - return -ENODEV; idx = info->node; for (i = first_fb_vc; i <= last_fb_vc; i++) { if (con2fb_map[i] == idx) @@ -2985,8 +2986,6 @@ static int fbcon_fb_unregistered(struct fb_info *info) if (primary_device == idx) primary_device = -1; - unlock_fb_info(info); - if (!num_registered_fb) unregister_con_driver(&fb_con); @@ -3027,11 +3026,8 @@ static int fbcon_fb_registered(struct fb_info *info) { int ret = 0, i, idx; - if (!lock_fb_info(info)) - return -ENODEV; idx = info->node; fbcon_select_primary(info); - unlock_fb_info(info); if (info_idx == -1) { for (i = first_fb_vc; i <= last_fb_vc; i++) { @@ -3152,53 +3148,23 @@ static int fbcon_event_notify(struct notifier_block *self, switch(action) { case FB_EVENT_SUSPEND: - if (!lock_fb_info(info)) { - ret = -ENODEV; - goto done; - } fbcon_suspended(info); - unlock_fb_info(info); break; case FB_EVENT_RESUME: - if (!lock_fb_info(info)) { - ret = -ENODEV; - goto done; - } fbcon_resumed(info); - unlock_fb_info(info); break; case FB_EVENT_MODE_CHANGE: - if (!lock_fb_info(info)) { - ret = -ENODEV; - goto done; - } fbcon_modechanged(info); - unlock_fb_info(info); break; case FB_EVENT_MODE_CHANGE_ALL: - if 
(!lock_fb_info(info)) { - ret = -ENODEV; - goto done; - } fbcon_set_all_vcs(info); - unlock_fb_info(info); break; case FB_EVENT_MODE_DELETE: mode = event->data; - if (!lock_fb_info(info)) { - ret = -ENODEV; - goto done; - } ret = fbcon_mode_deleted(info, mode); - unlock_fb_info(info); break; case FB_EVENT_FB_UNBIND: - if (!lock_fb_info(info)) { - ret = -ENODEV; - goto done; - } idx = info->node; - unlock_fb_info(info); ret = fbcon_fb_unbind(idx); break; case FB_EVENT_FB_REGISTERED: @@ -3217,29 +3183,14 @@ static int fbcon_event_notify(struct notifier_block *self, con2fb->framebuffer = con2fb_map[con2fb->console - 1]; break; case FB_EVENT_BLANK: - if (!lock_fb_info(info)) { - ret = -ENODEV; - goto done; - } fbcon_fb_blanked(info, *(int *)event->data); - unlock_fb_info(info); break; case FB_EVENT_NEW_MODELIST: - if (!lock_fb_info(info)) { - ret = -ENODEV; - goto done; - } fbcon_new_modelist(info); - unlock_fb_info(info); break; case FB_EVENT_GET_REQ: caps = event->data; - if (!lock_fb_info(info)) { - ret = -ENODEV; - goto done; - } fbcon_get_requirement(info, caps); - unlock_fb_info(info); break; } done: diff --git a/drivers/video/efifb.c b/drivers/video/efifb.c index 0c5b9a9fd56..8dea2bc9270 100644 --- a/drivers/video/efifb.c +++ b/drivers/video/efifb.c @@ -210,12 +210,15 @@ static int __init efifb_probe(struct platform_device *dev) unsigned int size_total; int request_succeeded = 0; - printk(KERN_INFO "efifb: probing for efifb\n"); - if (!screen_info.lfb_depth) screen_info.lfb_depth = 32; if (!screen_info.pages) screen_info.pages = 1; + if (!screen_info.lfb_base) { + printk(KERN_DEBUG "efifb: invalid framebuffer address\n"); + return -ENODEV; + } + printk(KERN_INFO "efifb: probing for efifb\n"); /* just assume they're all unset if any are */ if (!screen_info.blue_size) { diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c index 2ac32e6b595..d412a1ddc12 100644 --- a/drivers/video/fbmem.c +++ b/drivers/video/fbmem.c @@ -1097,8 +1097,11 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd, return -EINVAL; con2fb.framebuffer = -1; event.data = &con2fb; + if (!lock_fb_info(info)) + return -ENODEV; event.info = info; fb_notifier_call_chain(FB_EVENT_GET_CONSOLE_MAP, &event); + unlock_fb_info(info); ret = copy_to_user(argp, &con2fb, sizeof(con2fb)) ? 
-EFAULT : 0; break; case FBIOPUT_CON2FBMAP: @@ -1115,8 +1118,11 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd, break; } event.data = &con2fb; + if (!lock_fb_info(info)) + return -ENODEV; event.info = info; ret = fb_notifier_call_chain(FB_EVENT_SET_CONSOLE_MAP, &event); + unlock_fb_info(info); break; case FBIOBLANK: if (!lock_fb_info(info)) @@ -1521,7 +1527,10 @@ register_framebuffer(struct fb_info *fb_info) registered_fb[i] = fb_info; event.info = fb_info; + if (!lock_fb_info(fb_info)) + return -ENODEV; fb_notifier_call_chain(FB_EVENT_FB_REGISTERED, &event); + unlock_fb_info(fb_info); return 0; } @@ -1555,8 +1564,12 @@ unregister_framebuffer(struct fb_info *fb_info) goto done; } + + if (!lock_fb_info(fb_info)) + return -ENODEV; event.info = fb_info; ret = fb_notifier_call_chain(FB_EVENT_FB_UNBIND, &event); + unlock_fb_info(fb_info); if (ret) { ret = -EINVAL; @@ -1590,6 +1603,8 @@ void fb_set_suspend(struct fb_info *info, int state) { struct fb_event event; + if (!lock_fb_info(info)) + return; event.info = info; if (state) { fb_notifier_call_chain(FB_EVENT_SUSPEND, &event); @@ -1598,6 +1613,7 @@ void fb_set_suspend(struct fb_info *info, int state) info->state = FBINFO_STATE_RUNNING; fb_notifier_call_chain(FB_EVENT_RESUME, &event); } + unlock_fb_info(info); } /** @@ -1667,8 +1683,11 @@ int fb_new_modelist(struct fb_info *info) err = 1; if (!list_empty(&info->modelist)) { + if (!lock_fb_info(info)) + return -ENODEV; event.info = info; err = fb_notifier_call_chain(FB_EVENT_NEW_MODELIST, &event); + unlock_fb_info(info); } return err; diff --git a/drivers/video/intelfb/intelfb.h b/drivers/video/intelfb/intelfb.h index a50bea61480..40984551c92 100644 --- a/drivers/video/intelfb/intelfb.h +++ b/drivers/video/intelfb/intelfb.h @@ -53,6 +53,7 @@ #define PCI_DEVICE_ID_INTEL_830M 0x3577 #define PCI_DEVICE_ID_INTEL_845G 0x2562 #define PCI_DEVICE_ID_INTEL_85XGM 0x3582 +#define PCI_DEVICE_ID_INTEL_854 0x358E #define PCI_DEVICE_ID_INTEL_865G 0x2572 #define PCI_DEVICE_ID_INTEL_915G 0x2582 #define PCI_DEVICE_ID_INTEL_915GM 0x2592 @@ -154,6 +155,7 @@ enum intel_chips { INTEL_85XGM, INTEL_852GM, INTEL_852GME, + INTEL_854, INTEL_855GM, INTEL_855GME, INTEL_865G, diff --git a/drivers/video/intelfb/intelfb_i2c.c b/drivers/video/intelfb/intelfb_i2c.c index b3065492bb2..487f2be4746 100644 --- a/drivers/video/intelfb/intelfb_i2c.c +++ b/drivers/video/intelfb/intelfb_i2c.c @@ -156,6 +156,7 @@ void intelfb_create_i2c_busses(struct intelfb_info *dinfo) switch(dinfo->chipset) { case INTEL_830M: case INTEL_845G: + case INTEL_854: case INTEL_855GM: case INTEL_865G: dinfo->output[i].type = INTELFB_OUTPUT_DVO; diff --git a/drivers/video/intelfb/intelfbdrv.c b/drivers/video/intelfb/intelfbdrv.c index 6d8e5415c80..ace14fe02fc 100644 --- a/drivers/video/intelfb/intelfbdrv.c +++ b/drivers/video/intelfb/intelfbdrv.c @@ -182,6 +182,7 @@ static struct pci_device_id intelfb_pci_table[] __devinitdata = { { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_845G, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, INTELFB_CLASS_MASK, INTEL_845G }, { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_85XGM, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, INTELFB_CLASS_MASK, INTEL_85XGM }, { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_865G, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, INTELFB_CLASS_MASK, INTEL_865G }, + { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_854, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, INTELFB_CLASS_MASK, INTEL_854 }, { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_915G, PCI_ANY_ID, PCI_ANY_ID, 
PCI_CLASS_DISPLAY_VGA << 8, INTELFB_CLASS_MASK, INTEL_915G }, { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_915GM, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, INTELFB_CLASS_MASK, INTEL_915GM }, { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_945G, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, INTELFB_CLASS_MASK, INTEL_945G }, diff --git a/drivers/video/intelfb/intelfbhw.c b/drivers/video/intelfb/intelfbhw.c index 8b26b27c2db..0689f97c523 100644 --- a/drivers/video/intelfb/intelfbhw.c +++ b/drivers/video/intelfb/intelfbhw.c @@ -84,6 +84,11 @@ int intelfbhw_get_chipset(struct pci_dev *pdev, struct intelfb_info *dinfo) dinfo->mobile = 0; dinfo->pll_index = PLLS_I8xx; return 0; + case PCI_DEVICE_ID_INTEL_854: + dinfo->mobile = 1; + dinfo->name = "Intel(R) 854"; + dinfo->chipset = INTEL_854; + return 0; case PCI_DEVICE_ID_INTEL_85XGM: tmp = 0; dinfo->mobile = 1; diff --git a/drivers/video/s3fb.c b/drivers/video/s3fb.c index 4dcec48a1d7..c3fad34309e 100644 --- a/drivers/video/s3fb.c +++ b/drivers/video/s3fb.c @@ -45,11 +45,11 @@ struct s3fb_info { static const struct svga_fb_format s3fb_formats[] = { { 0, {0, 6, 0}, {0, 6, 0}, {0, 6, 0}, {0, 0, 0}, 0, FB_TYPE_TEXT, FB_AUX_TEXT_SVGA_STEP4, FB_VISUAL_PSEUDOCOLOR, 8, 16}, - { 4, {0, 6, 0}, {0, 6, 0}, {0, 6, 0}, {0, 0, 0}, 0, + { 4, {0, 4, 0}, {0, 4, 0}, {0, 4, 0}, {0, 0, 0}, 0, FB_TYPE_PACKED_PIXELS, 0, FB_VISUAL_PSEUDOCOLOR, 8, 16}, - { 4, {0, 6, 0}, {0, 6, 0}, {0, 6, 0}, {0, 0, 0}, 1, + { 4, {0, 4, 0}, {0, 4, 0}, {0, 4, 0}, {0, 0, 0}, 1, FB_TYPE_INTERLEAVED_PLANES, 1, FB_VISUAL_PSEUDOCOLOR, 8, 16}, - { 8, {0, 6, 0}, {0, 6, 0}, {0, 6, 0}, {0, 0, 0}, 0, + { 8, {0, 8, 0}, {0, 8, 0}, {0, 8, 0}, {0, 0, 0}, 0, FB_TYPE_PACKED_PIXELS, 0, FB_VISUAL_PSEUDOCOLOR, 4, 8}, {16, {10, 5, 0}, {5, 5, 0}, {0, 5, 0}, {0, 0, 0}, 0, FB_TYPE_PACKED_PIXELS, 0, FB_VISUAL_TRUECOLOR, 2, 4}, diff --git a/drivers/video/sa1100fb.c b/drivers/video/sa1100fb.c index fad58cf9ef7..10ddad8e17d 100644 --- a/drivers/video/sa1100fb.c +++ b/drivers/video/sa1100fb.c @@ -199,16 +199,20 @@ extern void (*sa1100fb_backlight_power)(int on); extern void (*sa1100fb_lcd_power)(int on); -/* - * IMHO this looks wrong. In 8BPP, length should be 8. 
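
The s3fb, sa1100fb and sis hunks in this area all correct the same class of bug: for FB_VISUAL_PSEUDOCOLOR the {color}.length fields must give the width of the palette index (so that 1 << length is the number of usable palette entries), not the width of the DAC. A minimal sketch of what an 8bpp pseudocolor driver should now report from its check_var() path, assuming a full 256-entry palette (illustrative only, not taken from any hunk here):

	/* 8bpp pseudocolor: the pixel value is an 8-bit palette index,
	 * so 1 << 8 == 256 palette entries are addressable. */
	var->red.offset    = 0;
	var->green.offset  = 0;
	var->blue.offset   = 0;
	var->red.length    = 8;
	var->green.length  = 8;
	var->blue.length   = 8;
	var->transp.offset = 0;
	var->transp.length = 0;
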
- */ -static struct sa1100fb_rgb rgb_8 = { +static struct sa1100fb_rgb rgb_4 = { .red = { .offset = 0, .length = 4, }, .green = { .offset = 0, .length = 4, }, .blue = { .offset = 0, .length = 4, }, .transp = { .offset = 0, .length = 0, }, }; +static struct sa1100fb_rgb rgb_8 = { + .red = { .offset = 0, .length = 8, }, + .green = { .offset = 0, .length = 8, }, + .blue = { .offset = 0, .length = 8, }, + .transp = { .offset = 0, .length = 0, }, +}; + static struct sa1100fb_rgb def_rgb_16 = { .red = { .offset = 11, .length = 5, }, .green = { .offset = 5, .length = 6, }, @@ -613,7 +617,7 @@ sa1100fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) DPRINTK("var->bits_per_pixel=%d\n", var->bits_per_pixel); switch (var->bits_per_pixel) { case 4: - rgbidx = RGB_8; + rgbidx = RGB_4; break; case 8: rgbidx = RGB_8; @@ -1382,6 +1386,7 @@ static struct sa1100fb_info * __init sa1100fb_init_fbinfo(struct device *dev) fbi->fb.monspecs = monspecs; fbi->fb.pseudo_palette = (fbi + 1); + fbi->rgb[RGB_4] = &rgb_4; fbi->rgb[RGB_8] = &rgb_8; fbi->rgb[RGB_16] = &def_rgb_16; diff --git a/drivers/video/sa1100fb.h b/drivers/video/sa1100fb.h index 86831db9a04..1c3b459865d 100644 --- a/drivers/video/sa1100fb.h +++ b/drivers/video/sa1100fb.h @@ -57,9 +57,10 @@ struct sa1100fb_lcd_reg { unsigned long lccr3; }; -#define RGB_8 (0) -#define RGB_16 (1) -#define NR_RGB 2 +#define RGB_4 (0) +#define RGB_8 (1) +#define RGB_16 (2) +#define NR_RGB 3 struct sa1100fb_info { struct fb_info fb; diff --git a/drivers/video/sis/sis_main.c b/drivers/video/sis/sis_main.c index 346d6458cf7..7e17ee95a97 100644 --- a/drivers/video/sis/sis_main.c +++ b/drivers/video/sis/sis_main.c @@ -1129,7 +1129,7 @@ sisfb_bpp_to_var(struct sis_video_info *ivideo, struct fb_var_screeninfo *var) switch(var->bits_per_pixel) { case 8: var->red.offset = var->green.offset = var->blue.offset = 0; - var->red.length = var->green.length = var->blue.length = 6; + var->red.length = var->green.length = var->blue.length = 8; break; case 16: var->red.offset = 11; diff --git a/drivers/video/skeletonfb.c b/drivers/video/skeletonfb.c index a439159204a..89158bc71da 100644 --- a/drivers/video/skeletonfb.c +++ b/drivers/video/skeletonfb.c @@ -308,9 +308,11 @@ static int xxxfb_setcolreg(unsigned regno, unsigned red, unsigned green, * color depth = SUM(var->{color}.length) * * Pseudocolor: - * var->{color}.offset is 0 - * var->{color}.length contains width of DAC or the number of unique - * colors available (color depth) + * var->{color}.offset is 0 unless the palette index takes less than + * bits_per_pixel bits and is stored in the upper + * bits of the pixel value + * var->{color}.length is set so that 1 << length is the number of + * available palette entries * pseudo_palette is not used * RAMDAC[X] is programmed to (red, green, blue) * color depth = var->{color}.length diff --git a/drivers/video/uvesafb.c b/drivers/video/uvesafb.c index 0b370aebdbf..421770b5e6a 100644 --- a/drivers/video/uvesafb.c +++ b/drivers/video/uvesafb.c @@ -55,6 +55,7 @@ static u16 maxvf __devinitdata; /* maximum vertical frequency */ static u16 maxhf __devinitdata; /* maximum horizontal frequency */ static u16 vbemode __devinitdata; /* force use of a specific VBE mode */ static char *mode_option __devinitdata; +static u8 dac_width = 6; static struct uvesafb_ktask *uvfb_tasks[UVESAFB_TASKS_MAX]; static DEFINE_MUTEX(uvfb_lock); @@ -303,22 +304,10 @@ static void uvesafb_setup_var(struct fb_var_screeninfo *var, var->blue.offset = 0; var->transp.offset = 0; - /* - * We're assuming that we 
can switch the DAC to 8 bits. If - * this proves to be incorrect, we'll update the fields - * later in set_par(). - */ - if (par->vbe_ib.capabilities & VBE_CAP_CAN_SWITCH_DAC) { - var->red.length = 8; - var->green.length = 8; - var->blue.length = 8; - var->transp.length = 0; - } else { - var->red.length = 6; - var->green.length = 6; - var->blue.length = 6; - var->transp.length = 0; - } + var->red.length = 8; + var->green.length = 8; + var->blue.length = 8; + var->transp.length = 0; } } @@ -1006,7 +995,7 @@ static int uvesafb_setcolreg(unsigned regno, unsigned red, unsigned green, struct fb_info *info) { struct uvesafb_pal_entry entry; - int shift = 16 - info->var.green.length; + int shift = 16 - dac_width; int err = 0; if (regno >= info->cmap.len) @@ -1055,7 +1044,7 @@ static int uvesafb_setcolreg(unsigned regno, unsigned red, unsigned green, static int uvesafb_setcmap(struct fb_cmap *cmap, struct fb_info *info) { struct uvesafb_pal_entry *entries; - int shift = 16 - info->var.green.length; + int shift = 16 - dac_width; int i, err = 0; if (info->var.bits_per_pixel == 8) { @@ -1317,13 +1306,9 @@ setmode: err = uvesafb_exec(task); if (err || (task->t.regs.eax & 0xffff) != 0x004f || ((task->t.regs.ebx & 0xff00) >> 8) != 8) { - /* - * We've failed to set the DAC palette format - - * time to correct var. - */ - info->var.red.length = 6; - info->var.green.length = 6; - info->var.blue.length = 6; + dac_width = 6; + } else { + dac_width = 8; } } diff --git a/drivers/video/vfb.c b/drivers/video/vfb.c index cc919ae4657..050d432c7d9 100644 --- a/drivers/video/vfb.c +++ b/drivers/video/vfb.c @@ -318,13 +318,16 @@ static int vfb_setcolreg(u_int regno, u_int red, u_int green, u_int blue, * {hardwarespecific} contains width of RAMDAC * cmap[X] is programmed to (X << red.offset) | (X << green.offset) | (X << blue.offset) * RAMDAC[X] is programmed to (red, green, blue) - * + * * Pseudocolor: - * uses offset = 0 && length = RAMDAC register width. - * var->{color}.offset is 0 - * var->{color}.length contains widht of DAC + * var->{color}.offset is 0 unless the palette index takes less than + * bits_per_pixel bits and is stored in the upper + * bits of the pixel value + * var->{color}.length is set so that 1 << length is the number of available + * palette entries * cmap is not used * RAMDAC[X] is programmed to (red, green, blue) + * * Truecolor: * does not use DAC. Usually 3 are present. 
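
The uvesafb hunk above decouples the advertised pixel format from the DAC width: var now always reports 8-bit components, while the palette-programming paths scale by the module-level dac_width that set_par() fixes up after probing whether the DAC can really be switched to 8 bits. A hedged sketch of the scaling rule both setcolreg paths rely on (the helper name is invented for illustration):

	/* Narrow a 16-bit colormap component to the DAC width before
	 * programming the palette; dac_width is 8 if the VBE DAC-format
	 * call succeeded, 6 otherwise. */
	static u8 uvesafb_scale_component(u16 comp, u8 dac_width)
	{
		return comp >> (16 - dac_width);	/* 0xffff -> 0xff or 0x3f */
	}
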
* var->{color}.offset contains start of bitfield diff --git a/drivers/xen/cpu_hotplug.c b/drivers/xen/cpu_hotplug.c index 5f54c01c156..bdfd584ad85 100644 --- a/drivers/xen/cpu_hotplug.c +++ b/drivers/xen/cpu_hotplug.c @@ -21,29 +21,41 @@ static void disable_hotplug_cpu(int cpu) set_cpu_present(cpu, false); } -static void vcpu_hotplug(unsigned int cpu) +static int vcpu_online(unsigned int cpu) { int err; char dir[32], state[32]; - if (!cpu_possible(cpu)) - return; - sprintf(dir, "cpu/%u", cpu); err = xenbus_scanf(XBT_NIL, dir, "availability", "%s", state); if (err != 1) { printk(KERN_ERR "XENBUS: Unable to read cpu state\n"); - return; + return err; } - if (strcmp(state, "online") == 0) { + if (strcmp(state, "online") == 0) + return 1; + else if (strcmp(state, "offline") == 0) + return 0; + + printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n", state, cpu); + return -EINVAL; +} +static void vcpu_hotplug(unsigned int cpu) +{ + if (!cpu_possible(cpu)) + return; + + switch (vcpu_online(cpu)) { + case 1: enable_hotplug_cpu(cpu); - } else if (strcmp(state, "offline") == 0) { + break; + case 0: (void)cpu_down(cpu); disable_hotplug_cpu(cpu); - } else { - printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n", - state, cpu); + break; + default: + break; } } @@ -64,12 +76,20 @@ static void handle_vcpu_hotplug_event(struct xenbus_watch *watch, static int setup_cpu_watcher(struct notifier_block *notifier, unsigned long event, void *data) { + int cpu; static struct xenbus_watch cpu_watch = { .node = "cpu", .callback = handle_vcpu_hotplug_event}; (void)register_xenbus_watch(&cpu_watch); + for_each_possible_cpu(cpu) { + if (vcpu_online(cpu) == 0) { + (void)cpu_down(cpu); + cpu_clear(cpu, cpu_present_map); + } + } + return NOTIFY_DONE; } diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c index 0d61db1e7b4..4b5b84837ee 100644 --- a/drivers/xen/manage.c +++ b/drivers/xen/manage.c @@ -62,14 +62,15 @@ static int xen_suspend(void *data) gnttab_resume(); xen_mm_unpin_all(); - sysdev_resume(); - if (!*cancelled) { xen_irq_resume(); xen_console_resume(); xen_timer_resume(); } + sysdev_resume(); + device_power_up(PMSG_RESUME); + return 0; } diff --git a/fs/bio.c b/fs/bio.c --- a/fs/bio.c +++ b/fs/bio.c @@ -348,6 +348,24 @@ err: return NULL; } +/** + * bio_alloc - allocate a bio for I/O + * @gfp_mask: the GFP_ mask given to the slab allocator + * @nr_iovecs: number of iovecs to pre-allocate + * + * Description: + * bio_alloc will allocate a bio and associated bio_vec array that can hold + * at least @nr_iovecs entries. Allocations will be done from the + * fs_bio_set. Also see @bio_alloc_bioset. + * + * If %__GFP_WAIT is set, then bio_alloc will always be able to allocate + * a bio. This is due to the mempool guarantees. To make this work, callers + * must never allocate more than 1 bio at the time from this pool. Callers + * that need to allocate more than 1 bio must always submit the previously + * allocate bio for IO before attempting to allocate a new one. Failure to + * do so can cause livelocks under memory pressure.
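
The kernel-doc being added here states a real usage contract, so a sketch of the only safe allocation pattern under __GFP_WAIT may help; have_more_segments() and my_fill_bio() are hypothetical stand-ins for per-caller logic:

	/* Submit each bio from fs_bio_set before allocating the next one;
	 * holding two unsubmitted bios from the mempool at once can
	 * livelock under memory pressure. */
	while (have_more_segments()) {
		struct bio *bio = bio_alloc(GFP_NOIO, 1);	/* sleeps, never fails */

		my_fill_bio(bio);	/* set bi_bdev, bi_sector, bi_end_io, add pages */
		submit_bio(WRITE, bio);	/* submit, then loop and allocate again */
	}
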
+ * + **/ struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs) { struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); diff --git a/fs/buffer.c b/fs/buffer.c index 13edf7ad3ff..ff8bb1f2333 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -547,7 +547,7 @@ repeat: return err; } -void do_thaw_all(unsigned long unused) +void do_thaw_all(struct work_struct *work) { struct super_block *sb; char b[BDEVNAME_SIZE]; @@ -567,6 +567,7 @@ restart: goto restart; } spin_unlock(&sb_lock); + kfree(work); printk(KERN_WARNING "Emergency Thaw complete\n"); } @@ -577,7 +578,13 @@ restart: */ void emergency_thaw_all(void) { - pdflush_operation(do_thaw_all, 0); + struct work_struct *work; + + work = kmalloc(sizeof(*work), GFP_ATOMIC); + if (work) { + INIT_WORK(work, do_thaw_all); + schedule_work(work); + } } /** diff --git a/fs/direct-io.c b/fs/direct-io.c index da258e7249c..05763bbc205 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -307,8 +307,6 @@ dio_bio_alloc(struct dio *dio, struct block_device *bdev, struct bio *bio; bio = bio_alloc(GFP_KERNEL, nr_vecs); - if (bio == NULL) - return -ENOMEM; bio->bi_bdev = bdev; bio->bi_sector = first_sector; diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index b43b9556366..acf67883110 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -590,9 +590,8 @@ static int ext2_get_blocks(struct inode *inode, if (depth == 0) return (err); -reread: - partial = ext2_get_branch(inode, depth, offsets, chain, &err); + partial = ext2_get_branch(inode, depth, offsets, chain, &err); /* Simplest case - block found, no allocation needed */ if (!partial) { first_block = le32_to_cpu(chain[depth - 1].key); @@ -602,15 +601,16 @@ reread: while (count < maxblocks && count <= blocks_to_boundary) { ext2_fsblk_t blk; - if (!verify_chain(chain, partial)) { + if (!verify_chain(chain, chain + depth - 1)) { /* * Indirect block might be removed by * truncate while we were reading it. * Handling of that case: forget what we've * got now, go to reread. */ + err = -EAGAIN; count = 0; - goto changed; + break; } blk = le32_to_cpu(*(chain[depth-1].p + count)); if (blk == first_block + count) @@ -618,7 +618,8 @@ reread: else break; } - goto got_it; + if (err != -EAGAIN) + goto got_it; } /* Next simple case - plain lookup or failed read of indirect block */ @@ -626,6 +627,33 @@ reread: goto cleanup; mutex_lock(&ei->truncate_mutex); + /* + * If the indirect block is missing while we are reading + * the chain(ext3_get_branch() returns -EAGAIN err), or + * if the chain has been changed after we grab the semaphore, + * (either because another process truncated this branch, or + * another get_block allocated this branch) re-grab the chain to see if + * the request block has been allocated or not. + * + * Since we already block the truncate/other get_block + * at this point, we will have the current copy of the chain when we + * splice the branch into the tree. + */ + if (err == -EAGAIN || !verify_chain(chain, partial)) { + while (partial > chain) { + brelse(partial->bh); + partial--; + } + partial = ext2_get_branch(inode, depth, offsets, chain, &err); + if (!partial) { + count++; + mutex_unlock(&ei->truncate_mutex); + if (err) + goto cleanup; + clear_buffer_new(bh_result); + goto got_it; + } + } /* * Okay, we need to do block allocation. 
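
The fs/buffer.c hunk above replaces the removed pdflush_operation() call with a one-shot, self-freeing work item. The general shape of that pattern, sketched with hypothetical names:

	/* The handler receives the work_struct it was queued with and must
	 * free it, since the submitter cannot know when the work is done. */
	static void my_deferred(struct work_struct *work)
	{
		/* ... do the deferred work ... */
		kfree(work);
	}

	static void kick_deferred(void)
	{
		struct work_struct *work = kmalloc(sizeof(*work), GFP_ATOMIC);

		if (work) {	/* on allocation failure the request is simply dropped */
			INIT_WORK(work, my_deferred);
			schedule_work(work);
		}
	}
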
Lazily initialize the block @@ -683,12 +711,6 @@ cleanup: partial--; } return err; -changed: - while (partial > chain) { - brelse(partial->bh); - partial--; - } - goto reread; } int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 6132353dcf6..2a1cb097976 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2416,8 +2416,6 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex) len = ee_len; bio = bio_alloc(GFP_NOIO, len); - if (!bio) - return -ENOMEM; bio->bi_sector = ee_pblock; bio->bi_bdev = inode->i_sb->s_bdev; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 2b25133524a..06f30e96567 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -938,9 +938,9 @@ static void fuse_release_user_pages(struct fuse_req *req, int write) } static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf, - unsigned *nbytesp, int write) + size_t *nbytesp, int write) { - unsigned nbytes = *nbytesp; + size_t nbytes = *nbytesp; unsigned long user_addr = (unsigned long) buf; unsigned offset = user_addr & ~PAGE_MASK; int npages; @@ -955,7 +955,7 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf, return 0; } - nbytes = min(nbytes, (unsigned) FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT); + nbytes = min_t(size_t, nbytes, FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT); npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; npages = clamp(npages, 1, FUSE_MAX_PAGES_PER_REQ); down_read(&current->mm->mmap_sem); @@ -1298,6 +1298,8 @@ static int fuse_direct_mmap(struct file *file, struct vm_area_struct *vma) if (vma->vm_flags & VM_MAYSHARE) return -ENODEV; + invalidate_inode_pages2(file->f_mapping); + return generic_file_mmap(file, vma); } diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 3984e47d1d3..1afd9f26bcb 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -597,7 +597,6 @@ __acquires(&gl->gl_spin) GLOCK_BUG_ON(gl, test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)); - down_read(&gfs2_umount_flush_sem); if (test_bit(GLF_DEMOTE, &gl->gl_flags) && gl->gl_demote_state != gl->gl_state) { if (find_first_holder(gl)) @@ -614,15 +613,14 @@ __acquires(&gl->gl_spin) if (ret == 0) goto out_unlock; if (ret == 2) - goto out_sem; + goto out; gh = find_first_waiter(gl); gl->gl_target = gh->gh_state; if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) do_error(gl, 0); /* Fail queued try locks */ } do_xmote(gl, gh, gl->gl_target); -out_sem: - up_read(&gfs2_umount_flush_sem); +out: return; out_sched: @@ -631,7 +629,7 @@ out_sched: gfs2_glock_put(gl); out_unlock: clear_bit(GLF_LOCK, &gl->gl_flags); - goto out_sem; + goto out; } static void glock_work_func(struct work_struct *work) @@ -641,6 +639,7 @@ static void glock_work_func(struct work_struct *work) if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) finish_xmote(gl, gl->gl_reply); + down_read(&gfs2_umount_flush_sem); spin_lock(&gl->gl_spin); if (test_and_clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && gl->gl_state != LM_ST_UNLOCKED && @@ -653,6 +652,7 @@ static void glock_work_func(struct work_struct *work) } run_queue(gl, 0); spin_unlock(&gl->gl_spin); + up_read(&gfs2_umount_flush_sem); if (!delay || queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) gfs2_glock_put(gl); diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 7b277d44915..5a31d426116 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -137,15 +137,15 @@ void gfs2_set_iop(struct inode *inode) if (S_ISREG(mode)) { inode->i_op =
&gfs2_file_iops; if (gfs2_localflocks(sdp)) - inode->i_fop = gfs2_file_fops_nolock; + inode->i_fop = &gfs2_file_fops_nolock; else - inode->i_fop = gfs2_file_fops; + inode->i_fop = &gfs2_file_fops; } else if (S_ISDIR(mode)) { inode->i_op = &gfs2_dir_iops; if (gfs2_localflocks(sdp)) - inode->i_fop = gfs2_dir_fops_nolock; + inode->i_fop = &gfs2_dir_fops_nolock; else - inode->i_fop = gfs2_dir_fops; + inode->i_fop = &gfs2_dir_fops; } else if (S_ISLNK(mode)) { inode->i_op = &gfs2_symlink_iops; } else { diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index dca4fee3078..c30be2b6658 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -101,21 +101,23 @@ void gfs2_dinode_print(const struct gfs2_inode *ip); extern const struct inode_operations gfs2_file_iops; extern const struct inode_operations gfs2_dir_iops; extern const struct inode_operations gfs2_symlink_iops; -extern const struct file_operations *gfs2_file_fops_nolock; -extern const struct file_operations *gfs2_dir_fops_nolock; +extern const struct file_operations gfs2_file_fops_nolock; +extern const struct file_operations gfs2_dir_fops_nolock; extern void gfs2_set_inode_flags(struct inode *inode); #ifdef CONFIG_GFS2_FS_LOCKING_DLM -extern const struct file_operations *gfs2_file_fops; -extern const struct file_operations *gfs2_dir_fops; +extern const struct file_operations gfs2_file_fops; +extern const struct file_operations gfs2_dir_fops; + static inline int gfs2_localflocks(const struct gfs2_sbd *sdp) { return sdp->sd_args.ar_localflocks; } #else /* Single node only */ -#define gfs2_file_fops NULL -#define gfs2_dir_fops NULL +#define gfs2_file_fops gfs2_file_fops_nolock +#define gfs2_dir_fops gfs2_dir_fops_nolock + static inline int gfs2_localflocks(const struct gfs2_sbd *sdp) { return 1; diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index 70b9b854894..101caf3ee86 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c @@ -705,7 +705,7 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl) } } -const struct file_operations *gfs2_file_fops = &(const struct file_operations){ +const struct file_operations gfs2_file_fops = { .llseek = gfs2_llseek, .read = do_sync_read, .aio_read = generic_file_aio_read, @@ -723,7 +723,7 @@ const struct file_operations *gfs2_file_fops = &(const struct file_operations){ .setlease = gfs2_setlease, }; -const struct file_operations *gfs2_dir_fops = &(const struct file_operations){ +const struct file_operations gfs2_dir_fops = { .readdir = gfs2_readdir, .unlocked_ioctl = gfs2_ioctl, .open = gfs2_open, @@ -735,7 +735,7 @@ const struct file_operations *gfs2_dir_fops = &(const struct file_operations){ #endif /* CONFIG_GFS2_FS_LOCKING_DLM */ -const struct file_operations *gfs2_file_fops_nolock = &(const struct file_operations){ +const struct file_operations gfs2_file_fops_nolock = { .llseek = gfs2_llseek, .read = do_sync_read, .aio_read = generic_file_aio_read, @@ -751,7 +751,7 @@ const struct file_operations *gfs2_file_fops_nolock = &(const struct file_operat .setlease = generic_setlease, }; -const struct file_operations *gfs2_dir_fops_nolock = &(const struct file_operations){ +const struct file_operations gfs2_dir_fops_nolock = { .readdir = gfs2_readdir, .unlocked_ioctl = gfs2_ioctl, .open = gfs2_open, diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 51883b3ad89..650a730707b 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -272,11 +272,6 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector) lock_page(page); bio = bio_alloc(GFP_NOFS, 1); - if 
(unlikely(!bio)) { - __free_page(page); - return -ENOBUFS; - } - bio->bi_sector = sector * (sb->s_blocksize >> 9); bio->bi_bdev = sb->s_bdev; bio_add_page(bio, page, PAGE_SIZE, 0); diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index abd5429ae28..1c70fa5168d 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -371,6 +371,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry, ip = ghs[1].gh_gl->gl_object; ip->i_disksize = size; + i_size_write(inode, size); error = gfs2_meta_inode_buffer(ip, &dibh); diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 8d53f66b5bc..152e6c4a0dc 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -81,7 +81,7 @@ struct gfs2_quota_change_host { static LIST_HEAD(qd_lru_list); static atomic_t qd_lru_count = ATOMIC_INIT(0); -static spinlock_t qd_lru_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(qd_lru_lock); int gfs2_shrink_qd_memory(int nr, gfp_t gfp_mask) { @@ -1364,7 +1364,7 @@ int gfs2_quotad(void *data) refrigerator(); t = min(quotad_timeo, statfs_timeo); - prepare_to_wait(&sdp->sd_quota_wait, &wait, TASK_UNINTERRUPTIBLE); + prepare_to_wait(&sdp->sd_quota_wait, &wait, TASK_INTERRUPTIBLE); spin_lock(&sdp->sd_trunc_lock); empty = list_empty(&sdp->sd_trunc_list); spin_unlock(&sdp->sd_trunc_lock); diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 9435dda8f1e..a1cbff2b4d9 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -70,6 +70,10 @@ static int hfs_releasepage(struct page *page, gfp_t mask) BUG(); return 0; } + + if (!tree) + return 0; + if (tree->node_size >= PAGE_CACHE_SIZE) { nidx = page->index >> (tree->node_size_shift - PAGE_CACHE_SHIFT); spin_lock(&tree->hash_lock); diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c index 36ca2e1a4fa..7b6165f25fb 100644 --- a/fs/hfs/mdb.c +++ b/fs/hfs/mdb.c @@ -349,6 +349,7 @@ void hfs_mdb_put(struct super_block *sb) if (HFS_SB(sb)->nls_disk) unload_nls(HFS_SB(sb)->nls_disk); + free_pages((unsigned long)HFS_SB(sb)->bitmap, PAGE_SIZE < 8192 ? 1 : 0); kfree(HFS_SB(sb)); sb->s_fs_info = NULL; } diff --git a/fs/inode.c b/fs/inode.c index d06d6d268de..6ad14a1cd8c 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1470,42 +1470,6 @@ static void __wait_on_freeing_inode(struct inode *inode) spin_lock(&inode_lock); } -/* - * We rarely want to lock two inodes that do not have a parent/child - * relationship (such as directory, child inode) simultaneously. The - * vast majority of file systems should be able to get along fine - * without this. Do not use these functions except as a last resort. 
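
inode_double_lock(), deleted just below, ordered the two mutexes by address so that concurrent callers locking the same pair in opposite order could not deadlock; pipe_double_lock() in the fs/pipe.c hunk further down re-implements the idiom for pipe pairs. The core of the idiom, sketched without the lockdep nesting annotations the real helpers carry:

	static void double_lock(struct mutex *a, struct mutex *b)
	{
		if (a == b) {
			mutex_lock(a);
			return;
		}
		/* Lower address first: every task agrees on the order. */
		if (a < b) {
			mutex_lock(a);
			mutex_lock(b);
		} else {
			mutex_lock(b);
			mutex_lock(a);
		}
	}
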
- */ -void inode_double_lock(struct inode *inode1, struct inode *inode2) -{ - if (inode1 == NULL || inode2 == NULL || inode1 == inode2) { - if (inode1) - mutex_lock(&inode1->i_mutex); - else if (inode2) - mutex_lock(&inode2->i_mutex); - return; - } - - if (inode1 < inode2) { - mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); - } else { - mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD); - } -} -EXPORT_SYMBOL(inode_double_lock); - -void inode_double_unlock(struct inode *inode1, struct inode *inode2) -{ - if (inode1) - mutex_unlock(&inode1->i_mutex); - - if (inode2 && inode2 != inode1) - mutex_unlock(&inode2->i_mutex); -} -EXPORT_SYMBOL(inode_double_unlock); - static __initdata unsigned long ihash_entries; static int __init set_ihash_entries(char *str) { diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index c7bd649bbbd..3e9afc2a91d 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c @@ -55,6 +55,25 @@ * need do nothing. * RevokeValid set, Revoked set: * buffer has been revoked. + * + * Locking rules: + * We keep two hash tables of revoke records. One hashtable belongs to the + * running transaction (is pointed to by journal->j_revoke), the other one + * belongs to the committing transaction. Accesses to the second hash table + * happen only from the kjournald and no other thread touches this table. Also + * journal_switch_revoke_table() which switches which hashtable belongs to the + * running and which to the committing transaction is called only from + * kjournald. Therefore we need no locks when accessing the hashtable belonging + * to the committing transaction. + * + * All users operating on the hash table belonging to the running transaction + * have a handle to the transaction. Therefore they are safe from kjournald + * switching hash tables under them. For operations on the lists of entries in + * the hash table j_revoke_lock is used. + * + * Finally, also replay code uses the hash tables but at this moment noone else + * can touch them (filesystem isn't mounted yet) and hence no locking is + * needed. */ #ifndef __KERNEL__ @@ -402,8 +421,6 @@ int journal_revoke(handle_t *handle, unsigned long blocknr, * the second time we would still have a pending revoke to cancel. So, * do not trust the Revoked bit on buffers unless RevokeValid is also * set. - * - * The caller must have the journal locked. */ int journal_cancel_revoke(handle_t *handle, struct journal_head *jh) { @@ -481,10 +498,7 @@ void journal_switch_revoke_table(journal_t *journal) /* * Write revoke records to the journal for all entries in the current * revoke hash, deleting the entries as we go. - * - * Called with the journal lock held. 
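
Restating the locking rules added above from the call sites' point of view; a simplified sketch, where insert_revoke_record() stands in for the real static helpers in fs/jbd/revoke.c:

	/* Running transaction: the table is shared between handles, so
	 * list manipulation takes j_revoke_lock. */
	spin_lock(&journal->j_revoke_lock);
	insert_revoke_record(journal->j_revoke, blocknr);
	spin_unlock(&journal->j_revoke_lock);

	/* Commit path: only kjournald touches the committing table (and
	 * only kjournald swaps the tables), so no lock is needed there. */
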
*/ - void journal_write_revoke_records(journal_t *journal, transaction_t *transaction) { diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c index 24638e059bf..064279e33bb 100644 --- a/fs/nilfs2/bmap.c +++ b/fs/nilfs2/bmap.c @@ -688,6 +688,8 @@ static const struct nilfs_bmap_ptr_operations nilfs_bmap_ptr_ops_gc = { .bpop_translate = NULL, }; +static struct lock_class_key nilfs_bmap_dat_lock_key; + /** * nilfs_bmap_read - read a bmap from an inode * @bmap: bmap @@ -715,6 +717,7 @@ int nilfs_bmap_read(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode) bmap->b_pops = &nilfs_bmap_ptr_ops_p; bmap->b_last_allocated_key = 0; /* XXX: use macro */ bmap->b_last_allocated_ptr = NILFS_BMAP_NEW_PTR_INIT; + lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key); break; case NILFS_CPFILE_INO: case NILFS_SUFILE_INO: @@ -772,6 +775,7 @@ void nilfs_bmap_init_gcdat(struct nilfs_bmap *gcbmap, struct nilfs_bmap *bmap) { memcpy(gcbmap, bmap, sizeof(union nilfs_bmap_union)); init_rwsem(&gcbmap->b_sem); + lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key); gcbmap->b_inode = &NILFS_BMAP_I(gcbmap)->vfs_inode; } @@ -779,5 +783,6 @@ void nilfs_bmap_commit_gcdat(struct nilfs_bmap *gcbmap, struct nilfs_bmap *bmap) { memcpy(bmap, gcbmap, sizeof(union nilfs_bmap_union)); init_rwsem(&bmap->b_sem); + lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key); bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode; } diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 7558c977db0..3d0c18a16db 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -35,11 +35,6 @@ #include "bmap_union.h" /* - * NILFS filesystem version - */ -#define NILFS_VERSION "2.0.5" - -/* * nilfs inode data in memory */ struct nilfs_inode_info { diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index 6ade0963fc1..4fc081e47d7 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -413,7 +413,6 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, struct nilfs_segment_entry *ent, *n; struct inode *sufile = nilfs->ns_sufile; __u64 segnum[4]; - time_t mtime; int err; int i; @@ -442,24 +441,13 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, * Collecting segments written after the latest super root. * These are marked dirty to avoid being reallocated in the next write. 
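
The rewritten recovery loop above depends on the _safe flavour of the list iterator, since entries are unlinked and freed mid-walk; the bare idiom:

	/* The lookahead cursor n survives freeing the current entry. */
	struct nilfs_segment_entry *ent, *n;

	list_for_each_entry_safe(ent, n, head, list) {
		list_del(&ent->list);
		nilfs_free_segment_entry(ent);
	}
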
*/ - mtime = get_seconds(); list_for_each_entry_safe(ent, n, head, list) { - if (ent->segnum == segnum[0]) { - list_del(&ent->list); - nilfs_free_segment_entry(ent); - continue; - } - err = nilfs_open_segment_entry(ent, sufile); - if (unlikely(err)) - goto failed; - if (!nilfs_segment_usage_dirty(ent->raw_su)) { - /* make the segment garbage */ - ent->raw_su->su_nblocks = cpu_to_le32(0); - ent->raw_su->su_lastmod = cpu_to_le32(mtime); - nilfs_segment_usage_set_dirty(ent->raw_su); + if (ent->segnum != segnum[0]) { + err = nilfs_sufile_scrap(sufile, ent->segnum); + if (unlikely(err)) + goto failed; } list_del(&ent->list); - nilfs_close_segment_entry(ent, sufile); nilfs_free_segment_entry(ent); } diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index c774cf397e2..98e68677f04 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -93,6 +93,52 @@ nilfs_sufile_get_segment_usage_block(struct inode *sufile, __u64 segnum, create, NULL, bhp); } +static void nilfs_sufile_mod_counter(struct buffer_head *header_bh, + u64 ncleanadd, u64 ndirtyadd) +{ + struct nilfs_sufile_header *header; + void *kaddr; + + kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + header = kaddr + bh_offset(header_bh); + le64_add_cpu(&header->sh_ncleansegs, ncleanadd); + le64_add_cpu(&header->sh_ndirtysegs, ndirtyadd); + kunmap_atomic(kaddr, KM_USER0); + + nilfs_mdt_mark_buffer_dirty(header_bh); +} + +int nilfs_sufile_update(struct inode *sufile, __u64 segnum, int create, + void (*dofunc)(struct inode *, __u64, + struct buffer_head *, + struct buffer_head *)) +{ + struct buffer_head *header_bh, *bh; + int ret; + + if (unlikely(segnum >= nilfs_sufile_get_nsegments(sufile))) { + printk(KERN_WARNING "%s: invalid segment number: %llu\n", + __func__, (unsigned long long)segnum); + return -EINVAL; + } + down_write(&NILFS_MDT(sufile)->mi_sem); + + ret = nilfs_sufile_get_header_block(sufile, &header_bh); + if (ret < 0) + goto out_sem; + + ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, create, &bh); + if (!ret) { + dofunc(sufile, segnum, header_bh, bh); + brelse(bh); + } + brelse(header_bh); + + out_sem: + up_write(&NILFS_MDT(sufile)->mi_sem); + return ret; +} + /** * nilfs_sufile_alloc - allocate a segment * @sufile: inode of segment usage file @@ -113,7 +159,6 @@ nilfs_sufile_get_segment_usage_block(struct inode *sufile, __u64 segnum, int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) { struct buffer_head *header_bh, *su_bh; - struct the_nilfs *nilfs; struct nilfs_sufile_header *header; struct nilfs_segment_usage *su; size_t susz = NILFS_MDT(sufile)->mi_entry_size; @@ -124,8 +169,6 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) down_write(&NILFS_MDT(sufile)->mi_sem); - nilfs = NILFS_MDT(sufile)->mi_nilfs; - ret = nilfs_sufile_get_header_block(sufile, &header_bh); if (ret < 0) goto out_sem; @@ -192,165 +235,84 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) return ret; } -/** - * nilfs_sufile_cancel_free - - * @sufile: inode of segment usage file - * @segnum: segment number - * - * Description: - * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. 
- */ -int nilfs_sufile_cancel_free(struct inode *sufile, __u64 segnum) +void nilfs_sufile_do_cancel_free(struct inode *sufile, __u64 segnum, + struct buffer_head *header_bh, + struct buffer_head *su_bh) { - struct buffer_head *header_bh, *su_bh; - struct the_nilfs *nilfs; - struct nilfs_sufile_header *header; struct nilfs_segment_usage *su; void *kaddr; - int ret; - - down_write(&NILFS_MDT(sufile)->mi_sem); - - nilfs = NILFS_MDT(sufile)->mi_nilfs; - - ret = nilfs_sufile_get_header_block(sufile, &header_bh); - if (ret < 0) - goto out_sem; - - ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &su_bh); - if (ret < 0) - goto out_header; kaddr = kmap_atomic(su_bh->b_page, KM_USER0); - su = nilfs_sufile_block_get_segment_usage( - sufile, segnum, su_bh, kaddr); + su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); if (unlikely(!nilfs_segment_usage_clean(su))) { printk(KERN_WARNING "%s: segment %llu must be clean\n", __func__, (unsigned long long)segnum); kunmap_atomic(kaddr, KM_USER0); - goto out_su_bh; + return; } nilfs_segment_usage_set_dirty(su); kunmap_atomic(kaddr, KM_USER0); - kaddr = kmap_atomic(header_bh->b_page, KM_USER0); - header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr); - le64_add_cpu(&header->sh_ncleansegs, -1); - le64_add_cpu(&header->sh_ndirtysegs, 1); - kunmap_atomic(kaddr, KM_USER0); - - nilfs_mdt_mark_buffer_dirty(header_bh); + nilfs_sufile_mod_counter(header_bh, -1, 1); nilfs_mdt_mark_buffer_dirty(su_bh); nilfs_mdt_mark_dirty(sufile); - - out_su_bh: - brelse(su_bh); - out_header: - brelse(header_bh); - out_sem: - up_write(&NILFS_MDT(sufile)->mi_sem); - return ret; } -/** - * nilfs_sufile_freev - free segments - * @sufile: inode of segment usage file - * @segnum: array of segment numbers - * @nsegs: number of segments - * - * Description: nilfs_sufile_freev() frees segments specified by @segnum and - * @nsegs, which must have been returned by a previous call to - * nilfs_sufile_alloc(). - * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. 
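
The refactor running through this file hoists the boilerplate shared by every segment-usage operation (segment-number validation, mi_sem, header and per-segment block lookup, brelse) into nilfs_sufile_update(), so each operation shrinks to a callback. A sketch of what a new operation reduces to; my_do_op is hypothetical:

	/* Runs with mi_sem held and both buffers pinned; it only edits the
	 * mapped segment-usage entry and marks the buffers dirty. */
	static void my_do_op(struct inode *sufile, __u64 segnum,
			     struct buffer_head *header_bh,
			     struct buffer_head *su_bh)
	{
		/* kmap su_bh->b_page, update the entry and the counters */
	}

	/* invoked as: err = nilfs_sufile_update(sufile, segnum, 0, my_do_op); */
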
- */ -#define NILFS_SUFILE_FREEV_PREALLOC 16 -int nilfs_sufile_freev(struct inode *sufile, __u64 *segnum, size_t nsegs) +void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum, + struct buffer_head *header_bh, + struct buffer_head *su_bh) { - struct buffer_head *header_bh, **su_bh, - *su_bh_prealloc[NILFS_SUFILE_FREEV_PREALLOC]; - struct the_nilfs *nilfs; - struct nilfs_sufile_header *header; struct nilfs_segment_usage *su; void *kaddr; - int ret, i; + int clean, dirty; - down_write(&NILFS_MDT(sufile)->mi_sem); - - nilfs = NILFS_MDT(sufile)->mi_nilfs; - - /* prepare resources */ - if (nsegs <= NILFS_SUFILE_FREEV_PREALLOC) - su_bh = su_bh_prealloc; - else { - su_bh = kmalloc(sizeof(*su_bh) * nsegs, GFP_NOFS); - if (su_bh == NULL) { - ret = -ENOMEM; - goto out_sem; - } - } - - ret = nilfs_sufile_get_header_block(sufile, &header_bh); - if (ret < 0) - goto out_su_bh; - for (i = 0; i < nsegs; i++) { - ret = nilfs_sufile_get_segment_usage_block(sufile, segnum[i], - 0, &su_bh[i]); - if (ret < 0) - goto out_bh; - } - - /* free segments */ - for (i = 0; i < nsegs; i++) { - kaddr = kmap_atomic(su_bh[i]->b_page, KM_USER0); - su = nilfs_sufile_block_get_segment_usage( - sufile, segnum[i], su_bh[i], kaddr); - WARN_ON(nilfs_segment_usage_error(su)); - nilfs_segment_usage_set_clean(su); + kaddr = kmap_atomic(su_bh->b_page, KM_USER0); + su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); + if (su->su_flags == cpu_to_le32(1UL << NILFS_SEGMENT_USAGE_DIRTY) && + su->su_nblocks == cpu_to_le32(0)) { kunmap_atomic(kaddr, KM_USER0); - nilfs_mdt_mark_buffer_dirty(su_bh[i]); + return; } - kaddr = kmap_atomic(header_bh->b_page, KM_USER0); - header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr); - le64_add_cpu(&header->sh_ncleansegs, nsegs); - le64_add_cpu(&header->sh_ndirtysegs, -(u64)nsegs); + clean = nilfs_segment_usage_clean(su); + dirty = nilfs_segment_usage_dirty(su); + + /* make the segment garbage */ + su->su_lastmod = cpu_to_le64(0); + su->su_nblocks = cpu_to_le32(0); + su->su_flags = cpu_to_le32(1UL << NILFS_SEGMENT_USAGE_DIRTY); kunmap_atomic(kaddr, KM_USER0); - nilfs_mdt_mark_buffer_dirty(header_bh); + + nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1); + nilfs_mdt_mark_buffer_dirty(su_bh); nilfs_mdt_mark_dirty(sufile); +} - out_bh: - for (i--; i >= 0; i--) - brelse(su_bh[i]); - brelse(header_bh); +void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum, + struct buffer_head *header_bh, + struct buffer_head *su_bh) +{ + struct nilfs_segment_usage *su; + void *kaddr; + int sudirty; - out_su_bh: - if (su_bh != su_bh_prealloc) - kfree(su_bh); + kaddr = kmap_atomic(su_bh->b_page, KM_USER0); + su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); + if (nilfs_segment_usage_clean(su)) { + printk(KERN_WARNING "%s: segment %llu is already clean\n", + __func__, (unsigned long long)segnum); + kunmap_atomic(kaddr, KM_USER0); + return; + } + WARN_ON(nilfs_segment_usage_error(su)); + WARN_ON(!nilfs_segment_usage_dirty(su)); - out_sem: - up_write(&NILFS_MDT(sufile)->mi_sem); - return ret; -} + sudirty = nilfs_segment_usage_dirty(su); + nilfs_segment_usage_set_clean(su); + kunmap_atomic(kaddr, KM_USER0); + nilfs_mdt_mark_buffer_dirty(su_bh); -/** - * nilfs_sufile_free - - * @sufile: - * @segnum: - */ -int nilfs_sufile_free(struct inode *sufile, __u64 segnum) -{ - return nilfs_sufile_freev(sufile, &segnum, 1); + nilfs_sufile_mod_counter(header_bh, 1, sudirty ? 
(u64)-1 : 0); + nilfs_mdt_mark_dirty(sufile); } /** @@ -500,72 +462,28 @@ int nilfs_sufile_get_ncleansegs(struct inode *sufile, unsigned long *nsegsp) return ret; } -/** - * nilfs_sufile_set_error - mark a segment as erroneous - * @sufile: inode of segment usage file - * @segnum: segment number - * - * Description: nilfs_sufile_set_error() marks the segment specified by - * @segnum as erroneous. The error segment will never be used again. - * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-EINVAL - Invalid segment usage number. - */ -int nilfs_sufile_set_error(struct inode *sufile, __u64 segnum) +void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum, + struct buffer_head *header_bh, + struct buffer_head *su_bh) { - struct buffer_head *header_bh, *su_bh; struct nilfs_segment_usage *su; - struct nilfs_sufile_header *header; void *kaddr; - int ret; - - if (unlikely(segnum >= nilfs_sufile_get_nsegments(sufile))) { - printk(KERN_WARNING "%s: invalid segment number: %llu\n", - __func__, (unsigned long long)segnum); - return -EINVAL; - } - down_write(&NILFS_MDT(sufile)->mi_sem); - - ret = nilfs_sufile_get_header_block(sufile, &header_bh); - if (ret < 0) - goto out_sem; - ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &su_bh); - if (ret < 0) - goto out_header; + int suclean; kaddr = kmap_atomic(su_bh->b_page, KM_USER0); su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); if (nilfs_segment_usage_error(su)) { kunmap_atomic(kaddr, KM_USER0); - brelse(su_bh); - goto out_header; + return; } - + suclean = nilfs_segment_usage_clean(su); nilfs_segment_usage_set_error(su); kunmap_atomic(kaddr, KM_USER0); - brelse(su_bh); - kaddr = kmap_atomic(header_bh->b_page, KM_USER0); - header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr); - le64_add_cpu(&header->sh_ndirtysegs, -1); - kunmap_atomic(kaddr, KM_USER0); - nilfs_mdt_mark_buffer_dirty(header_bh); + if (suclean) + nilfs_sufile_mod_counter(header_bh, -1, 0); nilfs_mdt_mark_buffer_dirty(su_bh); nilfs_mdt_mark_dirty(sufile); - brelse(su_bh); - - out_header: - brelse(header_bh); - - out_sem: - up_write(&NILFS_MDT(sufile)->mi_sem); - return ret; } /** @@ -625,7 +543,7 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, si[i + j].sui_nblocks = le32_to_cpu(su->su_nblocks); si[i + j].sui_flags = le32_to_cpu(su->su_flags) & ~(1UL << NILFS_SEGMENT_USAGE_ACTIVE); - if (nilfs_segment_is_active(nilfs, segnum + i + j)) + if (nilfs_segment_is_active(nilfs, segnum + j)) si[i + j].sui_flags |= (1UL << NILFS_SEGMENT_USAGE_ACTIVE); } diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h index d595f33a768..a2e2efd4ade 100644 --- a/fs/nilfs2/sufile.h +++ b/fs/nilfs2/sufile.h @@ -36,9 +36,6 @@ static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile) } int nilfs_sufile_alloc(struct inode *, __u64 *); -int nilfs_sufile_cancel_free(struct inode *, __u64); -int nilfs_sufile_freev(struct inode *, __u64 *, size_t); -int nilfs_sufile_free(struct inode *, __u64); int nilfs_sufile_get_segment_usage(struct inode *, __u64, struct nilfs_segment_usage **, struct buffer_head **); @@ -46,9 +43,83 @@ void nilfs_sufile_put_segment_usage(struct inode *, __u64, struct buffer_head *); int nilfs_sufile_get_stat(struct inode *, struct nilfs_sustat *); int nilfs_sufile_get_ncleansegs(struct inode *, unsigned long *); -int 
nilfs_sufile_set_error(struct inode *, __u64); ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, struct nilfs_suinfo *, size_t); +int nilfs_sufile_update(struct inode *, __u64, int, + void (*dofunc)(struct inode *, __u64, + struct buffer_head *, + struct buffer_head *)); +void nilfs_sufile_do_cancel_free(struct inode *, __u64, struct buffer_head *, + struct buffer_head *); +void nilfs_sufile_do_scrap(struct inode *, __u64, struct buffer_head *, + struct buffer_head *); +void nilfs_sufile_do_free(struct inode *, __u64, struct buffer_head *, + struct buffer_head *); +void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *, + struct buffer_head *); + +/** + * nilfs_sufile_cancel_free - + * @sufile: inode of segment usage file + * @segnum: segment number + * + * Description: + * + * Return Value: On success, 0 is returned. On error, one of the following + * negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + */ +static inline int nilfs_sufile_cancel_free(struct inode *sufile, __u64 segnum) +{ + return nilfs_sufile_update(sufile, segnum, 0, + nilfs_sufile_do_cancel_free); +} + +/** + * nilfs_sufile_scrap - make a segment garbage + * @sufile: inode of segment usage file + * @segnum: segment number to be freed + */ +static inline int nilfs_sufile_scrap(struct inode *sufile, __u64 segnum) +{ + return nilfs_sufile_update(sufile, segnum, 1, nilfs_sufile_do_scrap); +} + +/** + * nilfs_sufile_free - free segment + * @sufile: inode of segment usage file + * @segnum: segment number to be freed + */ +static inline int nilfs_sufile_free(struct inode *sufile, __u64 segnum) +{ + return nilfs_sufile_update(sufile, segnum, 0, nilfs_sufile_do_free); +} + +/** + * nilfs_sufile_set_error - mark a segment as erroneous + * @sufile: inode of segment usage file + * @segnum: segment number + * + * Description: nilfs_sufile_set_error() marks the segment specified by + * @segnum as erroneous. The error segment will never be used again. + * + * Return Value: On success, 0 is returned. On error, one of the following + * negative error codes is returned. + * + * %-EIO - I/O error. + * + * %-ENOMEM - Insufficient amount of memory available. + * + * %-EINVAL - Invalid segment usage number. 
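
Callers are unchanged by the refactor, since the old entry points survive as the inline wrappers defined here; for instance the recovery hunk earlier in this patch frees a stale segment with:

	err = nilfs_sufile_scrap(sufile, ent->segnum);
	if (unlikely(err))
		goto failed;
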
+ */ +static inline int nilfs_sufile_set_error(struct inode *sufile, __u64 segnum) +{ + return nilfs_sufile_update(sufile, segnum, 0, + nilfs_sufile_do_set_error); +} #endif /* _NILFS_SUFILE_H */ diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index e117e1ea9bf..6989b03e97a 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -63,7 +63,6 @@ MODULE_AUTHOR("NTT Corp."); MODULE_DESCRIPTION("A New Implementation of the Log-structured Filesystem " "(NILFS)"); -MODULE_VERSION(NILFS_VERSION); MODULE_LICENSE("GPL"); static int nilfs_remount(struct super_block *sb, int *flags, char *data); @@ -476,11 +475,12 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; struct nilfs_sb_info *sbi = NILFS_SB(sb); + struct the_nilfs *nilfs = sbi->s_nilfs; + u64 id = huge_encode_dev(sb->s_bdev->bd_dev); unsigned long long blocks; unsigned long overhead; unsigned long nrsvblocks; sector_t nfreeblocks; - struct the_nilfs *nilfs = sbi->s_nilfs; int err; /* @@ -514,6 +514,9 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_files = atomic_read(&sbi->s_inodes_count); buf->f_ffree = 0; /* nilfs_count_free_inodes(sb); */ buf->f_namelen = NILFS_NAME_LEN; + buf->f_fsid.val[0] = (u32)id; + buf->f_fsid.val[1] = (u32)(id >> 32); + return 0; } diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 33400cf0bbe..7f65b3be4aa 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -115,6 +115,7 @@ void put_nilfs(struct the_nilfs *nilfs) static int nilfs_load_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, sector_t sr_block) { + static struct lock_class_key dat_lock_key; struct buffer_head *bh_sr; struct nilfs_super_root *raw_sr; struct nilfs_super_block **sbp = nilfs->ns_sbp; @@ -163,6 +164,9 @@ static int nilfs_load_super_root(struct the_nilfs *nilfs, if (unlikely(err)) goto failed_sufile; + lockdep_set_class(&NILFS_MDT(nilfs->ns_dat)->mi_sem, &dat_lock_key); + lockdep_set_class(&NILFS_MDT(nilfs->ns_gc_dat)->mi_sem, &dat_lock_key); + nilfs_mdt_set_shadow(nilfs->ns_dat, nilfs->ns_gc_dat); nilfs_mdt_set_entry_size(nilfs->ns_cpfile, checkpoint_size, sizeof(struct nilfs_cpfile_header)); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 8672b953603..c2a87c885b7 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1912,6 +1912,22 @@ out_sems: return written ? 
written : ret; } +static int ocfs2_splice_to_file(struct pipe_inode_info *pipe, + struct file *out, + struct splice_desc *sd) +{ + int ret; + + ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, &sd->pos, + sd->total_len, 0, NULL); + if (ret < 0) { + mlog_errno(ret); + return ret; + } + + return splice_from_pipe_feed(pipe, sd, pipe_to_file); +} + static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, @@ -1919,38 +1935,76 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, unsigned int flags) { int ret; - struct inode *inode = out->f_path.dentry->d_inode; + struct address_space *mapping = out->f_mapping; + struct inode *inode = mapping->host; + struct splice_desc sd = { + .total_len = len, + .flags = flags, + .pos = *ppos, + .u.file = out, + }; mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", out, pipe, (unsigned int)len, out->f_path.dentry->d_name.len, out->f_path.dentry->d_name.name); - mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); + if (pipe->inode) + mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_PARENT); - ret = ocfs2_rw_lock(inode, 1); - if (ret < 0) { - mlog_errno(ret); - goto out; - } + splice_from_pipe_begin(&sd); + do { + ret = splice_from_pipe_next(pipe, &sd); + if (ret <= 0) + break; - ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, ppos, len, 0, - NULL); - if (ret < 0) { - mlog_errno(ret); - goto out_unlock; - } + mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); + ret = ocfs2_rw_lock(inode, 1); + if (ret < 0) + mlog_errno(ret); + else { + ret = ocfs2_splice_to_file(pipe, out, &sd); + ocfs2_rw_unlock(inode, 1); + } + mutex_unlock(&inode->i_mutex); + } while (ret > 0); + splice_from_pipe_end(pipe, &sd); if (pipe->inode) - mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD); - ret = generic_file_splice_write_nolock(pipe, out, ppos, len, flags); - if (pipe->inode) mutex_unlock(&pipe->inode->i_mutex); -out_unlock: - ocfs2_rw_unlock(inode, 1); -out: - mutex_unlock(&inode->i_mutex); + if (sd.num_spliced) + ret = sd.num_spliced; + + if (ret > 0) { + unsigned long nr_pages; + + *ppos += ret; + nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + + /* + * If file or inode is SYNC and we actually wrote some data, + * sync it. 
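
The comment above describes the usual post-splice sync convention, sketched here in isolation with err and ret as in the hunk:

	/* O_SYNC on the file or S_SYNC on the inode means data and
	 * metadata must be flushed once the copy has succeeded. */
	if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
		err = generic_osync_inode(inode, mapping,
					  OSYNC_METADATA | OSYNC_DATA);
		if (err)
			ret = err;
	}
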
+ */ + if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { + int err; + + mutex_lock(&inode->i_mutex); + err = ocfs2_rw_lock(inode, 1); + if (err < 0) { + mlog_errno(err); + } else { + err = generic_osync_inode(inode, mapping, + OSYNC_METADATA|OSYNC_DATA); + ocfs2_rw_unlock(inode, 1); + } + mutex_unlock(&inode->i_mutex); + + if (err) + ret = err; + } + balance_dirty_pages_ratelimited_nr(mapping, nr_pages); + } mlog_exit(ret); return ret; diff --git a/fs/pipe.c b/fs/pipe.c index 4af7aa52181..13414ec45b8 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -37,6 +37,42 @@ * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09 */ +static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass) +{ + if (pipe->inode) + mutex_lock_nested(&pipe->inode->i_mutex, subclass); +} + +void pipe_lock(struct pipe_inode_info *pipe) +{ + /* + * pipe_lock() nests non-pipe inode locks (for writing to a file) + */ + pipe_lock_nested(pipe, I_MUTEX_PARENT); +} +EXPORT_SYMBOL(pipe_lock); + +void pipe_unlock(struct pipe_inode_info *pipe) +{ + if (pipe->inode) + mutex_unlock(&pipe->inode->i_mutex); +} +EXPORT_SYMBOL(pipe_unlock); + +void pipe_double_lock(struct pipe_inode_info *pipe1, + struct pipe_inode_info *pipe2) +{ + BUG_ON(pipe1 == pipe2); + + if (pipe1 < pipe2) { + pipe_lock_nested(pipe1, I_MUTEX_PARENT); + pipe_lock_nested(pipe2, I_MUTEX_CHILD); + } else { + pipe_lock_nested(pipe2, I_MUTEX_CHILD); + pipe_lock_nested(pipe1, I_MUTEX_PARENT); + } +} + /* Drop the inode semaphore and wait for a pipe event, atomically */ void pipe_wait(struct pipe_inode_info *pipe) { @@ -47,12 +83,10 @@ void pipe_wait(struct pipe_inode_info *pipe) * is considered a noninteractive wait: */ prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE); - if (pipe->inode) - mutex_unlock(&pipe->inode->i_mutex); + pipe_unlock(pipe); schedule(); finish_wait(&pipe->wait, &wait); - if (pipe->inode) - mutex_lock(&pipe->inode->i_mutex); + pipe_lock(pipe); } static int diff --git a/fs/splice.c b/fs/splice.c index c18aa7e03e2..5384a90665d 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -182,8 +182,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, do_wakeup = 0; page_nr = 0; - if (pipe->inode) - mutex_lock(&pipe->inode->i_mutex); + pipe_lock(pipe); for (;;) { if (!pipe->readers) { @@ -245,15 +244,13 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, pipe->waiting_writers--; } - if (pipe->inode) { - mutex_unlock(&pipe->inode->i_mutex); + pipe_unlock(pipe); - if (do_wakeup) { - smp_mb(); - if (waitqueue_active(&pipe->wait)) - wake_up_interruptible(&pipe->wait); - kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); - } + if (do_wakeup) { + smp_mb(); + if (waitqueue_active(&pipe->wait)) + wake_up_interruptible(&pipe->wait); + kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); } while (page_nr < spd_pages) @@ -555,8 +552,8 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe, * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create * a new page in the output file page cache and fill/dirty that. 
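
The new pipe_lock()/pipe_unlock() helpers above hide the pipe->inode test that callers used to open-code; anonymous pipes have no backing inode, so both are no-ops for them. Typical use, sketched:

	pipe_lock(pipe);
	/* ... manipulate pipe->bufs, pipe->nrbufs, pipe->curbuf ... */
	pipe_unlock(pipe);
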
*/ -static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, - struct splice_desc *sd) +int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, + struct splice_desc *sd) { struct file *file = sd->u.file; struct address_space *mapping = file->f_mapping; @@ -600,108 +597,178 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, out: return ret; } +EXPORT_SYMBOL(pipe_to_file); + +static void wakeup_pipe_writers(struct pipe_inode_info *pipe) +{ + smp_mb(); + if (waitqueue_active(&pipe->wait)) + wake_up_interruptible(&pipe->wait); + kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); +} /** - * __splice_from_pipe - splice data from a pipe to given actor + * splice_from_pipe_feed - feed available data from a pipe to a file * @pipe: pipe to splice from * @sd: information to @actor * @actor: handler that splices the data * * Description: - * This function does little more than loop over the pipe and call - * @actor to do the actual moving of a single struct pipe_buffer to - * the desired destination. See pipe_to_file, pipe_to_sendpage, or - * pipe_to_user. + + * This function loops over the pipe and calls @actor to do the + * actual moving of a single struct pipe_buffer to the desired + * destination. It returns when there's no more buffers left in + * the pipe or if the requested number of bytes (@sd->total_len) + * have been copied. It returns a positive number (one) if the + * pipe needs to be filled with more data, zero if the required + * number of bytes have been copied and -errno on error. * + * This, together with splice_from_pipe_{begin,end,next}, may be + * used to implement the functionality of __splice_from_pipe() when + * locking is required around copying the pipe buffers to the + * destination. 
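
Taken together, splice_from_pipe_{begin,next,feed,end} support the loop shape that ocfs2 adopted above: the filesystem takes its own locks around each batch of pipe buffers rather than holding them across pipe_wait(). A generic sketch, where my_prepare() is a hypothetical stand-in for per-filesystem setup such as ocfs2_prepare_inode_for_write(), and pipe locking is elided as in the ocfs2 hunk:

	splice_from_pipe_begin(&sd);
	do {
		ret = splice_from_pipe_next(pipe, &sd);	/* 1: buffers ready */
		if (ret <= 0)				/* 0: done, <0: error */
			break;

		mutex_lock(&inode->i_mutex);
		ret = my_prepare(inode, &sd);
		if (!ret)
			ret = splice_from_pipe_feed(pipe, &sd, pipe_to_file);
		mutex_unlock(&inode->i_mutex);
	} while (ret > 0);
	splice_from_pipe_end(pipe, &sd);

	if (sd.num_spliced)
		ret = sd.num_spliced;	/* partial progress wins over a late error */
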
*/ -ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd, - splice_actor *actor) +int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd, + splice_actor *actor) { - int ret, do_wakeup, err; - - ret = 0; - do_wakeup = 0; - - for (;;) { - if (pipe->nrbufs) { - struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; - const struct pipe_buf_operations *ops = buf->ops; + int ret; - sd->len = buf->len; - if (sd->len > sd->total_len) - sd->len = sd->total_len; + while (pipe->nrbufs) { + struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; + const struct pipe_buf_operations *ops = buf->ops; - err = actor(pipe, buf, sd); - if (err <= 0) { - if (!ret && err != -ENODATA) - ret = err; + sd->len = buf->len; + if (sd->len > sd->total_len) + sd->len = sd->total_len; - break; - } + ret = actor(pipe, buf, sd); + if (ret <= 0) { + if (ret == -ENODATA) + ret = 0; + return ret; + } + buf->offset += ret; + buf->len -= ret; - ret += err; - buf->offset += err; - buf->len -= err; + sd->num_spliced += ret; + sd->len -= ret; + sd->pos += ret; + sd->total_len -= ret; - sd->len -= err; - sd->pos += err; - sd->total_len -= err; - if (sd->len) - continue; + if (!buf->len) { + buf->ops = NULL; + ops->release(pipe, buf); + pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1); + pipe->nrbufs--; + if (pipe->inode) + sd->need_wakeup = true; + } - if (!buf->len) { - buf->ops = NULL; - ops->release(pipe, buf); - pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1); - pipe->nrbufs--; - if (pipe->inode) - do_wakeup = 1; - } + if (!sd->total_len) + return 0; + } - if (!sd->total_len) - break; - } + return 1; +} +EXPORT_SYMBOL(splice_from_pipe_feed); - if (pipe->nrbufs) - continue; +/** + * splice_from_pipe_next - wait for some data to splice from + * @pipe: pipe to splice from + * @sd: information about the splice operation + * + * Description: + * This function will wait for some data and return a positive + * value (one) if pipe buffers are available. It will return zero + * or -errno if no more data needs to be spliced. + */ +int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd) +{ + while (!pipe->nrbufs) { if (!pipe->writers) - break; - if (!pipe->waiting_writers) { - if (ret) - break; - } + return 0; - if (sd->flags & SPLICE_F_NONBLOCK) { - if (!ret) - ret = -EAGAIN; - break; - } + if (!pipe->waiting_writers && sd->num_spliced) + return 0; - if (signal_pending(current)) { - if (!ret) - ret = -ERESTARTSYS; - break; - } + if (sd->flags & SPLICE_F_NONBLOCK) + return -EAGAIN; - if (do_wakeup) { - smp_mb(); - if (waitqueue_active(&pipe->wait)) - wake_up_interruptible_sync(&pipe->wait); - kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); - do_wakeup = 0; + if (signal_pending(current)) + return -ERESTARTSYS; + + if (sd->need_wakeup) { + wakeup_pipe_writers(pipe); + sd->need_wakeup = false; } pipe_wait(pipe); } - if (do_wakeup) { - smp_mb(); - if (waitqueue_active(&pipe->wait)) - wake_up_interruptible(&pipe->wait); - kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); - } + return 1; +} +EXPORT_SYMBOL(splice_from_pipe_next); - return ret; +/** + * splice_from_pipe_begin - start splicing from pipe + * @pipe: pipe to splice from + * + * Description: + * This function should be called before a loop containing + * splice_from_pipe_next() and splice_from_pipe_feed() to + * initialize the necessary fields of @sd. 
+ */ +void splice_from_pipe_begin(struct splice_desc *sd) +{ + sd->num_spliced = 0; + sd->need_wakeup = false; +} +EXPORT_SYMBOL(splice_from_pipe_begin); + +/** + * splice_from_pipe_end - finish splicing from pipe + * @pipe: pipe to splice from + * @sd: information about the splice operation + * + * Description: + * This function will wake up pipe writers if necessary. It should + * be called after a loop containing splice_from_pipe_next() and + * splice_from_pipe_feed(). + */ +void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd) +{ + if (sd->need_wakeup) + wakeup_pipe_writers(pipe); +} +EXPORT_SYMBOL(splice_from_pipe_end); + +/** + * __splice_from_pipe - splice data from a pipe to given actor + * @pipe: pipe to splice from + * @sd: information to @actor + * @actor: handler that splices the data + * + * Description: + * This function does little more than loop over the pipe and call + * @actor to do the actual moving of a single struct pipe_buffer to + * the desired destination. See pipe_to_file, pipe_to_sendpage, or + * pipe_to_user. + * + */ +ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd, + splice_actor *actor) +{ + int ret; + + splice_from_pipe_begin(sd); + do { + ret = splice_from_pipe_next(pipe, sd); + if (ret > 0) + ret = splice_from_pipe_feed(pipe, sd, actor); + } while (ret > 0); + splice_from_pipe_end(pipe, sd); + + return sd->num_spliced ? sd->num_spliced : ret; } EXPORT_SYMBOL(__splice_from_pipe); @@ -715,7 +782,7 @@ EXPORT_SYMBOL(__splice_from_pipe); * @actor: handler that splices the data * * Description: - * See __splice_from_pipe. This function locks the input and output inodes, + * See __splice_from_pipe. This function locks the pipe inode, * otherwise it's identical to __splice_from_pipe(). * */ @@ -724,7 +791,6 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, splice_actor *actor) { ssize_t ret; - struct inode *inode = out->f_mapping->host; struct splice_desc sd = { .total_len = len, .flags = flags, @@ -732,30 +798,15 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, .u.file = out, }; - /* - * The actor worker might be calling ->write_begin and - * ->write_end. Most of the time, these expect i_mutex to - * be held. Since this may result in an ABBA deadlock with - * pipe->inode, we have to order lock acquiry here. - * - * Outer lock must be inode->i_mutex, as pipe_wait() will - * release and reacquire pipe->inode->i_mutex, AND inode must - * never be a pipe. - */ - WARN_ON(S_ISFIFO(inode->i_mode)); - mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); - if (pipe->inode) - mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD); + pipe_lock(pipe); ret = __splice_from_pipe(pipe, &sd, actor); - if (pipe->inode) - mutex_unlock(&pipe->inode->i_mutex); - mutex_unlock(&inode->i_mutex); + pipe_unlock(pipe); return ret; } /** - * generic_file_splice_write_nolock - generic_file_splice_write without mutexes + * generic_file_splice_write - splice data from a pipe to a file * @pipe: pipe info * @out: file to write to * @ppos: position in @out @@ -764,13 +815,12 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, * * Description: * Will either move or copy pages (determined by @flags options) from - * the given pipe inode to the given file. The caller is responsible - * for acquiring i_mutex on both inodes. + * the given pipe inode to the given file. 
* */ ssize_t -generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out, - loff_t *ppos, size_t len, unsigned int flags) +generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, + loff_t *ppos, size_t len, unsigned int flags) { struct address_space *mapping = out->f_mapping; struct inode *inode = mapping->host; @@ -781,76 +831,28 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out, .u.file = out, }; ssize_t ret; - int err; - - err = file_remove_suid(out); - if (unlikely(err)) - return err; - - ret = __splice_from_pipe(pipe, &sd, pipe_to_file); - if (ret > 0) { - unsigned long nr_pages; - *ppos += ret; - nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + pipe_lock(pipe); - /* - * If file or inode is SYNC and we actually wrote some data, - * sync it. - */ - if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { - err = generic_osync_inode(inode, mapping, - OSYNC_METADATA|OSYNC_DATA); - - if (err) - ret = err; - } - balance_dirty_pages_ratelimited_nr(mapping, nr_pages); - } + splice_from_pipe_begin(&sd); + do { + ret = splice_from_pipe_next(pipe, &sd); + if (ret <= 0) + break; - return ret; -} + mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); + ret = file_remove_suid(out); + if (!ret) + ret = splice_from_pipe_feed(pipe, &sd, pipe_to_file); + mutex_unlock(&inode->i_mutex); + } while (ret > 0); + splice_from_pipe_end(pipe, &sd); -EXPORT_SYMBOL(generic_file_splice_write_nolock); + pipe_unlock(pipe); -/** - * generic_file_splice_write - splice data from a pipe to a file - * @pipe: pipe info - * @out: file to write to - * @ppos: position in @out - * @len: number of bytes to splice - * @flags: splice modifier flags - * - * Description: - * Will either move or copy pages (determined by @flags options) from - * the given pipe inode to the given file. 
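This restructuring appears to be the heart of the patch: the pipe lock is now the outermost lock, and the file's i_mutex is taken only around each splice_from_pipe_feed() step, never across splice_from_pipe_next() and its possible sleep in pipe_wait(). That is what lets the old i_mutex-outer ordering, its WARN_ON(S_ISFIFO(inode->i_mode)) guard, and the whole _nolock variant go away.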
- * - */ -ssize_t -generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, - loff_t *ppos, size_t len, unsigned int flags) -{ - struct address_space *mapping = out->f_mapping; - struct inode *inode = mapping->host; - struct splice_desc sd = { - .total_len = len, - .flags = flags, - .pos = *ppos, - .u.file = out, - }; - ssize_t ret; + if (sd.num_spliced) + ret = sd.num_spliced; - WARN_ON(S_ISFIFO(inode->i_mode)); - mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); - ret = file_remove_suid(out); - if (likely(!ret)) { - if (pipe->inode) - mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_CHILD); - ret = __splice_from_pipe(pipe, &sd, pipe_to_file); - if (pipe->inode) - mutex_unlock(&pipe->inode->i_mutex); - } - mutex_unlock(&inode->i_mutex); if (ret > 0) { unsigned long nr_pages; @@ -1339,8 +1341,7 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *iov, if (!pipe) return -EBADF; - if (pipe->inode) - mutex_lock(&pipe->inode->i_mutex); + pipe_lock(pipe); error = ret = 0; while (nr_segs) { @@ -1395,8 +1396,7 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *iov, iov++; } - if (pipe->inode) - mutex_unlock(&pipe->inode->i_mutex); + pipe_unlock(pipe); if (!ret) ret = error; @@ -1524,7 +1524,7 @@ static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) return 0; ret = 0; - mutex_lock(&pipe->inode->i_mutex); + pipe_lock(pipe); while (!pipe->nrbufs) { if (signal_pending(current)) { @@ -1542,7 +1542,7 @@ static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) pipe_wait(pipe); } - mutex_unlock(&pipe->inode->i_mutex); + pipe_unlock(pipe); return ret; } @@ -1562,7 +1562,7 @@ static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) return 0; ret = 0; - mutex_lock(&pipe->inode->i_mutex); + pipe_lock(pipe); while (pipe->nrbufs >= PIPE_BUFFERS) { if (!pipe->readers) { @@ -1583,7 +1583,7 @@ static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) pipe->waiting_writers--; } - mutex_unlock(&pipe->inode->i_mutex); + pipe_unlock(pipe); return ret; } @@ -1599,10 +1599,10 @@ static int link_pipe(struct pipe_inode_info *ipipe, /* * Potential ABBA deadlock, work around it by ordering lock - * grabbing by inode address. Otherwise two different processes + * grabbing by pipe info address. Otherwise two different processes * could deadlock (one doing tee from A -> B, the other from B -> A). */ - inode_double_lock(ipipe->inode, opipe->inode); + pipe_double_lock(ipipe, opipe); do { if (!opipe->readers) { @@ -1653,7 +1653,8 @@ static int link_pipe(struct pipe_inode_info *ipipe, if (!ret && ipipe->waiting_writers && (flags & SPLICE_F_NONBLOCK)) ret = -EAGAIN; - inode_double_unlock(ipipe->inode, opipe->inode); + pipe_unlock(ipipe); + pipe_unlock(opipe); /* * If we put data in the output pipe, wakeup any potential readers. diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index c13f67300fe..7ec89fc05b2 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -153,23 +153,6 @@ xfs_find_bdev_for_inode( } /* - * Schedule IO completion handling on a xfsdatad if this was - * the final hold on this ioend. If we are asked to wait, - * flush the workqueue. 
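Back in the fs/splice.c hunk above, link_pipe() now orders its two locks by pipe_inode_info address via pipe_double_lock(), so tee(A, B) and tee(B, A) callers can never deadlock against each other. The trick is generic; a userspace rendering (names are illustrative, not kernel API):

#include <pthread.h>

/* Lock two objects in a single global order -- lowest address first --
 * exactly the rule pipe_double_lock() applies. (The kernel version
 * BUG()s on a == b; link_pipe() never passes the same pipe twice.) */
struct endpoint {
	pthread_mutex_t lock;
};

static void endpoint_double_lock(struct endpoint *a, struct endpoint *b)
{
	if (a > b) {			/* normalize so a is the lower address */
		struct endpoint *tmp = a;
		a = b;
		b = tmp;
	}
	pthread_mutex_lock(&a->lock);	/* lower address always first */
	pthread_mutex_lock(&b->lock);
}

static void endpoint_double_unlock(struct endpoint *a, struct endpoint *b)
{
	/* unlock order is irrelevant to deadlock avoidance */
	pthread_mutex_unlock(&a->lock);
	pthread_mutex_unlock(&b->lock);
}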
- */ -STATIC void -xfs_finish_ioend( - xfs_ioend_t *ioend, - int wait) -{ - if (atomic_dec_and_test(&ioend->io_remaining)) { - queue_work(xfsdatad_workqueue, &ioend->io_work); - if (wait) - flush_workqueue(xfsdatad_workqueue); - } -} - -/* * We're now finished for good with this ioend structure. * Update the page state via the associated buffer_heads, * release holds on the inode and bio, and finally free @@ -310,6 +293,27 @@ xfs_end_bio_read( } /* + * Schedule IO completion handling on a xfsdatad if this was + * the final hold on this ioend. If we are asked to wait, + * flush the workqueue. + */ +STATIC void +xfs_finish_ioend( + xfs_ioend_t *ioend, + int wait) +{ + if (atomic_dec_and_test(&ioend->io_remaining)) { + struct workqueue_struct *wq = xfsdatad_workqueue; + if (ioend->io_work.func == xfs_end_bio_unwritten) + wq = xfsconvertd_workqueue; + + queue_work(wq, &ioend->io_work); + if (wait) + flush_workqueue(wq); + } +} + +/* * Allocate and initialise an IO completion structure. * We need to track unwritten extent write completion here initially. * We'll need to extend this for updating the ondisk inode size later diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h index 1dd52884975..221b3e66cee 100644 --- a/fs/xfs/linux-2.6/xfs_aops.h +++ b/fs/xfs/linux-2.6/xfs_aops.h @@ -19,6 +19,7 @@ #define __XFS_AOPS_H__ extern struct workqueue_struct *xfsdatad_workqueue; +extern struct workqueue_struct *xfsconvertd_workqueue; extern mempool_t *xfs_ioend_pool; /* diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index aa1016bb913..e28800a9f2b 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -51,6 +51,7 @@ static struct shrinker xfs_buf_shake = { static struct workqueue_struct *xfslogd_workqueue; struct workqueue_struct *xfsdatad_workqueue; +struct workqueue_struct *xfsconvertd_workqueue; #ifdef XFS_BUF_TRACE void @@ -1775,6 +1776,7 @@ xfs_flush_buftarg( xfs_buf_t *bp, *n; int pincount = 0; + xfs_buf_runall_queues(xfsconvertd_workqueue); xfs_buf_runall_queues(xfsdatad_workqueue); xfs_buf_runall_queues(xfslogd_workqueue); @@ -1831,9 +1833,15 @@ xfs_buf_init(void) if (!xfsdatad_workqueue) goto out_destroy_xfslogd_workqueue; + xfsconvertd_workqueue = create_workqueue("xfsconvertd"); + if (!xfsconvertd_workqueue) + goto out_destroy_xfsdatad_workqueue; + register_shrinker(&xfs_buf_shake); return 0; + out_destroy_xfsdatad_workqueue: + destroy_workqueue(xfsdatad_workqueue); out_destroy_xfslogd_workqueue: destroy_workqueue(xfslogd_workqueue); out_free_buf_zone: @@ -1849,6 +1857,7 @@ void xfs_buf_terminate(void) { unregister_shrinker(&xfs_buf_shake); + destroy_workqueue(xfsconvertd_workqueue); destroy_workqueue(xfsdatad_workqueue); destroy_workqueue(xfslogd_workqueue); kmem_zone_destroy(xfs_buf_zone); diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c index 5aeb7777696..08be36d7326 100644 --- a/fs/xfs/linux-2.6/xfs_fs_subr.c +++ b/fs/xfs/linux-2.6/xfs_fs_subr.c @@ -74,14 +74,14 @@ xfs_flush_pages( if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { xfs_iflags_clear(ip, XFS_ITRUNCATED); - ret = filemap_fdatawrite(mapping); - if (flags & XFS_B_ASYNC) - return -ret; - ret2 = filemap_fdatawait(mapping); - if (!ret) - ret = ret2; + ret = -filemap_fdatawrite(mapping); } - return -ret; + if (flags & XFS_B_ASYNC) + return ret; + ret2 = xfs_wait_on_pages(ip, first, last); + if (!ret) + ret = ret2; + return ret; } int diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 7e90daa0d1d..9142192ccbe 100644 --- 
a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -751,10 +751,26 @@ start: goto relock; } } else { + int enospc = 0; + ssize_t ret2 = 0; + +write_retry: xfs_rw_enter_trace(XFS_WRITE_ENTER, xip, (void *)iovp, segs, *offset, ioflags); - ret = generic_file_buffered_write(iocb, iovp, segs, + ret2 = generic_file_buffered_write(iocb, iovp, segs, pos, offset, count, ret); + /* + * if we just got an ENOSPC, flush the inode now we + * aren't holding any page locks and retry *once* + */ + if (ret2 == -ENOSPC && !enospc) { + error = xfs_flush_pages(xip, 0, -1, 0, FI_NONE); + if (error) + goto out_unlock_internal; + enospc = 1; + goto write_retry; + } + ret = ret2; } current->backing_dev_info = NULL; diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index a608e72fa40..f7ba76633c2 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -62,12 +62,6 @@ xfs_sync_inodes_ag( uint32_t first_index = 0; int error = 0; int last_error = 0; - int fflag = XFS_B_ASYNC; - - if (flags & SYNC_DELWRI) - fflag = XFS_B_DELWRI; - if (flags & SYNC_WAIT) - fflag = 0; /* synchronous overrides all */ do { struct inode *inode; @@ -128,11 +122,23 @@ xfs_sync_inodes_ag( * If we have to flush data or wait for I/O completion * we need to hold the iolock. */ - if ((flags & SYNC_DELWRI) && VN_DIRTY(inode)) { - xfs_ilock(ip, XFS_IOLOCK_SHARED); - lock_flags |= XFS_IOLOCK_SHARED; - error = xfs_flush_pages(ip, 0, -1, fflag, FI_NONE); - if (flags & SYNC_IOWAIT) + if (flags & SYNC_DELWRI) { + if (VN_DIRTY(inode)) { + if (flags & SYNC_TRYLOCK) { + if (xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) + lock_flags |= XFS_IOLOCK_SHARED; + } else { + xfs_ilock(ip, XFS_IOLOCK_SHARED); + lock_flags |= XFS_IOLOCK_SHARED; + } + if (lock_flags & XFS_IOLOCK_SHARED) { + error = xfs_flush_pages(ip, 0, -1, + (flags & SYNC_WAIT) ? 0 + : XFS_B_ASYNC, + FI_NONE); + } + } + if (VN_CACHED(inode) && (flags & SYNC_IOWAIT)) xfs_ioend_wait(ip); } xfs_ilock(ip, XFS_ILOCK_SHARED); @@ -398,15 +404,17 @@ STATIC void xfs_syncd_queue_work( struct xfs_mount *mp, void *data, - void (*syncer)(struct xfs_mount *, void *)) + void (*syncer)(struct xfs_mount *, void *), + struct completion *completion) { - struct bhv_vfs_sync_work *work; + struct xfs_sync_work *work; - work = kmem_alloc(sizeof(struct bhv_vfs_sync_work), KM_SLEEP); + work = kmem_alloc(sizeof(struct xfs_sync_work), KM_SLEEP); INIT_LIST_HEAD(&work->w_list); work->w_syncer = syncer; work->w_data = data; work->w_mount = mp; + work->w_completion = completion; spin_lock(&mp->m_sync_lock); list_add_tail(&work->w_list, &mp->m_sync_list); spin_unlock(&mp->m_sync_lock); @@ -420,49 +428,26 @@ xfs_syncd_queue_work( * heads, looking about for more room... */ STATIC void -xfs_flush_inode_work( - struct xfs_mount *mp, - void *arg) -{ - struct inode *inode = arg; - filemap_flush(inode->i_mapping); - iput(inode); -} - -void -xfs_flush_inode( - xfs_inode_t *ip) -{ - struct inode *inode = VFS_I(ip); - - igrab(inode); - xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work); - delay(msecs_to_jiffies(500)); -} - -/* - * This is the "bigger hammer" version of xfs_flush_inode_work... - * (IOW, "If at first you don't succeed, use a Bigger Hammer"). 
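The xfs_lrw.c hunk above is a retry-exactly-once pattern: on ENOSPC, flush delayed-allocation data to free reserved space, then repeat the write a single time, so a genuinely full filesystem still fails instead of looping forever. A self-contained userspace analogue of the control flow (write()/fsync() stand in for generic_file_buffered_write()/xfs_flush_pages(); the flag plays the role of 'enospc' above):

#include <errno.h>
#include <unistd.h>

static ssize_t try_write(int fd, const void *buf, size_t len)
{
	ssize_t n = write(fd, buf, len);
	return n < 0 ? -errno : n;
}

static ssize_t write_with_flush_retry(int fd, const void *buf, size_t len)
{
	int flushed = 0;		/* ensures exactly one retry */
	ssize_t ret;

	for (;;) {
		ret = try_write(fd, buf, len);
		if (ret != -ENOSPC || flushed)
			break;
		if (fsync(fd) < 0) {	/* "flush", then retry *once* */
			ret = -errno;
			break;
		}
		flushed = 1;
	}
	return ret;
}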
- */ -STATIC void -xfs_flush_device_work( +xfs_flush_inodes_work( struct xfs_mount *mp, void *arg) { struct inode *inode = arg; - sync_blockdev(mp->m_super->s_bdev); + xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK); + xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK | SYNC_IOWAIT); iput(inode); } void -xfs_flush_device( +xfs_flush_inodes( xfs_inode_t *ip) { struct inode *inode = VFS_I(ip); + DECLARE_COMPLETION_ONSTACK(completion); igrab(inode); - xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_device_work); - delay(msecs_to_jiffies(500)); + xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inodes_work, &completion); + wait_for_completion(&completion); xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC); } @@ -497,7 +482,7 @@ xfssyncd( { struct xfs_mount *mp = arg; long timeleft; - bhv_vfs_sync_work_t *work, *n; + xfs_sync_work_t *work, *n; LIST_HEAD (tmp); set_freezable(); @@ -532,6 +517,8 @@ xfssyncd( list_del(&work->w_list); if (work == &mp->m_sync_work) continue; + if (work->w_completion) + complete(work->w_completion); kmem_free(work); } } @@ -545,6 +532,7 @@ xfs_syncd_init( { mp->m_sync_work.w_syncer = xfs_sync_worker; mp->m_sync_work.w_mount = mp; + mp->m_sync_work.w_completion = NULL; mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd"); if (IS_ERR(mp->m_sync_task)) return -PTR_ERR(mp->m_sync_task); diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index 04f058c848a..308d5bf6dfb 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -21,18 +21,20 @@ struct xfs_mount; struct xfs_perag; -typedef struct bhv_vfs_sync_work { +typedef struct xfs_sync_work { struct list_head w_list; struct xfs_mount *w_mount; void *w_data; /* syncer routine argument */ void (*w_syncer)(struct xfs_mount *, void *); -} bhv_vfs_sync_work_t; + struct completion *w_completion; +} xfs_sync_work_t; #define SYNC_ATTR 0x0001 /* sync attributes */ #define SYNC_DELWRI 0x0002 /* look at delayed writes */ #define SYNC_WAIT 0x0004 /* wait for i/o to complete */ #define SYNC_BDFLUSH 0x0008 /* BDFLUSH is calling -- don't block */ #define SYNC_IOWAIT 0x0010 /* wait for all I/O to complete */ +#define SYNC_TRYLOCK 0x0020 /* only try to lock inodes */ int xfs_syncd_init(struct xfs_mount *mp); void xfs_syncd_stop(struct xfs_mount *mp); @@ -43,8 +45,7 @@ int xfs_sync_fsdata(struct xfs_mount *mp, int flags); int xfs_quiesce_data(struct xfs_mount *mp); void xfs_quiesce_attr(struct xfs_mount *mp); -void xfs_flush_inode(struct xfs_inode *ip); -void xfs_flush_device(struct xfs_inode *ip); +void xfs_flush_inodes(struct xfs_inode *ip); int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode); int xfs_reclaim_inodes(struct xfs_mount *mp, int noblock, int mode); diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 478e587087f..89b81eedce6 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -69,15 +69,6 @@ xfs_inode_alloc( ASSERT(!spin_is_locked(&ip->i_flags_lock)); ASSERT(completion_done(&ip->i_flush)); - /* - * initialise the VFS inode here to get failures - * out of the way early. - */ - if (!inode_init_always(mp->m_super, VFS_I(ip))) { - kmem_zone_free(xfs_inode_zone, ip); - return NULL; - } - /* initialise the xfs inode */ ip->i_ino = ino; ip->i_mount = mp; @@ -113,6 +104,20 @@ xfs_inode_alloc( #ifdef XFS_DIR2_TRACE ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS); #endif + /* + * Now initialise the VFS inode. 
We do this after the xfs_inode + * initialisation as internal failures will result in ->destroy_inode + * being called and that will pass down through the reclaim path and + * free the XFS inode. This path requires the XFS inode to already be + * initialised. Hence if this call fails, the xfs_inode has already + * been freed and we should not reference it at all in the error + * handling. + */ + if (!inode_init_always(mp->m_super, VFS_I(ip))) + return NULL; + + /* prevent anyone from using this yet */ + VFS_I(ip)->i_state = I_NEW|I_LOCK; return ip; } diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 08ce72316bf..5aaa2d7ec15 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -338,38 +338,6 @@ xfs_iomap_eof_align_last_fsb( } STATIC int -xfs_flush_space( - xfs_inode_t *ip, - int *fsynced, - int *ioflags) -{ - switch (*fsynced) { - case 0: - if (ip->i_delayed_blks) { - xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_flush_inode(ip); - xfs_ilock(ip, XFS_ILOCK_EXCL); - *fsynced = 1; - } else { - *ioflags |= BMAPI_SYNC; - *fsynced = 2; - } - return 0; - case 1: - *fsynced = 2; - *ioflags |= BMAPI_SYNC; - return 0; - case 2: - xfs_iunlock(ip, XFS_ILOCK_EXCL); - xfs_flush_device(ip); - xfs_ilock(ip, XFS_ILOCK_EXCL); - *fsynced = 3; - return 0; - } - return 1; -} - -STATIC int xfs_cmn_err_fsblock_zero( xfs_inode_t *ip, xfs_bmbt_irec_t *imap) @@ -538,15 +506,9 @@ error_out: } /* - * If the caller is doing a write at the end of the file, - * then extend the allocation out to the file system's write - * iosize. We clean up any extra space left over when the - * file is closed in xfs_inactive(). - * - * For sync writes, we are flushing delayed allocate space to - * try to make additional space available for allocation near - * the filesystem full boundary - preallocation hurts in that - * situation, of course. + * If the caller is doing a write at the end of the file, then extend the + * allocation out to the file system's write iosize. We clean up any extra + * space left over when the file is closed in xfs_inactive(). */ STATIC int xfs_iomap_eof_want_preallocate( @@ -565,7 +527,7 @@ xfs_iomap_eof_want_preallocate( int n, error, imaps; *prealloc = 0; - if ((ioflag & BMAPI_SYNC) || (offset + count) <= ip->i_size) + if ((offset + count) <= ip->i_size) return 0; /* @@ -611,7 +573,7 @@ xfs_iomap_write_delay( xfs_extlen_t extsz; int nimaps; xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS]; - int prealloc, fsynced = 0; + int prealloc, flushed = 0; int error; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); @@ -627,12 +589,12 @@ xfs_iomap_write_delay( extsz = xfs_get_extsz_hint(ip); offset_fsb = XFS_B_TO_FSBT(mp, offset); -retry: error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count, ioflag, imap, XFS_WRITE_IMAPS, &prealloc); if (error) return error; +retry: if (prealloc) { aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1)); ioalign = XFS_B_TO_FSBT(mp, aligned_offset); @@ -659,15 +621,22 @@ retry: /* * If bmapi returned us nothing, and if we didn't get back EDQUOT, - * then we must have run out of space - flush delalloc, and retry.. + * then we must have run out of space - flush all other inodes with + * delalloc blocks and retry without EOF preallocation. 
*/ if (nimaps == 0) { xfs_iomap_enter_trace(XFS_IOMAP_WRITE_NOSPACE, ip, offset, count); - if (xfs_flush_space(ip, &fsynced, &ioflag)) + if (flushed) return XFS_ERROR(ENOSPC); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_flush_inodes(ip); + xfs_ilock(ip, XFS_ILOCK_EXCL); + + flushed = 1; error = 0; + prealloc = 0; goto retry; } diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index a1cc1322fc0..fdcf7b82747 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -40,8 +40,7 @@ typedef enum { BMAPI_IGNSTATE = (1 << 4), /* ignore unwritten state on read */ BMAPI_DIRECT = (1 << 5), /* direct instead of buffered write */ BMAPI_MMAP = (1 << 6), /* allocate for mmap write */ - BMAPI_SYNC = (1 << 7), /* sync write to flush delalloc space */ - BMAPI_TRYLOCK = (1 << 8), /* non-blocking request */ + BMAPI_TRYLOCK = (1 << 7), /* non-blocking request */ } bmapi_flags_t; diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index f76c6d7cea2..3750f04ede0 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -562,9 +562,8 @@ xfs_log_mount( } mp->m_log = xlog_alloc_log(mp, log_target, blk_offset, num_bblks); - if (!mp->m_log) { - cmn_err(CE_WARN, "XFS: Log allocation failed: No memory!"); - error = ENOMEM; + if (IS_ERR(mp->m_log)) { + error = -PTR_ERR(mp->m_log); goto out; } @@ -1180,10 +1179,13 @@ xlog_alloc_log(xfs_mount_t *mp, xfs_buf_t *bp; int i; int iclogsize; + int error = ENOMEM; log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL); - if (!log) - return NULL; + if (!log) { + xlog_warn("XFS: Log allocation failed: No memory!"); + goto out; + } log->l_mp = mp; log->l_targ = log_target; @@ -1201,19 +1203,35 @@ xlog_alloc_log(xfs_mount_t *mp, log->l_grant_reserve_cycle = 1; log->l_grant_write_cycle = 1; + error = EFSCORRUPTED; if (xfs_sb_version_hassector(&mp->m_sb)) { log->l_sectbb_log = mp->m_sb.sb_logsectlog - BBSHIFT; - ASSERT(log->l_sectbb_log <= mp->m_sectbb_log); + if (log->l_sectbb_log < 0 || + log->l_sectbb_log > mp->m_sectbb_log) { + xlog_warn("XFS: Log sector size (0x%x) out of range.", + log->l_sectbb_log); + goto out_free_log; + } + /* for larger sector sizes, must have v2 or external log */ - ASSERT(log->l_sectbb_log == 0 || - log->l_logBBstart == 0 || - xfs_sb_version_haslogv2(&mp->m_sb)); - ASSERT(mp->m_sb.sb_logsectlog >= BBSHIFT); + if (log->l_sectbb_log != 0 && + (log->l_logBBstart != 0 && + !xfs_sb_version_haslogv2(&mp->m_sb))) { + xlog_warn("XFS: log sector size (0x%x) invalid " + "for configuration.", log->l_sectbb_log); + goto out_free_log; + } + if (mp->m_sb.sb_logsectlog < BBSHIFT) { + xlog_warn("XFS: Log sector log (0x%x) too small.", + mp->m_sb.sb_logsectlog); + goto out_free_log; + } } log->l_sectbb_mask = (1 << log->l_sectbb_log) - 1; xlog_get_iclog_buffer_size(mp, log); + error = ENOMEM; bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp); if (!bp) goto out_free_log; @@ -1313,7 +1331,8 @@ out_free_iclog: xfs_buf_free(log->l_xbuf); out_free_log: kmem_free(log); - return NULL; +out: + return ERR_PTR(-error); } /* xlog_alloc_log */ @@ -2541,18 +2560,19 @@ redo: xlog_ins_ticketq(&log->l_reserve_headq, tic); xlog_trace_loggrant(log, tic, "xlog_grant_log_space: sleep 2"); + spin_unlock(&log->l_grant_lock); + xlog_grant_push_ail(log->l_mp, need_bytes); + spin_lock(&log->l_grant_lock); + XFS_STATS_INC(xs_sleep_logspace); sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); - if (XLOG_FORCED_SHUTDOWN(log)) { - spin_lock(&log->l_grant_lock); + spin_lock(&log->l_grant_lock); + if (XLOG_FORCED_SHUTDOWN(log)) goto error_return; - } xlog_trace_loggrant(log, tic, 
"xlog_grant_log_space: wake 2"); - xlog_grant_push_ail(log->l_mp, need_bytes); - spin_lock(&log->l_grant_lock); goto redo; } else if (tic->t_flags & XLOG_TIC_IN_Q) xlog_del_ticketq(&log->l_reserve_headq, tic); @@ -2631,7 +2651,7 @@ xlog_regrant_write_log_space(xlog_t *log, * for more free space, otherwise try to get some space for * this transaction. */ - + need_bytes = tic->t_unit_res; if ((ntic = log->l_write_headq)) { free_bytes = xlog_space_left(log, log->l_grant_write_cycle, log->l_grant_write_bytes); @@ -2651,26 +2671,25 @@ xlog_regrant_write_log_space(xlog_t *log, xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: sleep 1"); + spin_unlock(&log->l_grant_lock); + xlog_grant_push_ail(log->l_mp, need_bytes); + spin_lock(&log->l_grant_lock); + XFS_STATS_INC(xs_sleep_logspace); sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); /* If we're shutting down, this tic is already * off the queue */ - if (XLOG_FORCED_SHUTDOWN(log)) { - spin_lock(&log->l_grant_lock); + spin_lock(&log->l_grant_lock); + if (XLOG_FORCED_SHUTDOWN(log)) goto error_return; - } xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: wake 1"); - xlog_grant_push_ail(log->l_mp, tic->t_unit_res); - spin_lock(&log->l_grant_lock); } } - need_bytes = tic->t_unit_res; - redo: if (XLOG_FORCED_SHUTDOWN(log)) goto error_return; @@ -2680,19 +2699,20 @@ redo: if (free_bytes < need_bytes) { if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) xlog_ins_ticketq(&log->l_write_headq, tic); + spin_unlock(&log->l_grant_lock); + xlog_grant_push_ail(log->l_mp, need_bytes); + spin_lock(&log->l_grant_lock); + XFS_STATS_INC(xs_sleep_logspace); sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); /* If we're shutting down, this tic is already off the queue */ - if (XLOG_FORCED_SHUTDOWN(log)) { - spin_lock(&log->l_grant_lock); + spin_lock(&log->l_grant_lock); + if (XLOG_FORCED_SHUTDOWN(log)) goto error_return; - } xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: wake 2"); - xlog_grant_push_ail(log->l_mp, need_bytes); - spin_lock(&log->l_grant_lock); goto redo; } else if (tic->t_flags & XLOG_TIC_IN_Q) xlog_del_ticketq(&log->l_write_headq, tic); diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 7af44adffc8..d6a64392f98 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -313,7 +313,7 @@ typedef struct xfs_mount { #endif struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ struct task_struct *m_sync_task; /* generalised sync thread */ - bhv_vfs_sync_work_t m_sync_work; /* work item for VFS_SYNC */ + xfs_sync_work_t m_sync_work; /* work item for VFS_SYNC */ struct list_head m_sync_list; /* sync thread work item list */ spinlock_t m_sync_lock; /* work item list lock */ int m_sync_seq; /* sync thread generation no. 
*/ diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 7394c7af5de..19cf90a9c76 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -1457,6 +1457,13 @@ xfs_create( error = xfs_trans_reserve(tp, resblks, log_res, 0, XFS_TRANS_PERM_LOG_RES, log_count); if (error == ENOSPC) { + /* flush outstanding delalloc blocks and retry */ + xfs_flush_inodes(dp); + error = xfs_trans_reserve(tp, resblks, XFS_CREATE_LOG_RES(mp), 0, + XFS_TRANS_PERM_LOG_RES, XFS_CREATE_LOG_COUNT); + } + if (error == ENOSPC) { + /* No space at all so try a "no-allocation" reservation */ resblks = 0; error = xfs_trans_reserve(tp, 0, log_res, 0, XFS_TRANS_PERM_LOG_RES, log_count); diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h index 37b82cb96c8..e727fe0d145 100644 --- a/include/asm-generic/bug.h +++ b/include/asm-generic/bug.h @@ -88,7 +88,7 @@ extern void warn_slowpath(const char *file, const int line, #else /* !CONFIG_BUG */ #ifndef HAVE_ARCH_BUG -#define BUG() +#define BUG() do {} while(0) #endif #ifndef HAVE_ARCH_BUG_ON diff --git a/include/asm-generic/siginfo.h b/include/asm-generic/siginfo.h index 35752dadd6d..c840719a8c5 100644 --- a/include/asm-generic/siginfo.h +++ b/include/asm-generic/siginfo.h @@ -201,7 +201,7 @@ typedef struct siginfo { #define TRAP_TRACE (__SI_FAULT|2) /* process trace trap */ #define TRAP_BRANCH (__SI_FAULT|3) /* process taken branch trap */ #define TRAP_HWBKPT (__SI_FAULT|4) /* hardware breakpoint/watchpoint */ -#define NSIGTRAP 2 +#define NSIGTRAP 4 /* * SIGCHLD si_codes diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h index 2df74eb0956..9477af01a63 100644 --- a/include/drm/drm_pciids.h +++ b/include/drm/drm_pciids.h @@ -472,6 +472,7 @@ {0x8086, 0x2562, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ {0x8086, 0x3582, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ {0x8086, 0x2572, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ + {0x8086, 0x358e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, \ {0, 0, 0} #define gamma_PCI_IDS \ @@ -533,4 +534,5 @@ {0x8086, 0x2e22, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ {0x8086, 0xa001, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ {0x8086, 0xa011, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ + {0x8086, 0x35e8, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA << 8, 0xffff00, 0}, \ {0, 0, 0} diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h index 67e3353a56d..95962fa8398 100644 --- a/include/drm/i915_drm.h +++ b/include/drm/i915_drm.h @@ -594,6 +594,9 @@ struct drm_i915_gem_busy { #define I915_BIT_6_SWIZZLE_9_10_11 4 /* Not seen by userland */ #define I915_BIT_6_SWIZZLE_UNKNOWN 5 +/* Seen by userland. */ +#define I915_BIT_6_SWIZZLE_9_17 6 +#define I915_BIT_6_SWIZZLE_9_10_17 7 struct drm_i915_gem_set_tiling { /** Handle of the buffer to have its tiling state updated */ diff --git a/include/linux/bio.h b/include/linux/bio.h index b900d2c67d2..b89cf2d8289 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -504,6 +504,115 @@ static inline int bio_has_data(struct bio *bio) return bio && bio->bi_io_vec != NULL; } +/* + * BIO list management for use by remapping drivers (e.g. DM or MD). + * + * A bio_list anchors a singly-linked list of bios chained through the bi_next + * member of the bio. The bio_list also caches the last list member to allow + * fast access to the tail.
+ */ +struct bio_list { + struct bio *head; + struct bio *tail; +}; + +static inline int bio_list_empty(const struct bio_list *bl) +{ + return bl->head == NULL; +} + +static inline void bio_list_init(struct bio_list *bl) +{ + bl->head = bl->tail = NULL; +} + +#define bio_list_for_each(bio, bl) \ + for (bio = (bl)->head; bio; bio = bio->bi_next) + +static inline unsigned bio_list_size(const struct bio_list *bl) +{ + unsigned sz = 0; + struct bio *bio; + + bio_list_for_each(bio, bl) + sz++; + + return sz; +} + +static inline void bio_list_add(struct bio_list *bl, struct bio *bio) +{ + bio->bi_next = NULL; + + if (bl->tail) + bl->tail->bi_next = bio; + else + bl->head = bio; + + bl->tail = bio; +} + +static inline void bio_list_add_head(struct bio_list *bl, struct bio *bio) +{ + bio->bi_next = bl->head; + + bl->head = bio; + + if (!bl->tail) + bl->tail = bio; +} + +static inline void bio_list_merge(struct bio_list *bl, struct bio_list *bl2) +{ + if (!bl2->head) + return; + + if (bl->tail) + bl->tail->bi_next = bl2->head; + else + bl->head = bl2->head; + + bl->tail = bl2->tail; +} + +static inline void bio_list_merge_head(struct bio_list *bl, + struct bio_list *bl2) +{ + if (!bl2->head) + return; + + if (bl->head) + bl2->tail->bi_next = bl->head; + else + bl->tail = bl2->tail; + + bl->head = bl2->head; +} + +static inline struct bio *bio_list_pop(struct bio_list *bl) +{ + struct bio *bio = bl->head; + + if (bio) { + bl->head = bl->head->bi_next; + if (!bl->head) + bl->tail = NULL; + + bio->bi_next = NULL; + } + + return bio; +} + +static inline struct bio *bio_list_get(struct bio_list *bl) +{ + struct bio *bio = bl->head; + + bl->head = bl->tail = NULL; + + return bio; +} + #if defined(CONFIG_BLK_DEV_INTEGRITY) #define bip_vec_idx(bip, idx) (&(bip->bip_vec[(idx)])) diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h index 493dedb7a67..29b3ce3f2a1 100644 --- a/include/linux/debug_locks.h +++ b/include/linux/debug_locks.h @@ -3,6 +3,7 @@ #include <linux/kernel.h> #include <asm/atomic.h> +#include <asm/system.h> struct task_struct; diff --git a/include/linux/fb.h b/include/linux/fb.h index f563c501393..330c4b1bfca 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -173,8 +173,12 @@ struct fb_fix_screeninfo { /* Interpretation of offset for color fields: All offsets are from the right, * inside a "pixel" value, which is exactly 'bits_per_pixel' wide (means: you * can use the offset as right argument to <<). A pixel afterwards is a bit - * stream and is written to video memory as that unmodified. This implies - * big-endian byte order if bits_per_pixel is greater than 8. + * stream and is written to video memory as that unmodified. + * + * For pseudocolor: offset and length should be the same for all color + * components. Offset specifies the position of the least significant bit + * of the palette index in a pixel value. Length indicates the number + * of available palette entries (i.e. # of entries = 1 << length).
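Returning to the bio_list helpers added in the include/linux/bio.h hunk above: the structure is a plain singly linked list threaded through the bios themselves, with a cached tail pointer for O(1) append. A compilable userspace mock, with struct bio reduced to the one field the list actually uses:

#include <assert.h>
#include <stddef.h>

struct bio { struct bio *bi_next; int id; };
struct bio_list { struct bio *head, *tail; };

static void demo_list_init(struct bio_list *bl)
{
	bl->head = bl->tail = NULL;
}

static void demo_list_add(struct bio_list *bl, struct bio *bio)
{
	bio->bi_next = NULL;
	if (bl->tail)			/* non-empty: link after cached tail */
		bl->tail->bi_next = bio;
	else
		bl->head = bio;
	bl->tail = bio;
}

static struct bio *demo_list_pop(struct bio_list *bl)
{
	struct bio *bio = bl->head;

	if (bio) {
		bl->head = bio->bi_next;
		if (!bl->head)		/* list emptied: invalidate tail */
			bl->tail = NULL;
		bio->bi_next = NULL;
	}
	return bio;
}

int main(void)
{
	struct bio a = { .id = 1 }, b = { .id = 2 };
	struct bio_list bl;

	demo_list_init(&bl);
	demo_list_add(&bl, &a);
	demo_list_add(&bl, &b);			/* O(1) thanks to the tail cache */
	assert(demo_list_pop(&bl)->id == 1);	/* FIFO order preserved */
	assert(demo_list_pop(&bl)->id == 2);
	assert(demo_list_pop(&bl) == NULL);
	return 0;
}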
*/ struct fb_bitfield { __u32 offset; /* beginning of bitfield */ diff --git a/include/linux/fiemap.h b/include/linux/fiemap.h index 671decbd2ae..934e22d6580 100644 --- a/include/linux/fiemap.h +++ b/include/linux/fiemap.h @@ -11,6 +11,8 @@ #ifndef _LINUX_FIEMAP_H #define _LINUX_FIEMAP_H +#include <linux/types.h> + struct fiemap_extent { __u64 fe_logical; /* logical offset in bytes for the start of * the extent from the beginning of the file */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 562d2855cf3..e766be0d432 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -87,6 +87,60 @@ struct inodes_stat_t { */ #define FMODE_NOCMTIME ((__force fmode_t)2048) +/* + * The below are the various read and write types that we support. Some of + * them include behavioral modifiers that send information down to the + * block layer and IO scheduler. Terminology: + * + * The block layer uses device plugging to defer IO a little bit, in + * the hope that we will see more IO very shortly. This increases + * coalescing of adjacent IO and thus reduces the number of IOs we + * have to send to the device. It also allows for better queuing, + * if the IO isn't mergeable. If the caller is going to be waiting + * for the IO, then he must ensure that the device is unplugged so + * that the IO is dispatched to the driver. + * + * All IO is handled async in Linux. This is fine for background + * writes, but for reads or writes that someone waits for completion + * on, we want to notify the block layer and IO scheduler so that they + * know about it. That allows them to make better scheduling + * decisions. So when the below references 'sync' and 'async', it + * is referencing this priority hint. + * + * With that in mind, the available types are: + * + * READ A normal read operation. Device will be plugged. + * READ_SYNC A synchronous read. Device is not plugged, caller can + * immediately wait on this read without caring about + * unplugging. + * READA Used for read-ahead operations. Lower priority, and the + * block layer could (in theory) choose to ignore this + * request if it runs into resource problems. + * WRITE A normal async write. Device will be plugged. + * SWRITE Like WRITE, but a special case for ll_rw_block() that + * tells it to lock the buffer first. Normally a buffer + * must be locked before doing IO. + * WRITE_SYNC_PLUG Synchronous write. Identical to WRITE, but passes down + * the hint that someone will be waiting on this IO + * shortly. The device must still be unplugged explicitly, + * WRITE_SYNC_PLUG does not do this as we could be + * submitting more writes before we actually wait on any + * of them. + * WRITE_SYNC Like WRITE_SYNC_PLUG, but also unplugs the device + * immediately after submission. The write equivalent + * of READ_SYNC. + * WRITE_ODIRECT Special case write for O_DIRECT only. + * SWRITE_SYNC + * SWRITE_SYNC_PLUG Like WRITE_SYNC/WRITE_SYNC_PLUG, but locks the buffer. + * See SWRITE. + * WRITE_BARRIER Like WRITE, but tells the block layer that all + * previously submitted writes must be safely on storage + * before this one is started. Also guarantees that when + * this write is complete, it itself is also safely on + * storage. Prevents reordering of writes on both sides + * of this IO. 
+ * + */ #define RW_MASK 1 #define RWA_MASK 2 #define READ 0 @@ -102,6 +156,11 @@ struct inodes_stat_t { (SWRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE)) #define SWRITE_SYNC (SWRITE_SYNC_PLUG | (1 << BIO_RW_UNPLUG)) #define WRITE_BARRIER (WRITE | (1 << BIO_RW_BARRIER)) + +/* + * These aren't really reads or writes, they pass down information about + * parts of device that are now unused by the file system. + */ #define DISCARD_NOBARRIER (1 << BIO_RW_DISCARD) #define DISCARD_BARRIER ((1 << BIO_RW_DISCARD) | (1 << BIO_RW_BARRIER)) @@ -738,9 +797,6 @@ enum inode_i_mutex_lock_class I_MUTEX_QUOTA }; -extern void inode_double_lock(struct inode *inode1, struct inode *inode2); -extern void inode_double_unlock(struct inode *inode1, struct inode *inode2); - /* * NOTE: in a 32bit arch with a preemptable kernel and * an UP compile the i_size_read/write must be atomic @@ -2150,8 +2206,6 @@ extern ssize_t generic_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); extern ssize_t generic_file_splice_write(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); -extern ssize_t generic_file_splice_write_nolock(struct pipe_inode_info *, - struct file *, loff_t *, size_t, unsigned int); extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out, loff_t *, size_t len, unsigned int flags); extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, diff --git a/include/linux/fsl_devices.h b/include/linux/fsl_devices.h index f2a78b5e8b5..43fc95d822d 100644 --- a/include/linux/fsl_devices.h +++ b/include/linux/fsl_devices.h @@ -43,10 +43,6 @@ * */ -/* Flags related to I2C device features */ -#define FSL_I2C_DEV_SEPARATE_DFSRR 0x00000001 -#define FSL_I2C_DEV_CLOCK_5200 0x00000002 - enum fsl_usb2_operating_modes { FSL_USB2_MPH_HOST, FSL_USB2_DR_HOST, diff --git a/include/linux/init_task.h b/include/linux/init_task.h index dcfb93337e9..d87247d2641 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -15,19 +15,6 @@ extern struct files_struct init_files; extern struct fs_struct init_fs; -#define INIT_KIOCTX(name, which_mm) \ -{ \ - .users = ATOMIC_INIT(1), \ - .dead = 0, \ - .mm = &which_mm, \ - .user_id = 0, \ - .next = NULL, \ - .wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.wait), \ - .ctx_lock = __SPIN_LOCK_UNLOCKED(name.ctx_lock), \ - .reqs_active = 0U, \ - .max_reqs = ~0U, \ -} - #define INIT_MM(name) \ { \ .mm_rb = RB_ROOT, \ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index ee98cd57088..06ba90c211a 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2514,6 +2514,8 @@ #define PCI_DEVICE_ID_INTEL_IOAT_TBG3 0x3433 #define PCI_DEVICE_ID_INTEL_82830_HB 0x3575 #define PCI_DEVICE_ID_INTEL_82830_CGC 0x3577 +#define PCI_DEVICE_ID_INTEL_82854_HB 0x358c +#define PCI_DEVICE_ID_INTEL_82854_IG 0x358e #define PCI_DEVICE_ID_INTEL_82855GM_HB 0x3580 #define PCI_DEVICE_ID_INTEL_82855GM_IG 0x3582 #define PCI_DEVICE_ID_INTEL_E7520_MCH 0x3590 diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 8e4120285f7..c8f038554e8 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -134,6 +134,11 @@ struct pipe_buf_operations { memory allocation, whereas PIPE_BUF makes atomicity guarantees. 
*/ #define PIPE_SIZE PAGE_SIZE +/* Pipe lock and unlock operations */ +void pipe_lock(struct pipe_inode_info *); +void pipe_unlock(struct pipe_inode_info *); +void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *); + /* Drop the inode semaphore and wait for a pipe event, atomically */ void pipe_wait(struct pipe_inode_info *pipe); diff --git a/include/linux/sht15.h b/include/linux/sht15.h new file mode 100644 index 00000000000..046bce05eca --- /dev/null +++ b/include/linux/sht15.h @@ -0,0 +1,24 @@ +/* + * sht15.h - support for the SHT15 Temperature and Humidity Sensor + * + * Copyright (c) 2009 Jonathan Cameron + * + * Copyright (c) 2007 Wouter Horre + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/** + * struct sht15_platform_data - sht15 connectivity info + * @gpio_data: no. of gpio to which bidirectional data line is connected + * @gpio_sck: no. of gpio to which the data clock is connected. + * @supply_mv: supply voltage in mv. Overridden by regulator if available. + **/ +struct sht15_platform_data { + int gpio_data; + int gpio_sck; + int supply_mv; +}; + diff --git a/include/linux/splice.h b/include/linux/splice.h index 528dcb93c2f..5f3faa9d15a 100644 --- a/include/linux/splice.h +++ b/include/linux/splice.h @@ -36,6 +36,8 @@ struct splice_desc { void *data; /* cookie */ } u; loff_t pos; /* file position */ + size_t num_spliced; /* number of bytes already spliced */ + bool need_wakeup; /* need to wake up writer */ }; struct partial_page { @@ -66,6 +68,16 @@ extern ssize_t splice_from_pipe(struct pipe_inode_info *, struct file *, splice_actor *); extern ssize_t __splice_from_pipe(struct pipe_inode_info *, struct splice_desc *, splice_actor *); +extern int splice_from_pipe_feed(struct pipe_inode_info *, struct splice_desc *, + splice_actor *); +extern int splice_from_pipe_next(struct pipe_inode_info *, + struct splice_desc *); +extern void splice_from_pipe_begin(struct splice_desc *); +extern void splice_from_pipe_end(struct pipe_inode_info *, + struct splice_desc *); +extern int pipe_to_file(struct pipe_inode_info *, struct pipe_buffer *, + struct splice_desc *); + extern ssize_t splice_to_pipe(struct pipe_inode_info *, struct splice_pipe_desc *); extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *, diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index b9584254259..625e9e4639c 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -29,7 +29,7 @@ /** * usb_serial_port: structure for the specific ports of a device. * @serial: pointer back to the struct usb_serial owner of this port. - * @tty: pointer to the corresponding tty for this port. + * @port: pointer to the corresponding tty_port for this port. * @lock: spinlock to grab when updating portions of this structure. * @mutex: mutex used to synchronize serial_open() and serial_close() * access for this port. @@ -44,19 +44,22 @@ * @interrupt_out_endpointAddress: endpoint address for the interrupt out pipe * for this port. * @bulk_in_buffer: pointer to the bulk in buffer for this port. + * @bulk_in_size: the size of the bulk_in_buffer, in bytes. * @read_urb: pointer to the bulk in struct urb for this port. * @bulk_in_endpointAddress: endpoint address for the bulk in pipe for this * port. * @bulk_out_buffer: pointer to the bulk out buffer for this port. 
* @bulk_out_size: the size of the bulk_out_buffer, in bytes. + * @write_urb: pointer to the bulk out struct urb for this port. + * @write_urb_busy: port's writing status + * @bulk_out_endpointAddress: endpoint address for the bulk out pipe for this * port. * @write_wait: a wait_queue_head_t used by the port. * @work: work queue entry for the line discipline waking up. - * @open_count: number of times this port has been opened. * @throttled: nonzero if the read urb is inactive to throttle the device * @throttle_req: nonzero if the tty wants to throttle us + * @console: attached usb serial console + * @dev: pointer to the serial device * * This structure is used by the usb-serial core and drivers for the specific * ports of a device. diff --git a/include/sound/jack.h b/include/sound/jack.h index 6b013c6f6a0..f236e426a70 100644 --- a/include/sound/jack.h +++ b/include/sound/jack.h @@ -50,6 +50,8 @@ struct snd_jack { int type; const char *id; char name[100]; + void *private_data; + void (*private_free)(struct snd_jack *); }; #ifdef CONFIG_SND_JACK diff --git a/include/sound/pcm.h b/include/sound/pcm.h index 8904b1900d7..c1729689161 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -268,7 +268,8 @@ struct snd_pcm_runtime { int overrange; snd_pcm_uframes_t avail_max; snd_pcm_uframes_t hw_ptr_base; /* Position at buffer restart */ - snd_pcm_uframes_t hw_ptr_interrupt; /* Position at interrupt time*/ + snd_pcm_uframes_t hw_ptr_interrupt; /* Position at interrupt time */ + unsigned long hw_ptr_jiffies; /* Time when hw_ptr is updated */ /* -- HW params -- */ snd_pcm_access_t access; /* access mode */ diff --git a/include/video/cyblafb.h b/include/video/cyblafb.h deleted file mode 100644 index d3c1d4e2c8e..00000000000 --- a/include/video/cyblafb.h +++ /dev/null @@ -1,175 +0,0 @@ - -#ifndef CYBLAFB_DEBUG -#define CYBLAFB_DEBUG 0 -#endif - -#if CYBLAFB_DEBUG -#define debug(f,a...) printk("%s:" f, __func__ , ## a); -#else -#define debug(f,a...) -#endif - -#define output(f, a...)
printk("cyblafb: " f, ## a) - -#define Kb (1024) -#define Mb (Kb*Kb) - -/* PCI IDS of supported cards temporarily here */ - -#define CYBERBLADEi1 0x8500 - -/* these defines are for 'lcd' variable */ -#define LCD_STRETCH 0 -#define LCD_CENTER 1 -#define LCD_BIOS 2 - -/* display types */ -#define DISPLAY_CRT 0 -#define DISPLAY_FP 1 - -#define ROP_S 0xCC - -#define point(x,y) ((y)<<16|(x)) - -// -// Attribute Regs, ARxx, 3c0/3c1 -// -#define AR00 0x00 -#define AR01 0x01 -#define AR02 0x02 -#define AR03 0x03 -#define AR04 0x04 -#define AR05 0x05 -#define AR06 0x06 -#define AR07 0x07 -#define AR08 0x08 -#define AR09 0x09 -#define AR0A 0x0A -#define AR0B 0x0B -#define AR0C 0x0C -#define AR0D 0x0D -#define AR0E 0x0E -#define AR0F 0x0F -#define AR10 0x10 -#define AR12 0x12 -#define AR13 0x13 - -// -// Sequencer Regs, SRxx, 3c4/3c5 -// -#define SR00 0x00 -#define SR01 0x01 -#define SR02 0x02 -#define SR03 0x03 -#define SR04 0x04 -#define SR0D 0x0D -#define SR0E 0x0E -#define SR11 0x11 -#define SR18 0x18 -#define SR19 0x19 - -// -// -// -#define CR00 0x00 -#define CR01 0x01 -#define CR02 0x02 -#define CR03 0x03 -#define CR04 0x04 -#define CR05 0x05 -#define CR06 0x06 -#define CR07 0x07 -#define CR08 0x08 -#define CR09 0x09 -#define CR0A 0x0A -#define CR0B 0x0B -#define CR0C 0x0C -#define CR0D 0x0D -#define CR0E 0x0E -#define CR0F 0x0F -#define CR10 0x10 -#define CR11 0x11 -#define CR12 0x12 -#define CR13 0x13 -#define CR14 0x14 -#define CR15 0x15 -#define CR16 0x16 -#define CR17 0x17 -#define CR18 0x18 -#define CR19 0x19 -#define CR1A 0x1A -#define CR1B 0x1B -#define CR1C 0x1C -#define CR1D 0x1D -#define CR1E 0x1E -#define CR1F 0x1F -#define CR20 0x20 -#define CR21 0x21 -#define CR27 0x27 -#define CR29 0x29 -#define CR2A 0x2A -#define CR2B 0x2B -#define CR2D 0x2D -#define CR2F 0x2F -#define CR36 0x36 -#define CR38 0x38 -#define CR39 0x39 -#define CR3A 0x3A -#define CR55 0x55 -#define CR56 0x56 -#define CR57 0x57 -#define CR58 0x58 - -// -// -// - -#define GR00 0x01 -#define GR01 0x01 -#define GR02 0x02 -#define GR03 0x03 -#define GR04 0x04 -#define GR05 0x05 -#define GR06 0x06 -#define GR07 0x07 -#define GR08 0x08 -#define GR0F 0x0F -#define GR20 0x20 -#define GR23 0x23 -#define GR2F 0x2F -#define GR30 0x30 -#define GR31 0x31 -#define GR33 0x33 -#define GR52 0x52 -#define GR53 0x53 -#define GR5D 0x5d - - -// -// Graphics Engine -// -#define GEBase 0x2100 // could be mapped elsewhere if we like it -#define GE00 (GEBase+0x00) // source 1, p 111 -#define GE04 (GEBase+0x04) // source 2, p 111 -#define GE08 (GEBase+0x08) // destination 1, p 111 -#define GE0C (GEBase+0x0C) // destination 2, p 112 -#define GE10 (GEBase+0x10) // right view base & enable, p 112 -#define GE13 (GEBase+0x13) // left view base & enable, p 112 -#define GE18 (GEBase+0x18) // block write start address, p 112 -#define GE1C (GEBase+0x1C) // block write end address, p 112 -#define GE20 (GEBase+0x20) // engine status, p 113 -#define GE24 (GEBase+0x24) // reset all GE pointers -#define GE44 (GEBase+0x44) // command register, p 126 -#define GE48 (GEBase+0x48) // raster operation, p 127 -#define GE60 (GEBase+0x60) // foreground color, p 128 -#define GE64 (GEBase+0x64) // background color, p 128 -#define GE6C (GEBase+0x6C) // Pattern and Style, p 129, ok -#define GE9C (GEBase+0x9C) // pixel engine data port, p 125 -#define GEB8 (GEBase+0xB8) // Destination Stride / Buffer Base 0, p 133 -#define GEBC (GEBase+0xBC) // Destination Stride / Buffer Base 1, p 133 -#define GEC0 (GEBase+0xC0) // Destination Stride / Buffer Base 2, p 133 -#define 
GEC4 (GEBase+0xC4) // Destination Stride / Buffer Base 3, p 133 -#define GEC8 (GEBase+0xC8) // Source Stride / Buffer Base 0, p 133 -#define GECC (GEBase+0xCC) // Source Stride / Buffer Base 1, p 133 -#define GED0 (GEBase+0xD0) // Source Stride / Buffer Base 2, p 133 -#define GED4 (GEBase+0xD4) // Source Stride / Buffer Base 3, p 133 diff --git a/init/initramfs.c b/init/initramfs.c index 80cd713f6cc..9ee7b781041 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -310,7 +310,8 @@ static int __init do_name(void) if (wfd >= 0) { sys_fchown(wfd, uid, gid); sys_fchmod(wfd, mode); - sys_ftruncate(wfd, body_len); + if (body_len) + sys_ftruncate(wfd, body_len); vcollected = kstrdup(collected, GFP_KERNEL); state = CopyFile; } @@ -515,6 +516,7 @@ skip: initrd_end = 0; } +#ifdef CONFIG_BLK_DEV_RAM #define BUF_SIZE 1024 static void __init clean_rootfs(void) { @@ -561,6 +563,7 @@ static void __init clean_rootfs(void) sys_close(fd); kfree(buf); } +#endif static int __init populate_rootfs(void) { diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c index 89f60ec8ee5..24ae46dfe45 100644 --- a/ipc/mq_sysctl.c +++ b/ipc/mq_sysctl.c @@ -22,6 +22,7 @@ #define MIN_MSGSIZEMAX 128 /* min value for msgsize_max */ #define MAX_MSGSIZEMAX (8192*128) /* max value for msgsize_max */ +#ifdef CONFIG_PROC_SYSCTL static void *get_mq(ctl_table *table) { char *which = table->data; @@ -30,7 +31,6 @@ static void *get_mq(ctl_table *table) return which; } -#ifdef CONFIG_PROC_SYSCTL static int proc_mq_dointvec(ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos) { diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 505f319e489..8ba052c86d4 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -64,8 +64,6 @@ static int submit(int rw, pgoff_t page_off, struct page *page, struct bio *bio; bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1); - if (!bio) - return -ENOMEM; bio->bi_sector = page_off * (PAGE_SIZE >> 9); bio->bi_bdev = resume_bdev; bio->bi_end_io = end_swap_bio_read; diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 64191fa09b7..dfcd83ceee3 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -604,10 +604,11 @@ repeat: ret = security_ptrace_traceme(current->parent); /* - * Set the ptrace bit in the process ptrace flags. - * Then link us on our parent's ptraced list. + * Check PF_EXITING to ensure ->real_parent has not passed + * exit_ptrace(). Otherwise we don't report the error but + * pretend ->real_parent untraces us right after return. */ - if (!ret) { + if (!ret && !(current->real_parent->flags & PF_EXITING)) { current->ptrace |= PT_PTRACED; __ptrace_link(current, current->real_parent); } diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 2c7b8457d0d..a967c9feb90 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -58,6 +58,10 @@ static DEFINE_MUTEX(rcu_barrier_mutex); static struct completion rcu_barrier_completion; int rcu_scheduler_active __read_mostly; +static atomic_t rcu_migrate_type_count = ATOMIC_INIT(0); +static struct rcu_head rcu_migrate_head[3]; +static DECLARE_WAIT_QUEUE_HEAD(rcu_migrate_wq); + /* * Awaken the corresponding synchronize_rcu() instance now that a * grace period has elapsed. 
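The ipc/mq_sysctl.c hunk above moves get_mq() under the CONFIG_PROC_SYSCTL guard; its apparent only caller, proc_mq_dointvec(), already lives there, so an unguarded definition becomes dead code (and an unused-function warning) whenever that option is off -- an inferred rationale, as the diff itself does not say. The general shape of the fix:

#include <stdio.h>

#define CONFIG_FOO 1	/* set to 0: helper and caller vanish together */

#if CONFIG_FOO
/* helper is only referenced from guarded code, so it shares the guard */
static int helper(int x)
{
	return x * 2;
}

static void foo(void)
{
	printf("%d\n", helper(21));
}
#endif

int main(void)
{
#if CONFIG_FOO
	foo();
#endif
	return 0;
}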
@@ -122,7 +126,10 @@ static void rcu_barrier_func(void *type) } } -static inline void wait_migrated_callbacks(void); +static inline void wait_migrated_callbacks(void) +{ + wait_event(rcu_migrate_wq, !atomic_read(&rcu_migrate_type_count)); +} /* * Orchestrate the specified type of RCU barrier, waiting for all @@ -179,21 +186,12 @@ void rcu_barrier_sched(void) } EXPORT_SYMBOL_GPL(rcu_barrier_sched); -static atomic_t rcu_migrate_type_count = ATOMIC_INIT(0); -static struct rcu_head rcu_migrate_head[3]; -static DECLARE_WAIT_QUEUE_HEAD(rcu_migrate_wq); - static void rcu_migrate_callback(struct rcu_head *notused) { if (atomic_dec_and_test(&rcu_migrate_type_count)) wake_up(&rcu_migrate_wq); } -static inline void wait_migrated_callbacks(void) -{ - wait_event(rcu_migrate_wq, !atomic_read(&rcu_migrate_type_count)); -} - static int __cpuinit rcu_barrier_cpu_hotplug(struct notifier_block *self, unsigned long action, void *hcpu) { diff --git a/kernel/sys.c b/kernel/sys.c index 51dbb55604e..e7998cf3149 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -360,6 +360,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, void __user *, arg) { char buffer[256]; + int ret = 0; /* We only trust the superuser with rebooting the system. */ if (!capable(CAP_SYS_BOOT)) @@ -397,7 +398,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, kernel_halt(); unlock_kernel(); do_exit(0); - break; + panic("cannot halt"); case LINUX_REBOOT_CMD_POWER_OFF: kernel_power_off(); @@ -417,29 +418,22 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, #ifdef CONFIG_KEXEC case LINUX_REBOOT_CMD_KEXEC: - { - int ret; - ret = kernel_kexec(); - unlock_kernel(); - return ret; - } + ret = kernel_kexec(); + break; #endif #ifdef CONFIG_HIBERNATION case LINUX_REBOOT_CMD_SW_SUSPEND: - { - int ret = hibernate(); - unlock_kernel(); - return ret; - } + ret = hibernate(); + break; #endif default: - unlock_kernel(); - return -EINVAL; + ret = -EINVAL; + break; } unlock_kernel(); - return 0; + return ret; } static void deferred_cad(struct work_struct *dummy) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 4286b62b34a..e3d2c7dd59b 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -902,16 +902,6 @@ static struct ctl_table kern_table[] = { .proc_handler = &proc_dointvec, }, #endif -#ifdef CONFIG_UNEVICTABLE_LRU - { - .ctl_name = CTL_UNNUMBERED, - .procname = "scan_unevictable_pages", - .data = &scan_unevictable_pages, - .maxlen = sizeof(scan_unevictable_pages), - .mode = 0644, - .proc_handler = &scan_unevictable_handler, - }, -#endif #ifdef CONFIG_SLOW_WORK { .ctl_name = CTL_UNNUMBERED, @@ -1302,6 +1292,16 @@ static struct ctl_table vm_table[] = { .extra2 = &one, }, #endif +#ifdef CONFIG_UNEVICTABLE_LRU + { + .ctl_name = CTL_UNNUMBERED, + .procname = "scan_unevictable_pages", + .data = &scan_unevictable_pages, + .maxlen = sizeof(scan_unevictable_pages), + .mode = 0644, + .proc_handler = &scan_unevictable_handler, + }, +#endif /* * NOTE: do not add new entries to this table unless you have read * Documentation/sysctl/ctl_unnumbered.txt diff --git a/mm/Kconfig b/mm/Kconfig index b53427ad30a..57971d2ab84 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -213,6 +213,8 @@ config UNEVICTABLE_LRU will use one page flag and increase the code size a little, say Y unless you know what you are doing. + See Documentation/vm/unevictable-lru.txt for more information. 
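The kernel/sys.c change above is worth a closer look: instead of each switch case carrying its own unlock_kernel()/return pair, every case now stores its status in ret and breaks, so a single unlock-and-return tail covers all paths (and the unreachable break after do_exit(0) becomes a panic()). A condensed sketch of that single-exit shape, assuming a hypothetical command handler rather than the real syscall body:

	#include <linux/smp_lock.h>
	#include <linux/errno.h>

	static long handle_cmd(unsigned int cmd)
	{
		int ret = 0;

		lock_kernel();
		switch (cmd) {
		/* ... each real case sets ret and falls through to break ... */
		default:
			ret = -EINVAL;
			break;
		}
		unlock_kernel();	/* one unlock serves every path */
		return ret;
	}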
+ config HAVE_MLOCK bool default y if MMU=y diff --git a/mm/filemap.c b/mm/filemap.c index 2e2d38ebda4..8bd498040f3 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -567,8 +567,8 @@ EXPORT_SYMBOL(wait_on_page_bit); /** * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue - * @page - Page defining the wait queue of interest - * @waiter - Waiter to add to the queue + * @page: Page defining the wait queue of interest + * @waiter: Waiter to add to the queue * * Add an arbitrary @waiter to the wait queue for the nominated @page. */ diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 2fc6d6c4823..e44fb0fbb80 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -932,7 +932,7 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, if (unlikely(!mem)) return 0; - VM_BUG_ON(mem_cgroup_is_obsolete(mem)); + VM_BUG_ON(!mem || mem_cgroup_is_obsolete(mem)); while (1) { int ret; diff --git a/mm/shmem.c b/mm/shmem.c index d94d2e9146b..f9cb20ebb99 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -24,6 +24,7 @@ #include <linux/init.h> #include <linux/vfs.h> #include <linux/mount.h> +#include <linux/pagemap.h> #include <linux/file.h> #include <linux/mm.h> #include <linux/module.h> @@ -43,7 +44,6 @@ static struct vfsmount *shm_mnt; #include <linux/exportfs.h> #include <linux/generic_acl.h> #include <linux/mman.h> -#include <linux/pagemap.h> #include <linux/string.h> #include <linux/slab.h> #include <linux/backing-dev.h> @@ -65,13 +65,28 @@ static struct vfsmount *shm_mnt; #include <asm/div64.h> #include <asm/pgtable.h> +/* + * The maximum size of a shmem/tmpfs file is limited by the maximum size of + * its triple-indirect swap vector - see illustration at shmem_swp_entry(). + * + * With 4kB page size, maximum file size is just over 2TB on a 32-bit kernel, + * but one eighth of that on a 64-bit kernel. With 8kB page size, maximum + * file size is just over 4TB on a 64-bit kernel, but 16TB on a 32-bit kernel, + * MAX_LFS_FILESIZE being then more restrictive than swap vector layout. + * + * We use / and * instead of shifts in the definitions below, so that the swap + * vector can be tested with small even values (e.g. 20) for ENTRIES_PER_PAGE. 
+ */ #define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long)) -#define ENTRIES_PER_PAGEPAGE (ENTRIES_PER_PAGE*ENTRIES_PER_PAGE) -#define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512) +#define ENTRIES_PER_PAGEPAGE ((unsigned long long)ENTRIES_PER_PAGE*ENTRIES_PER_PAGE) -#define SHMEM_MAX_INDEX (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1)) -#define SHMEM_MAX_BYTES ((unsigned long long)SHMEM_MAX_INDEX << PAGE_CACHE_SHIFT) +#define SHMSWP_MAX_INDEX (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1)) +#define SHMSWP_MAX_BYTES (SHMSWP_MAX_INDEX << PAGE_CACHE_SHIFT) +#define SHMEM_MAX_BYTES min_t(unsigned long long, SHMSWP_MAX_BYTES, MAX_LFS_FILESIZE) +#define SHMEM_MAX_INDEX ((unsigned long)((SHMEM_MAX_BYTES+1) >> PAGE_CACHE_SHIFT)) + +#define BLOCKS_PER_PAGE (PAGE_CACHE_SIZE/512) #define VM_ACCT(size) (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT) /* info->flags needs VM_flags to handle pagein/truncate races efficiently */ @@ -2581,7 +2596,7 @@ int shmem_unuse(swp_entry_t entry, struct page *page) #define shmem_get_inode(sb, mode, dev, flags) ramfs_get_inode(sb, mode, dev) #define shmem_acct_size(flags, size) 0 #define shmem_unacct_size(flags, size) do {} while (0) -#define SHMEM_MAX_BYTES LLONG_MAX +#define SHMEM_MAX_BYTES MAX_LFS_FILESIZE #endif /* CONFIG_SHMEM */ diff --git a/mm/util.c b/mm/util.c index 2599e83eea1..55bef160b9f 100644 --- a/mm/util.c +++ b/mm/util.c @@ -223,6 +223,22 @@ void arch_pick_mmap_layout(struct mm_struct *mm) } #endif +/** + * get_user_pages_fast() - pin user pages in memory + * @start: starting user address + * @nr_pages: number of pages from start to pin + * @write: whether pages will be written to + * @pages: array that receives pointers to the pages pinned. + * Should be at least nr_pages long. + * + * Attempt to pin user pages in memory without taking mm->mmap_sem. + * If not successful, it will fall back to taking the lock and + * calling get_user_pages(). + * + * Returns number of pages pinned. This may be fewer than the number + * requested. If nr_pages is 0 or negative, returns 0. If no pages + * were pinned, returns -errno. + */ int __attribute__((weak)) get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages) { diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c index a0affd9cfca..d4d41b3efc7 100644 --- a/security/tomoyo/common.c +++ b/security/tomoyo/common.c @@ -5,7 +5,7 @@ * * Copyright (C) 2005-2009 NTT DATA CORPORATION * - * Version: 2.2.0-pre 2009/02/01 + * Version: 2.2.0 2009/04/01 * */ @@ -1773,7 +1773,7 @@ void tomoyo_load_policy(const char *filename) envp[2] = NULL; call_usermodehelper(argv[0], argv, envp, 1); - printk(KERN_INFO "TOMOYO: 2.2.0-pre 2009/02/01\n"); + printk(KERN_INFO "TOMOYO: 2.2.0 2009/04/01\n"); printk(KERN_INFO "Mandatory Access Control activated.\n"); tomoyo_policy_loaded = true; { /* Check all profiles currently assigned to domains are defined. 
*/ @@ -1800,7 +1800,7 @@ void tomoyo_load_policy(const char *filename) static int tomoyo_read_version(struct tomoyo_io_buffer *head) { if (!head->read_eof) { - tomoyo_io_printf(head, "2.2.0-pre"); + tomoyo_io_printf(head, "2.2.0"); head->read_eof = true; } return 0; diff --git a/security/tomoyo/common.h b/security/tomoyo/common.h index e77e6a6de0f..678f4ff16aa 100644 --- a/security/tomoyo/common.h +++ b/security/tomoyo/common.h @@ -5,7 +5,7 @@ * * Copyright (C) 2005-2009 NTT DATA CORPORATION * - * Version: 2.2.0-pre 2009/02/01 + * Version: 2.2.0 2009/04/01 * */ diff --git a/security/tomoyo/domain.c b/security/tomoyo/domain.c index 2f2b449ffd2..2d6748741a2 100644 --- a/security/tomoyo/domain.c +++ b/security/tomoyo/domain.c @@ -5,7 +5,7 @@ * * Copyright (C) 2005-2009 NTT DATA CORPORATION * - * Version: 2.2.0-pre 2009/02/01 + * Version: 2.2.0 2009/04/01 * */ diff --git a/security/tomoyo/file.c b/security/tomoyo/file.c index 65f50c1c5ee..2316da8ec5b 100644 --- a/security/tomoyo/file.c +++ b/security/tomoyo/file.c @@ -5,7 +5,7 @@ * * Copyright (C) 2005-2009 NTT DATA CORPORATION * - * Version: 2.2.0-pre 2009/02/01 + * Version: 2.2.0 2009/04/01 * */ diff --git a/security/tomoyo/realpath.c b/security/tomoyo/realpath.c index 3bbe01a7a4b..bf8e2b45168 100644 --- a/security/tomoyo/realpath.c +++ b/security/tomoyo/realpath.c @@ -5,7 +5,7 @@ * * Copyright (C) 2005-2009 NTT DATA CORPORATION * - * Version: 2.2.0-pre 2009/02/01 + * Version: 2.2.0 2009/04/01 * */ diff --git a/security/tomoyo/realpath.h b/security/tomoyo/realpath.h index 7ec9fc9cbc0..78217a37960 100644 --- a/security/tomoyo/realpath.h +++ b/security/tomoyo/realpath.h @@ -5,7 +5,7 @@ * * Copyright (C) 2005-2009 NTT DATA CORPORATION * - * Version: 2.2.0-pre 2009/02/01 + * Version: 2.2.0 2009/04/01 * */ diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c index 3eeeae12c4d..5b481912752 100644 --- a/security/tomoyo/tomoyo.c +++ b/security/tomoyo/tomoyo.c @@ -5,7 +5,7 @@ * * Copyright (C) 2005-2009 NTT DATA CORPORATION * - * Version: 2.2.0-pre 2009/02/01 + * Version: 2.2.0 2009/04/01 * */ diff --git a/security/tomoyo/tomoyo.h b/security/tomoyo/tomoyo.h index a0c8f6e0bea..41c6ebafb9c 100644 --- a/security/tomoyo/tomoyo.h +++ b/security/tomoyo/tomoyo.h @@ -5,7 +5,7 @@ * * Copyright (C) 2005-2009 NTT DATA CORPORATION * - * Version: 2.2.0-pre 2009/02/01 + * Version: 2.2.0 2009/04/01 * */ diff --git a/sound/core/control.c b/sound/core/control.c index 4b20fa2b7e6..17b8d47a5cd 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -723,14 +723,11 @@ static int snd_ctl_elem_read_user(struct snd_card *card, { struct snd_ctl_elem_value *control; int result; - - control = kmalloc(sizeof(*control), GFP_KERNEL); - if (control == NULL) - return -ENOMEM; - if (copy_from_user(control, _control, sizeof(*control))) { - kfree(control); - return -EFAULT; - } + + control = memdup_user(_control, sizeof(*control)); + if (IS_ERR(control)) + return PTR_ERR(control); + snd_power_lock(card); result = snd_power_wait(card, SNDRV_CTL_POWER_D0); if (result >= 0) @@ -784,13 +781,10 @@ static int snd_ctl_elem_write_user(struct snd_ctl_file *file, struct snd_card *card; int result; - control = kmalloc(sizeof(*control), GFP_KERNEL); - if (control == NULL) - return -ENOMEM; - if (copy_from_user(control, _control, sizeof(*control))) { - kfree(control); - return -EFAULT; - } + control = memdup_user(_control, sizeof(*control)); + if (IS_ERR(control)) + return PTR_ERR(control); + card = file->card; snd_power_lock(card); result = snd_power_wait(card, 
SNDRV_CTL_POWER_D0); @@ -916,13 +910,10 @@ static int snd_ctl_elem_user_tlv(struct snd_kcontrol *kcontrol, if (op_flag > 0) { if (size > 1024 * 128) /* sane value */ return -EINVAL; - new_data = kmalloc(size, GFP_KERNEL); - if (new_data == NULL) - return -ENOMEM; - if (copy_from_user(new_data, tlv, size)) { - kfree(new_data); - return -EFAULT; - } + + new_data = memdup_user(tlv, size); + if (IS_ERR(new_data)) + return PTR_ERR(new_data); change = ue->tlv_data_size != size; if (!change) change = memcmp(ue->tlv_data, new_data, size); diff --git a/sound/core/jack.c b/sound/core/jack.c index c8254c667c6..d54d1a05fe6 100644 --- a/sound/core/jack.c +++ b/sound/core/jack.c @@ -35,6 +35,9 @@ static int snd_jack_dev_free(struct snd_device *device) { struct snd_jack *jack = device->device_data; + if (jack->private_free) + jack->private_free(jack); + /* If the input device is registered with the input subsystem * then we need to use a different deallocator. */ if (jack->registered) diff --git a/sound/core/pcm_compat.c b/sound/core/pcm_compat.c index 36d7a599823..08bfed594a8 100644 --- a/sound/core/pcm_compat.c +++ b/sound/core/pcm_compat.c @@ -232,14 +232,11 @@ static int snd_pcm_ioctl_hw_params_compat(struct snd_pcm_substream *substream, if (! (runtime = substream->runtime)) return -ENOTTY; - data = kmalloc(sizeof(*data), GFP_KERNEL); - if (data == NULL) - return -ENOMEM; /* only fifo_size is different, so just copy all */ - if (copy_from_user(data, data32, sizeof(*data32))) { - err = -EFAULT; - goto error; - } + data = memdup_user(data32, sizeof(*data32)); + if (IS_ERR(data)) + return PTR_ERR(data); + if (refine) err = snd_pcm_hw_refine(substream, data); else diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index fbb2e391591..63d088f2265 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -209,9 +209,11 @@ static int snd_pcm_update_hw_ptr_interrupt(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; snd_pcm_uframes_t pos; - snd_pcm_uframes_t new_hw_ptr, hw_ptr_interrupt, hw_base; - snd_pcm_sframes_t delta; + snd_pcm_uframes_t old_hw_ptr, new_hw_ptr, hw_ptr_interrupt, hw_base; + snd_pcm_sframes_t hdelta, delta; + unsigned long jdelta; + old_hw_ptr = runtime->status->hw_ptr; pos = snd_pcm_update_hw_ptr_pos(substream, runtime); if (pos == SNDRV_PCM_POS_XRUN) { xrun(substream); @@ -247,7 +249,30 @@ static int snd_pcm_update_hw_ptr_interrupt(struct snd_pcm_substream *substream) new_hw_ptr = hw_base + pos; } } - if (delta > runtime->period_size) { + hdelta = new_hw_ptr - old_hw_ptr; + jdelta = jiffies - runtime->hw_ptr_jiffies; + if (((hdelta * HZ) / runtime->rate) > jdelta + HZ/100) { + delta = jdelta / + (((runtime->period_size * HZ) / runtime->rate) + + HZ/100); + hw_ptr_error(substream, + "hw_ptr skipping! [Q] " + "(pos=%ld, delta=%ld, period=%ld, " + "jdelta=%lu/%lu/%lu)\n", + (long)pos, (long)hdelta, + (long)runtime->period_size, jdelta, + ((hdelta * HZ) / runtime->rate), delta); + hw_ptr_interrupt = runtime->hw_ptr_interrupt + + runtime->period_size * delta; + if (hw_ptr_interrupt >= runtime->boundary) + hw_ptr_interrupt -= runtime->boundary; + /* rebase to interrupt position */ + hw_base = new_hw_ptr = hw_ptr_interrupt; + /* align hw_base to buffer_size */ + hw_base -= hw_base % runtime->buffer_size; + delta = 0; + } + if (delta > runtime->period_size + runtime->period_size / 2) { hw_ptr_error(substream, "Lost interrupts? 
" "(stream=%i, delta=%ld, intr_ptr=%ld)\n", @@ -263,6 +288,7 @@ static int snd_pcm_update_hw_ptr_interrupt(struct snd_pcm_substream *substream) runtime->hw_ptr_base = hw_base; runtime->status->hw_ptr = new_hw_ptr; + runtime->hw_ptr_jiffies = jiffies; runtime->hw_ptr_interrupt = hw_ptr_interrupt; return snd_pcm_update_hw_ptr_post(substream, runtime); @@ -275,6 +301,7 @@ int snd_pcm_update_hw_ptr(struct snd_pcm_substream *substream) snd_pcm_uframes_t pos; snd_pcm_uframes_t old_hw_ptr, new_hw_ptr, hw_base; snd_pcm_sframes_t delta; + unsigned long jdelta; old_hw_ptr = runtime->status->hw_ptr; pos = snd_pcm_update_hw_ptr_pos(substream, runtime); @@ -286,14 +313,15 @@ int snd_pcm_update_hw_ptr(struct snd_pcm_substream *substream) new_hw_ptr = hw_base + pos; delta = new_hw_ptr - old_hw_ptr; + jdelta = jiffies - runtime->hw_ptr_jiffies; if (delta < 0) { delta += runtime->buffer_size; if (delta < 0) { hw_ptr_error(substream, "Unexpected hw_pointer value [2] " - "(stream=%i, pos=%ld, old_ptr=%ld)\n", + "(stream=%i, pos=%ld, old_ptr=%ld, jdelta=%li)\n", substream->stream, (long)pos, - (long)old_hw_ptr); + (long)old_hw_ptr, jdelta); return 0; } hw_base += runtime->buffer_size; @@ -301,12 +329,13 @@ int snd_pcm_update_hw_ptr(struct snd_pcm_substream *substream) hw_base = 0; new_hw_ptr = hw_base + pos; } - if (delta > runtime->period_size && runtime->periods > 1) { + if (((delta * HZ) / runtime->rate) > jdelta + HZ/100) { hw_ptr_error(substream, "hw_ptr skipping! " - "(pos=%ld, delta=%ld, period=%ld)\n", + "(pos=%ld, delta=%ld, period=%ld, jdelta=%lu/%lu)\n", (long)pos, (long)delta, - (long)runtime->period_size); + (long)runtime->period_size, jdelta, + ((delta * HZ) / runtime->rate)); return 0; } if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK && @@ -315,6 +344,7 @@ int snd_pcm_update_hw_ptr(struct snd_pcm_substream *substream) runtime->hw_ptr_base = hw_base; runtime->status->hw_ptr = new_hw_ptr; + runtime->hw_ptr_jiffies = jiffies; return snd_pcm_update_hw_ptr_post(substream, runtime); } @@ -1441,6 +1471,7 @@ static int snd_pcm_lib_ioctl_reset(struct snd_pcm_substream *substream, runtime->status->hw_ptr %= runtime->buffer_size; else runtime->status->hw_ptr = 0; + runtime->hw_ptr_jiffies = jiffies; snd_pcm_stream_unlock_irqrestore(substream, flags); return 0; } diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index a151fb01ba8..fc6f98e257d 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -327,21 +327,16 @@ static int snd_pcm_hw_refine_user(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params; int err; - params = kmalloc(sizeof(*params), GFP_KERNEL); - if (!params) { - err = -ENOMEM; - goto out; - } - if (copy_from_user(params, _params, sizeof(*params))) { - err = -EFAULT; - goto out; - } + params = memdup_user(_params, sizeof(*params)); + if (IS_ERR(params)) + return PTR_ERR(params); + err = snd_pcm_hw_refine(substream, params); if (copy_to_user(_params, params, sizeof(*params))) { if (!err) err = -EFAULT; } -out: + kfree(params); return err; } @@ -465,21 +460,16 @@ static int snd_pcm_hw_params_user(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params; int err; - params = kmalloc(sizeof(*params), GFP_KERNEL); - if (!params) { - err = -ENOMEM; - goto out; - } - if (copy_from_user(params, _params, sizeof(*params))) { - err = -EFAULT; - goto out; - } + params = memdup_user(_params, sizeof(*params)); + if (IS_ERR(params)) + return PTR_ERR(params); + err = snd_pcm_hw_params(substream, params); if (copy_to_user(_params, params, 
sizeof(*params))) { if (!err) err = -EFAULT; } -out: + kfree(params); return err; } @@ -2593,13 +2583,11 @@ static int snd_pcm_playback_ioctl1(struct file *file, return -EFAULT; if (copy_from_user(&xfern, _xfern, sizeof(xfern))) return -EFAULT; - bufs = kmalloc(sizeof(void *) * runtime->channels, GFP_KERNEL); - if (bufs == NULL) - return -ENOMEM; - if (copy_from_user(bufs, xfern.bufs, sizeof(void *) * runtime->channels)) { - kfree(bufs); - return -EFAULT; - } + + bufs = memdup_user(xfern.bufs, + sizeof(void *) * runtime->channels); + if (IS_ERR(bufs)) + return PTR_ERR(bufs); result = snd_pcm_lib_writev(substream, bufs, xfern.frames); kfree(bufs); __put_user(result, &_xfern->result); @@ -2675,13 +2663,11 @@ static int snd_pcm_capture_ioctl1(struct file *file, return -EFAULT; if (copy_from_user(&xfern, _xfern, sizeof(xfern))) return -EFAULT; - bufs = kmalloc(sizeof(void *) * runtime->channels, GFP_KERNEL); - if (bufs == NULL) - return -ENOMEM; - if (copy_from_user(bufs, xfern.bufs, sizeof(void *) * runtime->channels)) { - kfree(bufs); - return -EFAULT; - } + + bufs = memdup_user(xfern.bufs, + sizeof(void *) * runtime->channels); + if (IS_ERR(bufs)) + return PTR_ERR(bufs); result = snd_pcm_lib_readv(substream, bufs, xfern.frames); kfree(bufs); __put_user(result, &_xfern->result); @@ -3312,18 +3298,12 @@ static int snd_pcm_hw_refine_old_user(struct snd_pcm_substream *substream, int err; params = kmalloc(sizeof(*params), GFP_KERNEL); - if (!params) { - err = -ENOMEM; - goto out; - } - oparams = kmalloc(sizeof(*oparams), GFP_KERNEL); - if (!oparams) { - err = -ENOMEM; - goto out; - } + if (!params) + return -ENOMEM; - if (copy_from_user(oparams, _oparams, sizeof(*oparams))) { - err = -EFAULT; + oparams = memdup_user(_oparams, sizeof(*oparams)); + if (IS_ERR(oparams)) { + err = PTR_ERR(oparams); goto out; } snd_pcm_hw_convert_from_old_params(params, oparams); @@ -3333,9 +3313,10 @@ static int snd_pcm_hw_refine_old_user(struct snd_pcm_substream *substream, if (!err) err = -EFAULT; } + + kfree(oparams); out: kfree(params); - kfree(oparams); return err; } @@ -3347,17 +3328,12 @@ static int snd_pcm_hw_params_old_user(struct snd_pcm_substream *substream, int err; params = kmalloc(sizeof(*params), GFP_KERNEL); - if (!params) { - err = -ENOMEM; - goto out; - } - oparams = kmalloc(sizeof(*oparams), GFP_KERNEL); - if (!oparams) { - err = -ENOMEM; - goto out; - } - if (copy_from_user(oparams, _oparams, sizeof(*oparams))) { - err = -EFAULT; + if (!params) + return -ENOMEM; + + oparams = memdup_user(_oparams, sizeof(*oparams)); + if (IS_ERR(oparams)) { + err = PTR_ERR(oparams); goto out; } snd_pcm_hw_convert_from_old_params(params, oparams); @@ -3367,9 +3343,10 @@ static int snd_pcm_hw_params_old_user(struct snd_pcm_substream *substream, if (!err) err = -EFAULT; } + + kfree(oparams); out: kfree(params); - kfree(oparams); return err; } #endif /* CONFIG_SND_SUPPORT_OLD_API */ diff --git a/sound/core/seq/seq_compat.c b/sound/core/seq/seq_compat.c index 38693f47c26..c956fe46256 100644 --- a/sound/core/seq/seq_compat.c +++ b/sound/core/seq/seq_compat.c @@ -48,12 +48,11 @@ static int snd_seq_call_port_info_ioctl(struct snd_seq_client *client, unsigned struct snd_seq_port_info *data; mm_segment_t fs; - data = kmalloc(sizeof(*data), GFP_KERNEL); - if (! 
data) - return -ENOMEM; + data = memdup_user(data32, sizeof(*data32)); + if (IS_ERR(data)) + return PTR_ERR(data); - if (copy_from_user(data, data32, sizeof(*data32)) || - get_user(data->flags, &data32->flags) || + if (get_user(data->flags, &data32->flags) || get_user(data->time_queue, &data32->time_queue)) goto error; data->kernel = NULL; diff --git a/sound/core/timer.c b/sound/core/timer.c index 3f0050d0b71..8f8b17ac074 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -1395,13 +1395,10 @@ static int snd_timer_user_ginfo(struct file *file, struct list_head *p; int err = 0; - ginfo = kmalloc(sizeof(*ginfo), GFP_KERNEL); - if (! ginfo) - return -ENOMEM; - if (copy_from_user(ginfo, _ginfo, sizeof(*ginfo))) { - kfree(ginfo); - return -EFAULT; - } + ginfo = memdup_user(_ginfo, sizeof(*ginfo)); + if (IS_ERR(ginfo)) + return PTR_ERR(ginfo); + tid = ginfo->tid; memset(ginfo, 0, sizeof(*ginfo)); ginfo->tid = tid; diff --git a/sound/isa/sb/sb16_csp.c b/sound/isa/sb/sb16_csp.c index 49037d074c7..bdc8dde4e4a 100644 --- a/sound/isa/sb/sb16_csp.c +++ b/sound/isa/sb/sb16_csp.c @@ -684,15 +684,16 @@ static int snd_sb_csp_load(struct snd_sb_csp * p, const unsigned char *buf, int static int snd_sb_csp_load_user(struct snd_sb_csp * p, const unsigned char __user *buf, int size, int load_flags) { - int err = -ENOMEM; - unsigned char *kbuf = kmalloc(size, GFP_KERNEL); - if (kbuf) { - if (copy_from_user(kbuf, buf, size)) - err = -EFAULT; - else - err = snd_sb_csp_load(p, kbuf, size, load_flags); - kfree(kbuf); - } + int err; + unsigned char *kbuf; + + kbuf = memdup_user(buf, size); + if (IS_ERR(kbuf)) + return PTR_ERR(kbuf); + + err = snd_sb_csp_load(p, kbuf, size, load_flags); + + kfree(kbuf); return err; } diff --git a/sound/isa/wavefront/wavefront_fx.c b/sound/isa/wavefront/wavefront_fx.c index a4345fc0756..2bb1cee0925 100644 --- a/sound/isa/wavefront/wavefront_fx.c +++ b/sound/isa/wavefront/wavefront_fx.c @@ -202,15 +202,11 @@ snd_wavefront_fx_ioctl (struct snd_hwdep *sdev, struct file *file, "> 512 bytes to FX\n"); return -EIO; } - page_data = kmalloc(r.data[2] * sizeof(short), GFP_KERNEL); - if (!page_data) - return -ENOMEM; - if (copy_from_user (page_data, - (unsigned char __user *) r.data[3], - r.data[2] * sizeof(short))) { - kfree(page_data); - return -EFAULT; - } + page_data = memdup_user((unsigned char __user *) + r.data[3], + r.data[2] * sizeof(short)); + if (IS_ERR(page_data)) + return PTR_ERR(page_data); pd = page_data; } diff --git a/sound/isa/wavefront/wavefront_synth.c b/sound/isa/wavefront/wavefront_synth.c index beb312cca75..5d4ff48c434 100644 --- a/sound/isa/wavefront/wavefront_synth.c +++ b/sound/isa/wavefront/wavefront_synth.c @@ -1664,12 +1664,11 @@ snd_wavefront_synth_ioctl (struct snd_hwdep *hw, struct file *file, break; case WFCTL_WFCMD: - wc = kmalloc(sizeof(*wc), GFP_KERNEL); - if (! 
wc) - return -ENOMEM; - if (copy_from_user (wc, argp, sizeof (*wc))) - err = -EFAULT; - else if (wavefront_synth_control (acard, wc) < 0) + wc = memdup_user(argp, sizeof(*wc)); + if (IS_ERR(wc)) + return PTR_ERR(wc); + + if (wavefront_synth_control (acard, wc) < 0) err = -EIO; else if (copy_to_user (argp, wc, sizeof (*wc))) err = -EFAULT; diff --git a/sound/pci/emu10k1/emufx.c b/sound/pci/emu10k1/emufx.c index 191e1cd9997..4b302d86f5f 100644 --- a/sound/pci/emu10k1/emufx.c +++ b/sound/pci/emu10k1/emufx.c @@ -2493,24 +2493,17 @@ static int snd_emu10k1_fx8010_ioctl(struct snd_hwdep * hw, struct file *file, un case SNDRV_EMU10K1_IOCTL_CODE_POKE: if (!capable(CAP_SYS_ADMIN)) return -EPERM; - icode = kmalloc(sizeof(*icode), GFP_KERNEL); - if (icode == NULL) - return -ENOMEM; - if (copy_from_user(icode, argp, sizeof(*icode))) { - kfree(icode); - return -EFAULT; - } + + icode = memdup_user(argp, sizeof(*icode)); + if (IS_ERR(icode)) + return PTR_ERR(icode); res = snd_emu10k1_icode_poke(emu, icode); kfree(icode); return res; case SNDRV_EMU10K1_IOCTL_CODE_PEEK: - icode = kmalloc(sizeof(*icode), GFP_KERNEL); - if (icode == NULL) - return -ENOMEM; - if (copy_from_user(icode, argp, sizeof(*icode))) { - kfree(icode); - return -EFAULT; - } + icode = memdup_user(argp, sizeof(*icode)); + if (IS_ERR(icode)) + return PTR_ERR(icode); res = snd_emu10k1_icode_peek(emu, icode); if (res == 0 && copy_to_user(argp, icode, sizeof(*icode))) { kfree(icode); @@ -2519,24 +2512,16 @@ static int snd_emu10k1_fx8010_ioctl(struct snd_hwdep * hw, struct file *file, un kfree(icode); return res; case SNDRV_EMU10K1_IOCTL_PCM_POKE: - ipcm = kmalloc(sizeof(*ipcm), GFP_KERNEL); - if (ipcm == NULL) - return -ENOMEM; - if (copy_from_user(ipcm, argp, sizeof(*ipcm))) { - kfree(ipcm); - return -EFAULT; - } + ipcm = memdup_user(argp, sizeof(*ipcm)); + if (IS_ERR(ipcm)) + return PTR_ERR(ipcm); res = snd_emu10k1_ipcm_poke(emu, ipcm); kfree(ipcm); return res; case SNDRV_EMU10K1_IOCTL_PCM_PEEK: - ipcm = kzalloc(sizeof(*ipcm), GFP_KERNEL); - if (ipcm == NULL) - return -ENOMEM; - if (copy_from_user(ipcm, argp, sizeof(*ipcm))) { - kfree(ipcm); - return -EFAULT; - } + ipcm = memdup_user(argp, sizeof(*ipcm)); + if (IS_ERR(ipcm)) + return PTR_ERR(ipcm); res = snd_emu10k1_ipcm_peek(emu, ipcm); if (res == 0 && copy_to_user(argp, ipcm, sizeof(*ipcm))) { kfree(ipcm); diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index a4e5e595211..fd6e6f337d1 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -2250,7 +2250,11 @@ int snd_hda_codec_write_cache(struct hda_codec *codec, hda_nid_t nid, err = bus->ops.command(bus, res); if (!err) { struct hda_cache_head *c; - u32 key = build_cmd_cache_key(nid, verb); + u32 key; + /* parm may contain the verb stuff for get/set amp */ + verb = verb | (parm >> 8); + parm &= 0xff; + key = build_cmd_cache_key(nid, verb); c = get_alloc_hash(&codec->cmd_cache, key); if (c) c->val = parm; diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 30829ee920c..bc882f8f163 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -312,6 +312,9 @@ struct azx_dev { unsigned int period_bytes; /* size of the period in bytes */ unsigned int frags; /* number for period in the play buffer */ unsigned int fifo_size; /* FIFO size */ + unsigned int start_flag: 1; /* stream full start flag */ + unsigned long start_jiffies; /* start + minimum jiffies */ + unsigned long min_jiffies; /* minimum jiffies before position is valid */ void __iomem *sd_addr; /* stream descriptor 
pointer */ @@ -330,7 +333,6 @@ struct azx_dev { unsigned int opened :1; unsigned int running :1; unsigned int irq_pending :1; - unsigned int irq_ignore :1; /* * For VIA: * A flag to ensure DMA position is 0 @@ -975,7 +977,7 @@ static irqreturn_t azx_interrupt(int irq, void *dev_id) struct azx *chip = dev_id; struct azx_dev *azx_dev; u32 status; - int i; + int i, ok; spin_lock(&chip->reg_lock); @@ -991,18 +993,14 @@ static irqreturn_t azx_interrupt(int irq, void *dev_id) azx_sd_writeb(azx_dev, SD_STS, SD_INT_MASK); if (!azx_dev->substream || !azx_dev->running) continue; - /* ignore the first dummy IRQ (due to pos_adj) */ - if (azx_dev->irq_ignore) { - azx_dev->irq_ignore = 0; - continue; - } /* check whether this IRQ is really acceptable */ - if (azx_position_ok(chip, azx_dev)) { + ok = azx_position_ok(chip, azx_dev); + if (ok == 1) { azx_dev->irq_pending = 0; spin_unlock(&chip->reg_lock); snd_pcm_period_elapsed(azx_dev->substream); spin_lock(&chip->reg_lock); - } else if (chip->bus && chip->bus->workq) { + } else if (ok == 0 && chip->bus && chip->bus->workq) { /* bogus IRQ, process it later */ azx_dev->irq_pending = 1; queue_work(chip->bus->workq, @@ -1088,7 +1086,6 @@ static int azx_setup_periods(struct azx *chip, bdl = (u32 *)azx_dev->bdl.area; ofs = 0; azx_dev->frags = 0; - azx_dev->irq_ignore = 0; pos_adj = bdl_pos_adj[chip->dev_index]; if (pos_adj > 0) { struct snd_pcm_runtime *runtime = substream->runtime; @@ -1109,7 +1106,6 @@ static int azx_setup_periods(struct azx *chip, &bdl, ofs, pos_adj, 1); if (ofs < 0) goto error; - azx_dev->irq_ignore = 1; } } else pos_adj = 0; @@ -1155,6 +1151,9 @@ static void azx_stream_reset(struct azx *chip, struct azx_dev *azx_dev) while (((val = azx_sd_readb(azx_dev, SD_CTL)) & SD_CTL_STREAM_RESET) && --timeout) ; + + /* reset first position - may not be synced with hw at this time */ + *azx_dev->posbuf = 0; } /* @@ -1409,7 +1408,6 @@ static int azx_pcm_open(struct snd_pcm_substream *substream) snd_pcm_set_sync(substream); mutex_unlock(&chip->open_mutex); - azx_stream_reset(chip, azx_dev); return 0; } @@ -1474,6 +1472,7 @@ static int azx_pcm_prepare(struct snd_pcm_substream *substream) unsigned int bufsize, period_bytes, format_val; int err; + azx_stream_reset(chip, azx_dev); format_val = snd_hda_calc_stream_format(runtime->rate, runtime->channels, runtime->format, @@ -1502,6 +1501,8 @@ static int azx_pcm_prepare(struct snd_pcm_substream *substream) return err; } + azx_dev->min_jiffies = (runtime->period_size * HZ) / + (runtime->rate * 2); azx_setup_controller(chip, azx_dev); if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) azx_dev->fifo_size = azx_sd_readw(azx_dev, SD_FIFOSIZE) + 1; @@ -1518,13 +1519,14 @@ static int azx_pcm_trigger(struct snd_pcm_substream *substream, int cmd) struct azx *chip = apcm->chip; struct azx_dev *azx_dev; struct snd_pcm_substream *s; - int start, nsync = 0, sbits = 0; + int rstart = 0, start, nsync = 0, sbits = 0; int nwait, timeout; switch (cmd) { + case SNDRV_PCM_TRIGGER_START: + rstart = 1; case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: case SNDRV_PCM_TRIGGER_RESUME: - case SNDRV_PCM_TRIGGER_START: start = 1; break; case SNDRV_PCM_TRIGGER_PAUSE_PUSH: @@ -1554,6 +1556,10 @@ static int azx_pcm_trigger(struct snd_pcm_substream *substream, int cmd) if (s->pcm->card != substream->pcm->card) continue; azx_dev = get_azx_dev(s); + if (rstart) { + azx_dev->start_flag = 1; + azx_dev->start_jiffies = jiffies + azx_dev->min_jiffies; + } if (start) azx_stream_start(chip, azx_dev); else @@ -1703,6 +1709,11 @@ static int 
azx_position_ok(struct azx *chip, struct azx_dev *azx_dev) { unsigned int pos; + if (azx_dev->start_flag && + time_before_eq(jiffies, azx_dev->start_jiffies)) + return -1; /* bogus (too early) interrupt */ + azx_dev->start_flag = 0; + pos = azx_get_position(chip, azx_dev); if (chip->position_fix == POS_FIX_AUTO) { if (!pos) { @@ -2260,11 +2271,11 @@ static int __devinit azx_create(struct snd_card *card, struct pci_dev *pci, gcap &= ~0x01; /* allow 64bit DMA address if supported by H/W */ - if ((gcap & 0x01) && !pci_set_dma_mask(pci, DMA_64BIT_MASK)) - pci_set_consistent_dma_mask(pci, DMA_64BIT_MASK); + if ((gcap & 0x01) && !pci_set_dma_mask(pci, DMA_BIT_MASK(64))) + pci_set_consistent_dma_mask(pci, DMA_BIT_MASK(64)); else { - pci_set_dma_mask(pci, DMA_32BIT_MASK); - pci_set_consistent_dma_mask(pci, DMA_32BIT_MASK); + pci_set_dma_mask(pci, DMA_BIT_MASK(32)); + pci_set_consistent_dma_mask(pci, DMA_BIT_MASK(32)); } /* read number of streams from GCAP register instead of using diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 1f2ad76ca94..56ce19e68cb 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -350,12 +350,20 @@ static int conexant_mux_enum_put(struct snd_kcontrol *kcontrol, } #ifdef CONFIG_SND_JACK +static void conexant_free_jack_priv(struct snd_jack *jack) +{ + struct conexant_jack *jacks = jack->private_data; + jacks->nid = 0; + jacks->jack = NULL; +} + static int conexant_add_jack(struct hda_codec *codec, hda_nid_t nid, int type) { struct conexant_spec *spec; struct conexant_jack *jack; const char *name; + int err; spec = codec->spec; snd_array_init(&spec->jacks, sizeof(*jack), 32); @@ -368,7 +376,12 @@ static int conexant_add_jack(struct hda_codec *codec, jack->nid = nid; jack->type = type; - return snd_jack_new(codec->bus->card, name, type, &jack->jack); + err = snd_jack_new(codec->bus->card, name, type, &jack->jack); + if (err < 0) + return err; + jack->jack->private_data = jack; + jack->jack->private_free = conexant_free_jack_priv; + return 0; } static void conexant_report_jack(struct hda_codec *codec, hda_nid_t nid) @@ -455,8 +468,10 @@ static void conexant_free(struct hda_codec *codec) if (spec->jacks.list) { struct conexant_jack *jacks = spec->jacks.list; int i; - for (i = 0; i < spec->jacks.used; i++) - snd_device_free(codec->bus->card, &jacks[i].jack); + for (i = 0; i < spec->jacks.used; i++, jacks++) { + if (jacks->jack) + snd_device_free(codec->bus->card, jacks->jack); + } snd_array_free(&spec->jacks); } #endif diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f35e58a2d92..6ed787eedd0 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8742,10 +8742,9 @@ static struct snd_pci_quirk alc883_cfg_tbl[] = { SND_PCI_QUIRK_VENDOR(0x1558, "Clevo laptop", ALC883_LAPTOP_EAPD), SND_PCI_QUIRK(0x15d9, 0x8780, "Supermicro PDSBA", ALC883_3ST_6ch), SND_PCI_QUIRK(0x161f, 0x2054, "Medion laptop", ALC883_MEDION), - SND_PCI_QUIRK(0x1734, 0x1107, "FSC AMILO Xi2550", + SND_PCI_QUIRK_MASK(0x1734, 0xfff0, 0x1100, "FSC AMILO Xi/Pi25xx", ALC883_FUJITSU_PI2515), - SND_PCI_QUIRK(0x1734, 0x1108, "Fujitsu AMILO Pi2515", ALC883_FUJITSU_PI2515), - SND_PCI_QUIRK(0x1734, 0x113d, "Fujitsu AMILO Xa3530", + SND_PCI_QUIRK_MASK(0x1734, 0xfff0, 0x1130, "Fujitsu AMILO Xa35xx", ALC888_FUJITSU_XA3530), SND_PCI_QUIRK(0x17aa, 0x101e, "Lenovo 101e", ALC883_LENOVO_101E_2ch), SND_PCI_QUIRK(0x17aa, 0x2085, "Lenovo NB0763", ALC883_LENOVO_NB0763), diff --git a/sound/pci/hda/patch_sigmatel.c 
b/sound/pci/hda/patch_sigmatel.c index 61996a2f45d..ce30b459aee 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -3851,6 +3851,15 @@ static void stac_gpio_set(struct hda_codec *codec, unsigned int mask, AC_VERB_SET_GPIO_DATA, gpiostate); /* sync */ } +#ifdef CONFIG_SND_JACK +static void stac92xx_free_jack_priv(struct snd_jack *jack) +{ + struct sigmatel_jack *jacks = jack->private_data; + jacks->nid = 0; + jacks->jack = NULL; +} +#endif + static int stac92xx_add_jack(struct hda_codec *codec, hda_nid_t nid, int type) { @@ -3860,6 +3869,7 @@ static int stac92xx_add_jack(struct hda_codec *codec, int def_conf = snd_hda_codec_get_pincfg(codec, nid); int connectivity = get_defcfg_connect(def_conf); char name[32]; + int err; if (connectivity && connectivity != AC_JACK_PORT_FIXED) return 0; @@ -3876,10 +3886,15 @@ static int stac92xx_add_jack(struct hda_codec *codec, snd_hda_get_jack_connectivity(def_conf), snd_hda_get_jack_location(def_conf)); - return snd_jack_new(codec->bus->card, name, type, &jack->jack); -#else - return 0; + err = snd_jack_new(codec->bus->card, name, type, &jack->jack); + if (err < 0) { + jack->nid = 0; + return err; + } + jack->jack->private_data = jack; + jack->jack->private_free = stac92xx_free_jack_priv; #endif + return 0; } static int stac_add_event(struct sigmatel_spec *spec, hda_nid_t nid, @@ -4138,8 +4153,10 @@ static void stac92xx_free_jacks(struct hda_codec *codec) if (!codec->bus->shutdown && spec->jacks.list) { struct sigmatel_jack *jacks = spec->jacks.list; int i; - for (i = 0; i < spec->jacks.used; i++) - snd_device_free(codec->bus->card, &jacks[i].jack); + for (i = 0; i < spec->jacks.used; i++, jacks++) { + if (jacks->jack) + snd_device_free(codec->bus->card, jacks->jack); + } } snd_array_free(&spec->jacks); #endif diff --git a/sound/pci/intel8x0.c b/sound/pci/intel8x0.c index 57648810eaf..5dced5b7938 100644 --- a/sound/pci/intel8x0.c +++ b/sound/pci/intel8x0.c @@ -355,6 +355,9 @@ struct ichdev { unsigned int fragsize1; unsigned int position; unsigned int pos_shift; + unsigned int last_pos; + unsigned long last_pos_jiffies; + unsigned int jiffy_to_bytes; int frags; int lvi; int lvi_frag; @@ -838,7 +841,10 @@ static int snd_intel8x0_pcm_trigger(struct snd_pcm_substream *substream, int cmd ichdev->suspended = 0; /* fallthru */ case SNDRV_PCM_TRIGGER_START: + case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: val = ICH_IOCE | ICH_STARTBM; + ichdev->last_pos = ichdev->position; + ichdev->last_pos_jiffies = jiffies; break; case SNDRV_PCM_TRIGGER_SUSPEND: ichdev->suspended = 1; @@ -849,9 +855,6 @@ static int snd_intel8x0_pcm_trigger(struct snd_pcm_substream *substream, int cmd case SNDRV_PCM_TRIGGER_PAUSE_PUSH: val = ICH_IOCE; break; - case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: - val = ICH_IOCE | ICH_STARTBM; - break; default: return -EINVAL; } @@ -1045,6 +1048,7 @@ static int snd_intel8x0_pcm_prepare(struct snd_pcm_substream *substream) ichdev->pos_shift = (runtime->sample_bits > 16) ? 
2 : 1; } snd_intel8x0_setup_periods(chip, ichdev); + ichdev->jiffy_to_bytes = (runtime->rate * 4 * ichdev->pos_shift) / HZ; return 0; } @@ -1053,7 +1057,7 @@ static snd_pcm_uframes_t snd_intel8x0_pcm_pointer(struct snd_pcm_substream *subs struct intel8x0 *chip = snd_pcm_substream_chip(substream); struct ichdev *ichdev = get_ichdev(substream); size_t ptr1, ptr; - int civ, timeout = 100; + int civ, timeout = 10; unsigned int position; spin_lock(&chip->reg_lock); @@ -1069,9 +1073,19 @@ static snd_pcm_uframes_t snd_intel8x0_pcm_pointer(struct snd_pcm_substream *subs ptr1 == igetword(chip, ichdev->reg_offset + ichdev->roff_picb)) break; } while (timeout--); - ptr1 <<= ichdev->pos_shift; - ptr = ichdev->fragsize1 - ptr1; - ptr += position; + if (ptr1 != 0) { + ptr1 <<= ichdev->pos_shift; + ptr = ichdev->fragsize1 - ptr1; + ptr += position; + ichdev->last_pos = ptr; + ichdev->last_pos_jiffies = jiffies; + } else { + ptr1 = jiffies - ichdev->last_pos_jiffies; + if (ptr1) + ptr1 -= 1; + ptr = ichdev->last_pos + ptr1 * ichdev->jiffy_to_bytes; + ptr %= ichdev->size; + } spin_unlock(&chip->reg_lock); if (ptr >= ichdev->size) return 0; @@ -2661,12 +2675,14 @@ static void __devinit intel8x0_measure_ac97_clock(struct intel8x0 *chip) struct snd_pcm_substream *subs; struct ichdev *ichdev; unsigned long port; - unsigned long pos, t; - struct timeval start_time, stop_time; + unsigned long pos, pos1, t; + int civ, timeout = 1000, attempt = 1; + struct timespec start_time, stop_time; if (chip->ac97_bus->clock != 48000) return; /* specified in module option */ + __again: subs = chip->pcm[0]->streams[0].substream; if (! subs || subs->dma_buffer.bytes < INTEL8X0_TESTBUF_SIZE) { snd_printk(KERN_WARNING "no playback buffer allocated - aborting measure ac97 clock\n"); @@ -2674,7 +2690,7 @@ static void __devinit intel8x0_measure_ac97_clock(struct intel8x0 *chip) } ichdev = &chip->ichd[ICHD_PCMOUT]; ichdev->physbuf = subs->dma_buffer.addr; - ichdev->size = chip->ichd[ICHD_PCMOUT].fragsize = INTEL8X0_TESTBUF_SIZE; + ichdev->size = ichdev->fragsize = INTEL8X0_TESTBUF_SIZE; ichdev->substream = NULL; /* don't process interrupts */ /* set rate */ @@ -2693,16 +2709,31 @@ static void __devinit intel8x0_measure_ac97_clock(struct intel8x0 *chip) iputbyte(chip, port + ICH_REG_OFF_CR, ICH_IOCE); iputdword(chip, ICHREG(ALI_DMACR), 1 << ichdev->ali_slot); } - do_gettimeofday(&start_time); + do_posix_clock_monotonic_gettime(&start_time); spin_unlock_irq(&chip->reg_lock); msleep(50); spin_lock_irq(&chip->reg_lock); /* check the position */ - pos = ichdev->fragsize1; - pos -= igetword(chip, ichdev->reg_offset + ichdev->roff_picb) << ichdev->pos_shift; - pos += ichdev->position; + do { + civ = igetbyte(chip, ichdev->reg_offset + ICH_REG_OFF_CIV); + pos1 = igetword(chip, ichdev->reg_offset + ichdev->roff_picb); + if (pos1 == 0) { + udelay(10); + continue; + } + if (civ == igetbyte(chip, ichdev->reg_offset + ICH_REG_OFF_CIV) && + pos1 == igetword(chip, ichdev->reg_offset + ichdev->roff_picb)) + break; + } while (timeout--); + if (pos1 == 0) { /* oops, this value is not reliable */ + pos = 0; + } else { + pos = ichdev->fragsize1; + pos -= pos1 << ichdev->pos_shift; + pos += ichdev->position; + } chip->in_measurement = 0; - do_gettimeofday(&stop_time); + do_posix_clock_monotonic_gettime(&stop_time); /* stop */ if (chip->device_type == DEVICE_ALI) { iputdword(chip, ICHREG(ALI_DMACR), 1 << (ichdev->ali_slot + 16)); @@ -2717,19 +2748,37 @@ static void __devinit intel8x0_measure_ac97_clock(struct intel8x0 *chip) iputbyte(chip, port + 
ICH_REG_OFF_CR, ICH_RESETREGS); spin_unlock_irq(&chip->reg_lock); + if (pos == 0) { + snd_printk(KERN_ERR "intel8x0: measure - unreliable DMA position..\n"); + __retry: + if (attempt < 2) { + attempt++; + goto __again; + } + return; + } + + pos /= 4; t = stop_time.tv_sec - start_time.tv_sec; t *= 1000000; - t += stop_time.tv_usec - start_time.tv_usec; - printk(KERN_INFO "%s: measured %lu usecs\n", __func__, t); + t += (stop_time.tv_nsec - start_time.tv_nsec) / 1000; + printk(KERN_INFO "%s: measured %lu usecs (%lu samples)\n", __func__, t, pos); if (t == 0) { - snd_printk(KERN_ERR "?? calculation error..\n"); - return; + snd_printk(KERN_ERR "intel8x0: ?? calculation error..\n"); + goto __retry; } - pos = (pos / 4) * 1000; + pos *= 1000; pos = (pos / t) * 1000 + ((pos % t) * 1000) / t; - if (pos < 40000 || pos >= 60000) + if (pos < 40000 || pos >= 60000) { /* abnormal value. hw problem? */ printk(KERN_INFO "intel8x0: measured clock %ld rejected\n", pos); + goto __retry; + } else if (pos > 40500 && pos < 41500) + /* first exception - 41000Hz reference clock */ + chip->ac97_bus->clock = 41000; + else if (pos > 43600 && pos < 44600) + /* second exception - 44100HZ reference clock */ + chip->ac97_bus->clock = 44100; else if (pos < 47500 || pos > 48500) /* not 48000Hz, tuning the clock.. */ chip->ac97_bus->clock = (chip->ac97_bus->clock * 48000) / pos; diff --git a/sound/soc/pxa/magician.c b/sound/soc/pxa/magician.c index f7c4544f785..0625c342a1c 100644 --- a/sound/soc/pxa/magician.c +++ b/sound/soc/pxa/magician.c @@ -27,8 +27,6 @@ #include <sound/soc.h> #include <sound/soc-dapm.h> -#include <mach/pxa-regs.h> -#include <mach/hardware.h> #include <mach/magician.h> #include <asm/mach-types.h> #include "../codecs/uda1380.h" diff --git a/sound/soc/s3c24xx/Kconfig b/sound/soc/s3c24xx/Kconfig index 2f3a21eee05..df494d1e346 100644 --- a/sound/soc/s3c24xx/Kconfig +++ b/sound/soc/s3c24xx/Kconfig @@ -1,10 +1,10 @@ config SND_S3C24XX_SOC tristate "SoC Audio for the Samsung S3CXXXX chips" - depends on ARCH_S3C2410 || ARCH_S3C64XX + depends on ARCH_S3C2410 help Say Y or M if you want to add support for codecs attached to - the S3C24XX and S3C64XX AC97, I2S or SSP interface. You will - also need to select the audio interfaces to support below. + the S3C24XX AC97 or I2S interfaces. You will also need to + select the audio interfaces to support below. 
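The intel8x0 pointer fix above falls back to a time-based estimate whenever the PICB register cannot be read reliably: the driver caches the byte position and jiffies at the last trustworthy read, then converts elapsed jiffies into bytes using the jiffy_to_bytes rate computed at prepare time, wrapping at the ring-buffer size. A condensed sketch of that fallback, assuming the illustrative helper name estimate_pos:

	#include <linux/jiffies.h>

	/* Estimate the DMA byte position from elapsed time when the
	 * hardware position register reads back an unusable value. */
	static unsigned int estimate_pos(unsigned int last_pos,
					 unsigned long last_pos_jiffies,
					 unsigned int jiffy_to_bytes,
					 unsigned int size)
	{
		unsigned long jdelta = jiffies - last_pos_jiffies;

		if (jdelta)
			jdelta -= 1;	/* stay one tick conservative, as the driver does */
		return (last_pos + jdelta * jiffy_to_bytes) % size;
	}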
config SND_S3C24XX_SOC_I2S tristate diff --git a/sound/usb/caiaq/Makefile b/sound/usb/caiaq/Makefile index 23dadd5a11c..388999653aa 100644 --- a/sound/usb/caiaq/Makefile +++ b/sound/usb/caiaq/Makefile @@ -1,4 +1,4 @@ -snd-usb-caiaq-y := caiaq-device.o caiaq-audio.o caiaq-midi.o caiaq-control.o -snd-usb-caiaq-$(CONFIG_SND_USB_CAIAQ_INPUT) += caiaq-input.o +snd-usb-caiaq-y := device.o audio.o midi.o control.o +snd-usb-caiaq-$(CONFIG_SND_USB_CAIAQ_INPUT) += input.o obj-$(CONFIG_SND_USB_CAIAQ) += snd-usb-caiaq.o diff --git a/sound/usb/caiaq/caiaq-audio.c b/sound/usb/caiaq/audio.c index 08d51e0c9fe..3f45c0fe61a 100644 --- a/sound/usb/caiaq/caiaq-audio.c +++ b/sound/usb/caiaq/audio.c @@ -16,20 +16,14 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include <linux/spinlock.h> #include <linux/init.h> -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/interrupt.h> #include <linux/usb.h> -#include <linux/spinlock.h> #include <sound/core.h> -#include <sound/initval.h> #include <sound/pcm.h> -#include <sound/rawmidi.h> -#include <linux/input.h> -#include "caiaq-device.h" -#include "caiaq-audio.h" +#include "device.h" +#include "audio.h" #define N_URBS 32 #define CLOCK_DRIFT_TOLERANCE 5 diff --git a/sound/usb/caiaq/caiaq-audio.h b/sound/usb/caiaq/audio.h index 8ab1f8d9529..8ab1f8d9529 100644 --- a/sound/usb/caiaq/caiaq-audio.h +++ b/sound/usb/caiaq/audio.h diff --git a/sound/usb/caiaq/caiaq-control.c b/sound/usb/caiaq/control.c index e92c2bbf4fe..537102ba6b9 100644 --- a/sound/usb/caiaq/caiaq-control.c +++ b/sound/usb/caiaq/control.c @@ -18,17 +18,13 @@ */ #include <linux/init.h> -#include <linux/interrupt.h> #include <linux/usb.h> +#include <sound/control.h> #include <sound/core.h> -#include <sound/initval.h> #include <sound/pcm.h> -#include <sound/rawmidi.h> -#include <sound/control.h> -#include <linux/input.h> -#include "caiaq-device.h" -#include "caiaq-control.h" +#include "device.h" +#include "control.h" #define CNT_INTVAL 0x10000 diff --git a/sound/usb/caiaq/caiaq-control.h b/sound/usb/caiaq/control.h index 2e7ab1aa4fb..2e7ab1aa4fb 100644 --- a/sound/usb/caiaq/caiaq-control.h +++ b/sound/usb/caiaq/control.h diff --git a/sound/usb/caiaq/caiaq-device.c b/sound/usb/caiaq/device.c index cf573a982fd..6d517705da0 100644 --- a/sound/usb/caiaq/caiaq-device.c +++ b/sound/usb/caiaq/device.c @@ -19,27 +19,20 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include <linux/init.h> -#include <linux/module.h> #include <linux/moduleparam.h> #include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/init.h> #include <linux/usb.h> -#include <linux/input.h> -#include <linux/spinlock.h> -#include <sound/core.h> #include <sound/initval.h> +#include <sound/core.h> #include <sound/pcm.h> -#include <sound/rawmidi.h> -#include <sound/control.h> - -#include "caiaq-device.h" -#include "caiaq-audio.h" -#include "caiaq-midi.h" -#include "caiaq-control.h" -#ifdef CONFIG_SND_USB_CAIAQ_INPUT -#include "caiaq-input.h" -#endif +#include "device.h" +#include "audio.h" +#include "midi.h" +#include "control.h" +#include "input.h" MODULE_AUTHOR("Daniel Mack <daniel@caiaq.de>"); MODULE_DESCRIPTION("caiaq USB audio, version 1.3.13"); diff --git a/sound/usb/caiaq/caiaq-device.h b/sound/usb/caiaq/device.h index 4cce1ad7493..4cce1ad7493 100644 --- a/sound/usb/caiaq/caiaq-device.h +++ b/sound/usb/caiaq/device.h diff --git a/sound/usb/caiaq/caiaq-input.c b/sound/usb/caiaq/input.c index f743847a5e5..a48d309bd94 100644 --- 
a/sound/usb/caiaq/caiaq-input.c +++ b/sound/usb/caiaq/input.c @@ -17,17 +17,12 @@ */ #include <linux/init.h> -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/input.h> #include <linux/usb.h> #include <linux/usb/input.h> -#include <linux/spinlock.h> -#include <sound/core.h> -#include <sound/rawmidi.h> #include <sound/pcm.h> -#include "caiaq-device.h" -#include "caiaq-input.h" + +#include "device.h" +#include "input.h" static unsigned short keycode_ak1[] = { KEY_C, KEY_B, KEY_A }; static unsigned short keycode_rk2[] = { KEY_1, KEY_2, KEY_3, KEY_4, diff --git a/sound/usb/caiaq/caiaq-input.h b/sound/usb/caiaq/input.h index ced53557786..ced53557786 100644 --- a/sound/usb/caiaq/caiaq-input.h +++ b/sound/usb/caiaq/input.h diff --git a/sound/usb/caiaq/caiaq-midi.c b/sound/usb/caiaq/midi.c index f19fd360c93..8fa8cd88d76 100644 --- a/sound/usb/caiaq/caiaq-midi.c +++ b/sound/usb/caiaq/midi.c @@ -16,20 +16,13 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include <linux/init.h> -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/interrupt.h> #include <linux/usb.h> -#include <linux/input.h> -#include <linux/spinlock.h> -#include <sound/core.h> #include <sound/rawmidi.h> +#include <sound/core.h> #include <sound/pcm.h> -#include "caiaq-device.h" -#include "caiaq-midi.h" - +#include "device.h" +#include "midi.h" static int snd_usb_caiaq_midi_input_open(struct snd_rawmidi_substream *substream) { diff --git a/sound/usb/caiaq/caiaq-midi.h b/sound/usb/caiaq/midi.h index 9d16db027fc..9d16db027fc 100644 --- a/sound/usb/caiaq/caiaq-midi.h +++ b/sound/usb/caiaq/midi.h diff --git a/sound/usb/usx2y/us122l.c b/sound/usb/usx2y/us122l.c index 98276aafefe..012ff1f6f8a 100644 --- a/sound/usb/usx2y/us122l.c +++ b/sound/usb/usx2y/us122l.c @@ -349,14 +349,10 @@ static int usb_stream_hwdep_ioctl(struct snd_hwdep *hw, struct file *file, if (cmd != SNDRV_USB_STREAM_IOCTL_SET_PARAMS) return -ENOTTY; - cfg = kmalloc(sizeof(*cfg), GFP_KERNEL); - if (!cfg) - return -ENOMEM; + cfg = memdup_user((void *)arg, sizeof(*cfg)); + if (IS_ERR(cfg)) + return PTR_ERR(cfg); - if (copy_from_user(cfg, (void *)arg, sizeof(*cfg))) { - err = -EFAULT; - goto free; - } if (cfg->version != USB_STREAM_INTERFACE_VERSION) { err = -ENXIO; goto free; diff --git a/sound/usb/usx2y/usX2Yhwdep.c b/sound/usb/usx2y/usX2Yhwdep.c index 4af8740db71..f3d8f71265d 100644 --- a/sound/usb/usx2y/usX2Yhwdep.c +++ b/sound/usb/usx2y/usX2Yhwdep.c @@ -203,13 +203,12 @@ static int snd_usX2Y_hwdep_dsp_load(struct snd_hwdep *hw, if (access_ok(VERIFY_READ, dsp->image, dsp->length)) { struct usb_device* dev = priv->chip.dev; - char *buf = kmalloc(dsp->length, GFP_KERNEL); - if (!buf) - return -ENOMEM; - if (copy_from_user(buf, dsp->image, dsp->length)) { - kfree(buf); - return -EFAULT; - } + char *buf; + + buf = memdup_user(dsp->image, dsp->length); + if (IS_ERR(buf)) + return PTR_ERR(buf); + err = usb_set_interface(dev, 0, 1); if (err) snd_printk(KERN_ERR "usb_set_interface error \n");
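Most of the sound/ changes in this series reduce to a single idiom: the open-coded kmalloc() + copy_from_user() + error-unwind sequence is replaced by memdup_user(), which returns either a freshly allocated kernel copy of the user buffer or an ERR_PTR encoding -ENOMEM/-EFAULT. A minimal sketch of the consumer side, assuming a hypothetical handle_request() wrapper:

	#include <linux/err.h>
	#include <linux/slab.h>
	#include <linux/string.h>

	static int handle_request(void __user *arg, size_t size)
	{
		void *buf;

		/* kmalloc + copy_from_user in one call */
		buf = memdup_user(arg, size);
		if (IS_ERR(buf))
			return PTR_ERR(buf);	/* -ENOMEM or -EFAULT */

		/* ... operate on the kernel-space copy ... */

		kfree(buf);
		return 0;
	}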