79 files changed, 994 insertions, 596 deletions
diff --git a/Documentation/DocBook/kgdb.tmpl b/Documentation/DocBook/kgdb.tmpl
index 028a8444d95..e8acd1f0345 100644
--- a/Documentation/DocBook/kgdb.tmpl
+++ b/Documentation/DocBook/kgdb.tmpl
@@ -84,10 +84,9 @@
     runs an instance of gdb against the vmlinux file which contains
     the symbols (not boot image such as bzImage, zImage, uImage...).
     In gdb the developer specifies the connection parameters and
-    connects to kgdb.  Depending on which kgdb I/O modules exist in
-    the kernel for a given architecture, it may be possible to debug
-    the test machine's kernel with the development machine using a
-    rs232 or ethernet connection.
+    connects to kgdb.  The type of connection a developer makes with
+    gdb depends on the availability of kgdb I/O modules compiled as
+    builtin's or kernel modules in the test machine's kernel.
     </para>
   </chapter>
   <chapter id="CompilingAKernel">
@@ -223,7 +222,7 @@
   </para>
   <para>
   IMPORTANT NOTE: Using this option with kgdb over the console
-  (kgdboc) or kgdb over ethernet (kgdboe) is not supported.
+  (kgdboc) is not supported.
   </para>
   </sect1>
   </chapter>
@@ -249,18 +248,11 @@
     (gdb) target remote /dev/ttyS0
     </programlisting>
     <para>
-    Example (kgdb to a terminal server):
+    Example (kgdb to a terminal server on tcp port 2012):
     </para>
     <programlisting>
     % gdb ./vmlinux
-    (gdb) target remote udp:192.168.2.2:6443
-    </programlisting>
-    <para>
-    Example (kgdb over ethernet):
-    </para>
-    <programlisting>
-    % gdb ./vmlinux
-    (gdb) target remote udp:192.168.2.2:6443
+    (gdb) target remote 192.168.2.2:2012
     </programlisting>
     <para>
     Once connected, you can debug a kernel the way you would debug an
diff --git a/Documentation/hwmon/sysfs-interface b/Documentation/hwmon/sysfs-interface
index f4a8ebc1ef1..2d845730d4e 100644
--- a/Documentation/hwmon/sysfs-interface
+++ b/Documentation/hwmon/sysfs-interface
@@ -2,17 +2,12 @@ Naming and data format standards for sysfs files
 ------------------------------------------------
 
 The libsensors library offers an interface to the raw sensors data
-through the sysfs interface. See libsensors documentation and source for
-further information. As of writing this document, libsensors
-(from lm_sensors 2.8.3) is heavily chip-dependent. Adding or updating
-support for any given chip requires modifying the library's code.
-This is because libsensors was written for the procfs interface
-older kernel modules were using, which wasn't standardized enough.
-Recent versions of libsensors (from lm_sensors 2.8.2 and later) have
-support for the sysfs interface, though.
-
-The new sysfs interface was designed to be as chip-independent as
-possible.
+through the sysfs interface. Since lm-sensors 3.0.0, libsensors is
+completely chip-independent. It assumes that all the kernel drivers
+implement the standard sysfs interface described in this document.
+This makes adding or updating support for any given chip very easy, as
+libsensors, and applications using it, do not need to be modified.
+This is a major improvement compared to lm-sensors 2.
 
 Note that motherboards vary widely in the connections to sensor chips.
 There is no standard that ensures, for example, that the second
@@ -35,19 +30,17 @@ access this data in a simple and consistent way. That said, such programs
 will have to implement conversion, labeling and hiding of inputs. For
 this reason, it is still not recommended to bypass the library.
 
-If you are developing a userspace application please send us feedback on
-this standard.
-
-Note that this standard isn't completely established yet, so it is subject
-to changes. If you are writing a new hardware monitoring driver those
-features can't seem to fit in this interface, please contact us with your
-extension proposal. Keep in mind that backward compatibility must be
-preserved.
-
 Each chip gets its own directory in the sysfs /sys/devices tree.  To
 find all sensor chips, it is easier to follow the device symlinks from
 /sys/class/hwmon/hwmon*.
 
+Up to lm-sensors 3.0.0, libsensors looks for hardware monitoring attributes
+in the "physical" device directory. Since lm-sensors 3.0.1, attributes found
+in the hwmon "class" device directory are also supported. Complex drivers
+(e.g. drivers for multifunction chips) may want to use this possibility to
+avoid namespace pollution. The only drawback will be that older versions of
+libsensors won't support the driver in question.
+
 All sysfs values are fixed point numbers.
 
 There is only one value per file, unlike the older /proc specification.
diff --git a/MAINTAINERS b/MAINTAINERS
index cd587eec9fa..8f0ec46a709 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4431,10 +4431,10 @@ M:	johnpol@2ka.mipt.ru
 S:	Maintained
 
 W83791D HARDWARE MONITORING DRIVER
-P:	Charles Spirakis
-M:	bezaur@gmail.com
+P:	Marc Hulsman
+M:	m.hulsman@tudelft.nl
 L:	lm-sensors@lm-sensors.org
-S:	Odd Fixes
+S:	Maintained
 
 W83793 HARDWARE MONITORING DRIVER
 P:	Rudolf Marek
diff --git a/Makefile b/Makefile
index 2b4977c9844..6aff5f47c21 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 26
-EXTRAVERSION = -rc7
+EXTRAVERSION = -rc8
 NAME = Rotary Wombat
 
 # *DOCUMENTATION*
diff --git a/arch/alpha/Makefile b/arch/alpha/Makefile
index 4e1a8e2c454..4759fe751aa 100644
--- a/arch/alpha/Makefile
+++ b/arch/alpha/Makefile
@@ -13,6 +13,7 @@ NM := $(NM) -B
 LDFLAGS_vmlinux	:= -static -N #-relax
 CHECKFLAGS	+= -D__alpha__ -m64
 cflags-y	:= -pipe -mno-fp-regs -ffixed-8 -msmall-data
+cflags-y	+= $(call cc-option, -fno-jump-tables)
 
 cpuflags-$(CONFIG_ALPHA_EV4)		:= -mcpu=ev4
 cpuflags-$(CONFIG_ALPHA_EV5)		:= -mcpu=ev5
diff --git a/arch/alpha/kernel/core_t2.c b/arch/alpha/kernel/core_t2.c
index c0750291b44..d9980d47ab8 100644
--- a/arch/alpha/kernel/core_t2.c
+++ b/arch/alpha/kernel/core_t2.c
@@ -74,6 +74,8 @@
 # define DBG(args)
 #endif
 
+DEFINE_SPINLOCK(t2_hae_lock);
+
 static volatile unsigned int t2_mcheck_any_expected;
 static volatile unsigned int t2_mcheck_last_taken;
 
diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c
index 36ab22a7ea1..5cf45fc5134 100644
--- a/arch/alpha/kernel/pci.c
+++ b/arch/alpha/kernel/pci.c
@@ -71,6 +71,23 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82378, quirk_i
 static void __init
 quirk_cypress(struct pci_dev *dev)
 {
+	/* The Notorious Cy82C693 chip.  */
+
+	/* The generic legacy mode IDE fixup in drivers/pci/probe.c
+	   doesn't work correctly with the Cypress IDE controller as
+	   it has non-standard register layout.  Fix that.  */
+	if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE) {
+		dev->resource[2].start = dev->resource[3].start = 0;
+		dev->resource[2].end = dev->resource[3].end = 0;
+		dev->resource[2].flags = dev->resource[3].flags = 0;
+		if (PCI_FUNC(dev->devfn) == 2) {
+			dev->resource[0].start = 0x170;
+			dev->resource[0].end = 0x177;
+			dev->resource[1].start = 0x376;
+			dev->resource[1].end = 0x376;
+		}
+	}
+
 	/* The Cypress bridge responds on the PCI bus in the address range
 	   0xffff0000-0xffffffff (conventional x86 BIOS ROM).  There is no
 	   way to turn this off.  The bridge also supports several extended
diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c
index dc57790250d..c778779007f 100644
--- a/arch/alpha/kernel/traps.c
+++ b/arch/alpha/kernel/traps.c
@@ -447,7 +447,7 @@ struct unaligned_stat {
 
 
 /* Macro for exception fixup code to access integer registers.  */
-#define una_reg(r)  (regs->regs[(r) >= 16 && (r) <= 18 ? (r)+19 : (r)])
+#define una_reg(r)  (_regs[(r) >= 16 && (r) <= 18 ? (r)+19 : (r)])
 
 
 asmlinkage void
@@ -456,6 +456,7 @@ do_entUna(void * va, unsigned long opcode, unsigned long reg,
 {
 	long error, tmp1, tmp2, tmp3, tmp4;
 	unsigned long pc = regs->pc - 4;
+	unsigned long *_regs = regs->regs;
 	const struct exception_table_entry *fixup;
 
 	unaligned[0].count++;
diff --git a/arch/arm/tools/mach-types b/arch/arm/tools/mach-types
index 207a8b5a0c4..0be5630ff56 100644
--- a/arch/arm/tools/mach-types
+++ b/arch/arm/tools/mach-types
@@ -12,7 +12,7 @@
 #
 #   http://www.arm.linux.org.uk/developer/machines/?action=new
 #
-# Last update: Sat Apr 19 11:23:38 2008
+# Last update: Mon Jul 7 16:25:39 2008
 #
 # machine_is_xxx	CONFIG_xxxx		MACH_TYPE_xxx		number
 #
@@ -560,7 +560,6 @@ husky			MACH_HUSKY		HUSKY			543
 boxer			MACH_BOXER		BOXER			544
 shepherd		MACH_SHEPHERD		SHEPHERD		545
 aml42800aa		MACH_AML42800AA		AML42800AA		546
-ml674001		MACH_MACH_TYPE_ML674001	MACH_TYPE_ML674001	547
 lpc2294			MACH_LPC2294		LPC2294			548
 switchgrass		MACH_SWITCHGRASS	SWITCHGRASS		549
 ens_cmu			MACH_ENS_CMU		ENS_CMU			550
@@ -748,7 +747,6 @@ anubis			MACH_ANUBIS		ANUBIS			734
 ite8152			MACH_ITE8152		ITE8152			735
 lpc3xxx			MACH_LPC3XXX		LPC3XXX			736
 puppeteer		MACH_PUPPETEER		PUPPETEER		737
-vt001			MACH_MACH_VADATECH	MACH_VADATECH		738
 e570			MACH_E570		E570			739
 x50			MACH_X50		X50			740
 recon			MACH_RECON		RECON			741
@@ -839,7 +837,7 @@ ccxp270			MACH_CCXP		CCXP			825
 omap_gsample		MACH_OMAP_GSAMPLE	OMAP_GSAMPLE		826
 realview_eb		MACH_REALVIEW_EB	REALVIEW_EB		827
 samoa			MACH_SAMOA		SAMOA			828
-t3xscale		MACH_T3XSCALE		T3XSCALE		829
+palmt3			MACH_PALMT3		PALMT3			829
 i878			MACH_I878		I878			830
 borzoi			MACH_BORZOI		BORZOI			831
 gecko			MACH_GECKO		GECKO			832
@@ -895,7 +893,7 @@ mio8390			MACH_MIO8390		MIO8390			881
 omi_board		MACH_OMI_BOARD		OMI_BOARD		882
 mx21civ			MACH_MX21CIV		MX21CIV			883
 mahi_cdac		MACH_MAHI_CDAC		MAHI_CDAC		884
-xscale_palmtx		MACH_XSCALE_PALMTX	XSCALE_PALMTX		885
+palmtx			MACH_PALMTX		PALMTX			885
 s3c2413			MACH_S3C2413		S3C2413			887
 samsys_ep0		MACH_SAMSYS_EP0		SAMSYS_EP0		888
 wg302v1			MACH_WG302V1		WG302V1			889
@@ -918,7 +916,7 @@ nxdb500			MACH_NXDB500		NXDB500			905
 apf9328			MACH_APF9328		APF9328			906
 omap_wipoq		MACH_OMAP_WIPOQ		OMAP_WIPOQ		907
 omap_twip		MACH_OMAP_TWIP		OMAP_TWIP		908
-xscale_treo650		MACH_XSCALE_PALMTREO650	XSCALE_PALMTREO650	909
+palmtreo650		MACH_PALMTREO650	PALMTREO650		909
 acumen			MACH_ACUMEN		ACUMEN			910
 xp100			MACH_XP100		XP100			911
 fs2410			MACH_FS2410		FS2410			912
@@ -926,8 +924,8 @@ pxa270_cerf		MACH_PXA270_CERF	PXA270_CERF		913
 sq2ftlpalm		MACH_SQ2FTLPALM		SQ2FTLPALM		914
 bsemserver		MACH_BSEMSERVER		BSEMSERVER		915
 netclient		MACH_NETCLIENT		NETCLIENT		916
-xscale_palmtt5		MACH_XSCALE_PALMTT5	XSCALE_PALMTT5		917
-xscale_palmtc		MACH_OMAP_PALMTC	OMAP_PALMTC		918
+palmt5			MACH_PALMT5		PALMT5			917
+palmtc			MACH_PALMTC		PALMTC			918
 omap_apollon		MACH_OMAP_APOLLON	OMAP_APOLLON		919
 mxc30030evb		MACH_MXC30030EVB	MXC30030EVB		920
 rea_2d			MACH_REA_2D		REA_2D			921
@@ -1220,7 +1218,6 @@ empca400		MACH_EMPCA400		EMPCA400		1211
 em7210			MACH_EM7210		EM7210			1212
 htchermes		MACH_HTCHERMES		HTCHERMES		1213
 eti_c1			MACH_ETI_C1		ETI_C1			1214
-mach_dep2410		MACH_MACH_DEP2410	MACH_DEP2410		1215
 ac100			MACH_AC100		AC100			1216
 sneetch			MACH_SNEETCH		SNEETCH			1217
 studentmate		MACH_STUDENTMATE	STUDENTMATE		1218
@@ -1421,10 +1418,10 @@ looxc550		MACH_LOOXC550		LOOXC550		1417
 cnty_titan		MACH_CNTY_TITAN		CNTY_TITAN		1418
 app3xx			MACH_APP3XX		APP3XX			1419
 sideoatsgrama		MACH_SIDEOATSGRAMA	SIDEOATSGRAMA		1420
-xscale_palmt700p	MACH_XSCALE_PALMT700P	XSCALE_PALMT700P	1421
-xscale_palmt700w	MACH_XSCALE_PALMT700W	XSCALE_PALMT700W	1422
-xscale_palmt750		MACH_XSCALE_PALMT750	XSCALE_PALMT750		1423
-xscale_palmt755p	MACH_XSCALE_PALMT755P	XSCALE_PALMT755P	1424
+palmtreo700p		MACH_PALMTREO700P	PALMTREO700P		1421
+palmtreo700w		MACH_PALMTREO700W	PALMTREO700W		1422
+palmtreo750		MACH_PALMTREO750	PALMTREO750		1423
+palmtreo755p		MACH_PALMTREO755P	PALMTREO755P		1424
 ezreganut9200		MACH_EZREGANUT9200	EZREGANUT9200		1425
 sarge			MACH_SARGE		SARGE			1426
 a696			MACH_A696		A696			1427
@@ -1463,7 +1460,7 @@ artemis			MACH_ARTEMIS		ARTEMIS			1462
 htctitan		MACH_HTCTITAN		HTCTITAN		1463
 qranium			MACH_QRANIUM		QRANIUM			1464
 adx_wsc2		MACH_ADX_WSC2		ADX_WSC2		1465
-adx_medcom		MACH_ADX_MEDINET	ADX_MEDINET		1466
+adx_medcom		MACH_ADX_MEDCOM		ADX_MEDCOM		1466
 bboard			MACH_BBOARD		BBOARD			1467
 cambria			MACH_CAMBRIA		CAMBRIA			1468
 mt7xxx			MACH_MT7XXX		MT7XXX			1469
@@ -1519,7 +1516,7 @@ wp188			MACH_WP188		WP188			1518
 corsica			MACH_CORSICA		CORSICA			1519
 bigeye			MACH_BIGEYE		BIGEYE			1520
 tll5000			MACH_TLL5000		TLL5000			1522
-hni270			MACH_HNI_X270		HNI_X270		1523
+bebot			MACH_BEBOT		BEBOT			1523
 qong			MACH_QONG		QONG			1524
 tcompact		MACH_TCOMPACT		TCOMPACT		1525
 puma5			MACH_PUMA5		PUMA5			1526
@@ -1636,7 +1633,6 @@ awlug4lcu		MACH_AWLUG4LCU		AWLUG4LCU		1637
 palermoc		MACH_PALERMOC		PALERMOC		1638
 omap_ldp		MACH_OMAP_LDP		OMAP_LDP		1639
 ip500			MACH_IP500		IP500			1640
-mx35ads			MACH_MACH_MX35ADS	MACH_MX35ADS		1641
 ase2			MACH_ASE2		ASE2			1642
 mx35evb			MACH_MX35EVB		MX35EVB			1643
 aml_m8050		MACH_AML_M8050		AML_M8050		1644
@@ -1647,7 +1643,7 @@ badger			MACH_BADGER		BADGER			1648
 trizeps4wl		MACH_TRIZEPS4WL		TRIZEPS4WL		1649
 trizeps5		MACH_TRIZEPS5		TRIZEPS5		1650
 marlin			MACH_MARLIN		MARLIN			1651
-ts7800			MACH_TS7800		TS7800			1652
+ts78xx			MACH_TS78XX		TS78XX			1652
 hpipaq214		MACH_HPIPAQ214		HPIPAQ214		1653
 at572d940dcm		MACH_AT572D940DCM	AT572D940DCM		1654
 ne1board		MACH_NE1BOARD		NE1BOARD		1655
@@ -1720,3 +1716,99 @@ htc_kaiser		MACH_HTC_KAISER		HTC_KAISER		1724
 lg_ks20			MACH_LG_KS20		LG_KS20			1725
 hhgps			MACH_HHGPS		HHGPS			1726
 nokia_n810_wimax	MACH_NOKIA_N810_WIMAX	NOKIA_N810_WIMAX	1727
+insight			MACH_INSIGHT		INSIGHT			1728
+sapphire		MACH_SAPPHIRE		SAPPHIRE		1729
+csb637xo		MACH_CSB637XO		CSB637XO		1730
+evisiong		MACH_EVISIONG		EVISIONG		1731
+stmp37xx		MACH_STMP37XX		STMP37XX		1732
+stmp378x		MACH_STMP38XX		STMP38XX		1733
+tnt			MACH_TNT		TNT			1734
+tbxt			MACH_TBXT		TBXT			1735
+playmate		MACH_PLAYMATE		PLAYMATE		1736
+pns10			MACH_PNS10		PNS10			1737
+eznavi			MACH_EZNAVI		EZNAVI			1738
+ps4000			MACH_PS4000		PS4000			1739
+ezx_a780		MACH_EZX_A780		EZX_A780		1740
+ezx_e680		MACH_EZX_E680		EZX_E680		1741
+ezx_a1200		MACH_EZX_A1200		EZX_A1200		1742
+ezx_e6			MACH_EZX_E6		EZX_E6			1743
+ezx_e2			MACH_EZX_E2		EZX_E2			1744
+ezx_a910		MACH_EZX_A910		EZX_A910		1745
+cwmx31			MACH_CWMX31		CWMX31			1746
+sl2312			MACH_SL2312		SL2312			1747
+blenny			MACH_BLENNY		BLENNY			1748
+ds107			MACH_DS107		DS107			1749
+dsx07			MACH_DSX07		DSX07			1750
+picocom1		MACH_PICOCOM1		PICOCOM1		1751
+lynx_wolverine		MACH_LYNX_WOLVERINE	LYNX_WOLVERINE		1752
+ubisys_p9_sc19		MACH_UBISYS_P9_SC19	UBISYS_P9_SC19		1753
+kratos_low		MACH_KRATOS_LOW		KRATOS_LOW		1754
+m700			MACH_M700		M700			1755
+edmini_v2		MACH_EDMINI_V2		EDMINI_V2		1756
+zipit2			MACH_ZIPIT2		ZIPIT2			1757
+hslfemtocell		MACH_HSLFEMTOCELL	HSLFEMTOCELL		1758
+daintree_at91		MACH_DAINTREE_AT91	DAINTREE_AT91		1759
+sg560usb		MACH_SG560USB		SG560USB		1760
+omap3_pandora		MACH_OMAP3_PANDORA	OMAP3_PANDORA		1761
+usr8200			MACH_USR8200		USR8200			1762
+s1s65k			MACH_S1S65K		S1S65K			1763
+s2s65a			MACH_S2S65A		S2S65A			1764
+icore			MACH_ICORE		ICORE			1765
+mss2			MACH_MSS2		MSS2			1766
+belmont			MACH_BELMONT		BELMONT			1767
+asusp525		MACH_ASUSP525		ASUSP525		1768
+lb88rc8480		MACH_LB88RC8480		LB88RC8480		1769
+hipxa			MACH_HIPXA		HIPXA			1770
+mx25_3ds		MACH_MX25_3DS		MX25_3DS		1771
+m800			MACH_M800		M800			1772
+omap3530_lv_som		MACH_OMAP3530_LV_SOM	OMAP3530_LV_SOM		1773
+prima_evb		MACH_PRIMA_EVB		PRIMA_EVB		1774
+mx31bt1			MACH_MX31BT1		MX31BT1			1775
+atlas4_evb		MACH_ATLAS4_EVB		ATLAS4_EVB		1776
+mx31cicada		MACH_MX31CICADA		MX31CICADA		1777
+mi424wr			MACH_MI424WR		MI424WR			1778
+axs_ultrax		MACH_AXS_ULTRAX		AXS_ULTRAX		1779
+at572d940deb		MACH_AT572D940DEB	AT572D940DEB		1780
+davinci_da8xx_evm	MACH_DAVINCI_DA8XX_EVM	DAVINCI_DA8XX_EVM	1781
+ep9302			MACH_EP9302		EP9302			1782
+at572d940hfeb		MACH_AT572D940HFEB	AT572D940HFEB		1783
+cybook3			MACH_CYBOOK3		CYBOOK3			1784
+wdg002			MACH_WDG002		WDG002			1785
+sg560adsl		MACH_SG560ADSL		SG560ADSL		1786
+nextio_n2800_ica	MACH_NEXTIO_N2800_ICA	NEXTIO_N2800_ICA	1787
+marvell_newdb		MACH_MARVELL_NEWDB	MARVELL_NEWDB		1789
+vandihud		MACH_VANDIHUD		VANDIHUD		1790
+magx_e8			MACH_MAGX_E8		MAGX_E8			1791
+magx_z6			MACH_MAGX_Z6		MAGX_Z6			1792
+magx_v8			MACH_MAGX_V8		MAGX_V8			1793
+magx_u9			MACH_MAGX_U9		MAGX_U9			1794
+toughcf08		MACH_TOUGHCF08		TOUGHCF08		1795
+zw4400			MACH_ZW4400		ZW4400			1796
+marat91			MACH_MARAT91		MARAT91			1797
+overo			MACH_OVERO		OVERO			1798
+at2440evb		MACH_AT2440EVB		AT2440EVB		1799
+neocore926		MACH_NEOCORE926		NEOCORE926		1800
+wnr854t			MACH_WNR854T		WNR854T			1801
+imx27			MACH_IMX27		IMX27			1802
+moose_db		MACH_MOOSE_DB		MOOSE_DB		1803
+fab4			MACH_FAB4		FAB4			1804
+htcdiamond		MACH_HTCDIAMOND		HTCDIAMOND		1805
+fiona			MACH_FIONA		FIONA			1806
+mxc30030_x		MACH_MXC30030_X		MXC30030_X		1807
+bmp1000			MACH_BMP1000		BMP1000			1808
+logi9200		MACH_LOGI9200		LOGI9200		1809
+tqma31			MACH_TQMA31		TQMA31			1810
+ccw9p9215js		MACH_CCW9P9215JS	CCW9P9215JS		1811
+rd88f5181l_ge		MACH_RD88F5181L_GE	RD88F5181L_GE		1812
+sifmain			MACH_SIFMAIN		SIFMAIN			1813
+sam9_l9261		MACH_SAM9_L9261		SAM9_L9261		1814
+cc9m2443js		MACH_CC9M2443JS		CC9M2443JS		1815
+xaria300		MACH_XARIA300		XARIA300		1816
+it9200			MACH_IT9200		IT9200			1817
+rd88f5181l_fxo		MACH_RD88F5181L_FXO	RD88F5181L_FXO		1818
+kriss_sensor		MACH_KRISS_SENSOR	KRISS_SENSOR		1819
+pilz_pmi5		MACH_PILZ_PMI5		PILZ_PMI5		1820
+jade			MACH_JADE		JADE			1821
+ks8695_softplc		MACH_KS8695_SOFTPLC	KS8695_SOFTPLC		1822
+gprisc4			MACH_GPRISC4		GPRISC4			1823
+stamp9260		MACH_STAMP9260		STAMP9260		1824
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index 082c31dcfd9..39752cdef6f 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -558,8 +558,6 @@ static struct iosapic_rte_info * __init_refok iosapic_alloc_rte (void)
 	if (!iosapic_kmalloc_ok && list_empty(&free_rte_list)) {
 		rte = alloc_bootmem(sizeof(struct iosapic_rte_info) *
 				    NR_PREALLOCATE_RTE_ENTRIES);
-		if (!rte)
-			return NULL;
 		for (i = 0; i < NR_PREALLOCATE_RTE_ENTRIES; i++, rte++)
 			list_add(&rte->rte_list, &free_rte_list);
 	}
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index f48a809c686..4ae15c8c248 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -578,8 +578,6 @@ setup_arch (char **cmdline_p)
 	cpu_init();	/* initialize the bootstrap CPU */
 	mmu_context_init();	/* initialize context_id bitmap */
 
-	check_sal_cache_flush();
-
 #ifdef CONFIG_ACPI
 	acpi_boot_init();
 #endif
@@ -607,6 +605,7 @@ setup_arch (char **cmdline_p)
 		ia64_mca_init();
 
 	platform_setup(cmdline_p);
+	check_sal_cache_flush();
 	paging_init();
 }
 
diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c
index 49d3120415e..e585f9a2afb 100644
--- a/arch/ia64/sn/kernel/sn2/sn2_smp.c
+++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c
@@ -512,6 +512,8 @@ static ssize_t sn2_ptc_proc_write(struct file *file, const char __user *user, si
 	int cpu;
 	char optstr[64];
 
+	if (count == 0 || count > sizeof(optstr))
+		return -EINVAL;
 	if (copy_from_user(optstr, user, count))
 		return -EFAULT;
 	optstr[count - 1] = '\0';
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 52e18e6d2ba..e0edaaa6920 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -383,6 +383,7 @@ config VMI
 config KVM_CLOCK
 	bool "KVM paravirtualized clock"
 	select PARAVIRT
+	select PARAVIRT_CLOCK
 	depends on !(X86_VISWS || X86_VOYAGER)
 	help
 	  Turning on this option will allow you to run a paravirtualized clock
@@ -410,6 +411,10 @@ config PARAVIRT
 	  over full virtualization.  However, when run without a hypervisor
 	  the kernel is theoretically slower and slightly larger.
 
+config PARAVIRT_CLOCK
+	bool
+	default n
+
 endif
 
 config MEMTEST_BOOTPARAM
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 5e618c3b472..77807d4769c 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -82,6 +82,7 @@ obj-$(CONFIG_VMI)		+= vmi_32.o vmiclock_32.o
 obj-$(CONFIG_KVM_GUEST)		+= kvm.o
 obj-$(CONFIG_KVM_CLOCK)		+= kvmclock.o
 obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt_patch_$(BITS).o
+obj-$(CONFIG_PARAVIRT_CLOCK)	+= pvclock.o
 
 obj-$(CONFIG_PCSPKR_PLATFORM)	+= pcspeaker.o
 
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 08a30986d47..87edf1ceb1d 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -18,6 +18,7 @@
 
 #include <linux/clocksource.h>
 #include <linux/kvm_para.h>
+#include <asm/pvclock.h>
 #include <asm/arch_hooks.h>
 #include <asm/msr.h>
 #include <asm/apic.h>
@@ -36,18 +37,9 @@ static int parse_no_kvmclock(char *arg)
 early_param("no-kvmclock", parse_no_kvmclock);
 
 /* The hypervisor will put information about time periodically here */
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct kvm_vcpu_time_info, hv_clock);
-#define get_clock(cpu, field) per_cpu(hv_clock, cpu).field
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct pvclock_vcpu_time_info, hv_clock);
+static struct pvclock_wall_clock wall_clock;
 
-static inline u64 kvm_get_delta(u64 last_tsc)
-{
-	int cpu = smp_processor_id();
-	u64 delta = native_read_tsc() - last_tsc;
-	return (delta * get_clock(cpu, tsc_to_system_mul)) >> KVM_SCALE;
-}
-
-static struct kvm_wall_clock wall_clock;
-static cycle_t kvm_clock_read(void);
 /*
  * The wallclock is the time of day when we booted. Since then, some time may
  * have elapsed since the hypervisor wrote the data. So we try to account for
@@ -55,64 +47,37 @@ static cycle_t kvm_clock_read(void);
  */
 static unsigned long kvm_get_wallclock(void)
 {
-	u32 wc_sec, wc_nsec;
-	u64 delta;
+	struct pvclock_vcpu_time_info *vcpu_time;
 	struct timespec ts;
-	int version, nsec;
 	int low, high;
 
 	low = (int)__pa(&wall_clock);
 	high = ((u64)__pa(&wall_clock) >> 32);
+	native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
 
-	delta = kvm_clock_read();
+	vcpu_time = &get_cpu_var(hv_clock);
+	pvclock_read_wallclock(&wall_clock, vcpu_time, &ts);
+	put_cpu_var(hv_clock);
 
-	native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
-	do {
-		version = wall_clock.wc_version;
-		rmb();
-		wc_sec = wall_clock.wc_sec;
-		wc_nsec = wall_clock.wc_nsec;
-		rmb();
-	} while ((wall_clock.wc_version != version) || (version & 1));
-
-	delta = kvm_clock_read() - delta;
-	delta += wc_nsec;
-	nsec = do_div(delta, NSEC_PER_SEC);
-	set_normalized_timespec(&ts, wc_sec + delta, nsec);
-	/*
-	 * Of all mechanisms of time adjustment I've tested, this one
-	 * was the champion!
-	 */
-	return ts.tv_sec + 1;
+	return ts.tv_sec;
 }
 
 static int kvm_set_wallclock(unsigned long now)
 {
-	return 0;
+	return -1;
 }
 
-/*
- * This is our read_clock function. The host puts an tsc timestamp each time
- * it updates a new time. Without the tsc adjustment, we can have a situation
- * in which a vcpu starts to run earlier (smaller system_time), but probes
- * time later (compared to another vcpu), leading to backwards time
- */
 static cycle_t kvm_clock_read(void)
 {
-	u64 last_tsc, now;
-	int cpu;
+	struct pvclock_vcpu_time_info *src;
+	cycle_t ret;
 
-	preempt_disable();
-	cpu = smp_processor_id();
-
-	last_tsc = get_clock(cpu, tsc_timestamp);
-	now = get_clock(cpu, system_time);
-
-	now += kvm_get_delta(last_tsc);
-	preempt_enable();
-
-	return now;
+	src = &get_cpu_var(hv_clock);
+	ret = pvclock_clocksource_read(src);
+	put_cpu_var(hv_clock);
+	return ret;
 }
+
 static struct clocksource kvm_clock = {
 	.name = "kvm-clock",
 	.read = kvm_clock_read,
@@ -123,13 +88,14 @@ static struct clocksource kvm_clock = {
 	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
-static int kvm_register_clock(void)
+static int kvm_register_clock(char *txt)
 {
 	int cpu = smp_processor_id();
 	int low, high;
 	low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1;
 	high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32);
-
+	printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n",
+	       cpu, high, low, txt);
 	return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high);
 }
 
@@ -140,12 +106,20 @@ static void kvm_setup_secondary_clock(void)
 	 * Now that the first cpu already had this clocksource initialized,
 	 * we shouldn't fail.
 	 */
-	WARN_ON(kvm_register_clock());
+	WARN_ON(kvm_register_clock("secondary cpu clock"));
 	/* ok, done with our trickery, call native */
 	setup_secondary_APIC_clock();
 }
 #endif
 
+#ifdef CONFIG_SMP
+void __init kvm_smp_prepare_boot_cpu(void)
+{
+	WARN_ON(kvm_register_clock("primary cpu clock"));
+	native_smp_prepare_boot_cpu();
+}
+#endif
+
 /*
  * After the clock is registered, the host will keep writing to the
  * registered memory location. If the guest happens to shutdown, this memory
@@ -174,7 +148,7 @@ void __init kvmclock_init(void)
 		return;
 
 	if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) {
-		if (kvm_register_clock())
+		if (kvm_register_clock("boot clock"))
 			return;
 		pv_time_ops.get_wallclock = kvm_get_wallclock;
 		pv_time_ops.set_wallclock = kvm_set_wallclock;
@@ -182,6 +156,9 @@ void __init kvmclock_init(void)
 #ifdef CONFIG_X86_LOCAL_APIC
 		pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock;
 #endif
+#ifdef CONFIG_SMP
+		smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
+#endif
 		machine_ops.shutdown  = kvm_shutdown;
 #ifdef CONFIG_KEXEC
 		machine_ops.crash_shutdown  = kvm_crash_shutdown;
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
new file mode 100644
index 00000000000..05fbe9a0325
--- /dev/null
+++ b/arch/x86/kernel/pvclock.c
@@ -0,0 +1,141 @@
+/*  paravirtual clock -- common code used by kvm/xen
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+*/
+
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+#include <asm/pvclock.h>
+
+/*
+ * These are perodically updated
+ *    xen: magic shared_info page
+ *    kvm: gpa registered via msr
+ * and then copied here.
+ */
+struct pvclock_shadow_time {
+	u64 tsc_timestamp;     /* TSC at last update of time vals.  */
+	u64 system_timestamp;  /* Time, in nanosecs, since boot.    */
+	u32 tsc_to_nsec_mul;
+	int tsc_shift;
+	u32 version;
+};
+
+/*
+ * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
+ * yielding a 64-bit result.
+ */
+static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
+{
+	u64 product;
+#ifdef __i386__
+	u32 tmp1, tmp2;
+#endif
+
+	if (shift < 0)
+		delta >>= -shift;
+	else
+		delta <<= shift;
+
+#ifdef __i386__
+	__asm__ (
+		"mul  %5       ; "
+		"mov  %4,%%eax ; "
+		"mov  %%edx,%4 ; "
+		"mul  %5       ; "
+		"xor  %5,%5    ; "
+		"add  %4,%%eax ; "
+		"adc  %5,%%edx ; "
+		: "=A" (product), "=r" (tmp1), "=r" (tmp2)
+		: "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
+#elif __x86_64__
+	__asm__ (
+		"mul %%rdx ; shrd $32,%%rdx,%%rax"
+		: "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
+#else
+#error implement me!
+#endif
+
+	return product;
+}
+
+static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow)
+{
+	u64 delta = native_read_tsc() - shadow->tsc_timestamp;
+	return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
+}
+
+/*
+ * Reads a consistent set of time-base values from hypervisor,
+ * into a shadow data area.
+ */
+static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst,
+					struct pvclock_vcpu_time_info *src)
+{
+	do {
+		dst->version = src->version;
+		rmb();		/* fetch version before data */
+		dst->tsc_timestamp     = src->tsc_timestamp;
+		dst->system_timestamp  = src->system_time;
+		dst->tsc_to_nsec_mul   = src->tsc_to_system_mul;
+		dst->tsc_shift         = src->tsc_shift;
+		rmb();		/* test version after fetching data */
+	} while ((src->version & 1) || (dst->version != src->version));
+
+	return dst->version;
+}
+
+cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
+{
+	struct pvclock_shadow_time shadow;
+	unsigned version;
+	cycle_t ret, offset;
+
+	do {
+		version = pvclock_get_time_values(&shadow, src);
+		barrier();
+		offset = pvclock_get_nsec_offset(&shadow);
+		ret = shadow.system_timestamp + offset;
+		barrier();
+	} while (version != src->version);
+
+	return ret;
+}
+
+void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
+			    struct pvclock_vcpu_time_info *vcpu_time,
+			    struct timespec *ts)
+{
+	u32 version;
+	u64 delta;
+	struct timespec now;
+
+	/* get wallclock at system boot */
+	do {
+		version = wall_clock->version;
+		rmb();		/* fetch version before time */
+		now.tv_sec  = wall_clock->sec;
+		now.tv_nsec = wall_clock->nsec;
+		rmb();		/* fetch time before checking version */
+	} while ((wall_clock->version & 1) || (version != wall_clock->version));
+
+	delta = pvclock_clocksource_read(vcpu_time);	/* time since system boot */
+	delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec;
+
+	now.tv_nsec = do_div(delta, NSEC_PER_SEC);
+	now.tv_sec = delta;
+
+	set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
+}
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index f2f5d260874..3829aa7b663 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -200,9 +200,12 @@ int __pit_timer_fn(struct kvm_kpit_state *ps)
 
 	atomic_inc(&pt->pending);
 	smp_mb__after_atomic_inc();
-	if (vcpu0 && waitqueue_active(&vcpu0->wq)) {
-		vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE;
-		wake_up_interruptible(&vcpu0->wq);
+	if (vcpu0) {
+		set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests);
+		if (waitqueue_active(&vcpu0->wq)) {
+			vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+			wake_up_interruptible(&vcpu0->wq);
+		}
 	}
 
 	pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index c297c50eba6..ebc03f5ae16 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -940,6 +940,7 @@ static int __apic_timer_fn(struct kvm_lapic *apic)
 	wait_queue_head_t *q = &apic->vcpu->wq;
 
 	atomic_inc(&apic->timer.pending);
+	set_bit(KVM_REQ_PENDING_TIMER, &apic->vcpu->requests);
 	if (waitqueue_active(q)) {
 		apic->vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 		wake_up_interruptible(q);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ee3f53098f0..7e7c3969f7a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -640,6 +640,7 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
 			rmap_remove(kvm, spte);
 			--kvm->stat.lpages;
 			set_shadow_pte(spte, shadow_trap_nonpresent_pte);
+			spte = NULL;
 			write_protected = 1;
 		}
 		spte = rmap_next(kvm, rmapp, spte);
@@ -1082,10 +1083,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 		struct kvm_mmu_page *shadow;
 
 		spte |= PT_WRITABLE_MASK;
-		if (user_fault) {
-			mmu_unshadow(vcpu->kvm, gfn);
-			goto unshadowed;
-		}
 
 		shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn);
 		if (shadow ||
@@ -1102,8 +1099,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 		}
 	}
 
-unshadowed:
-
 	if (pte_access & ACC_WRITE_MASK)
 		mark_page_dirty(vcpu->kvm, gfn);
 
@@ -1580,11 +1575,13 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
 				  u64 *spte,
 				  const void *new)
 {
-	if ((sp->role.level != PT_PAGE_TABLE_LEVEL)
-	    && !vcpu->arch.update_pte.largepage) {
-		++vcpu->kvm->stat.mmu_pde_zapped;
-		return;
-	}
+	if (sp->role.level != PT_PAGE_TABLE_LEVEL) {
+		if (!vcpu->arch.update_pte.largepage ||
+		    sp->role.glevels == PT32_ROOT_LEVEL) {
+			++vcpu->kvm->stat.mmu_pde_zapped;
+			return;
+		}
+        }
 
 	++vcpu->kvm->stat.mmu_pte_updated;
 	if (sp->role.glevels == PT32_ROOT_LEVEL)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 02efbe75f31..540e9517907 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -566,7 +566,7 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
 	load_transition_efer(vmx);
 }
 
-static void vmx_load_host_state(struct vcpu_vmx *vmx)
+static void __vmx_load_host_state(struct vcpu_vmx *vmx)
 {
 	unsigned long flags;
 
@@ -596,6 +596,13 @@ static void vmx_load_host_state(struct vcpu_vmx *vmx)
 	reload_host_efer(vmx);
 }
 
+static void vmx_load_host_state(struct vcpu_vmx *vmx)
+{
+	preempt_disable();
+	__vmx_load_host_state(vmx);
+	preempt_enable();
+}
+
 /*
  * Switches to specified vcpu, until a matching vcpu_put(), but assumes
  * vcpu mutex is already taken.
@@ -654,7 +661,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
 static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
 {
-	vmx_load_host_state(to_vmx(vcpu));
+	__vmx_load_host_state(to_vmx(vcpu));
 }
 
 static void vmx_fpu_activate(struct kvm_vcpu *vcpu)
@@ -884,11 +891,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 	switch (msr_index) {
 #ifdef CONFIG_X86_64
 	case MSR_EFER:
+		vmx_load_host_state(vmx);
 		ret = kvm_set_msr_common(vcpu, msr_index, data);
-		if (vmx->host_state.loaded) {
-			reload_host_efer(vmx);
-			load_transition_efer(vmx);
-		}
 		break;
 	case MSR_FS_BASE:
 		vmcs_writel(GUEST_FS_BASE, data);
@@ -910,11 +914,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
 		guest_write_tsc(data);
 		break;
 	default:
+		vmx_load_host_state(vmx);
 		msr = find_msr_entry(vmx, msr_index);
 		if (msr) {
 			msr->data = data;
-			if (vmx->host_state.loaded)
-				load_msrs(vmx->guest_msrs, vmx->save_nmsrs);
 			break;
 		}
 		ret = kvm_set_msr_common(vcpu, msr_index, data);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 00acf1301a1..63a77caa59f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -492,8 +492,8 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
 static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
 {
 	static int version;
-	struct kvm_wall_clock wc;
-	struct timespec wc_ts;
+	struct pvclock_wall_clock wc;
+	struct timespec now, sys, boot;
 
 	if (!wall_clock)
 		return;
@@ -502,10 +502,19 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
 
 	kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
 
-	wc_ts = current_kernel_time();
-	wc.wc_sec = wc_ts.tv_sec;
-	wc.wc_nsec = wc_ts.tv_nsec;
-	wc.wc_version = version;
+	/*
+	 * The guest calculates current wall clock time by adding
+	 * system time (updated by kvm_write_guest_time below) to the
+	 * wall clock specified here.  guest system time equals host
+	 * system time for us, thus we must fill in host boot time here.
+	 */
+	now = current_kernel_time();
+	ktime_get_ts(&sys);
+	boot = ns_to_timespec(timespec_to_ns(&now) - timespec_to_ns(&sys));
+
+	wc.sec = boot.tv_sec;
+	wc.nsec = boot.tv_nsec;
+	wc.version = version;
 
 	kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
 
@@ -513,6 +522,45 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
 	kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
 }
 
+static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
+{
+	uint32_t quotient, remainder;
+
+	/* Don't try to replace with do_div(), this one calculates
+	 * "(dividend << 32) / divisor" */
+	__asm__ ( "divl %4"
+		  : "=a" (quotient), "=d" (remainder)
+		  : "0" (0), "1" (dividend), "r" (divisor) );
+	return quotient;
+}
+
+static void kvm_set_time_scale(uint32_t tsc_khz, struct pvclock_vcpu_time_info *hv_clock)
+{
+	uint64_t nsecs = 1000000000LL;
+	int32_t  shift = 0;
+	uint64_t tps64;
+	uint32_t tps32;
+
+	tps64 = tsc_khz * 1000LL;
+	while (tps64 > nsecs*2) {
+		tps64 >>= 1;
+		shift--;
+	}
+
+	tps32 = (uint32_t)tps64;
+	while (tps32 <= (uint32_t)nsecs) {
+		tps32 <<= 1;
+		shift++;
+	}
+
+	hv_clock->tsc_shift = shift;
+	hv_clock->tsc_to_system_mul = div_frac(nsecs, tps32);
+
+	pr_debug("%s: tsc_khz %u, tsc_shift %d, tsc_mul %u\n",
+		 __FUNCTION__, tsc_khz, hv_clock->tsc_shift,
+		 hv_clock->tsc_to_system_mul);
+}
+
 static void kvm_write_guest_time(struct kvm_vcpu *v)
 {
 	struct timespec ts;
@@ -523,6 +571,11 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
 	if ((!vcpu->time_page))
 		return;
 
+	if (unlikely(vcpu->hv_clock_tsc_khz != tsc_khz)) {
+		kvm_set_time_scale(tsc_khz, &vcpu->hv_clock);
+		vcpu->hv_clock_tsc_khz = tsc_khz;
+	}
+
 	/* Keep irq disabled to prevent changes to the clock */
 	local_irq_save(flags);
 	kvm_get_msr(v, MSR_IA32_TIME_STAMP_COUNTER,
@@ -537,14 +590,14 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
 	/*
 	 * The interface expects us to write an even number signaling that the
 	 * update is finished. Since the guest won't see the intermediate
-	 * state, we just write "2" at the end
+	 * state, we just increase by 2 at the end.
 	 */
-	vcpu->hv_clock.version = 2;
+	vcpu->hv_clock.version += 2;
 
 	shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0);
 
 	memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
-		sizeof(vcpu->hv_clock));
+	       sizeof(vcpu->hv_clock));
 
 	kunmap_atomic(shared_kaddr, KM_USER0);
 
@@ -599,10 +652,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 		/* ...but clean it before doing the actual write */
 		vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);
 
-		vcpu->arch.hv_clock.tsc_to_system_mul =
-					clocksource_khz2mult(tsc_khz, 22);
-		vcpu->arch.hv_clock.tsc_shift = 22;
-
 		down_read(&current->mm->mmap_sem);
 		vcpu->arch.time_page =
 				gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);
@@ -2759,6 +2808,8 @@ again:
 	if (vcpu->requests) {
 		if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests))
 			__kvm_migrate_timers(vcpu);
+		if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
+			kvm_x86_ops->tlb_flush(vcpu);
 		if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS,
 				       &vcpu->requests)) {
 			kvm_run->exit_reason = KVM_EXIT_TPR_ACCESS;
@@ -2772,6 +2823,7 @@ again:
 		}
 	}
 
+	clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
 	kvm_inject_pending_timer_irqs(vcpu);
 
 	preempt_disable();
@@ -2781,21 +2833,13 @@ again:
 
 	local_irq_disable();
 
-	if (need_resched()) {
+	if (vcpu->requests || need_resched()) {
 		local_irq_enable();
 		preempt_enable();
 		r = 1;
 		goto out;
 	}
 
-	if (vcpu->requests)
-		if (test_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) {
-			local_irq_enable();
-			preempt_enable();
-			r = 1;
-			goto out;
-		}
-
 	if (signal_pending(current)) {
 		local_irq_enable();
 		preempt_enable();
@@ -2825,9 +2869,6 @@ again:
 
 	kvm_guest_enter();
 
-	if (vcpu->requests)
-		if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
-			kvm_x86_ops->tlb_flush(vcpu);
 
 	KVMTRACE_0D(VMENTRY, vcpu, entryexit);
 	kvm_x86_ops->run(vcpu, kvm_run);
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 2e641be2737..6c388e593bc 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -5,8 +5,9 @@
 config XEN
 	bool "Xen guest support"
 	select PARAVIRT
+	select PARAVIRT_CLOCK
 	depends on X86_32
-	depends on X86_CMPXCHG && X86_TSC && !(X86_VISWS || X86_VOYAGER)
+	depends on X86_CMPXCHG && X86_TSC && X86_PAE && !(X86_VISWS || X86_VOYAGER)
 	help
 	  This is the Linux Xen port.  Enabling this will allow the
 	  kernel to boot in a paravirtualized environment under the
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c8a56e457d6..f09c1c69c37 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -785,38 +785,35 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte)
 static __init void xen_pagetable_setup_start(pgd_t *base)
 {
 	pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base;
+	int i;
 
 	/* special set_pte for pagetable initialization */
 	pv_mmu_ops.set_pte = xen_set_pte_init;
 
 	init_mm.pgd = base;
 	/*
-	 * copy top-level of Xen-supplied pagetable into place.	 For
-	 * !PAE we can use this as-is, but for PAE it is a stand-in
-	 * while we copy the pmd pages.
+	 * copy top-level of Xen-supplied pagetable into place.  This
+	 * is a stand-in while we copy the pmd pages.
 	 */
 	memcpy(base, xen_pgd, PTRS_PER_PGD * sizeof(pgd_t));
 
-	if (PTRS_PER_PMD > 1) {
-		int i;
-		/*
-		 * For PAE, need to allocate new pmds, rather than
-		 * share Xen's, since Xen doesn't like pmd's being
-		 * shared between address spaces.
-		 */
-		for (i = 0; i < PTRS_PER_PGD; i++) {
-			if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) {
-				pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
+	/*
+	 * For PAE, need to allocate new pmds, rather than
+	 * share Xen's, since Xen doesn't like pmd's being
+	 * shared between address spaces.
+	 */
+	for (i = 0; i < PTRS_PER_PGD; i++) {
+		if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) {
+			pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
 
-				memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]),
-				       PAGE_SIZE);
+			memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]),
+			       PAGE_SIZE);
 
-				make_lowmem_page_readonly(pmd);
+			make_lowmem_page_readonly(pmd);
 
-				set_pgd(&base[i], __pgd(1 + __pa(pmd)));
-			} else
-				pgd_clear(&base[i]);
-		}
+			set_pgd(&base[i], __pgd(1 + __pa(pmd)));
+		} else
+			pgd_clear(&base[i]);
 	}
 
 	/* make sure zero_page is mapped RO so we can use it in pagetables */
@@ -873,17 +870,7 @@ static __init void xen_pagetable_setup_done(pgd_t *base)
 
 	/* Actually pin the pagetable down, but we can't set PG_pinned
 	   yet because the page structures don't exist yet. */
-	{
-		unsigned level;
-
-#ifdef CONFIG_X86_PAE
-		level = MMUEXT_PIN_L3_TABLE;
-#else
-		level = MMUEXT_PIN_L2_TABLE;
-#endif
-
-		pin_pagetable_pfn(level, PFN_DOWN(__pa(base)));
-	}
+	pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base)));
 }
 
 /* This is called once we have the cpu_possible_map */
@@ -1093,7 +1080,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 	.make_pte = xen_make_pte,
 	.make_pgd = xen_make_pgd,
 
-#ifdef CONFIG_X86_PAE
 	.set_pte_atomic = xen_set_pte_atomic,
 	.set_pte_present = xen_set_pte_at,
 	.set_pud = xen_set_pud,
@@ -1102,7 +1088,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 
 	.make_pmd = xen_make_pmd,
 	.pmd_val = xen_pmd_val,
-#endif	/* PAE */
 
 	.activate_mm = xen_activate_mm,
 	.dup_mmap = xen_dup_mmap,
@@ -1228,6 +1213,11 @@ asmlinkage void __init xen_start_kernel(void)
 	if (xen_feature(XENFEAT_supervisor_mode_kernel))
 		pv_info.kernel_rpl = 0;
 
+	/* Prevent unwanted bits from being set in PTEs. */
+	__supported_pte_mask &= ~_PAGE_GLOBAL;
+	if (!is_initial_xendomain())
+		__supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
+
 	/* set the limit of our address space */
 	xen_reserve_top();
 
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 3525ef523a7..df40bf74ea7 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -179,50 +179,56 @@ out:
 		preempt_enable();
 }
 
-pteval_t xen_pte_val(pte_t pte)
+/* Assume pteval_t is equivalent to all the other *val_t types. */
+static pteval_t pte_mfn_to_pfn(pteval_t val)
+{
+	if (val & _PAGE_PRESENT) {
+		unsigned long mfn = (val & PTE_MASK) >> PAGE_SHIFT;
+		pteval_t flags = val & ~PTE_MASK;
+		val = (mfn_to_pfn(mfn) << PAGE_SHIFT) | flags;
+	}
+
+	return val;
+}
+
+static pteval_t pte_pfn_to_mfn(pteval_t val)
 {
-	pteval_t ret = pte.pte;
+	if (val & _PAGE_PRESENT) {
+		unsigned long pfn = (val & PTE_MASK) >> PAGE_SHIFT;
+		pteval_t flags = val & ~PTE_MASK;
+		val = (pfn_to_mfn(pfn) << PAGE_SHIFT) | flags;
+	}
 
-	if (ret & _PAGE_PRESENT)
-		ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
+	return val;
+}
 
-	return ret;
+pteval_t xen_pte_val(pte_t pte)
+{
+	return pte_mfn_to_pfn(pte.pte);
 }
 
 pgdval_t xen_pgd_val(pgd_t pgd)
 {
-	pgdval_t ret = pgd.pgd;
-	if (ret & _PAGE_PRESENT)
-		ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
-	return ret;
+	return pte_mfn_to_pfn(pgd.pgd);
 }
 
 pte_t xen_make_pte(pteval_t pte)
 {
-	if (pte & _PAGE_PRESENT) {
-		pte = phys_to_machine(XPADDR(pte)).maddr;
-		pte &= ~(_PAGE_PCD | _PAGE_PWT);
-	}
-
-	return (pte_t){ .pte = pte };
+	pte = pte_pfn_to_mfn(pte);
+	return native_make_pte(pte);
 }
 
 pgd_t xen_make_pgd(pgdval_t pgd)
 {
-	if (pgd & _PAGE_PRESENT)
-		pgd = phys_to_machine(XPADDR(pgd)).maddr;
-
-	return (pgd_t){ pgd };
+	pgd = pte_pfn_to_mfn(pgd);
+	return native_make_pgd(pgd);
 }
 
 pmdval_t xen_pmd_val(pmd_t pmd)
 {
-	pmdval_t ret = native_pmd_val(pmd);
-	if (ret & _PAGE_PRESENT)
-		ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
-	return ret;
+	return pte_mfn_to_pfn(pmd.pmd);
 }
-#ifdef CONFIG_X86_PAE
+
 void xen_set_pud(pud_t *ptr, pud_t val)
 {
 	struct multicall_space mcs;
@@ -267,17 +273,9 @@ void xen_pmd_clear(pmd_t *pmdp)
 
 pmd_t xen_make_pmd(pmdval_t pmd)
 {
-	if (pmd & _PAGE_PRESENT)
-		pmd = phys_to_machine(XPADDR(pmd)).maddr;
-
+	pmd = pte_pfn_to_mfn(pmd);
 	return native_make_pmd(pmd);
 }
-#else  /* !PAE */
-void xen_set_pte(pte_t *ptep, pte_t pte)
-{
-	*ptep = pte;
-}
-#endif	/* CONFIG_X86_PAE */
 
 /*
   (Yet another) pagetable walker.  This one is intended for pinning a
@@ -430,8 +428,6 @@ static int pin_page(struct page *page, enum pt_level level)
    read-only, and can be pinned. */
 void xen_pgd_pin(pgd_t *pgd)
 {
-	unsigned level;
-
 	xen_mc_batch();
 
 	if (pgd_walk(pgd, pin_page, TASK_SIZE)) {
@@ -441,14 +437,7 @@ void xen_pgd_pin(pgd_t *pgd)
 		xen_mc_batch();
 	}
 
-#ifdef CONFIG_X86_PAE
-	level = MMUEXT_PIN_L3_TABLE;
-#else
-	level = MMUEXT_PIN_L2_TABLE;
-#endif
-
-	xen_do_pin(level, PFN_DOWN(__pa(pgd)));
-
+	xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd)));
 	xen_mc_issue(0);
 }
 
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index b5e189b1519..5fe961caffd 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -37,14 +37,13 @@ void xen_exit_mmap(struct mm_struct *mm);
 void xen_pgd_pin(pgd_t *pgd);
 //void xen_pgd_unpin(pgd_t *pgd);
 
-#ifdef CONFIG_X86_PAE
-unsigned long long xen_pte_val(pte_t);
-unsigned long long xen_pmd_val(pmd_t);
-unsigned long long xen_pgd_val(pgd_t);
+pteval_t xen_pte_val(pte_t);
+pmdval_t xen_pmd_val(pmd_t);
+pgdval_t xen_pgd_val(pgd_t);
 
-pte_t xen_make_pte(unsigned long long);
-pmd_t xen_make_pmd(unsigned long long);
-pgd_t xen_make_pgd(unsigned long long);
+pte_t xen_make_pte(pteval_t);
+pmd_t xen_make_pmd(pmdval_t);
+pgd_t xen_make_pgd(pgdval_t);
 
 void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
 		    pte_t *ptep, pte_t pteval);
@@ -53,15 +52,4 @@ void xen_set_pud(pud_t *ptr, pud_t val);
 void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
 void xen_pmd_clear(pmd_t *pmdp);
 
-
-#else
-unsigned long xen_pte_val(pte_t);
-unsigned long xen_pmd_val(pmd_t);
-unsigned long xen_pgd_val(pgd_t);
-
-pte_t xen_make_pte(unsigned long);
-pmd_t xen_make_pmd(unsigned long);
-pgd_t xen_make_pgd(unsigned long);
-#endif
-
 #endif	/* _XEN_MMU_H */
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 52b2e385698..41e217503c9 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -14,6 +14,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/math64.h>
 
+#include <asm/pvclock.h>
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
 
@@ -31,17 +32,6 @@
 
 static cycle_t xen_clocksource_read(void);
 
-/* These are perodically updated in shared_info, and then copied here. */
-struct shadow_time_info {
-	u64 tsc_timestamp;     /* TSC at last update of time vals.  */
-	u64 system_timestamp;  /* Time, in nanosecs, since boot.    */
-	u32 tsc_to_nsec_mul;
-	int tsc_shift;
-	u32 version;
-};
-
-static DEFINE_PER_CPU(struct shadow_time_info, shadow_time);
-
 /* runstate info updated by Xen */
 static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);
 
@@ -211,7 +201,7 @@ unsigned long long xen_sched_clock(void)
 unsigned long xen_cpu_khz(void)
 {
 	u64 xen_khz = 1000000ULL << 32;
-	const struct vcpu_time_info *info =
+	const struct pvclock_vcpu_time_info *info =
 		&HYPERVISOR_shared_info->vcpu_info[0].time;
 
 	do_div(xen_khz, info->tsc_to_system_mul);
@@ -223,121 +213,26 @@ unsigned long xen_cpu_khz(void)
 	return xen_khz;
 }
 
-/*
- * Reads a consistent set of time-base values from Xen, into a shadow data
- * area.
- */
-static unsigned get_time_values_from_xen(void)
-{
-	struct vcpu_time_info   *src;
-	struct shadow_time_info *dst;
-
-	/* src is shared memory with the hypervisor, so we need to
-	   make sure we get a consistent snapshot, even in the face of
-	   being preempted. */
-	src = &__get_cpu_var(xen_vcpu)->time;
-	dst = &__get_cpu_var(shadow_time);
-
-	do {
-		dst->version = src->version;
-		rmb();		/* fetch version before data */
-		dst->tsc_timestamp     = src->tsc_timestamp;
-		dst->system_timestamp  = src->system_time;
-		dst->tsc_to_nsec_mul   = src->tsc_to_system_mul;
-		dst->tsc_shift         = src->tsc_shift;
-		rmb();		/* test version after fetching data */
-	} while ((src->version & 1) | (dst->version ^ src->version));
-
-	return dst->version;
-}
-
-/*
- * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
- * yielding a 64-bit result.
- */
-static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
-{
-	u64 product;
-#ifdef __i386__
-	u32 tmp1, tmp2;
-#endif
-
-	if (shift < 0)
-		delta >>= -shift;
-	else
-		delta <<= shift;
-
-#ifdef __i386__
-	__asm__ (
-		"mul  %5       ; "
-		"mov  %4,%%eax ; "
-		"mov  %%edx,%4 ; "
-		"mul  %5       ; "
-		"xor  %5,%5    ; "
-		"add  %4,%%eax ; "
-		"adc  %5,%%edx ; "
-		: "=A" (product), "=r" (tmp1), "=r" (tmp2)
-		: "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
-#elif __x86_64__
-	__asm__ (
-		"mul %%rdx ; shrd $32,%%rdx,%%rax"
-		: "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
-#else
-#error implement me!
-#endif
-
-	return product;
-}
-
-static u64 get_nsec_offset(struct shadow_time_info *shadow)
-{
-	u64 now, delta;
-	now = native_read_tsc();
-	delta = now - shadow->tsc_timestamp;
-	return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
-}
-
 static cycle_t xen_clocksource_read(void)
 {
-	struct shadow_time_info *shadow = &get_cpu_var(shadow_time);
+        struct pvclock_vcpu_time_info *src;
 	cycle_t ret;
-	unsigned version;
-
-	do {
-		version = get_time_values_from_xen();
-		barrier();
-		ret = shadow->system_timestamp + get_nsec_offset(shadow);
-		barrier();
-	} while (version != __get_cpu_var(xen_vcpu)->time.version);
-
-	put_cpu_var(shadow_time);
 
+	src = &get_cpu_var(xen_vcpu)->time;
+	ret = pvclock_clocksource_read(src);
+	put_cpu_var(xen_vcpu);
 	return ret;
 }
 
 static void xen_read_wallclock(struct timespec *ts)
 {
-	const struct shared_info *s = HYPERVISOR_shared_info;
-	u32 version;
-	u64 delta;
-	struct timespec now;
-
-	/* get wallclock at system boot */
-	do {
-		version = s->wc_version;
-		rmb();		/* fetch version before time */
-		now.tv_sec  = s->wc_sec;
-		now.tv_nsec = s->wc_nsec;
-		rmb();		/* fetch time before checking version */
-	} while ((s->wc_version & 1) | (version ^ s->wc_version));
+	struct shared_info *s = HYPERVISOR_shared_info;
+	struct pvclock_wall_clock *wall_clock = &(s->wc);
+        struct pvclock_vcpu_time_info *vcpu_time;
 
-	delta = xen_clocksource_read();	/* time since system boot */
-	delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec;
-
-	now.tv_nsec = do_div(delta, NSEC_PER_SEC);
-	now.tv_sec = delta;
-
-	set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
+	vcpu_time = &get_cpu_var(xen_vcpu)->time;
+	pvclock_read_wallclock(wall_clock, vcpu_time, ts);
+	put_cpu_var(xen_vcpu);
 }
 
 unsigned long xen_get_wallclock(void)
@@ -345,7 +240,6 @@ unsigned long xen_get_wallclock(void)
 	struct timespec ts;
 
 	xen_read_wallclock(&ts);
-
 	return ts.tv_sec;
 }
 
@@ -569,8 +463,6 @@ __init void xen_time_init(void)
 {
 	int cpu = smp_processor_id();
 
-	get_time_values_from_xen();
-
 	clocksource_register(&xen_clocksource);
 
 	if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) {
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 288d587ce73..6ec3b4f7719 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -17,7 +17,7 @@ ENTRY(startup_xen)
 
 	__FINIT
 
-.pushsection .bss.page_aligned
+.pushsection .text
 	.align PAGE_SIZE_asm
 ENTRY(hypercall_page)
 	.skip 0x1000
@@ -30,11 +30,7 @@ ENTRY(hypercall_page)
 	ELFNOTE(Xen, XEN_ELFNOTE_ENTRY,          .long  startup_xen)
 	ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long  hypercall_page)
 	ELFNOTE(Xen, XEN_ELFNOTE_FEATURES,       .asciz "!writable_page_tables|pae_pgdir_above_4gb")
-#ifdef CONFIG_X86_PAE
 	ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE,       .asciz "yes")
-#else
-	ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE,       .asciz "no")
-#endif
 	ELFNOTE(Xen, XEN_ELFNOTE_LOADER,         .asciz "generic")
 
 #endif /*CONFIG_XEN */
diff --git a/drivers/char/drm/i915_drv.c b/drivers/char/drm/i915_drv.c
index e8f3d682e3b..93aed1c38bd 100644
--- a/drivers/char/drm/i915_drv.c
+++ b/drivers/char/drm/i915_drv.c
@@ -389,6 +389,7 @@ static int i915_resume(struct drm_device *dev)
 	pci_restore_state(dev->pdev);
 	if (pci_enable_device(dev->pdev))
 		return -1;
+	pci_set_master(dev->pdev);
 
 	pci_write_config_byte(dev->pdev, LBB, dev_priv->saveLBB);
 
diff --git a/drivers/char/tty_ioctl.c b/drivers/char/tty_ioctl.c
index b1a757a5ee2..8f81139d619 100644
--- a/drivers/char/tty_ioctl.c
+++ b/drivers/char/tty_ioctl.c
@@ -981,16 +981,9 @@ EXPORT_SYMBOL_GPL(tty_perform_flush);
 int n_tty_ioctl(struct tty_struct *tty, struct file *file,
 		       unsigned int cmd, unsigned long arg)
 {
-	struct tty_struct *real_tty;
 	unsigned long flags;
 	int retval;
 
-	if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
-	    tty->driver->subtype == PTY_TYPE_MASTER)
-		real_tty = tty->link;
-	else
-		real_tty = tty;
-
 	switch (cmd) {
 	case TCXONC:
 		retval = tty_check_change(tty);
diff --git a/drivers/hwmon/abituguru3.c b/drivers/hwmon/abituguru3.c
index ed33fddc4de..f00f497b9ca 100644
--- a/drivers/hwmon/abituguru3.c
+++ b/drivers/hwmon/abituguru3.c
@@ -30,6 +30,7 @@
 #include <linux/platform_device.h>
 #include <linux/hwmon.h>
 #include <linux/hwmon-sysfs.h>
+#include <linux/dmi.h>
 #include <asm/io.h>
 
 /* uGuru3 bank addresses */
@@ -323,7 +324,7 @@ static const struct abituguru3_motherboard_info abituguru3_motherboards[] = {
 		{ "AUX1 Fan",		36, 2, 60, 1, 0 },
 		{ NULL, 0, 0, 0, 0, 0 } }
 	},
-	{ 0x0013, "unknown", {
+	{ 0x0013, "Abit AW8D", {
 		{ "CPU Core",		 0, 0, 10, 1, 0 },
 		{ "DDR",		 1, 0, 10, 1, 0 },
 		{ "DDR VTT",		 2, 0, 10, 1, 0 },
@@ -349,6 +350,7 @@ static const struct abituguru3_motherboard_info abituguru3_motherboards[] = {
 		{ "AUX2 Fan",		36, 2, 60, 1, 0 },
 		{ "AUX3 Fan",		37, 2, 60, 1, 0 },
 		{ "AUX4 Fan",		38, 2, 60, 1, 0 },
+		{ "AUX5 Fan",		39, 2, 60, 1, 0 },
 		{ NULL, 0, 0, 0, 0, 0 } }
 	},
 	{ 0x0014, "Abit AB9 Pro", {
@@ -1111,11 +1113,12 @@ static int __init abituguru3_detect(void)
 {
 	/* See if there is an uguru3 there. An idle uGuru3 will hold 0x00 or
 	   0x08 at DATA and 0xAC at CMD. Sometimes the uGuru3 will hold 0x05
-	   at CMD instead, why is unknown. So we test for 0x05 too. */
+	   or 0x55 at CMD instead, why is unknown. */
 	u8 data_val = inb_p(ABIT_UGURU3_BASE + ABIT_UGURU3_DATA);
 	u8 cmd_val = inb_p(ABIT_UGURU3_BASE + ABIT_UGURU3_CMD);
 	if (((data_val == 0x00) || (data_val == 0x08)) &&
-			((cmd_val == 0xAC) || (cmd_val == 0x05)))
+			((cmd_val == 0xAC) || (cmd_val == 0x05) ||
+			 (cmd_val == 0x55)))
 		return ABIT_UGURU3_BASE;
 
 	ABIT_UGURU3_DEBUG("no Abit uGuru3 found, data = 0x%02X, cmd = "
@@ -1138,6 +1141,15 @@ static int __init abituguru3_init(void)
 	int address, err;
 	struct resource res = { .flags = IORESOURCE_IO };
 
+#ifdef CONFIG_DMI
+	const char *board_vendor = dmi_get_system_info(DMI_BOARD_VENDOR);
+
+	/* safety check, refuse to load on non Abit motherboards */
+	if (!force && (!board_vendor ||
+			strcmp(board_vendor, "http://www.abit.com.tw/")))
+		return -ENODEV;
+#endif
+
 	address = abituguru3_detect();
 	if (address < 0)
 		return address;
diff --git a/drivers/hwmon/adt7473.c b/drivers/hwmon/adt7473.c
index c1009d6f979..93dbf5e7ff8 100644
--- a/drivers/hwmon/adt7473.c
+++ b/drivers/hwmon/adt7473.c
@@ -309,6 +309,9 @@ no_sensor_update:
 						ADT7473_REG_PWM_BHVR(i));
 	}
 
+	i = i2c_smbus_read_byte_data(client, ADT7473_REG_CFG4);
+	data->max_duty_at_overheat = !!(i & ADT7473_CFG4_MAX_DUTY_AT_OVT);
+
 	data->limits_last_updated = local_jiffies;
 	data->limits_valid = 1;
 
diff --git a/drivers/hwmon/lm75.c b/drivers/hwmon/lm75.c
index fa769690515..de698dc7302 100644
--- a/drivers/hwmon/lm75.c
+++ b/drivers/hwmon/lm75.c
@@ -251,10 +251,13 @@ static int lm75_detach_client(struct i2c_client *client)
    the SMBus standard. */
 static int lm75_read_value(struct i2c_client *client, u8 reg)
 {
+	int value;
+
 	if (reg == LM75_REG_CONF)
 		return i2c_smbus_read_byte_data(client, reg);
-	else
-		return swab16(i2c_smbus_read_word_data(client, reg));
+
+	value = i2c_smbus_read_word_data(client, reg);
+	return (value < 0) ? value : swab16(value);
 }
 
 static int lm75_write_value(struct i2c_client *client, u8 reg, u16 value)
@@ -287,9 +290,16 @@ static struct lm75_data *lm75_update_device(struct device *dev)
 		int i;
 		dev_dbg(&client->dev, "Starting lm75 update\n");
 
-		for (i = 0; i < ARRAY_SIZE(data->temp); i++)
-			data->temp[i] = lm75_read_value(client,
-							LM75_REG_TEMP[i]);
+		for (i = 0; i < ARRAY_SIZE(data->temp); i++) {
+			int status;
+
+			status = lm75_read_value(client, LM75_REG_TEMP[i]);
+			if (status < 0)
+				dev_dbg(&client->dev, "reg %d, err %d\n",
+						LM75_REG_TEMP[i], status);
+			else
+				data->temp[i] = status;
+		}
 		data->last_updated = jiffies;
 		data->valid = 1;
 	}
diff --git a/drivers/hwmon/lm85.c b/drivers/hwmon/lm85.c
index 182fe6a5605..ee5eca1c192 100644
--- a/drivers/hwmon/lm85.c
+++ b/drivers/hwmon/lm85.c
@@ -192,23 +192,20 @@ static int RANGE_TO_REG( int range )
 {
 	int i;
 
-	if ( range < lm85_range_map[0] ) { 
-		return 0 ;
-	} else if ( range > lm85_range_map[15] ) {
+	if (range >= lm85_range_map[15])
 		return 15 ;
-	} else {  /* find closest match */
-		for ( i = 14 ; i >= 0 ; --i ) {
-			if ( range > lm85_range_map[i] ) { /* range bracketed */
-				if ((lm85_range_map[i+1] - range) < 
-					(range - lm85_range_map[i])) {
-					i++;
-					break;
-				}
-				break;
-			}
+
+	/* Find the closest match */
+	for (i = 14; i >= 0; --i) {
+		if (range >= lm85_range_map[i]) {
+			if ((lm85_range_map[i + 1] - range) <
+					(range - lm85_range_map[i]))
+				return i + 1;
+			return i;
 		}
 	}
-	return( i & 0x0f );
+
+	return 0;
 }
 #define RANGE_FROM_REG(val) (lm85_range_map[(val)&0x0f])
 
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
index b224079d4e1..d5862e5d99a 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -109,7 +109,11 @@ static int mthca_alloc_icm_pages(struct scatterlist *mem, int order, gfp_t gfp_m
 {
 	struct page *page;
 
-	page = alloc_pages(gfp_mask, order);
+	/*
+	 * Use __GFP_ZERO because buggy firmware assumes ICM pages are
+	 * cleared, and subtle failures are seen if they aren't.
+	 */
+	page = alloc_pages(gfp_mask | __GFP_ZERO, order);
 	if (!page)
 		return -ENOMEM;
 
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index 5126d5d9ea0..2e554a4ab33 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
@@ -176,7 +176,7 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
 	 * we set it now, so we can trap and pass that trap to the Guest if it
 	 * uses the FPU. */
 	if (cpu->ts)
-		lguest_set_ts();
+		unlazy_fpu(current);
 
 	/* SYSENTER is an optimized way of doing system calls.  We can't allow
 	 * it because it always jumps to privilege level 0.  A normal Guest
@@ -196,6 +196,10 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
 	 * trap made the switcher code come back, and an error code which some
 	 * traps set.  */
 
+	 /* Restore SYSENTER if it's supposed to be on. */
+	 if (boot_cpu_has(X86_FEATURE_SEP))
+		wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
+
 	/* If the Guest page faulted, then the cr2 register will tell us the
 	 * bad virtual address.  We have to grab this now, because once we
 	 * re-enable interrupts an interrupt could fault and thus overwrite
@@ -203,13 +207,12 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
 	if (cpu->regs->trapnum == 14)
 		cpu->arch.last_pagefault = read_cr2();
 	/* Similarly, if we took a trap because the Guest used the FPU,
-	 * we have to restore the FPU it expects to see. */
+	 * we have to restore the FPU it expects to see.
+	 * math_state_restore() may sleep and we may even move off to
+	 * a different CPU. So all the critical stuff should be done
+	 * before this.  */
 	else if (cpu->regs->trapnum == 7)
 		math_state_restore();
-
-	/* Restore SYSENTER if it's supposed to be on. */
-	if (boot_cpu_has(X86_FEATURE_SEP))
-		wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
 }
 
 /*H:130 Now we've examined the hypercall code; our Guest can make requests.
diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c
index bafb69b6f7c..fc6f4b8c64b 100644
--- a/drivers/net/pppoe.c
+++ b/drivers/net/pppoe.c
@@ -942,7 +942,7 @@ static int pppoe_recvmsg(struct kiocb *iocb, struct socket *sock,
 	m->msg_namelen = 0;
 
 	if (skb) {
-		total_len = min(total_len, skb->len);
+		total_len = min_t(size_t, total_len, skb->len);
 		error = skb_copy_datagram_iovec(skb, 0, m->msg_iov, total_len);
 		if (error == 0)
 			error = total_len;
diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
index 8662a6b7a30..25b352b664d 100644
--- a/drivers/watchdog/Makefile
+++ b/drivers/watchdog/Makefile
@@ -68,7 +68,6 @@ obj-$(CONFIG_WAFER_WDT) += wafer5823wdt.o
 obj-$(CONFIG_I6300ESB_WDT) += i6300esb.o
 obj-$(CONFIG_ITCO_WDT) += iTCO_wdt.o iTCO_vendor_support.o
 obj-$(CONFIG_IT8712F_WDT) += it8712f_wdt.o
-CFLAGS_hpwdt.o += -O
 obj-$(CONFIG_HP_WATCHDOG) += hpwdt.o
 obj-$(CONFIG_SC1200_WDT) += sc1200wdt.o
 obj-$(CONFIG_SCx200_WDT) += scx200_wdt.o
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 4f0f22b020e..76e5b7386af 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -529,7 +529,7 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
 
 #ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */
 		/* Clear master flag /before/ clearing selector flag. */
-		rmb();
+		wmb();
 #endif
 		pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0);
 		while (pending_words != 0) {
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 9ecb92f6854..9ff7b1c0423 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -855,7 +855,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
 	 */
 
 	/* Update group descriptor block for new group */
-	gdp = (struct ext4_group_desc *)primary->b_data + gdb_off;
+	gdp = (struct ext4_group_desc *)((char *)primary->b_data +
+					 gdb_off * EXT4_DESC_SIZE(sb));
 
 	ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */
 	ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index c19184f2e70..bec76b1c2bb 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -246,15 +246,11 @@ static void find_metapath(const struct gfs2_sbd *sdp, u64 block,
 
 }
 
-static inline unsigned int zero_metapath_length(const struct metapath *mp,
-						unsigned height)
+static inline unsigned int metapath_branch_start(const struct metapath *mp)
 {
-	unsigned int i;
-	for (i = 0; i < height - 1; i++) {
-		if (mp->mp_list[i] != 0)
-			return i;
-	}
-	return height;
+	if (mp->mp_list[0] == 0)
+		return 2;
+	return 1;
 }
 
 /**
@@ -436,7 +432,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
 	struct buffer_head *dibh = mp->mp_bh[0];
 	u64 bn, dblock = 0;
-	unsigned n, i, blks, alloced = 0, iblks = 0, zmpl = 0;
+	unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
 	unsigned dblks = 0;
 	unsigned ptrs_per_blk;
 	const unsigned end_of_metadata = height - 1;
@@ -471,9 +467,8 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
 			/* Building up tree height */
 			state = ALLOC_GROW_HEIGHT;
 			iblks = height - ip->i_height;
-			zmpl = zero_metapath_length(mp, height);
-			iblks -= zmpl;
-			iblks += height;
+			branch_start = metapath_branch_start(mp);
+			iblks += (height - branch_start);
 		}
 	}
 
@@ -509,13 +504,13 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
 					sizeof(struct gfs2_meta_header));
 				*ptr = zero_bn;
 				state = ALLOC_GROW_DEPTH;
-				for(i = zmpl; i < height; i++) {
+				for(i = branch_start; i < height; i++) {
 					if (mp->mp_bh[i] == NULL)
 						break;
 					brelse(mp->mp_bh[i]);
 					mp->mp_bh[i] = NULL;
 				}
-				i = zmpl;
+				i = branch_start;
 			}
 			if (n == 0)
 				break;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 6387523a315..3401628d742 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -195,7 +195,7 @@ ulong_aligned:
 	   depending on architecture.  I've experimented with several ways
 	   of writing this section such as using an else before the goto
 	   but this one seems to be the fastest. */
-	while ((unsigned char *)plong < end - 1) {
+	while ((unsigned char *)plong < end - sizeof(unsigned long)) {
 		prefetch(plong + 1);
 		if (((*plong) & LBITMASK) != lskipval)
 			break;
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 49c7cd0502c..779d2eb649c 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -130,10 +130,11 @@ static int xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p,
 				struct mnt_fhstatus *res)
 {
 	struct nfs_fh *fh = res->fh;
+	unsigned size;
 
 	if ((res->status = ntohl(*p++)) == 0) {
-		int size = ntohl(*p++);
-		if (size <= NFS3_FHSIZE) {
+		size = ntohl(*p++);
+		if (size <= NFS3_FHSIZE && size != 0) {
 			fh->size = size;
 			memcpy(fh->data, p, size);
 		} else
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 2a4a024a4e7..614efeed543 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1216,8 +1216,6 @@ static int nfs_validate_mount_data(void *options,
 {
 	struct nfs_mount_data *data = (struct nfs_mount_data *)options;
 
-	memset(args, 0, sizeof(*args));
-
 	if (data == NULL)
 		goto out_no_data;
 
@@ -1251,13 +1249,13 @@ static int nfs_validate_mount_data(void *options,
 	case 5:
 		memset(data->context, 0, sizeof(data->context));
 	case 6:
-		if (data->flags & NFS_MOUNT_VER3)
+		if (data->flags & NFS_MOUNT_VER3) {
+			if (data->root.size > NFS3_FHSIZE || data->root.size == 0)
+				goto out_invalid_fh;
 			mntfh->size = data->root.size;
-		else
+		} else
 			mntfh->size = NFS2_FHSIZE;
 
-		if (mntfh->size > sizeof(mntfh->data))
-			goto out_invalid_fh;
 
 		memcpy(mntfh->data, data->root.data, mntfh->size);
 		if (mntfh->size < sizeof(mntfh->data))
@@ -1585,24 +1583,29 @@ static int nfs_get_sb(struct file_system_type *fs_type,
 {
 	struct nfs_server *server = NULL;
 	struct super_block *s;
-	struct nfs_fh mntfh;
-	struct nfs_parsed_mount_data data;
+	struct nfs_parsed_mount_data *data;
+	struct nfs_fh *mntfh;
 	struct dentry *mntroot;
 	int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
 	struct nfs_sb_mountdata sb_mntdata = {
 		.mntflags = flags,
 	};
-	int error;
+	int error = -ENOMEM;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL);
+	if (data == NULL || mntfh == NULL)
+		goto out_free_fh;
 
-	security_init_mnt_opts(&data.lsm_opts);
+	security_init_mnt_opts(&data->lsm_opts);
 
 	/* Validate the mount data */
-	error = nfs_validate_mount_data(raw_data, &data, &mntfh, dev_name);
+	error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name);
 	if (error < 0)
 		goto out;
 
 	/* Get a volume representation */
-	server = nfs_create_server(&data, &mntfh);
+	server = nfs_create_server(data, mntfh);
 	if (IS_ERR(server)) {
 		error = PTR_ERR(server);
 		goto out;
@@ -1630,16 +1633,16 @@ static int nfs_get_sb(struct file_system_type *fs_type,
 
 	if (!s->s_root) {
 		/* initial superblock/root creation */
-		nfs_fill_super(s, &data);
+		nfs_fill_super(s, data);
 	}
 
-	mntroot = nfs_get_root(s, &mntfh);
+	mntroot = nfs_get_root(s, mntfh);
 	if (IS_ERR(mntroot)) {
 		error = PTR_ERR(mntroot);
 		goto error_splat_super;
 	}
 
-	error = security_sb_set_mnt_opts(s, &data.lsm_opts);
+	error = security_sb_set_mnt_opts(s, &data->lsm_opts);
 	if (error)
 		goto error_splat_root;
 
@@ -1649,9 +1652,12 @@ static int nfs_get_sb(struct file_system_type *fs_type,
 	error = 0;
 
 out:
-	kfree(data.nfs_server.hostname);
-	kfree(data.mount_server.hostname);
-	security_free_mnt_opts(&data.lsm_opts);
+	kfree(data->nfs_server.hostname);
+	kfree(data->mount_server.hostname);
+	security_free_mnt_opts(&data->lsm_opts);
+out_free_fh:
+	kfree(mntfh);
+	kfree(data);
 	return error;
 
 out_err_nosb:
@@ -1800,8 +1806,6 @@ static int nfs4_validate_mount_data(void *options,
 	struct nfs4_mount_data *data = (struct nfs4_mount_data *)options;
 	char *c;
 
-	memset(args, 0, sizeof(*args));
-
 	if (data == NULL)
 		goto out_no_data;
 
@@ -1959,26 +1963,31 @@ out_no_client_address:
 static int nfs4_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
 {
-	struct nfs_parsed_mount_data data;
+	struct nfs_parsed_mount_data *data;
 	struct super_block *s;
 	struct nfs_server *server;
-	struct nfs_fh mntfh;
+	struct nfs_fh *mntfh;
 	struct dentry *mntroot;
 	int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
 	struct nfs_sb_mountdata sb_mntdata = {
 		.mntflags = flags,
 	};
-	int error;
+	int error = -ENOMEM;
 
-	security_init_mnt_opts(&data.lsm_opts);
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL);
+	if (data == NULL || mntfh == NULL)
+		goto out_free_fh;
+
+	security_init_mnt_opts(&data->lsm_opts);
 
 	/* Validate the mount data */
-	error = nfs4_validate_mount_data(raw_data, &data, dev_name);
+	error = nfs4_validate_mount_data(raw_data, data, dev_name);
 	if (error < 0)
 		goto out;
 
 	/* Get a volume representation */
-	server = nfs4_create_server(&data, &mntfh);
+	server = nfs4_create_server(data, mntfh);
 	if (IS_ERR(server)) {
 		error = PTR_ERR(server);
 		goto out;
@@ -2009,13 +2018,13 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
 		nfs4_fill_super(s);
 	}
 
-	mntroot = nfs4_get_root(s, &mntfh);
+	mntroot = nfs4_get_root(s, mntfh);
 	if (IS_ERR(mntroot)) {
 		error = PTR_ERR(mntroot);
 		goto error_splat_super;
 	}
 
-	error = security_sb_set_mnt_opts(s, &data.lsm_opts);
+	error = security_sb_set_mnt_opts(s, &data->lsm_opts);
 	if (error)
 		goto error_splat_root;
 
@@ -2025,10 +2034,13 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
 	error = 0;
 
 out:
-	kfree(data.client_address);
-	kfree(data.nfs_server.export_path);
-	kfree(data.nfs_server.hostname);
-	security_free_mnt_opts(&data.lsm_opts);
+	kfree(data->client_address);
+	kfree(data->nfs_server.export_path);
+	kfree(data->nfs_server.hostname);
+	security_free_mnt_opts(&data->lsm_opts);
+out_free_fh:
+	kfree(mntfh);
+	kfree(data);
 	return error;
 
 out_free:
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 6d8ace3e325..f333848fd3b 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -739,12 +739,13 @@ int nfs_updatepage(struct file *file, struct page *page,
 	}
 
 	status = nfs_writepage_setup(ctx, page, offset, count);
-	__set_page_dirty_nobuffers(page);
+	if (status < 0)
+		nfs_set_pageerror(page);
+	else
+		__set_page_dirty_nobuffers(page);
 
         dprintk("NFS:      nfs_updatepage returns %d (isize %Ld)\n",
 			status, (long long)i_size_read(inode));
-	if (status < 0)
-		nfs_set_pageerror(page);
 	return status;
 }
 
diff --git a/fs/select.c b/fs/select.c
index 8dda969614a..da0e88201c3 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -249,7 +249,6 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
 						retval++;
 					}
 				}
-				cond_resched();
 			}
 			if (res_in)
 				*rinp = res_in;
@@ -257,6 +256,7 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
 				*routp = res_out;
 			if (res_ex)
 				*rexp = res_ex;
+			cond_resched();
 		}
 		wait = NULL;
 		if (retval || !*timeout || signal_pending(current))
diff --git a/include/asm-alpha/core_mcpcia.h b/include/asm-alpha/core_mcpcia.h
index 525b4f6a7ac..acf55b48347 100644
--- a/include/asm-alpha/core_mcpcia.h
+++ b/include/asm-alpha/core_mcpcia.h
@@ -261,7 +261,7 @@ struct el_MCPCIA_uncorrected_frame_mcheck {
 	}
 #endif
 
-static inline int __mcpcia_is_mmio(unsigned long addr)
+extern inline int __mcpcia_is_mmio(unsigned long addr)
 {
 	return (addr & 0x80000000UL) == 0;
 }
diff --git a/include/asm-alpha/core_t2.h b/include/asm-alpha/core_t2.h
index 90e6b5d6c21..46bfff58f67 100644
--- a/include/asm-alpha/core_t2.h
+++ b/include/asm-alpha/core_t2.h
@@ -356,13 +356,13 @@ struct el_t2_frame_corrected {
 #define vip	volatile int *
 #define vuip	volatile unsigned int *
 
-static inline u8 t2_inb(unsigned long addr)
+extern inline u8 t2_inb(unsigned long addr)
 {
 	long result = *(vip) ((addr << 5) + T2_IO + 0x00);
 	return __kernel_extbl(result, addr & 3);
 }
 
-static inline void t2_outb(u8 b, unsigned long addr)
+extern inline void t2_outb(u8 b, unsigned long addr)
 {
 	unsigned long w;
 
@@ -371,13 +371,13 @@ static inline void t2_outb(u8 b, unsigned long addr)
 	mb();
 }
 
-static inline u16 t2_inw(unsigned long addr)
+extern inline u16 t2_inw(unsigned long addr)
 {
 	long result = *(vip) ((addr << 5) + T2_IO + 0x08);
 	return __kernel_extwl(result, addr & 3);
 }
 
-static inline void t2_outw(u16 b, unsigned long addr)
+extern inline void t2_outw(u16 b, unsigned long addr)
 {
 	unsigned long w;
 
@@ -386,12 +386,12 @@ static inline void t2_outw(u16 b, unsigned long addr)
 	mb();
 }
 
-static inline u32 t2_inl(unsigned long addr)
+extern inline u32 t2_inl(unsigned long addr)
 {
 	return *(vuip) ((addr << 5) + T2_IO + 0x18);
 }
 
-static inline void t2_outl(u32 b, unsigned long addr)
+extern inline void t2_outl(u32 b, unsigned long addr)
 {
 	*(vuip) ((addr << 5) + T2_IO + 0x18) = b;
 	mb();
@@ -435,7 +435,7 @@ static inline void t2_outl(u32 b, unsigned long addr)
 	set_hae(msb); \
 }
 
-static DEFINE_SPINLOCK(t2_hae_lock);
+extern spinlock_t t2_hae_lock;
 
 /*
  * NOTE: take T2_DENSE_MEM off in each readX/writeX routine, since
diff --git a/include/asm-alpha/io.h b/include/asm-alpha/io.h
index 38f18cf18c9..e971ab000f9 100644
--- a/include/asm-alpha/io.h
+++ b/include/asm-alpha/io.h
@@ -35,7 +35,7 @@
  * register not being up-to-date with respect to the hardware
  * value.
  */
-static inline void __set_hae(unsigned long new_hae)
+extern inline void __set_hae(unsigned long new_hae)
 {
 	unsigned long flags;
 	local_irq_save(flags);
@@ -49,7 +49,7 @@ static inline void __set_hae(unsigned long new_hae)
 	local_irq_restore(flags);
 }
 
-static inline void set_hae(unsigned long new_hae)
+extern inline void set_hae(unsigned long new_hae)
 {
 	if (new_hae != alpha_mv.hae_cache)
 		__set_hae(new_hae);
@@ -176,7 +176,7 @@ REMAP2(u64, writeq, volatile)
 #undef REMAP1
 #undef REMAP2
 
-static inline void __iomem *generic_ioportmap(unsigned long a)
+extern inline void __iomem *generic_ioportmap(unsigned long a)
 {
 	return alpha_mv.mv_ioportmap(a);
 }
diff --git a/include/asm-alpha/mmu_context.h b/include/asm-alpha/mmu_context.h
index 6a5be1f7deb..86c08a02d23 100644
--- a/include/asm-alpha/mmu_context.h
+++ b/include/asm-alpha/mmu_context.h
@@ -23,7 +23,7 @@
 #endif
 
 
-extern inline unsigned long
+static inline unsigned long
 __reload_thread(struct pcb_struct *pcb)
 {
 	register unsigned long a0 __asm__("$16");
@@ -114,7 +114,7 @@ extern unsigned long last_asn;
 #define __MMU_EXTERN_INLINE
 #endif
 
-static inline unsigned long
+extern inline unsigned long
 __get_new_mm_context(struct mm_struct *mm, long cpu)
 {
 	unsigned long asn = cpu_last_asn(cpu);
@@ -226,7 +226,7 @@ ev4_activate_mm(struct mm_struct *prev_mm, struct mm_struct *next_mm)
 # endif
 #endif
 
-extern inline int
+static inline int
 init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 {
 	int i;
diff --git a/include/asm-alpha/percpu.h b/include/asm-alpha/percpu.h
index 48348fe34c1..3495e8e00d7 100644
--- a/include/asm-alpha/percpu.h
+++ b/include/asm-alpha/percpu.h
@@ -1,6 +1,78 @@
 #ifndef __ALPHA_PERCPU_H
 #define __ALPHA_PERCPU_H
+#include <linux/compiler.h>
+#include <linux/threads.h>
 
-#include <asm-generic/percpu.h>
+/*
+ * Determine the real variable name from the name visible in the
+ * kernel sources.
+ */
+#define per_cpu_var(var) per_cpu__##var
+
+#ifdef CONFIG_SMP
+
+/*
+ * per_cpu_offset() is the offset that has to be added to a
+ * percpu variable to get to the instance for a certain processor.
+ */
+extern unsigned long __per_cpu_offset[NR_CPUS];
+
+#define per_cpu_offset(x) (__per_cpu_offset[x])
+
+#define __my_cpu_offset per_cpu_offset(raw_smp_processor_id())
+#ifdef CONFIG_DEBUG_PREEMPT
+#define my_cpu_offset per_cpu_offset(smp_processor_id())
+#else
+#define my_cpu_offset __my_cpu_offset
+#endif
+
+#ifndef MODULE
+#define SHIFT_PERCPU_PTR(var, offset) RELOC_HIDE(&per_cpu_var(var), (offset))
+#define PER_CPU_ATTRIBUTES
+#else
+/*
+ * To calculate addresses of locally defined variables, GCC uses 32-bit
+ * displacement from the GP. Which doesn't work for per cpu variables in
+ * modules, as an offset to the kernel per cpu area is way above 4G.
+ *
+ * This forces allocation of a GOT entry for per cpu variable using
+ * ldq instruction with a 'literal' relocation.
+ */
+#define SHIFT_PERCPU_PTR(var, offset) ({		\
+	extern int simple_identifier_##var(void);	\
+	unsigned long __ptr, tmp_gp;			\
+	asm (  "br	%1, 1f		  	      \n\
+	1:	ldgp	%1, 0(%1)	    	      \n\
+		ldq %0, per_cpu__" #var"(%1)\t!literal"		\
+		: "=&r"(__ptr), "=&r"(tmp_gp));		\
+	(typeof(&per_cpu_var(var)))(__ptr + (offset)); })
+
+#define PER_CPU_ATTRIBUTES	__used
+
+#endif /* MODULE */
+
+/*
+ * A percpu variable may point to a discarded regions. The following are
+ * established ways to produce a usable pointer from the percpu variable
+ * offset.
+ */
+#define per_cpu(var, cpu) \
+	(*SHIFT_PERCPU_PTR(var, per_cpu_offset(cpu)))
+#define __get_cpu_var(var) \
+	(*SHIFT_PERCPU_PTR(var, my_cpu_offset))
+#define __raw_get_cpu_var(var) \
+	(*SHIFT_PERCPU_PTR(var, __my_cpu_offset))
+
+#else /* ! SMP */
+
+#define per_cpu(var, cpu)		(*((void)(cpu), &per_cpu_var(var)))
+#define __get_cpu_var(var)		per_cpu_var(var)
+#define __raw_get_cpu_var(var)		per_cpu_var(var)
+
+#define PER_CPU_ATTRIBUTES
+
+#endif /* SMP */
+
+#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu_var(name)
 
 #endif /* __ALPHA_PERCPU_H */
diff --git a/include/asm-alpha/system.h b/include/asm-alpha/system.h
index ed221d6408f..afe20fa58c9 100644
--- a/include/asm-alpha/system.h
+++ b/include/asm-alpha/system.h
@@ -184,7 +184,7 @@ enum amask_enum {
    __amask; })
 
 #define __CALL_PAL_R0(NAME, TYPE)				\
-static inline TYPE NAME(void)					\
+extern inline TYPE NAME(void)					\
 {								\
 	register TYPE __r0 __asm__("$0");			\
 	__asm__ __volatile__(					\
@@ -196,7 +196,7 @@ static inline TYPE NAME(void)					\
 }
 
 #define __CALL_PAL_W1(NAME, TYPE0)				\
-static inline void NAME(TYPE0 arg0)				\
+extern inline void NAME(TYPE0 arg0)				\
 {								\
 	register TYPE0 __r16 __asm__("$16") = arg0;		\
 	__asm__ __volatile__(					\
@@ -207,7 +207,7 @@ static inline void NAME(TYPE0 arg0)				\
 }
 
 #define __CALL_PAL_W2(NAME, TYPE0, TYPE1)			\
-static inline void NAME(TYPE0 arg0, TYPE1 arg1)			\
+extern inline void NAME(TYPE0 arg0, TYPE1 arg1)			\
 {								\
 	register TYPE0 __r16 __asm__("$16") = arg0;		\
 	register TYPE1 __r17 __asm__("$17") = arg1;		\
@@ -219,7 +219,7 @@ static inline void NAME(TYPE0 arg0, TYPE1 arg1)			\
 }
 
 #define __CALL_PAL_RW1(NAME, RTYPE, TYPE0)			\
-static inline RTYPE NAME(TYPE0 arg0)				\
+extern inline RTYPE NAME(TYPE0 arg0)				\
 {								\
 	register RTYPE __r0 __asm__("$0");			\
 	register TYPE0 __r16 __asm__("$16") = arg0;		\
@@ -232,7 +232,7 @@ static inline RTYPE NAME(TYPE0 arg0)				\
 }
 
 #define __CALL_PAL_RW2(NAME, RTYPE, TYPE0, TYPE1)		\
-static inline RTYPE NAME(TYPE0 arg0, TYPE1 arg1)		\
+extern inline RTYPE NAME(TYPE0 arg0, TYPE1 arg1)		\
 {								\
 	register RTYPE __r0 __asm__("$0");			\
 	register TYPE0 __r16 __asm__("$16") = arg0;		\
diff --git a/include/asm-alpha/vga.h b/include/asm-alpha/vga.h
index e8df1e7aae6..c00106bac52 100644
--- a/include/asm-alpha/vga.h
+++ b/include/asm-alpha/vga.h
@@ -13,7 +13,7 @@
 #define VT_BUF_HAVE_MEMSETW
 #define VT_BUF_HAVE_MEMCPYW
 
-extern inline void scr_writew(u16 val, volatile u16 *addr)
+static inline void scr_writew(u16 val, volatile u16 *addr)
 {
 	if (__is_ioaddr(addr))
 		__raw_writew(val, (volatile u16 __iomem *) addr);
@@ -21,7 +21,7 @@ extern inline void scr_writew(u16 val, volatile u16 *addr)
 		*addr = val;
 }
 
-extern inline u16 scr_readw(volatile const u16 *addr)
+static inline u16 scr_readw(volatile const u16 *addr)
 {
 	if (__is_ioaddr(addr))
 		return __raw_readw((volatile const u16 __iomem *) addr);
@@ -29,7 +29,7 @@ extern inline u16 scr_readw(volatile const u16 *addr)
 		return *addr;
 }
 
-extern inline void scr_memsetw(u16 *s, u16 c, unsigned int count)
+static inline void scr_memsetw(u16 *s, u16 c, unsigned int count)
 {
 	if (__is_ioaddr(s))
 		memsetw_io((u16 __iomem *) s, c, count);
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 1d8cd01fa51..844f2a89afb 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -18,6 +18,7 @@
 #include <linux/kvm_para.h>
 #include <linux/kvm_types.h>
 
+#include <asm/pvclock-abi.h>
 #include <asm/desc.h>
 
 #define KVM_MAX_VCPUS 16
@@ -282,7 +283,8 @@ struct kvm_vcpu_arch {
 	struct x86_emulate_ctxt emulate_ctxt;
 
 	gpa_t time;
-	struct kvm_vcpu_time_info hv_clock;
+	struct pvclock_vcpu_time_info hv_clock;
+	unsigned int hv_clock_tsc_khz;
 	unsigned int time_offset;
 	struct page *time_page;
 };
diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h
index 50984594207..bfd9900742b 100644
--- a/include/asm-x86/kvm_para.h
+++ b/include/asm-x86/kvm_para.h
@@ -48,24 +48,6 @@ struct kvm_mmu_op_release_pt {
 #ifdef __KERNEL__
 #include <asm/processor.h>
 
-/* xen binary-compatible interface. See xen headers for details */
-struct kvm_vcpu_time_info {
-	uint32_t version;
-	uint32_t pad0;
-	uint64_t tsc_timestamp;
-	uint64_t system_time;
-	uint32_t tsc_to_system_mul;
-	int8_t   tsc_shift;
-	int8_t	 pad[3];
-} __attribute__((__packed__)); /* 32 bytes */
-
-struct kvm_wall_clock {
-	uint32_t wc_version;
-	uint32_t wc_sec;
-	uint32_t wc_nsec;
-} __attribute__((__packed__));
-
-
 extern void kvmclock_init(void);
 
 
diff --git a/include/asm-x86/pvclock-abi.h b/include/asm-x86/pvclock-abi.h
new file mode 100644
index 00000000000..6857f840b24
--- /dev/null
+++ b/include/asm-x86/pvclock-abi.h
@@ -0,0 +1,42 @@
+#ifndef _ASM_X86_PVCLOCK_ABI_H_
+#define _ASM_X86_PVCLOCK_ABI_H_
+#ifndef __ASSEMBLY__
+
+/*
+ * These structs MUST NOT be changed.
+ * They are the ABI between hypervisor and guest OS.
+ * Both Xen and KVM are using this.
+ *
+ * pvclock_vcpu_time_info holds the system time and the tsc timestamp
+ * of the last update. So the guest can use the tsc delta to get a
+ * more precise system time.  There is one per virtual cpu.
+ *
+ * pvclock_wall_clock references the point in time when the system
+ * time was zero (usually boot time), thus the guest calculates the
+ * current wall clock by adding the system time.
+ *
+ * Protocol for the "version" fields is: hypervisor raises it (making
+ * it uneven) before it starts updating the fields and raises it again
+ * (making it even) when it is done.  Thus the guest can make sure the
+ * time values it got are consistent by checking the version before
+ * and after reading them.
+ */
+
+struct pvclock_vcpu_time_info {
+	u32   version;
+	u32   pad0;
+	u64   tsc_timestamp;
+	u64   system_time;
+	u32   tsc_to_system_mul;
+	s8    tsc_shift;
+	u8    pad[3];
+} __attribute__((__packed__)); /* 32 bytes */
+
+struct pvclock_wall_clock {
+	u32   version;
+	u32   sec;
+	u32   nsec;
+} __attribute__((__packed__));
+
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_X86_PVCLOCK_ABI_H_ */
diff --git a/include/asm-x86/pvclock.h b/include/asm-x86/pvclock.h
new file mode 100644
index 00000000000..85b1bba8e0a
--- /dev/null
+++ b/include/asm-x86/pvclock.h
@@ -0,0 +1,13 @@
+#ifndef _ASM_X86_PVCLOCK_H_
+#define _ASM_X86_PVCLOCK_H_
+
+#include <linux/clocksource.h>
+#include <asm/pvclock-abi.h>
+
+/* some helper functions for xen and kvm pv clock sources */
+cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src);
+void pvclock_read_wallclock(struct pvclock_wall_clock *wall,
+			    struct pvclock_vcpu_time_info *vcpu,
+			    struct timespec *ts);
+
+#endif /* _ASM_X86_PVCLOCK_H_ */
diff --git a/include/asm-x86/xen/page.h b/include/asm-x86/xen/page.h
index baf3a4dce28..e11f24038b1 100644
--- a/include/asm-x86/xen/page.h
+++ b/include/asm-x86/xen/page.h
@@ -150,13 +150,9 @@ static inline pte_t __pte_ma(pteval_t x)
 	return (pte_t) { .pte = x };
 }
 
-#ifdef CONFIG_X86_PAE
 #define pmd_val_ma(v) ((v).pmd)
 #define pud_val_ma(v) ((v).pgd.pgd)
 #define __pmd_ma(x)	((pmd_t) { (x) } )
-#else  /* !X86_PAE */
-#define pmd_val_ma(v)	((v).pud.pgd.pgd)
-#endif	/* CONFIG_X86_PAE */
 
 #define pgd_val_ma(x)	((x).pgd)
 
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 6a5dbdc8a7d..686895bacd9 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -94,7 +94,7 @@ extern unsigned long init_bootmem_node(pg_data_t *pgdat,
 				       unsigned long freepfn,
 				       unsigned long startpfn,
 				       unsigned long endpfn);
-extern void reserve_bootmem_node(pg_data_t *pgdat,
+extern int reserve_bootmem_node(pg_data_t *pgdat,
 				 unsigned long physaddr,
 				 unsigned long size,
 				 int flags);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 092b1b25291..de9d1df4bba 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -33,6 +33,7 @@
 #define KVM_REQ_REPORT_TPR_ACCESS  2
 #define KVM_REQ_MMU_RELOAD         3
 #define KVM_REQ_TRIPLE_FAULT       4
+#define KVM_REQ_PENDING_TIMER      5
 
 struct kvm_vcpu;
 extern struct kmem_cache *kvm_vcpu_cache;
diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h
index 59f1c0bd8f9..d2a00358676 100644
--- a/include/linux/tty_driver.h
+++ b/include/linux/tty_driver.h
@@ -27,8 +27,7 @@
  * 	This routine is called by the kernel to write a series of
  * 	characters to the tty device.  The characters may come from
  * 	user space or kernel space.  This routine will return the
- *	number of characters actually accepted for writing.  This
- *	routine is mandatory.
+ *	number of characters actually accepted for writing.
  *
  *	Optional: Required for writable devices.
  *
@@ -134,7 +133,7 @@
  * 	This routine notifies the tty driver that it should hangup the
  * 	tty device.
  *
- *	Required:
+ *	Optional:
  *
  * void (*break_ctl)(struct tty_stuct *tty, int state);
  *
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index e0a612bc9c4..f422f7218e1 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -367,6 +367,12 @@ static inline int ipv6_addr_any(const struct in6_addr *a)
 		 a->s6_addr32[2] | a->s6_addr32[3] ) == 0); 
 }
 
+static inline int ipv6_addr_loopback(const struct in6_addr *a)
+{
+	return ((a->s6_addr32[0] | a->s6_addr32[1] |
+		 a->s6_addr32[2] | (a->s6_addr32[3] ^ htonl(1))) == 0);
+}
+
 static inline int ipv6_addr_v4mapped(const struct in6_addr *a)
 {
 	return ((a->s6_addr32[0] | a->s6_addr32[1] |
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index aa540e6be50..d9dd0f70729 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -95,6 +95,11 @@ extern struct list_head net_namespace_list;
 #ifdef CONFIG_NET_NS
 extern void __put_net(struct net *net);
 
+static inline int net_alive(struct net *net)
+{
+	return net && atomic_read(&net->count);
+}
+
 static inline struct net *get_net(struct net *net)
 {
 	atomic_inc(&net->count);
@@ -125,6 +130,12 @@ int net_eq(const struct net *net1, const struct net *net2)
 	return net1 == net2;
 }
 #else
+
+static inline int net_alive(struct net *net)
+{
+	return 1;
+}
+
 static inline struct net *get_net(struct net *net)
 {
 	return net;
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index 9b018da48cf..819a0331cda 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -10,6 +10,7 @@
 #define __XEN_PUBLIC_XEN_H__
 
 #include <asm/xen/interface.h>
+#include <asm/pvclock-abi.h>
 
 /*
  * XEN "SYSTEM CALLS" (a.k.a. HYPERCALLS).
@@ -336,7 +337,7 @@ struct vcpu_info {
 	uint8_t evtchn_upcall_mask;
 	unsigned long evtchn_pending_sel;
 	struct arch_vcpu_info arch;
-	struct vcpu_time_info time;
+	struct pvclock_vcpu_time_info time;
 }; /* 64 bytes (x86) */
 
 /*
@@ -384,9 +385,7 @@ struct shared_info {
 	 * Wallclock time: updated only by control software. Guests should base
 	 * their gettimeofday() syscall on this wallclock-base value.
 	 */
-	uint32_t wc_version;      /* Version counter: see vcpu_time_info_t. */
-	uint32_t wc_sec;          /* Secs  00:00:00 UTC, Jan 1, 1970.  */
-	uint32_t wc_nsec;         /* Nsecs 00:00:00 UTC, Jan 1, 1970.  */
+	struct pvclock_wall_clock wc;
 
 	struct arch_shared_info arch;
 
diff --git a/kernel/futex.c b/kernel/futex.c
index 449def8074f..7d1136e97c1 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1096,21 +1096,64 @@ static void unqueue_me_pi(struct futex_q *q)
  * private futexes.
  */
 static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
-				struct task_struct *newowner)
+				struct task_struct *newowner,
+				struct rw_semaphore *fshared)
 {
 	u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
 	struct futex_pi_state *pi_state = q->pi_state;
+	struct task_struct *oldowner = pi_state->owner;
 	u32 uval, curval, newval;
-	int ret;
+	int ret, attempt = 0;
 
 	/* Owner died? */
+	if (!pi_state->owner)
+		newtid |= FUTEX_OWNER_DIED;
+
+	/*
+	 * We are here either because we stole the rtmutex from the
+	 * pending owner or we are the pending owner which failed to
+	 * get the rtmutex. We have to replace the pending owner TID
+	 * in the user space variable. This must be atomic as we have
+	 * to preserve the owner died bit here.
+	 *
+	 * Note: We write the user space value _before_ changing the
+	 * pi_state because we can fault here. Imagine swapped out
+	 * pages or a fork, which was running right before we acquired
+	 * mmap_sem, that marked all the anonymous memory readonly for
+	 * cow.
+	 *
+	 * Modifying pi_state _before_ the user space value would
+	 * leave the pi_state in an inconsistent state when we fault
+	 * here, because we need to drop the hash bucket lock to
+	 * handle the fault. This might be observed in the PID check
+	 * in lookup_pi_state.
+	 */
+retry:
+	if (get_futex_value_locked(&uval, uaddr))
+		goto handle_fault;
+
+	while (1) {
+		newval = (uval & FUTEX_OWNER_DIED) | newtid;
+
+		curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
+
+		if (curval == -EFAULT)
+			goto handle_fault;
+		if (curval == uval)
+			break;
+		uval = curval;
+	}
+
+	/*
+	 * We fixed up user space. Now we need to fix the pi_state
+	 * itself.
+	 */
 	if (pi_state->owner != NULL) {
 		spin_lock_irq(&pi_state->owner->pi_lock);
 		WARN_ON(list_empty(&pi_state->list));
 		list_del_init(&pi_state->list);
 		spin_unlock_irq(&pi_state->owner->pi_lock);
-	} else
-		newtid |= FUTEX_OWNER_DIED;
+	}
 
 	pi_state->owner = newowner;
 
@@ -1118,26 +1161,35 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
 	WARN_ON(!list_empty(&pi_state->list));
 	list_add(&pi_state->list, &newowner->pi_state_list);
 	spin_unlock_irq(&newowner->pi_lock);
+	return 0;
 
 	/*
-	 * We own it, so we have to replace the pending owner
-	 * TID. This must be atomic as we have preserve the
-	 * owner died bit here.
+	 * To handle the page fault we need to drop the hash bucket
+	 * lock here. That gives the other task (either the pending
+	 * owner itself or the task which stole the rtmutex) the
+	 * chance to try the fixup of the pi_state. So once we are
+	 * back from handling the fault we need to check the pi_state
+	 * after reacquiring the hash bucket lock and before trying to
+	 * do another fixup. When the fixup has been done already we
+	 * simply return.
 	 */
-	ret = get_futex_value_locked(&uval, uaddr);
+handle_fault:
+	spin_unlock(q->lock_ptr);
 
-	while (!ret) {
-		newval = (uval & FUTEX_OWNER_DIED) | newtid;
+	ret = futex_handle_fault((unsigned long)uaddr, fshared, attempt++);
 
-		curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
+	spin_lock(q->lock_ptr);
 
-		if (curval == -EFAULT)
-			ret = -EFAULT;
-		if (curval == uval)
-			break;
-		uval = curval;
-	}
-	return ret;
+	/*
+	 * Check if someone else fixed it for us:
+	 */
+	if (pi_state->owner != oldowner)
+		return 0;
+
+	if (ret)
+		return ret;
+
+	goto retry;
 }
 
 /*
@@ -1507,7 +1559,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 		 * that case:
 		 */
 		if (q.pi_state->owner != curr)
-			ret = fixup_pi_state_owner(uaddr, &q, curr);
+			ret = fixup_pi_state_owner(uaddr, &q, curr, fshared);
 	} else {
 		/*
 		 * Catch the rare case, where the lock was released
@@ -1539,7 +1591,8 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
 				int res;
 
 				owner = rt_mutex_owner(&q.pi_state->pi_mutex);
-				res = fixup_pi_state_owner(uaddr, &q, owner);
+				res = fixup_pi_state_owner(uaddr, &q, owner,
+							   fshared);
 
 				/* propagate -EFAULT, if the fixup failed */
 				if (res)
diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index 79e3c90113c..3ec23c3ec97 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c
@@ -1499,7 +1499,8 @@ int kgdb_nmicallback(int cpu, void *regs)
 	return 1;
 }
 
-void kgdb_console_write(struct console *co, const char *s, unsigned count)
+static void kgdb_console_write(struct console *co, const char *s,
+   unsigned count)
 {
 	unsigned long flags;
 
diff --git a/kernel/sched.c b/kernel/sched.c
index b048ad8a11a..3aaa5c8cb42 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4398,22 +4398,20 @@ do_wait_for_common(struct completion *x, long timeout, int state)
 			     signal_pending(current)) ||
 			    (state == TASK_KILLABLE &&
 			     fatal_signal_pending(current))) {
-				__remove_wait_queue(&x->wait, &wait);
-				return -ERESTARTSYS;
+				timeout = -ERESTARTSYS;
+				break;
 			}
 			__set_current_state(state);
 			spin_unlock_irq(&x->wait.lock);
 			timeout = schedule_timeout(timeout);
 			spin_lock_irq(&x->wait.lock);
-			if (!timeout) {
-				__remove_wait_queue(&x->wait, &wait);
-				return timeout;
-			}
-		} while (!x->done);
+		} while (!x->done && timeout);
 		__remove_wait_queue(&x->wait, &wait);
+		if (!x->done)
+			return timeout;
 	}
 	x->done--;
-	return timeout;
+	return timeout ?: 1;
 }
 
 static long __sched
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 1dad5bbb59b..0f3c19197fa 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -250,7 +250,8 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 			if (rt_rq->rt_time || rt_rq->rt_nr_running)
 				idle = 0;
 			spin_unlock(&rt_rq->rt_runtime_lock);
-		}
+		} else if (rt_rq->rt_nr_running)
+			idle = 0;
 
 		if (enqueue)
 			sched_rt_rq_enqueue(rt_rq);
diff --git a/mm/bootmem.c b/mm/bootmem.c
index e8fb927392b..8d9f60e06f6 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -442,15 +442,17 @@ unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
 	return init_bootmem_core(pgdat, freepfn, startpfn, endpfn);
 }
 
-void __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
+int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
 				 unsigned long size, int flags)
 {
 	int ret;
 
 	ret = can_reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
 	if (ret < 0)
-		return;
+		return -ENOMEM;
 	reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
+
+	return 0;
 }
 
 void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
diff --git a/mm/memory.c b/mm/memory.c
index 9aefaae4685..d14b251a25a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1045,6 +1045,26 @@ no_page_table:
 	return page;
 }
 
+/* Can we do the FOLL_ANON optimization? */
+static inline int use_zero_page(struct vm_area_struct *vma)
+{
+	/*
+	 * We don't want to optimize FOLL_ANON for make_pages_present()
+	 * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
+	 * we want to get the page from the page tables to make sure
+	 * that we serialize and update with any other user of that
+	 * mapping.
+	 */
+	if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
+		return 0;
+	/*
+	 * And if we have a fault or a nopfn routine, it's not an
+	 * anonymous region.
+	 */
+	return !vma->vm_ops ||
+		(!vma->vm_ops->fault && !vma->vm_ops->nopfn);
+}
+
 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		unsigned long start, int len, int write, int force,
 		struct page **pages, struct vm_area_struct **vmas)
@@ -1119,8 +1139,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		foll_flags = FOLL_TOUCH;
 		if (pages)
 			foll_flags |= FOLL_GET;
-		if (!write && !(vma->vm_flags & VM_LOCKED) &&
-		    (!vma->vm_ops || !vma->vm_ops->fault))
+		if (!write && use_zero_page(vma))
 			foll_flags |= FOLL_ANON;
 
 		do {
@@ -1766,7 +1785,6 @@ gotten:
 	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
 	if (likely(pte_same(*page_table, orig_pte))) {
 		if (old_page) {
-			page_remove_rmap(old_page, vma);
 			if (!PageAnon(old_page)) {
 				dec_mm_counter(mm, file_rss);
 				inc_mm_counter(mm, anon_rss);
@@ -1788,6 +1806,32 @@ gotten:
 		lru_cache_add_active(new_page);
 		page_add_new_anon_rmap(new_page, vma, address);
 
+		if (old_page) {
+			/*
+			 * Only after switching the pte to the new page may
+			 * we remove the mapcount here. Otherwise another
+			 * process may come and find the rmap count decremented
+			 * before the pte is switched to the new page, and
+			 * "reuse" the old page writing into it while our pte
+			 * here still points into it and can be read by other
+			 * threads.
+			 *
+			 * The critical issue is to order this
+			 * page_remove_rmap with the ptp_clear_flush above.
+			 * Those stores are ordered by (if nothing else,)
+			 * the barrier present in the atomic_add_negative
+			 * in page_remove_rmap.
+			 *
+			 * Then the TLB flush in ptep_clear_flush ensures that
+			 * no process can access the old page before the
+			 * decremented mapcount is visible. And the old page
+			 * cannot be reused until after the decremented
+			 * mapcount is visible. So transitively, TLBs to
+			 * old page will be flushed before it can be reused.
+			 */
+			page_remove_rmap(old_page, vma);
+		}
+
 		/* Free the old page.. */
 		new_page = old_page;
 		ret |= VM_FAULT_WRITE;
diff --git a/mm/slab.c b/mm/slab.c
index 06236e4ddc1..046607f05f3 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3263,9 +3263,12 @@ retry:
 
 		if (cpuset_zone_allowed_hardwall(zone, flags) &&
 			cache->nodelists[nid] &&
-			cache->nodelists[nid]->free_objects)
+			cache->nodelists[nid]->free_objects) {
 				obj = ____cache_alloc_node(cache,
 					flags | GFP_THISNODE, nid);
+				if (obj)
+					break;
+		}
 	}
 
 	if (!obj) {
diff --git a/net/core/dev.c b/net/core/dev.c
index 68d8df0992a..c421a1f8f0b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2077,6 +2077,10 @@ int netif_receive_skb(struct sk_buff *skb)
 
 	rcu_read_lock();
 
+	/* Don't receive packets in an exiting network namespace */
+	if (!net_alive(dev_net(skb->dev)))
+		goto out;
+
 #ifdef CONFIG_NET_CLS_ACT
 	if (skb->tc_verd & TC_NCLS) {
 		skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 72b4c184dd8..7c52fe277b6 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -140,6 +140,9 @@ static void cleanup_net(struct work_struct *work)
 	struct pernet_operations *ops;
 	struct net *net;
 
+	/* Be very certain incoming network packets will not find us */
+	rcu_barrier();
+
 	net = container_of(work, struct net, work);
 
 	mutex_lock(&net_mutex);
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 4e5c8615832..17eb48b8e32 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -102,6 +102,15 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 	if (hdr->version != 6)
 		goto err;
 
+	/*
+	 * RFC4291 2.5.3
+	 * A packet received on an interface with a destination address
+	 * of loopback must be dropped.
+	 */
+	if (!(dev->flags & IFF_LOOPBACK) &&
+	    ipv6_addr_loopback(&hdr->daddr))
+		goto err;
+
 	skb->transport_header = skb->network_header + sizeof(*hdr);
 	IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
 
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index c042ce19bd1..86e28a75267 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -345,18 +345,21 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 	case IPV6_DSTOPTS:
 	{
 		struct ipv6_txoptions *opt;
+
+		/* remove any sticky options header with a zero option
+		 * length, per RFC3542.
+		 */
 		if (optlen == 0)
 			optval = NULL;
+		else if (optlen < sizeof(struct ipv6_opt_hdr) ||
+			 optlen & 0x7 || optlen > 8 * 255)
+			goto e_inval;
 
 		/* hop-by-hop / destination options are privileged option */
 		retv = -EPERM;
 		if (optname != IPV6_RTHDR && !capable(CAP_NET_RAW))
 			break;
 
-		if (optlen < sizeof(struct ipv6_opt_hdr) ||
-		    optlen & 0x7 || optlen > 8 * 255)
-			goto e_inval;
-
 		opt = ipv6_renew_options(sk, np->opt, optname,
 					 (struct ipv6_opt_hdr __user *)optval,
 					 optlen);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 28d8bd53bd3..c80d5899f27 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1132,7 +1132,7 @@ static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb,
 	ieee80211_tx_handler *handler;
 	struct ieee80211_tx_data tx;
 	ieee80211_tx_result res = TX_DROP, res_prepare;
-	int ret, i;
+	int ret, i, retries = 0;
 
 	WARN_ON(__ieee80211_queue_pending(local, control->queue));
 
@@ -1216,6 +1216,13 @@ retry:
 		if (!__ieee80211_queue_stopped(local, control->queue)) {
 			clear_bit(IEEE80211_LINK_STATE_PENDING,
 				  &local->state[control->queue]);
+			retries++;
+			/*
+			 * Driver bug, it's rejecting packets but
+			 * not stopping queues.
+			 */
+			if (WARN_ON_ONCE(retries > 5))
+				goto drop;
 			goto retry;
 		}
 		memcpy(&store->control, control,
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index e7e3baf7009..0dbcde6758e 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4401,7 +4401,9 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
 	if (copy_from_user(&getaddrs, optval, len))
 		return -EFAULT;
 
-	if (getaddrs.addr_num <= 0) return -EINVAL;
+	if (getaddrs.addr_num <= 0 ||
+	    getaddrs.addr_num >= (INT_MAX / sizeof(union sctp_addr)))
+		return -EINVAL;
 	/*
 	 *  For UDP-style sockets, id specifies the association to query.
 	 *  If the id field is set to the value '0' then the locally bound
diff --git a/sound/isa/sb/sb_mixer.c b/sound/isa/sb/sb_mixer.c
index 91d14224f6b..73d4572d136 100644
--- a/sound/isa/sb/sb_mixer.c
+++ b/sound/isa/sb/sb_mixer.c
@@ -925,7 +925,7 @@ static unsigned char als4000_saved_regs[] = {
 static void save_mixer(struct snd_sb *chip, unsigned char *regs, int num_regs)
 {
 	unsigned char *val = chip->saved_regs;
-	snd_assert(num_regs > ARRAY_SIZE(chip->saved_regs), return);
+	snd_assert(num_regs <= ARRAY_SIZE(chip->saved_regs), return);
 	for (; num_regs; num_regs--)
 		*val++ = snd_sbmixer_read(chip, *regs++);
 }
@@ -933,7 +933,7 @@ static void save_mixer(struct snd_sb *chip, unsigned char *regs, int num_regs)
 static void restore_mixer(struct snd_sb *chip, unsigned char *regs, int num_regs)
 {
 	unsigned char *val = chip->saved_regs;
-	snd_assert(num_regs > ARRAY_SIZE(chip->saved_regs), return);
+	snd_assert(num_regs <= ARRAY_SIZE(chip->saved_regs), return);
 	for (; num_regs; num_regs--)
 		snd_sbmixer_write(chip, *regs++, *val++);
 }
diff --git a/sound/pci/aw2/aw2-alsa.c b/sound/pci/aw2/aw2-alsa.c
index 56f87cd33c1..3f00ddf450f 100644
--- a/sound/pci/aw2/aw2-alsa.c
+++ b/sound/pci/aw2/aw2-alsa.c
@@ -316,6 +316,8 @@ static int __devinit snd_aw2_create(struct snd_card *card,
 		return -ENOMEM;
 	}
 
+	/* (2) initialization of the chip hardware */
+	snd_aw2_saa7146_setup(&chip->saa7146, chip->iobase_virt);
 
 	if (request_irq(pci->irq, snd_aw2_saa7146_interrupt,
 			IRQF_SHARED, "Audiowerk2", chip)) {
@@ -329,8 +331,6 @@ static int __devinit snd_aw2_create(struct snd_card *card,
 	}
 	chip->irq = pci->irq;
 
-	/* (2) initialization of the chip hardware */
-	snd_aw2_saa7146_setup(&chip->saa7146, chip->iobase_virt);
 	err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops);
 	if (err < 0) {
 		free_irq(chip->irq, (void *)chip);
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 98778cb69c6..1dcf9f3d110 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -269,28 +269,9 @@ void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
 	}
 }
 
-static int get_eoi_gsi(struct kvm_ioapic *ioapic, int vector)
+static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int gsi)
 {
-	int i;
-
-	for (i = 0; i < IOAPIC_NUM_PINS; i++)
-		if (ioapic->redirtbl[i].fields.vector == vector)
-			return i;
-	return -1;
-}
-
-void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)
-{
-	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
 	union ioapic_redir_entry *ent;
-	int gsi;
-
-	gsi = get_eoi_gsi(ioapic, vector);
-	if (gsi == -1) {
-		printk(KERN_WARNING "Can't find redir item for %d EOI\n",
-		       vector);
-		return;
-	}
 
 	ent = &ioapic->redirtbl[gsi];
 	ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
@@ -300,6 +281,16 @@ void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)
 		ioapic_deliver(ioapic, gsi);
 }
 
+void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)
+{
+	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
+	int i;
+
+	for (i = 0; i < IOAPIC_NUM_PINS; i++)
+		if (ioapic->redirtbl[i].fields.vector == vector)
+			__kvm_ioapic_update_eoi(ioapic, i);
+}
+
 static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr)
 {
 	struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;