diff options
79 files changed, 994 insertions, 596 deletions
diff --git a/Documentation/DocBook/kgdb.tmpl b/Documentation/DocBook/kgdb.tmpl index 028a8444d95..e8acd1f0345 100644 --- a/Documentation/DocBook/kgdb.tmpl +++ b/Documentation/DocBook/kgdb.tmpl @@ -84,10 +84,9 @@ runs an instance of gdb against the vmlinux file which contains the symbols (not boot image such as bzImage, zImage, uImage...). In gdb the developer specifies the connection parameters and - connects to kgdb. Depending on which kgdb I/O modules exist in - the kernel for a given architecture, it may be possible to debug - the test machine's kernel with the development machine using a - rs232 or ethernet connection. + connects to kgdb. The type of connection a developer makes with + gdb depends on the availability of kgdb I/O modules compiled as + builtin's or kernel modules in the test machine's kernel. </para> </chapter> <chapter id="CompilingAKernel"> @@ -223,7 +222,7 @@ </para> <para> IMPORTANT NOTE: Using this option with kgdb over the console - (kgdboc) or kgdb over ethernet (kgdboe) is not supported. + (kgdboc) is not supported. </para> </sect1> </chapter> @@ -249,18 +248,11 @@ (gdb) target remote /dev/ttyS0 </programlisting> <para> - Example (kgdb to a terminal server): + Example (kgdb to a terminal server on tcp port 2012): </para> <programlisting> % gdb ./vmlinux - (gdb) target remote udp:192.168.2.2:6443 - </programlisting> - <para> - Example (kgdb over ethernet): - </para> - <programlisting> - % gdb ./vmlinux - (gdb) target remote udp:192.168.2.2:6443 + (gdb) target remote 192.168.2.2:2012 </programlisting> <para> Once connected, you can debug a kernel the way you would debug an diff --git a/Documentation/hwmon/sysfs-interface b/Documentation/hwmon/sysfs-interface index f4a8ebc1ef1..2d845730d4e 100644 --- a/Documentation/hwmon/sysfs-interface +++ b/Documentation/hwmon/sysfs-interface @@ -2,17 +2,12 @@ Naming and data format standards for sysfs files ------------------------------------------------ The libsensors library offers an interface to the raw sensors data -through the sysfs interface. See libsensors documentation and source for -further information. As of writing this document, libsensors -(from lm_sensors 2.8.3) is heavily chip-dependent. Adding or updating -support for any given chip requires modifying the library's code. -This is because libsensors was written for the procfs interface -older kernel modules were using, which wasn't standardized enough. -Recent versions of libsensors (from lm_sensors 2.8.2 and later) have -support for the sysfs interface, though. - -The new sysfs interface was designed to be as chip-independent as -possible. +through the sysfs interface. Since lm-sensors 3.0.0, libsensors is +completely chip-independent. It assumes that all the kernel drivers +implement the standard sysfs interface described in this document. +This makes adding or updating support for any given chip very easy, as +libsensors, and applications using it, do not need to be modified. +This is a major improvement compared to lm-sensors 2. Note that motherboards vary widely in the connections to sensor chips. There is no standard that ensures, for example, that the second @@ -35,19 +30,17 @@ access this data in a simple and consistent way. That said, such programs will have to implement conversion, labeling and hiding of inputs. For this reason, it is still not recommended to bypass the library. -If you are developing a userspace application please send us feedback on -this standard. - -Note that this standard isn't completely established yet, so it is subject -to changes. If you are writing a new hardware monitoring driver those -features can't seem to fit in this interface, please contact us with your -extension proposal. Keep in mind that backward compatibility must be -preserved. - Each chip gets its own directory in the sysfs /sys/devices tree. To find all sensor chips, it is easier to follow the device symlinks from /sys/class/hwmon/hwmon*. +Up to lm-sensors 3.0.0, libsensors looks for hardware monitoring attributes +in the "physical" device directory. Since lm-sensors 3.0.1, attributes found +in the hwmon "class" device directory are also supported. Complex drivers +(e.g. drivers for multifunction chips) may want to use this possibility to +avoid namespace pollution. The only drawback will be that older versions of +libsensors won't support the driver in question. + All sysfs values are fixed point numbers. There is only one value per file, unlike the older /proc specification. diff --git a/MAINTAINERS b/MAINTAINERS index cd587eec9fa..8f0ec46a709 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4431,10 +4431,10 @@ M: johnpol@2ka.mipt.ru S: Maintained W83791D HARDWARE MONITORING DRIVER -P: Charles Spirakis -M: bezaur@gmail.com +P: Marc Hulsman +M: m.hulsman@tudelft.nl L: lm-sensors@lm-sensors.org -S: Odd Fixes +S: Maintained W83793 HARDWARE MONITORING DRIVER P: Rudolf Marek @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 26 -EXTRAVERSION = -rc7 +EXTRAVERSION = -rc8 NAME = Rotary Wombat # *DOCUMENTATION* diff --git a/arch/alpha/Makefile b/arch/alpha/Makefile index 4e1a8e2c454..4759fe751aa 100644 --- a/arch/alpha/Makefile +++ b/arch/alpha/Makefile @@ -13,6 +13,7 @@ NM := $(NM) -B LDFLAGS_vmlinux := -static -N #-relax CHECKFLAGS += -D__alpha__ -m64 cflags-y := -pipe -mno-fp-regs -ffixed-8 -msmall-data +cflags-y += $(call cc-option, -fno-jump-tables) cpuflags-$(CONFIG_ALPHA_EV4) := -mcpu=ev4 cpuflags-$(CONFIG_ALPHA_EV5) := -mcpu=ev5 diff --git a/arch/alpha/kernel/core_t2.c b/arch/alpha/kernel/core_t2.c index c0750291b44..d9980d47ab8 100644 --- a/arch/alpha/kernel/core_t2.c +++ b/arch/alpha/kernel/core_t2.c @@ -74,6 +74,8 @@ # define DBG(args) #endif +DEFINE_SPINLOCK(t2_hae_lock); + static volatile unsigned int t2_mcheck_any_expected; static volatile unsigned int t2_mcheck_last_taken; diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c index 36ab22a7ea1..5cf45fc5134 100644 --- a/arch/alpha/kernel/pci.c +++ b/arch/alpha/kernel/pci.c @@ -71,6 +71,23 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82378, quirk_i static void __init quirk_cypress(struct pci_dev *dev) { + /* The Notorious Cy82C693 chip. */ + + /* The generic legacy mode IDE fixup in drivers/pci/probe.c + doesn't work correctly with the Cypress IDE controller as + it has non-standard register layout. Fix that. */ + if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE) { + dev->resource[2].start = dev->resource[3].start = 0; + dev->resource[2].end = dev->resource[3].end = 0; + dev->resource[2].flags = dev->resource[3].flags = 0; + if (PCI_FUNC(dev->devfn) == 2) { + dev->resource[0].start = 0x170; + dev->resource[0].end = 0x177; + dev->resource[1].start = 0x376; + dev->resource[1].end = 0x376; + } + } + /* The Cypress bridge responds on the PCI bus in the address range 0xffff0000-0xffffffff (conventional x86 BIOS ROM). There is no way to turn this off. The bridge also supports several extended diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index dc57790250d..c778779007f 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -447,7 +447,7 @@ struct unaligned_stat { /* Macro for exception fixup code to access integer registers. */ -#define una_reg(r) (regs->regs[(r) >= 16 && (r) <= 18 ? (r)+19 : (r)]) +#define una_reg(r) (_regs[(r) >= 16 && (r) <= 18 ? (r)+19 : (r)]) asmlinkage void @@ -456,6 +456,7 @@ do_entUna(void * va, unsigned long opcode, unsigned long reg, { long error, tmp1, tmp2, tmp3, tmp4; unsigned long pc = regs->pc - 4; + unsigned long *_regs = regs->regs; const struct exception_table_entry *fixup; unaligned[0].count++; diff --git a/arch/arm/tools/mach-types b/arch/arm/tools/mach-types index 207a8b5a0c4..0be5630ff56 100644 --- a/arch/arm/tools/mach-types +++ b/arch/arm/tools/mach-types @@ -12,7 +12,7 @@ # # http://www.arm.linux.org.uk/developer/machines/?action=new # -# Last update: Sat Apr 19 11:23:38 2008 +# Last update: Mon Jul 7 16:25:39 2008 # # machine_is_xxx CONFIG_xxxx MACH_TYPE_xxx number # @@ -560,7 +560,6 @@ husky MACH_HUSKY HUSKY 543 boxer MACH_BOXER BOXER 544 shepherd MACH_SHEPHERD SHEPHERD 545 aml42800aa MACH_AML42800AA AML42800AA 546 -ml674001 MACH_MACH_TYPE_ML674001 MACH_TYPE_ML674001 547 lpc2294 MACH_LPC2294 LPC2294 548 switchgrass MACH_SWITCHGRASS SWITCHGRASS 549 ens_cmu MACH_ENS_CMU ENS_CMU 550 @@ -748,7 +747,6 @@ anubis MACH_ANUBIS ANUBIS 734 ite8152 MACH_ITE8152 ITE8152 735 lpc3xxx MACH_LPC3XXX LPC3XXX 736 puppeteer MACH_PUPPETEER PUPPETEER 737 -vt001 MACH_MACH_VADATECH MACH_VADATECH 738 e570 MACH_E570 E570 739 x50 MACH_X50 X50 740 recon MACH_RECON RECON 741 @@ -839,7 +837,7 @@ ccxp270 MACH_CCXP CCXP 825 omap_gsample MACH_OMAP_GSAMPLE OMAP_GSAMPLE 826 realview_eb MACH_REALVIEW_EB REALVIEW_EB 827 samoa MACH_SAMOA SAMOA 828 -t3xscale MACH_T3XSCALE T3XSCALE 829 +palmt3 MACH_PALMT3 PALMT3 829 i878 MACH_I878 I878 830 borzoi MACH_BORZOI BORZOI 831 gecko MACH_GECKO GECKO 832 @@ -895,7 +893,7 @@ mio8390 MACH_MIO8390 MIO8390 881 omi_board MACH_OMI_BOARD OMI_BOARD 882 mx21civ MACH_MX21CIV MX21CIV 883 mahi_cdac MACH_MAHI_CDAC MAHI_CDAC 884 -xscale_palmtx MACH_XSCALE_PALMTX XSCALE_PALMTX 885 +palmtx MACH_PALMTX PALMTX 885 s3c2413 MACH_S3C2413 S3C2413 887 samsys_ep0 MACH_SAMSYS_EP0 SAMSYS_EP0 888 wg302v1 MACH_WG302V1 WG302V1 889 @@ -918,7 +916,7 @@ nxdb500 MACH_NXDB500 NXDB500 905 apf9328 MACH_APF9328 APF9328 906 omap_wipoq MACH_OMAP_WIPOQ OMAP_WIPOQ 907 omap_twip MACH_OMAP_TWIP OMAP_TWIP 908 -xscale_treo650 MACH_XSCALE_PALMTREO650 XSCALE_PALMTREO650 909 +palmtreo650 MACH_PALMTREO650 PALMTREO650 909 acumen MACH_ACUMEN ACUMEN 910 xp100 MACH_XP100 XP100 911 fs2410 MACH_FS2410 FS2410 912 @@ -926,8 +924,8 @@ pxa270_cerf MACH_PXA270_CERF PXA270_CERF 913 sq2ftlpalm MACH_SQ2FTLPALM SQ2FTLPALM 914 bsemserver MACH_BSEMSERVER BSEMSERVER 915 netclient MACH_NETCLIENT NETCLIENT 916 -xscale_palmtt5 MACH_XSCALE_PALMTT5 XSCALE_PALMTT5 917 -xscale_palmtc MACH_OMAP_PALMTC OMAP_PALMTC 918 +palmt5 MACH_PALMT5 PALMT5 917 +palmtc MACH_PALMTC PALMTC 918 omap_apollon MACH_OMAP_APOLLON OMAP_APOLLON 919 mxc30030evb MACH_MXC30030EVB MXC30030EVB 920 rea_2d MACH_REA_2D REA_2D 921 @@ -1220,7 +1218,6 @@ empca400 MACH_EMPCA400 EMPCA400 1211 em7210 MACH_EM7210 EM7210 1212 htchermes MACH_HTCHERMES HTCHERMES 1213 eti_c1 MACH_ETI_C1 ETI_C1 1214 -mach_dep2410 MACH_MACH_DEP2410 MACH_DEP2410 1215 ac100 MACH_AC100 AC100 1216 sneetch MACH_SNEETCH SNEETCH 1217 studentmate MACH_STUDENTMATE STUDENTMATE 1218 @@ -1421,10 +1418,10 @@ looxc550 MACH_LOOXC550 LOOXC550 1417 cnty_titan MACH_CNTY_TITAN CNTY_TITAN 1418 app3xx MACH_APP3XX APP3XX 1419 sideoatsgrama MACH_SIDEOATSGRAMA SIDEOATSGRAMA 1420 -xscale_palmt700p MACH_XSCALE_PALMT700P XSCALE_PALMT700P 1421 -xscale_palmt700w MACH_XSCALE_PALMT700W XSCALE_PALMT700W 1422 -xscale_palmt750 MACH_XSCALE_PALMT750 XSCALE_PALMT750 1423 -xscale_palmt755p MACH_XSCALE_PALMT755P XSCALE_PALMT755P 1424 +palmtreo700p MACH_PALMTREO700P PALMTREO700P 1421 +palmtreo700w MACH_PALMTREO700W PALMTREO700W 1422 +palmtreo750 MACH_PALMTREO750 PALMTREO750 1423 +palmtreo755p MACH_PALMTREO755P PALMTREO755P 1424 ezreganut9200 MACH_EZREGANUT9200 EZREGANUT9200 1425 sarge MACH_SARGE SARGE 1426 a696 MACH_A696 A696 1427 @@ -1463,7 +1460,7 @@ artemis MACH_ARTEMIS ARTEMIS 1462 htctitan MACH_HTCTITAN HTCTITAN 1463 qranium MACH_QRANIUM QRANIUM 1464 adx_wsc2 MACH_ADX_WSC2 ADX_WSC2 1465 -adx_medcom MACH_ADX_MEDINET ADX_MEDINET 1466 +adx_medcom MACH_ADX_MEDCOM ADX_MEDCOM 1466 bboard MACH_BBOARD BBOARD 1467 cambria MACH_CAMBRIA CAMBRIA 1468 mt7xxx MACH_MT7XXX MT7XXX 1469 @@ -1519,7 +1516,7 @@ wp188 MACH_WP188 WP188 1518 corsica MACH_CORSICA CORSICA 1519 bigeye MACH_BIGEYE BIGEYE 1520 tll5000 MACH_TLL5000 TLL5000 1522 -hni270 MACH_HNI_X270 HNI_X270 1523 +bebot MACH_BEBOT BEBOT 1523 qong MACH_QONG QONG 1524 tcompact MACH_TCOMPACT TCOMPACT 1525 puma5 MACH_PUMA5 PUMA5 1526 @@ -1636,7 +1633,6 @@ awlug4lcu MACH_AWLUG4LCU AWLUG4LCU 1637 palermoc MACH_PALERMOC PALERMOC 1638 omap_ldp MACH_OMAP_LDP OMAP_LDP 1639 ip500 MACH_IP500 IP500 1640 -mx35ads MACH_MACH_MX35ADS MACH_MX35ADS 1641 ase2 MACH_ASE2 ASE2 1642 mx35evb MACH_MX35EVB MX35EVB 1643 aml_m8050 MACH_AML_M8050 AML_M8050 1644 @@ -1647,7 +1643,7 @@ badger MACH_BADGER BADGER 1648 trizeps4wl MACH_TRIZEPS4WL TRIZEPS4WL 1649 trizeps5 MACH_TRIZEPS5 TRIZEPS5 1650 marlin MACH_MARLIN MARLIN 1651 -ts7800 MACH_TS7800 TS7800 1652 +ts78xx MACH_TS78XX TS78XX 1652 hpipaq214 MACH_HPIPAQ214 HPIPAQ214 1653 at572d940dcm MACH_AT572D940DCM AT572D940DCM 1654 ne1board MACH_NE1BOARD NE1BOARD 1655 @@ -1720,3 +1716,99 @@ htc_kaiser MACH_HTC_KAISER HTC_KAISER 1724 lg_ks20 MACH_LG_KS20 LG_KS20 1725 hhgps MACH_HHGPS HHGPS 1726 nokia_n810_wimax MACH_NOKIA_N810_WIMAX NOKIA_N810_WIMAX 1727 +insight MACH_INSIGHT INSIGHT 1728 +sapphire MACH_SAPPHIRE SAPPHIRE 1729 +csb637xo MACH_CSB637XO CSB637XO 1730 +evisiong MACH_EVISIONG EVISIONG 1731 +stmp37xx MACH_STMP37XX STMP37XX 1732 +stmp378x MACH_STMP38XX STMP38XX 1733 +tnt MACH_TNT TNT 1734 +tbxt MACH_TBXT TBXT 1735 +playmate MACH_PLAYMATE PLAYMATE 1736 +pns10 MACH_PNS10 PNS10 1737 +eznavi MACH_EZNAVI EZNAVI 1738 +ps4000 MACH_PS4000 PS4000 1739 +ezx_a780 MACH_EZX_A780 EZX_A780 1740 +ezx_e680 MACH_EZX_E680 EZX_E680 1741 +ezx_a1200 MACH_EZX_A1200 EZX_A1200 1742 +ezx_e6 MACH_EZX_E6 EZX_E6 1743 +ezx_e2 MACH_EZX_E2 EZX_E2 1744 +ezx_a910 MACH_EZX_A910 EZX_A910 1745 +cwmx31 MACH_CWMX31 CWMX31 1746 +sl2312 MACH_SL2312 SL2312 1747 +blenny MACH_BLENNY BLENNY 1748 +ds107 MACH_DS107 DS107 1749 +dsx07 MACH_DSX07 DSX07 1750 +picocom1 MACH_PICOCOM1 PICOCOM1 1751 +lynx_wolverine MACH_LYNX_WOLVERINE LYNX_WOLVERINE 1752 +ubisys_p9_sc19 MACH_UBISYS_P9_SC19 UBISYS_P9_SC19 1753 +kratos_low MACH_KRATOS_LOW KRATOS_LOW 1754 +m700 MACH_M700 M700 1755 +edmini_v2 MACH_EDMINI_V2 EDMINI_V2 1756 +zipit2 MACH_ZIPIT2 ZIPIT2 1757 +hslfemtocell MACH_HSLFEMTOCELL HSLFEMTOCELL 1758 +daintree_at91 MACH_DAINTREE_AT91 DAINTREE_AT91 1759 +sg560usb MACH_SG560USB SG560USB 1760 +omap3_pandora MACH_OMAP3_PANDORA OMAP3_PANDORA 1761 +usr8200 MACH_USR8200 USR8200 1762 +s1s65k MACH_S1S65K S1S65K 1763 +s2s65a MACH_S2S65A S2S65A 1764 +icore MACH_ICORE ICORE 1765 +mss2 MACH_MSS2 MSS2 1766 +belmont MACH_BELMONT BELMONT 1767 +asusp525 MACH_ASUSP525 ASUSP525 1768 +lb88rc8480 MACH_LB88RC8480 LB88RC8480 1769 +hipxa MACH_HIPXA HIPXA 1770 +mx25_3ds MACH_MX25_3DS MX25_3DS 1771 +m800 MACH_M800 M800 1772 +omap3530_lv_som MACH_OMAP3530_LV_SOM OMAP3530_LV_SOM 1773 +prima_evb MACH_PRIMA_EVB PRIMA_EVB 1774 +mx31bt1 MACH_MX31BT1 MX31BT1 1775 +atlas4_evb MACH_ATLAS4_EVB ATLAS4_EVB 1776 +mx31cicada MACH_MX31CICADA MX31CICADA 1777 +mi424wr MACH_MI424WR MI424WR 1778 +axs_ultrax MACH_AXS_ULTRAX AXS_ULTRAX 1779 +at572d940deb MACH_AT572D940DEB AT572D940DEB 1780 +davinci_da8xx_evm MACH_DAVINCI_DA8XX_EVM DAVINCI_DA8XX_EVM 1781 +ep9302 MACH_EP9302 EP9302 1782 +at572d940hfeb MACH_AT572D940HFEB AT572D940HFEB 1783 +cybook3 MACH_CYBOOK3 CYBOOK3 1784 +wdg002 MACH_WDG002 WDG002 1785 +sg560adsl MACH_SG560ADSL SG560ADSL 1786 +nextio_n2800_ica MACH_NEXTIO_N2800_ICA NEXTIO_N2800_ICA 1787 +marvell_newdb MACH_MARVELL_NEWDB MARVELL_NEWDB 1789 +vandihud MACH_VANDIHUD VANDIHUD 1790 +magx_e8 MACH_MAGX_E8 MAGX_E8 1791 +magx_z6 MACH_MAGX_Z6 MAGX_Z6 1792 +magx_v8 MACH_MAGX_V8 MAGX_V8 1793 +magx_u9 MACH_MAGX_U9 MAGX_U9 1794 +toughcf08 MACH_TOUGHCF08 TOUGHCF08 1795 +zw4400 MACH_ZW4400 ZW4400 1796 +marat91 MACH_MARAT91 MARAT91 1797 +overo MACH_OVERO OVERO 1798 +at2440evb MACH_AT2440EVB AT2440EVB 1799 +neocore926 MACH_NEOCORE926 NEOCORE926 1800 +wnr854t MACH_WNR854T WNR854T 1801 +imx27 MACH_IMX27 IMX27 1802 +moose_db MACH_MOOSE_DB MOOSE_DB 1803 +fab4 MACH_FAB4 FAB4 1804 +htcdiamond MACH_HTCDIAMOND HTCDIAMOND 1805 +fiona MACH_FIONA FIONA 1806 +mxc30030_x MACH_MXC30030_X MXC30030_X 1807 +bmp1000 MACH_BMP1000 BMP1000 1808 +logi9200 MACH_LOGI9200 LOGI9200 1809 +tqma31 MACH_TQMA31 TQMA31 1810 +ccw9p9215js MACH_CCW9P9215JS CCW9P9215JS 1811 +rd88f5181l_ge MACH_RD88F5181L_GE RD88F5181L_GE 1812 +sifmain MACH_SIFMAIN SIFMAIN 1813 +sam9_l9261 MACH_SAM9_L9261 SAM9_L9261 1814 +cc9m2443js MACH_CC9M2443JS CC9M2443JS 1815 +xaria300 MACH_XARIA300 XARIA300 1816 +it9200 MACH_IT9200 IT9200 1817 +rd88f5181l_fxo MACH_RD88F5181L_FXO RD88F5181L_FXO 1818 +kriss_sensor MACH_KRISS_SENSOR KRISS_SENSOR 1819 +pilz_pmi5 MACH_PILZ_PMI5 PILZ_PMI5 1820 +jade MACH_JADE JADE 1821 +ks8695_softplc MACH_KS8695_SOFTPLC KS8695_SOFTPLC 1822 +gprisc4 MACH_GPRISC4 GPRISC4 1823 +stamp9260 MACH_STAMP9260 STAMP9260 1824 diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index 082c31dcfd9..39752cdef6f 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -558,8 +558,6 @@ static struct iosapic_rte_info * __init_refok iosapic_alloc_rte (void) if (!iosapic_kmalloc_ok && list_empty(&free_rte_list)) { rte = alloc_bootmem(sizeof(struct iosapic_rte_info) * NR_PREALLOCATE_RTE_ENTRIES); - if (!rte) - return NULL; for (i = 0; i < NR_PREALLOCATE_RTE_ENTRIES; i++, rte++) list_add(&rte->rte_list, &free_rte_list); } diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index f48a809c686..4ae15c8c248 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -578,8 +578,6 @@ setup_arch (char **cmdline_p) cpu_init(); /* initialize the bootstrap CPU */ mmu_context_init(); /* initialize context_id bitmap */ - check_sal_cache_flush(); - #ifdef CONFIG_ACPI acpi_boot_init(); #endif @@ -607,6 +605,7 @@ setup_arch (char **cmdline_p) ia64_mca_init(); platform_setup(cmdline_p); + check_sal_cache_flush(); paging_init(); } diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c index 49d3120415e..e585f9a2afb 100644 --- a/arch/ia64/sn/kernel/sn2/sn2_smp.c +++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c @@ -512,6 +512,8 @@ static ssize_t sn2_ptc_proc_write(struct file *file, const char __user *user, si int cpu; char optstr[64]; + if (count == 0 || count > sizeof(optstr)) + return -EINVAL; if (copy_from_user(optstr, user, count)) return -EFAULT; optstr[count - 1] = '\0'; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 52e18e6d2ba..e0edaaa6920 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -383,6 +383,7 @@ config VMI config KVM_CLOCK bool "KVM paravirtualized clock" select PARAVIRT + select PARAVIRT_CLOCK depends on !(X86_VISWS || X86_VOYAGER) help Turning on this option will allow you to run a paravirtualized clock @@ -410,6 +411,10 @@ config PARAVIRT over full virtualization. However, when run without a hypervisor the kernel is theoretically slower and slightly larger. +config PARAVIRT_CLOCK + bool + default n + endif config MEMTEST_BOOTPARAM diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 5e618c3b472..77807d4769c 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -82,6 +82,7 @@ obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o obj-$(CONFIG_KVM_GUEST) += kvm.o obj-$(CONFIG_KVM_CLOCK) += kvmclock.o obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o +obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 08a30986d47..87edf1ceb1d 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -18,6 +18,7 @@ #include <linux/clocksource.h> #include <linux/kvm_para.h> +#include <asm/pvclock.h> #include <asm/arch_hooks.h> #include <asm/msr.h> #include <asm/apic.h> @@ -36,18 +37,9 @@ static int parse_no_kvmclock(char *arg) early_param("no-kvmclock", parse_no_kvmclock); /* The hypervisor will put information about time periodically here */ -static DEFINE_PER_CPU_SHARED_ALIGNED(struct kvm_vcpu_time_info, hv_clock); -#define get_clock(cpu, field) per_cpu(hv_clock, cpu).field +static DEFINE_PER_CPU_SHARED_ALIGNED(struct pvclock_vcpu_time_info, hv_clock); +static struct pvclock_wall_clock wall_clock; -static inline u64 kvm_get_delta(u64 last_tsc) -{ - int cpu = smp_processor_id(); - u64 delta = native_read_tsc() - last_tsc; - return (delta * get_clock(cpu, tsc_to_system_mul)) >> KVM_SCALE; -} - -static struct kvm_wall_clock wall_clock; -static cycle_t kvm_clock_read(void); /* * The wallclock is the time of day when we booted. Since then, some time may * have elapsed since the hypervisor wrote the data. So we try to account for @@ -55,64 +47,37 @@ static cycle_t kvm_clock_read(void); */ static unsigned long kvm_get_wallclock(void) { - u32 wc_sec, wc_nsec; - u64 delta; + struct pvclock_vcpu_time_info *vcpu_time; struct timespec ts; - int version, nsec; int low, high; low = (int)__pa(&wall_clock); high = ((u64)__pa(&wall_clock) >> 32); + native_write_msr(MSR_KVM_WALL_CLOCK, low, high); - delta = kvm_clock_read(); + vcpu_time = &get_cpu_var(hv_clock); + pvclock_read_wallclock(&wall_clock, vcpu_time, &ts); + put_cpu_var(hv_clock); - native_write_msr(MSR_KVM_WALL_CLOCK, low, high); - do { - version = wall_clock.wc_version; - rmb(); - wc_sec = wall_clock.wc_sec; - wc_nsec = wall_clock.wc_nsec; - rmb(); - } while ((wall_clock.wc_version != version) || (version & 1)); - - delta = kvm_clock_read() - delta; - delta += wc_nsec; - nsec = do_div(delta, NSEC_PER_SEC); - set_normalized_timespec(&ts, wc_sec + delta, nsec); - /* - * Of all mechanisms of time adjustment I've tested, this one - * was the champion! - */ - return ts.tv_sec + 1; + return ts.tv_sec; } static int kvm_set_wallclock(unsigned long now) { - return 0; + return -1; } -/* - * This is our read_clock function. The host puts an tsc timestamp each time - * it updates a new time. Without the tsc adjustment, we can have a situation - * in which a vcpu starts to run earlier (smaller system_time), but probes - * time later (compared to another vcpu), leading to backwards time - */ static cycle_t kvm_clock_read(void) { - u64 last_tsc, now; - int cpu; + struct pvclock_vcpu_time_info *src; + cycle_t ret; - preempt_disable(); - cpu = smp_processor_id(); - - last_tsc = get_clock(cpu, tsc_timestamp); - now = get_clock(cpu, system_time); - - now += kvm_get_delta(last_tsc); - preempt_enable(); - - return now; + src = &get_cpu_var(hv_clock); + ret = pvclock_clocksource_read(src); + put_cpu_var(hv_clock); + return ret; } + static struct clocksource kvm_clock = { .name = "kvm-clock", .read = kvm_clock_read, @@ -123,13 +88,14 @@ static struct clocksource kvm_clock = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; -static int kvm_register_clock(void) +static int kvm_register_clock(char *txt) { int cpu = smp_processor_id(); int low, high; low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1; high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32); - + printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n", + cpu, high, low, txt); return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high); } @@ -140,12 +106,20 @@ static void kvm_setup_secondary_clock(void) * Now that the first cpu already had this clocksource initialized, * we shouldn't fail. */ - WARN_ON(kvm_register_clock()); + WARN_ON(kvm_register_clock("secondary cpu clock")); /* ok, done with our trickery, call native */ setup_secondary_APIC_clock(); } #endif +#ifdef CONFIG_SMP +void __init kvm_smp_prepare_boot_cpu(void) +{ + WARN_ON(kvm_register_clock("primary cpu clock")); + native_smp_prepare_boot_cpu(); +} +#endif + /* * After the clock is registered, the host will keep writing to the * registered memory location. If the guest happens to shutdown, this memory @@ -174,7 +148,7 @@ void __init kvmclock_init(void) return; if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) { - if (kvm_register_clock()) + if (kvm_register_clock("boot clock")) return; pv_time_ops.get_wallclock = kvm_get_wallclock; pv_time_ops.set_wallclock = kvm_set_wallclock; @@ -182,6 +156,9 @@ void __init kvmclock_init(void) #ifdef CONFIG_X86_LOCAL_APIC pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock; #endif +#ifdef CONFIG_SMP + smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; +#endif machine_ops.shutdown = kvm_shutdown; #ifdef CONFIG_KEXEC machine_ops.crash_shutdown = kvm_crash_shutdown; diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c new file mode 100644 index 00000000000..05fbe9a0325 --- /dev/null +++ b/arch/x86/kernel/pvclock.c @@ -0,0 +1,141 @@ +/* paravirtual clock -- common code used by kvm/xen + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#include <linux/kernel.h> +#include <linux/percpu.h> +#include <asm/pvclock.h> + +/* + * These are perodically updated + * xen: magic shared_info page + * kvm: gpa registered via msr + * and then copied here. + */ +struct pvclock_shadow_time { + u64 tsc_timestamp; /* TSC at last update of time vals. */ + u64 system_timestamp; /* Time, in nanosecs, since boot. */ + u32 tsc_to_nsec_mul; + int tsc_shift; + u32 version; +}; + +/* + * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, + * yielding a 64-bit result. + */ +static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift) +{ + u64 product; +#ifdef __i386__ + u32 tmp1, tmp2; +#endif + + if (shift < 0) + delta >>= -shift; + else + delta <<= shift; + +#ifdef __i386__ + __asm__ ( + "mul %5 ; " + "mov %4,%%eax ; " + "mov %%edx,%4 ; " + "mul %5 ; " + "xor %5,%5 ; " + "add %4,%%eax ; " + "adc %5,%%edx ; " + : "=A" (product), "=r" (tmp1), "=r" (tmp2) + : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) ); +#elif __x86_64__ + __asm__ ( + "mul %%rdx ; shrd $32,%%rdx,%%rax" + : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) ); +#else +#error implement me! +#endif + + return product; +} + +static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow) +{ + u64 delta = native_read_tsc() - shadow->tsc_timestamp; + return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift); +} + +/* + * Reads a consistent set of time-base values from hypervisor, + * into a shadow data area. + */ +static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst, + struct pvclock_vcpu_time_info *src) +{ + do { + dst->version = src->version; + rmb(); /* fetch version before data */ + dst->tsc_timestamp = src->tsc_timestamp; + dst->system_timestamp = src->system_time; + dst->tsc_to_nsec_mul = src->tsc_to_system_mul; + dst->tsc_shift = src->tsc_shift; + rmb(); /* test version after fetching data */ + } while ((src->version & 1) || (dst->version != src->version)); + + return dst->version; +} + +cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) +{ + struct pvclock_shadow_time shadow; + unsigned version; + cycle_t ret, offset; + + do { + version = pvclock_get_time_values(&shadow, src); + barrier(); + offset = pvclock_get_nsec_offset(&shadow); + ret = shadow.system_timestamp + offset; + barrier(); + } while (version != src->version); + + return ret; +} + +void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock, + struct pvclock_vcpu_time_info *vcpu_time, + struct timespec *ts) +{ + u32 version; + u64 delta; + struct timespec now; + + /* get wallclock at system boot */ + do { + version = wall_clock->version; + rmb(); /* fetch version before time */ + now.tv_sec = wall_clock->sec; + now.tv_nsec = wall_clock->nsec; + rmb(); /* fetch time before checking version */ + } while ((wall_clock->version & 1) || (version != wall_clock->version)); + + delta = pvclock_clocksource_read(vcpu_time); /* time since system boot */ + delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec; + + now.tv_nsec = do_div(delta, NSEC_PER_SEC); + now.tv_sec = delta; + + set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); +} diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index f2f5d260874..3829aa7b663 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -200,9 +200,12 @@ int __pit_timer_fn(struct kvm_kpit_state *ps) atomic_inc(&pt->pending); smp_mb__after_atomic_inc(); - if (vcpu0 && waitqueue_active(&vcpu0->wq)) { - vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE; - wake_up_interruptible(&vcpu0->wq); + if (vcpu0) { + set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests); + if (waitqueue_active(&vcpu0->wq)) { + vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE; + wake_up_interruptible(&vcpu0->wq); + } } pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index c297c50eba6..ebc03f5ae16 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -940,6 +940,7 @@ static int __apic_timer_fn(struct kvm_lapic *apic) wait_queue_head_t *q = &apic->vcpu->wq; atomic_inc(&apic->timer.pending); + set_bit(KVM_REQ_PENDING_TIMER, &apic->vcpu->requests); if (waitqueue_active(q)) { apic->vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; wake_up_interruptible(q); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index ee3f53098f0..7e7c3969f7a 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -640,6 +640,7 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn) rmap_remove(kvm, spte); --kvm->stat.lpages; set_shadow_pte(spte, shadow_trap_nonpresent_pte); + spte = NULL; write_protected = 1; } spte = rmap_next(kvm, rmapp, spte); @@ -1082,10 +1083,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, struct kvm_mmu_page *shadow; spte |= PT_WRITABLE_MASK; - if (user_fault) { - mmu_unshadow(vcpu->kvm, gfn); - goto unshadowed; - } shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn); if (shadow || @@ -1102,8 +1099,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, } } -unshadowed: - if (pte_access & ACC_WRITE_MASK) mark_page_dirty(vcpu->kvm, gfn); @@ -1580,11 +1575,13 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, u64 *spte, const void *new) { - if ((sp->role.level != PT_PAGE_TABLE_LEVEL) - && !vcpu->arch.update_pte.largepage) { - ++vcpu->kvm->stat.mmu_pde_zapped; - return; - } + if (sp->role.level != PT_PAGE_TABLE_LEVEL) { + if (!vcpu->arch.update_pte.largepage || + sp->role.glevels == PT32_ROOT_LEVEL) { + ++vcpu->kvm->stat.mmu_pde_zapped; + return; + } + } ++vcpu->kvm->stat.mmu_pte_updated; if (sp->role.glevels == PT32_ROOT_LEVEL) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 02efbe75f31..540e9517907 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -566,7 +566,7 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) load_transition_efer(vmx); } -static void vmx_load_host_state(struct vcpu_vmx *vmx) +static void __vmx_load_host_state(struct vcpu_vmx *vmx) { unsigned long flags; @@ -596,6 +596,13 @@ static void vmx_load_host_state(struct vcpu_vmx *vmx) reload_host_efer(vmx); } +static void vmx_load_host_state(struct vcpu_vmx *vmx) +{ + preempt_disable(); + __vmx_load_host_state(vmx); + preempt_enable(); +} + /* * Switches to specified vcpu, until a matching vcpu_put(), but assumes * vcpu mutex is already taken. @@ -654,7 +661,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) static void vmx_vcpu_put(struct kvm_vcpu *vcpu) { - vmx_load_host_state(to_vmx(vcpu)); + __vmx_load_host_state(to_vmx(vcpu)); } static void vmx_fpu_activate(struct kvm_vcpu *vcpu) @@ -884,11 +891,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) switch (msr_index) { #ifdef CONFIG_X86_64 case MSR_EFER: + vmx_load_host_state(vmx); ret = kvm_set_msr_common(vcpu, msr_index, data); - if (vmx->host_state.loaded) { - reload_host_efer(vmx); - load_transition_efer(vmx); - } break; case MSR_FS_BASE: vmcs_writel(GUEST_FS_BASE, data); @@ -910,11 +914,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) guest_write_tsc(data); break; default: + vmx_load_host_state(vmx); msr = find_msr_entry(vmx, msr_index); if (msr) { msr->data = data; - if (vmx->host_state.loaded) - load_msrs(vmx->guest_msrs, vmx->save_nmsrs); break; } ret = kvm_set_msr_common(vcpu, msr_index, data); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 00acf1301a1..63a77caa59f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -492,8 +492,8 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) { static int version; - struct kvm_wall_clock wc; - struct timespec wc_ts; + struct pvclock_wall_clock wc; + struct timespec now, sys, boot; if (!wall_clock) return; @@ -502,10 +502,19 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); - wc_ts = current_kernel_time(); - wc.wc_sec = wc_ts.tv_sec; - wc.wc_nsec = wc_ts.tv_nsec; - wc.wc_version = version; + /* + * The guest calculates current wall clock time by adding + * system time (updated by kvm_write_guest_time below) to the + * wall clock specified here. guest system time equals host + * system time for us, thus we must fill in host boot time here. + */ + now = current_kernel_time(); + ktime_get_ts(&sys); + boot = ns_to_timespec(timespec_to_ns(&now) - timespec_to_ns(&sys)); + + wc.sec = boot.tv_sec; + wc.nsec = boot.tv_nsec; + wc.version = version; kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc)); @@ -513,6 +522,45 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); } +static uint32_t div_frac(uint32_t dividend, uint32_t divisor) +{ + uint32_t quotient, remainder; + + /* Don't try to replace with do_div(), this one calculates + * "(dividend << 32) / divisor" */ + __asm__ ( "divl %4" + : "=a" (quotient), "=d" (remainder) + : "0" (0), "1" (dividend), "r" (divisor) ); + return quotient; +} + +static void kvm_set_time_scale(uint32_t tsc_khz, struct pvclock_vcpu_time_info *hv_clock) +{ + uint64_t nsecs = 1000000000LL; + int32_t shift = 0; + uint64_t tps64; + uint32_t tps32; + + tps64 = tsc_khz * 1000LL; + while (tps64 > nsecs*2) { + tps64 >>= 1; + shift--; + } + + tps32 = (uint32_t)tps64; + while (tps32 <= (uint32_t)nsecs) { + tps32 <<= 1; + shift++; + } + + hv_clock->tsc_shift = shift; + hv_clock->tsc_to_system_mul = div_frac(nsecs, tps32); + + pr_debug("%s: tsc_khz %u, tsc_shift %d, tsc_mul %u\n", + __FUNCTION__, tsc_khz, hv_clock->tsc_shift, + hv_clock->tsc_to_system_mul); +} + static void kvm_write_guest_time(struct kvm_vcpu *v) { struct timespec ts; @@ -523,6 +571,11 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) if ((!vcpu->time_page)) return; + if (unlikely(vcpu->hv_clock_tsc_khz != tsc_khz)) { + kvm_set_time_scale(tsc_khz, &vcpu->hv_clock); + vcpu->hv_clock_tsc_khz = tsc_khz; + } + /* Keep irq disabled to prevent changes to the clock */ local_irq_save(flags); kvm_get_msr(v, MSR_IA32_TIME_STAMP_COUNTER, @@ -537,14 +590,14 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) /* * The interface expects us to write an even number signaling that the * update is finished. Since the guest won't see the intermediate - * state, we just write "2" at the end + * state, we just increase by 2 at the end. */ - vcpu->hv_clock.version = 2; + vcpu->hv_clock.version += 2; shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0); memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock, - sizeof(vcpu->hv_clock)); + sizeof(vcpu->hv_clock)); kunmap_atomic(shared_kaddr, KM_USER0); @@ -599,10 +652,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) /* ...but clean it before doing the actual write */ vcpu->arch.time_offset = data & ~(PAGE_MASK | 1); - vcpu->arch.hv_clock.tsc_to_system_mul = - clocksource_khz2mult(tsc_khz, 22); - vcpu->arch.hv_clock.tsc_shift = 22; - down_read(¤t->mm->mmap_sem); vcpu->arch.time_page = gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT); @@ -2759,6 +2808,8 @@ again: if (vcpu->requests) { if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests)) __kvm_migrate_timers(vcpu); + if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) + kvm_x86_ops->tlb_flush(vcpu); if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS, &vcpu->requests)) { kvm_run->exit_reason = KVM_EXIT_TPR_ACCESS; @@ -2772,6 +2823,7 @@ again: } } + clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests); kvm_inject_pending_timer_irqs(vcpu); preempt_disable(); @@ -2781,21 +2833,13 @@ again: local_irq_disable(); - if (need_resched()) { + if (vcpu->requests || need_resched()) { local_irq_enable(); preempt_enable(); r = 1; goto out; } - if (vcpu->requests) - if (test_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) { - local_irq_enable(); - preempt_enable(); - r = 1; - goto out; - } - if (signal_pending(current)) { local_irq_enable(); preempt_enable(); @@ -2825,9 +2869,6 @@ again: kvm_guest_enter(); - if (vcpu->requests) - if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) - kvm_x86_ops->tlb_flush(vcpu); KVMTRACE_0D(VMENTRY, vcpu, entryexit); kvm_x86_ops->run(vcpu, kvm_run); diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 2e641be2737..6c388e593bc 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -5,8 +5,9 @@ config XEN bool "Xen guest support" select PARAVIRT + select PARAVIRT_CLOCK depends on X86_32 - depends on X86_CMPXCHG && X86_TSC && !(X86_VISWS || X86_VOYAGER) + depends on X86_CMPXCHG && X86_TSC && X86_PAE && !(X86_VISWS || X86_VOYAGER) help This is the Linux Xen port. Enabling this will allow the kernel to boot in a paravirtualized environment under the diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index c8a56e457d6..f09c1c69c37 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -785,38 +785,35 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) static __init void xen_pagetable_setup_start(pgd_t *base) { pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base; + int i; /* special set_pte for pagetable initialization */ pv_mmu_ops.set_pte = xen_set_pte_init; init_mm.pgd = base; /* - * copy top-level of Xen-supplied pagetable into place. For - * !PAE we can use this as-is, but for PAE it is a stand-in - * while we copy the pmd pages. + * copy top-level of Xen-supplied pagetable into place. This + * is a stand-in while we copy the pmd pages. */ memcpy(base, xen_pgd, PTRS_PER_PGD * sizeof(pgd_t)); - if (PTRS_PER_PMD > 1) { - int i; - /* - * For PAE, need to allocate new pmds, rather than - * share Xen's, since Xen doesn't like pmd's being - * shared between address spaces. - */ - for (i = 0; i < PTRS_PER_PGD; i++) { - if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) { - pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); + /* + * For PAE, need to allocate new pmds, rather than + * share Xen's, since Xen doesn't like pmd's being + * shared between address spaces. + */ + for (i = 0; i < PTRS_PER_PGD; i++) { + if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) { + pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); - memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]), - PAGE_SIZE); + memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]), + PAGE_SIZE); - make_lowmem_page_readonly(pmd); + make_lowmem_page_readonly(pmd); - set_pgd(&base[i], __pgd(1 + __pa(pmd))); - } else - pgd_clear(&base[i]); - } + set_pgd(&base[i], __pgd(1 + __pa(pmd))); + } else + pgd_clear(&base[i]); } /* make sure zero_page is mapped RO so we can use it in pagetables */ @@ -873,17 +870,7 @@ static __init void xen_pagetable_setup_done(pgd_t *base) /* Actually pin the pagetable down, but we can't set PG_pinned yet because the page structures don't exist yet. */ - { - unsigned level; - -#ifdef CONFIG_X86_PAE - level = MMUEXT_PIN_L3_TABLE; -#else - level = MMUEXT_PIN_L2_TABLE; -#endif - - pin_pagetable_pfn(level, PFN_DOWN(__pa(base))); - } + pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base))); } /* This is called once we have the cpu_possible_map */ @@ -1093,7 +1080,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { .make_pte = xen_make_pte, .make_pgd = xen_make_pgd, -#ifdef CONFIG_X86_PAE .set_pte_atomic = xen_set_pte_atomic, .set_pte_present = xen_set_pte_at, .set_pud = xen_set_pud, @@ -1102,7 +1088,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { .make_pmd = xen_make_pmd, .pmd_val = xen_pmd_val, -#endif /* PAE */ .activate_mm = xen_activate_mm, .dup_mmap = xen_dup_mmap, @@ -1228,6 +1213,11 @@ asmlinkage void __init xen_start_kernel(void) if (xen_feature(XENFEAT_supervisor_mode_kernel)) pv_info.kernel_rpl = 0; + /* Prevent unwanted bits from being set in PTEs. */ + __supported_pte_mask &= ~_PAGE_GLOBAL; + if (!is_initial_xendomain()) + __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); + /* set the limit of our address space */ xen_reserve_top(); diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 3525ef523a7..df40bf74ea7 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -179,50 +179,56 @@ out: preempt_enable(); } -pteval_t xen_pte_val(pte_t pte) +/* Assume pteval_t is equivalent to all the other *val_t types. */ +static pteval_t pte_mfn_to_pfn(pteval_t val) +{ + if (val & _PAGE_PRESENT) { + unsigned long mfn = (val & PTE_MASK) >> PAGE_SHIFT; + pteval_t flags = val & ~PTE_MASK; + val = (mfn_to_pfn(mfn) << PAGE_SHIFT) | flags; + } + + return val; +} + +static pteval_t pte_pfn_to_mfn(pteval_t val) { - pteval_t ret = pte.pte; + if (val & _PAGE_PRESENT) { + unsigned long pfn = (val & PTE_MASK) >> PAGE_SHIFT; + pteval_t flags = val & ~PTE_MASK; + val = (pfn_to_mfn(pfn) << PAGE_SHIFT) | flags; + } - if (ret & _PAGE_PRESENT) - ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT; + return val; +} - return ret; +pteval_t xen_pte_val(pte_t pte) +{ + return pte_mfn_to_pfn(pte.pte); } pgdval_t xen_pgd_val(pgd_t pgd) { - pgdval_t ret = pgd.pgd; - if (ret & _PAGE_PRESENT) - ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT; - return ret; + return pte_mfn_to_pfn(pgd.pgd); } pte_t xen_make_pte(pteval_t pte) { - if (pte & _PAGE_PRESENT) { - pte = phys_to_machine(XPADDR(pte)).maddr; - pte &= ~(_PAGE_PCD | _PAGE_PWT); - } - - return (pte_t){ .pte = pte }; + pte = pte_pfn_to_mfn(pte); + return native_make_pte(pte); } pgd_t xen_make_pgd(pgdval_t pgd) { - if (pgd & _PAGE_PRESENT) - pgd = phys_to_machine(XPADDR(pgd)).maddr; - - return (pgd_t){ pgd }; + pgd = pte_pfn_to_mfn(pgd); + return native_make_pgd(pgd); } pmdval_t xen_pmd_val(pmd_t pmd) { - pmdval_t ret = native_pmd_val(pmd); - if (ret & _PAGE_PRESENT) - ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT; - return ret; + return pte_mfn_to_pfn(pmd.pmd); } -#ifdef CONFIG_X86_PAE + void xen_set_pud(pud_t *ptr, pud_t val) { struct multicall_space mcs; @@ -267,17 +273,9 @@ void xen_pmd_clear(pmd_t *pmdp) pmd_t xen_make_pmd(pmdval_t pmd) { - if (pmd & _PAGE_PRESENT) - pmd = phys_to_machine(XPADDR(pmd)).maddr; - + pmd = pte_pfn_to_mfn(pmd); return native_make_pmd(pmd); } -#else /* !PAE */ -void xen_set_pte(pte_t *ptep, pte_t pte) -{ - *ptep = pte; -} -#endif /* CONFIG_X86_PAE */ /* (Yet another) pagetable walker. This one is intended for pinning a @@ -430,8 +428,6 @@ static int pin_page(struct page *page, enum pt_level level) read-only, and can be pinned. */ void xen_pgd_pin(pgd_t *pgd) { - unsigned level; - xen_mc_batch(); if (pgd_walk(pgd, pin_page, TASK_SIZE)) { @@ -441,14 +437,7 @@ void xen_pgd_pin(pgd_t *pgd) xen_mc_batch(); } -#ifdef CONFIG_X86_PAE - level = MMUEXT_PIN_L3_TABLE; -#else - level = MMUEXT_PIN_L2_TABLE; -#endif - - xen_do_pin(level, PFN_DOWN(__pa(pgd))); - + xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd))); xen_mc_issue(0); } diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index b5e189b1519..5fe961caffd 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h @@ -37,14 +37,13 @@ void xen_exit_mmap(struct mm_struct *mm); void xen_pgd_pin(pgd_t *pgd); //void xen_pgd_unpin(pgd_t *pgd); -#ifdef CONFIG_X86_PAE -unsigned long long xen_pte_val(pte_t); -unsigned long long xen_pmd_val(pmd_t); -unsigned long long xen_pgd_val(pgd_t); +pteval_t xen_pte_val(pte_t); +pmdval_t xen_pmd_val(pmd_t); +pgdval_t xen_pgd_val(pgd_t); -pte_t xen_make_pte(unsigned long long); -pmd_t xen_make_pmd(unsigned long long); -pgd_t xen_make_pgd(unsigned long long); +pte_t xen_make_pte(pteval_t); +pmd_t xen_make_pmd(pmdval_t); +pgd_t xen_make_pgd(pgdval_t); void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pteval); @@ -53,15 +52,4 @@ void xen_set_pud(pud_t *ptr, pud_t val); void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); void xen_pmd_clear(pmd_t *pmdp); - -#else -unsigned long xen_pte_val(pte_t); -unsigned long xen_pmd_val(pmd_t); -unsigned long xen_pgd_val(pgd_t); - -pte_t xen_make_pte(unsigned long); -pmd_t xen_make_pmd(unsigned long); -pgd_t xen_make_pgd(unsigned long); -#endif - #endif /* _XEN_MMU_H */ diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 52b2e385698..41e217503c9 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -14,6 +14,7 @@ #include <linux/kernel_stat.h> #include <linux/math64.h> +#include <asm/pvclock.h> #include <asm/xen/hypervisor.h> #include <asm/xen/hypercall.h> @@ -31,17 +32,6 @@ static cycle_t xen_clocksource_read(void); -/* These are perodically updated in shared_info, and then copied here. */ -struct shadow_time_info { - u64 tsc_timestamp; /* TSC at last update of time vals. */ - u64 system_timestamp; /* Time, in nanosecs, since boot. */ - u32 tsc_to_nsec_mul; - int tsc_shift; - u32 version; -}; - -static DEFINE_PER_CPU(struct shadow_time_info, shadow_time); - /* runstate info updated by Xen */ static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate); @@ -211,7 +201,7 @@ unsigned long long xen_sched_clock(void) unsigned long xen_cpu_khz(void) { u64 xen_khz = 1000000ULL << 32; - const struct vcpu_time_info *info = + const struct pvclock_vcpu_time_info *info = &HYPERVISOR_shared_info->vcpu_info[0].time; do_div(xen_khz, info->tsc_to_system_mul); @@ -223,121 +213,26 @@ unsigned long xen_cpu_khz(void) return xen_khz; } -/* - * Reads a consistent set of time-base values from Xen, into a shadow data - * area. - */ -static unsigned get_time_values_from_xen(void) -{ - struct vcpu_time_info *src; - struct shadow_time_info *dst; - - /* src is shared memory with the hypervisor, so we need to - make sure we get a consistent snapshot, even in the face of - being preempted. */ - src = &__get_cpu_var(xen_vcpu)->time; - dst = &__get_cpu_var(shadow_time); - - do { - dst->version = src->version; - rmb(); /* fetch version before data */ - dst->tsc_timestamp = src->tsc_timestamp; - dst->system_timestamp = src->system_time; - dst->tsc_to_nsec_mul = src->tsc_to_system_mul; - dst->tsc_shift = src->tsc_shift; - rmb(); /* test version after fetching data */ - } while ((src->version & 1) | (dst->version ^ src->version)); - - return dst->version; -} - -/* - * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, - * yielding a 64-bit result. - */ -static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift) -{ - u64 product; -#ifdef __i386__ - u32 tmp1, tmp2; -#endif - - if (shift < 0) - delta >>= -shift; - else - delta <<= shift; - -#ifdef __i386__ - __asm__ ( - "mul %5 ; " - "mov %4,%%eax ; " - "mov %%edx,%4 ; " - "mul %5 ; " - "xor %5,%5 ; " - "add %4,%%eax ; " - "adc %5,%%edx ; " - : "=A" (product), "=r" (tmp1), "=r" (tmp2) - : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) ); -#elif __x86_64__ - __asm__ ( - "mul %%rdx ; shrd $32,%%rdx,%%rax" - : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) ); -#else -#error implement me! -#endif - - return product; -} - -static u64 get_nsec_offset(struct shadow_time_info *shadow) -{ - u64 now, delta; - now = native_read_tsc(); - delta = now - shadow->tsc_timestamp; - return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift); -} - static cycle_t xen_clocksource_read(void) { - struct shadow_time_info *shadow = &get_cpu_var(shadow_time); + struct pvclock_vcpu_time_info *src; cycle_t ret; - unsigned version; - - do { - version = get_time_values_from_xen(); - barrier(); - ret = shadow->system_timestamp + get_nsec_offset(shadow); - barrier(); - } while (version != __get_cpu_var(xen_vcpu)->time.version); - - put_cpu_var(shadow_time); + src = &get_cpu_var(xen_vcpu)->time; + ret = pvclock_clocksource_read(src); + put_cpu_var(xen_vcpu); return ret; } static void xen_read_wallclock(struct timespec *ts) { - const struct shared_info *s = HYPERVISOR_shared_info; - u32 version; - u64 delta; - struct timespec now; - - /* get wallclock at system boot */ - do { - version = s->wc_version; - rmb(); /* fetch version before time */ - now.tv_sec = s->wc_sec; - now.tv_nsec = s->wc_nsec; - rmb(); /* fetch time before checking version */ - } while ((s->wc_version & 1) | (version ^ s->wc_version)); + struct shared_info *s = HYPERVISOR_shared_info; + struct pvclock_wall_clock *wall_clock = &(s->wc); + struct pvclock_vcpu_time_info *vcpu_time; - delta = xen_clocksource_read(); /* time since system boot */ - delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec; - - now.tv_nsec = do_div(delta, NSEC_PER_SEC); - now.tv_sec = delta; - - set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); + vcpu_time = &get_cpu_var(xen_vcpu)->time; + pvclock_read_wallclock(wall_clock, vcpu_time, ts); + put_cpu_var(xen_vcpu); } unsigned long xen_get_wallclock(void) @@ -345,7 +240,6 @@ unsigned long xen_get_wallclock(void) struct timespec ts; xen_read_wallclock(&ts); - return ts.tv_sec; } @@ -569,8 +463,6 @@ __init void xen_time_init(void) { int cpu = smp_processor_id(); - get_time_values_from_xen(); - clocksource_register(&xen_clocksource); if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) { diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 288d587ce73..6ec3b4f7719 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -17,7 +17,7 @@ ENTRY(startup_xen) __FINIT -.pushsection .bss.page_aligned +.pushsection .text .align PAGE_SIZE_asm ENTRY(hypercall_page) .skip 0x1000 @@ -30,11 +30,7 @@ ENTRY(hypercall_page) ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long startup_xen) ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long hypercall_page) ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb") -#ifdef CONFIG_X86_PAE ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") -#else - ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "no") -#endif ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") #endif /*CONFIG_XEN */ diff --git a/drivers/char/drm/i915_drv.c b/drivers/char/drm/i915_drv.c index e8f3d682e3b..93aed1c38bd 100644 --- a/drivers/char/drm/i915_drv.c +++ b/drivers/char/drm/i915_drv.c @@ -389,6 +389,7 @@ static int i915_resume(struct drm_device *dev) pci_restore_state(dev->pdev); if (pci_enable_device(dev->pdev)) return -1; + pci_set_master(dev->pdev); pci_write_config_byte(dev->pdev, LBB, dev_priv->saveLBB); diff --git a/drivers/char/tty_ioctl.c b/drivers/char/tty_ioctl.c index b1a757a5ee2..8f81139d619 100644 --- a/drivers/char/tty_ioctl.c +++ b/drivers/char/tty_ioctl.c @@ -981,16 +981,9 @@ EXPORT_SYMBOL_GPL(tty_perform_flush); int n_tty_ioctl(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg) { - struct tty_struct *real_tty; unsigned long flags; int retval; - if (tty->driver->type == TTY_DRIVER_TYPE_PTY && - tty->driver->subtype == PTY_TYPE_MASTER) - real_tty = tty->link; - else - real_tty = tty; - switch (cmd) { case TCXONC: retval = tty_check_change(tty); diff --git a/drivers/hwmon/abituguru3.c b/drivers/hwmon/abituguru3.c index ed33fddc4de..f00f497b9ca 100644 --- a/drivers/hwmon/abituguru3.c +++ b/drivers/hwmon/abituguru3.c @@ -30,6 +30,7 @@ #include <linux/platform_device.h> #include <linux/hwmon.h> #include <linux/hwmon-sysfs.h> +#include <linux/dmi.h> #include <asm/io.h> /* uGuru3 bank addresses */ @@ -323,7 +324,7 @@ static const struct abituguru3_motherboard_info abituguru3_motherboards[] = { { "AUX1 Fan", 36, 2, 60, 1, 0 }, { NULL, 0, 0, 0, 0, 0 } } }, - { 0x0013, "unknown", { + { 0x0013, "Abit AW8D", { { "CPU Core", 0, 0, 10, 1, 0 }, { "DDR", 1, 0, 10, 1, 0 }, { "DDR VTT", 2, 0, 10, 1, 0 }, @@ -349,6 +350,7 @@ static const struct abituguru3_motherboard_info abituguru3_motherboards[] = { { "AUX2 Fan", 36, 2, 60, 1, 0 }, { "AUX3 Fan", 37, 2, 60, 1, 0 }, { "AUX4 Fan", 38, 2, 60, 1, 0 }, + { "AUX5 Fan", 39, 2, 60, 1, 0 }, { NULL, 0, 0, 0, 0, 0 } } }, { 0x0014, "Abit AB9 Pro", { @@ -1111,11 +1113,12 @@ static int __init abituguru3_detect(void) { /* See if there is an uguru3 there. An idle uGuru3 will hold 0x00 or 0x08 at DATA and 0xAC at CMD. Sometimes the uGuru3 will hold 0x05 - at CMD instead, why is unknown. So we test for 0x05 too. */ + or 0x55 at CMD instead, why is unknown. */ u8 data_val = inb_p(ABIT_UGURU3_BASE + ABIT_UGURU3_DATA); u8 cmd_val = inb_p(ABIT_UGURU3_BASE + ABIT_UGURU3_CMD); if (((data_val == 0x00) || (data_val == 0x08)) && - ((cmd_val == 0xAC) || (cmd_val == 0x05))) + ((cmd_val == 0xAC) || (cmd_val == 0x05) || + (cmd_val == 0x55))) return ABIT_UGURU3_BASE; ABIT_UGURU3_DEBUG("no Abit uGuru3 found, data = 0x%02X, cmd = " @@ -1138,6 +1141,15 @@ static int __init abituguru3_init(void) int address, err; struct resource res = { .flags = IORESOURCE_IO }; +#ifdef CONFIG_DMI + const char *board_vendor = dmi_get_system_info(DMI_BOARD_VENDOR); + + /* safety check, refuse to load on non Abit motherboards */ + if (!force && (!board_vendor || + strcmp(board_vendor, "http://www.abit.com.tw/"))) + return -ENODEV; +#endif + address = abituguru3_detect(); if (address < 0) return address; diff --git a/drivers/hwmon/adt7473.c b/drivers/hwmon/adt7473.c index c1009d6f979..93dbf5e7ff8 100644 --- a/drivers/hwmon/adt7473.c +++ b/drivers/hwmon/adt7473.c @@ -309,6 +309,9 @@ no_sensor_update: ADT7473_REG_PWM_BHVR(i)); } + i = i2c_smbus_read_byte_data(client, ADT7473_REG_CFG4); + data->max_duty_at_overheat = !!(i & ADT7473_CFG4_MAX_DUTY_AT_OVT); + data->limits_last_updated = local_jiffies; data->limits_valid = 1; diff --git a/drivers/hwmon/lm75.c b/drivers/hwmon/lm75.c index fa769690515..de698dc7302 100644 --- a/drivers/hwmon/lm75.c +++ b/drivers/hwmon/lm75.c @@ -251,10 +251,13 @@ static int lm75_detach_client(struct i2c_client *client) the SMBus standard. */ static int lm75_read_value(struct i2c_client *client, u8 reg) { + int value; + if (reg == LM75_REG_CONF) return i2c_smbus_read_byte_data(client, reg); - else - return swab16(i2c_smbus_read_word_data(client, reg)); + + value = i2c_smbus_read_word_data(client, reg); + return (value < 0) ? value : swab16(value); } static int lm75_write_value(struct i2c_client *client, u8 reg, u16 value) @@ -287,9 +290,16 @@ static struct lm75_data *lm75_update_device(struct device *dev) int i; dev_dbg(&client->dev, "Starting lm75 update\n"); - for (i = 0; i < ARRAY_SIZE(data->temp); i++) - data->temp[i] = lm75_read_value(client, - LM75_REG_TEMP[i]); + for (i = 0; i < ARRAY_SIZE(data->temp); i++) { + int status; + + status = lm75_read_value(client, LM75_REG_TEMP[i]); + if (status < 0) + dev_dbg(&client->dev, "reg %d, err %d\n", + LM75_REG_TEMP[i], status); + else + data->temp[i] = status; + } data->last_updated = jiffies; data->valid = 1; } diff --git a/drivers/hwmon/lm85.c b/drivers/hwmon/lm85.c index 182fe6a5605..ee5eca1c192 100644 --- a/drivers/hwmon/lm85.c +++ b/drivers/hwmon/lm85.c @@ -192,23 +192,20 @@ static int RANGE_TO_REG( int range ) { int i; - if ( range < lm85_range_map[0] ) { - return 0 ; - } else if ( range > lm85_range_map[15] ) { + if (range >= lm85_range_map[15]) return 15 ; - } else { /* find closest match */ - for ( i = 14 ; i >= 0 ; --i ) { - if ( range > lm85_range_map[i] ) { /* range bracketed */ - if ((lm85_range_map[i+1] - range) < - (range - lm85_range_map[i])) { - i++; - break; - } - break; - } + + /* Find the closest match */ + for (i = 14; i >= 0; --i) { + if (range >= lm85_range_map[i]) { + if ((lm85_range_map[i + 1] - range) < + (range - lm85_range_map[i])) + return i + 1; + return i; } } - return( i & 0x0f ); + + return 0; } #define RANGE_FROM_REG(val) (lm85_range_map[(val)&0x0f]) diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c index b224079d4e1..d5862e5d99a 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.c +++ b/drivers/infiniband/hw/mthca/mthca_memfree.c @@ -109,7 +109,11 @@ static int mthca_alloc_icm_pages(struct scatterlist *mem, int order, gfp_t gfp_m { struct page *page; - page = alloc_pages(gfp_mask, order); + /* + * Use __GFP_ZERO because buggy firmware assumes ICM pages are + * cleared, and subtle failures are seen if they aren't. + */ + page = alloc_pages(gfp_mask | __GFP_ZERO, order); if (!page) return -ENOMEM; diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index 5126d5d9ea0..2e554a4ab33 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -176,7 +176,7 @@ void lguest_arch_run_guest(struct lg_cpu *cpu) * we set it now, so we can trap and pass that trap to the Guest if it * uses the FPU. */ if (cpu->ts) - lguest_set_ts(); + unlazy_fpu(current); /* SYSENTER is an optimized way of doing system calls. We can't allow * it because it always jumps to privilege level 0. A normal Guest @@ -196,6 +196,10 @@ void lguest_arch_run_guest(struct lg_cpu *cpu) * trap made the switcher code come back, and an error code which some * traps set. */ + /* Restore SYSENTER if it's supposed to be on. */ + if (boot_cpu_has(X86_FEATURE_SEP)) + wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); + /* If the Guest page faulted, then the cr2 register will tell us the * bad virtual address. We have to grab this now, because once we * re-enable interrupts an interrupt could fault and thus overwrite @@ -203,13 +207,12 @@ void lguest_arch_run_guest(struct lg_cpu *cpu) if (cpu->regs->trapnum == 14) cpu->arch.last_pagefault = read_cr2(); /* Similarly, if we took a trap because the Guest used the FPU, - * we have to restore the FPU it expects to see. */ + * we have to restore the FPU it expects to see. + * math_state_restore() may sleep and we may even move off to + * a different CPU. So all the critical stuff should be done + * before this. */ else if (cpu->regs->trapnum == 7) math_state_restore(); - - /* Restore SYSENTER if it's supposed to be on. */ - if (boot_cpu_has(X86_FEATURE_SEP)) - wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); } /*H:130 Now we've examined the hypercall code; our Guest can make requests. diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c index bafb69b6f7c..fc6f4b8c64b 100644 --- a/drivers/net/pppoe.c +++ b/drivers/net/pppoe.c @@ -942,7 +942,7 @@ static int pppoe_recvmsg(struct kiocb *iocb, struct socket *sock, m->msg_namelen = 0; if (skb) { - total_len = min(total_len, skb->len); + total_len = min_t(size_t, total_len, skb->len); error = skb_copy_datagram_iovec(skb, 0, m->msg_iov, total_len); if (error == 0) error = total_len; diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile index 8662a6b7a30..25b352b664d 100644 --- a/drivers/watchdog/Makefile +++ b/drivers/watchdog/Makefile @@ -68,7 +68,6 @@ obj-$(CONFIG_WAFER_WDT) += wafer5823wdt.o obj-$(CONFIG_I6300ESB_WDT) += i6300esb.o obj-$(CONFIG_ITCO_WDT) += iTCO_wdt.o iTCO_vendor_support.o obj-$(CONFIG_IT8712F_WDT) += it8712f_wdt.o -CFLAGS_hpwdt.o += -O obj-$(CONFIG_HP_WATCHDOG) += hpwdt.o obj-$(CONFIG_SC1200_WDT) += sc1200wdt.o obj-$(CONFIG_SCx200_WDT) += scx200_wdt.o diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 4f0f22b020e..76e5b7386af 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -529,7 +529,7 @@ void xen_evtchn_do_upcall(struct pt_regs *regs) #ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */ /* Clear master flag /before/ clearing selector flag. */ - rmb(); + wmb(); #endif pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0); while (pending_words != 0) { diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 9ecb92f6854..9ff7b1c0423 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -855,7 +855,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) */ /* Update group descriptor block for new group */ - gdp = (struct ext4_group_desc *)primary->b_data + gdb_off; + gdp = (struct ext4_group_desc *)((char *)primary->b_data + + gdb_off * EXT4_DESC_SIZE(sb)); ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */ ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */ diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index c19184f2e70..bec76b1c2bb 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -246,15 +246,11 @@ static void find_metapath(const struct gfs2_sbd *sdp, u64 block, } -static inline unsigned int zero_metapath_length(const struct metapath *mp, - unsigned height) +static inline unsigned int metapath_branch_start(const struct metapath *mp) { - unsigned int i; - for (i = 0; i < height - 1; i++) { - if (mp->mp_list[i] != 0) - return i; - } - return height; + if (mp->mp_list[0] == 0) + return 2; + return 1; } /** @@ -436,7 +432,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock, struct gfs2_sbd *sdp = GFS2_SB(inode); struct buffer_head *dibh = mp->mp_bh[0]; u64 bn, dblock = 0; - unsigned n, i, blks, alloced = 0, iblks = 0, zmpl = 0; + unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0; unsigned dblks = 0; unsigned ptrs_per_blk; const unsigned end_of_metadata = height - 1; @@ -471,9 +467,8 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock, /* Building up tree height */ state = ALLOC_GROW_HEIGHT; iblks = height - ip->i_height; - zmpl = zero_metapath_length(mp, height); - iblks -= zmpl; - iblks += height; + branch_start = metapath_branch_start(mp); + iblks += (height - branch_start); } } @@ -509,13 +504,13 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock, sizeof(struct gfs2_meta_header)); *ptr = zero_bn; state = ALLOC_GROW_DEPTH; - for(i = zmpl; i < height; i++) { + for(i = branch_start; i < height; i++) { if (mp->mp_bh[i] == NULL) break; brelse(mp->mp_bh[i]); mp->mp_bh[i] = NULL; } - i = zmpl; + i = branch_start; } if (n == 0) break; diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 6387523a315..3401628d742 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -195,7 +195,7 @@ ulong_aligned: depending on architecture. I've experimented with several ways of writing this section such as using an else before the goto but this one seems to be the fastest. */ - while ((unsigned char *)plong < end - 1) { + while ((unsigned char *)plong < end - sizeof(unsigned long)) { prefetch(plong + 1); if (((*plong) & LBITMASK) != lskipval) break; diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 49c7cd0502c..779d2eb649c 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -130,10 +130,11 @@ static int xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res) { struct nfs_fh *fh = res->fh; + unsigned size; if ((res->status = ntohl(*p++)) == 0) { - int size = ntohl(*p++); - if (size <= NFS3_FHSIZE) { + size = ntohl(*p++); + if (size <= NFS3_FHSIZE && size != 0) { fh->size = size; memcpy(fh->data, p, size); } else diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 2a4a024a4e7..614efeed543 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1216,8 +1216,6 @@ static int nfs_validate_mount_data(void *options, { struct nfs_mount_data *data = (struct nfs_mount_data *)options; - memset(args, 0, sizeof(*args)); - if (data == NULL) goto out_no_data; @@ -1251,13 +1249,13 @@ static int nfs_validate_mount_data(void *options, case 5: memset(data->context, 0, sizeof(data->context)); case 6: - if (data->flags & NFS_MOUNT_VER3) + if (data->flags & NFS_MOUNT_VER3) { + if (data->root.size > NFS3_FHSIZE || data->root.size == 0) + goto out_invalid_fh; mntfh->size = data->root.size; - else + } else mntfh->size = NFS2_FHSIZE; - if (mntfh->size > sizeof(mntfh->data)) - goto out_invalid_fh; memcpy(mntfh->data, data->root.data, mntfh->size); if (mntfh->size < sizeof(mntfh->data)) @@ -1585,24 +1583,29 @@ static int nfs_get_sb(struct file_system_type *fs_type, { struct nfs_server *server = NULL; struct super_block *s; - struct nfs_fh mntfh; - struct nfs_parsed_mount_data data; + struct nfs_parsed_mount_data *data; + struct nfs_fh *mntfh; struct dentry *mntroot; int (*compare_super)(struct super_block *, void *) = nfs_compare_super; struct nfs_sb_mountdata sb_mntdata = { .mntflags = flags, }; - int error; + int error = -ENOMEM; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL); + if (data == NULL || mntfh == NULL) + goto out_free_fh; - security_init_mnt_opts(&data.lsm_opts); + security_init_mnt_opts(&data->lsm_opts); /* Validate the mount data */ - error = nfs_validate_mount_data(raw_data, &data, &mntfh, dev_name); + error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name); if (error < 0) goto out; /* Get a volume representation */ - server = nfs_create_server(&data, &mntfh); + server = nfs_create_server(data, mntfh); if (IS_ERR(server)) { error = PTR_ERR(server); goto out; @@ -1630,16 +1633,16 @@ static int nfs_get_sb(struct file_system_type *fs_type, if (!s->s_root) { /* initial superblock/root creation */ - nfs_fill_super(s, &data); + nfs_fill_super(s, data); } - mntroot = nfs_get_root(s, &mntfh); + mntroot = nfs_get_root(s, mntfh); if (IS_ERR(mntroot)) { error = PTR_ERR(mntroot); goto error_splat_super; } - error = security_sb_set_mnt_opts(s, &data.lsm_opts); + error = security_sb_set_mnt_opts(s, &data->lsm_opts); if (error) goto error_splat_root; @@ -1649,9 +1652,12 @@ static int nfs_get_sb(struct file_system_type *fs_type, error = 0; out: - kfree(data.nfs_server.hostname); - kfree(data.mount_server.hostname); - security_free_mnt_opts(&data.lsm_opts); + kfree(data->nfs_server.hostname); + kfree(data->mount_server.hostname); + security_free_mnt_opts(&data->lsm_opts); +out_free_fh: + kfree(mntfh); + kfree(data); return error; out_err_nosb: @@ -1800,8 +1806,6 @@ static int nfs4_validate_mount_data(void *options, struct nfs4_mount_data *data = (struct nfs4_mount_data *)options; char *c; - memset(args, 0, sizeof(*args)); - if (data == NULL) goto out_no_data; @@ -1959,26 +1963,31 @@ out_no_client_address: static int nfs4_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) { - struct nfs_parsed_mount_data data; + struct nfs_parsed_mount_data *data; struct super_block *s; struct nfs_server *server; - struct nfs_fh mntfh; + struct nfs_fh *mntfh; struct dentry *mntroot; int (*compare_super)(struct super_block *, void *) = nfs_compare_super; struct nfs_sb_mountdata sb_mntdata = { .mntflags = flags, }; - int error; + int error = -ENOMEM; - security_init_mnt_opts(&data.lsm_opts); + data = kzalloc(sizeof(*data), GFP_KERNEL); + mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL); + if (data == NULL || mntfh == NULL) + goto out_free_fh; + + security_init_mnt_opts(&data->lsm_opts); /* Validate the mount data */ - error = nfs4_validate_mount_data(raw_data, &data, dev_name); + error = nfs4_validate_mount_data(raw_data, data, dev_name); if (error < 0) goto out; /* Get a volume representation */ - server = nfs4_create_server(&data, &mntfh); + server = nfs4_create_server(data, mntfh); if (IS_ERR(server)) { error = PTR_ERR(server); goto out; @@ -2009,13 +2018,13 @@ static int nfs4_get_sb(struct file_system_type *fs_type, nfs4_fill_super(s); } - mntroot = nfs4_get_root(s, &mntfh); + mntroot = nfs4_get_root(s, mntfh); if (IS_ERR(mntroot)) { error = PTR_ERR(mntroot); goto error_splat_super; } - error = security_sb_set_mnt_opts(s, &data.lsm_opts); + error = security_sb_set_mnt_opts(s, &data->lsm_opts); if (error) goto error_splat_root; @@ -2025,10 +2034,13 @@ static int nfs4_get_sb(struct file_system_type *fs_type, error = 0; out: - kfree(data.client_address); - kfree(data.nfs_server.export_path); - kfree(data.nfs_server.hostname); - security_free_mnt_opts(&data.lsm_opts); + kfree(data->client_address); + kfree(data->nfs_server.export_path); + kfree(data->nfs_server.hostname); + security_free_mnt_opts(&data->lsm_opts); +out_free_fh: + kfree(mntfh); + kfree(data); return error; out_free: diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 6d8ace3e325..f333848fd3b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -739,12 +739,13 @@ int nfs_updatepage(struct file *file, struct page *page, } status = nfs_writepage_setup(ctx, page, offset, count); - __set_page_dirty_nobuffers(page); + if (status < 0) + nfs_set_pageerror(page); + else + __set_page_dirty_nobuffers(page); dprintk("NFS: nfs_updatepage returns %d (isize %Ld)\n", status, (long long)i_size_read(inode)); - if (status < 0) - nfs_set_pageerror(page); return status; } diff --git a/fs/select.c b/fs/select.c index 8dda969614a..da0e88201c3 100644 --- a/fs/select.c +++ b/fs/select.c @@ -249,7 +249,6 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout) retval++; } } - cond_resched(); } if (res_in) *rinp = res_in; @@ -257,6 +256,7 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout) *routp = res_out; if (res_ex) *rexp = res_ex; + cond_resched(); } wait = NULL; if (retval || !*timeout || signal_pending(current)) diff --git a/include/asm-alpha/core_mcpcia.h b/include/asm-alpha/core_mcpcia.h index 525b4f6a7ac..acf55b48347 100644 --- a/include/asm-alpha/core_mcpcia.h +++ b/include/asm-alpha/core_mcpcia.h @@ -261,7 +261,7 @@ struct el_MCPCIA_uncorrected_frame_mcheck { } #endif -static inline int __mcpcia_is_mmio(unsigned long addr) +extern inline int __mcpcia_is_mmio(unsigned long addr) { return (addr & 0x80000000UL) == 0; } diff --git a/include/asm-alpha/core_t2.h b/include/asm-alpha/core_t2.h index 90e6b5d6c21..46bfff58f67 100644 --- a/include/asm-alpha/core_t2.h +++ b/include/asm-alpha/core_t2.h @@ -356,13 +356,13 @@ struct el_t2_frame_corrected { #define vip volatile int * #define vuip volatile unsigned int * -static inline u8 t2_inb(unsigned long addr) +extern inline u8 t2_inb(unsigned long addr) { long result = *(vip) ((addr << 5) + T2_IO + 0x00); return __kernel_extbl(result, addr & 3); } -static inline void t2_outb(u8 b, unsigned long addr) +extern inline void t2_outb(u8 b, unsigned long addr) { unsigned long w; @@ -371,13 +371,13 @@ static inline void t2_outb(u8 b, unsigned long addr) mb(); } -static inline u16 t2_inw(unsigned long addr) +extern inline u16 t2_inw(unsigned long addr) { long result = *(vip) ((addr << 5) + T2_IO + 0x08); return __kernel_extwl(result, addr & 3); } -static inline void t2_outw(u16 b, unsigned long addr) +extern inline void t2_outw(u16 b, unsigned long addr) { unsigned long w; @@ -386,12 +386,12 @@ static inline void t2_outw(u16 b, unsigned long addr) mb(); } -static inline u32 t2_inl(unsigned long addr) +extern inline u32 t2_inl(unsigned long addr) { return *(vuip) ((addr << 5) + T2_IO + 0x18); } -static inline void t2_outl(u32 b, unsigned long addr) +extern inline void t2_outl(u32 b, unsigned long addr) { *(vuip) ((addr << 5) + T2_IO + 0x18) = b; mb(); @@ -435,7 +435,7 @@ static inline void t2_outl(u32 b, unsigned long addr) set_hae(msb); \ } -static DEFINE_SPINLOCK(t2_hae_lock); +extern spinlock_t t2_hae_lock; /* * NOTE: take T2_DENSE_MEM off in each readX/writeX routine, since diff --git a/include/asm-alpha/io.h b/include/asm-alpha/io.h index 38f18cf18c9..e971ab000f9 100644 --- a/include/asm-alpha/io.h +++ b/include/asm-alpha/io.h @@ -35,7 +35,7 @@ * register not being up-to-date with respect to the hardware * value. */ -static inline void __set_hae(unsigned long new_hae) +extern inline void __set_hae(unsigned long new_hae) { unsigned long flags; local_irq_save(flags); @@ -49,7 +49,7 @@ static inline void __set_hae(unsigned long new_hae) local_irq_restore(flags); } -static inline void set_hae(unsigned long new_hae) +extern inline void set_hae(unsigned long new_hae) { if (new_hae != alpha_mv.hae_cache) __set_hae(new_hae); @@ -176,7 +176,7 @@ REMAP2(u64, writeq, volatile) #undef REMAP1 #undef REMAP2 -static inline void __iomem *generic_ioportmap(unsigned long a) +extern inline void __iomem *generic_ioportmap(unsigned long a) { return alpha_mv.mv_ioportmap(a); } diff --git a/include/asm-alpha/mmu_context.h b/include/asm-alpha/mmu_context.h index 6a5be1f7deb..86c08a02d23 100644 --- a/include/asm-alpha/mmu_context.h +++ b/include/asm-alpha/mmu_context.h @@ -23,7 +23,7 @@ #endif -extern inline unsigned long +static inline unsigned long __reload_thread(struct pcb_struct *pcb) { register unsigned long a0 __asm__("$16"); @@ -114,7 +114,7 @@ extern unsigned long last_asn; #define __MMU_EXTERN_INLINE #endif -static inline unsigned long +extern inline unsigned long __get_new_mm_context(struct mm_struct *mm, long cpu) { unsigned long asn = cpu_last_asn(cpu); @@ -226,7 +226,7 @@ ev4_activate_mm(struct mm_struct *prev_mm, struct mm_struct *next_mm) # endif #endif -extern inline int +static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { int i; diff --git a/include/asm-alpha/percpu.h b/include/asm-alpha/percpu.h index 48348fe34c1..3495e8e00d7 100644 --- a/include/asm-alpha/percpu.h +++ b/include/asm-alpha/percpu.h @@ -1,6 +1,78 @@ #ifndef __ALPHA_PERCPU_H #define __ALPHA_PERCPU_H +#include <linux/compiler.h> +#include <linux/threads.h> -#include <asm-generic/percpu.h> +/* + * Determine the real variable name from the name visible in the + * kernel sources. + */ +#define per_cpu_var(var) per_cpu__##var + +#ifdef CONFIG_SMP + +/* + * per_cpu_offset() is the offset that has to be added to a + * percpu variable to get to the instance for a certain processor. + */ +extern unsigned long __per_cpu_offset[NR_CPUS]; + +#define per_cpu_offset(x) (__per_cpu_offset[x]) + +#define __my_cpu_offset per_cpu_offset(raw_smp_processor_id()) +#ifdef CONFIG_DEBUG_PREEMPT +#define my_cpu_offset per_cpu_offset(smp_processor_id()) +#else +#define my_cpu_offset __my_cpu_offset +#endif + +#ifndef MODULE +#define SHIFT_PERCPU_PTR(var, offset) RELOC_HIDE(&per_cpu_var(var), (offset)) +#define PER_CPU_ATTRIBUTES +#else +/* + * To calculate addresses of locally defined variables, GCC uses 32-bit + * displacement from the GP. Which doesn't work for per cpu variables in + * modules, as an offset to the kernel per cpu area is way above 4G. + * + * This forces allocation of a GOT entry for per cpu variable using + * ldq instruction with a 'literal' relocation. + */ +#define SHIFT_PERCPU_PTR(var, offset) ({ \ + extern int simple_identifier_##var(void); \ + unsigned long __ptr, tmp_gp; \ + asm ( "br %1, 1f \n\ + 1: ldgp %1, 0(%1) \n\ + ldq %0, per_cpu__" #var"(%1)\t!literal" \ + : "=&r"(__ptr), "=&r"(tmp_gp)); \ + (typeof(&per_cpu_var(var)))(__ptr + (offset)); }) + +#define PER_CPU_ATTRIBUTES __used + +#endif /* MODULE */ + +/* + * A percpu variable may point to a discarded regions. The following are + * established ways to produce a usable pointer from the percpu variable + * offset. + */ +#define per_cpu(var, cpu) \ + (*SHIFT_PERCPU_PTR(var, per_cpu_offset(cpu))) +#define __get_cpu_var(var) \ + (*SHIFT_PERCPU_PTR(var, my_cpu_offset)) +#define __raw_get_cpu_var(var) \ + (*SHIFT_PERCPU_PTR(var, __my_cpu_offset)) + +#else /* ! SMP */ + +#define per_cpu(var, cpu) (*((void)(cpu), &per_cpu_var(var))) +#define __get_cpu_var(var) per_cpu_var(var) +#define __raw_get_cpu_var(var) per_cpu_var(var) + +#define PER_CPU_ATTRIBUTES + +#endif /* SMP */ + +#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu_var(name) #endif /* __ALPHA_PERCPU_H */ diff --git a/include/asm-alpha/system.h b/include/asm-alpha/system.h index ed221d6408f..afe20fa58c9 100644 --- a/include/asm-alpha/system.h +++ b/include/asm-alpha/system.h @@ -184,7 +184,7 @@ enum amask_enum { __amask; }) #define __CALL_PAL_R0(NAME, TYPE) \ -static inline TYPE NAME(void) \ +extern inline TYPE NAME(void) \ { \ register TYPE __r0 __asm__("$0"); \ __asm__ __volatile__( \ @@ -196,7 +196,7 @@ static inline TYPE NAME(void) \ } #define __CALL_PAL_W1(NAME, TYPE0) \ -static inline void NAME(TYPE0 arg0) \ +extern inline void NAME(TYPE0 arg0) \ { \ register TYPE0 __r16 __asm__("$16") = arg0; \ __asm__ __volatile__( \ @@ -207,7 +207,7 @@ static inline void NAME(TYPE0 arg0) \ } #define __CALL_PAL_W2(NAME, TYPE0, TYPE1) \ -static inline void NAME(TYPE0 arg0, TYPE1 arg1) \ +extern inline void NAME(TYPE0 arg0, TYPE1 arg1) \ { \ register TYPE0 __r16 __asm__("$16") = arg0; \ register TYPE1 __r17 __asm__("$17") = arg1; \ @@ -219,7 +219,7 @@ static inline void NAME(TYPE0 arg0, TYPE1 arg1) \ } #define __CALL_PAL_RW1(NAME, RTYPE, TYPE0) \ -static inline RTYPE NAME(TYPE0 arg0) \ +extern inline RTYPE NAME(TYPE0 arg0) \ { \ register RTYPE __r0 __asm__("$0"); \ register TYPE0 __r16 __asm__("$16") = arg0; \ @@ -232,7 +232,7 @@ static inline RTYPE NAME(TYPE0 arg0) \ } #define __CALL_PAL_RW2(NAME, RTYPE, TYPE0, TYPE1) \ -static inline RTYPE NAME(TYPE0 arg0, TYPE1 arg1) \ +extern inline RTYPE NAME(TYPE0 arg0, TYPE1 arg1) \ { \ register RTYPE __r0 __asm__("$0"); \ register TYPE0 __r16 __asm__("$16") = arg0; \ diff --git a/include/asm-alpha/vga.h b/include/asm-alpha/vga.h index e8df1e7aae6..c00106bac52 100644 --- a/include/asm-alpha/vga.h +++ b/include/asm-alpha/vga.h @@ -13,7 +13,7 @@ #define VT_BUF_HAVE_MEMSETW #define VT_BUF_HAVE_MEMCPYW -extern inline void scr_writew(u16 val, volatile u16 *addr) +static inline void scr_writew(u16 val, volatile u16 *addr) { if (__is_ioaddr(addr)) __raw_writew(val, (volatile u16 __iomem *) addr); @@ -21,7 +21,7 @@ extern inline void scr_writew(u16 val, volatile u16 *addr) *addr = val; } -extern inline u16 scr_readw(volatile const u16 *addr) +static inline u16 scr_readw(volatile const u16 *addr) { if (__is_ioaddr(addr)) return __raw_readw((volatile const u16 __iomem *) addr); @@ -29,7 +29,7 @@ extern inline u16 scr_readw(volatile const u16 *addr) return *addr; } -extern inline void scr_memsetw(u16 *s, u16 c, unsigned int count) +static inline void scr_memsetw(u16 *s, u16 c, unsigned int count) { if (__is_ioaddr(s)) memsetw_io((u16 __iomem *) s, c, count); diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index 1d8cd01fa51..844f2a89afb 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -18,6 +18,7 @@ #include <linux/kvm_para.h> #include <linux/kvm_types.h> +#include <asm/pvclock-abi.h> #include <asm/desc.h> #define KVM_MAX_VCPUS 16 @@ -282,7 +283,8 @@ struct kvm_vcpu_arch { struct x86_emulate_ctxt emulate_ctxt; gpa_t time; - struct kvm_vcpu_time_info hv_clock; + struct pvclock_vcpu_time_info hv_clock; + unsigned int hv_clock_tsc_khz; unsigned int time_offset; struct page *time_page; }; diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h index 50984594207..bfd9900742b 100644 --- a/include/asm-x86/kvm_para.h +++ b/include/asm-x86/kvm_para.h @@ -48,24 +48,6 @@ struct kvm_mmu_op_release_pt { #ifdef __KERNEL__ #include <asm/processor.h> -/* xen binary-compatible interface. See xen headers for details */ -struct kvm_vcpu_time_info { - uint32_t version; - uint32_t pad0; - uint64_t tsc_timestamp; - uint64_t system_time; - uint32_t tsc_to_system_mul; - int8_t tsc_shift; - int8_t pad[3]; -} __attribute__((__packed__)); /* 32 bytes */ - -struct kvm_wall_clock { - uint32_t wc_version; - uint32_t wc_sec; - uint32_t wc_nsec; -} __attribute__((__packed__)); - - extern void kvmclock_init(void); diff --git a/include/asm-x86/pvclock-abi.h b/include/asm-x86/pvclock-abi.h new file mode 100644 index 00000000000..6857f840b24 --- /dev/null +++ b/include/asm-x86/pvclock-abi.h @@ -0,0 +1,42 @@ +#ifndef _ASM_X86_PVCLOCK_ABI_H_ +#define _ASM_X86_PVCLOCK_ABI_H_ +#ifndef __ASSEMBLY__ + +/* + * These structs MUST NOT be changed. + * They are the ABI between hypervisor and guest OS. + * Both Xen and KVM are using this. + * + * pvclock_vcpu_time_info holds the system time and the tsc timestamp + * of the last update. So the guest can use the tsc delta to get a + * more precise system time. There is one per virtual cpu. + * + * pvclock_wall_clock references the point in time when the system + * time was zero (usually boot time), thus the guest calculates the + * current wall clock by adding the system time. + * + * Protocol for the "version" fields is: hypervisor raises it (making + * it uneven) before it starts updating the fields and raises it again + * (making it even) when it is done. Thus the guest can make sure the + * time values it got are consistent by checking the version before + * and after reading them. + */ + +struct pvclock_vcpu_time_info { + u32 version; + u32 pad0; + u64 tsc_timestamp; + u64 system_time; + u32 tsc_to_system_mul; + s8 tsc_shift; + u8 pad[3]; +} __attribute__((__packed__)); /* 32 bytes */ + +struct pvclock_wall_clock { + u32 version; + u32 sec; + u32 nsec; +} __attribute__((__packed__)); + +#endif /* __ASSEMBLY__ */ +#endif /* _ASM_X86_PVCLOCK_ABI_H_ */ diff --git a/include/asm-x86/pvclock.h b/include/asm-x86/pvclock.h new file mode 100644 index 00000000000..85b1bba8e0a --- /dev/null +++ b/include/asm-x86/pvclock.h @@ -0,0 +1,13 @@ +#ifndef _ASM_X86_PVCLOCK_H_ +#define _ASM_X86_PVCLOCK_H_ + +#include <linux/clocksource.h> +#include <asm/pvclock-abi.h> + +/* some helper functions for xen and kvm pv clock sources */ +cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src); +void pvclock_read_wallclock(struct pvclock_wall_clock *wall, + struct pvclock_vcpu_time_info *vcpu, + struct timespec *ts); + +#endif /* _ASM_X86_PVCLOCK_H_ */ diff --git a/include/asm-x86/xen/page.h b/include/asm-x86/xen/page.h index baf3a4dce28..e11f24038b1 100644 --- a/include/asm-x86/xen/page.h +++ b/include/asm-x86/xen/page.h @@ -150,13 +150,9 @@ static inline pte_t __pte_ma(pteval_t x) return (pte_t) { .pte = x }; } -#ifdef CONFIG_X86_PAE #define pmd_val_ma(v) ((v).pmd) #define pud_val_ma(v) ((v).pgd.pgd) #define __pmd_ma(x) ((pmd_t) { (x) } ) -#else /* !X86_PAE */ -#define pmd_val_ma(v) ((v).pud.pgd.pgd) -#endif /* CONFIG_X86_PAE */ #define pgd_val_ma(x) ((x).pgd) diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 6a5dbdc8a7d..686895bacd9 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -94,7 +94,7 @@ extern unsigned long init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn, unsigned long startpfn, unsigned long endpfn); -extern void reserve_bootmem_node(pg_data_t *pgdat, +extern int reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, unsigned long size, int flags); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 092b1b25291..de9d1df4bba 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -33,6 +33,7 @@ #define KVM_REQ_REPORT_TPR_ACCESS 2 #define KVM_REQ_MMU_RELOAD 3 #define KVM_REQ_TRIPLE_FAULT 4 +#define KVM_REQ_PENDING_TIMER 5 struct kvm_vcpu; extern struct kmem_cache *kvm_vcpu_cache; diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index 59f1c0bd8f9..d2a00358676 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -27,8 +27,7 @@ * This routine is called by the kernel to write a series of * characters to the tty device. The characters may come from * user space or kernel space. This routine will return the - * number of characters actually accepted for writing. This - * routine is mandatory. + * number of characters actually accepted for writing. * * Optional: Required for writable devices. * @@ -134,7 +133,7 @@ * This routine notifies the tty driver that it should hangup the * tty device. * - * Required: + * Optional: * * void (*break_ctl)(struct tty_stuct *tty, int state); * diff --git a/include/net/ipv6.h b/include/net/ipv6.h index e0a612bc9c4..f422f7218e1 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -367,6 +367,12 @@ static inline int ipv6_addr_any(const struct in6_addr *a) a->s6_addr32[2] | a->s6_addr32[3] ) == 0); } +static inline int ipv6_addr_loopback(const struct in6_addr *a) +{ + return ((a->s6_addr32[0] | a->s6_addr32[1] | + a->s6_addr32[2] | (a->s6_addr32[3] ^ htonl(1))) == 0); +} + static inline int ipv6_addr_v4mapped(const struct in6_addr *a) { return ((a->s6_addr32[0] | a->s6_addr32[1] | diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index aa540e6be50..d9dd0f70729 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -95,6 +95,11 @@ extern struct list_head net_namespace_list; #ifdef CONFIG_NET_NS extern void __put_net(struct net *net); +static inline int net_alive(struct net *net) +{ + return net && atomic_read(&net->count); +} + static inline struct net *get_net(struct net *net) { atomic_inc(&net->count); @@ -125,6 +130,12 @@ int net_eq(const struct net *net1, const struct net *net2) return net1 == net2; } #else + +static inline int net_alive(struct net *net) +{ + return 1; +} + static inline struct net *get_net(struct net *net) { return net; diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h index 9b018da48cf..819a0331cda 100644 --- a/include/xen/interface/xen.h +++ b/include/xen/interface/xen.h @@ -10,6 +10,7 @@ #define __XEN_PUBLIC_XEN_H__ #include <asm/xen/interface.h> +#include <asm/pvclock-abi.h> /* * XEN "SYSTEM CALLS" (a.k.a. HYPERCALLS). @@ -336,7 +337,7 @@ struct vcpu_info { uint8_t evtchn_upcall_mask; unsigned long evtchn_pending_sel; struct arch_vcpu_info arch; - struct vcpu_time_info time; + struct pvclock_vcpu_time_info time; }; /* 64 bytes (x86) */ /* @@ -384,9 +385,7 @@ struct shared_info { * Wallclock time: updated only by control software. Guests should base * their gettimeofday() syscall on this wallclock-base value. */ - uint32_t wc_version; /* Version counter: see vcpu_time_info_t. */ - uint32_t wc_sec; /* Secs 00:00:00 UTC, Jan 1, 1970. */ - uint32_t wc_nsec; /* Nsecs 00:00:00 UTC, Jan 1, 1970. */ + struct pvclock_wall_clock wc; struct arch_shared_info arch; diff --git a/kernel/futex.c b/kernel/futex.c index 449def8074f..7d1136e97c1 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1096,21 +1096,64 @@ static void unqueue_me_pi(struct futex_q *q) * private futexes. */ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, - struct task_struct *newowner) + struct task_struct *newowner, + struct rw_semaphore *fshared) { u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; struct futex_pi_state *pi_state = q->pi_state; + struct task_struct *oldowner = pi_state->owner; u32 uval, curval, newval; - int ret; + int ret, attempt = 0; /* Owner died? */ + if (!pi_state->owner) + newtid |= FUTEX_OWNER_DIED; + + /* + * We are here either because we stole the rtmutex from the + * pending owner or we are the pending owner which failed to + * get the rtmutex. We have to replace the pending owner TID + * in the user space variable. This must be atomic as we have + * to preserve the owner died bit here. + * + * Note: We write the user space value _before_ changing the + * pi_state because we can fault here. Imagine swapped out + * pages or a fork, which was running right before we acquired + * mmap_sem, that marked all the anonymous memory readonly for + * cow. + * + * Modifying pi_state _before_ the user space value would + * leave the pi_state in an inconsistent state when we fault + * here, because we need to drop the hash bucket lock to + * handle the fault. This might be observed in the PID check + * in lookup_pi_state. + */ +retry: + if (get_futex_value_locked(&uval, uaddr)) + goto handle_fault; + + while (1) { + newval = (uval & FUTEX_OWNER_DIED) | newtid; + + curval = cmpxchg_futex_value_locked(uaddr, uval, newval); + + if (curval == -EFAULT) + goto handle_fault; + if (curval == uval) + break; + uval = curval; + } + + /* + * We fixed up user space. Now we need to fix the pi_state + * itself. + */ if (pi_state->owner != NULL) { spin_lock_irq(&pi_state->owner->pi_lock); WARN_ON(list_empty(&pi_state->list)); list_del_init(&pi_state->list); spin_unlock_irq(&pi_state->owner->pi_lock); - } else - newtid |= FUTEX_OWNER_DIED; + } pi_state->owner = newowner; @@ -1118,26 +1161,35 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, WARN_ON(!list_empty(&pi_state->list)); list_add(&pi_state->list, &newowner->pi_state_list); spin_unlock_irq(&newowner->pi_lock); + return 0; /* - * We own it, so we have to replace the pending owner - * TID. This must be atomic as we have preserve the - * owner died bit here. + * To handle the page fault we need to drop the hash bucket + * lock here. That gives the other task (either the pending + * owner itself or the task which stole the rtmutex) the + * chance to try the fixup of the pi_state. So once we are + * back from handling the fault we need to check the pi_state + * after reacquiring the hash bucket lock and before trying to + * do another fixup. When the fixup has been done already we + * simply return. */ - ret = get_futex_value_locked(&uval, uaddr); +handle_fault: + spin_unlock(q->lock_ptr); - while (!ret) { - newval = (uval & FUTEX_OWNER_DIED) | newtid; + ret = futex_handle_fault((unsigned long)uaddr, fshared, attempt++); - curval = cmpxchg_futex_value_locked(uaddr, uval, newval); + spin_lock(q->lock_ptr); - if (curval == -EFAULT) - ret = -EFAULT; - if (curval == uval) - break; - uval = curval; - } - return ret; + /* + * Check if someone else fixed it for us: + */ + if (pi_state->owner != oldowner) + return 0; + + if (ret) + return ret; + + goto retry; } /* @@ -1507,7 +1559,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, * that case: */ if (q.pi_state->owner != curr) - ret = fixup_pi_state_owner(uaddr, &q, curr); + ret = fixup_pi_state_owner(uaddr, &q, curr, fshared); } else { /* * Catch the rare case, where the lock was released @@ -1539,7 +1591,8 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, int res; owner = rt_mutex_owner(&q.pi_state->pi_mutex); - res = fixup_pi_state_owner(uaddr, &q, owner); + res = fixup_pi_state_owner(uaddr, &q, owner, + fshared); /* propagate -EFAULT, if the fixup failed */ if (res) diff --git a/kernel/kgdb.c b/kernel/kgdb.c index 79e3c90113c..3ec23c3ec97 100644 --- a/kernel/kgdb.c +++ b/kernel/kgdb.c @@ -1499,7 +1499,8 @@ int kgdb_nmicallback(int cpu, void *regs) return 1; } -void kgdb_console_write(struct console *co, const char *s, unsigned count) +static void kgdb_console_write(struct console *co, const char *s, + unsigned count) { unsigned long flags; diff --git a/kernel/sched.c b/kernel/sched.c index b048ad8a11a..3aaa5c8cb42 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4398,22 +4398,20 @@ do_wait_for_common(struct completion *x, long timeout, int state) signal_pending(current)) || (state == TASK_KILLABLE && fatal_signal_pending(current))) { - __remove_wait_queue(&x->wait, &wait); - return -ERESTARTSYS; + timeout = -ERESTARTSYS; + break; } __set_current_state(state); spin_unlock_irq(&x->wait.lock); timeout = schedule_timeout(timeout); spin_lock_irq(&x->wait.lock); - if (!timeout) { - __remove_wait_queue(&x->wait, &wait); - return timeout; - } - } while (!x->done); + } while (!x->done && timeout); __remove_wait_queue(&x->wait, &wait); + if (!x->done) + return timeout; } x->done--; - return timeout; + return timeout ?: 1; } static long __sched diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 1dad5bbb59b..0f3c19197fa 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -250,7 +250,8 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) if (rt_rq->rt_time || rt_rq->rt_nr_running) idle = 0; spin_unlock(&rt_rq->rt_runtime_lock); - } + } else if (rt_rq->rt_nr_running) + idle = 0; if (enqueue) sched_rt_rq_enqueue(rt_rq); diff --git a/mm/bootmem.c b/mm/bootmem.c index e8fb927392b..8d9f60e06f6 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -442,15 +442,17 @@ unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn, return init_bootmem_core(pgdat, freepfn, startpfn, endpfn); } -void __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, +int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, unsigned long size, int flags) { int ret; ret = can_reserve_bootmem_core(pgdat->bdata, physaddr, size, flags); if (ret < 0) - return; + return -ENOMEM; reserve_bootmem_core(pgdat->bdata, physaddr, size, flags); + + return 0; } void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, diff --git a/mm/memory.c b/mm/memory.c index 9aefaae4685..d14b251a25a 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1045,6 +1045,26 @@ no_page_table: return page; } +/* Can we do the FOLL_ANON optimization? */ +static inline int use_zero_page(struct vm_area_struct *vma) +{ + /* + * We don't want to optimize FOLL_ANON for make_pages_present() + * when it tries to page in a VM_LOCKED region. As to VM_SHARED, + * we want to get the page from the page tables to make sure + * that we serialize and update with any other user of that + * mapping. + */ + if (vma->vm_flags & (VM_LOCKED | VM_SHARED)) + return 0; + /* + * And if we have a fault or a nopfn routine, it's not an + * anonymous region. + */ + return !vma->vm_ops || + (!vma->vm_ops->fault && !vma->vm_ops->nopfn); +} + int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, int len, int write, int force, struct page **pages, struct vm_area_struct **vmas) @@ -1119,8 +1139,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, foll_flags = FOLL_TOUCH; if (pages) foll_flags |= FOLL_GET; - if (!write && !(vma->vm_flags & VM_LOCKED) && - (!vma->vm_ops || !vma->vm_ops->fault)) + if (!write && use_zero_page(vma)) foll_flags |= FOLL_ANON; do { @@ -1766,7 +1785,6 @@ gotten: page_table = pte_offset_map_lock(mm, pmd, address, &ptl); if (likely(pte_same(*page_table, orig_pte))) { if (old_page) { - page_remove_rmap(old_page, vma); if (!PageAnon(old_page)) { dec_mm_counter(mm, file_rss); inc_mm_counter(mm, anon_rss); @@ -1788,6 +1806,32 @@ gotten: lru_cache_add_active(new_page); page_add_new_anon_rmap(new_page, vma, address); + if (old_page) { + /* + * Only after switching the pte to the new page may + * we remove the mapcount here. Otherwise another + * process may come and find the rmap count decremented + * before the pte is switched to the new page, and + * "reuse" the old page writing into it while our pte + * here still points into it and can be read by other + * threads. + * + * The critical issue is to order this + * page_remove_rmap with the ptp_clear_flush above. + * Those stores are ordered by (if nothing else,) + * the barrier present in the atomic_add_negative + * in page_remove_rmap. + * + * Then the TLB flush in ptep_clear_flush ensures that + * no process can access the old page before the + * decremented mapcount is visible. And the old page + * cannot be reused until after the decremented + * mapcount is visible. So transitively, TLBs to + * old page will be flushed before it can be reused. + */ + page_remove_rmap(old_page, vma); + } + /* Free the old page.. */ new_page = old_page; ret |= VM_FAULT_WRITE; diff --git a/mm/slab.c b/mm/slab.c index 06236e4ddc1..046607f05f3 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -3263,9 +3263,12 @@ retry: if (cpuset_zone_allowed_hardwall(zone, flags) && cache->nodelists[nid] && - cache->nodelists[nid]->free_objects) + cache->nodelists[nid]->free_objects) { obj = ____cache_alloc_node(cache, flags | GFP_THISNODE, nid); + if (obj) + break; + } } if (!obj) { diff --git a/net/core/dev.c b/net/core/dev.c index 68d8df0992a..c421a1f8f0b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2077,6 +2077,10 @@ int netif_receive_skb(struct sk_buff *skb) rcu_read_lock(); + /* Don't receive packets in an exiting network namespace */ + if (!net_alive(dev_net(skb->dev))) + goto out; + #ifdef CONFIG_NET_CLS_ACT if (skb->tc_verd & TC_NCLS) { skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 72b4c184dd8..7c52fe277b6 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -140,6 +140,9 @@ static void cleanup_net(struct work_struct *work) struct pernet_operations *ops; struct net *net; + /* Be very certain incoming network packets will not find us */ + rcu_barrier(); + net = container_of(work, struct net, work); mutex_lock(&net_mutex); diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 4e5c8615832..17eb48b8e32 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -102,6 +102,15 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt if (hdr->version != 6) goto err; + /* + * RFC4291 2.5.3 + * A packet received on an interface with a destination address + * of loopback must be dropped. + */ + if (!(dev->flags & IFF_LOOPBACK) && + ipv6_addr_loopback(&hdr->daddr)) + goto err; + skb->transport_header = skb->network_header + sizeof(*hdr); IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index c042ce19bd1..86e28a75267 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -345,18 +345,21 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, case IPV6_DSTOPTS: { struct ipv6_txoptions *opt; + + /* remove any sticky options header with a zero option + * length, per RFC3542. + */ if (optlen == 0) optval = NULL; + else if (optlen < sizeof(struct ipv6_opt_hdr) || + optlen & 0x7 || optlen > 8 * 255) + goto e_inval; /* hop-by-hop / destination options are privileged option */ retv = -EPERM; if (optname != IPV6_RTHDR && !capable(CAP_NET_RAW)) break; - if (optlen < sizeof(struct ipv6_opt_hdr) || - optlen & 0x7 || optlen > 8 * 255) - goto e_inval; - opt = ipv6_renew_options(sk, np->opt, optname, (struct ipv6_opt_hdr __user *)optval, optlen); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 28d8bd53bd3..c80d5899f27 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1132,7 +1132,7 @@ static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb, ieee80211_tx_handler *handler; struct ieee80211_tx_data tx; ieee80211_tx_result res = TX_DROP, res_prepare; - int ret, i; + int ret, i, retries = 0; WARN_ON(__ieee80211_queue_pending(local, control->queue)); @@ -1216,6 +1216,13 @@ retry: if (!__ieee80211_queue_stopped(local, control->queue)) { clear_bit(IEEE80211_LINK_STATE_PENDING, &local->state[control->queue]); + retries++; + /* + * Driver bug, it's rejecting packets but + * not stopping queues. + */ + if (WARN_ON_ONCE(retries > 5)) + goto drop; goto retry; } memcpy(&store->control, control, diff --git a/net/sctp/socket.c b/net/sctp/socket.c index e7e3baf7009..0dbcde6758e 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4401,7 +4401,9 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len, if (copy_from_user(&getaddrs, optval, len)) return -EFAULT; - if (getaddrs.addr_num <= 0) return -EINVAL; + if (getaddrs.addr_num <= 0 || + getaddrs.addr_num >= (INT_MAX / sizeof(union sctp_addr))) + return -EINVAL; /* * For UDP-style sockets, id specifies the association to query. * If the id field is set to the value '0' then the locally bound diff --git a/sound/isa/sb/sb_mixer.c b/sound/isa/sb/sb_mixer.c index 91d14224f6b..73d4572d136 100644 --- a/sound/isa/sb/sb_mixer.c +++ b/sound/isa/sb/sb_mixer.c @@ -925,7 +925,7 @@ static unsigned char als4000_saved_regs[] = { static void save_mixer(struct snd_sb *chip, unsigned char *regs, int num_regs) { unsigned char *val = chip->saved_regs; - snd_assert(num_regs > ARRAY_SIZE(chip->saved_regs), return); + snd_assert(num_regs <= ARRAY_SIZE(chip->saved_regs), return); for (; num_regs; num_regs--) *val++ = snd_sbmixer_read(chip, *regs++); } @@ -933,7 +933,7 @@ static void save_mixer(struct snd_sb *chip, unsigned char *regs, int num_regs) static void restore_mixer(struct snd_sb *chip, unsigned char *regs, int num_regs) { unsigned char *val = chip->saved_regs; - snd_assert(num_regs > ARRAY_SIZE(chip->saved_regs), return); + snd_assert(num_regs <= ARRAY_SIZE(chip->saved_regs), return); for (; num_regs; num_regs--) snd_sbmixer_write(chip, *regs++, *val++); } diff --git a/sound/pci/aw2/aw2-alsa.c b/sound/pci/aw2/aw2-alsa.c index 56f87cd33c1..3f00ddf450f 100644 --- a/sound/pci/aw2/aw2-alsa.c +++ b/sound/pci/aw2/aw2-alsa.c @@ -316,6 +316,8 @@ static int __devinit snd_aw2_create(struct snd_card *card, return -ENOMEM; } + /* (2) initialization of the chip hardware */ + snd_aw2_saa7146_setup(&chip->saa7146, chip->iobase_virt); if (request_irq(pci->irq, snd_aw2_saa7146_interrupt, IRQF_SHARED, "Audiowerk2", chip)) { @@ -329,8 +331,6 @@ static int __devinit snd_aw2_create(struct snd_card *card, } chip->irq = pci->irq; - /* (2) initialization of the chip hardware */ - snd_aw2_saa7146_setup(&chip->saa7146, chip->iobase_virt); err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops); if (err < 0) { free_irq(chip->irq, (void *)chip); diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 98778cb69c6..1dcf9f3d110 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -269,28 +269,9 @@ void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) } } -static int get_eoi_gsi(struct kvm_ioapic *ioapic, int vector) +static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int gsi) { - int i; - - for (i = 0; i < IOAPIC_NUM_PINS; i++) - if (ioapic->redirtbl[i].fields.vector == vector) - return i; - return -1; -} - -void kvm_ioapic_update_eoi(struct kvm *kvm, int vector) -{ - struct kvm_ioapic *ioapic = kvm->arch.vioapic; union ioapic_redir_entry *ent; - int gsi; - - gsi = get_eoi_gsi(ioapic, vector); - if (gsi == -1) { - printk(KERN_WARNING "Can't find redir item for %d EOI\n", - vector); - return; - } ent = &ioapic->redirtbl[gsi]; ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); @@ -300,6 +281,16 @@ void kvm_ioapic_update_eoi(struct kvm *kvm, int vector) ioapic_deliver(ioapic, gsi); } +void kvm_ioapic_update_eoi(struct kvm *kvm, int vector) +{ + struct kvm_ioapic *ioapic = kvm->arch.vioapic; + int i; + + for (i = 0; i < IOAPIC_NUM_PINS; i++) + if (ioapic->redirtbl[i].fields.vector == vector) + __kvm_ioapic_update_eoi(ioapic, i); +} + static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr) { struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private; |