From 464abc5de6ea8f4af1c1246e0d1ea7b07362db43 Mon Sep 17 00:00:00 2001
From: Michael Hennerich <michael.hennerich@analog.com>
Date: Mon, 25 Feb 2008 13:50:20 +0800
Subject: [Blackfin] arch: Cleanup abd Simplify:

 - Simplify init_arch_irq
 - Make code more readable
 - Remove useless SSYNCs
 - Fix comments

Signed-off-by: Michael Hennerich <michael.hennerich@analog.com>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
---
 include/asm-blackfin/irq.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/asm-blackfin/irq.h b/include/asm-blackfin/irq.h
index 65480dab244..86b67834354 100644
--- a/include/asm-blackfin/irq.h
+++ b/include/asm-blackfin/irq.h
@@ -67,4 +67,6 @@ static __inline__ int irq_canonicalize(int irq)
 #define NO_IRQ ((unsigned int)(-1))
 #endif
 
+#define SIC_SYSIRQ(irq)	(irq - (IRQ_CORETMR + 1))
+
 #endif				/* _BFIN_IRQ_H_ */
-- 
cgit v1.2.3-70-g09d2


From 3927819d511f5b5855e6f2345f24e7b04e4fd2f5 Mon Sep 17 00:00:00 2001
From: Michael Hennerich <michael.hennerich@analog.com>
Date: Mon, 25 Feb 2008 14:39:50 +0800
Subject: [Blackfin] arch: Fix CONFIG_PM support for BF561

Signed-off-by: Michael Hennerich <michael.hennerich@analog.com>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
---
 arch/blackfin/mach-common/dpmc.S           | 6 ++----
 include/asm-blackfin/mach-bf561/blackfin.h | 3 ++-
 2 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/arch/blackfin/mach-common/dpmc.S b/arch/blackfin/mach-common/dpmc.S
index fc9f6eb9018..9d45aa3265b 100644
--- a/arch/blackfin/mach-common/dpmc.S
+++ b/arch/blackfin/mach-common/dpmc.S
@@ -31,9 +31,6 @@
 #include <asm/blackfin.h>
 #include <asm/mach/irq.h>
 
-.text
-
-#if !defined(CONFIG_BF561)
 
 .section .l1.text
 
@@ -328,10 +325,12 @@ ENTRY(_set_sic_iwr)
 	RTS;
 
 ENTRY(_set_rtc_istat)
+#ifndef CONFIG_BF561
 	P0.H = hi(RTC_ISTAT);
 	P0.L = lo(RTC_ISTAT);
 	w[P0] = R0.L;
 	SSYNC;
+#endif
 	RTS;
 
 ENTRY(_test_pll_locked)
@@ -342,4 +341,3 @@ ENTRY(_test_pll_locked)
 	CC = BITTST(R0,5);
 	IF !CC JUMP 1b;
 	RTS;
-#endif
diff --git a/include/asm-blackfin/mach-bf561/blackfin.h b/include/asm-blackfin/mach-bf561/blackfin.h
index 362617f9384..3a16df2c86d 100644
--- a/include/asm-blackfin/mach-bf561/blackfin.h
+++ b/include/asm-blackfin/mach-bf561/blackfin.h
@@ -49,7 +49,8 @@
 #define bfin_read_FIO_INEN() bfin_read_FIO0_INEN()
 #define bfin_write_FIO_INEN(val) bfin_write_FIO0_INEN(val)
 
-
+#define SIC_IWR0 SICA_IWR0
+#define SIC_IWR1 SICA_IWR1
 #define SIC_IAR0 SICA_IAR0
 #define bfin_write_SIC_IMASK0 bfin_write_SICA_IMASK0
 #define bfin_write_SIC_IMASK1 bfin_write_SICA_IMASK1
-- 
cgit v1.2.3-70-g09d2


From fee40119a2b2abbe239438b74052854db6f3444d Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier.adi@gmail.com>
Date: Mon, 25 Feb 2008 15:06:07 +0800
Subject: [Blackfin] arch: make sure we have proper
 description/copyright/license lines

Signed-off-by: Mike Frysinger <vapier.adi@gmail.com>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
---
 arch/blackfin/kernel/gptimers.c | 8 ++++----
 include/asm-blackfin/gptimers.h | 7 +++----
 2 files changed, 7 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/arch/blackfin/kernel/gptimers.c b/arch/blackfin/kernel/gptimers.c
index 5cf4bdb1df3..1904d8b5332 100644
--- a/arch/blackfin/kernel/gptimers.c
+++ b/arch/blackfin/kernel/gptimers.c
@@ -1,9 +1,9 @@
 /*
- * bfin_gptimers.c - derived from bf53x_timers.c
- *  Driver for General Purpose Timer functions on the Blackfin processor
+ * gptimers.c - Blackfin General Purpose Timer core API
  *
- *  Copyright (C) 2005 John DeHority
- *  Copyright (C) 2006 Hella Aglaia GmbH (awe@aglaia-gmbh.de)
+ * Copyright (c) 2005-2008 Analog Devices Inc.
+ * Copyright (C) 2005 John DeHority
+ * Copyright (C) 2006 Hella Aglaia GmbH (awe@aglaia-gmbh.de)
  *
  * Licensed under the GPLv2.
  */
diff --git a/include/asm-blackfin/gptimers.h b/include/asm-blackfin/gptimers.h
index 8265ea473d5..4f318f1fd2d 100644
--- a/include/asm-blackfin/gptimers.h
+++ b/include/asm-blackfin/gptimers.h
@@ -1,12 +1,11 @@
 /*
- * include/asm/bf5xx_timers.h
- *
- * This file contains the major Data structures and constants
- * used for General Purpose Timer Implementation in BF5xx
+ * gptimers.h - Blackfin General Purpose Timer structs/defines/prototypes
  *
+ * Copyright (c) 2005-2008 Analog Devices Inc.
  * Copyright (C) 2005 John DeHority
  * Copyright (C) 2006 Hella Aglaia GmbH (awe@aglaia-gmbh.de)
  *
+ * Licensed under the GPL-2.
  */
 
 #ifndef _BLACKFIN_TIMERS_H_
-- 
cgit v1.2.3-70-g09d2


From 40edad3efadb3aa486c7a5452401c4de10902496 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier.adi@gmail.com>
Date: Mon, 25 Feb 2008 15:23:30 +0800
Subject: [Blackfin] arch: add bfin_clear_PPIx_STATUS() helper funcs like we
 have for other parts

Signed-off-by: Mike Frysinger <vapier.adi@gmail.com>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
---
 include/asm-blackfin/mach-bf561/cdefBF561.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/asm-blackfin/mach-bf561/cdefBF561.h b/include/asm-blackfin/mach-bf561/cdefBF561.h
index d667816486c..1bc8d2f89cc 100644
--- a/include/asm-blackfin/mach-bf561/cdefBF561.h
+++ b/include/asm-blackfin/mach-bf561/cdefBF561.h
@@ -559,6 +559,7 @@ static __inline__ void bfin_write_VR_CTL(unsigned int val)
 #define bfin_write_PPI0_CONTROL(val)         bfin_write16(PPI0_CONTROL,val)
 #define bfin_read_PPI0_STATUS()              bfin_read16(PPI0_STATUS)
 #define bfin_write_PPI0_STATUS(val)          bfin_write16(PPI0_STATUS,val)
+#define bfin_clear_PPI0_STATUS()             bfin_read_PPI0_STATUS()
 #define bfin_read_PPI0_COUNT()               bfin_read16(PPI0_COUNT)
 #define bfin_write_PPI0_COUNT(val)           bfin_write16(PPI0_COUNT,val)
 #define bfin_read_PPI0_DELAY()               bfin_read16(PPI0_DELAY)
@@ -570,6 +571,7 @@ static __inline__ void bfin_write_VR_CTL(unsigned int val)
 #define bfin_write_PPI1_CONTROL(val)         bfin_write16(PPI1_CONTROL,val)
 #define bfin_read_PPI1_STATUS()              bfin_read16(PPI1_STATUS)
 #define bfin_write_PPI1_STATUS(val)          bfin_write16(PPI1_STATUS,val)
+#define bfin_clear_PPI1_STATUS()             bfin_read_PPI1_STATUS()
 #define bfin_read_PPI1_COUNT()               bfin_read16(PPI1_COUNT)
 #define bfin_write_PPI1_COUNT(val)           bfin_write16(PPI1_COUNT,val)
 #define bfin_read_PPI1_DELAY()               bfin_read16(PPI1_DELAY)
-- 
cgit v1.2.3-70-g09d2


From 0bcfd70ea11a5d6f2362be463513a60245a62baf Mon Sep 17 00:00:00 2001
From: Mike Frysinger <michael.frysinger@analog.com>
Date: Mon, 24 Dec 2007 19:40:05 +0800
Subject: [Blackfin] serial driver: fix bug - cache the bits of the LSR on
 systems where the LSR is read-to-clear

Cache the bits of the LSR on systems where the LSR is read-to-clear
so that we can safely read the LSR in random places.  this fixes
older parts where break/framing/parity/overflow was not being detected
at all in PIO mode, and this fixes newer parts where
break/framing/parity/overflow was being reported all the time
without being cleared.

Signed-off-by: Mike Frysinger <michael.frysinger@analog.com>
Signed-off-by: Bryan Wu <bryan.wu@analog.com>
---
 drivers/serial/bfin_5xx.c                         | 10 +++++++---
 include/asm-blackfin/mach-bf527/bfin_serial_5xx.h | 19 ++++++++++++++++++-
 include/asm-blackfin/mach-bf533/bfin_serial_5xx.h | 19 ++++++++++++++++++-
 include/asm-blackfin/mach-bf537/bfin_serial_5xx.h | 19 ++++++++++++++++++-
 include/asm-blackfin/mach-bf548/bfin_serial_5xx.h |  1 +
 include/asm-blackfin/mach-bf561/bfin_serial_5xx.h | 19 ++++++++++++++++++-
 6 files changed, 80 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/drivers/serial/bfin_5xx.c b/drivers/serial/bfin_5xx.c
index ca9ceaa113a..af84984df77 100644
--- a/drivers/serial/bfin_5xx.c
+++ b/drivers/serial/bfin_5xx.c
@@ -216,8 +216,10 @@ static void bfin_serial_rx_chars(struct bfin_serial_port *uart)
 	struct pt_regs *regs = get_irq_regs();
 #endif
 
- 	ch = UART_GET_CHAR(uart);
 	status = UART_GET_LSR(uart);
+	UART_CLEAR_LSR(uart);
+
+ 	ch = UART_GET_CHAR(uart);
  	uart->port.icount.rx++;
 
 #ifdef CONFIG_KGDB_UART
@@ -335,7 +337,7 @@ static irqreturn_t bfin_serial_rx_int(int irq, void *dev_id)
 	struct bfin_serial_port *uart = dev_id;
 
 	spin_lock(&uart->port.lock);
-	while ((UART_GET_IER(uart) & ERBFI) && (UART_GET_LSR(uart) & DR))
+	while (UART_GET_LSR(uart) & DR)
 		bfin_serial_rx_chars(uart);
 	spin_unlock(&uart->port.lock);
 
@@ -347,7 +349,7 @@ static irqreturn_t bfin_serial_tx_int(int irq, void *dev_id)
 	struct bfin_serial_port *uart = dev_id;
 
 	spin_lock(&uart->port.lock);
-	if ((UART_GET_IER(uart) & ETBEI) && (UART_GET_LSR(uart) & THRE))
+	if (UART_GET_LSR(uart) & THRE)
 		bfin_serial_tx_chars(uart);
 	spin_unlock(&uart->port.lock);
 
@@ -428,6 +430,8 @@ static void bfin_serial_dma_rx_chars(struct bfin_serial_port *uart)
 	int i, flg, status;
 
 	status = UART_GET_LSR(uart);
+	UART_CLEAR_LSR(uart);
+
 	uart->port.icount.rx += CIRC_CNT(uart->rx_dma_buf.head, uart->rx_dma_buf.tail, UART_XMIT_SIZE);;
 
 	if (status & BI) {
diff --git a/include/asm-blackfin/mach-bf527/bfin_serial_5xx.h b/include/asm-blackfin/mach-bf527/bfin_serial_5xx.h
index 15dbc21eed8..233c585efc1 100644
--- a/include/asm-blackfin/mach-bf527/bfin_serial_5xx.h
+++ b/include/asm-blackfin/mach-bf527/bfin_serial_5xx.h
@@ -23,7 +23,6 @@
 #define UART_GET_DLH(uart)	bfin_read16(((uart)->port.membase + OFFSET_DLH))
 #define UART_GET_IIR(uart)      bfin_read16(((uart)->port.membase + OFFSET_IIR))
 #define UART_GET_LCR(uart)      bfin_read16(((uart)->port.membase + OFFSET_LCR))
-#define UART_GET_LSR(uart)      bfin_read16(((uart)->port.membase + OFFSET_LSR))
 #define UART_GET_GCTL(uart)     bfin_read16(((uart)->port.membase + OFFSET_GCTL))
 
 #define UART_PUT_CHAR(uart, v)   bfin_write16(((uart)->port.membase + OFFSET_THR), v)
@@ -58,6 +57,7 @@
 struct bfin_serial_port {
 	struct uart_port port;
 	unsigned int old_status;
+	unsigned int lsr;
 #ifdef CONFIG_SERIAL_BFIN_DMA
 	int tx_done;
 	int tx_count;
@@ -76,6 +76,23 @@ struct bfin_serial_port {
 #endif
 };
 
+/* The hardware clears the LSR bits upon read, so we need to cache
+ * some of the more fun bits in software so they don't get lost
+ * when checking the LSR in other code paths (TX).
+ */
+static inline unsigned int UART_GET_LSR(struct bfin_serial_port *uart)
+{
+	unsigned int lsr = bfin_read16(uart->port.membase + OFFSET_LSR);
+	uart->lsr |= (lsr & (BI|FE|PE|OE));
+	return lsr | uart->lsr;
+}
+
+static inline void UART_CLEAR_LSR(struct bfin_serial_port *uart)
+{
+	uart->lsr = 0;
+	bfin_write16(uart->port.membase + OFFSET_LSR, -1);
+}
+
 struct bfin_serial_port bfin_serial_ports[NR_PORTS];
 struct bfin_serial_res {
 	unsigned long uart_base_addr;
diff --git a/include/asm-blackfin/mach-bf533/bfin_serial_5xx.h b/include/asm-blackfin/mach-bf533/bfin_serial_5xx.h
index 7871d4313f4..b619065ceeb 100644
--- a/include/asm-blackfin/mach-bf533/bfin_serial_5xx.h
+++ b/include/asm-blackfin/mach-bf533/bfin_serial_5xx.h
@@ -23,7 +23,6 @@
 #define UART_GET_DLH(uart)	bfin_read16(((uart)->port.membase + OFFSET_DLH))
 #define UART_GET_IIR(uart)      bfin_read16(((uart)->port.membase + OFFSET_IIR))
 #define UART_GET_LCR(uart)      bfin_read16(((uart)->port.membase + OFFSET_LCR))
-#define UART_GET_LSR(uart)      bfin_read16(((uart)->port.membase + OFFSET_LSR))
 #define UART_GET_GCTL(uart)     bfin_read16(((uart)->port.membase + OFFSET_GCTL))
 
 #define UART_PUT_CHAR(uart,v)   bfin_write16(((uart)->port.membase + OFFSET_THR),v)
@@ -46,6 +45,7 @@
 struct bfin_serial_port {
         struct uart_port        port;
         unsigned int            old_status;
+	unsigned int lsr;
 #ifdef CONFIG_SERIAL_BFIN_DMA
 	int			tx_done;
 	int			tx_count;
@@ -64,6 +64,23 @@ struct bfin_serial_port {
 #endif
 };
 
+/* The hardware clears the LSR bits upon read, so we need to cache
+ * some of the more fun bits in software so they don't get lost
+ * when checking the LSR in other code paths (TX).
+ */
+static inline unsigned int UART_GET_LSR(struct bfin_serial_port *uart)
+{
+	unsigned int lsr = bfin_read16(uart->port.membase + OFFSET_LSR);
+	uart->lsr |= (lsr & (BI|FE|PE|OE));
+	return lsr | uart->lsr;
+}
+
+static inline void UART_CLEAR_LSR(struct bfin_serial_port *uart)
+{
+	uart->lsr = 0;
+	bfin_write16(uart->port.membase + OFFSET_LSR, -1);
+}
+
 struct bfin_serial_port bfin_serial_ports[NR_PORTS];
 struct bfin_serial_res {
 	unsigned long	uart_base_addr;
diff --git a/include/asm-blackfin/mach-bf537/bfin_serial_5xx.h b/include/asm-blackfin/mach-bf537/bfin_serial_5xx.h
index 86e45c37983..f18c517cc93 100644
--- a/include/asm-blackfin/mach-bf537/bfin_serial_5xx.h
+++ b/include/asm-blackfin/mach-bf537/bfin_serial_5xx.h
@@ -23,7 +23,6 @@
 #define UART_GET_DLH(uart)	bfin_read16(((uart)->port.membase + OFFSET_DLH))
 #define UART_GET_IIR(uart)      bfin_read16(((uart)->port.membase + OFFSET_IIR))
 #define UART_GET_LCR(uart)      bfin_read16(((uart)->port.membase + OFFSET_LCR))
-#define UART_GET_LSR(uart)      bfin_read16(((uart)->port.membase + OFFSET_LSR))
 #define UART_GET_GCTL(uart)     bfin_read16(((uart)->port.membase + OFFSET_GCTL))
 
 #define UART_PUT_CHAR(uart,v)   bfin_write16(((uart)->port.membase + OFFSET_THR),v)
@@ -58,6 +57,7 @@
 struct bfin_serial_port {
         struct uart_port        port;
         unsigned int            old_status;
+	unsigned int lsr;
 #ifdef CONFIG_SERIAL_BFIN_DMA
 	int			tx_done;
 	int			tx_count;
@@ -76,6 +76,23 @@ struct bfin_serial_port {
 #endif
 };
 
+/* The hardware clears the LSR bits upon read, so we need to cache
+ * some of the more fun bits in software so they don't get lost
+ * when checking the LSR in other code paths (TX).
+ */
+static inline unsigned int UART_GET_LSR(struct bfin_serial_port *uart)
+{
+	unsigned int lsr = bfin_read16(uart->port.membase + OFFSET_LSR);
+	uart->lsr |= (lsr & (BI|FE|PE|OE));
+	return lsr | uart->lsr;
+}
+
+static inline void UART_CLEAR_LSR(struct bfin_serial_port *uart)
+{
+	uart->lsr = 0;
+	bfin_write16(uart->port.membase + OFFSET_LSR, -1);
+}
+
 struct bfin_serial_port bfin_serial_ports[NR_PORTS];
 struct bfin_serial_res {
 	unsigned long	uart_base_addr;
diff --git a/include/asm-blackfin/mach-bf548/bfin_serial_5xx.h b/include/asm-blackfin/mach-bf548/bfin_serial_5xx.h
index 3770aa38ee9..8733d47747e 100644
--- a/include/asm-blackfin/mach-bf548/bfin_serial_5xx.h
+++ b/include/asm-blackfin/mach-bf548/bfin_serial_5xx.h
@@ -32,6 +32,7 @@
 #define UART_PUT_DLH(uart,v)    bfin_write16(((uart)->port.membase + OFFSET_DLH),v)
 #define UART_PUT_LSR(uart,v)	bfin_write16(((uart)->port.membase + OFFSET_LSR),v)
 #define UART_PUT_LCR(uart,v)    bfin_write16(((uart)->port.membase + OFFSET_LCR),v)
+#define UART_CLEAR_LSR(uart)    bfin_write16(((uart)->port.membase + OFFSET_LSR), -1)
 #define UART_PUT_GCTL(uart,v)   bfin_write16(((uart)->port.membase + OFFSET_GCTL),v)
 
 #if defined(CONFIG_BFIN_UART0_CTSRTS) || defined(CONFIG_BFIN_UART1_CTSRTS)
diff --git a/include/asm-blackfin/mach-bf561/bfin_serial_5xx.h b/include/asm-blackfin/mach-bf561/bfin_serial_5xx.h
index 7871d4313f4..b619065ceeb 100644
--- a/include/asm-blackfin/mach-bf561/bfin_serial_5xx.h
+++ b/include/asm-blackfin/mach-bf561/bfin_serial_5xx.h
@@ -23,7 +23,6 @@
 #define UART_GET_DLH(uart)	bfin_read16(((uart)->port.membase + OFFSET_DLH))
 #define UART_GET_IIR(uart)      bfin_read16(((uart)->port.membase + OFFSET_IIR))
 #define UART_GET_LCR(uart)      bfin_read16(((uart)->port.membase + OFFSET_LCR))
-#define UART_GET_LSR(uart)      bfin_read16(((uart)->port.membase + OFFSET_LSR))
 #define UART_GET_GCTL(uart)     bfin_read16(((uart)->port.membase + OFFSET_GCTL))
 
 #define UART_PUT_CHAR(uart,v)   bfin_write16(((uart)->port.membase + OFFSET_THR),v)
@@ -46,6 +45,7 @@
 struct bfin_serial_port {
         struct uart_port        port;
         unsigned int            old_status;
+	unsigned int lsr;
 #ifdef CONFIG_SERIAL_BFIN_DMA
 	int			tx_done;
 	int			tx_count;
@@ -64,6 +64,23 @@ struct bfin_serial_port {
 #endif
 };
 
+/* The hardware clears the LSR bits upon read, so we need to cache
+ * some of the more fun bits in software so they don't get lost
+ * when checking the LSR in other code paths (TX).
+ */
+static inline unsigned int UART_GET_LSR(struct bfin_serial_port *uart)
+{
+	unsigned int lsr = bfin_read16(uart->port.membase + OFFSET_LSR);
+	uart->lsr |= (lsr & (BI|FE|PE|OE));
+	return lsr | uart->lsr;
+}
+
+static inline void UART_CLEAR_LSR(struct bfin_serial_port *uart)
+{
+	uart->lsr = 0;
+	bfin_write16(uart->port.membase + OFFSET_LSR, -1);
+}
+
 struct bfin_serial_port bfin_serial_ports[NR_PORTS];
 struct bfin_serial_res {
 	unsigned long	uart_base_addr;
-- 
cgit v1.2.3-70-g09d2


From 8851c71eb97610f0f63121d62345c969f71201a2 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <michael.frysinger@analog.com>
Date: Mon, 24 Dec 2007 19:48:04 +0800
Subject: [Blackfin] serial driver: rework break flood anomaly handling to be
 more robust/realistic about what we can actually work around

Signed-off-by: Mike Frysinger <michael.frysinger@analog.com>
Signed-off-by: Bryan Wu <bryan.wu@analog.com>
---
 drivers/serial/bfin_5xx.c                         | 66 ++++++++++++++++++-----
 include/asm-blackfin/mach-bf533/bfin_serial_5xx.h |  3 ++
 include/asm-blackfin/mach-bf561/bfin_serial_5xx.h |  3 ++
 3 files changed, 58 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/drivers/serial/bfin_5xx.c b/drivers/serial/bfin_5xx.c
index af84984df77..50aa3b2a19b 100644
--- a/drivers/serial/bfin_5xx.c
+++ b/drivers/serial/bfin_5xx.c
@@ -206,12 +206,20 @@ int kgdb_get_debug_char(void)
 }
 #endif
 
+#if ANOMALY_05000230 && defined(CONFIG_SERIAL_BFIN_PIO)
+# define UART_GET_ANOMALY_THRESHOLD(uart)    ((uart)->anomaly_threshold)
+# define UART_SET_ANOMALY_THRESHOLD(uart, v) ((uart)->anomaly_threshold = (v))
+#else
+# define UART_GET_ANOMALY_THRESHOLD(uart)    0
+# define UART_SET_ANOMALY_THRESHOLD(uart, v)
+#endif
+
 #ifdef CONFIG_SERIAL_BFIN_PIO
 static void bfin_serial_rx_chars(struct bfin_serial_port *uart)
 {
 	struct tty_struct *tty = uart->port.info->tty;
 	unsigned int status, ch, flg;
-	static int in_break = 0;
+	static struct timeval anomaly_start = { .tv_sec = 0 };
 #ifdef CONFIG_KGDB_UART
 	struct pt_regs *regs = get_irq_regs();
 #endif
@@ -244,28 +252,56 @@ static void bfin_serial_rx_chars(struct bfin_serial_port *uart)
 #endif
 
 	if (ANOMALY_05000230) {
-		/* The BF533 family of processors have a nice misbehavior where
-		 * they continuously generate characters for a "single" break.
+		/* The BF533 (and BF561) family of processors have a nice anomaly
+		 * where they continuously generate characters for a "single" break.
 		 * We have to basically ignore this flood until the "next" valid
-		 * character comes across.  All other Blackfin families operate
-		 * properly though.
+		 * character comes across.  Due to the nature of the flood, it is
+		 * not possible to reliably catch bytes that are sent too quickly
+		 * after this break.  So application code talking to the Blackfin
+		 * which sends a break signal must allow at least 1.5 character
+		 * times after the end of the break for things to stabilize.  This
+		 * timeout was picked as it must absolutely be larger than 1
+		 * character time +/- some percent.  So 1.5 sounds good.  All other
+		 * Blackfin families operate properly.  Woo.
 		 * Note: While Anomaly 05000230 does not directly address this,
 		 *       the changes that went in for it also fixed this issue.
+		 *       That anomaly was fixed in 0.5+ silicon.  I like bunnies.
 		 */
-		if (in_break) {
-			if (ch != 0) {
-				in_break = 0;
-				ch = UART_GET_CHAR(uart);
-				if (bfin_revid() < 5)
-					return;
-			} else
-				return;
+		if (anomaly_start.tv_sec) {
+			struct timeval curr;
+			suseconds_t usecs;
+
+			if ((~ch & (~ch + 1)) & 0xff)
+				goto known_good_char;
+
+			do_gettimeofday(&curr);
+			if (curr.tv_sec - anomaly_start.tv_sec > 1)
+				goto known_good_char;
+
+			usecs = 0;
+			if (curr.tv_sec != anomaly_start.tv_sec)
+				usecs += USEC_PER_SEC;
+			usecs += curr.tv_usec - anomaly_start.tv_usec;
+
+			if (usecs > UART_GET_ANOMALY_THRESHOLD(uart))
+				goto known_good_char;
+
+			if (ch)
+				anomaly_start.tv_sec = 0;
+			else
+				anomaly_start = curr;
+
+			return;
+
+ known_good_char:
+			anomaly_start.tv_sec = 0;
 		}
 	}
 
 	if (status & BI) {
 		if (ANOMALY_05000230)
-			in_break = 1;
+			if (bfin_revid() < 5)
+				do_gettimeofday(&anomaly_start);
 		uart->port.icount.brk++;
 		if (uart_handle_break(&uart->port))
 			goto ignore_char;
@@ -778,6 +814,8 @@ bfin_serial_set_termios(struct uart_port *port, struct ktermios *termios,
 	quot = uart_get_divisor(port, baud);
 	spin_lock_irqsave(&uart->port.lock, flags);
 
+	UART_SET_ANOMALY_THRESHOLD(uart, USEC_PER_SEC / baud * 15);
+
 	do {
 		lsr = UART_GET_LSR(uart);
 	} while (!(lsr & TEMT));
diff --git a/include/asm-blackfin/mach-bf533/bfin_serial_5xx.h b/include/asm-blackfin/mach-bf533/bfin_serial_5xx.h
index b619065ceeb..a1b4f4eebd0 100644
--- a/include/asm-blackfin/mach-bf533/bfin_serial_5xx.h
+++ b/include/asm-blackfin/mach-bf533/bfin_serial_5xx.h
@@ -57,6 +57,9 @@ struct bfin_serial_port {
 	struct work_struct	tx_dma_workqueue;
 #else
 	struct work_struct 	cts_workqueue;
+# if ANOMALY_05000230
+	unsigned int anomaly_threshold;
+# endif
 #endif
 #ifdef CONFIG_SERIAL_BFIN_CTSRTS
 	int			cts_pin;
diff --git a/include/asm-blackfin/mach-bf561/bfin_serial_5xx.h b/include/asm-blackfin/mach-bf561/bfin_serial_5xx.h
index b619065ceeb..a1b4f4eebd0 100644
--- a/include/asm-blackfin/mach-bf561/bfin_serial_5xx.h
+++ b/include/asm-blackfin/mach-bf561/bfin_serial_5xx.h
@@ -57,6 +57,9 @@ struct bfin_serial_port {
 	struct work_struct	tx_dma_workqueue;
 #else
 	struct work_struct 	cts_workqueue;
+# if ANOMALY_05000230
+	unsigned int anomaly_threshold;
+# endif
 #endif
 #ifdef CONFIG_SERIAL_BFIN_CTSRTS
 	int			cts_pin;
-- 
cgit v1.2.3-70-g09d2


From 4cb4f22b19237e63c460c53fbd1c417cdaf63014 Mon Sep 17 00:00:00 2001
From: Sonic Zhang <sonic.zhang@analog.com>
Date: Sat, 2 Feb 2008 14:29:25 +0800
Subject: [Blackfin] serial driver: Fix bug Poll RTS/CTS status in DMA mode as
 well

https://blackfin.uclinux.org/gf/project/uclinux-dist/tracker/?action=TrackerItemEdit&tracker_item_id=3858

Signed-off-by: Sonic Zhang <sonic.zhang@analog.com>
Signed-off-by: Bryan Wu <bryan.wu@analog.com>
---
 drivers/serial/bfin_5xx.c                         | 16 +++++-----------
 include/asm-blackfin/mach-bf527/bfin_serial_5xx.h |  3 +--
 include/asm-blackfin/mach-bf533/bfin_serial_5xx.h |  2 +-
 include/asm-blackfin/mach-bf537/bfin_serial_5xx.h |  3 +--
 include/asm-blackfin/mach-bf548/bfin_serial_5xx.h |  3 +--
 include/asm-blackfin/mach-bf561/bfin_serial_5xx.h |  2 +-
 6 files changed, 10 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/drivers/serial/bfin_5xx.c b/drivers/serial/bfin_5xx.c
index a07df539e52..af866ab3f5a 100644
--- a/drivers/serial/bfin_5xx.c
+++ b/drivers/serial/bfin_5xx.c
@@ -53,7 +53,6 @@
 #ifdef CONFIG_SERIAL_BFIN_DMA
 static void bfin_serial_dma_tx_chars(struct bfin_serial_port *uart);
 #else
-static void bfin_serial_do_work(struct work_struct *work);
 static void bfin_serial_tx_chars(struct bfin_serial_port *uart);
 #endif
 
@@ -372,8 +371,9 @@ static irqreturn_t bfin_serial_tx_int(int irq, void *dev_id)
 
 	return IRQ_HANDLED;
 }
+#endif
 
-
+#ifdef CONFIG_SERIAL_BFIN_CTSRTS
 static void bfin_serial_do_work(struct work_struct *work)
 {
 	struct bfin_serial_port *uart = container_of(work, struct bfin_serial_port, cts_workqueue);
@@ -607,22 +607,17 @@ static void bfin_serial_mctrl_check(struct bfin_serial_port *uart)
 {
 #ifdef CONFIG_SERIAL_BFIN_CTSRTS
 	unsigned int status;
-# ifdef CONFIG_SERIAL_BFIN_DMA
 	struct uart_info *info = uart->port.info;
 	struct tty_struct *tty = info->tty;
 
 	status = bfin_serial_get_mctrl(&uart->port);
+	uart_handle_cts_change(&uart->port, status & TIOCM_CTS);
 	if (!(status & TIOCM_CTS)) {
 		tty->hw_stopped = 1;
+		schedule_work(&uart->cts_workqueue);
 	} else {
 		tty->hw_stopped = 0;
 	}
-# else
-	status = bfin_serial_get_mctrl(&uart->port);
-	uart_handle_cts_change(&uart->port, status & TIOCM_CTS);
-	if (!(status & TIOCM_CTS))
-		schedule_work(&uart->cts_workqueue);
-# endif
 #endif
 }
 
@@ -939,10 +934,9 @@ static void __init bfin_serial_init_ports(void)
 		bfin_serial_ports[i].rx_dma_channel =
 			bfin_serial_resource[i].uart_rx_dma_channel;
 		init_timer(&(bfin_serial_ports[i].rx_dma_timer));
-#else
-		INIT_WORK(&bfin_serial_ports[i].cts_workqueue, bfin_serial_do_work);
 #endif
 #ifdef CONFIG_SERIAL_BFIN_CTSRTS
+		INIT_WORK(&bfin_serial_ports[i].cts_workqueue, bfin_serial_do_work);
 		bfin_serial_ports[i].cts_pin	    =
 			bfin_serial_resource[i].uart_cts_pin;
 		bfin_serial_ports[i].rts_pin	    =
diff --git a/include/asm-blackfin/mach-bf527/bfin_serial_5xx.h b/include/asm-blackfin/mach-bf527/bfin_serial_5xx.h
index 233c585efc1..c0694ecd2ec 100644
--- a/include/asm-blackfin/mach-bf527/bfin_serial_5xx.h
+++ b/include/asm-blackfin/mach-bf527/bfin_serial_5xx.h
@@ -67,10 +67,9 @@ struct bfin_serial_port {
 	unsigned int tx_dma_channel;
 	unsigned int rx_dma_channel;
 	struct work_struct tx_dma_workqueue;
-#else
-	struct work_struct cts_workqueue;
 #endif
 #ifdef CONFIG_SERIAL_BFIN_CTSRTS
+	struct work_struct cts_workqueue;
 	int cts_pin;
 	int rts_pin;
 #endif
diff --git a/include/asm-blackfin/mach-bf533/bfin_serial_5xx.h b/include/asm-blackfin/mach-bf533/bfin_serial_5xx.h
index a1b4f4eebd0..b6f513bee56 100644
--- a/include/asm-blackfin/mach-bf533/bfin_serial_5xx.h
+++ b/include/asm-blackfin/mach-bf533/bfin_serial_5xx.h
@@ -56,12 +56,12 @@ struct bfin_serial_port {
 	unsigned int		rx_dma_channel;
 	struct work_struct	tx_dma_workqueue;
 #else
-	struct work_struct 	cts_workqueue;
 # if ANOMALY_05000230
 	unsigned int anomaly_threshold;
 # endif
 #endif
 #ifdef CONFIG_SERIAL_BFIN_CTSRTS
+	struct work_struct 	cts_workqueue;
 	int			cts_pin;
 	int			rts_pin;
 #endif
diff --git a/include/asm-blackfin/mach-bf537/bfin_serial_5xx.h b/include/asm-blackfin/mach-bf537/bfin_serial_5xx.h
index f18c517cc93..8fc672d3105 100644
--- a/include/asm-blackfin/mach-bf537/bfin_serial_5xx.h
+++ b/include/asm-blackfin/mach-bf537/bfin_serial_5xx.h
@@ -67,10 +67,9 @@ struct bfin_serial_port {
 	unsigned int		tx_dma_channel;
 	unsigned int		rx_dma_channel;
 	struct work_struct	tx_dma_workqueue;
-#else
-	struct work_struct 	cts_workqueue;
 #endif
 #ifdef CONFIG_SERIAL_BFIN_CTSRTS
+	struct work_struct 	cts_workqueue;
 	int		cts_pin;
 	int 		rts_pin;
 #endif
diff --git a/include/asm-blackfin/mach-bf548/bfin_serial_5xx.h b/include/asm-blackfin/mach-bf548/bfin_serial_5xx.h
index 8733d47747e..c459c484646 100644
--- a/include/asm-blackfin/mach-bf548/bfin_serial_5xx.h
+++ b/include/asm-blackfin/mach-bf548/bfin_serial_5xx.h
@@ -69,10 +69,9 @@ struct bfin_serial_port {
 	unsigned int		tx_dma_channel;
 	unsigned int		rx_dma_channel;
 	struct work_struct	tx_dma_workqueue;
-#else
-	struct work_struct 	cts_workqueue;
 #endif
 #ifdef CONFIG_SERIAL_BFIN_CTSRTS
+	struct work_struct 	cts_workqueue;
 	int		cts_pin;
 	int 		rts_pin;
 #endif
diff --git a/include/asm-blackfin/mach-bf561/bfin_serial_5xx.h b/include/asm-blackfin/mach-bf561/bfin_serial_5xx.h
index a1b4f4eebd0..b6f513bee56 100644
--- a/include/asm-blackfin/mach-bf561/bfin_serial_5xx.h
+++ b/include/asm-blackfin/mach-bf561/bfin_serial_5xx.h
@@ -56,12 +56,12 @@ struct bfin_serial_port {
 	unsigned int		rx_dma_channel;
 	struct work_struct	tx_dma_workqueue;
 #else
-	struct work_struct 	cts_workqueue;
 # if ANOMALY_05000230
 	unsigned int anomaly_threshold;
 # endif
 #endif
 #ifdef CONFIG_SERIAL_BFIN_CTSRTS
+	struct work_struct 	cts_workqueue;
 	int			cts_pin;
 	int			rts_pin;
 #endif
-- 
cgit v1.2.3-70-g09d2


From db288381e26e592b11572ce8199bedeadf0c0830 Mon Sep 17 00:00:00 2001
From: Sonic Zhang <sonic.zhang@analog.com>
Date: Sat, 2 Feb 2008 17:05:02 +0800
Subject: [Blackfin] serial driver: Add flow control support to bf54x

Signed-off-by: Sonic Zhang <sonic.zhang@analog.com>
Signed-off-by: Bryan Wu <bryan.wu@analog.com>
---
 drivers/serial/Kconfig                            |  4 ++--
 drivers/serial/bfin_5xx.c                         | 12 ++++++++++++
 include/asm-blackfin/mach-bf548/bfin_serial_5xx.h |  3 +++
 3 files changed, 17 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig
index 202fb5c99b8..cf627cd1b4c 100644
--- a/drivers/serial/Kconfig
+++ b/drivers/serial/Kconfig
@@ -699,14 +699,14 @@ config BFIN_UART1_CTSRTS
 
 config UART1_CTS_PIN
 	int "UART1 CTS pin"
-	depends on BFIN_UART1_CTSRTS && (BF53x || BF561)
+	depends on BFIN_UART1_CTSRTS && !BF54x
 	default -1
 	help
 	  Refer to ./include/asm-blackfin/gpio.h to see the GPIO map.
 
 config UART1_RTS_PIN
 	int "UART1 RTS pin"
-	depends on BFIN_UART1_CTSRTS && (BF53x || BF561)
+	depends on BFIN_UART1_CTSRTS && !BF54x
 	default -1
 	help
 	  Refer to ./include/asm-blackfin/gpio.h to see the GPIO map.
diff --git a/drivers/serial/bfin_5xx.c b/drivers/serial/bfin_5xx.c
index af866ab3f5a..69ac7007682 100644
--- a/drivers/serial/bfin_5xx.c
+++ b/drivers/serial/bfin_5xx.c
@@ -579,7 +579,11 @@ static unsigned int bfin_serial_get_mctrl(struct uart_port *port)
 	if (uart->cts_pin < 0)
 		return TIOCM_CTS | TIOCM_DSR | TIOCM_CAR;
 
+# ifdef BF54x
+	if (UART_GET_MSR(uart) & CTS)
+# else
 	if (gpio_get_value(uart->cts_pin))
+# endif
 		return TIOCM_DSR | TIOCM_CAR;
 	else
 #endif
@@ -594,9 +598,17 @@ static void bfin_serial_set_mctrl(struct uart_port *port, unsigned int mctrl)
 		return;
 
 	if (mctrl & TIOCM_RTS)
+# ifdef BF54x
+		UART_PUT_MCR(uart, UART_GET_MCR(uart) & ~MRTS);
+# else
 		gpio_set_value(uart->rts_pin, 0);
+# endif
 	else
+# ifdef BF54x
+		UART_PUT_MCR(uart, UART_GET_MCR(uart) | MRTS);
+# else
 		gpio_set_value(uart->rts_pin, 1);
+# endif
 #endif
 }
 
diff --git a/include/asm-blackfin/mach-bf548/bfin_serial_5xx.h b/include/asm-blackfin/mach-bf548/bfin_serial_5xx.h
index c459c484646..7e6339f62a5 100644
--- a/include/asm-blackfin/mach-bf548/bfin_serial_5xx.h
+++ b/include/asm-blackfin/mach-bf548/bfin_serial_5xx.h
@@ -24,6 +24,8 @@
 #define UART_GET_LCR(uart)      bfin_read16(((uart)->port.membase + OFFSET_LCR))
 #define UART_GET_LSR(uart)      bfin_read16(((uart)->port.membase + OFFSET_LSR))
 #define UART_GET_GCTL(uart)     bfin_read16(((uart)->port.membase + OFFSET_GCTL))
+#define UART_GET_MSR(uart)      bfin_read16(((uart)->port.membase + OFFSET_MSR))
+#define UART_GET_MCR(uart)      bfin_read16(((uart)->port.membase + OFFSET_MCR))
 
 #define UART_PUT_CHAR(uart,v)   bfin_write16(((uart)->port.membase + OFFSET_THR),v)
 #define UART_PUT_DLL(uart,v)    bfin_write16(((uart)->port.membase + OFFSET_DLL),v)
@@ -34,6 +36,7 @@
 #define UART_PUT_LCR(uart,v)    bfin_write16(((uart)->port.membase + OFFSET_LCR),v)
 #define UART_CLEAR_LSR(uart)    bfin_write16(((uart)->port.membase + OFFSET_LSR), -1)
 #define UART_PUT_GCTL(uart,v)   bfin_write16(((uart)->port.membase + OFFSET_GCTL),v)
+#define UART_PUT_MCR(uart,v)    bfin_write16(((uart)->port.membase + OFFSET_MCR),v)
 
 #if defined(CONFIG_BFIN_UART0_CTSRTS) || defined(CONFIG_BFIN_UART1_CTSRTS)
 # define CONFIG_SERIAL_BFIN_CTSRTS
-- 
cgit v1.2.3-70-g09d2


From e48a411fa097c386c6081f4ed5a952aa3e21ca2b Mon Sep 17 00:00:00 2001
From: Haavard Skinnemoen <hskinnemoen@atmel.com>
Date: Mon, 11 Feb 2008 16:55:19 +0100
Subject: AVR32: Define PAGE_SHARED

The virtual framebuffer driver needs PAGE_SHARED, which is not defined
on avr32. Define it.

Reported-by: Oliver Zander <ozander@como.com>
Signed-off-by: Haavard Skinnemoen <hskinnemoen@atmel.com>
---
 include/asm-avr32/pgtable.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/asm-avr32/pgtable.h b/include/asm-avr32/pgtable.h
index 018f6e2a024..3ae7b548fce 100644
--- a/include/asm-avr32/pgtable.h
+++ b/include/asm-avr32/pgtable.h
@@ -157,6 +157,7 @@ extern struct page *empty_zero_page;
 #define _PAGE_S(x)	_PAGE_NORMAL(x)
 
 #define PAGE_COPY	_PAGE_P(PAGE_WRITE | PAGE_READ)
+#define PAGE_SHARED	_PAGE_S(PAGE_WRITE | PAGE_READ)
 
 #ifndef __ASSEMBLY__
 /*
-- 
cgit v1.2.3-70-g09d2


From b35905c16ad6428551eb9e49525011bd2700cf56 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Mon, 25 Feb 2008 16:54:37 -0500
Subject: ext4: Fix memory and buffer head leak in callers to
 ext4_ext_find_extent()

The path variable returned via ext4_ext_find_extent is a kmalloc
variable and needs to be freeded.  It also contains a reference to
buffer_head which needs to be dropped.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/extents.c               | 6 +++---
 fs/ext4/migrate.c               | 5 +++++
 include/linux/ext4_fs_extents.h | 1 +
 3 files changed, 9 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index e856f660fc3..995ac16102a 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -349,7 +349,7 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
 #define ext4_ext_show_leaf(inode,path)
 #endif
 
-static void ext4_ext_drop_refs(struct ext4_ext_path *path)
+void ext4_ext_drop_refs(struct ext4_ext_path *path)
 {
 	int depth = path->p_depth;
 	int i;
@@ -2200,10 +2200,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 		newdepth = ext_depth(inode);
 		if (newdepth != depth) {
 			depth = newdepth;
-			path = ext4_ext_find_extent(inode, iblock, NULL);
+			ext4_ext_drop_refs(path);
+			path = ext4_ext_find_extent(inode, iblock, path);
 			if (IS_ERR(path)) {
 				err = PTR_ERR(path);
-				path = NULL;
 				goto out;
 			}
 			eh = path[depth].p_hdr;
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 8c6c685b9d2..5c1e27de775 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -43,6 +43,7 @@ static int finish_range(handle_t *handle, struct inode *inode,
 
 	if (IS_ERR(path)) {
 		retval = PTR_ERR(path);
+		path = NULL;
 		goto err_out;
 	}
 
@@ -74,6 +75,10 @@ static int finish_range(handle_t *handle, struct inode *inode,
 	}
 	retval = ext4_ext_insert_extent(handle, inode, path, &newext);
 err_out:
+	if (path) {
+		ext4_ext_drop_refs(path);
+		kfree(path);
+	}
 	lb->first_pblock = 0;
 	return retval;
 }
diff --git a/include/linux/ext4_fs_extents.h b/include/linux/ext4_fs_extents.h
index 697da4bce6c..1285c583b2d 100644
--- a/include/linux/ext4_fs_extents.h
+++ b/include/linux/ext4_fs_extents.h
@@ -227,5 +227,6 @@ extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *,
 						ext4_lblk_t *, ext4_fsblk_t *);
 extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *,
 						ext4_lblk_t *, ext4_fsblk_t *);
+extern void ext4_ext_drop_refs(struct ext4_ext_path *);
 #endif /* _LINUX_EXT4_EXTENTS */
 
-- 
cgit v1.2.3-70-g09d2


From 76fc60a2e3c6aa6e98cd3a5cb81a1855c637b274 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sat, 23 Feb 2008 11:12:06 +0800
Subject: [CRYPTO] skcipher: Move chainiv/seqiv into crypto_blkcipher module

For compatibility with dm-crypt initramfs setups it is useful to merge
chainiv/seqiv into the crypto_blkcipher module.  Since they're required
by most algorithms anyway this is an acceptable trade-off.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/Makefile                    |  4 ++--
 crypto/ablkcipher.c                |  3 ---
 crypto/blkcipher.c                 | 29 +++++++++++++++++++++++++++++
 crypto/chainiv.c                   | 12 ++++--------
 crypto/eseqiv.c                    | 12 ++++--------
 include/crypto/internal/skcipher.h |  6 ++++++
 6 files changed, 45 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/crypto/Makefile b/crypto/Makefile
index 48c75837995..7cf36253a75 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -12,9 +12,9 @@ obj-$(CONFIG_CRYPTO_AEAD) += aead.o
 
 crypto_blkcipher-objs := ablkcipher.o
 crypto_blkcipher-objs += blkcipher.o
+crypto_blkcipher-objs += chainiv.o
+crypto_blkcipher-objs += eseqiv.o
 obj-$(CONFIG_CRYPTO_BLKCIPHER) += crypto_blkcipher.o
-obj-$(CONFIG_CRYPTO_BLKCIPHER) += chainiv.o
-obj-$(CONFIG_CRYPTO_BLKCIPHER) += eseqiv.o
 obj-$(CONFIG_CRYPTO_SEQIV) += seqiv.o
 
 crypto_hash-objs := hash.o
diff --git a/crypto/ablkcipher.c b/crypto/ablkcipher.c
index 3bcb099b4a8..94140b3756f 100644
--- a/crypto/ablkcipher.c
+++ b/crypto/ablkcipher.c
@@ -341,6 +341,3 @@ err:
 	return ERR_PTR(err);
 }
 EXPORT_SYMBOL_GPL(crypto_alloc_ablkcipher);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Asynchronous block chaining cipher type");
diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c
index 4a7e65c4df4..185f955fb0d 100644
--- a/crypto/blkcipher.c
+++ b/crypto/blkcipher.c
@@ -696,5 +696,34 @@ void skcipher_geniv_exit(struct crypto_tfm *tfm)
 }
 EXPORT_SYMBOL_GPL(skcipher_geniv_exit);
 
+static int __init blkcipher_module_init(void)
+{
+	int err;
+
+	err = chainiv_module_init();
+	if (err)
+		goto out;
+
+	err = eseqiv_module_init();
+	if (err)
+		goto eseqiv_err;
+
+out:
+	return err;
+
+eseqiv_err:
+	chainiv_module_exit();
+	goto out;
+}
+
+static void __exit blkcipher_module_exit(void)
+{
+	eseqiv_module_exit();
+	chainiv_module_exit();
+}
+
+module_init(blkcipher_module_init);
+module_exit(blkcipher_module_exit);
+
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Generic block chaining cipher type");
diff --git a/crypto/chainiv.c b/crypto/chainiv.c
index d17fa0454dc..0a7cac6e908 100644
--- a/crypto/chainiv.c
+++ b/crypto/chainiv.c
@@ -314,18 +314,14 @@ static struct crypto_template chainiv_tmpl = {
 	.module = THIS_MODULE,
 };
 
-static int __init chainiv_module_init(void)
+int __init chainiv_module_init(void)
 {
 	return crypto_register_template(&chainiv_tmpl);
 }
+EXPORT_SYMBOL_GPL(chainiv_module_init);
 
-static void __exit chainiv_module_exit(void)
+void __exit chainiv_module_exit(void)
 {
 	crypto_unregister_template(&chainiv_tmpl);
 }
-
-module_init(chainiv_module_init);
-module_exit(chainiv_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Chain IV Generator");
+EXPORT_SYMBOL_GPL(chainiv_module_exit);
diff --git a/crypto/eseqiv.c b/crypto/eseqiv.c
index eb90d27ae11..6f2cd063b6f 100644
--- a/crypto/eseqiv.c
+++ b/crypto/eseqiv.c
@@ -247,18 +247,14 @@ static struct crypto_template eseqiv_tmpl = {
 	.module = THIS_MODULE,
 };
 
-static int __init eseqiv_module_init(void)
+int __init eseqiv_module_init(void)
 {
 	return crypto_register_template(&eseqiv_tmpl);
 }
+EXPORT_SYMBOL_GPL(eseqiv_module_init);
 
-static void __exit eseqiv_module_exit(void)
+void __exit eseqiv_module_exit(void)
 {
 	crypto_unregister_template(&eseqiv_tmpl);
 }
-
-module_init(eseqiv_module_init);
-module_exit(eseqiv_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Encrypted Sequence Number IV Generator");
+EXPORT_SYMBOL_GPL(eseqiv_module_exit);
diff --git a/include/crypto/internal/skcipher.h b/include/crypto/internal/skcipher.h
index 2ba42cd7d6a..a8f12644a13 100644
--- a/include/crypto/internal/skcipher.h
+++ b/include/crypto/internal/skcipher.h
@@ -15,6 +15,7 @@
 
 #include <crypto/algapi.h>
 #include <crypto/skcipher.h>
+#include <linux/init.h>
 #include <linux/types.h>
 
 struct rtattr;
@@ -64,6 +65,11 @@ void skcipher_geniv_free(struct crypto_instance *inst);
 int skcipher_geniv_init(struct crypto_tfm *tfm);
 void skcipher_geniv_exit(struct crypto_tfm *tfm);
 
+int __init eseqiv_module_init(void);
+void __exit eseqiv_module_exit(void);
+int __init chainiv_module_init(void);
+void __exit chainiv_module_exit(void);
+
 static inline struct crypto_ablkcipher *skcipher_geniv_cipher(
 	struct crypto_ablkcipher *geniv)
 {
-- 
cgit v1.2.3-70-g09d2


From 5319578ca38a8b90b6d0270c194c65d1dd8f7725 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@HansenPartnership.com>
Date: Sat, 23 Feb 2008 23:35:44 -0600
Subject: [SCSI] libsas: export sas_find_local_phy function

This is needed by the to be added I_T reset function in aic94xx.  It
needs to know the local phy so it can send a link or hard reset along
the path.

Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
 drivers/scsi/libsas/sas_scsi_host.c | 5 +++--
 include/scsi/libsas.h               | 1 +
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c
index 704ea06a6e5..f3706f4b79c 100644
--- a/drivers/scsi/libsas/sas_scsi_host.c
+++ b/drivers/scsi/libsas/sas_scsi_host.c
@@ -434,7 +434,7 @@ static int sas_recover_I_T(struct domain_device *dev)
 }
 
 /* Find the sas_phy that's attached to this device */
-static struct sas_phy *find_local_sas_phy(struct domain_device *dev)
+struct sas_phy *sas_find_local_phy(struct domain_device *dev)
 {
 	struct domain_device *pdev = dev->parent;
 	struct ex_phy *exphy = NULL;
@@ -456,6 +456,7 @@ static struct sas_phy *find_local_sas_phy(struct domain_device *dev)
 	BUG_ON(!exphy);
 	return exphy->phy;
 }
+EXPORT_SYMBOL_GPL(sas_find_local_phy);
 
 /* Attempt to send a LUN reset message to a device */
 int sas_eh_device_reset_handler(struct scsi_cmnd *cmd)
@@ -482,7 +483,7 @@ int sas_eh_device_reset_handler(struct scsi_cmnd *cmd)
 int sas_eh_bus_reset_handler(struct scsi_cmnd *cmd)
 {
 	struct domain_device *dev = cmd_to_domain_dev(cmd);
-	struct sas_phy *phy = find_local_sas_phy(dev);
+	struct sas_phy *phy = sas_find_local_phy(dev);
 	int res;
 
 	res = sas_phy_reset(phy, 1);
diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index 3ffd6b582a9..39e1cac24bb 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h
@@ -675,5 +675,6 @@ extern int sas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy,
 
 extern void sas_ssp_task_response(struct device *dev, struct sas_task *task,
 				  struct ssp_response_iu *iu);
+struct sas_phy *sas_find_local_phy(struct domain_device *dev);
 
 #endif /* _SASLIB_H_ */
-- 
cgit v1.2.3-70-g09d2


From cc645a020510cf68332a71394a32c1eacb92c6ed Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Thu, 14 Feb 2008 15:09:27 +0900
Subject: sh: Rename SH-3 CCR3 reg to avoid synclink_cs clash.

drivers/char/pcmcia/synclink_cs.c:284:1: warning: "CCR3" redefined
In file included from include/asm/cache.h:13,
                 from include/asm/processor_32.h:15,
                 from include/asm/processor.h:60,
                 from include/linux/prefetch.h:14,
                 from include/linux/list.h:8,
                 from include/linux/module.h:9,
                 from drivers/char/pcmcia/synclink_cs.c:38:
include/asm/cpu/cache.h:38:1: warning: this is the location of the previous definition

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/kernel/cpu/sh3/probe.c | 4 ++--
 include/asm-sh/cpu-sh3/cache.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/arch/sh/kernel/cpu/sh3/probe.c b/arch/sh/kernel/cpu/sh3/probe.c
index fcc80bb7bee..10f2a760c5e 100644
--- a/arch/sh/kernel/cpu/sh3/probe.c
+++ b/arch/sh/kernel/cpu/sh3/probe.c
@@ -94,9 +94,9 @@ int __uses_jump_to_uncached detect_cpu_and_cache_system(void)
 		boot_cpu_data.dcache.way_incr	= (1 << 13);
 		boot_cpu_data.dcache.entry_mask	= 0x1ff0;
 		boot_cpu_data.dcache.sets	= 512;
-		ctrl_outl(CCR_CACHE_32KB, CCR3);
+		ctrl_outl(CCR_CACHE_32KB, CCR3_REG);
 #else
-		ctrl_outl(CCR_CACHE_16KB, CCR3);
+		ctrl_outl(CCR_CACHE_16KB, CCR3_REG);
 #endif
 #endif
 	}
diff --git a/include/asm-sh/cpu-sh3/cache.h b/include/asm-sh/cpu-sh3/cache.h
index 56bd838b7db..bee2d81c56b 100644
--- a/include/asm-sh/cpu-sh3/cache.h
+++ b/include/asm-sh/cpu-sh3/cache.h
@@ -35,7 +35,7 @@
     defined(CONFIG_CPU_SUBTYPE_SH7710) || \
     defined(CONFIG_CPU_SUBTYPE_SH7720) || \
     defined(CONFIG_CPU_SUBTYPE_SH7721)
-#define CCR3	0xa40000b4
+#define CCR3_REG	0xa40000b4
 #define CCR_CACHE_16KB  0x00010000
 #define CCR_CACHE_32KB	0x00020000
 #endif
-- 
cgit v1.2.3-70-g09d2


From 2d07b255c7b8a9723010e5c74778e058dc05162e Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Fri, 15 Feb 2008 09:56:34 -0800
Subject: sched: add declaration of sched_tail to sched.h

Avoids sparse warnings:
kernel/sched.c:2170:17: warning: symbol 'schedule_tail' was not declared. Should it be static?

Avoids the need for an external declaration in arch/um/process.c

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/um/kernel/process.c | 2 --
 include/linux/sched.h    | 1 +
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index fc50d2f959d..e8cb9ff183e 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -128,8 +128,6 @@ void *get_current(void)
 	return current;
 }
 
-extern void schedule_tail(struct task_struct *prev);
-
 /*
  * This is called magically, by its address being stuffed in a jmp_buf
  * and being longjmp-d to.
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e217d188a10..9c17e828d6d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -242,6 +242,7 @@ struct task_struct;
 
 extern void sched_init(void);
 extern void sched_init_smp(void);
+extern asmlinkage void schedule_tail(struct task_struct *prev);
 extern void init_idle(struct task_struct *idle, int cpu);
 extern void init_idle_bootup_task(struct task_struct *idle);
 
-- 
cgit v1.2.3-70-g09d2


From bdb9441e9c325d50b5ae17f7d3205d65b8ed2e5f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 25 Feb 2008 23:02:48 +0100
Subject: lockdep: increase MAX_LOCK_DEPTH

Some code paths exceed the current max lock depth (XFS), so increase
this limit a bit. I looked at making this a dynamic allocated array,
but we should not advocate insane lock depths, so stay with this as
long as it works...

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index e217d188a10..e3ea1243754 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1189,7 +1189,7 @@ struct task_struct {
 	int softirq_context;
 #endif
 #ifdef CONFIG_LOCKDEP
-# define MAX_LOCK_DEPTH 30UL
+# define MAX_LOCK_DEPTH 48UL
 	u64 curr_chain_key;
 	int lockdep_depth;
 	struct held_lock held_locks[MAX_LOCK_DEPTH];
-- 
cgit v1.2.3-70-g09d2


From 24d10f0c37d301e88f6965e3dc0aa684311544e5 Mon Sep 17 00:00:00 2001
From: Adrian McMenamin <adrian@newgolddream.dyndns.info>
Date: Sat, 16 Feb 2008 23:37:33 +0000
Subject: maple: remove unused variable

Remove an unused variable from the definition of struct maple_device

Signed-off-by: Adrian McMenamin <adrian@mcmen.demon.co.uk>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 include/linux/maple.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/maple.h b/include/linux/maple.h
index 3f01e2bae1a..d31e36ebb43 100644
--- a/include/linux/maple.h
+++ b/include/linux/maple.h
@@ -64,7 +64,6 @@ struct maple_driver {
 	int (*connect) (struct maple_device * dev);
 	void (*disconnect) (struct maple_device * dev);
 	struct device_driver drv;
-	int registered;
 };
 
 void maple_getcond_callback(struct maple_device *dev,
-- 
cgit v1.2.3-70-g09d2


From 8b1d16540c6ae4e62fcff56bd47794951b3ca87a Mon Sep 17 00:00:00 2001
From: Hideo Saito <saito@densan.co.jp>
Date: Tue, 26 Feb 2008 14:28:48 +0900
Subject: sh: Fix up HAS_SR_RB typo in entry-macros.

Signed-off-by: Hideo Saito <saito@densan.co.jp>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 include/asm-sh/entry-macros.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/asm-sh/entry-macros.S b/include/asm-sh/entry-macros.S
index 500030eae7a..2dab0b8d945 100644
--- a/include/asm-sh/entry-macros.S
+++ b/include/asm-sh/entry-macros.S
@@ -12,7 +12,7 @@
 	not	r11, r11
 	stc	sr, r10
 	and	r11, r10
-#ifdef CONFIG_HAS_SR_RB
+#ifdef CONFIG_CPU_HAS_SR_RB
 	stc	k_g_imask, r11
 	or	r11, r10
 #endif
@@ -20,7 +20,7 @@
 	.endm
 
 	.macro	get_current_thread_info, ti, tmp
-#ifdef CONFIG_HAS_SR_RB
+#ifdef CONFIG_CPU_HAS_SR_RB
 	stc	r7_bank, \ti
 #else
 	mov	#((THREAD_SIZE - 1) >> 10) ^ 0xff, \tmp
-- 
cgit v1.2.3-70-g09d2


From 96de1a8f0275bd67f243833e7088baced518f873 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Tue, 26 Feb 2008 14:52:45 +0900
Subject: serial: Move asm-sh/sci.h to linux/serial_sci.h.

This header is needed on other architectures as well (namely h8300),
which currently fails to build without this in place. Rather than
duplicating the port definition completely there, just move this to a
common location instead.

This should get h8300 working again for 2.6.25, in addition to the
changes already pushed by Sato-san in -rc2.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/kernel/cpu/sh2/setup-sh7619.c  |  2 +-
 arch/sh/kernel/cpu/sh2a/setup-sh7203.c |  2 +-
 arch/sh/kernel/cpu/sh2a/setup-sh7206.c |  2 +-
 arch/sh/kernel/cpu/sh3/setup-sh7705.c  |  2 +-
 arch/sh/kernel/cpu/sh3/setup-sh770x.c  |  2 +-
 arch/sh/kernel/cpu/sh3/setup-sh7710.c  |  2 +-
 arch/sh/kernel/cpu/sh3/setup-sh7720.c  |  2 +-
 arch/sh/kernel/cpu/sh4/setup-sh4-202.c |  2 +-
 arch/sh/kernel/cpu/sh4/setup-sh7750.c  |  2 +-
 arch/sh/kernel/cpu/sh4/setup-sh7760.c  |  2 +-
 arch/sh/kernel/cpu/sh4a/setup-sh7343.c |  2 +-
 arch/sh/kernel/cpu/sh4a/setup-sh7366.c |  2 +-
 arch/sh/kernel/cpu/sh4a/setup-sh7722.c |  2 +-
 arch/sh/kernel/cpu/sh4a/setup-sh7763.c |  2 +-
 arch/sh/kernel/cpu/sh4a/setup-sh7770.c |  2 +-
 arch/sh/kernel/cpu/sh4a/setup-sh7780.c |  2 +-
 arch/sh/kernel/cpu/sh4a/setup-sh7785.c |  2 +-
 arch/sh/kernel/cpu/sh4a/setup-shx3.c   |  2 +-
 drivers/serial/sh-sci.c                |  2 +-
 include/asm-sh/sci.h                   | 34 ----------------------------------
 include/linux/serial_sci.h             | 32 ++++++++++++++++++++++++++++++++
 21 files changed, 51 insertions(+), 53 deletions(-)
 delete mode 100644 include/asm-sh/sci.h
 create mode 100644 include/linux/serial_sci.h

(limited to 'include')

diff --git a/arch/sh/kernel/cpu/sh2/setup-sh7619.c b/arch/sh/kernel/cpu/sh2/setup-sh7619.c
index b230eb278ce..cc530f4d84d 100644
--- a/arch/sh/kernel/cpu/sh2/setup-sh7619.c
+++ b/arch/sh/kernel/cpu/sh2/setup-sh7619.c
@@ -10,7 +10,7 @@
 #include <linux/platform_device.h>
 #include <linux/init.h>
 #include <linux/serial.h>
-#include <asm/sci.h>
+#include <linux/serial_sci.h>
 
 enum {
 	UNUSED = 0,
diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7203.c b/arch/sh/kernel/cpu/sh2a/setup-sh7203.c
index db6ef5cecde..e98dc445035 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-sh7203.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-sh7203.c
@@ -10,7 +10,7 @@
 #include <linux/platform_device.h>
 #include <linux/init.h>
 #include <linux/serial.h>
-#include <asm/sci.h>
+#include <linux/serial_sci.h>
 
 enum {
 	UNUSED = 0,
diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7206.c b/arch/sh/kernel/cpu/sh2a/setup-sh7206.c
index a564425b905..e6d4ec445dd 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-sh7206.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-sh7206.c
@@ -10,7 +10,7 @@
 #include <linux/platform_device.h>
 #include <linux/init.h>
 #include <linux/serial.h>
-#include <asm/sci.h>
+#include <linux/serial_sci.h>
 
 enum {
 	UNUSED = 0,
diff --git a/arch/sh/kernel/cpu/sh3/setup-sh7705.c b/arch/sh/kernel/cpu/sh3/setup-sh7705.c
index dd0a20a685f..f581534cb73 100644
--- a/arch/sh/kernel/cpu/sh3/setup-sh7705.c
+++ b/arch/sh/kernel/cpu/sh3/setup-sh7705.c
@@ -12,7 +12,7 @@
 #include <linux/init.h>
 #include <linux/irq.h>
 #include <linux/serial.h>
-#include <asm/sci.h>
+#include <linux/serial_sci.h>
 #include <asm/rtc.h>
 
 enum {
diff --git a/arch/sh/kernel/cpu/sh3/setup-sh770x.c b/arch/sh/kernel/cpu/sh3/setup-sh770x.c
index 9066ed78e28..d3733b13ea5 100644
--- a/arch/sh/kernel/cpu/sh3/setup-sh770x.c
+++ b/arch/sh/kernel/cpu/sh3/setup-sh770x.c
@@ -16,7 +16,7 @@
 #include <linux/irq.h>
 #include <linux/platform_device.h>
 #include <linux/serial.h>
-#include <asm/sci.h>
+#include <linux/serial_sci.h>
 
 enum {
 	UNUSED = 0,
diff --git a/arch/sh/kernel/cpu/sh3/setup-sh7710.c b/arch/sh/kernel/cpu/sh3/setup-sh7710.c
index 0cc0e2bf135..7406c9ad925 100644
--- a/arch/sh/kernel/cpu/sh3/setup-sh7710.c
+++ b/arch/sh/kernel/cpu/sh3/setup-sh7710.c
@@ -12,7 +12,7 @@
 #include <linux/init.h>
 #include <linux/irq.h>
 #include <linux/serial.h>
-#include <asm/sci.h>
+#include <linux/serial_sci.h>
 #include <asm/rtc.h>
 
 enum {
diff --git a/arch/sh/kernel/cpu/sh3/setup-sh7720.c b/arch/sh/kernel/cpu/sh3/setup-sh7720.c
index 3855ea4c21c..8028082527c 100644
--- a/arch/sh/kernel/cpu/sh3/setup-sh7720.c
+++ b/arch/sh/kernel/cpu/sh3/setup-sh7720.c
@@ -16,7 +16,7 @@
 #include <linux/init.h>
 #include <linux/serial.h>
 #include <linux/io.h>
-#include <asm/sci.h>
+#include <linux/serial_sci.h>
 #include <asm/rtc.h>
 
 #define INTC_ICR1	0xA4140010UL
diff --git a/arch/sh/kernel/cpu/sh4/setup-sh4-202.c b/arch/sh/kernel/cpu/sh4/setup-sh4-202.c
index dab193293f2..7371abf64f8 100644
--- a/arch/sh/kernel/cpu/sh4/setup-sh4-202.c
+++ b/arch/sh/kernel/cpu/sh4/setup-sh4-202.c
@@ -10,7 +10,7 @@
 #include <linux/platform_device.h>
 #include <linux/init.h>
 #include <linux/serial.h>
-#include <asm/sci.h>
+#include <linux/serial_sci.h>
 
 static struct plat_sci_port sci_platform_data[] = {
 	{
diff --git a/arch/sh/kernel/cpu/sh4/setup-sh7750.c b/arch/sh/kernel/cpu/sh4/setup-sh7750.c
index ae3603aca61..ec884039b91 100644
--- a/arch/sh/kernel/cpu/sh4/setup-sh7750.c
+++ b/arch/sh/kernel/cpu/sh4/setup-sh7750.c
@@ -12,7 +12,7 @@
 #include <linux/init.h>
 #include <linux/serial.h>
 #include <linux/io.h>
-#include <asm/sci.h>
+#include <linux/serial_sci.h>
 
 static struct resource rtc_resources[] = {
 	[0] = {
diff --git a/arch/sh/kernel/cpu/sh4/setup-sh7760.c b/arch/sh/kernel/cpu/sh4/setup-sh7760.c
index 85f81579b97..254c5c55ab9 100644
--- a/arch/sh/kernel/cpu/sh4/setup-sh7760.c
+++ b/arch/sh/kernel/cpu/sh4/setup-sh7760.c
@@ -10,7 +10,7 @@
 #include <linux/platform_device.h>
 #include <linux/init.h>
 #include <linux/serial.h>
-#include <asm/sci.h>
+#include <linux/serial_sci.h>
 
 enum {
 	UNUSED = 0,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7343.c b/arch/sh/kernel/cpu/sh4a/setup-sh7343.c
index c0a3f079dfd..6d4f50cd4aa 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7343.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7343.c
@@ -10,7 +10,7 @@
 #include <linux/platform_device.h>
 #include <linux/init.h>
 #include <linux/serial.h>
-#include <asm/sci.h>
+#include <linux/serial_sci.h>
 
 static struct plat_sci_port sci_platform_data[] = {
 	{
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7366.c b/arch/sh/kernel/cpu/sh4a/setup-sh7366.c
index 967e8b69a2f..f26b5cdad0d 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7366.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7366.c
@@ -12,7 +12,7 @@
 #include <linux/platform_device.h>
 #include <linux/init.h>
 #include <linux/serial.h>
-#include <asm/sci.h>
+#include <linux/serial_sci.h>
 
 static struct plat_sci_port sci_platform_data[] = {
 	{
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
index 73c778d40d1..b98b4bc93ec 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
@@ -10,9 +10,9 @@
 #include <linux/platform_device.h>
 #include <linux/init.h>
 #include <linux/serial.h>
+#include <linux/serial_sci.h>
 #include <linux/mm.h>
 #include <asm/mmzone.h>
-#include <asm/sci.h>
 
 static struct resource usbf_resources[] = {
 	[0] = {
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7763.c b/arch/sh/kernel/cpu/sh4a/setup-sh7763.c
index eabd5386812..07c988dc9de 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7763.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7763.c
@@ -12,7 +12,7 @@
 #include <linux/init.h>
 #include <linux/serial.h>
 #include <linux/io.h>
-#include <asm/sci.h>
+#include <linux/serial_sci.h>
 
 static struct resource rtc_resources[] = {
 	[0] = {
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7770.c b/arch/sh/kernel/cpu/sh4a/setup-sh7770.c
index 32f4f59a837..b9cec48b180 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7770.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7770.c
@@ -10,7 +10,7 @@
 #include <linux/platform_device.h>
 #include <linux/init.h>
 #include <linux/serial.h>
-#include <asm/sci.h>
+#include <linux/serial_sci.h>
 
 static struct plat_sci_port sci_platform_data[] = {
 	{
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7780.c b/arch/sh/kernel/cpu/sh4a/setup-sh7780.c
index 293004b526f..18dbbe23fea 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7780.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7780.c
@@ -11,7 +11,7 @@
 #include <linux/init.h>
 #include <linux/serial.h>
 #include <linux/io.h>
-#include <asm/sci.h>
+#include <linux/serial_sci.h>
 
 static struct resource rtc_resources[] = {
 	[0] = {
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7785.c b/arch/sh/kernel/cpu/sh4a/setup-sh7785.c
index 74b60e96cdf..621e7329ec6 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7785.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7785.c
@@ -10,10 +10,10 @@
 #include <linux/platform_device.h>
 #include <linux/init.h>
 #include <linux/serial.h>
+#include <linux/serial_sci.h>
 #include <linux/io.h>
 #include <linux/mm.h>
 #include <asm/mmzone.h>
-#include <asm/sci.h>
 
 static struct plat_sci_port sci_platform_data[] = {
 	{
diff --git a/arch/sh/kernel/cpu/sh4a/setup-shx3.c b/arch/sh/kernel/cpu/sh4a/setup-shx3.c
index 4dc958b6b31..bd35f32534b 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-shx3.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-shx3.c
@@ -10,9 +10,9 @@
 #include <linux/platform_device.h>
 #include <linux/init.h>
 #include <linux/serial.h>
+#include <linux/serial_sci.h>
 #include <linux/io.h>
 #include <asm/mmzone.h>
-#include <asm/sci.h>
 
 static struct plat_sci_port sci_platform_data[] = {
 	{
diff --git a/drivers/serial/sh-sci.c b/drivers/serial/sh-sci.c
index 9ce12cb2ceb..a8c116b80bf 100644
--- a/drivers/serial/sh-sci.c
+++ b/drivers/serial/sh-sci.c
@@ -41,6 +41,7 @@
 #include <linux/delay.h>
 #include <linux/console.h>
 #include <linux/platform_device.h>
+#include <linux/serial_sci.h>
 
 #ifdef CONFIG_CPU_FREQ
 #include <linux/notifier.h>
@@ -54,7 +55,6 @@
 #include <asm/kgdb.h>
 #endif
 
-#include <asm/sci.h>
 #include "sh-sci.h"
 
 struct sci_port {
diff --git a/include/asm-sh/sci.h b/include/asm-sh/sci.h
deleted file mode 100644
index 52e73660c12..00000000000
--- a/include/asm-sh/sci.h
+++ /dev/null
@@ -1,34 +0,0 @@
-#ifndef __ASM_SH_SCI_H
-#define __ASM_SH_SCI_H
-
-#include <linux/serial_core.h>
-
-/*
- * Generic header for SuperH SCI(F)
- *
- * Do not place SH-specific parts in here, sh64 and h8300 depend on this too.
- */
-
-/* Offsets into the sci_port->irqs array */
-enum {
-	SCIx_ERI_IRQ,
-	SCIx_RXI_IRQ,
-	SCIx_TXI_IRQ,
-	SCIx_BRI_IRQ,
-	SCIx_NR_IRQS,
-};
-
-/*
- * Platform device specific platform_data struct
- */
-struct plat_sci_port {
-	void __iomem	*membase;		/* io cookie */
-	unsigned long	mapbase;		/* resource base */
-	unsigned int	irqs[SCIx_NR_IRQS];	/* ERI, RXI, TXI, BRI */
-	unsigned int	type;			/* SCI / SCIF / IRDA */
-	upf_t		flags;			/* UPF_* flags */
-};
-
-int early_sci_setup(struct uart_port *port);
-
-#endif /* __ASM_SH_SCI_H */
diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h
new file mode 100644
index 00000000000..893cc53486b
--- /dev/null
+++ b/include/linux/serial_sci.h
@@ -0,0 +1,32 @@
+#ifndef __LINUX_SERIAL_SCI_H
+#define __LINUX_SERIAL_SCI_H
+
+#include <linux/serial_core.h>
+
+/*
+ * Generic header for SuperH SCI(F) (used by sh/sh64/h8300 and related parts)
+ */
+
+/* Offsets into the sci_port->irqs array */
+enum {
+	SCIx_ERI_IRQ,
+	SCIx_RXI_IRQ,
+	SCIx_TXI_IRQ,
+	SCIx_BRI_IRQ,
+	SCIx_NR_IRQS,
+};
+
+/*
+ * Platform device specific platform_data struct
+ */
+struct plat_sci_port {
+	void __iomem	*membase;		/* io cookie */
+	unsigned long	mapbase;		/* resource base */
+	unsigned int	irqs[SCIx_NR_IRQS];	/* ERI, RXI, TXI, BRI */
+	unsigned int	type;			/* SCI / SCIF / IRDA */
+	upf_t		flags;			/* UPF_* flags */
+};
+
+int early_sci_setup(struct uart_port *port);
+
+#endif /* __LINUX_SERIAL_SCI_H */
-- 
cgit v1.2.3-70-g09d2


From cbc34973709eb41b369c304c075cf2069f847012 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 13 Feb 2008 13:14:35 -0800
Subject: lguest: include function prototypes

Added a declaration to asm-x86/lguest.h and moved the extern arrays there
as well.  As an alternative to including asm/lguest.h directly, an
include could be put in linux/lguest.h

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Cc: "rusty@rustcorp.com.au" <rusty@rustcorp.com.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/lguest/boot.c   | 10 +---------
 include/asm-x86/lguest.h | 11 +++++++++++
 2 files changed, 12 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 5afdde4895d..1e613fb03e3 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -57,6 +57,7 @@
 #include <linux/lguest_launcher.h>
 #include <linux/virtio_console.h>
 #include <linux/pm.h>
+#include <asm/lguest.h>
 #include <asm/paravirt.h>
 #include <asm/param.h>
 #include <asm/page.h>
@@ -75,15 +76,6 @@
  * behaving in simplified but equivalent ways.  In particular, the Guest is the
  * same kernel as the Host (or at least, built from the same source code). :*/
 
-/* Declarations for definitions in lguest_guest.S */
-extern char lguest_noirq_start[], lguest_noirq_end[];
-extern const char lgstart_cli[], lgend_cli[];
-extern const char lgstart_sti[], lgend_sti[];
-extern const char lgstart_popf[], lgend_popf[];
-extern const char lgstart_pushf[], lgend_pushf[];
-extern const char lgstart_iret[], lgend_iret[];
-extern void lguest_iret(void);
-
 struct lguest_data lguest_data = {
 	.hcall_status = { [0 ... LHCALL_RING_SIZE-1] = 0xFF },
 	.noirq_start = (u32)lguest_noirq_start,
diff --git a/include/asm-x86/lguest.h b/include/asm-x86/lguest.h
index 4d9367b7297..9b17571e9bc 100644
--- a/include/asm-x86/lguest.h
+++ b/include/asm-x86/lguest.h
@@ -23,6 +23,17 @@
 /* Found in switcher.S */
 extern unsigned long default_idt_entries[];
 
+/* Declarations for definitions in lguest_guest.S */
+extern char lguest_noirq_start[], lguest_noirq_end[];
+extern const char lgstart_cli[], lgend_cli[];
+extern const char lgstart_sti[], lgend_sti[];
+extern const char lgstart_popf[], lgend_popf[];
+extern const char lgstart_pushf[], lgend_pushf[];
+extern const char lgstart_iret[], lgend_iret[];
+
+extern void lguest_iret(void);
+extern void lguest_init(void);
+
 struct lguest_regs
 {
 	/* Manually saved part. */
-- 
cgit v1.2.3-70-g09d2


From 7343b3b3a627eb30e24e921f004f659c8ebb91c5 Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Thu, 14 Feb 2008 14:52:05 -0800
Subject: x86: require family >= 6 if we are using P6 NOPs

The P6 family of NOPs are only available on family >= 6 or above, so
enforce that in the boot code.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig.cpu   | 5 +++++
 include/asm-x86/nops.h | 4 +---
 2 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index e09a6b73a1a..86fd2a0e459 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -377,6 +377,10 @@ config X86_OOSTORE
 	def_bool y
 	depends on (MWINCHIP3D || MWINCHIP2 || MWINCHIPC6) && MTRR
 
+config X86_P6_NOP
+	def_bool y
+	depends on (M686 || MPENTIUMII || MPENTIUMIII || MPENTIUMM || MCORE2 || PENTIUM4)
+
 config X86_TSC
 	def_bool y
 	depends on ((MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ) || X86_64
@@ -390,6 +394,7 @@ config X86_CMOV
 config X86_MINIMUM_CPU_FAMILY
 	int
 	default "64" if X86_64
+	default "6" if X86_32 && X86_P6_NOP
 	default "4" if X86_32 && (X86_XADD || X86_CMPXCHG || X86_BSWAP || X86_WP_WORKS_OK)
 	default "3"
 
diff --git a/include/asm-x86/nops.h b/include/asm-x86/nops.h
index fec025c7f58..c52b334b816 100644
--- a/include/asm-x86/nops.h
+++ b/include/asm-x86/nops.h
@@ -63,9 +63,7 @@
 #define ASM_NOP6 K7_NOP6
 #define ASM_NOP7 K7_NOP7
 #define ASM_NOP8 K7_NOP8
-#elif defined(CONFIG_M686) || defined(CONFIG_MPENTIUMII) || \
-      defined(CONFIG_MPENTIUMIII) || defined(CONFIG_MPENTIUMM) || \
-      defined(CONFIG_MCORE2) || defined(CONFIG_PENTIUM4)
+#elif defined(CONFIG_X86_P6_NOP)
 #define ASM_NOP1 P6_NOP1
 #define ASM_NOP2 P6_NOP2
 #define ASM_NOP3 P6_NOP3
-- 
cgit v1.2.3-70-g09d2


From 4cd20952d74323df06e438c0c3273b5be89d6bfd Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@zytor.com>
Date: Mon, 18 Feb 2008 23:24:33 -0800
Subject: x86: add comments for NOPs

Add comments describing the various NOP sequences.

Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/nops.h | 62 ++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 45 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/asm-x86/nops.h b/include/asm-x86/nops.h
index c52b334b816..e3b2bce0aff 100644
--- a/include/asm-x86/nops.h
+++ b/include/asm-x86/nops.h
@@ -3,17 +3,29 @@
 
 /* Define nops for use with alternative() */
 
-/* generic versions from gas */
-#define GENERIC_NOP1	".byte 0x90\n"
-#define GENERIC_NOP2    	".byte 0x89,0xf6\n"
-#define GENERIC_NOP3        ".byte 0x8d,0x76,0x00\n"
-#define GENERIC_NOP4        ".byte 0x8d,0x74,0x26,0x00\n"
-#define GENERIC_NOP5        GENERIC_NOP1 GENERIC_NOP4
-#define GENERIC_NOP6	".byte 0x8d,0xb6,0x00,0x00,0x00,0x00\n"
-#define GENERIC_NOP7	".byte 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00\n"
-#define GENERIC_NOP8	GENERIC_NOP1 GENERIC_NOP7
+/* generic versions from gas
+   1: nop
+   2: movl %esi,%esi
+   3: leal 0x00(%esi),%esi
+   4: leal 0x00(,%esi,1),%esi
+   6: leal 0x00000000(%esi),%esi
+   7: leal 0x00000000(,%esi,1),%esi
+*/
+#define GENERIC_NOP1 ".byte 0x90\n"
+#define GENERIC_NOP2 ".byte 0x89,0xf6\n"
+#define GENERIC_NOP3 ".byte 0x8d,0x76,0x00\n"
+#define GENERIC_NOP4 ".byte 0x8d,0x74,0x26,0x00\n"
+#define GENERIC_NOP5 GENERIC_NOP1 GENERIC_NOP4
+#define GENERIC_NOP6 ".byte 0x8d,0xb6,0x00,0x00,0x00,0x00\n"
+#define GENERIC_NOP7 ".byte 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00\n"
+#define GENERIC_NOP8 GENERIC_NOP1 GENERIC_NOP7
 
-/* Opteron 64bit nops */
+/* Opteron 64bit nops
+   1: nop
+   2: osp nop
+   3: osp osp nop
+   4: osp osp osp nop
+*/
 #define K8_NOP1 GENERIC_NOP1
 #define K8_NOP2	".byte 0x66,0x90\n"
 #define K8_NOP3	".byte 0x66,0x66,0x90\n"
@@ -23,19 +35,35 @@
 #define K8_NOP7	K8_NOP4 K8_NOP3
 #define K8_NOP8	K8_NOP4 K8_NOP4
 
-/* K7 nops */
-/* uses eax dependencies (arbitary choice) */
-#define K7_NOP1  GENERIC_NOP1
+/* K7 nops
+   uses eax dependencies (arbitary choice)
+   1: nop
+   2: movl %eax,%eax
+   3: leal (,%eax,1),%eax
+   4: leal 0x00(,%eax,1),%eax
+   6: leal 0x00000000(%eax),%eax
+   7: leal 0x00000000(,%eax,1),%eax
+*/
+#define K7_NOP1	GENERIC_NOP1
 #define K7_NOP2	".byte 0x8b,0xc0\n"
 #define K7_NOP3	".byte 0x8d,0x04,0x20\n"
 #define K7_NOP4	".byte 0x8d,0x44,0x20,0x00\n"
 #define K7_NOP5	K7_NOP4 ASM_NOP1
 #define K7_NOP6	".byte 0x8d,0x80,0,0,0,0\n"
-#define K7_NOP7        ".byte 0x8D,0x04,0x05,0,0,0,0\n"
-#define K7_NOP8        K7_NOP7 ASM_NOP1
+#define K7_NOP7	".byte 0x8D,0x04,0x05,0,0,0,0\n"
+#define K7_NOP8	K7_NOP7 ASM_NOP1
 
-/* P6 nops */
-/* uses eax dependencies (Intel-recommended choice) */
+/* P6 nops
+   uses eax dependencies (Intel-recommended choice)
+   1: nop
+   2: osp nop
+   3: nopl (%eax)
+   4: nopl 0x00(%eax)
+   5: nopl 0x00(%eax,%eax,1)
+   6: osp nopl 0x00(%eax,%eax,1)
+   7: nopl 0x00000000(%eax)
+   8: nopl 0x00000000(%eax,%eax,1)
+*/
 #define P6_NOP1	GENERIC_NOP1
 #define P6_NOP2	".byte 0x66,0x90\n"
 #define P6_NOP3	".byte 0x0f,0x1f,0x00\n"
-- 
cgit v1.2.3-70-g09d2


From 88f3aec7afd9ae3e6f6d221801996b69aad1e3a4 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 21 Feb 2008 11:04:11 +0100
Subject: x86: fix spontaneous reboot with allyesconfig bzImage

recently the 64-bit allyesconfig bzImage kernel started spontaneously
rebooting during early bootup.

after a few fun hours spent with early init debugging, it turns out
that we've got this rather annoying limit on the size of the kernel
image:

      #define KERNEL_TEXT_SIZE  (40*1024*1024)

which limit my vmlinux just happened to pass:

       text           data       bss        dec       hex   filename
   29703744        4222751   8646224   42572719   2899baf   vmlinux

40 MB is 42572719 bytes, so my vmlinux was just 1.5% above this limit :-/

So it happily crashed right in head_64.S, which - as we all know - is
the most debuggable code in the whole architecture ;-)

So increase the limit to allow an up to 128MB kernel image to be mapped.
(should anyone be that crazy or lazy)

We have a full 4K of pagetable (level2_kernel_pgt) allocated for these
mappings already, so there's no RAM overhead and the limit was rather
pointless and arbitrary.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/head_64.S | 22 ++++++++++++++--------
 arch/x86/mm/init_64.c     |  5 +++--
 include/asm-x86/page_64.h |  8 ++++++--
 3 files changed, 23 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index eb415043a92..b037b15be7f 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -379,18 +379,24 @@ NEXT_PAGE(level2_ident_pgt)
 	/* Since I easily can, map the first 1G.
 	 * Don't set NX because code runs from these pages.
 	 */
-	PMDS(0x0000000000000000, __PAGE_KERNEL_LARGE_EXEC, PTRS_PER_PMD)
+	PMDS(0, __PAGE_KERNEL_LARGE_EXEC, PTRS_PER_PMD)
 
 NEXT_PAGE(level2_kernel_pgt)
-	/* 40MB kernel mapping. The kernel code cannot be bigger than that.
-	   When you change this change KERNEL_TEXT_SIZE in page.h too. */
-	/* (2^48-(2*1024*1024*1024)-((2^39)*511)-((2^30)*510)) = 0 */
-	PMDS(0x0000000000000000, __PAGE_KERNEL_LARGE_EXEC|_PAGE_GLOBAL, KERNEL_TEXT_SIZE/PMD_SIZE)
-	/* Module mapping starts here */
-	.fill	(PTRS_PER_PMD - (KERNEL_TEXT_SIZE/PMD_SIZE)),8,0
+	/*
+	 * 128 MB kernel mapping. We spend a full page on this pagetable
+	 * anyway.
+	 *
+	 * The kernel code+data+bss must not be bigger than that.
+	 *
+	 * (NOTE: at +128MB starts the module area, see MODULES_VADDR.
+	 *  If you want to increase this then increase MODULES_VADDR
+	 *  too.)
+	 */
+	PMDS(0, __PAGE_KERNEL_LARGE_EXEC|_PAGE_GLOBAL,
+		KERNEL_TEXT_SIZE/PMD_SIZE)
 
 NEXT_PAGE(level2_spare_pgt)
-	.fill   512,8,0
+	.fill   512, 8, 0
 
 #undef PMDS
 #undef NEXT_PAGE
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 6dbb0035c57..a02a14f0f32 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -172,8 +172,9 @@ set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot)
 }
 
 /*
- * The head.S code sets up the kernel high mapping from:
- * __START_KERNEL_map to __START_KERNEL_map + KERNEL_TEXT_SIZE
+ * The head.S code sets up the kernel high mapping:
+ *
+ *   from __START_KERNEL_map to __START_KERNEL_map + size (== _end-_text)
  *
  * phys_addr holds the negative offset to the kernel, which is added
  * to the compile time generated pmds. This results in invalid pmds up
diff --git a/include/asm-x86/page_64.h b/include/asm-x86/page_64.h
index f7393bc516e..3e2e3ca6304 100644
--- a/include/asm-x86/page_64.h
+++ b/include/asm-x86/page_64.h
@@ -47,8 +47,12 @@
 #define __PHYSICAL_MASK_SHIFT	46
 #define __VIRTUAL_MASK_SHIFT	48
 
-#define KERNEL_TEXT_SIZE  (40*1024*1024)
-#define KERNEL_TEXT_START _AC(0xffffffff80000000, UL)
+/*
+ * Kernel image size is limited to 128 MB (see level2_kernel_pgt in
+ * arch/x86/kernel/head_64.S), and it is mapped here:
+ */
+#define KERNEL_TEXT_SIZE	(128*1024*1024)
+#define KERNEL_TEXT_START	_AC(0xffffffff80000000, UL)
 
 #ifndef __ASSEMBLY__
 void clear_page(void *page);
-- 
cgit v1.2.3-70-g09d2


From d4afe414189b098d56bcd24280c018aa2ac9a990 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 21 Feb 2008 13:39:30 +0100
Subject: x86: rename KERNEL_TEXT_SIZE => KERNEL_IMAGE_SIZE

The KERNEL_TEXT_SIZE constant was mis-named, as we not only map the kernel
text but data, bss and init sections as well.

That name led me on the wrong path with the KERNEL_TEXT_SIZE regression,
because i knew how big of _text_ my images have and i knew about the 40 MB
"text" limit so i wrongly thought to be on the safe side of the 40 MB limit
with my 29 MB of text, while the total image size was slightly above 40 MB.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/head_64.S | 2 +-
 include/asm-x86/page_64.h | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index b037b15be7f..a007454133a 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -393,7 +393,7 @@ NEXT_PAGE(level2_kernel_pgt)
 	 *  too.)
 	 */
 	PMDS(0, __PAGE_KERNEL_LARGE_EXEC|_PAGE_GLOBAL,
-		KERNEL_TEXT_SIZE/PMD_SIZE)
+		KERNEL_IMAGE_SIZE/PMD_SIZE)
 
 NEXT_PAGE(level2_spare_pgt)
 	.fill   512, 8, 0
diff --git a/include/asm-x86/page_64.h b/include/asm-x86/page_64.h
index 3e2e3ca6304..143546073b9 100644
--- a/include/asm-x86/page_64.h
+++ b/include/asm-x86/page_64.h
@@ -51,8 +51,8 @@
  * Kernel image size is limited to 128 MB (see level2_kernel_pgt in
  * arch/x86/kernel/head_64.S), and it is mapped here:
  */
-#define KERNEL_TEXT_SIZE	(128*1024*1024)
-#define KERNEL_TEXT_START	_AC(0xffffffff80000000, UL)
+#define KERNEL_IMAGE_SIZE	(128*1024*1024)
+#define KERNEL_IMAGE_START	_AC(0xffffffff80000000, UL)
 
 #ifndef __ASSEMBLY__
 void clear_page(void *page);
-- 
cgit v1.2.3-70-g09d2


From f18edc95a37a901ffcbe91f5e05105f916a04fae Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 16 Feb 2008 14:05:01 +0100
Subject: x86: no robust/pi futex for real i386 CPUs

Real i386 CPUs do not have cmpxchg instructions. Catch it before
crashing on an invalid opcode.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/asm-x86/futex.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include')

diff --git a/include/asm-x86/futex.h b/include/asm-x86/futex.h
index cd9f894dd2d..c9952ea9f69 100644
--- a/include/asm-x86/futex.h
+++ b/include/asm-x86/futex.h
@@ -102,6 +102,13 @@ futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
 static inline int
 futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
 {
+
+#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP)
+	/* Real i386 machines have no cmpxchg instruction */
+	if (boot_cpu_data.x86 == 3)
+		return -ENOSYS;
+#endif
+
 	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
 		return -EFAULT;
 
-- 
cgit v1.2.3-70-g09d2


From 78374676efae525094aee45c0aab4bcab95ea9d1 Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Tue, 26 Feb 2008 18:25:53 -0800
Subject: CONNECTOR: make cn_already_initialized static

It is used in connector.c only, so make it static.

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/connector/connector.c | 2 +-
 include/linux/connector.h     | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c
index fea2d3ed9cb..85e2ba7fcfb 100644
--- a/drivers/connector/connector.c
+++ b/drivers/connector/connector.c
@@ -47,7 +47,7 @@ static LIST_HEAD(notify_list);
 
 static struct cn_dev cdev;
 
-int cn_already_initialized = 0;
+static int cn_already_initialized;
 
 /*
  * msg->seq and msg->ack are used to determine message genealogy.
diff --git a/include/linux/connector.h b/include/linux/connector.h
index da6dd957f90..96a89d3d672 100644
--- a/include/linux/connector.h
+++ b/include/linux/connector.h
@@ -170,7 +170,5 @@ int cn_cb_equal(struct cb_id *, struct cb_id *);
 
 void cn_queue_wrapper(struct work_struct *work);
 
-extern int cn_already_initialized;
-
 #endif				/* __KERNEL__ */
 #endif				/* __CONNECTOR_H */
-- 
cgit v1.2.3-70-g09d2


From fbabbed8284d1526ed01754ecd4fabdb941a1ff2 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 27 Feb 2008 12:21:18 -0800
Subject: [NETFILTER]: Fix NF_QUEUE_NR() parenthesis

Properly add parens around the macro argument. This is not needed by
the kernel but the macro is exported to userspace, so it shouldn't
make any assumptions.

Also use NF_VERDICT_BITS instead of NF_VERDICT_QBTIS for the left-shift
since thats whats logically correct.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index b74b615492e..f0680c2bee7 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -31,7 +31,7 @@
 #define NF_VERDICT_QMASK 0xffff0000
 #define NF_VERDICT_QBITS 16
 
-#define NF_QUEUE_NR(x) (((x << NF_VERDICT_QBITS) & NF_VERDICT_QMASK) | NF_QUEUE)
+#define NF_QUEUE_NR(x) ((((x) << NF_VERDICT_BITS) & NF_VERDICT_QMASK) | NF_QUEUE)
 
 /* only for userspace compatibility */
 #ifndef __KERNEL__
-- 
cgit v1.2.3-70-g09d2


From 7e8616d8e7731b026019d9af7cc9914b8bb42bc7 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Wed, 27 Feb 2008 16:04:52 -0500
Subject: [SCTP]: Update AUTH structures to match declarations in draft-16.

The new SCTP socket api (draft 16) updates the AUTH API structures.
We never exported these since we knew they would change.
Update the rest to match the draft.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 include/net/sctp/user.h | 10 ++++++----
 net/sctp/auth.c         |  4 ++--
 net/sctp/socket.c       |  6 +++++-
 3 files changed, 13 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h
index 9462d6ae2f3..9619b9d35c9 100644
--- a/include/net/sctp/user.h
+++ b/include/net/sctp/user.h
@@ -411,6 +411,7 @@ struct sctp_event_subscribe {
 	__u8 sctp_shutdown_event;
 	__u8 sctp_partial_delivery_event;
 	__u8 sctp_adaptation_layer_event;
+	__u8 sctp_authentication_event;
 };
 
 /*
@@ -587,7 +588,7 @@ struct sctp_authchunk {
  * endpoint requires the peer to use.
 */
 struct sctp_hmacalgo {
-	__u16		shmac_num_idents;
+	__u32		shmac_num_idents;
 	__u16		shmac_idents[];
 };
 
@@ -600,7 +601,7 @@ struct sctp_hmacalgo {
 struct sctp_authkey {
 	sctp_assoc_t	sca_assoc_id;
 	__u16		sca_keynumber;
-	__u16		sca_keylen;
+	__u16		sca_keylength;
 	__u8		sca_key[];
 };
 
@@ -693,8 +694,9 @@ struct sctp_status {
  * the peer requires to be received authenticated only.
  */
 struct sctp_authchunks {
-	sctp_assoc_t            gauth_assoc_id;
-	uint8_t                 gauth_chunks[];
+	sctp_assoc_t	gauth_assoc_id;
+	__u32		gauth_number_of_chunks;
+	uint8_t		gauth_chunks[];
 };
 
 /*
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 8bb79f28177..675a5c3e68a 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -838,11 +838,11 @@ int sctp_auth_set_key(struct sctp_endpoint *ep,
 	}
 
 	/* Create a new key data based on the info passed in */
-	key = sctp_auth_create_key(auth_key->sca_keylen, GFP_KERNEL);
+	key = sctp_auth_create_key(auth_key->sca_keylength, GFP_KERNEL);
 	if (!key)
 		goto nomem;
 
-	memcpy(key->data, &auth_key->sca_key[0], auth_key->sca_keylen);
+	memcpy(key->data, &auth_key->sca_key[0], auth_key->sca_keylength);
 
 	/* If we are replacing, remove the old keys data from the
 	 * key id.  If we are adding new key id, add it to the
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 848df21dc6c..939892691a2 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1964,7 +1964,7 @@ static int sctp_setsockopt_disable_fragments(struct sock *sk,
 static int sctp_setsockopt_events(struct sock *sk, char __user *optval,
 					int optlen)
 {
-	if (optlen != sizeof(struct sctp_event_subscribe))
+	if (optlen > sizeof(struct sctp_event_subscribe))
 		return -EINVAL;
 	if (copy_from_user(&sctp_sk(sk)->subscribe, optval, optlen))
 		return -EFAULT;
@@ -5094,6 +5094,8 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len,
 	len = num_chunks;
 	if (put_user(len, optlen))
 		return -EFAULT;
+	if (put_user(num_chunks, &p->gauth_number_of_chunks))
+		return -EFAULT;
 	if (copy_to_user(to, ch->chunks, len))
 		return -EFAULT;
 
@@ -5133,6 +5135,8 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len,
 	len = num_chunks;
 	if (put_user(len, optlen))
 		return -EFAULT;
+	if (put_user(num_chunks, &p->gauth_number_of_chunks))
+		return -EFAULT;
 	if (copy_to_user(to, ch->chunks, len))
 		return -EFAULT;
 
-- 
cgit v1.2.3-70-g09d2


From 7729d74ed5099021f79ee8ecfa676829b5bac796 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 28 Feb 2008 21:53:20 -0800
Subject: [SPARC]: Add reboot_command[] extern decl to asm/system.h

Kill off some sparse warnings.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/kernel/process.c   | 2 --
 arch/sparc64/kernel/process.c | 2 --
 include/asm-sparc/system.h    | 2 ++
 include/asm-sparc64/system.h  | 2 ++
 4 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/arch/sparc/kernel/process.c b/arch/sparc/kernel/process.c
index 0bd69d0b5cd..70c0dd22491 100644
--- a/arch/sparc/kernel/process.c
+++ b/arch/sparc/kernel/process.c
@@ -139,8 +139,6 @@ void cpu_idle(void)
 
 #endif
 
-extern char reboot_command [];
-
 /* XXX cli/sti -> local_irq_xxx here, check this works once SMP is fixed. */
 void machine_halt(void)
 {
diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c
index 2aafce7dfc0..e116e38b160 100644
--- a/arch/sparc64/kernel/process.c
+++ b/arch/sparc64/kernel/process.c
@@ -114,8 +114,6 @@ void cpu_idle(void)
 	}
 }
 
-extern char reboot_command [];
-
 void machine_halt(void)
 {
 	sstate_halt();
diff --git a/include/asm-sparc/system.h b/include/asm-sparc/system.h
index 45e47c159a6..4e08210cd4c 100644
--- a/include/asm-sparc/system.h
+++ b/include/asm-sparc/system.h
@@ -44,6 +44,8 @@ extern enum sparc_cpu sparc_cpu_model;
 
 #define SUN4M_NCPUS            4              /* Architectural limit of sun4m. */
 
+extern char reboot_command[];
+
 extern struct thread_info *current_set[NR_CPUS];
 
 extern unsigned long empty_bad_page;
diff --git a/include/asm-sparc64/system.h b/include/asm-sparc64/system.h
index ed91a5d8d4f..53eae091a17 100644
--- a/include/asm-sparc64/system.h
+++ b/include/asm-sparc64/system.h
@@ -30,6 +30,8 @@ enum sparc_cpu {
 #define ARCH_SUN4C_SUN4 0
 #define ARCH_SUN4 0
 
+extern char reboot_command[];
+
 /* These are here in an effort to more fully work around Spitfire Errata
  * #51.  Essentially, if a memory barrier occurs soon after a mispredicted
  * branch, the chip can stop executing instructions until a trap occurs.
-- 
cgit v1.2.3-70-g09d2


From b59931649256685f294d2d163a4f6d6286fbff05 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Tue, 26 Feb 2008 13:20:58 -0800
Subject: elfcore-compat fix uid/gid types

I overlooked the difference between __kernel_uid_t and uid_t when defining
struct compat_elf_prpsinfo.  The result is a regression in 32-bit core
dumps on x86_64, where the NT_PRPSINFO note has the wrong size and layout.
This patch fixes it.

Signed-off-by: Roland McGrath <roland@redhat.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/elfcore-compat.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/elfcore-compat.h b/include/linux/elfcore-compat.h
index 532d13adabc..0a90e1c3a42 100644
--- a/include/linux/elfcore-compat.h
+++ b/include/linux/elfcore-compat.h
@@ -45,8 +45,8 @@ struct compat_elf_prpsinfo
 	char				pr_zomb;
 	char				pr_nice;
 	compat_ulong_t			pr_flag;
-	compat_uid_t			pr_uid;
-	compat_gid_t			pr_gid;
+	__compat_uid_t			pr_uid;
+	__compat_gid_t			pr_gid;
 	compat_pid_t			pr_pid, pr_ppid, pr_pgrp, pr_sid;
 	char				pr_fname[16];
 	char				pr_psargs[ELF_PRARGSZ];
-- 
cgit v1.2.3-70-g09d2


From 57ce36feb4d1281247755bc445bae77728298955 Mon Sep 17 00:00:00 2001
From: Uwe Kleine-König <Uwe.Kleine-Koenig@digi.com>
Date: Mon, 25 Feb 2008 16:45:03 +0100
Subject: let __dec_zone_page_state use __dec_zone_state
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This removes code duplication and makes __dec_zone_page_state look like
__inc_zone_page_state.

Signed-off-by: Uwe Kleine-König <Uwe.Kleine-Koenig@digi.com>
Acked-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/vmstat.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 75370ec0923..9f1b4b46151 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -246,8 +246,7 @@ static inline void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
 static inline void __dec_zone_page_state(struct page *page,
 			enum zone_stat_item item)
 {
-	atomic_long_dec(&page_zone(page)->vm_stat[item]);
-	atomic_long_dec(&vm_stat[item]);
+	__dec_zone_state(page_zone(page), item);
 }
 
 /*
-- 
cgit v1.2.3-70-g09d2


From 2232c2d8e0a6a31061dec311f3d1cf7624bc14f1 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Fri, 29 Feb 2008 18:46:50 +0100
Subject: rcu: add support for dynamic ticks and preempt rcu

The PREEMPT-RCU can get stuck if a CPU goes idle and NO_HZ is set. The
idle CPU will not progress the RCU through its grace period and a
synchronize_rcu my get stuck. Without this patch I have a box that will
not boot when PREEMPT_RCU and NO_HZ are set. That same box boots fine
with this patch.

This patch comes from the -rt kernel where it has been tested for
several months.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/hardirq.h    |  10 ++
 include/linux/rcuclassic.h |   3 +
 include/linux/rcupreempt.h |  22 +++++
 kernel/rcupreempt.c        | 224 ++++++++++++++++++++++++++++++++++++++++++++-
 kernel/softirq.c           |   1 +
 kernel/time/tick-sched.c   |   3 +
 6 files changed, 259 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index 2961ec78804..49829988bfa 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -109,6 +109,14 @@ static inline void account_system_vtime(struct task_struct *tsk)
 }
 #endif
 
+#if defined(CONFIG_PREEMPT_RCU) && defined(CONFIG_NO_HZ)
+extern void rcu_irq_enter(void);
+extern void rcu_irq_exit(void);
+#else
+# define rcu_irq_enter() do { } while (0)
+# define rcu_irq_exit() do { } while (0)
+#endif /* CONFIG_PREEMPT_RCU */
+
 /*
  * It is safe to do non-atomic ops on ->hardirq_context,
  * because NMI handlers may not preempt and the ops are
@@ -117,6 +125,7 @@ static inline void account_system_vtime(struct task_struct *tsk)
  */
 #define __irq_enter()					\
 	do {						\
+		rcu_irq_enter();			\
 		account_system_vtime(current);		\
 		add_preempt_count(HARDIRQ_OFFSET);	\
 		trace_hardirq_enter();			\
@@ -135,6 +144,7 @@ extern void irq_enter(void);
 		trace_hardirq_exit();			\
 		account_system_vtime(current);		\
 		sub_preempt_count(HARDIRQ_OFFSET);	\
+		rcu_irq_exit();				\
 	} while (0)
 
 /*
diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h
index 4d6624260b4..b3dccd68629 100644
--- a/include/linux/rcuclassic.h
+++ b/include/linux/rcuclassic.h
@@ -160,5 +160,8 @@ extern void rcu_restart_cpu(int cpu);
 extern long rcu_batches_completed(void);
 extern long rcu_batches_completed_bh(void);
 
+#define rcu_enter_nohz()	do { } while (0)
+#define rcu_exit_nohz()		do { } while (0)
+
 #endif /* __KERNEL__ */
 #endif /* __LINUX_RCUCLASSIC_H */
diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h
index 60c2a033b19..01152ed532c 100644
--- a/include/linux/rcupreempt.h
+++ b/include/linux/rcupreempt.h
@@ -82,5 +82,27 @@ extern struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu);
 
 struct softirq_action;
 
+#ifdef CONFIG_NO_HZ
+DECLARE_PER_CPU(long, dynticks_progress_counter);
+
+static inline void rcu_enter_nohz(void)
+{
+	__get_cpu_var(dynticks_progress_counter)++;
+	WARN_ON(__get_cpu_var(dynticks_progress_counter) & 0x1);
+	mb();
+}
+
+static inline void rcu_exit_nohz(void)
+{
+	mb();
+	__get_cpu_var(dynticks_progress_counter)++;
+	WARN_ON(!(__get_cpu_var(dynticks_progress_counter) & 0x1));
+}
+
+#else /* CONFIG_NO_HZ */
+#define rcu_enter_nohz()	do { } while (0)
+#define rcu_exit_nohz()		do { } while (0)
+#endif /* CONFIG_NO_HZ */
+
 #endif /* __KERNEL__ */
 #endif /* __LINUX_RCUPREEMPT_H */
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c
index 987cfb7ade8..c7c52096df4 100644
--- a/kernel/rcupreempt.c
+++ b/kernel/rcupreempt.c
@@ -23,6 +23,10 @@
  *		to Suparna Bhattacharya for pushing me completely away
  *		from atomic instructions on the read side.
  *
+ *  - Added handling of Dynamic Ticks
+ *      Copyright 2007 - Paul E. Mckenney <paulmck@us.ibm.com>
+ *                     - Steven Rostedt <srostedt@redhat.com>
+ *
  * Papers:  http://www.rdrop.com/users/paulmck/RCU
  *
  * Design Document: http://lwn.net/Articles/253651/
@@ -409,6 +413,212 @@ static void __rcu_advance_callbacks(struct rcu_data *rdp)
 	}
 }
 
+#ifdef CONFIG_NO_HZ
+
+DEFINE_PER_CPU(long, dynticks_progress_counter) = 1;
+static DEFINE_PER_CPU(long, rcu_dyntick_snapshot);
+static DEFINE_PER_CPU(int, rcu_update_flag);
+
+/**
+ * rcu_irq_enter - Called from Hard irq handlers and NMI/SMI.
+ *
+ * If the CPU was idle with dynamic ticks active, this updates the
+ * dynticks_progress_counter to let the RCU handling know that the
+ * CPU is active.
+ */
+void rcu_irq_enter(void)
+{
+	int cpu = smp_processor_id();
+
+	if (per_cpu(rcu_update_flag, cpu))
+		per_cpu(rcu_update_flag, cpu)++;
+
+	/*
+	 * Only update if we are coming from a stopped ticks mode
+	 * (dynticks_progress_counter is even).
+	 */
+	if (!in_interrupt() &&
+	    (per_cpu(dynticks_progress_counter, cpu) & 0x1) == 0) {
+		/*
+		 * The following might seem like we could have a race
+		 * with NMI/SMIs. But this really isn't a problem.
+		 * Here we do a read/modify/write, and the race happens
+		 * when an NMI/SMI comes in after the read and before
+		 * the write. But NMI/SMIs will increment this counter
+		 * twice before returning, so the zero bit will not
+		 * be corrupted by the NMI/SMI which is the most important
+		 * part.
+		 *
+		 * The only thing is that we would bring back the counter
+		 * to a postion that it was in during the NMI/SMI.
+		 * But the zero bit would be set, so the rest of the
+		 * counter would again be ignored.
+		 *
+		 * On return from the IRQ, the counter may have the zero
+		 * bit be 0 and the counter the same as the return from
+		 * the NMI/SMI. If the state machine was so unlucky to
+		 * see that, it still doesn't matter, since all
+		 * RCU read-side critical sections on this CPU would
+		 * have already completed.
+		 */
+		per_cpu(dynticks_progress_counter, cpu)++;
+		/*
+		 * The following memory barrier ensures that any
+		 * rcu_read_lock() primitives in the irq handler
+		 * are seen by other CPUs to follow the above
+		 * increment to dynticks_progress_counter. This is
+		 * required in order for other CPUs to correctly
+		 * determine when it is safe to advance the RCU
+		 * grace-period state machine.
+		 */
+		smp_mb(); /* see above block comment. */
+		/*
+		 * Since we can't determine the dynamic tick mode from
+		 * the dynticks_progress_counter after this routine,
+		 * we use a second flag to acknowledge that we came
+		 * from an idle state with ticks stopped.
+		 */
+		per_cpu(rcu_update_flag, cpu)++;
+		/*
+		 * If we take an NMI/SMI now, they will also increment
+		 * the rcu_update_flag, and will not update the
+		 * dynticks_progress_counter on exit. That is for
+		 * this IRQ to do.
+		 */
+	}
+}
+
+/**
+ * rcu_irq_exit - Called from exiting Hard irq context.
+ *
+ * If the CPU was idle with dynamic ticks active, update the
+ * dynticks_progress_counter to put let the RCU handling be
+ * aware that the CPU is going back to idle with no ticks.
+ */
+void rcu_irq_exit(void)
+{
+	int cpu = smp_processor_id();
+
+	/*
+	 * rcu_update_flag is set if we interrupted the CPU
+	 * when it was idle with ticks stopped.
+	 * Once this occurs, we keep track of interrupt nesting
+	 * because a NMI/SMI could also come in, and we still
+	 * only want the IRQ that started the increment of the
+	 * dynticks_progress_counter to be the one that modifies
+	 * it on exit.
+	 */
+	if (per_cpu(rcu_update_flag, cpu)) {
+		if (--per_cpu(rcu_update_flag, cpu))
+			return;
+
+		/* This must match the interrupt nesting */
+		WARN_ON(in_interrupt());
+
+		/*
+		 * If an NMI/SMI happens now we are still
+		 * protected by the dynticks_progress_counter being odd.
+		 */
+
+		/*
+		 * The following memory barrier ensures that any
+		 * rcu_read_unlock() primitives in the irq handler
+		 * are seen by other CPUs to preceed the following
+		 * increment to dynticks_progress_counter. This
+		 * is required in order for other CPUs to determine
+		 * when it is safe to advance the RCU grace-period
+		 * state machine.
+		 */
+		smp_mb(); /* see above block comment. */
+		per_cpu(dynticks_progress_counter, cpu)++;
+		WARN_ON(per_cpu(dynticks_progress_counter, cpu) & 0x1);
+	}
+}
+
+static void dyntick_save_progress_counter(int cpu)
+{
+	per_cpu(rcu_dyntick_snapshot, cpu) =
+		per_cpu(dynticks_progress_counter, cpu);
+}
+
+static inline int
+rcu_try_flip_waitack_needed(int cpu)
+{
+	long curr;
+	long snap;
+
+	curr = per_cpu(dynticks_progress_counter, cpu);
+	snap = per_cpu(rcu_dyntick_snapshot, cpu);
+	smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
+
+	/*
+	 * If the CPU remained in dynticks mode for the entire time
+	 * and didn't take any interrupts, NMIs, SMIs, or whatever,
+	 * then it cannot be in the middle of an rcu_read_lock(), so
+	 * the next rcu_read_lock() it executes must use the new value
+	 * of the counter.  So we can safely pretend that this CPU
+	 * already acknowledged the counter.
+	 */
+
+	if ((curr == snap) && ((curr & 0x1) == 0))
+		return 0;
+
+	/*
+	 * If the CPU passed through or entered a dynticks idle phase with
+	 * no active irq handlers, then, as above, we can safely pretend
+	 * that this CPU already acknowledged the counter.
+	 */
+
+	if ((curr - snap) > 2 || (snap & 0x1) == 0)
+		return 0;
+
+	/* We need this CPU to explicitly acknowledge the counter flip. */
+
+	return 1;
+}
+
+static inline int
+rcu_try_flip_waitmb_needed(int cpu)
+{
+	long curr;
+	long snap;
+
+	curr = per_cpu(dynticks_progress_counter, cpu);
+	snap = per_cpu(rcu_dyntick_snapshot, cpu);
+	smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
+
+	/*
+	 * If the CPU remained in dynticks mode for the entire time
+	 * and didn't take any interrupts, NMIs, SMIs, or whatever,
+	 * then it cannot have executed an RCU read-side critical section
+	 * during that time, so there is no need for it to execute a
+	 * memory barrier.
+	 */
+
+	if ((curr == snap) && ((curr & 0x1) == 0))
+		return 0;
+
+	/*
+	 * If the CPU either entered or exited an outermost interrupt,
+	 * SMI, NMI, or whatever handler, then we know that it executed
+	 * a memory barrier when doing so.  So we don't need another one.
+	 */
+	if (curr != snap)
+		return 0;
+
+	/* We need the CPU to execute a memory barrier. */
+
+	return 1;
+}
+
+#else /* !CONFIG_NO_HZ */
+
+# define dyntick_save_progress_counter(cpu)	do { } while (0)
+# define rcu_try_flip_waitack_needed(cpu)	(1)
+# define rcu_try_flip_waitmb_needed(cpu)	(1)
+
+#endif /* CONFIG_NO_HZ */
+
 /*
  * Get here when RCU is idle.  Decide whether we need to
  * move out of idle state, and return non-zero if so.
@@ -447,8 +657,10 @@ rcu_try_flip_idle(void)
 
 	/* Now ask each CPU for acknowledgement of the flip. */
 
-	for_each_cpu_mask(cpu, rcu_cpu_online_map)
+	for_each_cpu_mask(cpu, rcu_cpu_online_map) {
 		per_cpu(rcu_flip_flag, cpu) = rcu_flipped;
+		dyntick_save_progress_counter(cpu);
+	}
 
 	return 1;
 }
@@ -464,7 +676,8 @@ rcu_try_flip_waitack(void)
 
 	RCU_TRACE_ME(rcupreempt_trace_try_flip_a1);
 	for_each_cpu_mask(cpu, rcu_cpu_online_map)
-		if (per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) {
+		if (rcu_try_flip_waitack_needed(cpu) &&
+		    per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) {
 			RCU_TRACE_ME(rcupreempt_trace_try_flip_ae1);
 			return 0;
 		}
@@ -509,8 +722,10 @@ rcu_try_flip_waitzero(void)
 	smp_mb();  /*  ^^^^^^^^^^^^ */
 
 	/* Call for a memory barrier from each CPU. */
-	for_each_cpu_mask(cpu, rcu_cpu_online_map)
+	for_each_cpu_mask(cpu, rcu_cpu_online_map) {
 		per_cpu(rcu_mb_flag, cpu) = rcu_mb_needed;
+		dyntick_save_progress_counter(cpu);
+	}
 
 	RCU_TRACE_ME(rcupreempt_trace_try_flip_z2);
 	return 1;
@@ -528,7 +743,8 @@ rcu_try_flip_waitmb(void)
 
 	RCU_TRACE_ME(rcupreempt_trace_try_flip_m1);
 	for_each_cpu_mask(cpu, rcu_cpu_online_map)
-		if (per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) {
+		if (rcu_try_flip_waitmb_needed(cpu) &&
+		    per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) {
 			RCU_TRACE_ME(rcupreempt_trace_try_flip_me1);
 			return 0;
 		}
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 5b3aea5f471..31e9f2a4792 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -313,6 +313,7 @@ void irq_exit(void)
 	/* Make sure that timer wheel updates are propagated */
 	if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched())
 		tick_nohz_stop_sched_tick();
+	rcu_irq_exit();
 #endif
 	preempt_enable_no_resched();
 }
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index fa9bb73dbdb..2968298f8f3 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -282,6 +282,7 @@ void tick_nohz_stop_sched_tick(void)
 			ts->idle_tick = ts->sched_timer.expires;
 			ts->tick_stopped = 1;
 			ts->idle_jiffies = last_jiffies;
+			rcu_enter_nohz();
 		}
 
 		/*
@@ -375,6 +376,8 @@ void tick_nohz_restart_sched_tick(void)
 		return;
 	}
 
+	rcu_exit_nohz();
+
 	/* Update jiffies first */
 	select_nohz_load_balancer(0);
 	now = ktime_get();
-- 
cgit v1.2.3-70-g09d2


From cded932b75ab0a5f9181ee3da34a0a488d1a14fd Mon Sep 17 00:00:00 2001
From: Hans Rosenfeld <hans.rosenfeld@amd.com>
Date: Mon, 18 Feb 2008 18:10:47 +0100
Subject: x86: fix pmd_bad and pud_bad to support huge pages

I recently stumbled upon a problem in the support for huge pages. If a
program using huge pages does not explicitly unmap them, they remain
mapped (and therefore, are lost) after the program exits.

I observed that the free huge page count in /proc/meminfo decreased when
running my program, and it did not increase after the program exited.
After running the program a few times, no more huge pages could be
allocated.

The reason for this seems to be that the x86 pmd_bad and pud_bad
consider pmd/pud entries having the PSE bit set invalid. I think there
is nothing wrong with this bit being set, it just indicates that the
lowest level of translation has been reached. This bit has to be (and
is) checked after the basic validity of the entry has been checked, like
in this fragment from follow_page() in mm/memory.c:

  if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
          goto no_page_table;

  if (pmd_huge(*pmd)) {
          BUG_ON(flags & FOLL_GET);
          page = follow_huge_pmd(mm, address, pmd, flags & FOLL_WRITE);
          goto out;
  }

Note that this code currently doesn't work as intended if the pmd refers
to a huge page, the pmd_huge() check can not be reached if the page is
huge.

Extending pmd_bad() (and, for future 1GB page support, pud_bad()) to
allow for the PSE bit being set fixes this. For similar reasons,
allowing the NX bit being set is necessary, too. I have seen huge pages
having the NX bit set in their pmd entry, which would cause the same
problem.

Signed-Off-By: Hans Rosenfeld <hans.rosenfeld@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/pgtable_32.h | 4 +++-
 include/asm-x86/pgtable_64.h | 6 ++++--
 2 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h
index a842c7222b1..b478efa971e 100644
--- a/include/asm-x86/pgtable_32.h
+++ b/include/asm-x86/pgtable_32.h
@@ -91,7 +91,9 @@ extern unsigned long pg0[];
 /* To avoid harmful races, pmd_none(x) should check only the lower when PAE */
 #define pmd_none(x)	(!(unsigned long)pmd_val(x))
 #define pmd_present(x)	(pmd_val(x) & _PAGE_PRESENT)
-#define	pmd_bad(x)	((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
+#define	pmd_bad(x)	((pmd_val(x) \
+			  & ~(PAGE_MASK | _PAGE_USER | _PAGE_PSE | _PAGE_NX)) \
+			 != _KERNPG_TABLE)
 
 
 #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))
diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index 0a0b77bc736..0a9258333cb 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -153,12 +153,14 @@ static inline unsigned long pgd_bad(pgd_t pgd)
 
 static inline unsigned long pud_bad(pud_t pud)
 {
-	return pud_val(pud) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
+	return pud_val(pud) &
+		~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER | _PAGE_PSE | _PAGE_NX);
 }
 
 static inline unsigned long pmd_bad(pmd_t pmd)
 {
-	return pmd_val(pmd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
+	return pmd_val(pmd) &
+		~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER | _PAGE_PSE | _PAGE_NX);
 }
 
 #define pte_none(x)	(!pte_val(x))
-- 
cgit v1.2.3-70-g09d2


From 53c58588107973c0e240a1ed4fb8295f274c409d Mon Sep 17 00:00:00 2001
From: Dave Anderson <anderson@redhat.com>
Date: Thu, 21 Feb 2008 11:45:38 -0500
Subject: x86 ptrace: fix ptrace_bts_config structure declaration

The 2.6.25 ptrace_bts_config structure in asm-x86/ptrace-abi.h
is defined with u32 types:

   #include <asm/types.h>

   /* configuration/status structure used in PTRACE_BTS_CONFIG and
      PTRACE_BTS_STATUS commands.
   */
   struct ptrace_bts_config {
           /* requested or actual size of BTS buffer in bytes */
           u32 size;
           /* bitmask of below flags */
           u32 flags;
           /* buffer overflow signal */
           u32 signal;
           /* actual size of bts_struct in bytes */
           u32 bts_size;
   };
   #endif

But u32 is only accessible in asm-x86/types.h if __KERNEL__,
leading to compile errors when ptrace.h is included from
user-space. The double-underscore versions that are exported
to user-space in asm-x86/types.h should be used instead.

Signed-off-by: Dave Anderson <anderson@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/ptrace-abi.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/asm-x86/ptrace-abi.h b/include/asm-x86/ptrace-abi.h
index 81a8ee4c55f..f224eb3c315 100644
--- a/include/asm-x86/ptrace-abi.h
+++ b/include/asm-x86/ptrace-abi.h
@@ -89,13 +89,13 @@
 */
 struct ptrace_bts_config {
 	/* requested or actual size of BTS buffer in bytes */
-	u32 size;
+	__u32 size;
 	/* bitmask of below flags */
-	u32 flags;
+	__u32 flags;
 	/* buffer overflow signal */
-	u32 signal;
+	__u32 signal;
 	/* actual size of bts_struct in bytes */
-	u32 bts_size;
+	__u32 bts_size;
 };
 #endif
 
-- 
cgit v1.2.3-70-g09d2


From 94a3f78566ef98a48814d82892f28bb741624cb8 Mon Sep 17 00:00:00 2001
From: Lennert Buytenhek <buytenh@wantstofly.org>
Date: Sat, 23 Feb 2008 00:23:48 +0100
Subject: [ARM] 4837/1: make __get_unaligned_*() return unsigned types

Eric Sandeen tracked an XFS on ARM corruption bug down to a function
under fs/xfs/ involving some get_unaligned() calls on u64 pointers.
As it turns out, calling ARM's get_unaligned() on a u64 pointer
pointing to the following byte sequence:

	80 81 82 83 84 85 86 87

would return ffffffff83828180 (LE mode.)  This turns out to be
because of implicit u8 -> int promotion in ARM's implementation of
various helpers for get_unaligned(), causing them to accidentally
return signed instead of unsigned values, which in turn caused the
subsequent casts to unsigned long long in __get_unaligned_8_[bl]e()
to sign-extend the lower words.

Fix by casting the return values of __get_unaligned_[24]_[bl]e()
to unsigned int.

Cc: Eric Sandeen <sandeen@sandeen.net>
Cc: Rabeeh Khoury <rabeeh@marvell.com>
Cc: Nicolas Pitre <nico@marvell.com>
Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 include/asm-arm/unaligned.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/asm-arm/unaligned.h b/include/asm-arm/unaligned.h
index 8431f6eed5c..5db03cf3b90 100644
--- a/include/asm-arm/unaligned.h
+++ b/include/asm-arm/unaligned.h
@@ -40,16 +40,16 @@ extern int __bug_unaligned_x(const void *ptr);
  */
 
 #define __get_unaligned_2_le(__p)					\
-	(__p[0] | __p[1] << 8)
+	(unsigned int)(__p[0] | __p[1] << 8)
 
 #define __get_unaligned_2_be(__p)					\
-	(__p[0] << 8 | __p[1])
+	(unsigned int)(__p[0] << 8 | __p[1])
 
 #define __get_unaligned_4_le(__p)					\
-	(__p[0] | __p[1] << 8 | __p[2] << 16 | __p[3] << 24)
+	(unsigned int)(__p[0] | __p[1] << 8 | __p[2] << 16 | __p[3] << 24)
 
 #define __get_unaligned_4_be(__p)					\
-	(__p[0] << 24 | __p[1] << 16 | __p[2] << 8 | __p[3])
+	(unsigned int)(__p[0] << 24 | __p[1] << 16 | __p[2] << 8 | __p[3])
 
 #define __get_unaligned_8_le(__p)					\
 	((unsigned long long)__get_unaligned_4_le((__p+4)) << 32 |	\
-- 
cgit v1.2.3-70-g09d2


From 5ce94e9e8b469a17fbd3efa1b940c19b5e43449a Mon Sep 17 00:00:00 2001
From: Thomas Kunze <thommycheck@gmx.de>
Date: Sun, 24 Feb 2008 17:59:34 +0100
Subject: [ARM] 4838/1: Fix kexec for SA1100 machines

This patch sets KEXEC_CONTROL_MEMORY_LIMIT to (-1)UL. As the value is
compared with physical addresses TASK_SIZE makes no sense. Machines
where the RAM addresses start above TASK_SIZE kexecs eats all memory
and crashes the kernel without this patch.

Signed-off-by: Thomas Kunze <thommycheck@gmx.de>
Acked-by: Richard Purdie <rpurdie@rpsys.net>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 include/asm-arm/kexec.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/asm-arm/kexec.h b/include/asm-arm/kexec.h
index 1ee17b6951d..47fe34d692d 100644
--- a/include/asm-arm/kexec.h
+++ b/include/asm-arm/kexec.h
@@ -8,7 +8,7 @@
 /* Maximum address we can reach in physical address mode */
 #define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
 /* Maximum address we can use for the control code buffer */
-#define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE
+#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL)
 
 #define KEXEC_CONTROL_CODE_SIZE	4096
 
-- 
cgit v1.2.3-70-g09d2


From a3359e21c06cb5b366fb47307b3d2fd23386a774 Mon Sep 17 00:00:00 2001
From: eric miao <eric.y.miao@gmail.com>
Date: Wed, 27 Feb 2008 01:59:28 +0100
Subject: [ARM] 4840/1: pxa: fix the typo in get_irqnr_and_base

This typo causes the incorrect calculation of the IRQ numbers
in the ICIP2 registers.

Signed-off-by: eric miao <eric.miao@marvell.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 include/asm-arm/arch-pxa/entry-macro.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/asm-arm/arch-pxa/entry-macro.S b/include/asm-arm/arch-pxa/entry-macro.S
index b7e73085146..c145bb01bc8 100644
--- a/include/asm-arm/arch-pxa/entry-macro.S
+++ b/include/asm-arm/arch-pxa/entry-macro.S
@@ -35,7 +35,7 @@
 1004:
 		mrc	p6, 0, \irqstat, c6, c0, 0	@ ICIP2
 		mrc	p6, 0, \irqnr, c7, c0, 0	@ ICMR2
-		ands	\irqstat, \irqstat, \irqnr
+		ands	\irqnr, \irqstat, \irqnr
 		beq	1003f
 		rsb	\irqstat, \irqnr, #0
 		and	\irqstat, \irqstat, \irqnr
-- 
cgit v1.2.3-70-g09d2


From d862ccc570c875e1454fc57ed00f5a1081985b26 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@sirena.org.uk>
Date: Wed, 27 Feb 2008 15:34:56 +0100
Subject: [ARM] 4843/1: Add GCR_CLKBPB for PXA3xx

The PXA3xx AC97 controller has an additional control bit GCR_CLKBPB
which must be used during cold reset.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: eric miao <eric.miao@marvell.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 include/asm-arm/arch-pxa/pxa-regs.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/asm-arm/arch-pxa/pxa-regs.h b/include/asm-arm/arch-pxa/pxa-regs.h
index ac175b4d10c..2357a73340d 100644
--- a/include/asm-arm/arch-pxa/pxa-regs.h
+++ b/include/asm-arm/arch-pxa/pxa-regs.h
@@ -520,6 +520,9 @@
 #define MCCR_FSRIE	(1 << 1)	/* FIFO Service Request Interrupt Enable */
 
 #define GCR		__REG(0x4050000C)  /* Global Control Register */
+#ifdef CONFIG_PXA3xx
+#define GCR_CLKBPB	(1 << 31)	/* Internal clock enable */
+#endif
 #define GCR_nDMAEN	(1 << 24)	/* non DMA Enable */
 #define GCR_CDONE_IE	(1 << 19)	/* Command Done Interrupt Enable */
 #define GCR_SDONE_IE	(1 << 18)	/* Status Done Interrupt Enable */
-- 
cgit v1.2.3-70-g09d2


From 9176c0b1f5a9099cebc07458042ae6a7c75af7b2 Mon Sep 17 00:00:00 2001
From: Jens Osterkamp <jens@de.ibm.com>
Date: Thu, 28 Feb 2008 11:26:21 +0100
Subject: [POWERPC] move celleb DABRX definitions

This moves the private DABRX definitions for celleb from beat.h to
reg.h to make them usable for all.

Signed-off-by: Jens Osterkamp <jens@de.ibm.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/powerpc/platforms/celleb/beat.h | 3 ---
 include/asm-powerpc/reg.h            | 3 +++
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/arch/powerpc/platforms/celleb/beat.h b/arch/powerpc/platforms/celleb/beat.h
index b2e292df13c..ac82ac35b99 100644
--- a/arch/powerpc/platforms/celleb/beat.h
+++ b/arch/powerpc/platforms/celleb/beat.h
@@ -21,9 +21,6 @@
 #ifndef _CELLEB_BEAT_H
 #define _CELLEB_BEAT_H
 
-#define DABRX_KERNEL		(1UL<<1)
-#define DABRX_USER		(1UL<<0)
-
 int64_t beat_get_term_char(uint64_t,uint64_t*,uint64_t*,uint64_t*);
 int64_t beat_put_term_char(uint64_t,uint64_t,uint64_t,uint64_t);
 int64_t beat_repository_encode(int, const char *, uint64_t[4]);
diff --git a/include/asm-powerpc/reg.h b/include/asm-powerpc/reg.h
index 0d6238987df..edc0cfd7f6e 100644
--- a/include/asm-powerpc/reg.h
+++ b/include/asm-powerpc/reg.h
@@ -153,6 +153,9 @@
 #define   CTRL_RUNLATCH	0x1
 #define SPRN_DABR	0x3F5	/* Data Address Breakpoint Register */
 #define   DABR_TRANSLATION	(1UL << 2)
+#define SPRN_DABRX	0x3F7	/* Data Address Breakpoint Register Extension */
+#define   DABRX_USER	(1UL << 0)
+#define   DABRX_KERNEL	(1UL << 1)
 #define SPRN_DAR	0x013	/* Data Address Register */
 #define SPRN_DSISR	0x012	/* Data Storage Interrupt Status Register */
 #define   DSISR_NOHPTE		0x40000000	/* no translation found */
-- 
cgit v1.2.3-70-g09d2


From 674eea0fc4d1d693250b5d3ddad42ca931c87dfd Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@qumranet.com>
Date: Mon, 11 Feb 2008 18:37:23 +0200
Subject: KVM: Make the supported cpuid list a host property rather than a vm
 property

One of the use cases for the supported cpuid list is to create a "greatest
common denominator" of cpu capabilities in a server farm.  As such, it is
useful to be able to get the list without creating a virtual machine first.

Since the code does not depend on the vm in any way, all that is needed is
to move it to the device ioctl handler.  The capability identifier is also
changed so that binaries made against -rc1 will fail gracefully.

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/x86.c  | 42 ++++++++++++++++++++++--------------------
 include/linux/kvm.h |  4 ++--
 2 files changed, 24 insertions(+), 22 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ec60409299a..a7069ec2267 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -46,6 +46,9 @@
 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
 
+static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
+				    struct kvm_cpuid_entry2 __user *entries);
+
 struct kvm_x86_ops *kvm_x86_ops;
 
 struct kvm_stats_debugfs_item debugfs_entries[] = {
@@ -727,6 +730,24 @@ long kvm_arch_dev_ioctl(struct file *filp,
 		r = 0;
 		break;
 	}
+	case KVM_GET_SUPPORTED_CPUID: {
+		struct kvm_cpuid2 __user *cpuid_arg = argp;
+		struct kvm_cpuid2 cpuid;
+
+		r = -EFAULT;
+		if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
+			goto out;
+		r = kvm_dev_ioctl_get_supported_cpuid(&cpuid,
+			cpuid_arg->entries);
+		if (r)
+			goto out;
+
+		r = -EFAULT;
+		if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
+			goto out;
+		r = 0;
+		break;
+	}
 	default:
 		r = -EINVAL;
 	}
@@ -974,8 +995,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	put_cpu();
 }
 
-static int kvm_vm_ioctl_get_supported_cpuid(struct kvm *kvm,
-				    struct kvm_cpuid2 *cpuid,
+static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
 				    struct kvm_cpuid_entry2 __user *entries)
 {
 	struct kvm_cpuid_entry2 *cpuid_entries;
@@ -1487,24 +1507,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = 0;
 		break;
 	}
-	case KVM_GET_SUPPORTED_CPUID: {
-		struct kvm_cpuid2 __user *cpuid_arg = argp;
-		struct kvm_cpuid2 cpuid;
-
-		r = -EFAULT;
-		if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
-			goto out;
-		r = kvm_vm_ioctl_get_supported_cpuid(kvm, &cpuid,
-			cpuid_arg->entries);
-		if (r)
-			goto out;
-
-		r = -EFAULT;
-		if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid))
-			goto out;
-		r = 0;
-		break;
-	}
 	default:
 		;
 	}
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 4de4fd2d860..c1ec04fd000 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -221,6 +221,7 @@ struct kvm_vapic_addr {
  * Get size for mmap(vcpu_fd)
  */
 #define KVM_GET_VCPU_MMAP_SIZE    _IO(KVMIO,   0x04) /* in bytes */
+#define KVM_GET_SUPPORTED_CPUID   _IOWR(KVMIO, 0x05, struct kvm_cpuid2)
 
 /*
  * Extension capability list.
@@ -230,8 +231,8 @@ struct kvm_vapic_addr {
 #define KVM_CAP_MMU_SHADOW_CACHE_CONTROL 2
 #define KVM_CAP_USER_MEMORY 3
 #define KVM_CAP_SET_TSS_ADDR 4
-#define KVM_CAP_EXT_CPUID 5
 #define KVM_CAP_VAPIC 6
+#define KVM_CAP_EXT_CPUID 7
 
 /*
  * ioctls for VM fds
@@ -249,7 +250,6 @@ struct kvm_vapic_addr {
 #define KVM_CREATE_VCPU           _IO(KVMIO,  0x41)
 #define KVM_GET_DIRTY_LOG         _IOW(KVMIO, 0x42, struct kvm_dirty_log)
 #define KVM_SET_MEMORY_ALIAS      _IOW(KVMIO, 0x43, struct kvm_memory_alias)
-#define KVM_GET_SUPPORTED_CPUID   _IOWR(KVMIO, 0x48, struct kvm_cpuid2)
 /* Device model IOC */
 #define KVM_CREATE_IRQCHIP	  _IO(KVMIO,  0x60)
 #define KVM_IRQ_LINE		  _IOW(KVMIO, 0x61, struct kvm_irq_level)
-- 
cgit v1.2.3-70-g09d2


From a345b4ba2086bacc63884e5d72268415a97bcbff Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@woody.linux-foundation.org>
Date: Mon, 3 Mar 2008 10:02:44 -0800
Subject: Revert "x86: fix pmd_bad and pud_bad to support huge pages"

This reverts commit cded932b75ab0a5f9181ee3da34a0a488d1a14fd.

Arjan bisected down a boot-time hang to this, saying:
  ".. it prevents the kernel to finish booting on my (Penryn based)
   laptop.  The boot stops right after freeing the init memory."

and while it's not clear exactly what triggers it, at this stage we're
better off just reverting it while Ingo tries to figure out what went
wrong.

Requested-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Hans Rosenfeld <hans.rosenfeld@amd.com>
Cc: Nish Aravamudan <nish.aravamudan@gmail.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-x86/pgtable_32.h | 4 +---
 include/asm-x86/pgtable_64.h | 6 ++----
 2 files changed, 3 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h
index b478efa971e..a842c7222b1 100644
--- a/include/asm-x86/pgtable_32.h
+++ b/include/asm-x86/pgtable_32.h
@@ -91,9 +91,7 @@ extern unsigned long pg0[];
 /* To avoid harmful races, pmd_none(x) should check only the lower when PAE */
 #define pmd_none(x)	(!(unsigned long)pmd_val(x))
 #define pmd_present(x)	(pmd_val(x) & _PAGE_PRESENT)
-#define	pmd_bad(x)	((pmd_val(x) \
-			  & ~(PAGE_MASK | _PAGE_USER | _PAGE_PSE | _PAGE_NX)) \
-			 != _KERNPG_TABLE)
+#define	pmd_bad(x)	((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
 
 
 #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))
diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
index 0a9258333cb..0a0b77bc736 100644
--- a/include/asm-x86/pgtable_64.h
+++ b/include/asm-x86/pgtable_64.h
@@ -153,14 +153,12 @@ static inline unsigned long pgd_bad(pgd_t pgd)
 
 static inline unsigned long pud_bad(pud_t pud)
 {
-	return pud_val(pud) &
-		~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER | _PAGE_PSE | _PAGE_NX);
+	return pud_val(pud) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
 }
 
 static inline unsigned long pmd_bad(pmd_t pmd)
 {
-	return pmd_val(pmd) &
-		~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER | _PAGE_PSE | _PAGE_NX);
+	return pmd_val(pmd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
 }
 
 #define pte_none(x)	(!pte_val(x))
-- 
cgit v1.2.3-70-g09d2


From d0bcabcd72dda5f553322a1ca92ae31c15b408b6 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Fri, 29 Feb 2008 22:03:07 -0800
Subject: docbook: fix usb source files

Fix docbook problems in USB source files.
These cause the generated docbook to be incorrect.

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/usb/core/usb.c | 6 ++----
 include/linux/usb.h    | 9 +++------
 2 files changed, 5 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index 4e984060c98..f6f19908f5f 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -99,8 +99,7 @@ struct usb_interface *usb_ifnum_to_if(const struct usb_device *dev,
 EXPORT_SYMBOL_GPL(usb_ifnum_to_if);
 
 /**
- * usb_altnum_to_altsetting - get the altsetting structure with a given
- *	alternate setting number.
+ * usb_altnum_to_altsetting - get the altsetting structure with a given alternate setting number.
  * @intf: the interface containing the altsetting in question
  * @altnum: the desired alternate setting number
  *
@@ -442,8 +441,7 @@ EXPORT_SYMBOL_GPL(usb_put_intf);
  */
 
 /**
- * usb_lock_device_for_reset - cautiously acquire the lock for a
- *	usb device structure
+ * usb_lock_device_for_reset - cautiously acquire the lock for a usb device structure
  * @udev: device that's being locked
  * @iface: interface bound to the driver making the request (optional)
  *
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 2372e2e6b52..5bd3ae8aaaf 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -781,8 +781,7 @@ static inline int usb_endpoint_is_isoc_out(
 	.idVendor = (vend), \
 	.idProduct = (prod)
 /**
- * USB_DEVICE_VER - macro used to describe a specific usb device with a
- *		version range
+ * USB_DEVICE_VER - describe a specific usb device with a version range
  * @vend: the 16 bit USB Vendor ID
  * @prod: the 16 bit USB Product ID
  * @lo: the bcdDevice_lo value
@@ -799,8 +798,7 @@ static inline int usb_endpoint_is_isoc_out(
 	.bcdDevice_hi = (hi)
 
 /**
- * USB_DEVICE_INTERFACE_PROTOCOL - macro used to describe a usb
- *		device with a specific interface protocol
+ * USB_DEVICE_INTERFACE_PROTOCOL - describe a usb device with a specific interface protocol
  * @vend: the 16 bit USB Vendor ID
  * @prod: the 16 bit USB Product ID
  * @pr: bInterfaceProtocol value
@@ -846,8 +844,7 @@ static inline int usb_endpoint_is_isoc_out(
 	.bInterfaceProtocol = (pr)
 
 /**
- * USB_DEVICE_AND_INTERFACE_INFO - macro used to describe a specific usb device
- * 		with a class of usb interfaces
+ * USB_DEVICE_AND_INTERFACE_INFO - describe a specific usb device with a class of usb interfaces
  * @vend: the 16 bit USB Vendor ID
  * @prod: the 16 bit USB Product ID
  * @cl: bInterfaceClass value
-- 
cgit v1.2.3-70-g09d2


From a973e9dd1e140a65bed694a2c5c8d53e9cba1a23 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Sat, 1 Mar 2008 13:40:44 -0800
Subject: Revert "unique end pointer" patch

This only made sense for the alternate fastpath which was reverted last week.

Mathieu is working on a new version that addresses the fastpath issues but that
new code first needs to go through mm and it is not clear if we need the
unique end pointers with his new scheme.

Reviewed-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Christoph Lameter <clameter@sgi.com>
---
 include/linux/mm_types.h |  5 +---
 mm/slub.c                | 70 ++++++++++++++++--------------------------------
 2 files changed, 24 insertions(+), 51 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index bfee0bd1d43..34023c65d46 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -64,10 +64,7 @@ struct page {
 #if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
 	    spinlock_t ptl;
 #endif
-	    struct {
-		   struct kmem_cache *slab;	/* SLUB: Pointer to slab */
-		   void *end;			/* SLUB: end marker */
-	    };
+	    struct kmem_cache *slab;	/* SLUB: Pointer to slab */
 	    struct page *first_page;	/* Compound tail pages */
 	};
 	union {
diff --git a/mm/slub.c b/mm/slub.c
index 74c65af0a54..a873953e5a1 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -291,32 +291,15 @@ static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu)
 #endif
 }
 
-/*
- * The end pointer in a slab is special. It points to the first object in the
- * slab but has bit 0 set to mark it.
- *
- * Note that SLUB relies on page_mapping returning NULL for pages with bit 0
- * in the mapping set.
- */
-static inline int is_end(void *addr)
-{
-	return (unsigned long)addr & PAGE_MAPPING_ANON;
-}
-
-static void *slab_address(struct page *page)
-{
-	return page->end - PAGE_MAPPING_ANON;
-}
-
 static inline int check_valid_pointer(struct kmem_cache *s,
 				struct page *page, const void *object)
 {
 	void *base;
 
-	if (object == page->end)
+	if (!object)
 		return 1;
 
-	base = slab_address(page);
+	base = page_address(page);
 	if (object < base || object >= base + s->objects * s->size ||
 		(object - base) % s->size) {
 		return 0;
@@ -349,8 +332,7 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
 
 /* Scan freelist */
 #define for_each_free_object(__p, __s, __free) \
-	for (__p = (__free); (__p) != page->end; __p = get_freepointer((__s),\
-		__p))
+	for (__p = (__free); __p; __p = get_freepointer((__s), __p))
 
 /* Determine object index from a given position */
 static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
@@ -502,7 +484,7 @@ static void slab_fix(struct kmem_cache *s, char *fmt, ...)
 static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
 {
 	unsigned int off;	/* Offset of last byte */
-	u8 *addr = slab_address(page);
+	u8 *addr = page_address(page);
 
 	print_tracking(s, p);
 
@@ -680,7 +662,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)
 	if (!(s->flags & SLAB_POISON))
 		return 1;
 
-	start = slab_address(page);
+	start = page_address(page);
 	end = start + (PAGE_SIZE << s->order);
 	length = s->objects * s->size;
 	remainder = end - (start + length);
@@ -748,7 +730,7 @@ static int check_object(struct kmem_cache *s, struct page *page,
 		 * of the free objects in this slab. May cause
 		 * another error because the object count is now wrong.
 		 */
-		set_freepointer(s, p, page->end);
+		set_freepointer(s, p, NULL);
 		return 0;
 	}
 	return 1;
@@ -782,18 +764,18 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
 	void *fp = page->freelist;
 	void *object = NULL;
 
-	while (fp != page->end && nr <= s->objects) {
+	while (fp && nr <= s->objects) {
 		if (fp == search)
 			return 1;
 		if (!check_valid_pointer(s, page, fp)) {
 			if (object) {
 				object_err(s, page, object,
 					"Freechain corrupt");
-				set_freepointer(s, object, page->end);
+				set_freepointer(s, object, NULL);
 				break;
 			} else {
 				slab_err(s, page, "Freepointer corrupt");
-				page->freelist = page->end;
+				page->freelist = NULL;
 				page->inuse = s->objects;
 				slab_fix(s, "Freelist cleared");
 				return 0;
@@ -899,7 +881,7 @@ bad:
 		 */
 		slab_fix(s, "Marking all objects used");
 		page->inuse = s->objects;
-		page->freelist = page->end;
+		page->freelist = NULL;
 	}
 	return 0;
 }
@@ -939,7 +921,7 @@ static int free_debug_processing(struct kmem_cache *s, struct page *page,
 	}
 
 	/* Special debug activities for freeing objects */
-	if (!SlabFrozen(page) && page->freelist == page->end)
+	if (!SlabFrozen(page) && !page->freelist)
 		remove_full(s, page);
 	if (s->flags & SLAB_STORE_USER)
 		set_track(s, object, TRACK_FREE, addr);
@@ -1124,7 +1106,6 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
 		SetSlabDebug(page);
 
 	start = page_address(page);
-	page->end = start + 1;
 
 	if (unlikely(s->flags & SLAB_POISON))
 		memset(start, POISON_INUSE, PAGE_SIZE << s->order);
@@ -1136,7 +1117,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
 		last = p;
 	}
 	setup_object(s, page, last);
-	set_freepointer(s, last, page->end);
+	set_freepointer(s, last, NULL);
 
 	page->freelist = start;
 	page->inuse = 0;
@@ -1152,7 +1133,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
 		void *p;
 
 		slab_pad_check(s, page);
-		for_each_object(p, s, slab_address(page))
+		for_each_object(p, s, page_address(page))
 			check_object(s, page, p, 0);
 		ClearSlabDebug(page);
 	}
@@ -1162,7 +1143,6 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
 		NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
 		-pages);
 
-	page->mapping = NULL;
 	__free_pages(page, s->order);
 }
 
@@ -1366,7 +1346,7 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
 	ClearSlabFrozen(page);
 	if (page->inuse) {
 
-		if (page->freelist != page->end) {
+		if (page->freelist) {
 			add_partial(n, page, tail);
 			stat(c, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD);
 		} else {
@@ -1410,12 +1390,8 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
 	 * Merge cpu freelist into freelist. Typically we get here
 	 * because both freelists are empty. So this is unlikely
 	 * to occur.
-	 *
-	 * We need to use _is_end here because deactivate slab may
-	 * be called for a debug slab. Then c->freelist may contain
-	 * a dummy pointer.
 	 */
-	while (unlikely(!is_end(c->freelist))) {
+	while (unlikely(c->freelist)) {
 		void **object;
 
 		tail = 0;	/* Hot objects. Put the slab first */
@@ -1517,7 +1493,7 @@ static void *__slab_alloc(struct kmem_cache *s,
 	stat(c, ALLOC_REFILL);
 load_freelist:
 	object = c->page->freelist;
-	if (unlikely(object == c->page->end))
+	if (unlikely(!object))
 		goto another_slab;
 	if (unlikely(SlabDebug(c->page)))
 		goto debug;
@@ -1525,7 +1501,7 @@ load_freelist:
 	object = c->page->freelist;
 	c->freelist = object[c->offset];
 	c->page->inuse = s->objects;
-	c->page->freelist = c->page->end;
+	c->page->freelist = NULL;
 	c->node = page_to_nid(c->page);
 unlock_out:
 	slab_unlock(c->page);
@@ -1607,7 +1583,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 
 	local_irq_save(flags);
 	c = get_cpu_slab(s, smp_processor_id());
-	if (unlikely(is_end(c->freelist) || !node_match(c, node)))
+	if (unlikely(!c->freelist || !node_match(c, node)))
 
 		object = __slab_alloc(s, gfpflags, node, addr, c);
 
@@ -1677,7 +1653,7 @@ checks_ok:
 	 * was not on the partial list before
 	 * then add it.
 	 */
-	if (unlikely(prior == page->end)) {
+	if (unlikely(!prior)) {
 		add_partial(get_node(s, page_to_nid(page)), page, 1);
 		stat(c, FREE_ADD_PARTIAL);
 	}
@@ -1687,7 +1663,7 @@ out_unlock:
 	return;
 
 slab_empty:
-	if (prior != page->end) {
+	if (prior) {
 		/*
 		 * Slab still on the partial list.
 		 */
@@ -1910,7 +1886,7 @@ static void init_kmem_cache_cpu(struct kmem_cache *s,
 			struct kmem_cache_cpu *c)
 {
 	c->page = NULL;
-	c->freelist = (void *)PAGE_MAPPING_ANON;
+	c->freelist = NULL;
 	c->node = 0;
 	c->offset = s->offset / sizeof(void *);
 	c->objsize = s->objsize;
@@ -3199,7 +3175,7 @@ static int validate_slab(struct kmem_cache *s, struct page *page,
 						unsigned long *map)
 {
 	void *p;
-	void *addr = slab_address(page);
+	void *addr = page_address(page);
 
 	if (!check_slab(s, page) ||
 			!on_freelist(s, page, NULL))
@@ -3482,7 +3458,7 @@ static int add_location(struct loc_track *t, struct kmem_cache *s,
 static void process_slab(struct loc_track *t, struct kmem_cache *s,
 		struct page *page, enum track_item alloc)
 {
-	void *addr = slab_address(page);
+	void *addr = page_address(page);
 	DECLARE_BITMAP(map, s->objects);
 	void *p;
 
-- 
cgit v1.2.3-70-g09d2


From 6446faa2ff30ca77c5b25e886bbbfb81c63f1c91 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@sgi.com>
Date: Fri, 15 Feb 2008 23:45:26 -0800
Subject: slub: Fix up comments

Provide comments and fix up various spelling / style issues.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
---
 include/linux/slub_def.h |  4 ++--
 mm/slub.c                | 49 +++++++++++++++++++++++++++---------------------
 2 files changed, 30 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 57deecc79d5..b00c1c73eb0 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -61,7 +61,7 @@ struct kmem_cache {
 	int size;		/* The size of an object including meta data */
 	int objsize;		/* The size of an object without meta data */
 	int offset;		/* Free pointer offset. */
-	int order;
+	int order;		/* Current preferred allocation order */
 
 	/*
 	 * Avoid an extra cache line for UP, SMP and for the node local to
@@ -138,11 +138,11 @@ static __always_inline int kmalloc_index(size_t size)
 	if (size <=        512) return 9;
 	if (size <=       1024) return 10;
 	if (size <=   2 * 1024) return 11;
+	if (size <=   4 * 1024) return 12;
 /*
  * The following is only needed to support architectures with a larger page
  * size than 4k.
  */
-	if (size <=   4 * 1024) return 12;
 	if (size <=   8 * 1024) return 13;
 	if (size <=  16 * 1024) return 14;
 	if (size <=  32 * 1024) return 15;
diff --git a/mm/slub.c b/mm/slub.c
index 72f5f4ecd1d..10d546954ef 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -291,6 +291,7 @@ static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu)
 #endif
 }
 
+/* Verify that a pointer has an address that is valid within a slab page */
 static inline int check_valid_pointer(struct kmem_cache *s,
 				struct page *page, const void *object)
 {
@@ -619,7 +620,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
  * 	A. Free pointer (if we cannot overwrite object on free)
  * 	B. Tracking data for SLAB_STORE_USER
  * 	C. Padding to reach required alignment boundary or at mininum
- * 		one word if debuggin is on to be able to detect writes
+ * 		one word if debugging is on to be able to detect writes
  * 		before the word boundary.
  *
  *	Padding is done using 0x5a (POISON_INUSE)
@@ -1268,7 +1269,7 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
 	 * may return off node objects because partial slabs are obtained
 	 * from other nodes and filled up.
 	 *
-	 * If /sys/slab/xx/defrag_ratio is set to 100 (which makes
+	 * If /sys/kernel/slab/xx/defrag_ratio is set to 100 (which makes
 	 * defrag_ratio = 1000) then every (well almost) allocation will
 	 * first attempt to defrag slab caches on other nodes. This means
 	 * scanning over all nodes to look for partial slabs which may be
@@ -1343,9 +1344,11 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
 			 * Adding an empty slab to the partial slabs in order
 			 * to avoid page allocator overhead. This slab needs
 			 * to come after the other slabs with objects in
-			 * order to fill them up. That way the size of the
-			 * partial list stays small. kmem_cache_shrink can
-			 * reclaim empty slabs from the partial list.
+			 * so that the others get filled first. That way the
+			 * size of the partial list stays small.
+			 *
+			 * kmem_cache_shrink can reclaim any empty slabs from the
+			 * partial list.
 			 */
 			add_partial(n, page, 1);
 			slab_unlock(page);
@@ -1368,7 +1371,7 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
 	if (c->freelist)
 		stat(c, DEACTIVATE_REMOTE_FREES);
 	/*
-	 * Merge cpu freelist into freelist. Typically we get here
+	 * Merge cpu freelist into slab freelist. Typically we get here
 	 * because both freelists are empty. So this is unlikely
 	 * to occur.
 	 */
@@ -1399,6 +1402,7 @@ static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
 
 /*
  * Flush cpu slab.
+ *
  * Called from IPI handler with interrupts disabled.
  */
 static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
@@ -1457,7 +1461,8 @@ static inline int node_match(struct kmem_cache_cpu *c, int node)
  * rest of the freelist to the lockless freelist.
  *
  * And if we were unable to get a new slab from the partial slab lists then
- * we need to allocate a new slab. This is slowest path since we may sleep.
+ * we need to allocate a new slab. This is the slowest path since it involves
+ * a call to the page allocator and the setup of a new slab.
  */
 static void *__slab_alloc(struct kmem_cache *s,
 		gfp_t gfpflags, int node, void *addr, struct kmem_cache_cpu *c)
@@ -1471,7 +1476,9 @@ static void *__slab_alloc(struct kmem_cache *s,
 	slab_lock(c->page);
 	if (unlikely(!node_match(c, node)))
 		goto another_slab;
+
 	stat(c, ALLOC_REFILL);
+
 load_freelist:
 	object = c->page->freelist;
 	if (unlikely(!object))
@@ -1616,6 +1623,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 
 	if (unlikely(SlabDebug(page)))
 		goto debug;
+
 checks_ok:
 	prior = object[offset] = page->freelist;
 	page->freelist = object;
@@ -1630,8 +1638,7 @@ checks_ok:
 		goto slab_empty;
 
 	/*
-	 * Objects left in the slab. If it
-	 * was not on the partial list before
+	 * Objects left in the slab. If it was not on the partial list before
 	 * then add it.
 	 */
 	if (unlikely(!prior)) {
@@ -1845,13 +1852,11 @@ static unsigned long calculate_alignment(unsigned long flags,
 		unsigned long align, unsigned long size)
 {
 	/*
-	 * If the user wants hardware cache aligned objects then
-	 * follow that suggestion if the object is sufficiently
-	 * large.
+	 * If the user wants hardware cache aligned objects then follow that
+	 * suggestion if the object is sufficiently large.
 	 *
-	 * The hardware cache alignment cannot override the
-	 * specified alignment though. If that is greater
-	 * then use it.
+	 * The hardware cache alignment cannot override the specified
+	 * alignment though. If that is greater then use it.
 	 */
 	if ((flags & SLAB_HWCACHE_ALIGN) &&
 			size > cache_line_size() / 2)
@@ -2049,6 +2054,7 @@ static struct kmem_cache_node *early_kmem_cache_node_alloc(gfp_t gfpflags,
 #endif
 	init_kmem_cache_node(n);
 	atomic_long_inc(&n->nr_slabs);
+
 	/*
 	 * lockdep requires consistent irq usage for each lock
 	 * so even though there cannot be a race this early in
@@ -2301,7 +2307,7 @@ int kmem_ptr_validate(struct kmem_cache *s, const void *object)
 	/*
 	 * We could also check if the object is on the slabs freelist.
 	 * But this would be too expensive and it seems that the main
-	 * purpose of kmem_ptr_valid is to check if the object belongs
+	 * purpose of kmem_ptr_valid() is to check if the object belongs
 	 * to a certain slab.
 	 */
 	return 1;
@@ -2913,7 +2919,7 @@ void __init kmem_cache_init(void)
 	/*
 	 * Patch up the size_index table if we have strange large alignment
 	 * requirements for the kmalloc array. This is only the case for
-	 * mips it seems. The standard arches will not generate any code here.
+	 * MIPS it seems. The standard arches will not generate any code here.
 	 *
 	 * Largest permitted alignment is 256 bytes due to the way we
 	 * handle the index determination for the smaller caches.
@@ -2942,7 +2948,6 @@ void __init kmem_cache_init(void)
 	kmem_size = sizeof(struct kmem_cache);
 #endif
 
-
 	printk(KERN_INFO
 		"SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d,"
 		" CPUs=%d, Nodes=%d\n",
@@ -3039,12 +3044,15 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
 		 */
 		for_each_online_cpu(cpu)
 			get_cpu_slab(s, cpu)->objsize = s->objsize;
+
 		s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
 		up_write(&slub_lock);
+
 		if (sysfs_slab_alias(s, name))
 			goto err;
 		return s;
 	}
+
 	s = kmalloc(kmem_size, GFP_KERNEL);
 	if (s) {
 		if (kmem_cache_open(s, GFP_KERNEL, name,
@@ -3927,7 +3935,6 @@ SLAB_ATTR(remote_node_defrag_ratio);
 #endif
 
 #ifdef CONFIG_SLUB_STATS
-
 static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
 {
 	unsigned long sum  = 0;
@@ -4111,8 +4118,8 @@ static struct kset *slab_kset;
 #define ID_STR_LENGTH 64
 
 /* Create a unique string id for a slab cache:
- * format
- * :[flags-]size:[memory address of kmemcache]
+ *
+ * Format	:[flags-]size
  */
 static char *create_unique_id(struct kmem_cache *s)
 {
-- 
cgit v1.2.3-70-g09d2


From 2f775dbaa541d6bc0cccf20aab95f7a0930ef7e9 Mon Sep 17 00:00:00 2001
From: Bryan Wu <cooloney@kernel.org>
Date: Thu, 6 Mar 2008 16:04:58 -0700
Subject: [Blackfin] arch: to kill syscalls missing warning by adding new
 timerfd syscalls

Signed-off-by: Bryan Wu <cooloney@kernel.org>
---
 arch/blackfin/mach-common/entry.S | 5 ++++-
 include/asm-blackfin/unistd.h     | 6 ++++--
 2 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S
index 2cbb7a0bc38..cee54cebbc6 100644
--- a/arch/blackfin/mach-common/entry.S
+++ b/arch/blackfin/mach-common/entry.S
@@ -1369,7 +1369,7 @@ ENTRY(_sys_call_table)
 	.long _sys_epoll_pwait
 	.long _sys_utimensat
 	.long _sys_signalfd
-	.long _sys_ni_syscall
+	.long _sys_timerfd_create
 	.long _sys_eventfd	/* 350 */
 	.long _sys_pread64
 	.long _sys_pwrite64
@@ -1378,6 +1378,9 @@ ENTRY(_sys_call_table)
 	.long _sys_get_robust_list	/* 355 */
 	.long _sys_fallocate
 	.long _sys_semtimedop
+	.long _sys_timerfd_settime
+	.long _sys_timerfd_gettime
+
 	.rept NR_syscalls-(.-_sys_call_table)/4
 	.long _sys_ni_syscall
 	.endr
diff --git a/include/asm-blackfin/unistd.h b/include/asm-blackfin/unistd.h
index e98167358d2..c18a399f6e3 100644
--- a/include/asm-blackfin/unistd.h
+++ b/include/asm-blackfin/unistd.h
@@ -361,7 +361,7 @@
 #define __NR_epoll_pwait	346
 #define __NR_utimensat		347
 #define __NR_signalfd		348
-#define __NR_timerfd		349
+#define __NR_timerfd_create	349
 #define __NR_eventfd		350
 #define __NR_pread64		351
 #define __NR_pwrite64		352
@@ -370,8 +370,10 @@
 #define __NR_get_robust_list	355
 #define __NR_fallocate		356
 #define __NR_semtimedop		357
+#define __NR_timerfd_settime	358
+#define __NR_timerfd_gettime	359
 
-#define __NR_syscall		358
+#define __NR_syscall		360
 #define NR_syscalls		__NR_syscall
 
 /* Old optional stuff no one actually uses */
-- 
cgit v1.2.3-70-g09d2


From 7a85f8896f4b4a4a0249563b92af9e3161a6b467 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Tue, 4 Mar 2008 11:17:11 +0100
Subject: block: restore the meaning of rq->data_len to the true data length

The meaning of rq->data_len was changed to the length of an allocated
buffer from the true data length. It breaks SG_IO friends and
bsg. This patch restores the meaning of rq->data_len to the true data
length and adds rq->extra_len to store an extended length (due to
drain buffer and padding).

This patch also removes the code to update bio in blk_rq_map_user
introduced by the commit 40b01b9bbdf51ae543a04744283bf2d56c4a6afa.
The commit adjusts bio according to memory alignment
(queue_dma_alignment). However, memory alignment is NOT padding
alignment. This adjustment also breaks SG_IO friends and bsg. Padding
alignment needs to be fixed in a proper way (by a separate patch).

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Jens Axboe <axboe@carl.home.kernel.dk>
---
 block/blk-core.c          | 3 +--
 block/blk-map.c           | 6 +-----
 block/blk-merge.c         | 2 +-
 block/bsg.c               | 8 ++++----
 block/scsi_ioctl.c        | 4 ++--
 drivers/ata/libata-scsi.c | 6 +++---
 include/linux/blkdev.h    | 2 +-
 7 files changed, 13 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/block/blk-core.c b/block/blk-core.c
index 2d7e3a2f56c..a248cf1c98d 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -127,7 +127,6 @@ void rq_init(struct request_queue *q, struct request *rq)
 	rq->nr_hw_segments = 0;
 	rq->ioprio = 0;
 	rq->special = NULL;
-	rq->raw_data_len = 0;
 	rq->buffer = NULL;
 	rq->tag = -1;
 	rq->errors = 0;
@@ -135,6 +134,7 @@ void rq_init(struct request_queue *q, struct request *rq)
 	rq->cmd_len = 0;
 	memset(rq->cmd, 0, sizeof(rq->cmd));
 	rq->data_len = 0;
+	rq->extra_len = 0;
 	rq->sense_len = 0;
 	rq->data = NULL;
 	rq->sense = NULL;
@@ -2018,7 +2018,6 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
 	rq->hard_cur_sectors = rq->current_nr_sectors;
 	rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio);
 	rq->buffer = bio_data(bio);
-	rq->raw_data_len = bio->bi_size;
 	rq->data_len = bio->bi_size;
 
 	rq->bio = rq->biotail = bio;
diff --git a/block/blk-map.c b/block/blk-map.c
index 09f7fd0bcb7..f5598322954 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -19,7 +19,6 @@ int blk_rq_append_bio(struct request_queue *q, struct request *rq,
 		rq->biotail->bi_next = bio;
 		rq->biotail = bio;
 
-		rq->raw_data_len += bio->bi_size;
 		rq->data_len += bio->bi_size;
 	}
 	return 0;
@@ -151,11 +150,8 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq,
 	 */
 	if (len & queue_dma_alignment(q)) {
 		unsigned int pad_len = (queue_dma_alignment(q) & ~len) + 1;
-		struct bio *bio = rq->biotail;
 
-		bio->bi_io_vec[bio->bi_vcnt - 1].bv_len += pad_len;
-		bio->bi_size += pad_len;
-		rq->data_len += pad_len;
+		rq->extra_len += pad_len;
 	}
 
 	rq->buffer = rq->data = NULL;
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 7506c4fe026..0f58616bcd7 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -231,7 +231,7 @@ new_segment:
 			    ((unsigned long)q->dma_drain_buffer) &
 			    (PAGE_SIZE - 1));
 		nsegs++;
-		rq->data_len += q->dma_drain_size;
+		rq->extra_len += q->dma_drain_size;
 	}
 
 	if (sg)
diff --git a/block/bsg.c b/block/bsg.c
index 7f3c09549e4..8917c5174dc 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -437,14 +437,14 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr,
 	}
 
 	if (rq->next_rq) {
-		hdr->dout_resid = rq->raw_data_len;
-		hdr->din_resid = rq->next_rq->raw_data_len;
+		hdr->dout_resid = rq->data_len;
+		hdr->din_resid = rq->next_rq->data_len;
 		blk_rq_unmap_user(bidi_bio);
 		blk_put_request(rq->next_rq);
 	} else if (rq_data_dir(rq) == READ)
-		hdr->din_resid = rq->raw_data_len;
+		hdr->din_resid = rq->data_len;
 	else
-		hdr->dout_resid = rq->raw_data_len;
+		hdr->dout_resid = rq->data_len;
 
 	/*
 	 * If the request generated a negative error number, return it
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index e993cac4911..a2c3a936ebf 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -266,7 +266,7 @@ static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr,
 	hdr->info = 0;
 	if (hdr->masked_status || hdr->host_status || hdr->driver_status)
 		hdr->info |= SG_INFO_CHECK;
-	hdr->resid = rq->raw_data_len;
+	hdr->resid = rq->data_len;
 	hdr->sb_len_wr = 0;
 
 	if (rq->sense_len && hdr->sbp) {
@@ -528,8 +528,8 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk,
 	rq = blk_get_request(q, WRITE, __GFP_WAIT);
 	rq->cmd_type = REQ_TYPE_BLOCK_PC;
 	rq->data = NULL;
-	rq->raw_data_len = 0;
 	rq->data_len = 0;
+	rq->extra_len = 0;
 	rq->timeout = BLK_DEFAULT_SG_TIMEOUT;
 	memset(rq->cmd, 0, sizeof(rq->cmd));
 	rq->cmd[0] = cmd;
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 7b1f1ee8131..fe47922dd69 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -2538,7 +2538,7 @@ static unsigned int atapi_xlat(struct ata_queued_cmd *qc)
 	}
 
 	qc->tf.command = ATA_CMD_PACKET;
-	qc->nbytes = scsi_bufflen(scmd);
+	qc->nbytes = scsi_bufflen(scmd) + scmd->request->extra_len;
 
 	/* check whether ATAPI DMA is safe */
 	if (!using_pio && ata_check_atapi_dma(qc))
@@ -2549,7 +2549,7 @@ static unsigned int atapi_xlat(struct ata_queued_cmd *qc)
 	 * want to set it properly, and for DMA where it is
 	 * effectively meaningless.
 	 */
-	nbytes = min(scmd->request->raw_data_len, (unsigned int)63 * 1024);
+	nbytes = min(scmd->request->data_len, (unsigned int)63 * 1024);
 
 	/* Most ATAPI devices which honor transfer chunk size don't
 	 * behave according to the spec when odd chunk size which
@@ -2875,7 +2875,7 @@ static unsigned int ata_scsi_pass_thru(struct ata_queued_cmd *qc)
 	 * TODO: find out if we need to do more here to
 	 *       cover scatter/gather case.
 	 */
-	qc->nbytes = scsi_bufflen(scmd);
+	qc->nbytes = scsi_bufflen(scmd) + scmd->request->extra_len;
 
 	/* request result TF and be quiet about device error */
 	qc->flags |= ATA_QCFLAG_RESULT_TF | ATA_QCFLAG_QUIET;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 6fe67d1939c..b72526c13ca 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -216,8 +216,8 @@ struct request {
 	unsigned int cmd_len;
 	unsigned char cmd[BLK_MAX_CDB];
 
-	unsigned int raw_data_len;
 	unsigned int data_len;
+	unsigned int extra_len;	/* length of alignment and padding */
 	unsigned int sense_len;
 	void *data;
 	void *sense;
-- 
cgit v1.2.3-70-g09d2


From e3790c7d42a545e8fe8b38b513613ca96687b670 Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Tue, 4 Mar 2008 11:18:17 +0100
Subject: block: separate out padding from alignment

Block layer alignment was used for two different purposes - memory
alignment and padding.  This causes problems in lower layers because
drivers which only require memory alignment ends up with adjusted
rq->data_len.  Separate out padding such that padding occurs iff
driver explicitly requests it.

Tomo: restorethe code to update bio in blk_rq_map_user
      introduced by the commit 40b01b9bbdf51ae543a04744283bf2d56c4a6afa
      according to padding alignment.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-map.c           | 20 +++++++++++++-------
 block/blk-settings.c      | 17 +++++++++++++++++
 drivers/ata/libata-scsi.c |  3 ++-
 include/linux/blkdev.h    |  2 ++
 4 files changed, 34 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/block/blk-map.c b/block/blk-map.c
index f5598322954..4e17dfd0035 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -43,6 +43,7 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq,
 			     void __user *ubuf, unsigned int len)
 {
 	unsigned long uaddr;
+	unsigned int alignment;
 	struct bio *bio, *orig_bio;
 	int reading, ret;
 
@@ -53,8 +54,8 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq,
 	 * direct dma. else, set up kernel bounce buffers
 	 */
 	uaddr = (unsigned long) ubuf;
-	if (!(uaddr & queue_dma_alignment(q)) &&
-	    !(len & queue_dma_alignment(q)))
+	alignment = queue_dma_alignment(q) | q->dma_pad_mask;
+	if (!(uaddr & alignment) && !(len & alignment))
 		bio = bio_map_user(q, NULL, uaddr, len, reading);
 	else
 		bio = bio_copy_user(q, uaddr, len, reading);
@@ -141,15 +142,20 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq,
 
 	/*
 	 * __blk_rq_map_user() copies the buffers if starting address
-	 * or length isn't aligned.  As the copied buffer is always
-	 * page aligned, we know that there's enough room for padding.
-	 * Extend the last bio and update rq->data_len accordingly.
+	 * or length isn't aligned to dma_pad_mask.  As the copied
+	 * buffer is always page aligned, we know that there's enough
+	 * room for padding.  Extend the last bio and update
+	 * rq->data_len accordingly.
 	 *
 	 * On unmap, bio_uncopy_user() will use unmodified
 	 * bio_map_data pointed to by bio->bi_private.
 	 */
-	if (len & queue_dma_alignment(q)) {
-		unsigned int pad_len = (queue_dma_alignment(q) & ~len) + 1;
+	if (len & q->dma_pad_mask) {
+		unsigned int pad_len = (q->dma_pad_mask & ~len) + 1;
+		struct bio *bio = rq->biotail;
+
+		bio->bi_io_vec[bio->bi_vcnt - 1].bv_len += pad_len;
+		bio->bi_size += pad_len;
 
 		rq->extra_len += pad_len;
 	}
diff --git a/block/blk-settings.c b/block/blk-settings.c
index da923fed1f2..a9f37f530b1 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -292,6 +292,23 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
 }
 EXPORT_SYMBOL(blk_queue_stack_limits);
 
+/**
+ * blk_queue_dma_pad - set pad mask
+ * @q:     the request queue for the device
+ * @mask:  pad mask
+ *
+ * Set pad mask.  Direct IO requests are padded to the mask specified.
+ *
+ * Appending pad buffer to a request modifies ->data_len such that it
+ * includes the pad buffer.  The original requested data length can be
+ * obtained using blk_rq_raw_data_len().
+ **/
+void blk_queue_dma_pad(struct request_queue *q, unsigned int mask)
+{
+	q->dma_pad_mask = mask;
+}
+EXPORT_SYMBOL(blk_queue_dma_pad);
+
 /**
  * blk_queue_dma_drain - Set up a drain buffer for excess dma.
  * @q:  the request queue for the device
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index fe47922dd69..8f0e8f2bc62 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -862,9 +862,10 @@ static int ata_scsi_dev_config(struct scsi_device *sdev,
 		struct request_queue *q = sdev->request_queue;
 		void *buf;
 
-		/* set the min alignment */
+		/* set the min alignment and padding */
 		blk_queue_update_dma_alignment(sdev->request_queue,
 					       ATA_DMA_PAD_SZ - 1);
+		blk_queue_dma_pad(sdev->request_queue, ATA_DMA_PAD_SZ - 1);
 
 		/* configure draining */
 		buf = kmalloc(ATAPI_MAX_DRAIN, q->bounce_gfp | GFP_KERNEL);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b72526c13ca..6f79d40dd3c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -362,6 +362,7 @@ struct request_queue
 	unsigned long		seg_boundary_mask;
 	void			*dma_drain_buffer;
 	unsigned int		dma_drain_size;
+	unsigned int		dma_pad_mask;
 	unsigned int		dma_alignment;
 
 	struct blk_queue_tag	*queue_tags;
@@ -701,6 +702,7 @@ extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short);
 extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
 extern void blk_queue_hardsect_size(struct request_queue *, unsigned short);
 extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b);
+extern void blk_queue_dma_pad(struct request_queue *, unsigned int);
 extern int blk_queue_dma_drain(struct request_queue *q,
 			       dma_drain_needed_fn *dma_drain_needed,
 			       void *buf, unsigned int size);
-- 
cgit v1.2.3-70-g09d2


From 1826eadfc42839af7c1c5a1859510aff635d3fa1 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Tue, 4 Mar 2008 11:23:46 +0100
Subject: block/genhd.c: cleanups

This patch contains the following cleanups:
- make the needlessly global struct disk_type static
- #if 0 the unused genhd_media_change_notify()

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/genhd.c         | 6 +++++-
 include/linux/genhd.h | 2 --
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/block/genhd.c b/block/genhd.c
index abc6feddc8c..c44527d16c5 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -24,6 +24,8 @@ static DEFINE_MUTEX(block_class_lock);
 struct kobject *block_depr;
 #endif
 
+static struct device_type disk_type;
+
 /*
  * Can be deleted altogether. Later.
  *
@@ -502,7 +504,7 @@ struct class block_class = {
 	.name		= "block",
 };
 
-struct device_type disk_type = {
+static struct device_type disk_type = {
 	.name		= "disk",
 	.groups		= disk_attr_groups,
 	.release	= disk_release,
@@ -632,12 +634,14 @@ static void media_change_notify_thread(struct work_struct *work)
 	put_device(gd->driverfs_dev);
 }
 
+#if 0
 void genhd_media_change_notify(struct gendisk *disk)
 {
 	get_device(disk->driverfs_dev);
 	schedule_work(&disk->async_notify);
 }
 EXPORT_SYMBOL_GPL(genhd_media_change_notify);
+#endif  /*  0  */
 
 dev_t blk_lookup_devt(const char *name)
 {
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 09a3b18918c..cd048e3cc96 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -18,7 +18,6 @@
 #define dev_to_disk(device) container_of(device, struct gendisk, dev)
 #define dev_to_part(device) container_of(device, struct hd_struct, dev)
 
-extern struct device_type disk_type;
 extern struct device_type part_type;
 extern struct kobject *block_depr;
 extern struct class block_class;
@@ -556,7 +555,6 @@ extern struct gendisk *alloc_disk_node(int minors, int node_id);
 extern struct gendisk *alloc_disk(int minors);
 extern struct kobject *get_disk(struct gendisk *disk);
 extern void put_disk(struct gendisk *disk);
-extern void genhd_media_change_notify(struct gendisk *disk);
 extern void blk_register_region(dev_t devt, unsigned long range,
 			struct module *module,
 			struct kobject *(*probe)(dev_t, int *, void *),
-- 
cgit v1.2.3-70-g09d2


From a0db701a6bf767320e4471bd55e70702d230f6fb Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Tue, 4 Mar 2008 11:23:50 +0100
Subject: block/genhd.c: proper externs

This patch adds proper externs for two structs in include/linux/genhd.h

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/proc/proc_misc.c   | 3 +--
 include/linux/genhd.h | 3 +++
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 468805d40e2..2d563979cb0 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -32,6 +32,7 @@
 #include <linux/interrupt.h>
 #include <linux/swap.h>
 #include <linux/slab.h>
+#include <linux/genhd.h>
 #include <linux/smp.h>
 #include <linux/signal.h>
 #include <linux/module.h>
@@ -377,7 +378,6 @@ static int stram_read_proc(char *page, char **start, off_t off,
 #endif
 
 #ifdef CONFIG_BLOCK
-extern const struct seq_operations partitions_op;
 static int partitions_open(struct inode *inode, struct file *file)
 {
 	return seq_open(file, &partitions_op);
@@ -389,7 +389,6 @@ static const struct file_operations proc_partitions_operations = {
 	.release	= seq_release,
 };
 
-extern const struct seq_operations diskstats_op;
 static int diskstats_open(struct inode *inode, struct file *file)
 {
 	return seq_open(file, &diskstats_op);
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index cd048e3cc96..32c2ac49a07 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -22,6 +22,9 @@ extern struct device_type part_type;
 extern struct kobject *block_depr;
 extern struct class block_class;
 
+extern const struct seq_operations partitions_op;
+extern const struct seq_operations diskstats_op;
+
 enum {
 /* These three have identical behaviour; use the second one if DOS FDISK gets
    confused about extended/logical partitions starting past cylinder 1023. */
-- 
cgit v1.2.3-70-g09d2


From 72dc67a69690288538142df73a7e3ac66fea68dc Mon Sep 17 00:00:00 2001
From: Izik Eidus <izike@qumranet.com>
Date: Sun, 10 Feb 2008 18:04:15 +0200
Subject: KVM: remove the usage of the mmap_sem for the protection of the
 memory slots.

This patch replaces the mmap_sem lock for the memory slots with a new
kvm private lock, it is needed beacuse untill now there were cases where
kvm accesses user memory while holding the mmap semaphore.

Signed-off-by: Izik Eidus <izike@qumranet.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/mmu.c         | 24 ++++++++++++++---
 arch/x86/kvm/paging_tmpl.h | 13 +++++++---
 arch/x86/kvm/vmx.c         |  7 +++--
 arch/x86/kvm/x86.c         | 65 ++++++++++++++++++++++++++--------------------
 include/linux/kvm_host.h   |  1 +
 virt/kvm/kvm_main.c        |  5 ++--
 6 files changed, 75 insertions(+), 40 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 8efdcdbebb0..26037106ad1 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -876,11 +876,18 @@ static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn)
 
 struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
 {
+	struct page *page;
+
 	gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
 
 	if (gpa == UNMAPPED_GVA)
 		return NULL;
-	return gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+
+	down_read(&current->mm->mmap_sem);
+	page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+	up_read(&current->mm->mmap_sem);
+
+	return page;
 }
 
 static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
@@ -1020,15 +1027,18 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 
 	struct page *page;
 
+	down_read(&vcpu->kvm->slots_lock);
+
 	down_read(&current->mm->mmap_sem);
 	page = gfn_to_page(vcpu->kvm, gfn);
+	up_read(&current->mm->mmap_sem);
 
 	spin_lock(&vcpu->kvm->mmu_lock);
 	kvm_mmu_free_some_pages(vcpu);
 	r = __nonpaging_map(vcpu, v, write, gfn, page);
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
-	up_read(&current->mm->mmap_sem);
+	up_read(&vcpu->kvm->slots_lock);
 
 	return r;
 }
@@ -1362,6 +1372,7 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	gfn_t gfn;
 	int r;
 	u64 gpte = 0;
+	struct page *page;
 
 	if (bytes != 4 && bytes != 8)
 		return;
@@ -1389,6 +1400,11 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	if (!is_present_pte(gpte))
 		return;
 	gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
+
+	down_read(&current->mm->mmap_sem);
+	page = gfn_to_page(vcpu->kvm, gfn);
+	up_read(&current->mm->mmap_sem);
+
 	vcpu->arch.update_pte.gfn = gfn;
 	vcpu->arch.update_pte.page = gfn_to_page(vcpu->kvm, gfn);
 }
@@ -1496,9 +1512,9 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
 	gpa_t gpa;
 	int r;
 
-	down_read(&current->mm->mmap_sem);
+	down_read(&vcpu->kvm->slots_lock);
 	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
-	up_read(&current->mm->mmap_sem);
+	up_read(&vcpu->kvm->slots_lock);
 
 	spin_lock(&vcpu->kvm->mmu_lock);
 	r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 03ba8608fe0..2009c6e9dc4 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -91,7 +91,10 @@ static bool FNAME(cmpxchg_gpte)(struct kvm *kvm,
 	pt_element_t *table;
 	struct page *page;
 
+	down_read(&current->mm->mmap_sem);
 	page = gfn_to_page(kvm, table_gfn);
+	up_read(&current->mm->mmap_sem);
+
 	table = kmap_atomic(page, KM_USER0);
 
 	ret = CMPXCHG(&table[index], orig_pte, new_pte);
@@ -378,7 +381,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 	if (r)
 		return r;
 
-	down_read(&current->mm->mmap_sem);
+	down_read(&vcpu->kvm->slots_lock);
 	/*
 	 * Look up the shadow pte for the faulting address.
 	 */
@@ -392,11 +395,13 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 		pgprintk("%s: guest page fault\n", __FUNCTION__);
 		inject_page_fault(vcpu, addr, walker.error_code);
 		vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
-		up_read(&current->mm->mmap_sem);
+		up_read(&vcpu->kvm->slots_lock);
 		return 0;
 	}
 
+	down_read(&current->mm->mmap_sem);
 	page = gfn_to_page(vcpu->kvm, walker.gfn);
+	up_read(&current->mm->mmap_sem);
 
 	spin_lock(&vcpu->kvm->mmu_lock);
 	kvm_mmu_free_some_pages(vcpu);
@@ -413,14 +418,14 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 	 */
 	if (shadow_pte && is_io_pte(*shadow_pte)) {
 		spin_unlock(&vcpu->kvm->mmu_lock);
-		up_read(&current->mm->mmap_sem);
+		up_read(&vcpu->kvm->slots_lock);
 		return 1;
 	}
 
 	++vcpu->stat.pf_fixed;
 	kvm_mmu_audit(vcpu, "post page fault (fixed)");
 	spin_unlock(&vcpu->kvm->mmu_lock);
-	up_read(&current->mm->mmap_sem);
+	up_read(&vcpu->kvm->slots_lock);
 
 	return write_pt;
 }
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ad36447e696..86f5bf12183 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1477,7 +1477,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
 	struct kvm_userspace_memory_region kvm_userspace_mem;
 	int r = 0;
 
-	down_write(&current->mm->mmap_sem);
+	down_write(&kvm->slots_lock);
 	if (kvm->arch.apic_access_page)
 		goto out;
 	kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
@@ -1487,9 +1487,12 @@ static int alloc_apic_access_page(struct kvm *kvm)
 	r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0);
 	if (r)
 		goto out;
+
+	down_read(&current->mm->mmap_sem);
 	kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00);
+	up_read(&current->mm->mmap_sem);
 out:
-	up_write(&current->mm->mmap_sem);
+	up_write(&kvm->slots_lock);
 	return r;
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 338764fa539..6b01552bd1f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -184,7 +184,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
 	int ret;
 	u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)];
 
-	down_read(&current->mm->mmap_sem);
+	down_read(&vcpu->kvm->slots_lock);
 	ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte,
 				  offset * sizeof(u64), sizeof(pdpte));
 	if (ret < 0) {
@@ -201,7 +201,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
 
 	memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs));
 out:
-	up_read(&current->mm->mmap_sem);
+	up_read(&vcpu->kvm->slots_lock);
 
 	return ret;
 }
@@ -215,13 +215,13 @@ static bool pdptrs_changed(struct kvm_vcpu *vcpu)
 	if (is_long_mode(vcpu) || !is_pae(vcpu))
 		return false;
 
-	down_read(&current->mm->mmap_sem);
+	down_read(&vcpu->kvm->slots_lock);
 	r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte));
 	if (r < 0)
 		goto out;
 	changed = memcmp(pdpte, vcpu->arch.pdptrs, sizeof(pdpte)) != 0;
 out:
-	up_read(&current->mm->mmap_sem);
+	up_read(&vcpu->kvm->slots_lock);
 
 	return changed;
 }
@@ -359,7 +359,7 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 		 */
 	}
 
-	down_read(&current->mm->mmap_sem);
+	down_read(&vcpu->kvm->slots_lock);
 	/*
 	 * Does the new cr3 value map to physical memory? (Note, we
 	 * catch an invalid cr3 even in real-mode, because it would
@@ -375,7 +375,7 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 		vcpu->arch.cr3 = cr3;
 		vcpu->arch.mmu.new_cr3(vcpu);
 	}
-	up_read(&current->mm->mmap_sem);
+	up_read(&vcpu->kvm->slots_lock);
 }
 EXPORT_SYMBOL_GPL(set_cr3);
 
@@ -1232,12 +1232,12 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
 	if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
 		return -EINVAL;
 
-	down_write(&current->mm->mmap_sem);
+	down_write(&kvm->slots_lock);
 
 	kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
 	kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;
 
-	up_write(&current->mm->mmap_sem);
+	up_write(&kvm->slots_lock);
 	return 0;
 }
 
@@ -1286,7 +1286,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
 	    < alias->target_phys_addr)
 		goto out;
 
-	down_write(&current->mm->mmap_sem);
+	down_write(&kvm->slots_lock);
 
 	p = &kvm->arch.aliases[alias->slot];
 	p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
@@ -1300,7 +1300,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
 
 	kvm_mmu_zap_all(kvm);
 
-	up_write(&current->mm->mmap_sem);
+	up_write(&kvm->slots_lock);
 
 	return 0;
 
@@ -1376,7 +1376,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 	struct kvm_memory_slot *memslot;
 	int is_dirty = 0;
 
-	down_write(&current->mm->mmap_sem);
+	down_write(&kvm->slots_lock);
 
 	r = kvm_get_dirty_log(kvm, log, &is_dirty);
 	if (r)
@@ -1392,7 +1392,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
 	}
 	r = 0;
 out:
-	up_write(&current->mm->mmap_sem);
+	up_write(&kvm->slots_lock);
 	return r;
 }
 
@@ -1570,7 +1570,7 @@ int emulator_read_std(unsigned long addr,
 	void *data = val;
 	int r = X86EMUL_CONTINUE;
 
-	down_read(&current->mm->mmap_sem);
+	down_read(&vcpu->kvm->slots_lock);
 	while (bytes) {
 		gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
 		unsigned offset = addr & (PAGE_SIZE-1);
@@ -1592,7 +1592,7 @@ int emulator_read_std(unsigned long addr,
 		addr += tocopy;
 	}
 out:
-	up_read(&current->mm->mmap_sem);
+	up_read(&vcpu->kvm->slots_lock);
 	return r;
 }
 EXPORT_SYMBOL_GPL(emulator_read_std);
@@ -1611,9 +1611,9 @@ static int emulator_read_emulated(unsigned long addr,
 		return X86EMUL_CONTINUE;
 	}
 
-	down_read(&current->mm->mmap_sem);
+	down_read(&vcpu->kvm->slots_lock);
 	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
-	up_read(&current->mm->mmap_sem);
+	up_read(&vcpu->kvm->slots_lock);
 
 	/* For APIC access vmexit */
 	if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
@@ -1651,14 +1651,14 @@ static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
 {
 	int ret;
 
-	down_read(&current->mm->mmap_sem);
+	down_read(&vcpu->kvm->slots_lock);
 	ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
 	if (ret < 0) {
-		up_read(&current->mm->mmap_sem);
+		up_read(&vcpu->kvm->slots_lock);
 		return 0;
 	}
 	kvm_mmu_pte_write(vcpu, gpa, val, bytes);
-	up_read(&current->mm->mmap_sem);
+	up_read(&vcpu->kvm->slots_lock);
 	return 1;
 }
 
@@ -1670,9 +1670,9 @@ static int emulator_write_emulated_onepage(unsigned long addr,
 	struct kvm_io_device *mmio_dev;
 	gpa_t                 gpa;
 
-	down_read(&current->mm->mmap_sem);
+	down_read(&vcpu->kvm->slots_lock);
 	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
-	up_read(&current->mm->mmap_sem);
+	up_read(&vcpu->kvm->slots_lock);
 
 	if (gpa == UNMAPPED_GVA) {
 		kvm_inject_page_fault(vcpu, addr, 2);
@@ -1749,7 +1749,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
 		char *kaddr;
 		u64 val;
 
-		down_read(&current->mm->mmap_sem);
+		down_read(&vcpu->kvm->slots_lock);
 		gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
 
 		if (gpa == UNMAPPED_GVA ||
@@ -1760,13 +1760,17 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
 			goto emul_write;
 
 		val = *(u64 *)new;
+
+		down_read(&current->mm->mmap_sem);
 		page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+		up_read(&current->mm->mmap_sem);
+
 		kaddr = kmap_atomic(page, KM_USER0);
 		set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val);
 		kunmap_atomic(kaddr, KM_USER0);
 		kvm_release_page_dirty(page);
 	emul_write:
-		up_read(&current->mm->mmap_sem);
+		up_read(&vcpu->kvm->slots_lock);
 	}
 #endif
 
@@ -2159,10 +2163,10 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 		kvm_x86_ops->skip_emulated_instruction(vcpu);
 
 	for (i = 0; i < nr_pages; ++i) {
-		down_read(&current->mm->mmap_sem);
+		down_read(&vcpu->kvm->slots_lock);
 		page = gva_to_page(vcpu, address + i * PAGE_SIZE);
 		vcpu->arch.pio.guest_pages[i] = page;
-		up_read(&current->mm->mmap_sem);
+		up_read(&vcpu->kvm->slots_lock);
 		if (!page) {
 			kvm_inject_gp(vcpu, 0);
 			free_pio_guest_pages(vcpu);
@@ -2485,8 +2489,9 @@ static void vapic_enter(struct kvm_vcpu *vcpu)
 
 	down_read(&current->mm->mmap_sem);
 	page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
-	vcpu->arch.apic->vapic_page = page;
 	up_read(&current->mm->mmap_sem);
+
+	vcpu->arch.apic->vapic_page = page;
 }
 
 static void vapic_exit(struct kvm_vcpu *vcpu)
@@ -2959,9 +2964,9 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
 	gpa_t gpa;
 
 	vcpu_load(vcpu);
-	down_read(&current->mm->mmap_sem);
+	down_read(&vcpu->kvm->slots_lock);
 	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, vaddr);
-	up_read(&current->mm->mmap_sem);
+	up_read(&vcpu->kvm->slots_lock);
 	tr->physical_address = gpa;
 	tr->valid = gpa != UNMAPPED_GVA;
 	tr->writeable = 1;
@@ -3234,11 +3239,13 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 	 */
 	if (!user_alloc) {
 		if (npages && !old.rmap) {
+			down_write(&current->mm->mmap_sem);
 			memslot->userspace_addr = do_mmap(NULL, 0,
 						     npages * PAGE_SIZE,
 						     PROT_READ | PROT_WRITE,
 						     MAP_SHARED | MAP_ANONYMOUS,
 						     0);
+			up_write(&current->mm->mmap_sem);
 
 			if (IS_ERR((void *)memslot->userspace_addr))
 				return PTR_ERR((void *)memslot->userspace_addr);
@@ -3246,8 +3253,10 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 			if (!old.user_alloc && old.rmap) {
 				int ret;
 
+				down_write(&current->mm->mmap_sem);
 				ret = do_munmap(current->mm, old.userspace_addr,
 						old.npages * PAGE_SIZE);
+				up_write(&current->mm->mmap_sem);
 				if (ret < 0)
 					printk(KERN_WARNING
 				       "kvm_vm_ioctl_set_memory_region: "
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ea4764b0a2f..928b0d59e9b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -107,6 +107,7 @@ struct kvm_memory_slot {
 struct kvm {
 	struct mutex lock; /* protects the vcpus array and APIC accesses */
 	spinlock_t mmu_lock;
+	struct rw_semaphore slots_lock;
 	struct mm_struct *mm; /* userspace tied to this vm */
 	int nmemslots;
 	struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS +
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 32fbf800696..b2e12893e3f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -169,6 +169,7 @@ static struct kvm *kvm_create_vm(void)
 	kvm_io_bus_init(&kvm->pio_bus);
 	mutex_init(&kvm->lock);
 	kvm_io_bus_init(&kvm->mmio_bus);
+	init_rwsem(&kvm->slots_lock);
 	spin_lock(&kvm_lock);
 	list_add(&kvm->vm_list, &vm_list);
 	spin_unlock(&kvm_lock);
@@ -339,9 +340,9 @@ int kvm_set_memory_region(struct kvm *kvm,
 {
 	int r;
 
-	down_write(&current->mm->mmap_sem);
+	down_write(&kvm->slots_lock);
 	r = __kvm_set_memory_region(kvm, mem, user_alloc);
-	up_write(&current->mm->mmap_sem);
+	up_write(&kvm->slots_lock);
 	return r;
 }
 EXPORT_SYMBOL_GPL(kvm_set_memory_region);
-- 
cgit v1.2.3-70-g09d2


From e311f68a4e43ade048d7dbaa6b458fbe31114daf Mon Sep 17 00:00:00 2001
From: Greg Ungerer <gerg@snapgear.com>
Date: Tue, 4 Mar 2008 16:35:04 +1000
Subject: m68knommu: declare do_IRQ()

Need a declaration of do_IRQ for the 68328 interrupt handling code.
It is common to all m68knommu targets, so a common declaration makes
sense.

Signed-off-by: Greg Ungerer <gerg@uclinux.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-m68knommu/machdep.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/asm-m68knommu/machdep.h b/include/asm-m68knommu/machdep.h
index 1cf26d240d8..de9f47a51cc 100644
--- a/include/asm-m68knommu/machdep.h
+++ b/include/asm-m68knommu/machdep.h
@@ -21,4 +21,6 @@ extern void (*mach_power_off)( void );
 
 extern void config_BSP(char *command, int len);
 
+extern void do_IRQ(int irq, struct pt_regs *fp);
+
 #endif /* _M68KNOMMU_MACHDEP_H */
-- 
cgit v1.2.3-70-g09d2


From 8727e28ddebb031d80b5e261c98c24f1dcb9a82f Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Tue, 4 Mar 2008 09:18:16 +0100
Subject: m68k{,nommu}: Wire up new timerfd syscalls

m68k{,nommu}: Wire up the new timerfd syscalls, which were introduced in
commit 4d672e7ac79b5ec5cdc90e450823441e20464691 ("timerfd: new timerfd API").

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Greg Ungerer <gerg@uclinux.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/m68k/kernel/entry.S             | 4 +++-
 arch/m68knommu/kernel/syscalltable.S | 4 +++-
 include/asm-m68k/unistd.h            | 6 ++++--
 include/asm-m68knommu/unistd.h       | 6 ++++--
 4 files changed, 14 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S
index 6dfa3b3c0e2..18a9c5f4b00 100644
--- a/arch/m68k/kernel/entry.S
+++ b/arch/m68k/kernel/entry.S
@@ -742,7 +742,9 @@ sys_call_table:
 	.long sys_epoll_pwait		/* 315 */
 	.long sys_utimensat
 	.long sys_signalfd
-	.long sys_ni_syscall
+	.long sys_timerfd_create
 	.long sys_eventfd
 	.long sys_fallocate		/* 320 */
+	.long sys_timerfd_settime
+	.long sys_timerfd_gettime
 
diff --git a/arch/m68knommu/kernel/syscalltable.S b/arch/m68knommu/kernel/syscalltable.S
index 1b02b882006..fca2e49917a 100644
--- a/arch/m68knommu/kernel/syscalltable.S
+++ b/arch/m68knommu/kernel/syscalltable.S
@@ -336,9 +336,11 @@ ENTRY(sys_call_table)
 	.long sys_epoll_pwait		/* 315 */
 	.long sys_utimensat
 	.long sys_signalfd
-	.long sys_ni_syscall
+	.long sys_timerfd_create
 	.long sys_eventfd
 	.long sys_fallocate		/* 320 */
+	.long sys_timerfd_settime
+	.long sys_timerfd_gettime
 
 	.rept NR_syscalls-(.-sys_call_table)/4
 		.long sys_ni_syscall
diff --git a/include/asm-m68k/unistd.h b/include/asm-m68k/unistd.h
index 87f77b11931..e72ba563f10 100644
--- a/include/asm-m68k/unistd.h
+++ b/include/asm-m68k/unistd.h
@@ -320,13 +320,15 @@
 #define __NR_epoll_pwait	315
 #define __NR_utimensat		316
 #define __NR_signalfd		317
-#define __NR_timerfd		318
+#define __NR_timerfd_create	318
 #define __NR_eventfd		319
 #define __NR_fallocate		320
+#define __NR_timerfd_settime	321
+#define __NR_timerfd_gettime	322
 
 #ifdef __KERNEL__
 
-#define NR_syscalls		321
+#define NR_syscalls		323
 
 #define __ARCH_WANT_IPC_PARSE_VERSION
 #define __ARCH_WANT_OLD_READDIR
diff --git a/include/asm-m68knommu/unistd.h b/include/asm-m68knommu/unistd.h
index 27c2f9bb4db..4ba98b9c5d7 100644
--- a/include/asm-m68knommu/unistd.h
+++ b/include/asm-m68knommu/unistd.h
@@ -321,13 +321,15 @@
 #define __NR_epoll_pwait	315
 #define __NR_utimensat		316
 #define __NR_signalfd		317
-#define __NR_timerfd		318
+#define __NR_timerfd_create	318
 #define __NR_eventfd		319
 #define __NR_fallocate		320
+#define __NR_timerfd_settime	321
+#define __NR_timerfd_gettime	322
 
 #ifdef __KERNEL__
 
-#define NR_syscalls		321
+#define NR_syscalls		323
 
 #define __ARCH_WANT_IPC_PARSE_VERSION
 #define __ARCH_WANT_OLD_READDIR
-- 
cgit v1.2.3-70-g09d2


From 62fb185130e4d420f71a30ff59d8b16b74ef5d2b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 25 Feb 2008 17:34:02 +0100
Subject: sched: revert load_balance_monitor() changes

The following commits cause a number of regressions:

  commit 58e2d4ca581167c2a079f4ee02be2f0bc52e8729
  Author: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
  Date:   Fri Jan 25 21:08:00 2008 +0100
  sched: group scheduling, change how cpu load is calculated

  commit 6b2d7700266b9402e12824e11e0099ae6a4a6a79
  Author: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
  Date:   Fri Jan 25 21:08:00 2008 +0100
  sched: group scheduler, fix fairness of cpu bandwidth allocation for task groups

Namely:
 - very frequent wakeups on SMP, reported by PowerTop users.
 - cacheline trashing on (large) SMP
 - some latencies larger than 500ms

While there is a mergeable patch to fix the latter, the former issues
are not fixable in a manner suitable for .25 (we're at -rc3 now).

Hence we revert them and try again in v2.6.26.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Tested-by: Alexey Zaytsev <alexey.zaytsev@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h |   4 -
 kernel/sched.c        | 283 +++++++-------------------------------------------
 kernel/sched_fair.c   | 115 +++++++-------------
 kernel/sched_rt.c     |   4 -
 kernel/sysctl.c       |  18 ----
 5 files changed, 70 insertions(+), 354 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2c9621f8bf8..9ae4030067a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1542,10 +1542,6 @@ extern unsigned int sysctl_sched_child_runs_first;
 extern unsigned int sysctl_sched_features;
 extern unsigned int sysctl_sched_migration_cost;
 extern unsigned int sysctl_sched_nr_migrate;
-#if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP)
-extern unsigned int sysctl_sched_min_bal_int_shares;
-extern unsigned int sysctl_sched_max_bal_int_shares;
-#endif
 
 int sched_nr_latency_handler(struct ctl_table *table, int write,
 		struct file *file, void __user *buffer, size_t *length,
diff --git a/kernel/sched.c b/kernel/sched.c
index f06950c8a6c..dcd553cc4ee 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -174,41 +174,6 @@ struct task_group {
 	struct sched_entity **se;
 	/* runqueue "owned" by this group on each cpu */
 	struct cfs_rq **cfs_rq;
-
-	/*
-	 * shares assigned to a task group governs how much of cpu bandwidth
-	 * is allocated to the group. The more shares a group has, the more is
-	 * the cpu bandwidth allocated to it.
-	 *
-	 * For ex, lets say that there are three task groups, A, B and C which
-	 * have been assigned shares 1000, 2000 and 3000 respectively. Then,
-	 * cpu bandwidth allocated by the scheduler to task groups A, B and C
-	 * should be:
-	 *
-	 *	Bw(A) = 1000/(1000+2000+3000) * 100 = 16.66%
-	 *	Bw(B) = 2000/(1000+2000+3000) * 100 = 33.33%
-	 *	Bw(C) = 3000/(1000+2000+3000) * 100 = 50%
-	 *
-	 * The weight assigned to a task group's schedulable entities on every
-	 * cpu (task_group.se[a_cpu]->load.weight) is derived from the task
-	 * group's shares. For ex: lets say that task group A has been
-	 * assigned shares of 1000 and there are two CPUs in a system. Then,
-	 *
-	 *  tg_A->se[0]->load.weight = tg_A->se[1]->load.weight = 1000;
-	 *
-	 * Note: It's not necessary that each of a task's group schedulable
-	 *	 entity have the same weight on all CPUs. If the group
-	 *	 has 2 of its tasks on CPU0 and 1 task on CPU1, then a
-	 *	 better distribution of weight could be:
-	 *
-	 *	tg_A->se[0]->load.weight = 2/3 * 2000 = 1333
-	 *	tg_A->se[1]->load.weight = 1/2 * 2000 =  667
-	 *
-	 * rebalance_shares() is responsible for distributing the shares of a
-	 * task groups like this among the group's schedulable entities across
-	 * cpus.
-	 *
-	 */
 	unsigned long shares;
 #endif
 
@@ -250,22 +215,12 @@ static DEFINE_SPINLOCK(task_group_lock);
 static DEFINE_MUTEX(doms_cur_mutex);
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-#ifdef CONFIG_SMP
-/* kernel thread that runs rebalance_shares() periodically */
-static struct task_struct *lb_monitor_task;
-static int load_balance_monitor(void *unused);
-#endif
-
-static void set_se_shares(struct sched_entity *se, unsigned long shares);
-
 #ifdef CONFIG_USER_SCHED
 # define INIT_TASK_GROUP_LOAD	(2*NICE_0_LOAD)
 #else
 # define INIT_TASK_GROUP_LOAD	NICE_0_LOAD
 #endif
 
-#define MIN_GROUP_SHARES	2
-
 static int init_task_group_load = INIT_TASK_GROUP_LOAD;
 #endif
 
@@ -1245,16 +1200,6 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime);
 static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
 #endif
 
-static inline void inc_cpu_load(struct rq *rq, unsigned long load)
-{
-	update_load_add(&rq->load, load);
-}
-
-static inline void dec_cpu_load(struct rq *rq, unsigned long load)
-{
-	update_load_sub(&rq->load, load);
-}
-
 #ifdef CONFIG_SMP
 static unsigned long source_load(int cpu, int type);
 static unsigned long target_load(int cpu, int type);
@@ -1272,14 +1217,26 @@ static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
 
 #define sched_class_highest (&rt_sched_class)
 
-static void inc_nr_running(struct rq *rq)
+static inline void inc_load(struct rq *rq, const struct task_struct *p)
+{
+	update_load_add(&rq->load, p->se.load.weight);
+}
+
+static inline void dec_load(struct rq *rq, const struct task_struct *p)
+{
+	update_load_sub(&rq->load, p->se.load.weight);
+}
+
+static void inc_nr_running(struct task_struct *p, struct rq *rq)
 {
 	rq->nr_running++;
+	inc_load(rq, p);
 }
 
-static void dec_nr_running(struct rq *rq)
+static void dec_nr_running(struct task_struct *p, struct rq *rq)
 {
 	rq->nr_running--;
+	dec_load(rq, p);
 }
 
 static void set_load_weight(struct task_struct *p)
@@ -1371,7 +1328,7 @@ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
 		rq->nr_uninterruptible--;
 
 	enqueue_task(rq, p, wakeup);
-	inc_nr_running(rq);
+	inc_nr_running(p, rq);
 }
 
 /*
@@ -1383,7 +1340,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep)
 		rq->nr_uninterruptible++;
 
 	dequeue_task(rq, p, sleep);
-	dec_nr_running(rq);
+	dec_nr_running(p, rq);
 }
 
 /**
@@ -2023,7 +1980,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 		 * management (if any):
 		 */
 		p->sched_class->task_new(rq, p);
-		inc_nr_running(rq);
+		inc_nr_running(p, rq);
 	}
 	check_preempt_curr(rq, p);
 #ifdef CONFIG_SMP
@@ -4362,8 +4319,10 @@ void set_user_nice(struct task_struct *p, long nice)
 		goto out_unlock;
 	}
 	on_rq = p->se.on_rq;
-	if (on_rq)
+	if (on_rq) {
 		dequeue_task(rq, p, 0);
+		dec_load(rq, p);
+	}
 
 	p->static_prio = NICE_TO_PRIO(nice);
 	set_load_weight(p);
@@ -4373,6 +4332,7 @@ void set_user_nice(struct task_struct *p, long nice)
 
 	if (on_rq) {
 		enqueue_task(rq, p, 0);
+		inc_load(rq, p);
 		/*
 		 * If the task increased its priority or is running and
 		 * lowered its priority, then reschedule its CPU:
@@ -7087,21 +7047,6 @@ void __init sched_init_smp(void)
 	if (set_cpus_allowed(current, non_isolated_cpus) < 0)
 		BUG();
 	sched_init_granularity();
-
-#ifdef CONFIG_FAIR_GROUP_SCHED
-	if (nr_cpu_ids == 1)
-		return;
-
-	lb_monitor_task = kthread_create(load_balance_monitor, NULL,
-					 "group_balance");
-	if (!IS_ERR(lb_monitor_task)) {
-		lb_monitor_task->flags |= PF_NOFREEZE;
-		wake_up_process(lb_monitor_task);
-	} else {
-		printk(KERN_ERR "Could not create load balance monitor thread"
-			"(error = %ld) \n", PTR_ERR(lb_monitor_task));
-	}
-#endif
 }
 #else
 void __init sched_init_smp(void)
@@ -7424,157 +7369,6 @@ void set_curr_task(int cpu, struct task_struct *p)
 
 #ifdef CONFIG_GROUP_SCHED
 
-#if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP
-/*
- * distribute shares of all task groups among their schedulable entities,
- * to reflect load distribution across cpus.
- */
-static int rebalance_shares(struct sched_domain *sd, int this_cpu)
-{
-	struct cfs_rq *cfs_rq;
-	struct rq *rq = cpu_rq(this_cpu);
-	cpumask_t sdspan = sd->span;
-	int balanced = 1;
-
-	/* Walk thr' all the task groups that we have */
-	for_each_leaf_cfs_rq(rq, cfs_rq) {
-		int i;
-		unsigned long total_load = 0, total_shares;
-		struct task_group *tg = cfs_rq->tg;
-
-		/* Gather total task load of this group across cpus */
-		for_each_cpu_mask(i, sdspan)
-			total_load += tg->cfs_rq[i]->load.weight;
-
-		/* Nothing to do if this group has no load */
-		if (!total_load)
-			continue;
-
-		/*
-		 * tg->shares represents the number of cpu shares the task group
-		 * is eligible to hold on a single cpu. On N cpus, it is
-		 * eligible to hold (N * tg->shares) number of cpu shares.
-		 */
-		total_shares = tg->shares * cpus_weight(sdspan);
-
-		/*
-		 * redistribute total_shares across cpus as per the task load
-		 * distribution.
-		 */
-		for_each_cpu_mask(i, sdspan) {
-			unsigned long local_load, local_shares;
-
-			local_load = tg->cfs_rq[i]->load.weight;
-			local_shares = (local_load * total_shares) / total_load;
-			if (!local_shares)
-				local_shares = MIN_GROUP_SHARES;
-			if (local_shares == tg->se[i]->load.weight)
-				continue;
-
-			spin_lock_irq(&cpu_rq(i)->lock);
-			set_se_shares(tg->se[i], local_shares);
-			spin_unlock_irq(&cpu_rq(i)->lock);
-			balanced = 0;
-		}
-	}
-
-	return balanced;
-}
-
-/*
- * How frequently should we rebalance_shares() across cpus?
- *
- * The more frequently we rebalance shares, the more accurate is the fairness
- * of cpu bandwidth distribution between task groups. However higher frequency
- * also implies increased scheduling overhead.
- *
- * sysctl_sched_min_bal_int_shares represents the minimum interval between
- * consecutive calls to rebalance_shares() in the same sched domain.
- *
- * sysctl_sched_max_bal_int_shares represents the maximum interval between
- * consecutive calls to rebalance_shares() in the same sched domain.
- *
- * These settings allows for the appropriate trade-off between accuracy of
- * fairness and the associated overhead.
- *
- */
-
-/* default: 8ms, units: milliseconds */
-const_debug unsigned int sysctl_sched_min_bal_int_shares = 8;
-
-/* default: 128ms, units: milliseconds */
-const_debug unsigned int sysctl_sched_max_bal_int_shares = 128;
-
-/* kernel thread that runs rebalance_shares() periodically */
-static int load_balance_monitor(void *unused)
-{
-	unsigned int timeout = sysctl_sched_min_bal_int_shares;
-	struct sched_param schedparm;
-	int ret;
-
-	/*
-	 * We don't want this thread's execution to be limited by the shares
-	 * assigned to default group (init_task_group). Hence make it run
-	 * as a SCHED_RR RT task at the lowest priority.
-	 */
-	schedparm.sched_priority = 1;
-	ret = sched_setscheduler(current, SCHED_RR, &schedparm);
-	if (ret)
-		printk(KERN_ERR "Couldn't set SCHED_RR policy for load balance"
-				" monitor thread (error = %d) \n", ret);
-
-	while (!kthread_should_stop()) {
-		int i, cpu, balanced = 1;
-
-		/* Prevent cpus going down or coming up */
-		get_online_cpus();
-		/* lockout changes to doms_cur[] array */
-		lock_doms_cur();
-		/*
-		 * Enter a rcu read-side critical section to safely walk rq->sd
-		 * chain on various cpus and to walk task group list
-		 * (rq->leaf_cfs_rq_list) in rebalance_shares().
-		 */
-		rcu_read_lock();
-
-		for (i = 0; i < ndoms_cur; i++) {
-			cpumask_t cpumap = doms_cur[i];
-			struct sched_domain *sd = NULL, *sd_prev = NULL;
-
-			cpu = first_cpu(cpumap);
-
-			/* Find the highest domain at which to balance shares */
-			for_each_domain(cpu, sd) {
-				if (!(sd->flags & SD_LOAD_BALANCE))
-					continue;
-				sd_prev = sd;
-			}
-
-			sd = sd_prev;
-			/* sd == NULL? No load balance reqd in this domain */
-			if (!sd)
-				continue;
-
-			balanced &= rebalance_shares(sd, cpu);
-		}
-
-		rcu_read_unlock();
-
-		unlock_doms_cur();
-		put_online_cpus();
-
-		if (!balanced)
-			timeout = sysctl_sched_min_bal_int_shares;
-		else if (timeout < sysctl_sched_max_bal_int_shares)
-			timeout *= 2;
-
-		msleep_interruptible(timeout);
-	}
-
-	return 0;
-}
-#endif	/* CONFIG_SMP */
-
 #ifdef CONFIG_FAIR_GROUP_SCHED
 static void free_fair_sched_group(struct task_group *tg)
 {
@@ -7841,29 +7635,25 @@ void sched_move_task(struct task_struct *tsk)
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-/* rq->lock to be locked by caller */
 static void set_se_shares(struct sched_entity *se, unsigned long shares)
 {
 	struct cfs_rq *cfs_rq = se->cfs_rq;
 	struct rq *rq = cfs_rq->rq;
 	int on_rq;
 
-	if (!shares)
-		shares = MIN_GROUP_SHARES;
+	spin_lock_irq(&rq->lock);
 
 	on_rq = se->on_rq;
-	if (on_rq) {
+	if (on_rq)
 		dequeue_entity(cfs_rq, se, 0);
-		dec_cpu_load(rq, se->load.weight);
-	}
 
 	se->load.weight = shares;
 	se->load.inv_weight = div64_64((1ULL<<32), shares);
 
-	if (on_rq) {
+	if (on_rq)
 		enqueue_entity(cfs_rq, se, 0);
-		inc_cpu_load(rq, se->load.weight);
-	}
+
+	spin_unlock_irq(&rq->lock);
 }
 
 static DEFINE_MUTEX(shares_mutex);
@@ -7873,18 +7663,18 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
 	int i;
 	unsigned long flags;
 
+	/*
+	 * A weight of 0 or 1 can cause arithmetics problems.
+	 * (The default weight is 1024 - so there's no practical
+	 *  limitation from this.)
+	 */
+	if (shares < 2)
+		shares = 2;
+
 	mutex_lock(&shares_mutex);
 	if (tg->shares == shares)
 		goto done;
 
-	if (shares < MIN_GROUP_SHARES)
-		shares = MIN_GROUP_SHARES;
-
-	/*
-	 * Prevent any load balance activity (rebalance_shares,
-	 * load_balance_fair) from referring to this group first,
-	 * by taking it off the rq->leaf_cfs_rq_list on each cpu.
-	 */
 	spin_lock_irqsave(&task_group_lock, flags);
 	for_each_possible_cpu(i)
 		unregister_fair_sched_group(tg, i);
@@ -7898,11 +7688,8 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
 	 * w/o tripping rebalance_share or load_balance_fair.
 	 */
 	tg->shares = shares;
-	for_each_possible_cpu(i) {
-		spin_lock_irq(&cpu_rq(i)->lock);
+	for_each_possible_cpu(i)
 		set_se_shares(tg->se[i], shares);
-		spin_unlock_irq(&cpu_rq(i)->lock);
-	}
 
 	/*
 	 * Enable load balance activity on this group, by inserting it back on
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index c8e6492c592..3df4d46994c 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -727,8 +727,6 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se)
 	return se->parent;
 }
 
-#define GROUP_IMBALANCE_PCT	20
-
 #else	/* CONFIG_FAIR_GROUP_SCHED */
 
 #define for_each_sched_entity(se) \
@@ -819,26 +817,15 @@ hrtick_start_fair(struct rq *rq, struct task_struct *p)
 static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
 {
 	struct cfs_rq *cfs_rq;
-	struct sched_entity *se = &p->se,
-			    *topse = NULL;	/* Highest schedulable entity */
-	int incload = 1;
+	struct sched_entity *se = &p->se;
 
 	for_each_sched_entity(se) {
-		topse = se;
-		if (se->on_rq) {
-			incload = 0;
+		if (se->on_rq)
 			break;
-		}
 		cfs_rq = cfs_rq_of(se);
 		enqueue_entity(cfs_rq, se, wakeup);
 		wakeup = 1;
 	}
-	/* Increment cpu load if we just enqueued the first task of a group on
-	 * 'rq->cpu'. 'topse' represents the group to which task 'p' belongs
-	 * at the highest grouping level.
-	 */
-	if (incload)
-		inc_cpu_load(rq, topse->load.weight);
 
 	hrtick_start_fair(rq, rq->curr);
 }
@@ -851,28 +838,16 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
 static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep)
 {
 	struct cfs_rq *cfs_rq;
-	struct sched_entity *se = &p->se,
-			    *topse = NULL; 	/* Highest schedulable entity */
-	int decload = 1;
+	struct sched_entity *se = &p->se;
 
 	for_each_sched_entity(se) {
-		topse = se;
 		cfs_rq = cfs_rq_of(se);
 		dequeue_entity(cfs_rq, se, sleep);
 		/* Don't dequeue parent if it has other entities besides us */
-		if (cfs_rq->load.weight) {
-			if (parent_entity(se))
-				decload = 0;
+		if (cfs_rq->load.weight)
 			break;
-		}
 		sleep = 1;
 	}
-	/* Decrement cpu load if we just dequeued the last task of a group on
-	 * 'rq->cpu'. 'topse' represents the group to which task 'p' belongs
-	 * at the highest grouping level.
-	 */
-	if (decload)
-		dec_cpu_load(rq, topse->load.weight);
 
 	hrtick_start_fair(rq, rq->curr);
 }
@@ -1186,6 +1161,25 @@ static struct task_struct *load_balance_next_fair(void *arg)
 	return __load_balance_iterator(cfs_rq, cfs_rq->rb_load_balance_curr);
 }
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
+{
+	struct sched_entity *curr;
+	struct task_struct *p;
+
+	if (!cfs_rq->nr_running || !first_fair(cfs_rq))
+		return MAX_PRIO;
+
+	curr = cfs_rq->curr;
+	if (!curr)
+		curr = __pick_next_entity(cfs_rq);
+
+	p = task_of(curr);
+
+	return p->prio;
+}
+#endif
+
 static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		  unsigned long max_load_move,
@@ -1195,45 +1189,28 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 	struct cfs_rq *busy_cfs_rq;
 	long rem_load_move = max_load_move;
 	struct rq_iterator cfs_rq_iterator;
-	unsigned long load_moved;
 
 	cfs_rq_iterator.start = load_balance_start_fair;
 	cfs_rq_iterator.next = load_balance_next_fair;
 
 	for_each_leaf_cfs_rq(busiest, busy_cfs_rq) {
 #ifdef CONFIG_FAIR_GROUP_SCHED
-		struct cfs_rq *this_cfs_rq = busy_cfs_rq->tg->cfs_rq[this_cpu];
-		unsigned long maxload, task_load, group_weight;
-		unsigned long thisload, per_task_load;
-		struct sched_entity *se = busy_cfs_rq->tg->se[busiest->cpu];
-
-		task_load = busy_cfs_rq->load.weight;
-		group_weight = se->load.weight;
+		struct cfs_rq *this_cfs_rq;
+		long imbalance;
+		unsigned long maxload;
 
-		/*
-		 * 'group_weight' is contributed by tasks of total weight
-		 * 'task_load'. To move 'rem_load_move' worth of weight only,
-		 * we need to move a maximum task load of:
-		 *
-		 * 	maxload = (remload / group_weight) * task_load;
-		 */
-		maxload = (rem_load_move * task_load) / group_weight;
+		this_cfs_rq = cpu_cfs_rq(busy_cfs_rq, this_cpu);
 
-		if (!maxload || !task_load)
+		imbalance = busy_cfs_rq->load.weight - this_cfs_rq->load.weight;
+		/* Don't pull if this_cfs_rq has more load than busy_cfs_rq */
+		if (imbalance <= 0)
 			continue;
 
-		per_task_load = task_load / busy_cfs_rq->nr_running;
-		/*
-		 * balance_tasks will try to forcibly move atleast one task if
-		 * possible (because of SCHED_LOAD_SCALE_FUZZ). Avoid that if
-		 * maxload is less than GROUP_IMBALANCE_FUZZ% the per_task_load.
-		 */
-		 if (100 * maxload < GROUP_IMBALANCE_PCT * per_task_load)
-			continue;
+		/* Don't pull more than imbalance/2 */
+		imbalance /= 2;
+		maxload = min(rem_load_move, imbalance);
 
-		/* Disable priority-based load balance */
-		*this_best_prio = 0;
-		thisload = this_cfs_rq->load.weight;
+		*this_best_prio = cfs_rq_best_prio(this_cfs_rq);
 #else
 # define maxload rem_load_move
 #endif
@@ -1242,33 +1219,11 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		 * load_balance_[start|next]_fair iterators
 		 */
 		cfs_rq_iterator.arg = busy_cfs_rq;
-		load_moved = balance_tasks(this_rq, this_cpu, busiest,
+		rem_load_move -= balance_tasks(this_rq, this_cpu, busiest,
 					       maxload, sd, idle, all_pinned,
 					       this_best_prio,
 					       &cfs_rq_iterator);
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
-		/*
-		 * load_moved holds the task load that was moved. The
-		 * effective (group) weight moved would be:
-		 * 	load_moved_eff = load_moved/task_load * group_weight;
-		 */
-		load_moved = (group_weight * load_moved) / task_load;
-
-		/* Adjust shares on both cpus to reflect load_moved */
-		group_weight -= load_moved;
-		set_se_shares(se, group_weight);
-
-		se = busy_cfs_rq->tg->se[this_cpu];
-		if (!thisload)
-			group_weight = load_moved;
-		else
-			group_weight = se->load.weight + load_moved;
-		set_se_shares(se, group_weight);
-#endif
-
-		rem_load_move -= load_moved;
-
 		if (rem_load_move <= 0)
 			break;
 	}
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index f54792b175b..76e82851754 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -393,8 +393,6 @@ static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
 	 */
 	for_each_sched_rt_entity(rt_se)
 		enqueue_rt_entity(rt_se);
-
-	inc_cpu_load(rq, p->se.load.weight);
 }
 
 static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
@@ -414,8 +412,6 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
 		if (rt_rq && rt_rq->rt_nr_running)
 			enqueue_rt_entity(rt_se);
 	}
-
-	dec_cpu_load(rq, p->se.load.weight);
 }
 
 /*
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8b7e9541179..b2a2d6889ba 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -311,24 +311,6 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
-#if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP)
-	{
-		.ctl_name       = CTL_UNNUMBERED,
-		.procname       = "sched_min_bal_int_shares",
-		.data           = &sysctl_sched_min_bal_int_shares,
-		.maxlen         = sizeof(unsigned int),
-		.mode           = 0644,
-		.proc_handler   = &proc_dointvec,
-	},
-	{
-		.ctl_name       = CTL_UNNUMBERED,
-		.procname       = "sched_max_bal_int_shares",
-		.data           = &sysctl_sched_max_bal_int_shares,
-		.maxlen         = sizeof(unsigned int),
-		.mode           = 0644,
-		.proc_handler   = &proc_dointvec,
-	},
-#endif
 #endif
 	{
 		.ctl_name	= CTL_UNNUMBERED,
-- 
cgit v1.2.3-70-g09d2


From ec8670f1f795badedaa056a3a3245b9b82201747 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sat, 1 Mar 2008 07:51:29 -0700
Subject: dmaengine: fix sparse warning

include/linux/dmaengine.h:364:2: warning: returning void-valued expression

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/dmaengine.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index acbb364674f..261e43a4c87 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -366,7 +366,7 @@ __dma_has_cap(enum dma_transaction_type tx_type, dma_cap_mask_t *srcp)
  */
 static inline void dma_async_issue_pending(struct dma_chan *chan)
 {
-	return chan->device->device_issue_pending(chan);
+	chan->device->device_issue_pending(chan);
 }
 
 #define dma_async_memcpy_issue_pending(chan) dma_async_issue_pending(chan)
-- 
cgit v1.2.3-70-g09d2


From d9452e9f81e997cbd0c9bface8d2c2a4b064cc3e Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 4 Mar 2008 12:28:49 -0800
Subject: [NETPOLL]: Revert two bogus cleanups that broke netconsole.

Based upon a report by Andrew Morton and code analysis done
by Jarek Poplawski.

This reverts 33f807ba0d9259e7c75c7a2ce8bd2787e5b540c7 ("[NETPOLL]:
Kill NETPOLL_RX_DROP, set but never tested.")  and
c7b6ea24b43afb5749cb704e143df19d70e23dea ("[NETPOLL]: Don't need
rx_flags.").

The rx_flags did get tested for zero vs. non-zero and therefore we do
need those tests and that code which sets NETPOLL_RX_DROP et al.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netpoll.h |  7 ++++---
 net/core/netpoll.c      | 12 ++++++++----
 2 files changed, 12 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index a0525a1f471..e3d79593fb3 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -25,6 +25,7 @@ struct netpoll {
 
 struct netpoll_info {
 	atomic_t refcnt;
+	int rx_flags;
 	spinlock_t rx_lock;
 	struct netpoll *rx_np; /* netpoll that registered an rx_hook */
 	struct sk_buff_head arp_tx; /* list of arp requests to reply to */
@@ -50,12 +51,12 @@ static inline int netpoll_rx(struct sk_buff *skb)
 	unsigned long flags;
 	int ret = 0;
 
-	if (!npinfo || !npinfo->rx_np)
+	if (!npinfo || (!npinfo->rx_np && !npinfo->rx_flags))
 		return 0;
 
 	spin_lock_irqsave(&npinfo->rx_lock, flags);
-	/* check rx_np again with the lock held */
-	if (npinfo->rx_np && __netpoll_rx(skb))
+	/* check rx_flags again with the lock held */
+	if (npinfo->rx_flags && __netpoll_rx(skb))
 		ret = 1;
 	spin_unlock_irqrestore(&npinfo->rx_lock, flags);
 
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 6faa128a4c8..4b7e756181c 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -39,6 +39,8 @@ static struct sk_buff_head skb_pool;
 static atomic_t trapped;
 
 #define USEC_PER_POLL	50
+#define NETPOLL_RX_ENABLED  1
+#define NETPOLL_RX_DROP     2
 
 #define MAX_SKB_SIZE \
 		(MAX_UDP_CHUNK + sizeof(struct udphdr) + \
@@ -126,11 +128,13 @@ static int poll_one_napi(struct netpoll_info *npinfo,
 	if (!test_bit(NAPI_STATE_SCHED, &napi->state))
 		return budget;
 
+	npinfo->rx_flags |= NETPOLL_RX_DROP;
 	atomic_inc(&trapped);
 
 	work = napi->poll(napi, budget);
 
 	atomic_dec(&trapped);
+	npinfo->rx_flags &= ~NETPOLL_RX_DROP;
 
 	return budget - work;
 }
@@ -472,7 +476,7 @@ int __netpoll_rx(struct sk_buff *skb)
 	if (skb->dev->type != ARPHRD_ETHER)
 		goto out;
 
-	/* if receive ARP during middle of NAPI poll, then queue */
+	/* check if netpoll clients need ARP */
 	if (skb->protocol == htons(ETH_P_ARP) &&
 	    atomic_read(&trapped)) {
 		skb_queue_tail(&npi->arp_tx, skb);
@@ -534,9 +538,6 @@ int __netpoll_rx(struct sk_buff *skb)
 	return 1;
 
 out:
-	/* If packet received while already in poll then just
-	 * silently drop.
-	 */
 	if (atomic_read(&trapped)) {
 		kfree_skb(skb);
 		return 1;
@@ -675,6 +676,7 @@ int netpoll_setup(struct netpoll *np)
 			goto release;
 		}
 
+		npinfo->rx_flags = 0;
 		npinfo->rx_np = NULL;
 
 		spin_lock_init(&npinfo->rx_lock);
@@ -756,6 +758,7 @@ int netpoll_setup(struct netpoll *np)
 
 	if (np->rx_hook) {
 		spin_lock_irqsave(&npinfo->rx_lock, flags);
+		npinfo->rx_flags |= NETPOLL_RX_ENABLED;
 		npinfo->rx_np = np;
 		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
 	}
@@ -797,6 +800,7 @@ void netpoll_cleanup(struct netpoll *np)
 			if (npinfo->rx_np == np) {
 				spin_lock_irqsave(&npinfo->rx_lock, flags);
 				npinfo->rx_np = NULL;
+				npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
 				spin_unlock_irqrestore(&npinfo->rx_lock, flags);
 			}
 
-- 
cgit v1.2.3-70-g09d2


From a6cd6322d594014240465210ccb290971469c6e8 Mon Sep 17 00:00:00 2001
From: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Date: Mon, 25 Feb 2008 14:32:22 +0900
Subject: [IA64] Fix irq migration in multiple vector domain

Fix the problem that the following error message is sometimes displayed
at irq migration when vector domain is enabled.

    "Unexpected interrupt vector %d on CPU %d is not mapped to any IRQ!"

The cause of this problem is an interrupt is sent to the previous
target CPU after cleaning up vector to irq mapping table. To clean up
vector to irq map on the previous target CPU safty, change the irq
migration in multiple vector domain as follows. The original idea is
from x86 interrupt management code.

    - Delay vector to irq table cleanup until the interrupts are sent
      to new target CPUs. By this, it is ensured that target CPU is
      completely changed on the interrupt controller side.

    - Even after the interrupts are sent to new target CPUs, there can
      be pended interrupts remaining on the previous target CPU. So we
      need to delay clearning up vector to irq table until the pended
      interrupt is handled. For this, send IPI to the previous target
      CPU with lower priority vector and clean up vector to irq table
      in its handler.

This patch affects only to irq migration code with multiple vector
domain is enabled.

Signed-off-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/iosapic.c  |   4 +-
 arch/ia64/kernel/irq_ia64.c | 134 ++++++++++++++++++++++++++++++++++----------
 arch/ia64/kernel/msi_ia64.c |   3 +-
 include/asm-ia64/hw_irq.h   |  12 +++-
 4 files changed, 120 insertions(+), 33 deletions(-)

(limited to 'include')

diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index 398e2fd1cd2..7b3292282de 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -345,7 +345,7 @@ iosapic_set_affinity (unsigned int irq, cpumask_t mask)
 	if (cpus_empty(mask))
 		return;
 
-	if (reassign_irq_vector(irq, first_cpu(mask)))
+	if (irq_prepare_move(irq, first_cpu(mask)))
 		return;
 
 	dest = cpu_physical_id(first_cpu(mask));
@@ -397,6 +397,7 @@ iosapic_end_level_irq (unsigned int irq)
 	struct iosapic_rte_info *rte;
 	int do_unmask_irq = 0;
 
+	irq_complete_move(irq);
 	if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) {
 		do_unmask_irq = 1;
 		mask_irq(irq);
@@ -450,6 +451,7 @@ iosapic_ack_edge_irq (unsigned int irq)
 {
 	irq_desc_t *idesc = irq_desc + irq;
 
+	irq_complete_move(irq);
 	move_native_irq(irq);
 	/*
 	 * Once we have recorded IRQ_PENDING already, we can mask the
diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c
index 0b52f19ed04..2b8cf6e85af 100644
--- a/arch/ia64/kernel/irq_ia64.c
+++ b/arch/ia64/kernel/irq_ia64.c
@@ -260,6 +260,8 @@ void __setup_vector_irq(int cpu)
 }
 
 #if defined(CONFIG_SMP) && (defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_DIG))
+#define IA64_IRQ_MOVE_VECTOR	IA64_DEF_FIRST_DEVICE_VECTOR
+
 static enum vector_domain_type {
 	VECTOR_DOMAIN_NONE,
 	VECTOR_DOMAIN_PERCPU
@@ -272,6 +274,101 @@ static cpumask_t vector_allocation_domain(int cpu)
 	return CPU_MASK_ALL;
 }
 
+static int __irq_prepare_move(int irq, int cpu)
+{
+	struct irq_cfg *cfg = &irq_cfg[irq];
+	int vector;
+	cpumask_t domain;
+
+	if (cfg->move_in_progress || cfg->move_cleanup_count)
+		return -EBUSY;
+	if (cfg->vector == IRQ_VECTOR_UNASSIGNED || !cpu_online(cpu))
+		return -EINVAL;
+	if (cpu_isset(cpu, cfg->domain))
+		return 0;
+	domain = vector_allocation_domain(cpu);
+	vector = find_unassigned_vector(domain);
+	if (vector < 0)
+		return -ENOSPC;
+	cfg->move_in_progress = 1;
+	cfg->old_domain = cfg->domain;
+	cfg->vector = IRQ_VECTOR_UNASSIGNED;
+	cfg->domain = CPU_MASK_NONE;
+	BUG_ON(__bind_irq_vector(irq, vector, domain));
+	return 0;
+}
+
+int irq_prepare_move(int irq, int cpu)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&vector_lock, flags);
+	ret = __irq_prepare_move(irq, cpu);
+	spin_unlock_irqrestore(&vector_lock, flags);
+	return ret;
+}
+
+void irq_complete_move(unsigned irq)
+{
+	struct irq_cfg *cfg = &irq_cfg[irq];
+	cpumask_t cleanup_mask;
+	int i;
+
+	if (likely(!cfg->move_in_progress))
+		return;
+
+	if (unlikely(cpu_isset(smp_processor_id(), cfg->old_domain)))
+		return;
+
+	cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
+	cfg->move_cleanup_count = cpus_weight(cleanup_mask);
+	for_each_cpu_mask(i, cleanup_mask)
+		platform_send_ipi(i, IA64_IRQ_MOVE_VECTOR, IA64_IPI_DM_INT, 0);
+	cfg->move_in_progress = 0;
+}
+
+static irqreturn_t smp_irq_move_cleanup_interrupt(int irq, void *dev_id)
+{
+	int me = smp_processor_id();
+	ia64_vector vector;
+	unsigned long flags;
+
+	for (vector = IA64_FIRST_DEVICE_VECTOR;
+	     vector < IA64_LAST_DEVICE_VECTOR; vector++) {
+		int irq;
+		struct irq_desc *desc;
+		struct irq_cfg *cfg;
+		irq = __get_cpu_var(vector_irq)[vector];
+		if (irq < 0)
+			continue;
+
+		desc = irq_desc + irq;
+		cfg = irq_cfg + irq;
+		spin_lock(&desc->lock);
+		if (!cfg->move_cleanup_count)
+			goto unlock;
+
+		if (!cpu_isset(me, cfg->old_domain))
+			goto unlock;
+
+		spin_lock_irqsave(&vector_lock, flags);
+		__get_cpu_var(vector_irq)[vector] = -1;
+		cpu_clear(me, vector_table[vector]);
+		spin_unlock_irqrestore(&vector_lock, flags);
+		cfg->move_cleanup_count--;
+	unlock:
+		spin_unlock(&desc->lock);
+	}
+	return IRQ_HANDLED;
+}
+
+static struct irqaction irq_move_irqaction = {
+	.handler =	smp_irq_move_cleanup_interrupt,
+	.flags =	IRQF_DISABLED,
+	.name =		"irq_move"
+};
+
 static int __init parse_vector_domain(char *arg)
 {
 	if (!arg)
@@ -303,36 +400,6 @@ void destroy_and_reserve_irq(unsigned int irq)
 	spin_unlock_irqrestore(&vector_lock, flags);
 }
 
-static int __reassign_irq_vector(int irq, int cpu)
-{
-	struct irq_cfg *cfg = &irq_cfg[irq];
-	int vector;
-	cpumask_t domain;
-
-	if (cfg->vector == IRQ_VECTOR_UNASSIGNED || !cpu_online(cpu))
-		return -EINVAL;
-	if (cpu_isset(cpu, cfg->domain))
-		return 0;
-	domain = vector_allocation_domain(cpu);
-	vector = find_unassigned_vector(domain);
-	if (vector < 0)
-		return -ENOSPC;
-	__clear_irq_vector(irq);
-	BUG_ON(__bind_irq_vector(irq, vector, domain));
-	return 0;
-}
-
-int reassign_irq_vector(int irq, int cpu)
-{
-	unsigned long flags;
-	int ret;
-
-	spin_lock_irqsave(&vector_lock, flags);
-	ret = __reassign_irq_vector(irq, cpu);
-	spin_unlock_irqrestore(&vector_lock, flags);
-	return ret;
-}
-
 /*
  * Dynamic irq allocate and deallocation for MSI
  */
@@ -578,6 +645,13 @@ init_IRQ (void)
 	register_percpu_irq(IA64_IPI_VECTOR, &ipi_irqaction);
 	register_percpu_irq(IA64_IPI_RESCHEDULE, &resched_irqaction);
 	register_percpu_irq(IA64_IPI_LOCAL_TLB_FLUSH, &tlb_irqaction);
+#if defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_DIG)
+	if (vector_domain_type != VECTOR_DOMAIN_NONE) {
+		BUG_ON(IA64_FIRST_DEVICE_VECTOR != IA64_IRQ_MOVE_VECTOR);
+		IA64_FIRST_DEVICE_VECTOR++;
+		register_percpu_irq(IA64_IRQ_MOVE_VECTOR, &irq_move_irqaction);
+	}
+#endif
 #endif
 #ifdef CONFIG_PERFMON
 	pfm_init_percpu();
diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c
index e86d0295979..60c6ef67ebb 100644
--- a/arch/ia64/kernel/msi_ia64.c
+++ b/arch/ia64/kernel/msi_ia64.c
@@ -57,7 +57,7 @@ static void ia64_set_msi_irq_affinity(unsigned int irq, cpumask_t cpu_mask)
 	if (!cpu_online(cpu))
 		return;
 
-	if (reassign_irq_vector(irq, cpu))
+	if (irq_prepare_move(irq, cpu))
 		return;
 
 	read_msi_msg(irq, &msg);
@@ -119,6 +119,7 @@ void ia64_teardown_msi_irq(unsigned int irq)
 
 static void ia64_ack_msi_irq(unsigned int irq)
 {
+	irq_complete_move(irq);
 	move_native_irq(irq);
 	ia64_eoi();
 }
diff --git a/include/asm-ia64/hw_irq.h b/include/asm-ia64/hw_irq.h
index 7e6e3779670..76366dc9c1a 100644
--- a/include/asm-ia64/hw_irq.h
+++ b/include/asm-ia64/hw_irq.h
@@ -93,6 +93,9 @@ extern __u8 isa_irq_to_vector_map[16];
 struct irq_cfg {
 	ia64_vector vector;
 	cpumask_t domain;
+	cpumask_t old_domain;
+	unsigned move_cleanup_count;
+	u8 move_in_progress : 1;
 };
 extern spinlock_t vector_lock;
 extern struct irq_cfg irq_cfg[NR_IRQS];
@@ -106,12 +109,19 @@ extern int assign_irq_vector (int irq);	/* allocate a free vector */
 extern void free_irq_vector (int vector);
 extern int reserve_irq_vector (int vector);
 extern void __setup_vector_irq(int cpu);
-extern int reassign_irq_vector(int irq, int cpu);
 extern void ia64_send_ipi (int cpu, int vector, int delivery_mode, int redirect);
 extern void register_percpu_irq (ia64_vector vec, struct irqaction *action);
 extern int check_irq_used (int irq);
 extern void destroy_and_reserve_irq (unsigned int irq);
 
+#if defined(CONFIG_SMP) && (defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_DIG))
+extern int irq_prepare_move(int irq, int cpu);
+extern void irq_complete_move(unsigned int irq);
+#else
+static inline int irq_prepare_move(int irq, int cpu) { return 0; }
+static inline void irq_complete_move(unsigned int irq) {}
+#endif
+
 static inline void ia64_resend_irq(unsigned int vector)
 {
 	platform_send_ipi(smp_processor_id(), vector, IA64_IPI_DM_INT, 0);
-- 
cgit v1.2.3-70-g09d2


From 6ed0dc5ba811ce682f48988bf114669265e1120d Mon Sep 17 00:00:00 2001
From: Alex Chiang <achiang@hp.com>
Date: Wed, 27 Feb 2008 18:41:38 -0700
Subject: [IA64] workaround tiger ia64_sal_get_physical_id_info hang

This fixes regression introduced in 113134fcbca83619be4c68d0ca66db6093777b5d

Intel Tiger platforms hang when calling SAL_GET_PHYSICAL_ID_INFO
instead of properly returning -1 for unimplemented, so add a
version check.

SGI Altix platforms have an incorrect SAL version hard-coded into
their prom -- they encode 2.9, but actually implement 3.2 -- so
fix it up and allow ia64_sal_get_physical_id_info to keep
working.

Signed-off-by: Alex Chiang <achiang@hp.com>
Acked-by: Russ Anderson <rja@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/sal.c | 7 +++++++
 include/asm-ia64/sal.h | 4 ++++
 2 files changed, 11 insertions(+)

(limited to 'include')

diff --git a/arch/ia64/kernel/sal.c b/arch/ia64/kernel/sal.c
index f44fe841216..a3022dc48ef 100644
--- a/arch/ia64/kernel/sal.c
+++ b/arch/ia64/kernel/sal.c
@@ -109,6 +109,13 @@ check_versions (struct ia64_sal_systab *systab)
 		sal_revision = SAL_VERSION_CODE(2, 8);
 		sal_version = SAL_VERSION_CODE(0, 0);
 	}
+
+	if (ia64_platform_is("sn2") && (sal_revision == SAL_VERSION_CODE(2, 9)))
+		/*
+		 * SGI Altix has hard-coded version 2.9 in their prom
+		 * but they actually implement 3.2, so let's fix it here.
+		 */
+		sal_revision = SAL_VERSION_CODE(3, 2);
 }
 
 static void __init
diff --git a/include/asm-ia64/sal.h b/include/asm-ia64/sal.h
index 2251118894a..f4904db3b05 100644
--- a/include/asm-ia64/sal.h
+++ b/include/asm-ia64/sal.h
@@ -807,6 +807,10 @@ static inline s64
 ia64_sal_physical_id_info(u16 *splid)
 {
 	struct ia64_sal_retval isrv;
+
+	if (sal_revision < SAL_VERSION_CODE(3,2))
+		return -1;
+
 	SAL_CALL(isrv, SAL_PHYSICAL_ID_INFO, 0, 0, 0, 0, 0, 0, 0);
 	if (splid)
 		*splid = isrv.v0;
-- 
cgit v1.2.3-70-g09d2


From 956d6cad87abdfaef35fa4fc2f2e4ac5bb4ee7a5 Mon Sep 17 00:00:00 2001
From: Doug Chapman <doug.chapman@hp.com>
Date: Fri, 29 Feb 2008 15:28:43 -0500
Subject: [IA64] move gcc_intrin.h from header-y to unifdef-y

When I submitted 0df29025fd0379d5950d206314d0b10a2c8a9607 to ad
an #ifdef __KERNEL__ to include/asm-ia64/gcc_intrin.h a few weeks
ago I neglected to move gcc_intrin.h from header-y to unifdef-y.
Thanks to David Woodhouse for pointing this out.

Signed-off-by: Doug Chapman <doug.chapman@hp.com>
Signed-off-by: Jarod Wilson <jwilson@redhat.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 include/asm-ia64/Kbuild | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/asm-ia64/Kbuild b/include/asm-ia64/Kbuild
index 4a1e48b9f40..eb24a3f47ca 100644
--- a/include/asm-ia64/Kbuild
+++ b/include/asm-ia64/Kbuild
@@ -3,7 +3,6 @@ include include/asm-generic/Kbuild.asm
 header-y += break.h
 header-y += fpu.h
 header-y += fpswa.h
-header-y += gcc_intrin.h
 header-y += ia64regs.h
 header-y += intel_intrin.h
 header-y += intrinsics.h
@@ -12,5 +11,6 @@ header-y += ptrace_offsets.h
 header-y += rse.h
 header-y += ucontext.h
 
+unifdef-y += gcc_intrin.h
 unifdef-y += perfmon.h
 unifdef-y += ustack.h
-- 
cgit v1.2.3-70-g09d2


From 7adc3830f90df04a13366914d80a3ed407db5381 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 4 Mar 2008 14:28:41 -0800
Subject: [TCP]: Improve ipv4 established hash function.

If all of the entropy is in the local and foreign addresses,
but xor'ing together would cancel out that entropy, the
current hash performs poorly.

Suggested by Cosmin Ratiu:

	Basically, the situation is as follows: There is a client
	machine and a server machine. Both create 15000 virtual
	interfaces, open up a socket for each pair of interfaces and
	do SIP traffic. By profiling I noticed that there is a lot of
	time spent walking the established hash chains with this
	particular setup.

	The addresses were distributed like this: client interfaces
	were 198.18.0.1/16 with increments of 1 and server interfaces
	were 198.18.128.1/16 with increments of 1. As I said, there
	were 15000 interfaces. Source and destination ports were 5060
	for each connection.  So in this case, ports don't matter for
	hashing purposes, and the bits from the address pairs used
	cancel each other, meaning there are no differences in the
	whole lot of pairs, so they all end up in the same hash chain.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_sock.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 70013c5f4e5..89cd011edb9 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -175,7 +175,8 @@ extern void build_ehash_secret(void);
 static inline unsigned int inet_ehashfn(const __be32 laddr, const __u16 lport,
 					const __be32 faddr, const __be16 fport)
 {
-	return jhash_2words((__force __u32) laddr ^ (__force __u32) faddr,
+	return jhash_3words((__force __u32) laddr,
+			    (__force __u32) faddr,
 			    ((__u32) lport) << 16 | (__force __u32)fport,
 			    inet_ehash_secret);
 }
-- 
cgit v1.2.3-70-g09d2


From 9dad6f5785a9f113dbbd58951d2f5ef9abd06dcc Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Mon, 3 Mar 2008 20:07:22 +0200
Subject: [IA64] fix ia64 kprobes compilation

This patch fixes the following compile error with a recent gcc:
  CC      kernel/kprobes.o
/home/bunk/linux/kernel-2.6/git/linux-2.6/kernel/kprobes.c:1066: error: __ksymtab_jprobe_return causes a section type conflict

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/kprobes.c | 5 +++++
 include/asm-ia64/kprobes.h | 4 ----
 2 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index b618487cdc8..615c3d2b634 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -1001,6 +1001,11 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 	return 1;
 }
 
+/* ia64 does not need this */
+void __kprobes jprobe_return(void)
+{
+}
+
 int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 {
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
diff --git a/include/asm-ia64/kprobes.h b/include/asm-ia64/kprobes.h
index a93ce9ef07f..49684b65c18 100644
--- a/include/asm-ia64/kprobes.h
+++ b/include/asm-ia64/kprobes.h
@@ -122,10 +122,6 @@ extern int kprobes_fault_handler(struct pt_regs *regs, int trapnr);
 extern int kprobe_exceptions_notify(struct notifier_block *self,
 				    unsigned long val, void *data);
 
-/* ia64 does not need this */
-static inline void jprobe_return(void)
-{
-}
 extern void invalidate_stacked_regs(void);
 extern void flush_register_stack(void);
 extern void arch_remove_kprobe(struct kprobe *p);
-- 
cgit v1.2.3-70-g09d2


From 3634634edd49c115da931998b9540bcc17665b05 Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Wed, 13 Feb 2008 17:08:16 -0800
Subject: debugfs: fix sparse warnings

extern does not belong in C files, move declaration to linux/debugfs.h
fs/debugfs/file.c:42:30: warning: symbol 'debugfs_file_operations' was not declared. Should it be static?
fs/debugfs/file.c:54:31: warning: symbol 'debugfs_link_operations' was not declared. Should it be static?

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/debugfs/inode.c      | 4 ----
 include/linux/debugfs.h | 5 +++++
 2 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index d26e2826ba5..e9602d85c11 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -29,10 +29,6 @@
 
 #define DEBUGFS_MAGIC	0x64626720
 
-/* declared over in file.c */
-extern struct file_operations debugfs_file_operations;
-extern struct inode_operations debugfs_link_operations;
-
 static struct vfsmount *debugfs_mount;
 static int debugfs_mount_count;
 
diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h
index f592d6de3b9..7266124361b 100644
--- a/include/linux/debugfs.h
+++ b/include/linux/debugfs.h
@@ -27,6 +27,11 @@ struct debugfs_blob_wrapper {
 };
 
 #if defined(CONFIG_DEBUG_FS)
+
+/* declared over in file.c */
+extern const struct file_operations debugfs_file_operations;
+extern const struct inode_operations debugfs_link_operations;
+
 struct dentry *debugfs_create_file(const char *name, mode_t mode,
 				   struct dentry *parent, void *data,
 				   const struct file_operations *fops);
-- 
cgit v1.2.3-70-g09d2


From d6d914f52b15d5a8e81ad481e02d9ab30d412a29 Mon Sep 17 00:00:00 2001
From: Lei Ming <tom.leiming@gmail.com>
Date: Mon, 25 Feb 2008 18:07:28 +0800
Subject: USB: fix comment of struct usb_interface

update the comment for the removed "driver" field  and  being
out-of-order of  @cur_altsetting and @num_altsetting.

Signed-off-by: Lei Ming <tom.leiming@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index 5bd3ae8aaaf..583e0481dfa 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -94,10 +94,9 @@ enum usb_interface_condition {
  * @altsetting: array of interface structures, one for each alternate
  * 	setting that may be selected.  Each one includes a set of
  * 	endpoint configurations.  They will be in no particular order.
- * @num_altsetting: number of altsettings defined.
  * @cur_altsetting: the current altsetting.
+ * @num_altsetting: number of altsettings defined.
  * @intf_assoc: interface association descriptor
- * @driver: the USB driver that is bound to this interface.
  * @minor: the minor number assigned to this interface, if this
  *	interface is bound to a driver that uses the USB major number.
  *	If this interface does not use the USB major, this field should
-- 
cgit v1.2.3-70-g09d2


From 90a1ba0c5e39eeea278f263c28ae02166c5911c8 Mon Sep 17 00:00:00 2001
From: Jonas Bonn <jonas@southpole.se>
Date: Fri, 22 Feb 2008 11:02:21 +0100
Subject: PCI: Add DECLARE_PCI_DEVICE_TABLE macro

The definitions of struct pci_device_id arrays should generally follow
the same pattern across the entire kernel.  This macro defines this
array as const and puts it into the __devinitconst section.

There are currently many definitions scattered about the kernel that
omit the __devinitdata modifier despite the documentation stating that
it should always be there.  These definitions really also should have
been const, which wasn't possible before but has become so with the
addition of the __devinitconst attribute.

Furthermore, there are definitions that use "const" and __devinitdata,
which is explicitly wrong but the compiler doesn't catch section
mismatches if there's only one such one case in the module (which is
often the case).

Adding the __devinitconst modifier where there was nothing before buys
us memory.  Adding the const modifier gives the compiler a chance to do
its thing.  Changing __devinitdata to __devinitconst where it was wrong
actually fixes some compiler errors in older (mid-release) kernels that
were patched over by "removing" the section attribute altogether (which
wastes memory).

This macro makes it pretty difficult to get this definition wrong in
the future...

Signed-off-by: Jonas Bonn <jonas@southpole.se>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 Documentation/pci.txt |  6 ++++--
 include/linux/pci.h   | 10 ++++++++++
 2 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/Documentation/pci.txt b/Documentation/pci.txt
index 72b20c63959..bb7bd27d468 100644
--- a/Documentation/pci.txt
+++ b/Documentation/pci.txt
@@ -123,7 +123,8 @@ initialization with a pointer to a structure describing the driver
 
 
 The ID table is an array of struct pci_device_id entries ending with an
-all-zero entry.  Each entry consists of:
+all-zero entry; use of the macro DECLARE_PCI_DEVICE_TABLE is the preferred
+method of declaring the table.  Each entry consists of:
 
 	vendor,device	Vendor and device ID to match (or PCI_ANY_ID)
 
@@ -191,7 +192,8 @@ Tips on when/where to use the above attributes:
 
 	o Do not mark the struct pci_driver.
 
-	o The ID table array should be marked __devinitdata.
+	o The ID table array should be marked __devinitconst; this is done
+	  automatically if the table is declared with DECLARE_PCI_DEVICE_TABLE().
 
 	o The probe() and remove() functions should be marked __devinit
 	  and __devexit respectively.  All initialization functions
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 87195b62de5..f3165e7ac43 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -388,6 +388,16 @@ struct pci_driver {
 
 #define	to_pci_driver(drv) container_of(drv, struct pci_driver, driver)
 
+/**
+ * DECLARE_PCI_DEVICE_TABLE - macro used to describe a pci device table
+ * @_table: device table name
+ *
+ * This macro is used to create a struct pci_device_id array (a device table)
+ * in a generic manner.
+ */
+#define DECLARE_PCI_DEVICE_TABLE(_table) \
+	const struct pci_device_id _table[] __devinitconst
+
 /**
  * PCI_DEVICE - macro used to describe a specific pci device
  * @vend: the 16 bit PCI Vendor ID
-- 
cgit v1.2.3-70-g09d2


From 7560fa60fcdcdb0da662f6a9fad9064b554ef46c Mon Sep 17 00:00:00 2001
From: David Brownell <david-b@pacbell.net>
Date: Tue, 4 Mar 2008 14:28:27 -0800
Subject: gpio: <linux/gpio.h> and "no GPIO support here" stubs

Add a <linux/gpio.h> defining fail/warn stubs for GPIO calls on platforms that
don't support the GPIO programming interface.  That includes the arch-specific
implementation glue otherwise.

This facilitates a new model for GPIO usage: drivers that can use GPIOs if
they're available, but don't require them.  One example of such a driver is
NAND driver for various FreeScale chips.  On platforms update with GPIO
support, they can be used instead of a worst-case delay to verify that the
BUSY signal is off.

(Also includes a couple minor unrelated doc updates.)

Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/gpio.txt | 16 ++++++---
 include/linux/gpio.h   | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 107 insertions(+), 4 deletions(-)
 create mode 100644 include/linux/gpio.h

(limited to 'include')

diff --git a/Documentation/gpio.txt b/Documentation/gpio.txt
index 8da724e2a0f..54630095aa3 100644
--- a/Documentation/gpio.txt
+++ b/Documentation/gpio.txt
@@ -2,6 +2,9 @@ GPIO Interfaces
 
 This provides an overview of GPIO access conventions on Linux.
 
+These calls use the gpio_* naming prefix.  No other calls should use that
+prefix, or the related __gpio_* prefix.
+
 
 What is a GPIO?
 ===============
@@ -69,11 +72,13 @@ in this document, but drivers acting as clients to the GPIO interface must
 not care how it's implemented.)
 
 That said, if the convention is supported on their platform, drivers should
-use it when possible.  Platforms should declare GENERIC_GPIO support in
-Kconfig (boolean true), which multi-platform drivers can depend on when
-using the include file:
+use it when possible.  Platforms must declare GENERIC_GPIO support in their
+Kconfig (boolean true), and provide an <asm/gpio.h> file.  Drivers that can't
+work without standard GPIO calls should have Kconfig entries which depend
+on GENERIC_GPIO.  The GPIO calls are available, either as "real code" or as
+optimized-away stubs, when drivers use the include file:
 
-	#include <asm/gpio.h>
+	#include <linux/gpio.h>
 
 If you stick to this convention then it'll be easier for other developers to
 see what your code is doing, and help maintain it.
@@ -316,6 +321,9 @@ pulldowns integrated on some platforms.  Not all platforms support them,
 or support them in the same way; and any given board might use external
 pullups (or pulldowns) so that the on-chip ones should not be used.
 (When a circuit needs 5 kOhm, on-chip 100 kOhm resistors won't do.)
+Likewise drive strength (2 mA vs 20 mA) and voltage (1.8V vs 3.3V) is a
+platform-specific issue, as are models like (not) having a one-to-one
+correspondence between configurable pins and GPIOs.
 
 There are other system-specific mechanisms that are not specified here,
 like the aforementioned options for input de-glitching and wire-OR output.
diff --git a/include/linux/gpio.h b/include/linux/gpio.h
new file mode 100644
index 00000000000..4987a84078e
--- /dev/null
+++ b/include/linux/gpio.h
@@ -0,0 +1,95 @@
+#ifndef __LINUX_GPIO_H
+#define __LINUX_GPIO_H
+
+/* see Documentation/gpio.txt */
+
+#ifdef CONFIG_GENERIC_GPIO
+#include <asm/gpio.h>
+
+#else
+
+/*
+ * Some platforms don't support the GPIO programming interface.
+ *
+ * In case some driver uses it anyway (it should normally have
+ * depended on GENERIC_GPIO), these routines help the compiler
+ * optimize out much GPIO-related code ... or trigger a runtime
+ * warning when something is wrongly called.
+ */
+
+static inline int gpio_is_valid(int number)
+{
+	return 0;
+}
+
+static inline int gpio_request(unsigned gpio, const char *label)
+{
+	return -ENOSYS;
+}
+
+static inline void gpio_free(unsigned gpio)
+{
+	/* GPIO can never have been requested */
+	WARN_ON(1);
+}
+
+static inline int gpio_direction_input(unsigned gpio)
+{
+	return -ENOSYS;
+}
+
+static inline int gpio_direction_output(unsigned gpio, int value)
+{
+	return -ENOSYS;
+}
+
+static inline int gpio_get_value(unsigned gpio)
+{
+	/* GPIO can never have been requested or set as {in,out}put */
+	WARN_ON(1);
+	return 0;
+}
+
+static inline void gpio_set_value(unsigned gpio, int value)
+{
+	/* GPIO can never have been requested or set as output */
+	WARN_ON(1);
+}
+
+static inline int gpio_cansleep(unsigned gpio)
+{
+	/* GPIO can never have been requested or set as {in,out}put */
+	WARN_ON(1);
+	return 0;
+}
+
+static inline int gpio_get_value_cansleep(unsigned gpio)
+{
+	/* GPIO can never have been requested or set as {in,out}put */
+	WARN_ON(1);
+	return 0;
+}
+
+static inline void gpio_set_value_cansleep(unsigned gpio, int value)
+{
+	/* GPIO can never have been requested or set as output */
+	WARN_ON(1);
+}
+
+static inline int gpio_to_irq(unsigned gpio)
+{
+	/* GPIO can never have been requested or set as input */
+	WARN_ON(1);
+	return -EINVAL;
+}
+
+static inline int irq_to_gpio(unsigned irq)
+{
+	/* irq can never have been returned from gpio_to_irq() */
+	WARN_ON(1);
+	return -EINVAL;
+}
+
+#endif
+
+#endif /* __LINUX_GPIO_H */
-- 
cgit v1.2.3-70-g09d2


From 9edddaa200df18e08fe0cf21036e8ae467b1363c Mon Sep 17 00:00:00 2001
From: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Date: Tue, 4 Mar 2008 14:28:37 -0800
Subject: Kprobes: indicate kretprobe support in Kconfig

Add CONFIG_HAVE_KRETPROBES to the arch/<arch>/Kconfig file for relevant
architectures with kprobes support.  This facilitates easy handling of
in-kernel modules (like samples/kprobes/kretprobe_example.c) that depend on
kretprobes being present in the kernel.

Thanks to Sam Ravnborg for helping make the patch more lean.

Per Mathieu's suggestion, added CONFIG_KRETPROBES and fixed up dependencies.

Signed-off-by: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Acked-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/Kconfig                  | 7 +++++++
 arch/arm/Kconfig              | 1 +
 arch/ia64/Kconfig             | 1 +
 arch/powerpc/Kconfig          | 1 +
 arch/s390/Kconfig             | 1 +
 arch/sparc64/Kconfig          | 1 +
 arch/x86/Kconfig              | 1 +
 include/asm-arm/kprobes.h     | 1 -
 include/asm-ia64/kprobes.h    | 1 -
 include/asm-powerpc/kprobes.h | 1 -
 include/asm-s390/kprobes.h    | 1 -
 include/asm-sparc64/kprobes.h | 2 --
 include/asm-x86/kprobes.h     | 1 -
 include/linux/kprobes.h       | 6 +++---
 kernel/kprobes.c              | 9 +++------
 15 files changed, 19 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/arch/Kconfig b/arch/Kconfig
index 3d72dc3fc8f..694c9af520b 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -27,5 +27,12 @@ config KPROBES
 	  for kernel debugging, non-intrusive instrumentation and testing.
 	  If in doubt, say "N".
 
+config KRETPROBES
+	def_bool y
+	depends on KPROBES && HAVE_KRETPROBES
+
 config HAVE_KPROBES
 	def_bool n
+
+config HAVE_KRETPROBES
+	def_bool n
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 16b82e1272b..955fc53c1c0 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -12,6 +12,7 @@ config ARM
 	select SYS_SUPPORTS_APM_EMULATION
 	select HAVE_OPROFILE
 	select HAVE_KPROBES if (!XIP_KERNEL)
+	select HAVE_KRETPROBES if (HAVE_KPROBES)
 	help
 	  The ARM series is a line of low-power-consumption RISC chip designs
 	  licensed by ARM Ltd and targeted at embedded applications and
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index dff9edfc746..56762d3c2a6 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -18,6 +18,7 @@ config IA64
 	select HAVE_IDE
 	select HAVE_OPROFILE
 	select HAVE_KPROBES
+	select HAVE_KRETPROBES
 	default y
 	help
 	  The Itanium Processor Family is Intel's 64-bit successor to
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 5b8d8382b76..1189d8d6170 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -90,6 +90,7 @@ config PPC
 	select HAVE_IDE
 	select HAVE_OPROFILE
 	select HAVE_KPROBES
+	select HAVE_KRETPROBES
 
 config EARLY_PRINTK
 	bool
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index b21444b681b..9892827b617 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -61,6 +61,7 @@ config S390
 	def_bool y
 	select HAVE_OPROFILE
 	select HAVE_KPROBES
+	select HAVE_KRETPROBES
 
 source "init/Kconfig"
 
diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
index 3af378ddb6a..463d1be32c9 100644
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -10,6 +10,7 @@ config SPARC
 	default y
 	select HAVE_OPROFILE
 	select HAVE_KPROBES
+	select HAVE_KRETPROBES
 
 config SPARC64
 	bool
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 53800b80a20..f41c9538ca3 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -21,6 +21,7 @@ config X86
 	select HAVE_IDE
 	select HAVE_OPROFILE
 	select HAVE_KPROBES
+	select HAVE_KRETPROBES
 	select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
 
 
diff --git a/include/asm-arm/kprobes.h b/include/asm-arm/kprobes.h
index 4e7bd32288a..c042194d3ab 100644
--- a/include/asm-arm/kprobes.h
+++ b/include/asm-arm/kprobes.h
@@ -20,7 +20,6 @@
 #include <linux/ptrace.h>
 #include <linux/percpu.h>
 
-#define ARCH_SUPPORTS_KRETPROBES
 #define __ARCH_WANT_KPROBES_INSN_SLOT
 #define MAX_INSN_SIZE			2
 #define MAX_STACK_SIZE			64	/* 32 would probably be OK */
diff --git a/include/asm-ia64/kprobes.h b/include/asm-ia64/kprobes.h
index a93ce9ef07f..adbaba14eb0 100644
--- a/include/asm-ia64/kprobes.h
+++ b/include/asm-ia64/kprobes.h
@@ -82,7 +82,6 @@ struct kprobe_ctlblk {
 	struct prev_kprobe prev_kprobe[ARCH_PREV_KPROBE_SZ];
 };
 
-#define ARCH_SUPPORTS_KRETPROBES
 #define kretprobe_blacklist_size 0
 
 #define SLOT0_OPCODE_SHIFT	(37)
diff --git a/include/asm-powerpc/kprobes.h b/include/asm-powerpc/kprobes.h
index afabad230db..d0e7701fa1f 100644
--- a/include/asm-powerpc/kprobes.h
+++ b/include/asm-powerpc/kprobes.h
@@ -80,7 +80,6 @@ typedef unsigned int kprobe_opcode_t;
 #define is_trap(instr)	(IS_TW(instr) || IS_TWI(instr))
 #endif
 
-#define ARCH_SUPPORTS_KRETPROBES
 #define flush_insn_slot(p)	do { } while (0)
 #define kretprobe_blacklist_size 0
 
diff --git a/include/asm-s390/kprobes.h b/include/asm-s390/kprobes.h
index 948db3d0d05..330f68caffe 100644
--- a/include/asm-s390/kprobes.h
+++ b/include/asm-s390/kprobes.h
@@ -46,7 +46,6 @@ typedef u16 kprobe_opcode_t;
 	? (MAX_STACK_SIZE) \
 	: (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR)))
 
-#define ARCH_SUPPORTS_KRETPROBES
 #define kretprobe_blacklist_size 0
 
 #define KPROBE_SWAP_INST	0x10
diff --git a/include/asm-sparc64/kprobes.h b/include/asm-sparc64/kprobes.h
index 7237dd87663..5879d71afda 100644
--- a/include/asm-sparc64/kprobes.h
+++ b/include/asm-sparc64/kprobes.h
@@ -14,8 +14,6 @@ typedef u32 kprobe_opcode_t;
 
 #define arch_remove_kprobe(p)	do {} while (0)
 
-#define ARCH_SUPPORTS_KRETPROBES
-
 #define flush_insn_slot(p)		\
 do { 	flushi(&(p)->ainsn.insn[0]);	\
 	flushi(&(p)->ainsn.insn[1]);	\
diff --git a/include/asm-x86/kprobes.h b/include/asm-x86/kprobes.h
index 143476a3cb5..61ad7b5d142 100644
--- a/include/asm-x86/kprobes.h
+++ b/include/asm-x86/kprobes.h
@@ -42,7 +42,6 @@ typedef u8 kprobe_opcode_t;
 	: (((unsigned long)current_thread_info()) + THREAD_SIZE \
 	   - (unsigned long)(ADDR)))
 
-#define ARCH_SUPPORTS_KRETPROBES
 #define flush_insn_slot(p)	do { } while (0)
 
 extern const int kretprobe_blacklist_size;
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 4a6ce82ba03..0f28486f636 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -125,11 +125,11 @@ struct jprobe {
 DECLARE_PER_CPU(struct kprobe *, current_kprobe);
 DECLARE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
 
-#ifdef ARCH_SUPPORTS_KRETPROBES
+#ifdef CONFIG_KRETPROBES
 extern void arch_prepare_kretprobe(struct kretprobe_instance *ri,
 				   struct pt_regs *regs);
 extern int arch_trampoline_kprobe(struct kprobe *p);
-#else /* ARCH_SUPPORTS_KRETPROBES */
+#else /* CONFIG_KRETPROBES */
 static inline void arch_prepare_kretprobe(struct kretprobe *rp,
 					struct pt_regs *regs)
 {
@@ -138,7 +138,7 @@ static inline int arch_trampoline_kprobe(struct kprobe *p)
 {
 	return 0;
 }
-#endif /* ARCH_SUPPORTS_KRETPROBES */
+#endif /* CONFIG_KRETPROBES */
 /*
  * Function-return probe -
  * Note:
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 7a86e643233..e6a61dcbc57 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -678,8 +678,7 @@ void __kprobes unregister_jprobe(struct jprobe *jp)
 	unregister_kprobe(&jp->kp);
 }
 
-#ifdef ARCH_SUPPORTS_KRETPROBES
-
+#ifdef CONFIG_KRETPROBES
 /*
  * This kprobe pre_handler is registered with every kretprobe. When probe
  * hits it will set up the return probe.
@@ -769,8 +768,7 @@ int __kprobes register_kretprobe(struct kretprobe *rp)
 	return ret;
 }
 
-#else /* ARCH_SUPPORTS_KRETPROBES */
-
+#else /* CONFIG_KRETPROBES */
 int __kprobes register_kretprobe(struct kretprobe *rp)
 {
 	return -ENOSYS;
@@ -781,8 +779,7 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p,
 {
 	return 0;
 }
-
-#endif /* ARCH_SUPPORTS_KRETPROBES */
+#endif /* CONFIG_KRETPROBES */
 
 void __kprobes unregister_kretprobe(struct kretprobe *rp)
 {
-- 
cgit v1.2.3-70-g09d2


From 00f0b8259e48979c37212995d798f3fbd0374690 Mon Sep 17 00:00:00 2001
From: Balbir Singh <balbir@linux.vnet.ibm.com>
Date: Tue, 4 Mar 2008 14:28:39 -0800
Subject: Memory controller: rename to Memory Resource Controller

Rename Memory Controller to Memory Resource Controller.  Reflect the same
changes in the CONFIG definition for the Memory Resource Controller.  Group
together the config options for Resource Counters and Memory Resource
Controller.

Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Paul Menage <menage@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/controllers/memory.txt |  8 ++++++--
 include/linux/cgroup_subsys.h        |  2 +-
 include/linux/memcontrol.h           |  4 ++--
 include/linux/mm_types.h             |  4 ++--
 init/Kconfig                         | 30 +++++++++++++++---------------
 mm/Makefile                          |  2 +-
 mm/oom_kill.c                        |  2 +-
 mm/vmscan.c                          |  4 ++--
 8 files changed, 30 insertions(+), 26 deletions(-)

(limited to 'include')

diff --git a/Documentation/controllers/memory.txt b/Documentation/controllers/memory.txt
index fba6af45225..866b9cd9a95 100644
--- a/Documentation/controllers/memory.txt
+++ b/Documentation/controllers/memory.txt
@@ -1,4 +1,8 @@
-Memory Controller
+Memory Resource Controller
+
+NOTE: The Memory Resource Controller has been generically been referred
+to as the memory controller in this document. Do not confuse memory controller
+used here with the memory controller that is used in hardware.
 
 Salient features
 
@@ -152,7 +156,7 @@ The memory controller uses the following hierarchy
 
 a. Enable CONFIG_CGROUPS
 b. Enable CONFIG_RESOURCE_COUNTERS
-c. Enable CONFIG_CGROUP_MEM_CONT
+c. Enable CONFIG_CGROUP_MEM_RES_CTLR
 
 1. Prepare the cgroups
 # mkdir -p /cgroups
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index ac6aad98b60..1ddebfc5256 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -37,7 +37,7 @@ SUBSYS(cpuacct)
 
 /* */
 
-#ifdef CONFIG_CGROUP_MEM_CONT
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
 SUBSYS(mem_cgroup)
 #endif
 
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 04075628cb9..a8be8073b9e 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -25,7 +25,7 @@ struct page_cgroup;
 struct page;
 struct mm_struct;
 
-#ifdef CONFIG_CGROUP_MEM_CONT
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
 
 extern void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p);
 extern void mm_free_cgroup(struct mm_struct *mm);
@@ -72,7 +72,7 @@ extern long mem_cgroup_calc_reclaim_active(struct mem_cgroup *mem,
 extern long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem,
 				struct zone *zone, int priority);
 
-#else /* CONFIG_CGROUP_MEM_CONT */
+#else /* CONFIG_CGROUP_MEM_RES_CTLR */
 static inline void mm_init_cgroup(struct mm_struct *mm,
 					struct task_struct *p)
 {
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 34023c65d46..af190ceab97 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -88,7 +88,7 @@ struct page {
 	void *virtual;			/* Kernel virtual address (NULL if
 					   not kmapped, ie. highmem) */
 #endif /* WANT_PAGE_VIRTUAL */
-#ifdef CONFIG_CGROUP_MEM_CONT
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
 	unsigned long page_cgroup;
 #endif
 };
@@ -222,7 +222,7 @@ struct mm_struct {
 	/* aio bits */
 	rwlock_t		ioctx_list_lock;
 	struct kioctx		*ioctx_list;
-#ifdef CONFIG_CGROUP_MEM_CONT
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
 	struct mem_cgroup *mem_cgroup;
 #endif
 };
diff --git a/init/Kconfig b/init/Kconfig
index f698a5af500..442850b984b 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -366,6 +366,21 @@ config RESOURCE_COUNTERS
           infrastructure that works with cgroups
 	depends on CGROUPS
 
+config CGROUP_MEM_RES_CTLR
+	bool "Memory Resource Controller for Control Groups"
+	depends on CGROUPS && RESOURCE_COUNTERS
+	help
+	  Provides a memory resource controller that manages both page cache and
+	  RSS memory.
+
+	  Note that setting this option increases fixed memory overhead
+	  associated with each page of memory in the system by 4/8 bytes
+	  and also increases cache misses because struct page on many 64bit
+	  systems will not fit into a single cache line anymore.
+
+	  Only enable when you're ok with these trade offs and really
+	  sure you need the memory resource controller.
+
 config SYSFS_DEPRECATED
 	bool "Create deprecated sysfs files"
 	depends on SYSFS
@@ -387,21 +402,6 @@ config SYSFS_DEPRECATED
 	  If you are using a distro that was released in 2006 or later,
 	  it should be safe to say N here.
 
-config CGROUP_MEM_CONT
-	bool "Memory controller for cgroups"
-	depends on CGROUPS && RESOURCE_COUNTERS
-	help
-	  Provides a memory controller that manages both page cache and
-	  RSS memory.
-
-	  Note that setting this option increases fixed memory overhead
-	  associated with each page of memory in the system by 4/8 bytes
-	  and also increases cache misses because struct page on many 64bit
-	  systems will not fit into a single cache line anymore.
-
-	  Only enable when you're ok with these trade offs and really
-	  sure you need the memory controller.
-
 config PROC_PID_CPUSET
 	bool "Include legacy /proc/<pid>/cpuset file"
 	depends on CPUSETS
diff --git a/mm/Makefile b/mm/Makefile
index 9f117bab532..a5b0dd93427 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -32,5 +32,5 @@ obj-$(CONFIG_FS_XIP) += filemap_xip.o
 obj-$(CONFIG_MIGRATION) += migrate.o
 obj-$(CONFIG_SMP) += allocpercpu.o
 obj-$(CONFIG_QUICKLIST) += quicklist.o
-obj-$(CONFIG_CGROUP_MEM_CONT) += memcontrol.o
+obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 4194b9db010..44b2da11bf4 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -412,7 +412,7 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
 	return oom_kill_task(p);
 }
 
-#ifdef CONFIG_CGROUP_MEM_CONT
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
 void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask)
 {
 	unsigned long points = 0;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a26dabd62fe..106ba10e1ac 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -126,7 +126,7 @@ long vm_total_pages;	/* The total number of pages which the VM controls */
 static LIST_HEAD(shrinker_list);
 static DECLARE_RWSEM(shrinker_rwsem);
 
-#ifdef CONFIG_CGROUP_MEM_CONT
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
 #define scan_global_lru(sc)	(!(sc)->mem_cgroup)
 #else
 #define scan_global_lru(sc)	(1)
@@ -1427,7 +1427,7 @@ unsigned long try_to_free_pages(struct zone **zones, int order, gfp_t gfp_mask)
 	return do_try_to_free_pages(zones, gfp_mask, &sc);
 }
 
-#ifdef CONFIG_CGROUP_MEM_CONT
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
 
 unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
 						gfp_t gfp_mask)
-- 
cgit v1.2.3-70-g09d2


From 735c4fb916e9f83a9350aeb2680d77d01ea75094 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Tue, 4 Mar 2008 14:28:40 -0800
Subject: add noinline_for_stack

People are adding `noinline' in various places to prevent excess stack
consumption due to gcc inlining.  But once this is done, it is quite unobvious
why the `noinline' is present in the code.  We can comment each and every
site, or we can use noinline_for_stack.

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index d0e17e1657d..dcae0c8d97e 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -138,6 +138,12 @@ extern void __chk_io_ptr(const volatile void __iomem *);
 #define noinline
 #endif
 
+/*
+ * Rather then using noinline to prevent stack consumption, use
+ * noinline_for_stack instead.  For documentaiton reasons.
+ */
+#define noinline_for_stack noinline
+
 #ifndef __always_inline
 #define __always_inline inline
 #endif
-- 
cgit v1.2.3-70-g09d2


From 5cba6d22e35a05adb28fdea191b232501518c455 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Tue, 4 Mar 2008 14:28:45 -0800
Subject: ndelay(): switch to C function to avoid 64-bit division

We should be able to do ndelay(some_u64), but that can cause a call to
__divdi3() to be emitted because the ndelay() macros does a divide.

Fix it by switching to static inline which will force the u64 arg to be
treated as an unsigned long.  udelay() takes an unsigned long arg.

[bunk@kernel.org: reported m68k build breakage]
Cc: Adrian Bunk <bunk@kernel.org>
Cc: Evgeniy Polyakov <johnpol@2ka.mipt.ru>
Cc: Martin Michlmayr <tbm@cyrius.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/delay.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/delay.h b/include/linux/delay.h
index 17ddb55430a..54552d21296 100644
--- a/include/linux/delay.h
+++ b/include/linux/delay.h
@@ -7,6 +7,8 @@
  * Delay routines, using a pre-computed "loops_per_jiffy" value.
  */
 
+#include <linux/kernel.h>
+
 extern unsigned long loops_per_jiffy;
 
 #include <asm/delay.h>
@@ -32,7 +34,11 @@ extern unsigned long loops_per_jiffy;
 #endif
 
 #ifndef ndelay
-#define ndelay(x)	udelay(((x)+999)/1000)
+static inline void ndelay(unsigned long x)
+{
+	udelay(DIV_ROUND_UP(x, 1000));
+}
+#define ndelay(x) ndelay(x)
 #endif
 
 void calibrate_delay(void);
-- 
cgit v1.2.3-70-g09d2


From 3149be50d3a31df095bcc83d752293da65a37f62 Mon Sep 17 00:00:00 2001
From: Ville Syrjala <syrjala@sci.fi>
Date: Tue, 4 Mar 2008 14:28:50 -0800
Subject: sm501: add support for the SM502 programmable PLL

SM502 has a programmable PLL which can provide the panel pixel clock instead
of the 288MHz and 336MHz PLLs.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Ville Syrjala <syrjala@sci.fi>
Cc: Ben Dooks <ben-linux@fluff.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/mfd/sm501.c        | 163 +++++++++++++++++++++++++++++++++++----------
 include/linux/sm501-regs.h |   3 +
 include/linux/sm501.h      |   3 +-
 3 files changed, 133 insertions(+), 36 deletions(-)

(limited to 'include')

diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c
index 4de8d467762..13bac53db69 100644
--- a/drivers/mfd/sm501.c
+++ b/drivers/mfd/sm501.c
@@ -48,6 +48,7 @@ struct sm501_devdata {
 	unsigned int			 pdev_id;
 	unsigned int			 irq;
 	void __iomem			*regs;
+	unsigned int			 rev;
 };
 
 #define MHZ (1000 * 1000)
@@ -417,46 +418,108 @@ struct sm501_clock {
 	unsigned long mclk;
 	int divider;
 	int shift;
+	unsigned int m, n, k;
 };
 
+/* sm501_calc_clock
+ *
+ * Calculates the nearest discrete clock frequency that
+ * can be achieved with the specified input clock.
+ *   the maximum divisor is 3 or 5
+ */
+
+static int sm501_calc_clock(unsigned long freq,
+			    struct sm501_clock *clock,
+			    int max_div,
+			    unsigned long mclk,
+			    long *best_diff)
+{
+	int ret = 0;
+	int divider;
+	int shift;
+	long diff;
+
+	/* try dividers 1 and 3 for CRT and for panel,
+	   try divider 5 for panel only.*/
+
+	for (divider = 1; divider <= max_div; divider += 2) {
+		/* try all 8 shift values.*/
+		for (shift = 0; shift < 8; shift++) {
+			/* Calculate difference to requested clock */
+			diff = sm501fb_round_div(mclk, divider << shift) - freq;
+			if (diff < 0)
+				diff = -diff;
+
+			/* If it is less than the current, use it */
+			if (diff < *best_diff) {
+				*best_diff = diff;
+
+				clock->mclk = mclk;
+				clock->divider = divider;
+				clock->shift = shift;
+				ret = 1;
+			}
+		}
+	}
+
+	return ret;
+}
+
+/* sm501_calc_pll
+ *
+ * Calculates the nearest discrete clock frequency that can be
+ * achieved using the programmable PLL.
+ *   the maximum divisor is 3 or 5
+ */
+
+static unsigned long sm501_calc_pll(unsigned long freq,
+					struct sm501_clock *clock,
+					int max_div)
+{
+	unsigned long mclk;
+	unsigned int m, n, k;
+	long best_diff = 999999999;
+
+	/*
+	 * The SM502 datasheet doesn't specify the min/max values for M and N.
+	 * N = 1 at least doesn't work in practice.
+	 */
+	for (m = 2; m <= 255; m++) {
+		for (n = 2; n <= 127; n++) {
+			for (k = 0; k <= 1; k++) {
+				mclk = (24000000UL * m / n) >> k;
+
+				if (sm501_calc_clock(freq, clock, max_div,
+						     mclk, &best_diff)) {
+					clock->m = m;
+					clock->n = n;
+					clock->k = k;
+				}
+			}
+		}
+	}
+
+	/* Return best clock. */
+	return clock->mclk / (clock->divider << clock->shift);
+}
+
 /* sm501_select_clock
  *
- * selects nearest discrete clock frequency the SM501 can achive
+ * Calculates the nearest discrete clock frequency that can be
+ * achieved using the 288MHz and 336MHz PLLs.
  *   the maximum divisor is 3 or 5
  */
+
 static unsigned long sm501_select_clock(unsigned long freq,
 					struct sm501_clock *clock,
 					int max_div)
 {
 	unsigned long mclk;
-	int divider;
-	int shift;
-	long diff;
 	long best_diff = 999999999;
 
 	/* Try 288MHz and 336MHz clocks. */
 	for (mclk = 288000000; mclk <= 336000000; mclk += 48000000) {
-		/* try dividers 1 and 3 for CRT and for panel,
-		   try divider 5 for panel only.*/
-
-		for (divider = 1; divider <= max_div; divider += 2) {
-			/* try all 8 shift values.*/
-			for (shift = 0; shift < 8; shift++) {
-				/* Calculate difference to requested clock */
-				diff = sm501fb_round_div(mclk, divider << shift) - freq;
-				if (diff < 0)
-					diff = -diff;
-
-				/* If it is less than the current, use it */
-				if (diff < best_diff) {
-					best_diff = diff;
-
-					clock->mclk = mclk;
-					clock->divider = divider;
-					clock->shift = shift;
-				}
-			}
-		}
+		sm501_calc_clock(freq, clock, max_div, mclk, &best_diff);
 	}
 
 	/* Return best clock. */
@@ -478,6 +541,7 @@ unsigned long sm501_set_clock(struct device *dev,
 	unsigned long gate = readl(sm->regs + SM501_CURRENT_GATE);
 	unsigned long clock = readl(sm->regs + SM501_CURRENT_CLOCK);
 	unsigned char reg;
+	unsigned int pll_reg = 0;
 	unsigned long sm501_freq; /* the actual frequency acheived */
 
 	struct sm501_clock to;
@@ -492,14 +556,28 @@ unsigned long sm501_set_clock(struct device *dev,
 		 * requested frequency the value must be multiplied by
 		 * 2. This clock also has an additional pre divisor */
 
-		sm501_freq = (sm501_select_clock(2 * req_freq, &to, 5) / 2);
-		reg=to.shift & 0x07;/* bottom 3 bits are shift */
-		if (to.divider == 3)
-			reg |= 0x08; /* /3 divider required */
-		else if (to.divider == 5)
-			reg |= 0x10; /* /5 divider required */
-		if (to.mclk != 288000000)
-			reg |= 0x20; /* which mclk pll is source */
+		if (sm->rev >= 0xC0) {
+			/* SM502 -> use the programmable PLL */
+			sm501_freq = (sm501_calc_pll(2 * req_freq,
+						     &to, 5) / 2);
+			reg = to.shift & 0x07;/* bottom 3 bits are shift */
+			if (to.divider == 3)
+				reg |= 0x08; /* /3 divider required */
+			else if (to.divider == 5)
+				reg |= 0x10; /* /5 divider required */
+			reg |= 0x40; /* select the programmable PLL */
+			pll_reg = 0x20000 | (to.k << 15) | (to.n << 8) | to.m;
+		} else {
+			sm501_freq = (sm501_select_clock(2 * req_freq,
+							 &to, 5) / 2);
+			reg = to.shift & 0x07;/* bottom 3 bits are shift */
+			if (to.divider == 3)
+				reg |= 0x08; /* /3 divider required */
+			else if (to.divider == 5)
+				reg |= 0x10; /* /5 divider required */
+			if (to.mclk != 288000000)
+				reg |= 0x20; /* which mclk pll is source */
+		}
 		break;
 
 	case SM501_CLOCK_V2XCLK:
@@ -560,6 +638,10 @@ unsigned long sm501_set_clock(struct device *dev,
 	}
 
 	writel(mode, sm->regs + SM501_POWER_MODE_CONTROL);
+
+	if (pll_reg)
+		writel(pll_reg, sm->regs + SM501_PROGRAMMABLE_PLL_CONTROL);
+
 	sm501_sync_regs(sm);
 
 	dev_info(sm->dev, "gate %08lx, clock %08lx, mode %08lx\n",
@@ -580,15 +662,24 @@ EXPORT_SYMBOL_GPL(sm501_set_clock);
  * finds the closest available frequency for a given clock
 */
 
-unsigned long sm501_find_clock(int clksrc,
+unsigned long sm501_find_clock(struct device *dev,
+			       int clksrc,
 			       unsigned long req_freq)
 {
+	struct sm501_devdata *sm = dev_get_drvdata(dev);
 	unsigned long sm501_freq; /* the frequency achiveable by the 501 */
 	struct sm501_clock to;
 
 	switch (clksrc) {
 	case SM501_CLOCK_P2XCLK:
-		sm501_freq = (sm501_select_clock(2 * req_freq, &to, 5) / 2);
+		if (sm->rev >= 0xC0) {
+			/* SM502 -> use the programmable PLL */
+			sm501_freq = (sm501_calc_pll(2 * req_freq,
+						     &to, 5) / 2);
+		} else {
+			sm501_freq = (sm501_select_clock(2 * req_freq,
+							 &to, 5) / 2);
+		}
 		break;
 
 	case SM501_CLOCK_V2XCLK:
@@ -895,6 +986,8 @@ static int sm501_init_dev(struct sm501_devdata *sm)
 	dev_info(sm->dev, "SM501 At %p: Version %08lx, %ld Mb, IRQ %d\n",
 		 sm->regs, devid, (unsigned long)mem_avail >> 20, sm->irq);
 
+	sm->rev = devid & SM501_DEVICEID_REVMASK;
+
 	sm501_dump_gate(sm);
 
 	ret = device_create_file(sm->dev, &dev_attr_dbg_regs);
diff --git a/include/linux/sm501-regs.h b/include/linux/sm501-regs.h
index 64236b73c72..d53642d2d89 100644
--- a/include/linux/sm501-regs.h
+++ b/include/linux/sm501-regs.h
@@ -129,11 +129,14 @@
 
 #define SM501_DEVICEID_SM501		(0x05010000)
 #define SM501_DEVICEID_IDMASK		(0xffff0000)
+#define SM501_DEVICEID_REVMASK		(0x000000ff)
 
 #define SM501_PLLCLOCK_COUNT		(0x000064)
 #define SM501_MISC_TIMING		(0x000068)
 #define SM501_CURRENT_SDRAM_CLOCK	(0x00006C)
 
+#define SM501_PROGRAMMABLE_PLL_CONTROL	(0x000074)
+
 /* GPIO base */
 #define SM501_GPIO			(0x010000)
 #define SM501_GPIO_DATA_LOW		(0x00)
diff --git a/include/linux/sm501.h b/include/linux/sm501.h
index 932a9efee8a..bca13454470 100644
--- a/include/linux/sm501.h
+++ b/include/linux/sm501.h
@@ -24,7 +24,8 @@ extern int sm501_unit_power(struct device *dev,
 extern unsigned long sm501_set_clock(struct device *dev,
 				     int clksrc, unsigned long freq);
 
-extern unsigned long sm501_find_clock(int clksrc, unsigned long req_freq);
+extern unsigned long sm501_find_clock(struct device *dev,
+				      int clksrc, unsigned long req_freq);
 
 /* sm501_misc_control
  *
-- 
cgit v1.2.3-70-g09d2


From 040922c04cf2c8ac70be2e88a8a9614ecdb41d2e Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Tue, 4 Mar 2008 14:28:53 -0800
Subject: include falloc.h in header-y

Include falloc.h in header-y; it defines a flag for the fallocate sysctl.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/Kbuild | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index aada32fffec..994df378000 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -61,6 +61,7 @@ header-y += efs_fs_sb.h
 header-y += elf-fdpic.h
 header-y += elf-em.h
 header-y += fadvise.h
+header-y += falloc.h
 header-y += fd.h
 header-y += fdreg.h
 header-y += fib_rules.h
-- 
cgit v1.2.3-70-g09d2


From acc4988bcf38f9618886eaeb9802aeacc6978ec2 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Date: Tue, 4 Mar 2008 14:29:00 -0800
Subject: markers: add an if(0) to __mark_check_format()

Wrap __mark_check_format() into an if(0) to make sure that parameters such as

trace_mark(mm_page_alloc, "order %u pfn %lu", order, page?page_to_pfn(page):0);

(where page_to_pfn() has side-effects) won't generate code because of the
__mark_check_format().

Thanks to Jan Kiszka for reporting this.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Cc: Jan Kiszka <jan.kiszka@siemens.com>
Cc: "Frank Ch. Eigler" <fche@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/marker.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/marker.h b/include/linux/marker.h
index 5df879dc377..430f6adf976 100644
--- a/include/linux/marker.h
+++ b/include/linux/marker.h
@@ -104,10 +104,16 @@ static inline void marker_update_probe_range(struct marker *begin,
 #define MARK_NOARGS " "
 
 /* To be used for string format validity checking with gcc */
-static inline void __printf(1, 2) __mark_check_format(const char *fmt, ...)
+static inline void __printf(1, 2) ___mark_check_format(const char *fmt, ...)
 {
 }
 
+#define __mark_check_format(format, args...)				\
+	do {								\
+		if (0)							\
+			___mark_check_format(format, ## args);		\
+	} while (0)
+
 extern marker_probe_func __mark_empty_function;
 
 extern void marker_probe_cb(const struct marker *mdata,
-- 
cgit v1.2.3-70-g09d2


From bd845e38c7a7251a95a8f2c38aa7fb87140b771d Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Tue, 4 Mar 2008 14:29:01 -0800
Subject: memcg: mm_match_cgroup not vm_match_cgroup

vm_match_cgroup is a perverse name for a macro to match mm with cgroup: rename
it mm_match_cgroup, matching mm_init_cgroup and mm_free_cgroup.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Acked-by: David Rientjes <rientjes@google.com>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Hirokazu Takahashi <taka@valinux.co.jp>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Cc: Paul Menage <menage@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 4 ++--
 mm/memcontrol.c            | 2 +-
 mm/rmap.c                  | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index a8be8073b9e..e4247c83c1c 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -48,7 +48,7 @@ extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 					gfp_t gfp_mask);
 int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem);
 
-#define vm_match_cgroup(mm, cgroup)	\
+#define mm_match_cgroup(mm, cgroup)	\
 	((cgroup) == rcu_dereference((mm)->mem_cgroup))
 
 extern int mem_cgroup_prepare_migration(struct page *page);
@@ -118,7 +118,7 @@ static inline int mem_cgroup_cache_charge(struct page *page,
 	return 0;
 }
 
-static inline int vm_match_cgroup(struct mm_struct *mm, struct mem_cgroup *mem)
+static inline int mm_match_cgroup(struct mm_struct *mm, struct mem_cgroup *mem)
 {
 	return 1;
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 631002d085d..41041c0a689 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -399,7 +399,7 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
 	int ret;
 
 	task_lock(task);
-	ret = task->mm && vm_match_cgroup(task->mm, mem);
+	ret = task->mm && mm_match_cgroup(task->mm, mem);
 	task_unlock(task);
 	return ret;
 }
diff --git a/mm/rmap.c b/mm/rmap.c
index 8fd527c4e2b..0c9a2df06c3 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -321,7 +321,7 @@ static int page_referenced_anon(struct page *page,
 		 * counting on behalf of references from different
 		 * cgroups
 		 */
-		if (mem_cont && !vm_match_cgroup(vma->vm_mm, mem_cont))
+		if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont))
 			continue;
 		referenced += page_referenced_one(page, vma, &mapcount);
 		if (!mapcount)
@@ -382,7 +382,7 @@ static int page_referenced_file(struct page *page,
 		 * counting on behalf of references from different
 		 * cgroups
 		 */
-		if (mem_cont && !vm_match_cgroup(vma->vm_mm, mem_cont))
+		if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont))
 			continue;
 		if ((vma->vm_flags & (VM_LOCKED|VM_MAYSHARE))
 				  == (VM_LOCKED|VM_MAYSHARE)) {
-- 
cgit v1.2.3-70-g09d2


From 427d5416f317681498337ab19218d195edea02d6 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Tue, 4 Mar 2008 14:29:03 -0800
Subject: memcg: move_lists on page not page_cgroup

Each caller of mem_cgroup_move_lists is having to use page_get_page_cgroup:
it's more convenient if it acts upon the page itself not the page_cgroup; and
in a later patch this becomes important to handle within memcontrol.c.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: David Rientjes <rientjes@google.com>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Hirokazu Takahashi <taka@valinux.co.jp>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Cc: Paul Menage <menage@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 5 ++---
 mm/memcontrol.c            | 4 +++-
 mm/swap.c                  | 2 +-
 mm/vmscan.c                | 5 +++--
 4 files changed, 9 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index e4247c83c1c..56432ff8d4e 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -36,7 +36,7 @@ extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask);
 extern void mem_cgroup_uncharge(struct page_cgroup *pc);
 extern void mem_cgroup_uncharge_page(struct page *page);
-extern void mem_cgroup_move_lists(struct page_cgroup *pc, bool active);
+extern void mem_cgroup_move_lists(struct page *page, bool active);
 extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 					struct list_head *dst,
 					unsigned long *scanned, int order,
@@ -106,8 +106,7 @@ static inline void mem_cgroup_uncharge_page(struct page *page)
 {
 }
 
-static inline void mem_cgroup_move_lists(struct page_cgroup *pc,
-						bool active)
+static inline void mem_cgroup_move_lists(struct page *page, bool active)
 {
 }
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 41041c0a689..afdd406f618 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -407,11 +407,13 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
 /*
  * This routine assumes that the appropriate zone's lru lock is already held
  */
-void mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
+void mem_cgroup_move_lists(struct page *page, bool active)
 {
+	struct page_cgroup *pc;
 	struct mem_cgroup_per_zone *mz;
 	unsigned long flags;
 
+	pc = page_get_page_cgroup(page);
 	if (!pc)
 		return;
 
diff --git a/mm/swap.c b/mm/swap.c
index 710a20bb974..d4ec59aa5c4 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -176,7 +176,7 @@ void activate_page(struct page *page)
 		SetPageActive(page);
 		add_page_to_active_list(zone, page);
 		__count_vm_event(PGACTIVATE);
-		mem_cgroup_move_lists(page_get_page_cgroup(page), true);
+		mem_cgroup_move_lists(page, true);
 	}
 	spin_unlock_irq(&zone->lru_lock);
 }
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 106ba10e1ac..45711585684 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1128,7 +1128,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 		ClearPageActive(page);
 
 		list_move(&page->lru, &zone->inactive_list);
-		mem_cgroup_move_lists(page_get_page_cgroup(page), false);
+		mem_cgroup_move_lists(page, false);
 		pgmoved++;
 		if (!pagevec_add(&pvec, page)) {
 			__mod_zone_page_state(zone, NR_INACTIVE, pgmoved);
@@ -1156,8 +1156,9 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
 		VM_BUG_ON(PageLRU(page));
 		SetPageLRU(page);
 		VM_BUG_ON(!PageActive(page));
+
 		list_move(&page->lru, &zone->active_list);
-		mem_cgroup_move_lists(page_get_page_cgroup(page), true);
+		mem_cgroup_move_lists(page, true);
 		pgmoved++;
 		if (!pagevec_add(&pvec, page)) {
 			__mod_zone_page_state(zone, NR_ACTIVE, pgmoved);
-- 
cgit v1.2.3-70-g09d2


From 9442ec9df40d952b0de185ae5638a74970388e01 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Tue, 4 Mar 2008 14:29:07 -0800
Subject: memcg: bad page if page_cgroup when free

Replace free_hot_cold_page's VM_BUG_ON(page_get_page_cgroup(page)) by a "Bad
page state" and clear: most users don't have CONFIG_DEBUG_VM on, and if it
were set here, it'd likely cause corruption when the page is reused.

Don't use page_assign_page_cgroup to clear it: that should be private to
memcontrol.c, and always called with the lock taken; and memmap_init_zone
doesn't need it either - like page->mapping and other pointers throughout the
kernel, Linux assumes pointers in zeroed structures are NULL pointers.

Instead use page_reset_bad_cgroup, added to memcontrol.h for this only.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: David Rientjes <rientjes@google.com>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Hirokazu Takahashi <taka@valinux.co.jp>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Cc: Paul Menage <menage@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  8 ++++----
 mm/memcontrol.c            | 27 ++++++++++++---------------
 mm/page_alloc.c            | 18 ++++++++++++------
 3 files changed, 28 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 56432ff8d4e..70789df7dab 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -29,8 +29,9 @@ struct mm_struct;
 
 extern void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p);
 extern void mm_free_cgroup(struct mm_struct *mm);
-extern void page_assign_page_cgroup(struct page *page,
-					struct page_cgroup *pc);
+
+#define page_reset_bad_cgroup(page)	((page)->page_cgroup = 0)
+
 extern struct page_cgroup *page_get_page_cgroup(struct page *page);
 extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask);
@@ -82,8 +83,7 @@ static inline void mm_free_cgroup(struct mm_struct *mm)
 {
 }
 
-static inline void page_assign_page_cgroup(struct page *page,
-						struct page_cgroup *pc)
+static inline void page_reset_bad_cgroup(struct page *page)
 {
 }
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index afdd406f618..9e170d3c71e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -140,11 +140,17 @@ struct mem_cgroup {
 
 /*
  * We use the lower bit of the page->page_cgroup pointer as a bit spin
- * lock. We need to ensure that page->page_cgroup is atleast two
- * byte aligned (based on comments from Nick Piggin)
+ * lock.  We need to ensure that page->page_cgroup is at least two
+ * byte aligned (based on comments from Nick Piggin).  But since
+ * bit_spin_lock doesn't actually set that lock bit in a non-debug
+ * uniprocessor kernel, we should avoid setting it here too.
  */
 #define PAGE_CGROUP_LOCK_BIT 	0x0
-#define PAGE_CGROUP_LOCK 		(1 << PAGE_CGROUP_LOCK_BIT)
+#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
+#define PAGE_CGROUP_LOCK 	(1 << PAGE_CGROUP_LOCK_BIT)
+#else
+#define PAGE_CGROUP_LOCK	0x0
+#endif
 
 /*
  * A page_cgroup page is associated with every page descriptor. The
@@ -271,19 +277,10 @@ static inline int page_cgroup_locked(struct page *page)
 					&page->page_cgroup);
 }
 
-void page_assign_page_cgroup(struct page *page, struct page_cgroup *pc)
+static void page_assign_page_cgroup(struct page *page, struct page_cgroup *pc)
 {
-	int locked;
-
-	/*
-	 * While resetting the page_cgroup we might not hold the
-	 * page_cgroup lock. free_hot_cold_page() is an example
-	 * of such a scenario
-	 */
-	if (pc)
-		VM_BUG_ON(!page_cgroup_locked(page));
-	locked = (page->page_cgroup & PAGE_CGROUP_LOCK);
-	page->page_cgroup = ((unsigned long)pc | locked);
+	VM_BUG_ON(!page_cgroup_locked(page));
+	page->page_cgroup = ((unsigned long)pc | PAGE_CGROUP_LOCK);
 }
 
 struct page_cgroup *page_get_page_cgroup(struct page *page)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e76cf94725c..402a504f122 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -222,13 +222,19 @@ static inline int bad_range(struct zone *zone, struct page *page)
 
 static void bad_page(struct page *page)
 {
-	printk(KERN_EMERG "Bad page state in process '%s'\n"
-		KERN_EMERG "page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n"
-		KERN_EMERG "Trying to fix it up, but a reboot is needed\n"
-		KERN_EMERG "Backtrace:\n",
+	void *pc = page_get_page_cgroup(page);
+
+	printk(KERN_EMERG "Bad page state in process '%s'\n" KERN_EMERG
+		"page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n",
 		current->comm, page, (int)(2*sizeof(unsigned long)),
 		(unsigned long)page->flags, page->mapping,
 		page_mapcount(page), page_count(page));
+	if (pc) {
+		printk(KERN_EMERG "cgroup:%p\n", pc);
+		page_reset_bad_cgroup(page);
+	}
+	printk(KERN_EMERG "Trying to fix it up, but a reboot is needed\n"
+		KERN_EMERG "Backtrace:\n");
 	dump_stack();
 	page->flags &= ~(1 << PG_lru	|
 			1 << PG_private |
@@ -454,6 +460,7 @@ static inline int free_pages_check(struct page *page)
 {
 	if (unlikely(page_mapcount(page) |
 		(page->mapping != NULL)  |
+		(page_get_page_cgroup(page) != NULL) |
 		(page_count(page) != 0)  |
 		(page->flags & (
 			1 << PG_lru	|
@@ -603,6 +610,7 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
 {
 	if (unlikely(page_mapcount(page) |
 		(page->mapping != NULL)  |
+		(page_get_page_cgroup(page) != NULL) |
 		(page_count(page) != 0)  |
 		(page->flags & (
 			1 << PG_lru	|
@@ -989,7 +997,6 @@ static void free_hot_cold_page(struct page *page, int cold)
 
 	if (!PageHighMem(page))
 		debug_check_no_locks_freed(page_address(page), PAGE_SIZE);
-	VM_BUG_ON(page_get_page_cgroup(page));
 	arch_free_page(page, 0);
 	kernel_map_pages(page, 1, 0);
 
@@ -2528,7 +2535,6 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 		set_page_links(page, zone, nid, pfn);
 		init_page_count(page);
 		reset_page_mapcount(page);
-		page_assign_page_cgroup(page, NULL);
 		SetPageReserved(page);
 
 		/*
-- 
cgit v1.2.3-70-g09d2


From 8289546e573d5ff681cdf0fc7a1184cca66fdb55 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Tue, 4 Mar 2008 14:29:08 -0800
Subject: memcg: remove mem_cgroup_uncharge

Nothing uses mem_cgroup_uncharge apart from mem_cgroup_uncharge_page, (a
trivial wrapper around it) and mem_cgroup_end_migration (which does the same
as mem_cgroup_uncharge_page).  And it often ends up having to lock just to let
its caller unlock.  Remove it (but leave the silly locking until a later
patch).

Moved mem_cgroup_cache_charge next to mem_cgroup_charge in memcontrol.h.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: David Rientjes <rientjes@google.com>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Hirokazu Takahashi <taka@valinux.co.jp>
Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp>
Cc: Paul Menage <menage@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 20 +++++++-------------
 mm/memcontrol.c            | 23 ++++++++---------------
 2 files changed, 15 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 70789df7dab..8b1c4295848 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -35,7 +35,8 @@ extern void mm_free_cgroup(struct mm_struct *mm);
 extern struct page_cgroup *page_get_page_cgroup(struct page *page);
 extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask);
-extern void mem_cgroup_uncharge(struct page_cgroup *pc);
+extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
+					gfp_t gfp_mask);
 extern void mem_cgroup_uncharge_page(struct page *page);
 extern void mem_cgroup_move_lists(struct page *page, bool active);
 extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
@@ -45,8 +46,6 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 					struct mem_cgroup *mem_cont,
 					int active);
 extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask);
-extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
-					gfp_t gfp_mask);
 int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem);
 
 #define mm_match_cgroup(mm, cgroup)	\
@@ -92,14 +91,16 @@ static inline struct page_cgroup *page_get_page_cgroup(struct page *page)
 	return NULL;
 }
 
-static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
-					gfp_t gfp_mask)
+static inline int mem_cgroup_charge(struct page *page,
+					struct mm_struct *mm, gfp_t gfp_mask)
 {
 	return 0;
 }
 
-static inline void mem_cgroup_uncharge(struct page_cgroup *pc)
+static inline int mem_cgroup_cache_charge(struct page *page,
+					struct mm_struct *mm, gfp_t gfp_mask)
 {
+	return 0;
 }
 
 static inline void mem_cgroup_uncharge_page(struct page *page)
@@ -110,13 +111,6 @@ static inline void mem_cgroup_move_lists(struct page *page, bool active)
 {
 }
 
-static inline int mem_cgroup_cache_charge(struct page *page,
-						struct mm_struct *mm,
-						gfp_t gfp_mask)
-{
-	return 0;
-}
-
 static inline int mm_match_cgroup(struct mm_struct *mm, struct mem_cgroup *mem)
 {
 	return 1;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 83ba13ad31e..1333d25163b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -697,20 +697,22 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 
 /*
  * Uncharging is always a welcome operation, we never complain, simply
- * uncharge. This routine should be called with lock_page_cgroup held
+ * uncharge.
  */
-void mem_cgroup_uncharge(struct page_cgroup *pc)
+void mem_cgroup_uncharge_page(struct page *page)
 {
+	struct page_cgroup *pc;
 	struct mem_cgroup *mem;
 	struct mem_cgroup_per_zone *mz;
-	struct page *page;
 	unsigned long flags;
 
 	/*
 	 * Check if our page_cgroup is valid
 	 */
+	lock_page_cgroup(page);
+	pc = page_get_page_cgroup(page);
 	if (!pc)
-		return;
+		goto unlock;
 
 	if (atomic_dec_and_test(&pc->ref_cnt)) {
 		page = pc->page;
@@ -731,12 +733,8 @@ void mem_cgroup_uncharge(struct page_cgroup *pc)
 		}
 		lock_page_cgroup(page);
 	}
-}
 
-void mem_cgroup_uncharge_page(struct page *page)
-{
-	lock_page_cgroup(page);
-	mem_cgroup_uncharge(page_get_page_cgroup(page));
+unlock:
 	unlock_page_cgroup(page);
 }
 
@@ -759,12 +757,7 @@ int mem_cgroup_prepare_migration(struct page *page)
 
 void mem_cgroup_end_migration(struct page *page)
 {
-	struct page_cgroup *pc;
-
-	lock_page_cgroup(page);
-	pc = page_get_page_cgroup(page);
-	mem_cgroup_uncharge(pc);
-	unlock_page_cgroup(page);
+	mem_cgroup_uncharge_page(page);
 }
 /*
  * We know both *page* and *newpage* are now not-on-LRU and Pg_locked.
-- 
cgit v1.2.3-70-g09d2


From 07f2402b4adbcd0e6822ddc27953b63d4504faec Mon Sep 17 00:00:00 2001
From: Jesper Nilsson <jesper.nilsson@axis.com>
Date: Tue, 4 Mar 2008 14:29:23 -0800
Subject: cris: correct usage of __user for copy to and from user space in
 lib/usercopy and uaccess.h

Function __copy_user_zeroing in arch/lib/usercopy.c had the wrong parameter
set as __user, and in include/asm-cris/uaccess.h, it was not set at all for
some of the calling functions.

This will cut the number of warnings quite dramatically when using sparse.

While we're here, remove useless CVS log and correct confusing typo.

Signed-off-by: Jesper Nilsson <jesper.nilsson@axis.com>
Cc: Mikael Starvik <mikael.starvik@axis.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/cris/arch-v10/lib/usercopy.c |  2 +-
 arch/cris/arch-v32/lib/usercopy.c |  2 +-
 include/asm-cris/uaccess.h        | 53 +++++++--------------------------------
 3 files changed, 11 insertions(+), 46 deletions(-)

(limited to 'include')

diff --git a/arch/cris/arch-v10/lib/usercopy.c b/arch/cris/arch-v10/lib/usercopy.c
index b8e6c0430e5..b0a608da7bd 100644
--- a/arch/cris/arch-v10/lib/usercopy.c
+++ b/arch/cris/arch-v10/lib/usercopy.c
@@ -193,7 +193,7 @@ __copy_user (void __user *pdst, const void *psrc, unsigned long pn)
    inaccessible.  */
 
 unsigned long
-__copy_user_zeroing (void __user *pdst, const void *psrc, unsigned long pn)
+__copy_user_zeroing(void *pdst, const void __user *psrc, unsigned long pn)
 {
   /* We want the parameters put in special registers.
      Make sure the compiler is able to make something useful of this.
diff --git a/arch/cris/arch-v32/lib/usercopy.c b/arch/cris/arch-v32/lib/usercopy.c
index 04d0cf35a27..0b5b70d5f58 100644
--- a/arch/cris/arch-v32/lib/usercopy.c
+++ b/arch/cris/arch-v32/lib/usercopy.c
@@ -161,7 +161,7 @@ __copy_user (void __user *pdst, const void *psrc, unsigned long pn)
    inaccessible.  */
 
 unsigned long
-__copy_user_zeroing (void __user *pdst, const void *psrc, unsigned long pn)
+__copy_user_zeroing(void *pdst, const void __user *psrc, unsigned long pn)
 {
   /* We want the parameters put in special registers.
      Make sure the compiler is able to make something useful of this.
diff --git a/include/asm-cris/uaccess.h b/include/asm-cris/uaccess.h
index 69d48a2dc8e..ea11eaf0e92 100644
--- a/include/asm-cris/uaccess.h
+++ b/include/asm-cris/uaccess.h
@@ -1,43 +1,6 @@
 /* 
  * Authors:    Bjorn Wesen (bjornw@axis.com)
  *	       Hans-Peter Nilsson (hp@axis.com)
- *
- * $Log: uaccess.h,v $
- * Revision 1.8  2001/10/29 13:01:48  bjornw
- * Removed unused variable tmp2 in strnlen_user
- *
- * Revision 1.7  2001/10/02 12:44:52  hp
- * Add support for 64-bit put_user/get_user
- *
- * Revision 1.6  2001/10/01 14:51:17  bjornw
- * Added register prefixes and removed underscores
- *
- * Revision 1.5  2000/10/25 03:33:21  hp
- * - Provide implementation for everything else but get_user and put_user;
- *   copying inline to/from user for constant length 0..16, 20, 24, and
- *   clearing for 0..4, 8, 12, 16, 20, 24, strncpy_from_user and strnlen_user
- *   always inline.
- * - Constraints for destination addr in get_user cannot be memory, only reg.
- * - Correct labels for PC at expected fault points.
- * - Nits with assembly code.
- * - Don't use statement expressions without value; use "do {} while (0)".
- * - Return correct values from __generic_... functions.
- *
- * Revision 1.4  2000/09/12 16:28:25  bjornw
- * * Removed comments from the get/put user asm code
- * * Constrains for destination addr in put_user cannot be memory, only reg
- *
- * Revision 1.3  2000/09/12 14:30:20  bjornw
- * MAX_ADDR_USER does not exist anymore
- *
- * Revision 1.2  2000/07/13 15:52:48  bjornw
- * New user-access functions
- *
- * Revision 1.1.1.1  2000/07/10 16:32:31  bjornw
- * CRIS architecture, working draft
- *
- *
- *
  */
 
 /* Asm:s have been tweaked (within the domain of correctness) to give
@@ -209,9 +172,9 @@ extern long __get_user_bad(void);
 /* More complex functions.  Most are inline, but some call functions that
    live in lib/usercopy.c  */
 
-extern unsigned long __copy_user(void *to, const void *from, unsigned long n);
-extern unsigned long __copy_user_zeroing(void *to, const void *from, unsigned long n);
-extern unsigned long __do_clear_user(void *to, unsigned long n);
+extern unsigned long __copy_user(void __user *to, const void *from, unsigned long n);
+extern unsigned long __copy_user_zeroing(void *to, const void __user *from, unsigned long n);
+extern unsigned long __do_clear_user(void __user *to, unsigned long n);
 
 static inline unsigned long
 __generic_copy_to_user(void __user *to, const void *from, unsigned long n)
@@ -253,7 +216,7 @@ strncpy_from_user(char *dst, const char __user *src, long count)
 }
 
 
-/* Note that if these expand awfully if made into switch constructs, so
+/* Note that these expand awfully if made into switch constructs, so
    don't do that.  */
 
 static inline unsigned long
@@ -407,19 +370,21 @@ __constant_clear_user(void __user *to, unsigned long n)
  */
 
 static inline unsigned long
-__generic_copy_from_user_nocheck(void *to, const void *from, unsigned long n)
+__generic_copy_from_user_nocheck(void *to, const void __user *from,
+				 unsigned long n)
 {
 	return __copy_user_zeroing(to,from,n);
 }
 
 static inline unsigned long
-__generic_copy_to_user_nocheck(void *to, const void *from, unsigned long n)
+__generic_copy_to_user_nocheck(void __user *to, const void *from,
+			       unsigned long n)
 {
 	return __copy_user(to,from,n);
 }
 
 static inline unsigned long
-__generic_clear_user_nocheck(void *to, unsigned long n)
+__generic_clear_user_nocheck(void __user *to, unsigned long n)
 {
 	return __do_clear_user(to,n);
 }
-- 
cgit v1.2.3-70-g09d2


From 87ffbe679e21cbf82ff8e3302520ff0ea2beed9a Mon Sep 17 00:00:00 2001
From: Jesper Nilsson <jesper.nilsson@axis.com>
Date: Tue, 4 Mar 2008 14:29:23 -0800
Subject: cris: correct syscall numbers in unistd.h for timerfd_settime and
 timerfd_gettime

Last commit for unistd was not correct, it only had a partial update of
syscall numbers for __NR_timerfd_settime and __NR_timerfd_gettime.  Also,
NR_syscalls was not incremented for the new syscalls.

Signed-off-by: Jesper Nilsson <jesper.nilsson@axis.com>
Cc: Mikael Starvik <mikael.starvik@axis.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-cris/unistd.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/asm-cris/unistd.h b/include/asm-cris/unistd.h
index 007cb16a6b5..76398ef87e9 100644
--- a/include/asm-cris/unistd.h
+++ b/include/asm-cris/unistd.h
@@ -329,12 +329,12 @@
 #define __NR_timerfd_create	322
 #define __NR_eventfd		323
 #define __NR_fallocate		324
-#define __NR_timerfd_settime    315
-#define __NR_timerfd_gettime    316
+#define __NR_timerfd_settime	325
+#define __NR_timerfd_gettime	326
 
 #ifdef __KERNEL__
 
-#define NR_syscalls 325
+#define NR_syscalls 327
 
 #include <asm/arch/unistd.h>
 
-- 
cgit v1.2.3-70-g09d2


From 3715863aa142c4f4c5208f5f3e5e9bac06006d2f Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Tue, 4 Mar 2008 14:29:27 -0800
Subject: iommu: export iommu_is_span_boundary helper function

iommu_is_span_boundary is used internally in the IOMMU helper
(lib/iommu-helper.c), a primitive function that judges whether a memory area
spans LLD's segment boundary or not.

It's difficult to convert some IOMMUs to use the IOMMU helper but
iommu_is_span_boundary is still useful for them.  So this patch exports it.

This is needed for the parisc iommu fixes.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: Kyle McMartin <kyle@parisc-linux.org>
Cc: Matthew Wilcox <matthew@wil.cx>
Cc: Grant Grundler <grundler@parisc-linux.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/iommu-helper.h |  3 +++
 lib/iommu-helper.c           | 10 ++++++----
 2 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/iommu-helper.h b/include/linux/iommu-helper.h
index 4dd4c04ff2f..c975caf7538 100644
--- a/include/linux/iommu-helper.h
+++ b/include/linux/iommu-helper.h
@@ -1,3 +1,6 @@
+extern int iommu_is_span_boundary(unsigned int index, unsigned int nr,
+				  unsigned long shift,
+				  unsigned long boundary_size);
 extern unsigned long iommu_area_alloc(unsigned long *map, unsigned long size,
 				      unsigned long start, unsigned int nr,
 				      unsigned long shift,
diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c
index 495575a59ca..a3b8d4c3f77 100644
--- a/lib/iommu-helper.c
+++ b/lib/iommu-helper.c
@@ -40,10 +40,12 @@ static inline void set_bit_area(unsigned long *map, unsigned long i,
 	}
 }
 
-static inline int is_span_boundary(unsigned int index, unsigned int nr,
-				   unsigned long shift,
-				   unsigned long boundary_size)
+int iommu_is_span_boundary(unsigned int index, unsigned int nr,
+			   unsigned long shift,
+			   unsigned long boundary_size)
 {
+	BUG_ON(!is_power_of_2(boundary_size));
+
 	shift = (shift + index) & (boundary_size - 1);
 	return shift + nr > boundary_size;
 }
@@ -57,7 +59,7 @@ unsigned long iommu_area_alloc(unsigned long *map, unsigned long size,
 again:
 	index = find_next_zero_area(map, size, start, nr, align_mask);
 	if (index != -1) {
-		if (is_span_boundary(index, nr, shift, boundary_size)) {
+		if (iommu_is_span_boundary(index, nr, shift, boundary_size)) {
 			/* we could do more effectively */
 			start = index + 1;
 			goto again;
-- 
cgit v1.2.3-70-g09d2


From 8311c29d40235062a843f4a8e8a70a44af6fe4c9 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Tue, 4 Mar 2008 14:29:30 -0800
Subject: md: reduce CPU wastage on idle md array with a write-intent bitmap

On an md array with a write-intent bitmap, a thread wakes up every few seconds
and scans the bitmap looking for work to do.  If the array is idle, there will
be no work to do, but a lot of scanning is done to discover this.

So cache the fact that the bitmap is completely clean, and avoid scanning the
whole bitmap when the cache is known to be clean.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/md/bitmap.c         | 19 +++++++++++++++++--
 include/linux/raid/bitmap.h |  2 ++
 2 files changed, 19 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 7aeceedcf7d..831aed9c56f 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -1047,6 +1047,11 @@ void bitmap_daemon_work(struct bitmap *bitmap)
 	if (time_before(jiffies, bitmap->daemon_lastrun + bitmap->daemon_sleep*HZ))
 		return;
 	bitmap->daemon_lastrun = jiffies;
+	if (bitmap->allclean) {
+		bitmap->mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
+		return;
+	}
+	bitmap->allclean = 1;
 
 	for (j = 0; j < bitmap->chunks; j++) {
 		bitmap_counter_t *bmc;
@@ -1068,8 +1073,10 @@ void bitmap_daemon_work(struct bitmap *bitmap)
 					clear_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE);
 
 				spin_unlock_irqrestore(&bitmap->lock, flags);
-				if (need_write)
+				if (need_write) {
 					write_page(bitmap, page, 0);
+					bitmap->allclean = 0;
+				}
 				continue;
 			}
 
@@ -1098,6 +1105,9 @@ void bitmap_daemon_work(struct bitmap *bitmap)
 /*
   if (j < 100) printk("bitmap: j=%lu, *bmc = 0x%x\n", j, *bmc);
 */
+			if (*bmc)
+				bitmap->allclean = 0;
+
 			if (*bmc == 2) {
 				*bmc=1; /* maybe clear the bit next time */
 				set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
@@ -1132,6 +1142,8 @@ void bitmap_daemon_work(struct bitmap *bitmap)
 		}
 	}
 
+	if (bitmap->allclean == 0)
+		bitmap->mddev->thread->timeout = bitmap->daemon_sleep * HZ;
 }
 
 static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap,
@@ -1226,6 +1238,7 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect
 			sectors -= blocks;
 		else sectors = 0;
 	}
+	bitmap->allclean = 0;
 	return 0;
 }
 
@@ -1296,6 +1309,7 @@ int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
 		}
 	}
 	spin_unlock_irq(&bitmap->lock);
+	bitmap->allclean = 0;
 	return rv;
 }
 
@@ -1332,6 +1346,7 @@ void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int ab
 	}
  unlock:
 	spin_unlock_irqrestore(&bitmap->lock, flags);
+	bitmap->allclean = 0;
 }
 
 void bitmap_close_sync(struct bitmap *bitmap)
@@ -1399,7 +1414,7 @@ static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int n
 		set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
 	}
 	spin_unlock_irq(&bitmap->lock);
-
+	bitmap->allclean = 0;
 }
 
 /* dirty the memory and file bits for bitmap chunks "s" to "e" */
diff --git a/include/linux/raid/bitmap.h b/include/linux/raid/bitmap.h
index e51b531cd0b..47fbcba1185 100644
--- a/include/linux/raid/bitmap.h
+++ b/include/linux/raid/bitmap.h
@@ -235,6 +235,8 @@ struct bitmap {
 
 	unsigned long flags;
 
+	int allclean;
+
 	unsigned long max_write_behind; /* write-behind mode */
 	atomic_t behind_writes;
 
-- 
cgit v1.2.3-70-g09d2


From d0fae18f1b53a1d39135a968792be034bdf7ff26 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Tue, 4 Mar 2008 14:29:31 -0800
Subject: md: clean up irregularity with raid autodetect

When a raid1 array is stopped, all components currently get added to the list
for auto-detection.  However we should really only add components that were
found by autodetection in the first place.  So add a flag to record that
information, and use it.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/md/md.c           | 4 +++-
 include/linux/raid/md_k.h | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/md/md.c b/drivers/md/md.c
index b375de5c1af..a71241c5ae7 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1503,7 +1503,8 @@ static void export_rdev(mdk_rdev_t * rdev)
 	free_disk_sb(rdev);
 	list_del_init(&rdev->same_set);
 #ifndef MODULE
-	md_autodetect_dev(rdev->bdev->bd_dev);
+	if (test_bit(AutoDetected, &rdev->flags))
+		md_autodetect_dev(rdev->bdev->bd_dev);
 #endif
 	unlock_rdev(rdev);
 	kobject_put(&rdev->kobj);
@@ -6025,6 +6026,7 @@ static void autostart_arrays(int part)
 			MD_BUG();
 			continue;
 		}
+		set_bit(AutoDetected, &rdev->flags);
 		list_add(&rdev->same_set, &pending_raid_disks);
 		i_passed++;
 	}
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index 85a068bab62..7bb6d1abf71 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -83,6 +83,7 @@ struct mdk_rdev_s
 #define	BarriersNotsupp	5		/* BIO_RW_BARRIER is not supported */
 #define	AllReserved	6		/* If whole device is reserved for
 					 * one array */
+#define	AutoDetected	7		/* added by auto-detect */
 
 	int desc_nr;			/* descriptor index in the superblock */
 	int raid_disk;			/* role of device in array */
-- 
cgit v1.2.3-70-g09d2


From 45ab33b6c190c4a8c58f1d13be2ff89ee62024ba Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Tue, 4 Mar 2008 13:26:55 -0600
Subject: [SCSI] iscsi class: regression - fix races with state manipulation
 and blocking/unblocking

For qla4xxx, we could be starting a session, but some error (network,
target, IO from a device that got started, etc) could cause the session
to fail and curring the block/unblock and state manipulation could race
with each other. This patch just has those operations done in the
single threaded iscsi eh work queue, so that way they are serialized.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
 drivers/scsi/scsi_transport_iscsi.c | 76 +++++++++++++++++++++++--------------
 include/scsi/scsi_transport_iscsi.h |  2 +
 2 files changed, 50 insertions(+), 28 deletions(-)

(limited to 'include')

diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index dfb026b95a6..ca7bb6f63bd 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -373,24 +373,25 @@ static void session_recovery_timedout(struct work_struct *work)
 	scsi_target_unblock(&session->dev);
 }
 
-static void __iscsi_unblock_session(struct iscsi_cls_session *session)
-{
-	if (!cancel_delayed_work(&session->recovery_work))
-		flush_workqueue(iscsi_eh_timer_workq);
-	scsi_target_unblock(&session->dev);
-}
-
-void iscsi_unblock_session(struct iscsi_cls_session *session)
+static void __iscsi_unblock_session(struct work_struct *work)
 {
+	struct iscsi_cls_session *session =
+			container_of(work, struct iscsi_cls_session,
+				     unblock_work);
 	struct Scsi_Host *shost = iscsi_session_to_shost(session);
 	struct iscsi_host *ihost = shost->shost_data;
 	unsigned long flags;
 
+	/*
+	 * The recovery and unblock work get run from the same workqueue,
+	 * so try to cancel it if it was going to run after this unblock.
+	 */
+	cancel_delayed_work(&session->recovery_work);
 	spin_lock_irqsave(&session->lock, flags);
 	session->state = ISCSI_SESSION_LOGGED_IN;
 	spin_unlock_irqrestore(&session->lock, flags);
-
-	__iscsi_unblock_session(session);
+	/* start IO */
+	scsi_target_unblock(&session->dev);
 	/*
 	 * Only do kernel scanning if the driver is properly hooked into
 	 * the async scanning code (drivers like iscsi_tcp do login and
@@ -401,20 +402,43 @@ void iscsi_unblock_session(struct iscsi_cls_session *session)
 			atomic_inc(&ihost->nr_scans);
 	}
 }
+
+/**
+ * iscsi_unblock_session - set a session as logged in and start IO.
+ * @session: iscsi session
+ *
+ * Mark a session as ready to accept IO.
+ */
+void iscsi_unblock_session(struct iscsi_cls_session *session)
+{
+	queue_work(iscsi_eh_timer_workq, &session->unblock_work);
+	/*
+	 * make sure all the events have completed before tell the driver
+	 * it is safe
+	 */
+	flush_workqueue(iscsi_eh_timer_workq);
+}
 EXPORT_SYMBOL_GPL(iscsi_unblock_session);
 
-void iscsi_block_session(struct iscsi_cls_session *session)
+static void __iscsi_block_session(struct work_struct *work)
 {
+	struct iscsi_cls_session *session =
+			container_of(work, struct iscsi_cls_session,
+				     block_work);
 	unsigned long flags;
 
 	spin_lock_irqsave(&session->lock, flags);
 	session->state = ISCSI_SESSION_FAILED;
 	spin_unlock_irqrestore(&session->lock, flags);
-
 	scsi_target_block(&session->dev);
 	queue_delayed_work(iscsi_eh_timer_workq, &session->recovery_work,
 			   session->recovery_tmo * HZ);
 }
+
+void iscsi_block_session(struct iscsi_cls_session *session)
+{
+	queue_work(iscsi_eh_timer_workq, &session->block_work);
+}
 EXPORT_SYMBOL_GPL(iscsi_block_session);
 
 static void __iscsi_unbind_session(struct work_struct *work)
@@ -463,6 +487,8 @@ iscsi_alloc_session(struct Scsi_Host *shost,
 	INIT_DELAYED_WORK(&session->recovery_work, session_recovery_timedout);
 	INIT_LIST_HEAD(&session->host_list);
 	INIT_LIST_HEAD(&session->sess_list);
+	INIT_WORK(&session->unblock_work, __iscsi_unblock_session);
+	INIT_WORK(&session->block_work, __iscsi_block_session);
 	INIT_WORK(&session->unbind_work, __iscsi_unbind_session);
 	INIT_WORK(&session->scan_work, iscsi_scan_session);
 	spin_lock_init(&session->lock);
@@ -575,24 +601,25 @@ void iscsi_remove_session(struct iscsi_cls_session *session)
 	list_del(&session->sess_list);
 	spin_unlock_irqrestore(&sesslock, flags);
 
+	/* make sure there are no blocks/unblocks queued */
+	flush_workqueue(iscsi_eh_timer_workq);
+	/* make sure the timedout callout is not running */
+	if (!cancel_delayed_work(&session->recovery_work))
+		flush_workqueue(iscsi_eh_timer_workq);
 	/*
 	 * If we are blocked let commands flow again. The lld or iscsi
 	 * layer should set up the queuecommand to fail commands.
+	 * We assume that LLD will not be calling block/unblock while
+	 * removing the session.
 	 */
 	spin_lock_irqsave(&session->lock, flags);
 	session->state = ISCSI_SESSION_FREE;
 	spin_unlock_irqrestore(&session->lock, flags);
-	__iscsi_unblock_session(session);
-	__iscsi_unbind_session(&session->unbind_work);
 
-	/* flush running scans */
+	scsi_target_unblock(&session->dev);
+	/* flush running scans then delete devices */
 	flush_workqueue(ihost->scan_workq);
-	/*
-	 * If the session dropped while removing devices then we need to make
-	 * sure it is not blocked
-	 */
-	if (!cancel_delayed_work(&session->recovery_work))
-		flush_workqueue(iscsi_eh_timer_workq);
+	__iscsi_unbind_session(&session->unbind_work);
 
 	/* hw iscsi may not have removed all connections from session */
 	err = device_for_each_child(&session->dev, NULL,
@@ -802,23 +829,16 @@ EXPORT_SYMBOL_GPL(iscsi_recv_pdu);
 
 void iscsi_conn_error(struct iscsi_cls_conn *conn, enum iscsi_err error)
 {
-	struct iscsi_cls_session *session = iscsi_conn_to_session(conn);
 	struct nlmsghdr	*nlh;
 	struct sk_buff	*skb;
 	struct iscsi_uevent *ev;
 	struct iscsi_internal *priv;
 	int len = NLMSG_SPACE(sizeof(*ev));
-	unsigned long flags;
 
 	priv = iscsi_if_transport_lookup(conn->transport);
 	if (!priv)
 		return;
 
-	spin_lock_irqsave(&session->lock, flags);
-	if (session->state == ISCSI_SESSION_LOGGED_IN)
-		session->state = ISCSI_SESSION_FAILED;
-	spin_unlock_irqrestore(&session->lock, flags);
-
 	skb = alloc_skb(len, GFP_ATOMIC);
 	if (!skb) {
 		iscsi_cls_conn_printk(KERN_ERR, conn, "gracefully ignored "
diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h
index dbc96ef4cc7..aab1eae2ec4 100644
--- a/include/scsi/scsi_transport_iscsi.h
+++ b/include/scsi/scsi_transport_iscsi.h
@@ -177,6 +177,8 @@ struct iscsi_cls_session {
 	struct list_head host_list;
 	struct iscsi_transport *transport;
 	spinlock_t lock;
+	struct work_struct block_work;
+	struct work_struct unblock_work;
 	struct work_struct scan_work;
 	struct work_struct unbind_work;
 
-- 
cgit v1.2.3-70-g09d2


From e0007529893c1c064be90bd21422ca0da4a0198e Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 5 Mar 2008 10:31:54 -0500
Subject: LSM/SELinux: Interfaces to allow FS to control mount options

Introduce new LSM interfaces to allow an FS to deal with their own mount
options.  This includes a new string parsing function exported from the
LSM that an FS can use to get a security data blob and a new security
data blob.  This is particularly useful for an FS which uses binary
mount data, like NFS, which does not pass strings into the vfs to be
handled by the loaded LSM.  Also fix a BUG() in both SELinux and SMACK
when dealing with binary mount data.  If the binary mount data is less
than one page the copy_page() in security_sb_copy_data() can cause an
illegal page fault and boom.  Remove all NFSisms from the SELinux code
since they were broken by past NFS changes.

Signed-off-by: Eric Paris <eparis@redhat.com>
Acked-by: Stephen Smalley <sds@tycho.nsa.gov>
Acked-by: Casey Schaufler <casey@schaufler-ca.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 fs/super.c                          |   4 +-
 include/linux/security.h            |  99 ++++++++++++++------
 security/dummy.c                    |  23 ++---
 security/security.c                 |  23 +++--
 security/selinux/hooks.c            | 175 +++++++++++++++++++-----------------
 security/selinux/include/security.h |   5 ++
 security/smack/smack_lsm.c          |   9 +-
 7 files changed, 204 insertions(+), 134 deletions(-)

(limited to 'include')

diff --git a/fs/super.c b/fs/super.c
index 88811f60c8d..010446d8c40 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -870,12 +870,12 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
 	if (!mnt)
 		goto out;
 
-	if (data) {
+	if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) {
 		secdata = alloc_secdata();
 		if (!secdata)
 			goto out_mnt;
 
-		error = security_sb_copy_data(type, data, secdata);
+		error = security_sb_copy_data(data, secdata);
 		if (error)
 			goto out_free_secdata;
 	}
diff --git a/include/linux/security.h b/include/linux/security.h
index fe52cdeab0a..b07357ca213 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -34,12 +34,6 @@
 #include <linux/xfrm.h>
 #include <net/flow.h>
 
-/* only a char in selinux superblock security struct flags */
-#define FSCONTEXT_MNT		0x01
-#define CONTEXT_MNT		0x02
-#define ROOTCONTEXT_MNT		0x04
-#define DEFCONTEXT_MNT		0x08
-
 extern unsigned securebits;
 
 struct ctl_table;
@@ -114,6 +108,32 @@ struct request_sock;
 
 #ifdef CONFIG_SECURITY
 
+struct security_mnt_opts {
+	char **mnt_opts;
+	int *mnt_opts_flags;
+	int num_mnt_opts;
+};
+
+static inline void security_init_mnt_opts(struct security_mnt_opts *opts)
+{
+	opts->mnt_opts = NULL;
+	opts->mnt_opts_flags = NULL;
+	opts->num_mnt_opts = 0;
+}
+
+static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
+{
+	int i;
+	if (opts->mnt_opts)
+		for(i = 0; i < opts->num_mnt_opts; i++)
+			kfree(opts->mnt_opts[i]);
+	kfree(opts->mnt_opts);
+	opts->mnt_opts = NULL;
+	kfree(opts->mnt_opts_flags);
+	opts->mnt_opts_flags = NULL;
+	opts->num_mnt_opts = 0;
+}
+
 /**
  * struct security_operations - main security structure
  *
@@ -262,19 +282,19 @@ struct request_sock;
  * @sb_get_mnt_opts:
  *	Get the security relevant mount options used for a superblock
  *	@sb the superblock to get security mount options from
- *	@mount_options array for pointers to mount options
- *	@mount_flags array of ints specifying what each mount options is
- *	@num_opts number of options in the arrays
+ *	@opts binary data structure containing all lsm mount data
  * @sb_set_mnt_opts:
  *	Set the security relevant mount options used for a superblock
  *	@sb the superblock to set security mount options for
- *	@mount_options array for pointers to mount options
- *	@mount_flags array of ints specifying what each mount options is
- *	@num_opts number of options in the arrays
+ *	@opts binary data structure containing all lsm mount data
  * @sb_clone_mnt_opts:
  *	Copy all security options from a given superblock to another
  *	@oldsb old superblock which contain information to clone
  *	@newsb new superblock which needs filled in
+ * @sb_parse_opts_str:
+ *	Parse a string of security data filling in the opts structure
+ *	@options string containing all mount options known by the LSM
+ *	@opts binary data structure usable by the LSM
  *
  * Security hooks for inode operations.
  *
@@ -1238,8 +1258,7 @@ struct security_operations {
 
 	int (*sb_alloc_security) (struct super_block * sb);
 	void (*sb_free_security) (struct super_block * sb);
-	int (*sb_copy_data)(struct file_system_type *type,
-			    void *orig, void *copy);
+	int (*sb_copy_data)(char *orig, char *copy);
 	int (*sb_kern_mount) (struct super_block *sb, void *data);
 	int (*sb_statfs) (struct dentry *dentry);
 	int (*sb_mount) (char *dev_name, struct nameidata * nd,
@@ -1257,12 +1276,12 @@ struct security_operations {
 	void (*sb_post_pivotroot) (struct nameidata * old_nd,
 				   struct nameidata * new_nd);
 	int (*sb_get_mnt_opts) (const struct super_block *sb,
-				char ***mount_options, int **flags,
-				int *num_opts);
-	int (*sb_set_mnt_opts) (struct super_block *sb, char **mount_options,
-				int *flags, int num_opts);
+				struct security_mnt_opts *opts);
+	int (*sb_set_mnt_opts) (struct super_block *sb,
+				struct security_mnt_opts *opts);
 	void (*sb_clone_mnt_opts) (const struct super_block *oldsb,
 				   struct super_block *newsb);
+	int (*sb_parse_opts_str) (char *options, struct security_mnt_opts *opts);
 
 	int (*inode_alloc_security) (struct inode *inode);	
 	void (*inode_free_security) (struct inode *inode);
@@ -1507,7 +1526,7 @@ int security_bprm_check(struct linux_binprm *bprm);
 int security_bprm_secureexec(struct linux_binprm *bprm);
 int security_sb_alloc(struct super_block *sb);
 void security_sb_free(struct super_block *sb);
-int security_sb_copy_data(struct file_system_type *type, void *orig, void *copy);
+int security_sb_copy_data(char *orig, char *copy);
 int security_sb_kern_mount(struct super_block *sb, void *data);
 int security_sb_statfs(struct dentry *dentry);
 int security_sb_mount(char *dev_name, struct nameidata *nd,
@@ -1520,12 +1539,12 @@ void security_sb_post_remount(struct vfsmount *mnt, unsigned long flags, void *d
 void security_sb_post_addmount(struct vfsmount *mnt, struct nameidata *mountpoint_nd);
 int security_sb_pivotroot(struct nameidata *old_nd, struct nameidata *new_nd);
 void security_sb_post_pivotroot(struct nameidata *old_nd, struct nameidata *new_nd);
-int security_sb_get_mnt_opts(const struct super_block *sb, char ***mount_options,
-			     int **flags, int *num_opts);
-int security_sb_set_mnt_opts(struct super_block *sb, char **mount_options,
-			     int *flags, int num_opts);
+int security_sb_get_mnt_opts(const struct super_block *sb,
+				struct security_mnt_opts *opts);
+int security_sb_set_mnt_opts(struct super_block *sb, struct security_mnt_opts *opts);
 void security_sb_clone_mnt_opts(const struct super_block *oldsb,
 				struct super_block *newsb);
+int security_sb_parse_opts_str(char *options, struct security_mnt_opts *opts);
 
 int security_inode_alloc(struct inode *inode);
 void security_inode_free(struct inode *inode);
@@ -1635,6 +1654,16 @@ int security_secctx_to_secid(char *secdata, u32 seclen, u32 *secid);
 void security_release_secctx(char *secdata, u32 seclen);
 
 #else /* CONFIG_SECURITY */
+struct security_mnt_opts {
+};
+
+static inline void security_init_mnt_opts(struct security_mnt_opts *opts)
+{
+}
+
+static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
+{
+}
 
 /*
  * This is the default capabilities functionality.  Most of these functions
@@ -1762,8 +1791,7 @@ static inline int security_sb_alloc (struct super_block *sb)
 static inline void security_sb_free (struct super_block *sb)
 { }
 
-static inline int security_sb_copy_data (struct file_system_type *type,
-					 void *orig, void *copy)
+static inline int security_sb_copy_data (char *orig, char *copy)
 {
 	return 0;
 }
@@ -1819,6 +1847,27 @@ static inline int security_sb_pivotroot (struct nameidata *old_nd,
 static inline void security_sb_post_pivotroot (struct nameidata *old_nd,
 					       struct nameidata *new_nd)
 { }
+static inline int security_sb_get_mnt_opts(const struct super_block *sb,
+					   struct security_mnt_opts *opts)
+{
+	security_init_mnt_opts(opts);
+	return 0;
+}
+
+static inline int security_sb_set_mnt_opts(struct super_block *sb,
+					   struct security_mnt_opts *opts)
+{
+	return 0;
+}
+
+static inline void security_sb_clone_mnt_opts(const struct super_block *oldsb,
+					      struct super_block *newsb)
+{ }
+
+static inline int security_sb_parse_opts_str(char *options, struct security_mnt_opts *opts)
+{
+	return 0;
+}
 
 static inline int security_inode_alloc (struct inode *inode)
 {
diff --git a/security/dummy.c b/security/dummy.c
index 649326bf64e..78d8f92310a 100644
--- a/security/dummy.c
+++ b/security/dummy.c
@@ -181,8 +181,7 @@ static void dummy_sb_free_security (struct super_block *sb)
 	return;
 }
 
-static int dummy_sb_copy_data (struct file_system_type *type,
-			       void *orig, void *copy)
+static int dummy_sb_copy_data (char *orig, char *copy)
 {
 	return 0;
 }
@@ -245,19 +244,17 @@ static void dummy_sb_post_pivotroot (struct nameidata *old_nd, struct nameidata
 	return;
 }
 
-static int dummy_sb_get_mnt_opts(const struct super_block *sb, char ***mount_options,
-				 int **flags, int *num_opts)
+static int dummy_sb_get_mnt_opts(const struct super_block *sb,
+				 struct security_mnt_opts *opts)
 {
-	*mount_options = NULL;
-	*flags = NULL;
-	*num_opts = 0;
+	security_init_mnt_opts(opts);
 	return 0;
 }
 
-static int dummy_sb_set_mnt_opts(struct super_block *sb, char **mount_options,
-				 int *flags, int num_opts)
+static int dummy_sb_set_mnt_opts(struct super_block *sb,
+				 struct security_mnt_opts *opts)
 {
-	if (unlikely(num_opts))
+	if (unlikely(opts->num_mnt_opts))
 		return -EOPNOTSUPP;
 	return 0;
 }
@@ -268,6 +265,11 @@ static void dummy_sb_clone_mnt_opts(const struct super_block *oldsb,
 	return;
 }
 
+static int dummy_sb_parse_opts_str(char *options, struct security_mnt_opts *opts)
+{
+	return 0;
+}
+
 static int dummy_inode_alloc_security (struct inode *inode)
 {
 	return 0;
@@ -1028,6 +1030,7 @@ void security_fixup_ops (struct security_operations *ops)
 	set_to_dummy_if_null(ops, sb_get_mnt_opts);
 	set_to_dummy_if_null(ops, sb_set_mnt_opts);
 	set_to_dummy_if_null(ops, sb_clone_mnt_opts);
+	set_to_dummy_if_null(ops, sb_parse_opts_str);
 	set_to_dummy_if_null(ops, inode_alloc_security);
 	set_to_dummy_if_null(ops, inode_free_security);
 	set_to_dummy_if_null(ops, inode_init_security);
diff --git a/security/security.c b/security/security.c
index d15e56cbaad..b1387a6b416 100644
--- a/security/security.c
+++ b/security/security.c
@@ -244,10 +244,11 @@ void security_sb_free(struct super_block *sb)
 	security_ops->sb_free_security(sb);
 }
 
-int security_sb_copy_data(struct file_system_type *type, void *orig, void *copy)
+int security_sb_copy_data(char *orig, char *copy)
 {
-	return security_ops->sb_copy_data(type, orig, copy);
+	return security_ops->sb_copy_data(orig, copy);
 }
+EXPORT_SYMBOL(security_sb_copy_data);
 
 int security_sb_kern_mount(struct super_block *sb, void *data)
 {
@@ -306,24 +307,30 @@ void security_sb_post_pivotroot(struct nameidata *old_nd, struct nameidata *new_
 }
 
 int security_sb_get_mnt_opts(const struct super_block *sb,
-			      char ***mount_options,
-			      int **flags, int *num_opts)
+				struct security_mnt_opts *opts)
 {
-	return security_ops->sb_get_mnt_opts(sb, mount_options, flags, num_opts);
+	return security_ops->sb_get_mnt_opts(sb, opts);
 }
 
 int security_sb_set_mnt_opts(struct super_block *sb,
-			      char **mount_options,
-			      int *flags, int num_opts)
+				struct security_mnt_opts *opts)
 {
-	return security_ops->sb_set_mnt_opts(sb, mount_options, flags, num_opts);
+	return security_ops->sb_set_mnt_opts(sb, opts);
 }
+EXPORT_SYMBOL(security_sb_set_mnt_opts);
 
 void security_sb_clone_mnt_opts(const struct super_block *oldsb,
 				struct super_block *newsb)
 {
 	security_ops->sb_clone_mnt_opts(oldsb, newsb);
 }
+EXPORT_SYMBOL(security_sb_clone_mnt_opts);
+
+int security_sb_parse_opts_str(char *options, struct security_mnt_opts *opts)
+{
+	return security_ops->sb_parse_opts_str(options, opts);
+}
+EXPORT_SYMBOL(security_sb_parse_opts_str);
 
 int security_inode_alloc(struct inode *inode)
 {
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 75c2e99bfb8..4bf4807f2d4 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -443,8 +443,7 @@ out:
  * mount options, or whatever.
  */
 static int selinux_get_mnt_opts(const struct super_block *sb,
-				char ***mount_options, int **mnt_opts_flags,
-				int *num_opts)
+				struct security_mnt_opts *opts)
 {
 	int rc = 0, i;
 	struct superblock_security_struct *sbsec = sb->s_security;
@@ -452,9 +451,7 @@ static int selinux_get_mnt_opts(const struct super_block *sb,
 	u32 len;
 	char tmp;
 
-	*num_opts = 0;
-	*mount_options = NULL;
-	*mnt_opts_flags = NULL;
+	security_init_mnt_opts(opts);
 
 	if (!sbsec->initialized)
 		return -EINVAL;
@@ -470,18 +467,18 @@ static int selinux_get_mnt_opts(const struct super_block *sb,
 	/* count the number of mount options for this sb */
 	for (i = 0; i < 8; i++) {
 		if (tmp & 0x01)
-			(*num_opts)++;
+			opts->num_mnt_opts++;
 		tmp >>= 1;
 	}
 
-	*mount_options = kcalloc(*num_opts, sizeof(char *), GFP_ATOMIC);
-	if (!*mount_options) {
+	opts->mnt_opts = kcalloc(opts->num_mnt_opts, sizeof(char *), GFP_ATOMIC);
+	if (!opts->mnt_opts) {
 		rc = -ENOMEM;
 		goto out_free;
 	}
 
-	*mnt_opts_flags = kcalloc(*num_opts, sizeof(int), GFP_ATOMIC);
-	if (!*mnt_opts_flags) {
+	opts->mnt_opts_flags = kcalloc(opts->num_mnt_opts, sizeof(int), GFP_ATOMIC);
+	if (!opts->mnt_opts_flags) {
 		rc = -ENOMEM;
 		goto out_free;
 	}
@@ -491,22 +488,22 @@ static int selinux_get_mnt_opts(const struct super_block *sb,
 		rc = security_sid_to_context(sbsec->sid, &context, &len);
 		if (rc)
 			goto out_free;
-		(*mount_options)[i] = context;
-		(*mnt_opts_flags)[i++] = FSCONTEXT_MNT;
+		opts->mnt_opts[i] = context;
+		opts->mnt_opts_flags[i++] = FSCONTEXT_MNT;
 	}
 	if (sbsec->flags & CONTEXT_MNT) {
 		rc = security_sid_to_context(sbsec->mntpoint_sid, &context, &len);
 		if (rc)
 			goto out_free;
-		(*mount_options)[i] = context;
-		(*mnt_opts_flags)[i++] = CONTEXT_MNT;
+		opts->mnt_opts[i] = context;
+		opts->mnt_opts_flags[i++] = CONTEXT_MNT;
 	}
 	if (sbsec->flags & DEFCONTEXT_MNT) {
 		rc = security_sid_to_context(sbsec->def_sid, &context, &len);
 		if (rc)
 			goto out_free;
-		(*mount_options)[i] = context;
-		(*mnt_opts_flags)[i++] = DEFCONTEXT_MNT;
+		opts->mnt_opts[i] = context;
+		opts->mnt_opts_flags[i++] = DEFCONTEXT_MNT;
 	}
 	if (sbsec->flags & ROOTCONTEXT_MNT) {
 		struct inode *root = sbsec->sb->s_root->d_inode;
@@ -515,24 +512,16 @@ static int selinux_get_mnt_opts(const struct super_block *sb,
 		rc = security_sid_to_context(isec->sid, &context, &len);
 		if (rc)
 			goto out_free;
-		(*mount_options)[i] = context;
-		(*mnt_opts_flags)[i++] = ROOTCONTEXT_MNT;
+		opts->mnt_opts[i] = context;
+		opts->mnt_opts_flags[i++] = ROOTCONTEXT_MNT;
 	}
 
-	BUG_ON(i != *num_opts);
+	BUG_ON(i != opts->num_mnt_opts);
 
 	return 0;
 
 out_free:
-	/* don't leak context string if security_sid_to_context had an error */
-	if (*mount_options && i)
-		for (; i > 0; i--)
-			kfree((*mount_options)[i-1]);
-	kfree(*mount_options);
-	*mount_options = NULL;
-	kfree(*mnt_opts_flags);
-	*mnt_opts_flags = NULL;
-	*num_opts = 0;
+	security_free_mnt_opts(opts);
 	return rc;
 }
 
@@ -553,12 +542,13 @@ static int bad_option(struct superblock_security_struct *sbsec, char flag,
 			return 1;
 	return 0;
 }
+
 /*
  * Allow filesystems with binary mount data to explicitly set mount point
  * labeling information.
  */
-static int selinux_set_mnt_opts(struct super_block *sb, char **mount_options,
-				int *flags, int num_opts)
+static int selinux_set_mnt_opts(struct super_block *sb,
+				struct security_mnt_opts *opts)
 {
 	int rc = 0, i;
 	struct task_security_struct *tsec = current->security;
@@ -568,6 +558,9 @@ static int selinux_set_mnt_opts(struct super_block *sb, char **mount_options,
 	struct inode_security_struct *root_isec = inode->i_security;
 	u32 fscontext_sid = 0, context_sid = 0, rootcontext_sid = 0;
 	u32 defcontext_sid = 0;
+	char **mount_options = opts->mnt_opts;
+	int *flags = opts->mnt_opts_flags;
+	int num_opts = opts->num_mnt_opts;
 
 	mutex_lock(&sbsec->lock);
 
@@ -588,6 +581,21 @@ static int selinux_set_mnt_opts(struct super_block *sb, char **mount_options,
 		goto out;
 	}
 
+	/*
+	 * Binary mount data FS will come through this function twice.  Once
+	 * from an explicit call and once from the generic calls from the vfs.
+	 * Since the generic VFS calls will not contain any security mount data
+	 * we need to skip the double mount verification.
+	 *
+	 * This does open a hole in which we will not notice if the first
+	 * mount using this sb set explict options and a second mount using
+	 * this sb does not set any security options.  (The first options
+	 * will be used for both mounts)
+	 */
+	if (sbsec->initialized && (sb->s_type->fs_flags & FS_BINARY_MOUNTDATA)
+	    && (num_opts == 0))
+	        goto out;
+
 	/*
 	 * parse the mount options, check if they are valid sids.
 	 * also check if someone is trying to mount the same sb more
@@ -792,43 +800,14 @@ static void selinux_sb_clone_mnt_opts(const struct super_block *oldsb,
 	mutex_unlock(&newsbsec->lock);
 }
 
-/*
- * string mount options parsing and call set the sbsec
- */
-static int superblock_doinit(struct super_block *sb, void *data)
+int selinux_parse_opts_str(char *options, struct security_mnt_opts *opts)
 {
+	char *p;
 	char *context = NULL, *defcontext = NULL;
 	char *fscontext = NULL, *rootcontext = NULL;
-	int rc = 0;
-	char *p, *options = data;
-	/* selinux only know about a fixed number of mount options */
-	char *mnt_opts[NUM_SEL_MNT_OPTS];
-	int mnt_opts_flags[NUM_SEL_MNT_OPTS], num_mnt_opts = 0;
-
-	if (!data)
-		goto out;
+	int rc, num_mnt_opts = 0;
 
-	/* with the nfs patch this will become a goto out; */
-	if (sb->s_type->fs_flags & FS_BINARY_MOUNTDATA) {
-		const char *name = sb->s_type->name;
-		/* NFS we understand. */
-		if (!strcmp(name, "nfs")) {
-			struct nfs_mount_data *d = data;
-
-			if (d->version !=  NFS_MOUNT_VERSION)
-				goto out;
-
-			if (d->context[0]) {
-				context = kstrdup(d->context, GFP_KERNEL);
-				if (!context) {
-					rc = -ENOMEM;
-					goto out;
-				}
-			}
-			goto build_flags;
-		} else
-			goto out;
-	}
+	opts->num_mnt_opts = 0;
 
 	/* Standard string-based options. */
 	while ((p = strsep(&options, "|")) != NULL) {
@@ -901,26 +880,37 @@ static int superblock_doinit(struct super_block *sb, void *data)
 		}
 	}
 
-build_flags:
+	rc = -ENOMEM;
+	opts->mnt_opts = kcalloc(NUM_SEL_MNT_OPTS, sizeof(char *), GFP_ATOMIC);
+	if (!opts->mnt_opts)
+		goto out_err;
+
+	opts->mnt_opts_flags = kcalloc(NUM_SEL_MNT_OPTS, sizeof(int), GFP_ATOMIC);
+	if (!opts->mnt_opts_flags) {
+		kfree(opts->mnt_opts);
+		goto out_err;
+	}
+
 	if (fscontext) {
-		mnt_opts[num_mnt_opts] = fscontext;
-		mnt_opts_flags[num_mnt_opts++] = FSCONTEXT_MNT;
+		opts->mnt_opts[num_mnt_opts] = fscontext;
+		opts->mnt_opts_flags[num_mnt_opts++] = FSCONTEXT_MNT;
 	}
 	if (context) {
-		mnt_opts[num_mnt_opts] = context;
-		mnt_opts_flags[num_mnt_opts++] = CONTEXT_MNT;
+		opts->mnt_opts[num_mnt_opts] = context;
+		opts->mnt_opts_flags[num_mnt_opts++] = CONTEXT_MNT;
 	}
 	if (rootcontext) {
-		mnt_opts[num_mnt_opts] = rootcontext;
-		mnt_opts_flags[num_mnt_opts++] = ROOTCONTEXT_MNT;
+		opts->mnt_opts[num_mnt_opts] = rootcontext;
+		opts->mnt_opts_flags[num_mnt_opts++] = ROOTCONTEXT_MNT;
 	}
 	if (defcontext) {
-		mnt_opts[num_mnt_opts] = defcontext;
-		mnt_opts_flags[num_mnt_opts++] = DEFCONTEXT_MNT;
+		opts->mnt_opts[num_mnt_opts] = defcontext;
+		opts->mnt_opts_flags[num_mnt_opts++] = DEFCONTEXT_MNT;
 	}
 
-out:
-	rc = selinux_set_mnt_opts(sb, mnt_opts, mnt_opts_flags, num_mnt_opts);
+	opts->num_mnt_opts = num_mnt_opts;
+	return 0;
+
 out_err:
 	kfree(context);
 	kfree(defcontext);
@@ -928,6 +918,33 @@ out_err:
 	kfree(rootcontext);
 	return rc;
 }
+/*
+ * string mount options parsing and call set the sbsec
+ */
+static int superblock_doinit(struct super_block *sb, void *data)
+{
+	int rc = 0;
+	char *options = data;
+	struct security_mnt_opts opts;
+
+	security_init_mnt_opts(&opts);
+
+	if (!data)
+		goto out;
+
+	BUG_ON(sb->s_type->fs_flags & FS_BINARY_MOUNTDATA);
+
+	rc = selinux_parse_opts_str(options, &opts);
+	if (rc)
+		goto out_err;
+
+out:
+	rc = selinux_set_mnt_opts(sb, &opts);
+
+out_err:
+	security_free_mnt_opts(&opts);
+	return rc;
+}
 
 static inline u16 inode_mode_to_security_class(umode_t mode)
 {
@@ -2253,7 +2270,7 @@ static inline void take_selinux_option(char **to, char *from, int *first,
 	}
 }
 
-static int selinux_sb_copy_data(struct file_system_type *type, void *orig, void *copy)
+static int selinux_sb_copy_data(char *orig, char *copy)
 {
 	int fnosec, fsec, rc = 0;
 	char *in_save, *in_curr, *in_end;
@@ -2263,12 +2280,6 @@ static int selinux_sb_copy_data(struct file_system_type *type, void *orig, void
 	in_curr = orig;
 	sec_curr = copy;
 
-	/* Binary mount data: just copy */
-	if (type->fs_flags & FS_BINARY_MOUNTDATA) {
-		copy_page(sec_curr, in_curr);
-		goto out;
-	}
-
 	nosec = (char *)get_zeroed_page(GFP_KERNEL);
 	if (!nosec) {
 		rc = -ENOMEM;
@@ -5251,6 +5262,8 @@ static struct security_operations selinux_ops = {
 	.sb_get_mnt_opts =		selinux_get_mnt_opts,
 	.sb_set_mnt_opts =		selinux_set_mnt_opts,
 	.sb_clone_mnt_opts = 		selinux_sb_clone_mnt_opts,
+	.sb_parse_opts_str = 		selinux_parse_opts_str,
+
 
 	.inode_alloc_security =		selinux_inode_alloc_security,
 	.inode_free_security =		selinux_inode_free_security,
diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
index 837ce420d2f..f7d2f03781f 100644
--- a/security/selinux/include/security.h
+++ b/security/selinux/include/security.h
@@ -35,6 +35,11 @@
 #define POLICYDB_VERSION_MAX	POLICYDB_VERSION_POLCAP
 #endif
 
+#define CONTEXT_MNT	0x01
+#define FSCONTEXT_MNT	0x02
+#define ROOTCONTEXT_MNT	0x04
+#define DEFCONTEXT_MNT	0x08
+
 struct netlbl_lsm_secattr;
 
 extern int selinux_enabled;
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 770eb067e16..0241fd35967 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -189,17 +189,10 @@ static void smack_sb_free_security(struct super_block *sb)
  * Copy the Smack specific mount options out of the mount
  * options list.
  */
-static int smack_sb_copy_data(struct file_system_type *type, void *orig,
-			      void *smackopts)
+static int smack_sb_copy_data(char *orig, char *smackopts)
 {
 	char *cp, *commap, *otheropts, *dp;
 
-	/* Binary mount data: just copy */
-	if (type->fs_flags & FS_BINARY_MOUNTDATA) {
-		copy_page(smackopts, orig);
-		return 0;
-	}
-
 	otheropts = (char *)get_zeroed_page(GFP_KERNEL);
 	if (otheropts == NULL)
 		return -ENOMEM;
-- 
cgit v1.2.3-70-g09d2


From eac738e6cea16bfbd7b9018d60d009aedd2d14b6 Mon Sep 17 00:00:00 2001
From: Petr Tesarik <ptesarik@suse.cz>
Date: Mon, 11 Feb 2008 22:43:05 +0100
Subject: [IA64] convert sys_ptrace to arch_ptrace

Convert sys_ptrace() to arch_ptrace().

Signed-off-by: Petr Tesarik <ptesarik@suse.cz>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/ptrace.c | 42 +++---------------------------------------
 include/asm-ia64/ptrace.h |  2 --
 2 files changed, 3 insertions(+), 41 deletions(-)

(limited to 'include')

diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
index e82fe296c2c..1dfff5a8f36 100644
--- a/arch/ia64/kernel/ptrace.c
+++ b/arch/ia64/kernel/ptrace.c
@@ -1470,46 +1470,13 @@ ptrace_disable (struct task_struct *child)
 	child_psr->tb = 0;
 }
 
-asmlinkage long
-sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data)
+long
+arch_ptrace (struct task_struct *child, long request, long addr, long data)
 {
 	struct pt_regs *pt;
-	struct task_struct *child;
 	struct switch_stack *sw;
 	long ret;
 
-	lock_kernel();
-	ret = -EPERM;
-	if (request == PTRACE_TRACEME) {
-		ret = ptrace_traceme();
-		goto out;
-	}
-
-	ret = -ESRCH;
-	read_lock(&tasklist_lock);
-	{
-		child = find_task_by_pid(pid);
-		if (child)
-			get_task_struct(child);
-	}
-	read_unlock(&tasklist_lock);
-	if (!child)
-		goto out;
-	ret = -EPERM;
-	if (pid == 1)		/* no messing around with init! */
-		goto out_tsk;
-
-	if (request == PTRACE_ATTACH) {
-		ret = ptrace_attach(child);
-		if (!ret)
-			arch_ptrace_attach(child);
-		goto out_tsk;
-	}
-
-	ret = ptrace_check_attach(child, request == PTRACE_KILL);
-	if (ret < 0)
-		goto out_tsk;
-
 	pt = task_pt_regs(child);
 	sw = (struct switch_stack *) (child->thread.ksp + 16);
 
@@ -1594,7 +1561,7 @@ sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data)
 		 */
 		if (child->exit_state == EXIT_ZOMBIE)
 			/* already dead */
-			goto out_tsk;
+			return 0;
 		child->exit_code = SIGKILL;
 
 		ptrace_disable(child);
@@ -1643,9 +1610,6 @@ sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data)
 		goto out_tsk;
 	}
   out_tsk:
-	put_task_struct(child);
-  out:
-	unlock_kernel();
 	return ret;
 }
 
diff --git a/include/asm-ia64/ptrace.h b/include/asm-ia64/ptrace.h
index 0bdce7dde1b..5b523409878 100644
--- a/include/asm-ia64/ptrace.h
+++ b/include/asm-ia64/ptrace.h
@@ -233,8 +233,6 @@ struct switch_stack {
 #include <asm/current.h>
 #include <asm/page.h>
 
-#define __ARCH_SYS_PTRACE	1
-
 /*
  * We use the ia64_psr(regs)->ri to determine which of the three
  * instructions in bundle (16 bytes) took the sample. Generate
-- 
cgit v1.2.3-70-g09d2


From 8db3f5254151c3a06a764bbb18283570ba1897bf Mon Sep 17 00:00:00 2001
From: Petr Tesarik <ptesarik@suse.cz>
Date: Mon, 11 Feb 2008 22:43:38 +0100
Subject: [IA64] remove duplicate code from arch_ptrace()

Remove all code which does exactly the same thing as ptrace_request().

Signed-off-by: Petr Tesarik <ptesarik@suse.cz>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/ptrace.c | 96 ++++++++++++++---------------------------------
 include/asm-ia64/ptrace.h |  7 ++++
 2 files changed, 36 insertions(+), 67 deletions(-)

(limited to 'include')

diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
index 1dfff5a8f36..f10c8b40dd3 100644
--- a/arch/ia64/kernel/ptrace.c
+++ b/arch/ia64/kernel/ptrace.c
@@ -1454,6 +1454,35 @@ ptrace_setregs (struct task_struct *child, struct pt_all_user_regs __user *ppr)
 	return ret;
 }
 
+void
+user_enable_single_step (struct task_struct *child)
+{
+	struct ia64_psr *child_psr = ia64_psr(task_pt_regs(child));
+
+	set_tsk_thread_flag(child, TIF_SINGLESTEP);
+	child_psr->ss = 1;
+}
+
+void
+user_enable_block_step (struct task_struct *child)
+{
+	struct ia64_psr *child_psr = ia64_psr(task_pt_regs(child));
+
+	set_tsk_thread_flag(child, TIF_SINGLESTEP);
+	child_psr->tb = 1;
+}
+
+void
+user_disable_single_step (struct task_struct *child)
+{
+	struct ia64_psr *child_psr = ia64_psr(task_pt_regs(child));
+
+	/* make sure the single step/taken-branch trap bits are not set: */
+	clear_tsk_thread_flag(child, TIF_SINGLESTEP);
+	child_psr->ss = 0;
+	child_psr->tb = 0;
+}
+
 /*
  * Called by kernel/ptrace.c when detaching..
  *
@@ -1528,73 +1557,6 @@ arch_ptrace (struct task_struct *child, long request, long addr, long data)
 		ret = ptrace_request(child, PTRACE_SETSIGINFO, addr, data);
 		goto out_tsk;
 
-	      case PTRACE_SYSCALL:
-		/* continue and stop at next (return from) syscall */
-	      case PTRACE_CONT:
-		/* restart after signal. */
-		ret = -EIO;
-		if (!valid_signal(data))
-			goto out_tsk;
-		if (request == PTRACE_SYSCALL)
-			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		else
-			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		child->exit_code = data;
-
-		/*
-		 * Make sure the single step/taken-branch trap bits
-		 * are not set:
-		 */
-		clear_tsk_thread_flag(child, TIF_SINGLESTEP);
-		ia64_psr(pt)->ss = 0;
-		ia64_psr(pt)->tb = 0;
-
-		wake_up_process(child);
-		ret = 0;
-		goto out_tsk;
-
-	      case PTRACE_KILL:
-		/*
-		 * Make the child exit.  Best I can do is send it a
-		 * sigkill.  Perhaps it should be put in the status
-		 * that it wants to exit.
-		 */
-		if (child->exit_state == EXIT_ZOMBIE)
-			/* already dead */
-			return 0;
-		child->exit_code = SIGKILL;
-
-		ptrace_disable(child);
-		wake_up_process(child);
-		ret = 0;
-		goto out_tsk;
-
-	      case PTRACE_SINGLESTEP:
-		/* let child execute for one instruction */
-	      case PTRACE_SINGLEBLOCK:
-		ret = -EIO;
-		if (!valid_signal(data))
-			goto out_tsk;
-
-		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-		set_tsk_thread_flag(child, TIF_SINGLESTEP);
-		if (request == PTRACE_SINGLESTEP) {
-			ia64_psr(pt)->ss = 1;
-		} else {
-			ia64_psr(pt)->tb = 1;
-		}
-		child->exit_code = data;
-
-		/* give it a chance to run. */
-		wake_up_process(child);
-		ret = 0;
-		goto out_tsk;
-
-	      case PTRACE_DETACH:
-		/* detach a process that was attached. */
-		ret = ptrace_detach(child, data);
-		goto out_tsk;
-
 	      case PTRACE_GETREGS:
 		ret = ptrace_getregs(child,
 				     (struct pt_all_user_regs __user *) data);
diff --git a/include/asm-ia64/ptrace.h b/include/asm-ia64/ptrace.h
index 5b523409878..4b2a8d40ebc 100644
--- a/include/asm-ia64/ptrace.h
+++ b/include/asm-ia64/ptrace.h
@@ -312,6 +312,13 @@ struct switch_stack {
   #define arch_ptrace_attach(child) \
 	ptrace_attach_sync_user_rbs(child)
 
+  #define arch_has_single_step()  (1)
+  extern void user_enable_single_step(struct task_struct *);
+  extern void user_disable_single_step(struct task_struct *);
+
+  #define arch_has_block_step()   (1)
+  extern void user_enable_block_step(struct task_struct *);
+
 #endif /* !__KERNEL__ */
 
 /* pt_all_user_regs is used for PTRACE_GETREGS PTRACE_SETREGS */
-- 
cgit v1.2.3-70-g09d2


From 9821b1f4a145b20db08108362f0b4caf4f0832a1 Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier.adi@gmail.com>
Date: Wed, 5 Mar 2008 19:02:23 -0700
Subject: [Blackfin] arch: current_l1_stack_save is a pointer, so use NULL
 rather than 0

Signed-off-by: Mike Frysinger <vapier.adi@gmail.com>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
---
 include/asm-blackfin/mmu_context.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/asm-blackfin/mmu_context.h b/include/asm-blackfin/mmu_context.h
index b5eb67596ad..f55ec3c23a9 100644
--- a/include/asm-blackfin/mmu_context.h
+++ b/include/asm-blackfin/mmu_context.h
@@ -73,7 +73,7 @@ static inline void destroy_context(struct mm_struct *mm)
 	struct sram_list_struct *tmp;
 
 	if (current_l1_stack_save == mm->context.l1_stack_save)
-		current_l1_stack_save = 0;
+		current_l1_stack_save = NULL;
 	if (mm->context.l1_stack_save)
 		free_l1stack();
 
-- 
cgit v1.2.3-70-g09d2


From 7b9726a7a0d8c70ea44a5ed23726748de344f223 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Thu, 6 Mar 2008 17:23:15 +0900
Subject: sh: Fix up the sh64 build.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/lib64/udelay.c | 21 +++++++--------------
 include/asm-sh/delay.h |  5 -----
 2 files changed, 7 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/arch/sh/lib64/udelay.c b/arch/sh/lib64/udelay.c
index 23c7d17fb9f..d76bd801194 100644
--- a/arch/sh/lib64/udelay.c
+++ b/arch/sh/lib64/udelay.c
@@ -21,7 +21,7 @@
  * a 1GHz box, that's about 2 seconds.
  */
 
-void __delay(int loops)
+void __delay(unsigned long loops)
 {
 	long long dummy;
 	__asm__ __volatile__("gettr	tr0, %1\n\t"
@@ -33,24 +33,17 @@ void __delay(int loops)
 			     :"0"(loops));
 }
 
-void __udelay(unsigned long long usecs, unsigned long lpj)
+inline void __const_udelay(unsigned long xloops)
 {
-	usecs *= (((unsigned long long) HZ << 32) / 1000000) * lpj;
-	__delay((long long) usecs >> 32);
+	__delay(xloops * (HZ * cpu_data[raw_smp_processor_id()].loops_per_jiffy));
 }
 
-void __ndelay(unsigned long long nsecs, unsigned long lpj)
+void __udelay(unsigned long usecs)
 {
-	nsecs *= (((unsigned long long) HZ << 32) / 1000000000) * lpj;
-	__delay((long long) nsecs >> 32);
+	__const_udelay(usecs * 0x000010c6);  /* 2**32 / 1000000 */
 }
 
-void udelay(unsigned long usecs)
+void __ndelay(unsigned long nsecs)
 {
-	__udelay(usecs, cpu_data[raw_smp_processor_id()].loops_per_jiffy);
-}
-
-void ndelay(unsigned long nsecs)
-{
-	__ndelay(nsecs, cpu_data[raw_smp_processor_id()].loops_per_jiffy);
+	__const_udelay(nsecs * 0x00000005);
 }
diff --git a/include/asm-sh/delay.h b/include/asm-sh/delay.h
index d5d46404100..4b16bf9b56b 100644
--- a/include/asm-sh/delay.h
+++ b/include/asm-sh/delay.h
@@ -15,7 +15,6 @@ extern void __ndelay(unsigned long nsecs);
 extern void __const_udelay(unsigned long xloops);
 extern void __delay(unsigned long loops);
 
-#ifdef CONFIG_SUPERH32
 #define udelay(n) (__builtin_constant_p(n) ? \
 	((n) > 20000 ? __bad_udelay() : __const_udelay((n) * 0x10c6ul)) : \
 	__udelay(n))
@@ -23,9 +22,5 @@ extern void __delay(unsigned long loops);
 #define ndelay(n) (__builtin_constant_p(n) ? \
 	((n) > 20000 ? __bad_ndelay() : __const_udelay((n) * 5ul)) : \
 	__ndelay(n))
-#else
-extern void udelay(unsigned long usecs);
-extern void ndelay(unsigned long nsecs);
-#endif
 
 #endif /* __ASM_SH_DELAY_H */
-- 
cgit v1.2.3-70-g09d2


From 2ab42e24d63193d78f2e888a170b208f4776aaba Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Thu, 6 Mar 2008 10:57:54 +0000
Subject: Really unexport asm/page.h

Commit ed7b1889da256977574663689b598d88950bbd23 removed page.h from
include/asm-generic/Kbuild so that it shouldn't get exported.

However, it was redundantly listed in asm-mn10300/Kbuild and
asm-x86/Kbuild too. Remove those as well, so it really stops being
exported on those architectures. Also remove the redundant listing of
ptrace.h and termios.h from mn10300.

Signed-off-by: David Woodhouse <dwmw2@infradead.org>
Acked-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-mn10300/Kbuild | 4 ----
 include/asm-x86/Kbuild     | 1 -
 2 files changed, 5 deletions(-)

(limited to 'include')

diff --git a/include/asm-mn10300/Kbuild b/include/asm-mn10300/Kbuild
index 79384c537dc..c68e1680da0 100644
--- a/include/asm-mn10300/Kbuild
+++ b/include/asm-mn10300/Kbuild
@@ -1,5 +1 @@
 include include/asm-generic/Kbuild.asm
-
-unifdef-y += termios.h
-unifdef-y += ptrace.h
-unifdef-y += page.h
diff --git a/include/asm-x86/Kbuild b/include/asm-x86/Kbuild
index b04a7ff46df..3b8160a2b47 100644
--- a/include/asm-x86/Kbuild
+++ b/include/asm-x86/Kbuild
@@ -16,7 +16,6 @@ unifdef-y += ist.h
 unifdef-y += mce.h
 unifdef-y += msr.h
 unifdef-y += mtrr.h
-unifdef-y += page.h
 unifdef-y += posix_types_32.h
 unifdef-y += posix_types_64.h
 unifdef-y += ptrace.h
-- 
cgit v1.2.3-70-g09d2


From 45e18c228e131592a922859e1525770a1803191d Mon Sep 17 00:00:00 2001
From: Harvey Harrison <harvey.harrison@gmail.com>
Date: Thu, 6 Mar 2008 09:49:01 -0800
Subject: [IA64] kprobes arch consolidation build fix

ia64 named their handler kprobes_fault_handler while all other
arches used kprobe_fault_handler.  Change the function definition
and header declaration.

Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/kprobes.c | 2 +-
 arch/ia64/mm/fault.c       | 2 +-
 include/asm-ia64/kprobes.h | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index 615c3d2b634..8d9a446a0d1 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -838,7 +838,7 @@ out:
 	return 1;
 }
 
-int __kprobes kprobes_fault_handler(struct pt_regs *regs, int trapnr)
+int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 {
 	struct kprobe *cur = kprobe_running();
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index 3e69881648a..23088bed111 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -26,7 +26,7 @@ static inline int notify_page_fault(struct pt_regs *regs, int trap)
 	if (!user_mode(regs)) {
 		/* kprobe_running() needs smp_processor_id() */
 		preempt_disable();
-		if (kprobe_running() && kprobes_fault_handler(regs, trap))
+		if (kprobe_running() && kprobe_fault_handler(regs, trap))
 			ret = 1;
 		preempt_enable();
 	}
diff --git a/include/asm-ia64/kprobes.h b/include/asm-ia64/kprobes.h
index 8233b3a964c..d03bf9ff68e 100644
--- a/include/asm-ia64/kprobes.h
+++ b/include/asm-ia64/kprobes.h
@@ -117,7 +117,7 @@ struct arch_specific_insn {
 	unsigned short slot;
 };
 
-extern int kprobes_fault_handler(struct pt_regs *regs, int trapnr);
+extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
 extern int kprobe_exceptions_notify(struct notifier_block *self,
 				    unsigned long val, void *data);
 
-- 
cgit v1.2.3-70-g09d2


From 1c61fc40fc264059ff41a614ed2d899127288281 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 5 Mar 2008 13:58:17 -0800
Subject: slab - use angle brackets for include of kmalloc_sizes.h

Make them all use angle brackets and the directory name.

Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>
Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: Christoph Lameter <clameter@sgi.com>
---
 include/linux/slab_def.h | 4 ++--
 mm/slab.c                | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index fcc48096ee6..39c3a5eb8eb 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -41,7 +41,7 @@ static inline void *kmalloc(size_t size, gfp_t flags)
 			goto found; \
 		else \
 			i++;
-#include "kmalloc_sizes.h"
+#include <linux/kmalloc_sizes.h>
 #undef CACHE
 		{
 			extern void __you_cannot_kmalloc_that_much(void);
@@ -75,7 +75,7 @@ static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
 			goto found; \
 		else \
 			i++;
-#include "kmalloc_sizes.h"
+#include <linux/kmalloc_sizes.h>
 #undef CACHE
 		{
 			extern void __you_cannot_kmalloc_that_much(void);
diff --git a/mm/slab.c b/mm/slab.c
index 5d16c8a3049..f7faff72cf5 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -333,7 +333,7 @@ static __always_inline int index_of(const size_t size)
 		return i; \
 	else \
 		i++;
-#include "linux/kmalloc_sizes.h"
+#include <linux/kmalloc_sizes.h>
 #undef CACHE
 		__bad_size();
 	} else
-- 
cgit v1.2.3-70-g09d2


From 810b38179e9e4d4f57b4b733767bb08f8291a965 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 29 Feb 2008 15:21:01 -0500
Subject: sched: retain vruntime

Kei Tokunaga reported an interactivity problem when moving tasks
between control groups.

Tasks would retain their old vruntime when moved between groups, this
can cause funny lags. Re-set the vruntime on group move to fit within
the new tree.

Reported-by: Kei Tokunaga <tokunaga.keiich@jp.fujitsu.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h |  4 ++++
 kernel/sched.c        |  5 +++++
 kernel/sched_fair.c   | 14 ++++++++++++++
 3 files changed, 23 insertions(+)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9ae4030067a..11d8e9a74ef 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -899,6 +899,10 @@ struct sched_class {
 			     int running);
 	void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
 			     int oldprio, int running);
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	void (*moved_group) (struct task_struct *p);
+#endif
 };
 
 struct load_weight {
diff --git a/kernel/sched.c b/kernel/sched.c
index dcd553cc4ee..0b949c4e73a 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -7625,6 +7625,11 @@ void sched_move_task(struct task_struct *tsk)
 
 	set_task_rq(tsk, task_cpu(tsk));
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	if (tsk->sched_class->moved_group)
+		tsk->sched_class->moved_group(tsk);
+#endif
+
 	if (on_rq) {
 		if (unlikely(running))
 			tsk->sched_class->set_curr_task(rq);
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 3df4d46994c..e2a53051561 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1353,6 +1353,16 @@ static void set_curr_task_fair(struct rq *rq)
 		set_next_entity(cfs_rq_of(se), se);
 }
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static void moved_group_fair(struct task_struct *p)
+{
+	struct cfs_rq *cfs_rq = task_cfs_rq(p);
+
+	update_curr(cfs_rq);
+	place_entity(cfs_rq, &p->se, 1);
+}
+#endif
+
 /*
  * All the scheduling class methods:
  */
@@ -1381,6 +1391,10 @@ static const struct sched_class fair_sched_class = {
 
 	.prio_changed		= prio_changed_fair,
 	.switched_to		= switched_to_fair,
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	.moved_group		= moved_group_fair,
+#endif
 };
 
 #ifdef CONFIG_SCHED_DEBUG
-- 
cgit v1.2.3-70-g09d2


From c37dcd334c0b0a46a90cfa13b9f69e2aaa89bc09 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 6 Mar 2008 12:34:50 -0500
Subject: NFS: Fix the fsid revalidation in nfs_update_inode()

When we detect that we've crossed a mountpoint on the remote server, we
must take care not to use that inode to revalidate the fsid on our
current superblock. To do so, we label the inode as a remote mountpoint,
and check for that in nfs_update_inode().

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c         | 6 ++++--
 include/linux/nfs_fs.h | 1 +
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 966a8850aa3..a4c7cf2bff3 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -299,6 +299,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 				else
 					inode->i_op = &nfs_mountpoint_inode_operations;
 				inode->i_fop = NULL;
+				set_bit(NFS_INO_MOUNTPOINT, &nfsi->flags);
 			}
 		} else if (S_ISLNK(inode->i_mode))
 			inode->i_op = &nfs_symlink_inode_operations;
@@ -1003,8 +1004,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 
 	server = NFS_SERVER(inode);
 	/* Update the fsid? */
-	if (S_ISDIR(inode->i_mode)
-			&& !nfs_fsid_equal(&server->fsid, &fattr->fsid))
+	if (S_ISDIR(inode->i_mode) &&
+			!nfs_fsid_equal(&server->fsid, &fattr->fsid) &&
+			!test_bit(NFS_INO_MOUNTPOINT, &nfsi->flags))
 		server->fsid = fattr->fsid;
 
 	/*
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index a69ba80f2df..f4a0e4c218d 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -195,6 +195,7 @@ struct nfs_inode {
 #define NFS_INO_ADVISE_RDPLUS	(1)		/* advise readdirplus */
 #define NFS_INO_STALE		(2)		/* possible stale inode */
 #define NFS_INO_ACL_LRU_SET	(3)		/* Inode is on the LRU list */
+#define NFS_INO_MOUNTPOINT	(4)		/* inode is remote mountpoint */
 
 static inline struct nfs_inode *NFS_I(const struct inode *inode)
 {
-- 
cgit v1.2.3-70-g09d2