From 464abc5de6ea8f4af1c1246e0d1ea7b07362db43 Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Mon, 25 Feb 2008 13:50:20 +0800 Subject: [Blackfin] arch: Cleanup abd Simplify: - Simplify init_arch_irq - Make code more readable - Remove useless SSYNCs - Fix comments Signed-off-by: Michael Hennerich Signed-off-by: Bryan Wu --- include/asm-blackfin/irq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/asm-blackfin/irq.h b/include/asm-blackfin/irq.h index 65480dab244..86b67834354 100644 --- a/include/asm-blackfin/irq.h +++ b/include/asm-blackfin/irq.h @@ -67,4 +67,6 @@ static __inline__ int irq_canonicalize(int irq) #define NO_IRQ ((unsigned int)(-1)) #endif +#define SIC_SYSIRQ(irq) (irq - (IRQ_CORETMR + 1)) + #endif /* _BFIN_IRQ_H_ */ -- cgit v1.2.3-70-g09d2 From 3927819d511f5b5855e6f2345f24e7b04e4fd2f5 Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Mon, 25 Feb 2008 14:39:50 +0800 Subject: [Blackfin] arch: Fix CONFIG_PM support for BF561 Signed-off-by: Michael Hennerich Signed-off-by: Bryan Wu --- arch/blackfin/mach-common/dpmc.S | 6 ++---- include/asm-blackfin/mach-bf561/blackfin.h | 3 ++- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/arch/blackfin/mach-common/dpmc.S b/arch/blackfin/mach-common/dpmc.S index fc9f6eb9018..9d45aa3265b 100644 --- a/arch/blackfin/mach-common/dpmc.S +++ b/arch/blackfin/mach-common/dpmc.S @@ -31,9 +31,6 @@ #include #include -.text - -#if !defined(CONFIG_BF561) .section .l1.text @@ -328,10 +325,12 @@ ENTRY(_set_sic_iwr) RTS; ENTRY(_set_rtc_istat) +#ifndef CONFIG_BF561 P0.H = hi(RTC_ISTAT); P0.L = lo(RTC_ISTAT); w[P0] = R0.L; SSYNC; +#endif RTS; ENTRY(_test_pll_locked) @@ -342,4 +341,3 @@ ENTRY(_test_pll_locked) CC = BITTST(R0,5); IF !CC JUMP 1b; RTS; -#endif diff --git a/include/asm-blackfin/mach-bf561/blackfin.h b/include/asm-blackfin/mach-bf561/blackfin.h index 362617f9384..3a16df2c86d 100644 --- a/include/asm-blackfin/mach-bf561/blackfin.h +++ b/include/asm-blackfin/mach-bf561/blackfin.h @@ -49,7 +49,8 @@ #define bfin_read_FIO_INEN() bfin_read_FIO0_INEN() #define bfin_write_FIO_INEN(val) bfin_write_FIO0_INEN(val) - +#define SIC_IWR0 SICA_IWR0 +#define SIC_IWR1 SICA_IWR1 #define SIC_IAR0 SICA_IAR0 #define bfin_write_SIC_IMASK0 bfin_write_SICA_IMASK0 #define bfin_write_SIC_IMASK1 bfin_write_SICA_IMASK1 -- cgit v1.2.3-70-g09d2 From fee40119a2b2abbe239438b74052854db6f3444d Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Mon, 25 Feb 2008 15:06:07 +0800 Subject: [Blackfin] arch: make sure we have proper description/copyright/license lines Signed-off-by: Mike Frysinger Signed-off-by: Bryan Wu --- arch/blackfin/kernel/gptimers.c | 8 ++++---- include/asm-blackfin/gptimers.h | 7 +++---- 2 files changed, 7 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/arch/blackfin/kernel/gptimers.c b/arch/blackfin/kernel/gptimers.c index 5cf4bdb1df3..1904d8b5332 100644 --- a/arch/blackfin/kernel/gptimers.c +++ b/arch/blackfin/kernel/gptimers.c @@ -1,9 +1,9 @@ /* - * bfin_gptimers.c - derived from bf53x_timers.c - * Driver for General Purpose Timer functions on the Blackfin processor + * gptimers.c - Blackfin General Purpose Timer core API * - * Copyright (C) 2005 John DeHority - * Copyright (C) 2006 Hella Aglaia GmbH (awe@aglaia-gmbh.de) + * Copyright (c) 2005-2008 Analog Devices Inc. + * Copyright (C) 2005 John DeHority + * Copyright (C) 2006 Hella Aglaia GmbH (awe@aglaia-gmbh.de) * * Licensed under the GPLv2. */ diff --git a/include/asm-blackfin/gptimers.h b/include/asm-blackfin/gptimers.h index 8265ea473d5..4f318f1fd2d 100644 --- a/include/asm-blackfin/gptimers.h +++ b/include/asm-blackfin/gptimers.h @@ -1,12 +1,11 @@ /* - * include/asm/bf5xx_timers.h - * - * This file contains the major Data structures and constants - * used for General Purpose Timer Implementation in BF5xx + * gptimers.h - Blackfin General Purpose Timer structs/defines/prototypes * + * Copyright (c) 2005-2008 Analog Devices Inc. * Copyright (C) 2005 John DeHority * Copyright (C) 2006 Hella Aglaia GmbH (awe@aglaia-gmbh.de) * + * Licensed under the GPL-2. */ #ifndef _BLACKFIN_TIMERS_H_ -- cgit v1.2.3-70-g09d2 From 40edad3efadb3aa486c7a5452401c4de10902496 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Mon, 25 Feb 2008 15:23:30 +0800 Subject: [Blackfin] arch: add bfin_clear_PPIx_STATUS() helper funcs like we have for other parts Signed-off-by: Mike Frysinger Signed-off-by: Bryan Wu --- include/asm-blackfin/mach-bf561/cdefBF561.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/asm-blackfin/mach-bf561/cdefBF561.h b/include/asm-blackfin/mach-bf561/cdefBF561.h index d667816486c..1bc8d2f89cc 100644 --- a/include/asm-blackfin/mach-bf561/cdefBF561.h +++ b/include/asm-blackfin/mach-bf561/cdefBF561.h @@ -559,6 +559,7 @@ static __inline__ void bfin_write_VR_CTL(unsigned int val) #define bfin_write_PPI0_CONTROL(val) bfin_write16(PPI0_CONTROL,val) #define bfin_read_PPI0_STATUS() bfin_read16(PPI0_STATUS) #define bfin_write_PPI0_STATUS(val) bfin_write16(PPI0_STATUS,val) +#define bfin_clear_PPI0_STATUS() bfin_read_PPI0_STATUS() #define bfin_read_PPI0_COUNT() bfin_read16(PPI0_COUNT) #define bfin_write_PPI0_COUNT(val) bfin_write16(PPI0_COUNT,val) #define bfin_read_PPI0_DELAY() bfin_read16(PPI0_DELAY) @@ -570,6 +571,7 @@ static __inline__ void bfin_write_VR_CTL(unsigned int val) #define bfin_write_PPI1_CONTROL(val) bfin_write16(PPI1_CONTROL,val) #define bfin_read_PPI1_STATUS() bfin_read16(PPI1_STATUS) #define bfin_write_PPI1_STATUS(val) bfin_write16(PPI1_STATUS,val) +#define bfin_clear_PPI1_STATUS() bfin_read_PPI1_STATUS() #define bfin_read_PPI1_COUNT() bfin_read16(PPI1_COUNT) #define bfin_write_PPI1_COUNT(val) bfin_write16(PPI1_COUNT,val) #define bfin_read_PPI1_DELAY() bfin_read16(PPI1_DELAY) -- cgit v1.2.3-70-g09d2 From 0bcfd70ea11a5d6f2362be463513a60245a62baf Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Mon, 24 Dec 2007 19:40:05 +0800 Subject: [Blackfin] serial driver: fix bug - cache the bits of the LSR on systems where the LSR is read-to-clear Cache the bits of the LSR on systems where the LSR is read-to-clear so that we can safely read the LSR in random places. this fixes older parts where break/framing/parity/overflow was not being detected at all in PIO mode, and this fixes newer parts where break/framing/parity/overflow was being reported all the time without being cleared. Signed-off-by: Mike Frysinger Signed-off-by: Bryan Wu --- drivers/serial/bfin_5xx.c | 10 +++++++--- include/asm-blackfin/mach-bf527/bfin_serial_5xx.h | 19 ++++++++++++++++++- include/asm-blackfin/mach-bf533/bfin_serial_5xx.h | 19 ++++++++++++++++++- include/asm-blackfin/mach-bf537/bfin_serial_5xx.h | 19 ++++++++++++++++++- include/asm-blackfin/mach-bf548/bfin_serial_5xx.h | 1 + include/asm-blackfin/mach-bf561/bfin_serial_5xx.h | 19 ++++++++++++++++++- 6 files changed, 80 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/serial/bfin_5xx.c b/drivers/serial/bfin_5xx.c index ca9ceaa113a..af84984df77 100644 --- a/drivers/serial/bfin_5xx.c +++ b/drivers/serial/bfin_5xx.c @@ -216,8 +216,10 @@ static void bfin_serial_rx_chars(struct bfin_serial_port *uart) struct pt_regs *regs = get_irq_regs(); #endif - ch = UART_GET_CHAR(uart); status = UART_GET_LSR(uart); + UART_CLEAR_LSR(uart); + + ch = UART_GET_CHAR(uart); uart->port.icount.rx++; #ifdef CONFIG_KGDB_UART @@ -335,7 +337,7 @@ static irqreturn_t bfin_serial_rx_int(int irq, void *dev_id) struct bfin_serial_port *uart = dev_id; spin_lock(&uart->port.lock); - while ((UART_GET_IER(uart) & ERBFI) && (UART_GET_LSR(uart) & DR)) + while (UART_GET_LSR(uart) & DR) bfin_serial_rx_chars(uart); spin_unlock(&uart->port.lock); @@ -347,7 +349,7 @@ static irqreturn_t bfin_serial_tx_int(int irq, void *dev_id) struct bfin_serial_port *uart = dev_id; spin_lock(&uart->port.lock); - if ((UART_GET_IER(uart) & ETBEI) && (UART_GET_LSR(uart) & THRE)) + if (UART_GET_LSR(uart) & THRE) bfin_serial_tx_chars(uart); spin_unlock(&uart->port.lock); @@ -428,6 +430,8 @@ static void bfin_serial_dma_rx_chars(struct bfin_serial_port *uart) int i, flg, status; status = UART_GET_LSR(uart); + UART_CLEAR_LSR(uart); + uart->port.icount.rx += CIRC_CNT(uart->rx_dma_buf.head, uart->rx_dma_buf.tail, UART_XMIT_SIZE);; if (status & BI) { diff --git a/include/asm-blackfin/mach-bf527/bfin_serial_5xx.h b/include/asm-blackfin/mach-bf527/bfin_serial_5xx.h index 15dbc21eed8..233c585efc1 100644 --- a/include/asm-blackfin/mach-bf527/bfin_serial_5xx.h +++ b/include/asm-blackfin/mach-bf527/bfin_serial_5xx.h @@ -23,7 +23,6 @@ #define UART_GET_DLH(uart) bfin_read16(((uart)->port.membase + OFFSET_DLH)) #define UART_GET_IIR(uart) bfin_read16(((uart)->port.membase + OFFSET_IIR)) #define UART_GET_LCR(uart) bfin_read16(((uart)->port.membase + OFFSET_LCR)) -#define UART_GET_LSR(uart) bfin_read16(((uart)->port.membase + OFFSET_LSR)) #define UART_GET_GCTL(uart) bfin_read16(((uart)->port.membase + OFFSET_GCTL)) #define UART_PUT_CHAR(uart, v) bfin_write16(((uart)->port.membase + OFFSET_THR), v) @@ -58,6 +57,7 @@ struct bfin_serial_port { struct uart_port port; unsigned int old_status; + unsigned int lsr; #ifdef CONFIG_SERIAL_BFIN_DMA int tx_done; int tx_count; @@ -76,6 +76,23 @@ struct bfin_serial_port { #endif }; +/* The hardware clears the LSR bits upon read, so we need to cache + * some of the more fun bits in software so they don't get lost + * when checking the LSR in other code paths (TX). + */ +static inline unsigned int UART_GET_LSR(struct bfin_serial_port *uart) +{ + unsigned int lsr = bfin_read16(uart->port.membase + OFFSET_LSR); + uart->lsr |= (lsr & (BI|FE|PE|OE)); + return lsr | uart->lsr; +} + +static inline void UART_CLEAR_LSR(struct bfin_serial_port *uart) +{ + uart->lsr = 0; + bfin_write16(uart->port.membase + OFFSET_LSR, -1); +} + struct bfin_serial_port bfin_serial_ports[NR_PORTS]; struct bfin_serial_res { unsigned long uart_base_addr; diff --git a/include/asm-blackfin/mach-bf533/bfin_serial_5xx.h b/include/asm-blackfin/mach-bf533/bfin_serial_5xx.h index 7871d4313f4..b619065ceeb 100644 --- a/include/asm-blackfin/mach-bf533/bfin_serial_5xx.h +++ b/include/asm-blackfin/mach-bf533/bfin_serial_5xx.h @@ -23,7 +23,6 @@ #define UART_GET_DLH(uart) bfin_read16(((uart)->port.membase + OFFSET_DLH)) #define UART_GET_IIR(uart) bfin_read16(((uart)->port.membase + OFFSET_IIR)) #define UART_GET_LCR(uart) bfin_read16(((uart)->port.membase + OFFSET_LCR)) -#define UART_GET_LSR(uart) bfin_read16(((uart)->port.membase + OFFSET_LSR)) #define UART_GET_GCTL(uart) bfin_read16(((uart)->port.membase + OFFSET_GCTL)) #define UART_PUT_CHAR(uart,v) bfin_write16(((uart)->port.membase + OFFSET_THR),v) @@ -46,6 +45,7 @@ struct bfin_serial_port { struct uart_port port; unsigned int old_status; + unsigned int lsr; #ifdef CONFIG_SERIAL_BFIN_DMA int tx_done; int tx_count; @@ -64,6 +64,23 @@ struct bfin_serial_port { #endif }; +/* The hardware clears the LSR bits upon read, so we need to cache + * some of the more fun bits in software so they don't get lost + * when checking the LSR in other code paths (TX). + */ +static inline unsigned int UART_GET_LSR(struct bfin_serial_port *uart) +{ + unsigned int lsr = bfin_read16(uart->port.membase + OFFSET_LSR); + uart->lsr |= (lsr & (BI|FE|PE|OE)); + return lsr | uart->lsr; +} + +static inline void UART_CLEAR_LSR(struct bfin_serial_port *uart) +{ + uart->lsr = 0; + bfin_write16(uart->port.membase + OFFSET_LSR, -1); +} + struct bfin_serial_port bfin_serial_ports[NR_PORTS]; struct bfin_serial_res { unsigned long uart_base_addr; diff --git a/include/asm-blackfin/mach-bf537/bfin_serial_5xx.h b/include/asm-blackfin/mach-bf537/bfin_serial_5xx.h index 86e45c37983..f18c517cc93 100644 --- a/include/asm-blackfin/mach-bf537/bfin_serial_5xx.h +++ b/include/asm-blackfin/mach-bf537/bfin_serial_5xx.h @@ -23,7 +23,6 @@ #define UART_GET_DLH(uart) bfin_read16(((uart)->port.membase + OFFSET_DLH)) #define UART_GET_IIR(uart) bfin_read16(((uart)->port.membase + OFFSET_IIR)) #define UART_GET_LCR(uart) bfin_read16(((uart)->port.membase + OFFSET_LCR)) -#define UART_GET_LSR(uart) bfin_read16(((uart)->port.membase + OFFSET_LSR)) #define UART_GET_GCTL(uart) bfin_read16(((uart)->port.membase + OFFSET_GCTL)) #define UART_PUT_CHAR(uart,v) bfin_write16(((uart)->port.membase + OFFSET_THR),v) @@ -58,6 +57,7 @@ struct bfin_serial_port { struct uart_port port; unsigned int old_status; + unsigned int lsr; #ifdef CONFIG_SERIAL_BFIN_DMA int tx_done; int tx_count; @@ -76,6 +76,23 @@ struct bfin_serial_port { #endif }; +/* The hardware clears the LSR bits upon read, so we need to cache + * some of the more fun bits in software so they don't get lost + * when checking the LSR in other code paths (TX). + */ +static inline unsigned int UART_GET_LSR(struct bfin_serial_port *uart) +{ + unsigned int lsr = bfin_read16(uart->port.membase + OFFSET_LSR); + uart->lsr |= (lsr & (BI|FE|PE|OE)); + return lsr | uart->lsr; +} + +static inline void UART_CLEAR_LSR(struct bfin_serial_port *uart) +{ + uart->lsr = 0; + bfin_write16(uart->port.membase + OFFSET_LSR, -1); +} + struct bfin_serial_port bfin_serial_ports[NR_PORTS]; struct bfin_serial_res { unsigned long uart_base_addr; diff --git a/include/asm-blackfin/mach-bf548/bfin_serial_5xx.h b/include/asm-blackfin/mach-bf548/bfin_serial_5xx.h index 3770aa38ee9..8733d47747e 100644 --- a/include/asm-blackfin/mach-bf548/bfin_serial_5xx.h +++ b/include/asm-blackfin/mach-bf548/bfin_serial_5xx.h @@ -32,6 +32,7 @@ #define UART_PUT_DLH(uart,v) bfin_write16(((uart)->port.membase + OFFSET_DLH),v) #define UART_PUT_LSR(uart,v) bfin_write16(((uart)->port.membase + OFFSET_LSR),v) #define UART_PUT_LCR(uart,v) bfin_write16(((uart)->port.membase + OFFSET_LCR),v) +#define UART_CLEAR_LSR(uart) bfin_write16(((uart)->port.membase + OFFSET_LSR), -1) #define UART_PUT_GCTL(uart,v) bfin_write16(((uart)->port.membase + OFFSET_GCTL),v) #if defined(CONFIG_BFIN_UART0_CTSRTS) || defined(CONFIG_BFIN_UART1_CTSRTS) diff --git a/include/asm-blackfin/mach-bf561/bfin_serial_5xx.h b/include/asm-blackfin/mach-bf561/bfin_serial_5xx.h index 7871d4313f4..b619065ceeb 100644 --- a/include/asm-blackfin/mach-bf561/bfin_serial_5xx.h +++ b/include/asm-blackfin/mach-bf561/bfin_serial_5xx.h @@ -23,7 +23,6 @@ #define UART_GET_DLH(uart) bfin_read16(((uart)->port.membase + OFFSET_DLH)) #define UART_GET_IIR(uart) bfin_read16(((uart)->port.membase + OFFSET_IIR)) #define UART_GET_LCR(uart) bfin_read16(((uart)->port.membase + OFFSET_LCR)) -#define UART_GET_LSR(uart) bfin_read16(((uart)->port.membase + OFFSET_LSR)) #define UART_GET_GCTL(uart) bfin_read16(((uart)->port.membase + OFFSET_GCTL)) #define UART_PUT_CHAR(uart,v) bfin_write16(((uart)->port.membase + OFFSET_THR),v) @@ -46,6 +45,7 @@ struct bfin_serial_port { struct uart_port port; unsigned int old_status; + unsigned int lsr; #ifdef CONFIG_SERIAL_BFIN_DMA int tx_done; int tx_count; @@ -64,6 +64,23 @@ struct bfin_serial_port { #endif }; +/* The hardware clears the LSR bits upon read, so we need to cache + * some of the more fun bits in software so they don't get lost + * when checking the LSR in other code paths (TX). + */ +static inline unsigned int UART_GET_LSR(struct bfin_serial_port *uart) +{ + unsigned int lsr = bfin_read16(uart->port.membase + OFFSET_LSR); + uart->lsr |= (lsr & (BI|FE|PE|OE)); + return lsr | uart->lsr; +} + +static inline void UART_CLEAR_LSR(struct bfin_serial_port *uart) +{ + uart->lsr = 0; + bfin_write16(uart->port.membase + OFFSET_LSR, -1); +} + struct bfin_serial_port bfin_serial_ports[NR_PORTS]; struct bfin_serial_res { unsigned long uart_base_addr; -- cgit v1.2.3-70-g09d2 From 8851c71eb97610f0f63121d62345c969f71201a2 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Mon, 24 Dec 2007 19:48:04 +0800 Subject: [Blackfin] serial driver: rework break flood anomaly handling to be more robust/realistic about what we can actually work around Signed-off-by: Mike Frysinger Signed-off-by: Bryan Wu --- drivers/serial/bfin_5xx.c | 66 ++++++++++++++++++----- include/asm-blackfin/mach-bf533/bfin_serial_5xx.h | 3 ++ include/asm-blackfin/mach-bf561/bfin_serial_5xx.h | 3 ++ 3 files changed, 58 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/drivers/serial/bfin_5xx.c b/drivers/serial/bfin_5xx.c index af84984df77..50aa3b2a19b 100644 --- a/drivers/serial/bfin_5xx.c +++ b/drivers/serial/bfin_5xx.c @@ -206,12 +206,20 @@ int kgdb_get_debug_char(void) } #endif +#if ANOMALY_05000230 && defined(CONFIG_SERIAL_BFIN_PIO) +# define UART_GET_ANOMALY_THRESHOLD(uart) ((uart)->anomaly_threshold) +# define UART_SET_ANOMALY_THRESHOLD(uart, v) ((uart)->anomaly_threshold = (v)) +#else +# define UART_GET_ANOMALY_THRESHOLD(uart) 0 +# define UART_SET_ANOMALY_THRESHOLD(uart, v) +#endif + #ifdef CONFIG_SERIAL_BFIN_PIO static void bfin_serial_rx_chars(struct bfin_serial_port *uart) { struct tty_struct *tty = uart->port.info->tty; unsigned int status, ch, flg; - static int in_break = 0; + static struct timeval anomaly_start = { .tv_sec = 0 }; #ifdef CONFIG_KGDB_UART struct pt_regs *regs = get_irq_regs(); #endif @@ -244,28 +252,56 @@ static void bfin_serial_rx_chars(struct bfin_serial_port *uart) #endif if (ANOMALY_05000230) { - /* The BF533 family of processors have a nice misbehavior where - * they continuously generate characters for a "single" break. + /* The BF533 (and BF561) family of processors have a nice anomaly + * where they continuously generate characters for a "single" break. * We have to basically ignore this flood until the "next" valid - * character comes across. All other Blackfin families operate - * properly though. + * character comes across. Due to the nature of the flood, it is + * not possible to reliably catch bytes that are sent too quickly + * after this break. So application code talking to the Blackfin + * which sends a break signal must allow at least 1.5 character + * times after the end of the break for things to stabilize. This + * timeout was picked as it must absolutely be larger than 1 + * character time +/- some percent. So 1.5 sounds good. All other + * Blackfin families operate properly. Woo. * Note: While Anomaly 05000230 does not directly address this, * the changes that went in for it also fixed this issue. + * That anomaly was fixed in 0.5+ silicon. I like bunnies. */ - if (in_break) { - if (ch != 0) { - in_break = 0; - ch = UART_GET_CHAR(uart); - if (bfin_revid() < 5) - return; - } else - return; + if (anomaly_start.tv_sec) { + struct timeval curr; + suseconds_t usecs; + + if ((~ch & (~ch + 1)) & 0xff) + goto known_good_char; + + do_gettimeofday(&curr); + if (curr.tv_sec - anomaly_start.tv_sec > 1) + goto known_good_char; + + usecs = 0; + if (curr.tv_sec != anomaly_start.tv_sec) + usecs += USEC_PER_SEC; + usecs += curr.tv_usec - anomaly_start.tv_usec; + + if (usecs > UART_GET_ANOMALY_THRESHOLD(uart)) + goto known_good_char; + + if (ch) + anomaly_start.tv_sec = 0; + else + anomaly_start = curr; + + return; + + known_good_char: + anomaly_start.tv_sec = 0; } } if (status & BI) { if (ANOMALY_05000230) - in_break = 1; + if (bfin_revid() < 5) + do_gettimeofday(&anomaly_start); uart->port.icount.brk++; if (uart_handle_break(&uart->port)) goto ignore_char; @@ -778,6 +814,8 @@ bfin_serial_set_termios(struct uart_port *port, struct ktermios *termios, quot = uart_get_divisor(port, baud); spin_lock_irqsave(&uart->port.lock, flags); + UART_SET_ANOMALY_THRESHOLD(uart, USEC_PER_SEC / baud * 15); + do { lsr = UART_GET_LSR(uart); } while (!(lsr & TEMT)); diff --git a/include/asm-blackfin/mach-bf533/bfin_serial_5xx.h b/include/asm-blackfin/mach-bf533/bfin_serial_5xx.h index b619065ceeb..a1b4f4eebd0 100644 --- a/include/asm-blackfin/mach-bf533/bfin_serial_5xx.h +++ b/include/asm-blackfin/mach-bf533/bfin_serial_5xx.h @@ -57,6 +57,9 @@ struct bfin_serial_port { struct work_struct tx_dma_workqueue; #else struct work_struct cts_workqueue; +# if ANOMALY_05000230 + unsigned int anomaly_threshold; +# endif #endif #ifdef CONFIG_SERIAL_BFIN_CTSRTS int cts_pin; diff --git a/include/asm-blackfin/mach-bf561/bfin_serial_5xx.h b/include/asm-blackfin/mach-bf561/bfin_serial_5xx.h index b619065ceeb..a1b4f4eebd0 100644 --- a/include/asm-blackfin/mach-bf561/bfin_serial_5xx.h +++ b/include/asm-blackfin/mach-bf561/bfin_serial_5xx.h @@ -57,6 +57,9 @@ struct bfin_serial_port { struct work_struct tx_dma_workqueue; #else struct work_struct cts_workqueue; +# if ANOMALY_05000230 + unsigned int anomaly_threshold; +# endif #endif #ifdef CONFIG_SERIAL_BFIN_CTSRTS int cts_pin; -- cgit v1.2.3-70-g09d2 From 4cb4f22b19237e63c460c53fbd1c417cdaf63014 Mon Sep 17 00:00:00 2001 From: Sonic Zhang Date: Sat, 2 Feb 2008 14:29:25 +0800 Subject: [Blackfin] serial driver: Fix bug Poll RTS/CTS status in DMA mode as well https://blackfin.uclinux.org/gf/project/uclinux-dist/tracker/?action=TrackerItemEdit&tracker_item_id=3858 Signed-off-by: Sonic Zhang Signed-off-by: Bryan Wu --- drivers/serial/bfin_5xx.c | 16 +++++----------- include/asm-blackfin/mach-bf527/bfin_serial_5xx.h | 3 +-- include/asm-blackfin/mach-bf533/bfin_serial_5xx.h | 2 +- include/asm-blackfin/mach-bf537/bfin_serial_5xx.h | 3 +-- include/asm-blackfin/mach-bf548/bfin_serial_5xx.h | 3 +-- include/asm-blackfin/mach-bf561/bfin_serial_5xx.h | 2 +- 6 files changed, 10 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/drivers/serial/bfin_5xx.c b/drivers/serial/bfin_5xx.c index a07df539e52..af866ab3f5a 100644 --- a/drivers/serial/bfin_5xx.c +++ b/drivers/serial/bfin_5xx.c @@ -53,7 +53,6 @@ #ifdef CONFIG_SERIAL_BFIN_DMA static void bfin_serial_dma_tx_chars(struct bfin_serial_port *uart); #else -static void bfin_serial_do_work(struct work_struct *work); static void bfin_serial_tx_chars(struct bfin_serial_port *uart); #endif @@ -372,8 +371,9 @@ static irqreturn_t bfin_serial_tx_int(int irq, void *dev_id) return IRQ_HANDLED; } +#endif - +#ifdef CONFIG_SERIAL_BFIN_CTSRTS static void bfin_serial_do_work(struct work_struct *work) { struct bfin_serial_port *uart = container_of(work, struct bfin_serial_port, cts_workqueue); @@ -607,22 +607,17 @@ static void bfin_serial_mctrl_check(struct bfin_serial_port *uart) { #ifdef CONFIG_SERIAL_BFIN_CTSRTS unsigned int status; -# ifdef CONFIG_SERIAL_BFIN_DMA struct uart_info *info = uart->port.info; struct tty_struct *tty = info->tty; status = bfin_serial_get_mctrl(&uart->port); + uart_handle_cts_change(&uart->port, status & TIOCM_CTS); if (!(status & TIOCM_CTS)) { tty->hw_stopped = 1; + schedule_work(&uart->cts_workqueue); } else { tty->hw_stopped = 0; } -# else - status = bfin_serial_get_mctrl(&uart->port); - uart_handle_cts_change(&uart->port, status & TIOCM_CTS); - if (!(status & TIOCM_CTS)) - schedule_work(&uart->cts_workqueue); -# endif #endif } @@ -939,10 +934,9 @@ static void __init bfin_serial_init_ports(void) bfin_serial_ports[i].rx_dma_channel = bfin_serial_resource[i].uart_rx_dma_channel; init_timer(&(bfin_serial_ports[i].rx_dma_timer)); -#else - INIT_WORK(&bfin_serial_ports[i].cts_workqueue, bfin_serial_do_work); #endif #ifdef CONFIG_SERIAL_BFIN_CTSRTS + INIT_WORK(&bfin_serial_ports[i].cts_workqueue, bfin_serial_do_work); bfin_serial_ports[i].cts_pin = bfin_serial_resource[i].uart_cts_pin; bfin_serial_ports[i].rts_pin = diff --git a/include/asm-blackfin/mach-bf527/bfin_serial_5xx.h b/include/asm-blackfin/mach-bf527/bfin_serial_5xx.h index 233c585efc1..c0694ecd2ec 100644 --- a/include/asm-blackfin/mach-bf527/bfin_serial_5xx.h +++ b/include/asm-blackfin/mach-bf527/bfin_serial_5xx.h @@ -67,10 +67,9 @@ struct bfin_serial_port { unsigned int tx_dma_channel; unsigned int rx_dma_channel; struct work_struct tx_dma_workqueue; -#else - struct work_struct cts_workqueue; #endif #ifdef CONFIG_SERIAL_BFIN_CTSRTS + struct work_struct cts_workqueue; int cts_pin; int rts_pin; #endif diff --git a/include/asm-blackfin/mach-bf533/bfin_serial_5xx.h b/include/asm-blackfin/mach-bf533/bfin_serial_5xx.h index a1b4f4eebd0..b6f513bee56 100644 --- a/include/asm-blackfin/mach-bf533/bfin_serial_5xx.h +++ b/include/asm-blackfin/mach-bf533/bfin_serial_5xx.h @@ -56,12 +56,12 @@ struct bfin_serial_port { unsigned int rx_dma_channel; struct work_struct tx_dma_workqueue; #else - struct work_struct cts_workqueue; # if ANOMALY_05000230 unsigned int anomaly_threshold; # endif #endif #ifdef CONFIG_SERIAL_BFIN_CTSRTS + struct work_struct cts_workqueue; int cts_pin; int rts_pin; #endif diff --git a/include/asm-blackfin/mach-bf537/bfin_serial_5xx.h b/include/asm-blackfin/mach-bf537/bfin_serial_5xx.h index f18c517cc93..8fc672d3105 100644 --- a/include/asm-blackfin/mach-bf537/bfin_serial_5xx.h +++ b/include/asm-blackfin/mach-bf537/bfin_serial_5xx.h @@ -67,10 +67,9 @@ struct bfin_serial_port { unsigned int tx_dma_channel; unsigned int rx_dma_channel; struct work_struct tx_dma_workqueue; -#else - struct work_struct cts_workqueue; #endif #ifdef CONFIG_SERIAL_BFIN_CTSRTS + struct work_struct cts_workqueue; int cts_pin; int rts_pin; #endif diff --git a/include/asm-blackfin/mach-bf548/bfin_serial_5xx.h b/include/asm-blackfin/mach-bf548/bfin_serial_5xx.h index 8733d47747e..c459c484646 100644 --- a/include/asm-blackfin/mach-bf548/bfin_serial_5xx.h +++ b/include/asm-blackfin/mach-bf548/bfin_serial_5xx.h @@ -69,10 +69,9 @@ struct bfin_serial_port { unsigned int tx_dma_channel; unsigned int rx_dma_channel; struct work_struct tx_dma_workqueue; -#else - struct work_struct cts_workqueue; #endif #ifdef CONFIG_SERIAL_BFIN_CTSRTS + struct work_struct cts_workqueue; int cts_pin; int rts_pin; #endif diff --git a/include/asm-blackfin/mach-bf561/bfin_serial_5xx.h b/include/asm-blackfin/mach-bf561/bfin_serial_5xx.h index a1b4f4eebd0..b6f513bee56 100644 --- a/include/asm-blackfin/mach-bf561/bfin_serial_5xx.h +++ b/include/asm-blackfin/mach-bf561/bfin_serial_5xx.h @@ -56,12 +56,12 @@ struct bfin_serial_port { unsigned int rx_dma_channel; struct work_struct tx_dma_workqueue; #else - struct work_struct cts_workqueue; # if ANOMALY_05000230 unsigned int anomaly_threshold; # endif #endif #ifdef CONFIG_SERIAL_BFIN_CTSRTS + struct work_struct cts_workqueue; int cts_pin; int rts_pin; #endif -- cgit v1.2.3-70-g09d2 From db288381e26e592b11572ce8199bedeadf0c0830 Mon Sep 17 00:00:00 2001 From: Sonic Zhang Date: Sat, 2 Feb 2008 17:05:02 +0800 Subject: [Blackfin] serial driver: Add flow control support to bf54x Signed-off-by: Sonic Zhang Signed-off-by: Bryan Wu --- drivers/serial/Kconfig | 4 ++-- drivers/serial/bfin_5xx.c | 12 ++++++++++++ include/asm-blackfin/mach-bf548/bfin_serial_5xx.h | 3 +++ 3 files changed, 17 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig index 202fb5c99b8..cf627cd1b4c 100644 --- a/drivers/serial/Kconfig +++ b/drivers/serial/Kconfig @@ -699,14 +699,14 @@ config BFIN_UART1_CTSRTS config UART1_CTS_PIN int "UART1 CTS pin" - depends on BFIN_UART1_CTSRTS && (BF53x || BF561) + depends on BFIN_UART1_CTSRTS && !BF54x default -1 help Refer to ./include/asm-blackfin/gpio.h to see the GPIO map. config UART1_RTS_PIN int "UART1 RTS pin" - depends on BFIN_UART1_CTSRTS && (BF53x || BF561) + depends on BFIN_UART1_CTSRTS && !BF54x default -1 help Refer to ./include/asm-blackfin/gpio.h to see the GPIO map. diff --git a/drivers/serial/bfin_5xx.c b/drivers/serial/bfin_5xx.c index af866ab3f5a..69ac7007682 100644 --- a/drivers/serial/bfin_5xx.c +++ b/drivers/serial/bfin_5xx.c @@ -579,7 +579,11 @@ static unsigned int bfin_serial_get_mctrl(struct uart_port *port) if (uart->cts_pin < 0) return TIOCM_CTS | TIOCM_DSR | TIOCM_CAR; +# ifdef BF54x + if (UART_GET_MSR(uart) & CTS) +# else if (gpio_get_value(uart->cts_pin)) +# endif return TIOCM_DSR | TIOCM_CAR; else #endif @@ -594,9 +598,17 @@ static void bfin_serial_set_mctrl(struct uart_port *port, unsigned int mctrl) return; if (mctrl & TIOCM_RTS) +# ifdef BF54x + UART_PUT_MCR(uart, UART_GET_MCR(uart) & ~MRTS); +# else gpio_set_value(uart->rts_pin, 0); +# endif else +# ifdef BF54x + UART_PUT_MCR(uart, UART_GET_MCR(uart) | MRTS); +# else gpio_set_value(uart->rts_pin, 1); +# endif #endif } diff --git a/include/asm-blackfin/mach-bf548/bfin_serial_5xx.h b/include/asm-blackfin/mach-bf548/bfin_serial_5xx.h index c459c484646..7e6339f62a5 100644 --- a/include/asm-blackfin/mach-bf548/bfin_serial_5xx.h +++ b/include/asm-blackfin/mach-bf548/bfin_serial_5xx.h @@ -24,6 +24,8 @@ #define UART_GET_LCR(uart) bfin_read16(((uart)->port.membase + OFFSET_LCR)) #define UART_GET_LSR(uart) bfin_read16(((uart)->port.membase + OFFSET_LSR)) #define UART_GET_GCTL(uart) bfin_read16(((uart)->port.membase + OFFSET_GCTL)) +#define UART_GET_MSR(uart) bfin_read16(((uart)->port.membase + OFFSET_MSR)) +#define UART_GET_MCR(uart) bfin_read16(((uart)->port.membase + OFFSET_MCR)) #define UART_PUT_CHAR(uart,v) bfin_write16(((uart)->port.membase + OFFSET_THR),v) #define UART_PUT_DLL(uart,v) bfin_write16(((uart)->port.membase + OFFSET_DLL),v) @@ -34,6 +36,7 @@ #define UART_PUT_LCR(uart,v) bfin_write16(((uart)->port.membase + OFFSET_LCR),v) #define UART_CLEAR_LSR(uart) bfin_write16(((uart)->port.membase + OFFSET_LSR), -1) #define UART_PUT_GCTL(uart,v) bfin_write16(((uart)->port.membase + OFFSET_GCTL),v) +#define UART_PUT_MCR(uart,v) bfin_write16(((uart)->port.membase + OFFSET_MCR),v) #if defined(CONFIG_BFIN_UART0_CTSRTS) || defined(CONFIG_BFIN_UART1_CTSRTS) # define CONFIG_SERIAL_BFIN_CTSRTS -- cgit v1.2.3-70-g09d2 From e48a411fa097c386c6081f4ed5a952aa3e21ca2b Mon Sep 17 00:00:00 2001 From: Haavard Skinnemoen Date: Mon, 11 Feb 2008 16:55:19 +0100 Subject: AVR32: Define PAGE_SHARED The virtual framebuffer driver needs PAGE_SHARED, which is not defined on avr32. Define it. Reported-by: Oliver Zander Signed-off-by: Haavard Skinnemoen --- include/asm-avr32/pgtable.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/asm-avr32/pgtable.h b/include/asm-avr32/pgtable.h index 018f6e2a024..3ae7b548fce 100644 --- a/include/asm-avr32/pgtable.h +++ b/include/asm-avr32/pgtable.h @@ -157,6 +157,7 @@ extern struct page *empty_zero_page; #define _PAGE_S(x) _PAGE_NORMAL(x) #define PAGE_COPY _PAGE_P(PAGE_WRITE | PAGE_READ) +#define PAGE_SHARED _PAGE_S(PAGE_WRITE | PAGE_READ) #ifndef __ASSEMBLY__ /* -- cgit v1.2.3-70-g09d2 From b35905c16ad6428551eb9e49525011bd2700cf56 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 25 Feb 2008 16:54:37 -0500 Subject: ext4: Fix memory and buffer head leak in callers to ext4_ext_find_extent() The path variable returned via ext4_ext_find_extent is a kmalloc variable and needs to be freeded. It also contains a reference to buffer_head which needs to be dropped. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Mingming Cao Signed-off-by: "Theodore Ts'o" --- fs/ext4/extents.c | 6 +++--- fs/ext4/migrate.c | 5 +++++ include/linux/ext4_fs_extents.h | 1 + 3 files changed, 9 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index e856f660fc3..995ac16102a 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -349,7 +349,7 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) #define ext4_ext_show_leaf(inode,path) #endif -static void ext4_ext_drop_refs(struct ext4_ext_path *path) +void ext4_ext_drop_refs(struct ext4_ext_path *path) { int depth = path->p_depth; int i; @@ -2200,10 +2200,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, newdepth = ext_depth(inode); if (newdepth != depth) { depth = newdepth; - path = ext4_ext_find_extent(inode, iblock, NULL); + ext4_ext_drop_refs(path); + path = ext4_ext_find_extent(inode, iblock, path); if (IS_ERR(path)) { err = PTR_ERR(path); - path = NULL; goto out; } eh = path[depth].p_hdr; diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 8c6c685b9d2..5c1e27de775 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -43,6 +43,7 @@ static int finish_range(handle_t *handle, struct inode *inode, if (IS_ERR(path)) { retval = PTR_ERR(path); + path = NULL; goto err_out; } @@ -74,6 +75,10 @@ static int finish_range(handle_t *handle, struct inode *inode, } retval = ext4_ext_insert_extent(handle, inode, path, &newext); err_out: + if (path) { + ext4_ext_drop_refs(path); + kfree(path); + } lb->first_pblock = 0; return retval; } diff --git a/include/linux/ext4_fs_extents.h b/include/linux/ext4_fs_extents.h index 697da4bce6c..1285c583b2d 100644 --- a/include/linux/ext4_fs_extents.h +++ b/include/linux/ext4_fs_extents.h @@ -227,5 +227,6 @@ extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *, ext4_lblk_t *, ext4_fsblk_t *); extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *, ext4_lblk_t *, ext4_fsblk_t *); +extern void ext4_ext_drop_refs(struct ext4_ext_path *); #endif /* _LINUX_EXT4_EXTENTS */ -- cgit v1.2.3-70-g09d2 From 76fc60a2e3c6aa6e98cd3a5cb81a1855c637b274 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 23 Feb 2008 11:12:06 +0800 Subject: [CRYPTO] skcipher: Move chainiv/seqiv into crypto_blkcipher module For compatibility with dm-crypt initramfs setups it is useful to merge chainiv/seqiv into the crypto_blkcipher module. Since they're required by most algorithms anyway this is an acceptable trade-off. Signed-off-by: Herbert Xu --- crypto/Makefile | 4 ++-- crypto/ablkcipher.c | 3 --- crypto/blkcipher.c | 29 +++++++++++++++++++++++++++++ crypto/chainiv.c | 12 ++++-------- crypto/eseqiv.c | 12 ++++-------- include/crypto/internal/skcipher.h | 6 ++++++ 6 files changed, 45 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/crypto/Makefile b/crypto/Makefile index 48c75837995..7cf36253a75 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -12,9 +12,9 @@ obj-$(CONFIG_CRYPTO_AEAD) += aead.o crypto_blkcipher-objs := ablkcipher.o crypto_blkcipher-objs += blkcipher.o +crypto_blkcipher-objs += chainiv.o +crypto_blkcipher-objs += eseqiv.o obj-$(CONFIG_CRYPTO_BLKCIPHER) += crypto_blkcipher.o -obj-$(CONFIG_CRYPTO_BLKCIPHER) += chainiv.o -obj-$(CONFIG_CRYPTO_BLKCIPHER) += eseqiv.o obj-$(CONFIG_CRYPTO_SEQIV) += seqiv.o crypto_hash-objs := hash.o diff --git a/crypto/ablkcipher.c b/crypto/ablkcipher.c index 3bcb099b4a8..94140b3756f 100644 --- a/crypto/ablkcipher.c +++ b/crypto/ablkcipher.c @@ -341,6 +341,3 @@ err: return ERR_PTR(err); } EXPORT_SYMBOL_GPL(crypto_alloc_ablkcipher); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Asynchronous block chaining cipher type"); diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c index 4a7e65c4df4..185f955fb0d 100644 --- a/crypto/blkcipher.c +++ b/crypto/blkcipher.c @@ -696,5 +696,34 @@ void skcipher_geniv_exit(struct crypto_tfm *tfm) } EXPORT_SYMBOL_GPL(skcipher_geniv_exit); +static int __init blkcipher_module_init(void) +{ + int err; + + err = chainiv_module_init(); + if (err) + goto out; + + err = eseqiv_module_init(); + if (err) + goto eseqiv_err; + +out: + return err; + +eseqiv_err: + chainiv_module_exit(); + goto out; +} + +static void __exit blkcipher_module_exit(void) +{ + eseqiv_module_exit(); + chainiv_module_exit(); +} + +module_init(blkcipher_module_init); +module_exit(blkcipher_module_exit); + MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Generic block chaining cipher type"); diff --git a/crypto/chainiv.c b/crypto/chainiv.c index d17fa0454dc..0a7cac6e908 100644 --- a/crypto/chainiv.c +++ b/crypto/chainiv.c @@ -314,18 +314,14 @@ static struct crypto_template chainiv_tmpl = { .module = THIS_MODULE, }; -static int __init chainiv_module_init(void) +int __init chainiv_module_init(void) { return crypto_register_template(&chainiv_tmpl); } +EXPORT_SYMBOL_GPL(chainiv_module_init); -static void __exit chainiv_module_exit(void) +void __exit chainiv_module_exit(void) { crypto_unregister_template(&chainiv_tmpl); } - -module_init(chainiv_module_init); -module_exit(chainiv_module_exit); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Chain IV Generator"); +EXPORT_SYMBOL_GPL(chainiv_module_exit); diff --git a/crypto/eseqiv.c b/crypto/eseqiv.c index eb90d27ae11..6f2cd063b6f 100644 --- a/crypto/eseqiv.c +++ b/crypto/eseqiv.c @@ -247,18 +247,14 @@ static struct crypto_template eseqiv_tmpl = { .module = THIS_MODULE, }; -static int __init eseqiv_module_init(void) +int __init eseqiv_module_init(void) { return crypto_register_template(&eseqiv_tmpl); } +EXPORT_SYMBOL_GPL(eseqiv_module_init); -static void __exit eseqiv_module_exit(void) +void __exit eseqiv_module_exit(void) { crypto_unregister_template(&eseqiv_tmpl); } - -module_init(eseqiv_module_init); -module_exit(eseqiv_module_exit); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Encrypted Sequence Number IV Generator"); +EXPORT_SYMBOL_GPL(eseqiv_module_exit); diff --git a/include/crypto/internal/skcipher.h b/include/crypto/internal/skcipher.h index 2ba42cd7d6a..a8f12644a13 100644 --- a/include/crypto/internal/skcipher.h +++ b/include/crypto/internal/skcipher.h @@ -15,6 +15,7 @@ #include #include +#include #include struct rtattr; @@ -64,6 +65,11 @@ void skcipher_geniv_free(struct crypto_instance *inst); int skcipher_geniv_init(struct crypto_tfm *tfm); void skcipher_geniv_exit(struct crypto_tfm *tfm); +int __init eseqiv_module_init(void); +void __exit eseqiv_module_exit(void); +int __init chainiv_module_init(void); +void __exit chainiv_module_exit(void); + static inline struct crypto_ablkcipher *skcipher_geniv_cipher( struct crypto_ablkcipher *geniv) { -- cgit v1.2.3-70-g09d2 From 5319578ca38a8b90b6d0270c194c65d1dd8f7725 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Sat, 23 Feb 2008 23:35:44 -0600 Subject: [SCSI] libsas: export sas_find_local_phy function This is needed by the to be added I_T reset function in aic94xx. It needs to know the local phy so it can send a link or hard reset along the path. Signed-off-by: James Bottomley --- drivers/scsi/libsas/sas_scsi_host.c | 5 +++-- include/scsi/libsas.h | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index 704ea06a6e5..f3706f4b79c 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -434,7 +434,7 @@ static int sas_recover_I_T(struct domain_device *dev) } /* Find the sas_phy that's attached to this device */ -static struct sas_phy *find_local_sas_phy(struct domain_device *dev) +struct sas_phy *sas_find_local_phy(struct domain_device *dev) { struct domain_device *pdev = dev->parent; struct ex_phy *exphy = NULL; @@ -456,6 +456,7 @@ static struct sas_phy *find_local_sas_phy(struct domain_device *dev) BUG_ON(!exphy); return exphy->phy; } +EXPORT_SYMBOL_GPL(sas_find_local_phy); /* Attempt to send a LUN reset message to a device */ int sas_eh_device_reset_handler(struct scsi_cmnd *cmd) @@ -482,7 +483,7 @@ int sas_eh_device_reset_handler(struct scsi_cmnd *cmd) int sas_eh_bus_reset_handler(struct scsi_cmnd *cmd) { struct domain_device *dev = cmd_to_domain_dev(cmd); - struct sas_phy *phy = find_local_sas_phy(dev); + struct sas_phy *phy = sas_find_local_phy(dev); int res; res = sas_phy_reset(phy, 1); diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index 3ffd6b582a9..39e1cac24bb 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -675,5 +675,6 @@ extern int sas_smp_handler(struct Scsi_Host *shost, struct sas_rphy *rphy, extern void sas_ssp_task_response(struct device *dev, struct sas_task *task, struct ssp_response_iu *iu); +struct sas_phy *sas_find_local_phy(struct domain_device *dev); #endif /* _SASLIB_H_ */ -- cgit v1.2.3-70-g09d2 From cc645a020510cf68332a71394a32c1eacb92c6ed Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Thu, 14 Feb 2008 15:09:27 +0900 Subject: sh: Rename SH-3 CCR3 reg to avoid synclink_cs clash. drivers/char/pcmcia/synclink_cs.c:284:1: warning: "CCR3" redefined In file included from include/asm/cache.h:13, from include/asm/processor_32.h:15, from include/asm/processor.h:60, from include/linux/prefetch.h:14, from include/linux/list.h:8, from include/linux/module.h:9, from drivers/char/pcmcia/synclink_cs.c:38: include/asm/cpu/cache.h:38:1: warning: this is the location of the previous definition Signed-off-by: Paul Mundt --- arch/sh/kernel/cpu/sh3/probe.c | 4 ++-- include/asm-sh/cpu-sh3/cache.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/sh/kernel/cpu/sh3/probe.c b/arch/sh/kernel/cpu/sh3/probe.c index fcc80bb7bee..10f2a760c5e 100644 --- a/arch/sh/kernel/cpu/sh3/probe.c +++ b/arch/sh/kernel/cpu/sh3/probe.c @@ -94,9 +94,9 @@ int __uses_jump_to_uncached detect_cpu_and_cache_system(void) boot_cpu_data.dcache.way_incr = (1 << 13); boot_cpu_data.dcache.entry_mask = 0x1ff0; boot_cpu_data.dcache.sets = 512; - ctrl_outl(CCR_CACHE_32KB, CCR3); + ctrl_outl(CCR_CACHE_32KB, CCR3_REG); #else - ctrl_outl(CCR_CACHE_16KB, CCR3); + ctrl_outl(CCR_CACHE_16KB, CCR3_REG); #endif #endif } diff --git a/include/asm-sh/cpu-sh3/cache.h b/include/asm-sh/cpu-sh3/cache.h index 56bd838b7db..bee2d81c56b 100644 --- a/include/asm-sh/cpu-sh3/cache.h +++ b/include/asm-sh/cpu-sh3/cache.h @@ -35,7 +35,7 @@ defined(CONFIG_CPU_SUBTYPE_SH7710) || \ defined(CONFIG_CPU_SUBTYPE_SH7720) || \ defined(CONFIG_CPU_SUBTYPE_SH7721) -#define CCR3 0xa40000b4 +#define CCR3_REG 0xa40000b4 #define CCR_CACHE_16KB 0x00010000 #define CCR_CACHE_32KB 0x00020000 #endif -- cgit v1.2.3-70-g09d2 From 2d07b255c7b8a9723010e5c74778e058dc05162e Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Fri, 15 Feb 2008 09:56:34 -0800 Subject: sched: add declaration of sched_tail to sched.h Avoids sparse warnings: kernel/sched.c:2170:17: warning: symbol 'schedule_tail' was not declared. Should it be static? Avoids the need for an external declaration in arch/um/process.c Signed-off-by: Harvey Harrison Signed-off-by: Ingo Molnar --- arch/um/kernel/process.c | 2 -- include/linux/sched.h | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index fc50d2f959d..e8cb9ff183e 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -128,8 +128,6 @@ void *get_current(void) return current; } -extern void schedule_tail(struct task_struct *prev); - /* * This is called magically, by its address being stuffed in a jmp_buf * and being longjmp-d to. diff --git a/include/linux/sched.h b/include/linux/sched.h index e217d188a10..9c17e828d6d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -242,6 +242,7 @@ struct task_struct; extern void sched_init(void); extern void sched_init_smp(void); +extern asmlinkage void schedule_tail(struct task_struct *prev); extern void init_idle(struct task_struct *idle, int cpu); extern void init_idle_bootup_task(struct task_struct *idle); -- cgit v1.2.3-70-g09d2 From bdb9441e9c325d50b5ae17f7d3205d65b8ed2e5f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 25 Feb 2008 23:02:48 +0100 Subject: lockdep: increase MAX_LOCK_DEPTH Some code paths exceed the current max lock depth (XFS), so increase this limit a bit. I looked at making this a dynamic allocated array, but we should not advocate insane lock depths, so stay with this as long as it works... Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index e217d188a10..e3ea1243754 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1189,7 +1189,7 @@ struct task_struct { int softirq_context; #endif #ifdef CONFIG_LOCKDEP -# define MAX_LOCK_DEPTH 30UL +# define MAX_LOCK_DEPTH 48UL u64 curr_chain_key; int lockdep_depth; struct held_lock held_locks[MAX_LOCK_DEPTH]; -- cgit v1.2.3-70-g09d2 From 24d10f0c37d301e88f6965e3dc0aa684311544e5 Mon Sep 17 00:00:00 2001 From: Adrian McMenamin Date: Sat, 16 Feb 2008 23:37:33 +0000 Subject: maple: remove unused variable Remove an unused variable from the definition of struct maple_device Signed-off-by: Adrian McMenamin Signed-off-by: Paul Mundt --- include/linux/maple.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/maple.h b/include/linux/maple.h index 3f01e2bae1a..d31e36ebb43 100644 --- a/include/linux/maple.h +++ b/include/linux/maple.h @@ -64,7 +64,6 @@ struct maple_driver { int (*connect) (struct maple_device * dev); void (*disconnect) (struct maple_device * dev); struct device_driver drv; - int registered; }; void maple_getcond_callback(struct maple_device *dev, -- cgit v1.2.3-70-g09d2 From 8b1d16540c6ae4e62fcff56bd47794951b3ca87a Mon Sep 17 00:00:00 2001 From: Hideo Saito Date: Tue, 26 Feb 2008 14:28:48 +0900 Subject: sh: Fix up HAS_SR_RB typo in entry-macros. Signed-off-by: Hideo Saito Signed-off-by: Paul Mundt --- include/asm-sh/entry-macros.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-sh/entry-macros.S b/include/asm-sh/entry-macros.S index 500030eae7a..2dab0b8d945 100644 --- a/include/asm-sh/entry-macros.S +++ b/include/asm-sh/entry-macros.S @@ -12,7 +12,7 @@ not r11, r11 stc sr, r10 and r11, r10 -#ifdef CONFIG_HAS_SR_RB +#ifdef CONFIG_CPU_HAS_SR_RB stc k_g_imask, r11 or r11, r10 #endif @@ -20,7 +20,7 @@ .endm .macro get_current_thread_info, ti, tmp -#ifdef CONFIG_HAS_SR_RB +#ifdef CONFIG_CPU_HAS_SR_RB stc r7_bank, \ti #else mov #((THREAD_SIZE - 1) >> 10) ^ 0xff, \tmp -- cgit v1.2.3-70-g09d2 From 96de1a8f0275bd67f243833e7088baced518f873 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Tue, 26 Feb 2008 14:52:45 +0900 Subject: serial: Move asm-sh/sci.h to linux/serial_sci.h. This header is needed on other architectures as well (namely h8300), which currently fails to build without this in place. Rather than duplicating the port definition completely there, just move this to a common location instead. This should get h8300 working again for 2.6.25, in addition to the changes already pushed by Sato-san in -rc2. Signed-off-by: Paul Mundt --- arch/sh/kernel/cpu/sh2/setup-sh7619.c | 2 +- arch/sh/kernel/cpu/sh2a/setup-sh7203.c | 2 +- arch/sh/kernel/cpu/sh2a/setup-sh7206.c | 2 +- arch/sh/kernel/cpu/sh3/setup-sh7705.c | 2 +- arch/sh/kernel/cpu/sh3/setup-sh770x.c | 2 +- arch/sh/kernel/cpu/sh3/setup-sh7710.c | 2 +- arch/sh/kernel/cpu/sh3/setup-sh7720.c | 2 +- arch/sh/kernel/cpu/sh4/setup-sh4-202.c | 2 +- arch/sh/kernel/cpu/sh4/setup-sh7750.c | 2 +- arch/sh/kernel/cpu/sh4/setup-sh7760.c | 2 +- arch/sh/kernel/cpu/sh4a/setup-sh7343.c | 2 +- arch/sh/kernel/cpu/sh4a/setup-sh7366.c | 2 +- arch/sh/kernel/cpu/sh4a/setup-sh7722.c | 2 +- arch/sh/kernel/cpu/sh4a/setup-sh7763.c | 2 +- arch/sh/kernel/cpu/sh4a/setup-sh7770.c | 2 +- arch/sh/kernel/cpu/sh4a/setup-sh7780.c | 2 +- arch/sh/kernel/cpu/sh4a/setup-sh7785.c | 2 +- arch/sh/kernel/cpu/sh4a/setup-shx3.c | 2 +- drivers/serial/sh-sci.c | 2 +- include/asm-sh/sci.h | 34 ---------------------------------- include/linux/serial_sci.h | 32 ++++++++++++++++++++++++++++++++ 21 files changed, 51 insertions(+), 53 deletions(-) delete mode 100644 include/asm-sh/sci.h create mode 100644 include/linux/serial_sci.h (limited to 'include') diff --git a/arch/sh/kernel/cpu/sh2/setup-sh7619.c b/arch/sh/kernel/cpu/sh2/setup-sh7619.c index b230eb278ce..cc530f4d84d 100644 --- a/arch/sh/kernel/cpu/sh2/setup-sh7619.c +++ b/arch/sh/kernel/cpu/sh2/setup-sh7619.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include enum { UNUSED = 0, diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7203.c b/arch/sh/kernel/cpu/sh2a/setup-sh7203.c index db6ef5cecde..e98dc445035 100644 --- a/arch/sh/kernel/cpu/sh2a/setup-sh7203.c +++ b/arch/sh/kernel/cpu/sh2a/setup-sh7203.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include enum { UNUSED = 0, diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7206.c b/arch/sh/kernel/cpu/sh2a/setup-sh7206.c index a564425b905..e6d4ec445dd 100644 --- a/arch/sh/kernel/cpu/sh2a/setup-sh7206.c +++ b/arch/sh/kernel/cpu/sh2a/setup-sh7206.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include enum { UNUSED = 0, diff --git a/arch/sh/kernel/cpu/sh3/setup-sh7705.c b/arch/sh/kernel/cpu/sh3/setup-sh7705.c index dd0a20a685f..f581534cb73 100644 --- a/arch/sh/kernel/cpu/sh3/setup-sh7705.c +++ b/arch/sh/kernel/cpu/sh3/setup-sh7705.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include enum { diff --git a/arch/sh/kernel/cpu/sh3/setup-sh770x.c b/arch/sh/kernel/cpu/sh3/setup-sh770x.c index 9066ed78e28..d3733b13ea5 100644 --- a/arch/sh/kernel/cpu/sh3/setup-sh770x.c +++ b/arch/sh/kernel/cpu/sh3/setup-sh770x.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include enum { UNUSED = 0, diff --git a/arch/sh/kernel/cpu/sh3/setup-sh7710.c b/arch/sh/kernel/cpu/sh3/setup-sh7710.c index 0cc0e2bf135..7406c9ad925 100644 --- a/arch/sh/kernel/cpu/sh3/setup-sh7710.c +++ b/arch/sh/kernel/cpu/sh3/setup-sh7710.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include enum { diff --git a/arch/sh/kernel/cpu/sh3/setup-sh7720.c b/arch/sh/kernel/cpu/sh3/setup-sh7720.c index 3855ea4c21c..8028082527c 100644 --- a/arch/sh/kernel/cpu/sh3/setup-sh7720.c +++ b/arch/sh/kernel/cpu/sh3/setup-sh7720.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #define INTC_ICR1 0xA4140010UL diff --git a/arch/sh/kernel/cpu/sh4/setup-sh4-202.c b/arch/sh/kernel/cpu/sh4/setup-sh4-202.c index dab193293f2..7371abf64f8 100644 --- a/arch/sh/kernel/cpu/sh4/setup-sh4-202.c +++ b/arch/sh/kernel/cpu/sh4/setup-sh4-202.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include static struct plat_sci_port sci_platform_data[] = { { diff --git a/arch/sh/kernel/cpu/sh4/setup-sh7750.c b/arch/sh/kernel/cpu/sh4/setup-sh7750.c index ae3603aca61..ec884039b91 100644 --- a/arch/sh/kernel/cpu/sh4/setup-sh7750.c +++ b/arch/sh/kernel/cpu/sh4/setup-sh7750.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include static struct resource rtc_resources[] = { [0] = { diff --git a/arch/sh/kernel/cpu/sh4/setup-sh7760.c b/arch/sh/kernel/cpu/sh4/setup-sh7760.c index 85f81579b97..254c5c55ab9 100644 --- a/arch/sh/kernel/cpu/sh4/setup-sh7760.c +++ b/arch/sh/kernel/cpu/sh4/setup-sh7760.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include enum { UNUSED = 0, diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7343.c b/arch/sh/kernel/cpu/sh4a/setup-sh7343.c index c0a3f079dfd..6d4f50cd4aa 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7343.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7343.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include static struct plat_sci_port sci_platform_data[] = { { diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7366.c b/arch/sh/kernel/cpu/sh4a/setup-sh7366.c index 967e8b69a2f..f26b5cdad0d 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7366.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7366.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include static struct plat_sci_port sci_platform_data[] = { { diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c index 73c778d40d1..b98b4bc93ec 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c @@ -10,9 +10,9 @@ #include #include #include +#include #include #include -#include static struct resource usbf_resources[] = { [0] = { diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7763.c b/arch/sh/kernel/cpu/sh4a/setup-sh7763.c index eabd5386812..07c988dc9de 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7763.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7763.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include static struct resource rtc_resources[] = { [0] = { diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7770.c b/arch/sh/kernel/cpu/sh4a/setup-sh7770.c index 32f4f59a837..b9cec48b180 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7770.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7770.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include static struct plat_sci_port sci_platform_data[] = { { diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7780.c b/arch/sh/kernel/cpu/sh4a/setup-sh7780.c index 293004b526f..18dbbe23fea 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7780.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7780.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include static struct resource rtc_resources[] = { [0] = { diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7785.c b/arch/sh/kernel/cpu/sh4a/setup-sh7785.c index 74b60e96cdf..621e7329ec6 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7785.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7785.c @@ -10,10 +10,10 @@ #include #include #include +#include #include #include #include -#include static struct plat_sci_port sci_platform_data[] = { { diff --git a/arch/sh/kernel/cpu/sh4a/setup-shx3.c b/arch/sh/kernel/cpu/sh4a/setup-shx3.c index 4dc958b6b31..bd35f32534b 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-shx3.c +++ b/arch/sh/kernel/cpu/sh4a/setup-shx3.c @@ -10,9 +10,9 @@ #include #include #include +#include #include #include -#include static struct plat_sci_port sci_platform_data[] = { { diff --git a/drivers/serial/sh-sci.c b/drivers/serial/sh-sci.c index 9ce12cb2ceb..a8c116b80bf 100644 --- a/drivers/serial/sh-sci.c +++ b/drivers/serial/sh-sci.c @@ -41,6 +41,7 @@ #include #include #include +#include #ifdef CONFIG_CPU_FREQ #include @@ -54,7 +55,6 @@ #include #endif -#include #include "sh-sci.h" struct sci_port { diff --git a/include/asm-sh/sci.h b/include/asm-sh/sci.h deleted file mode 100644 index 52e73660c12..00000000000 --- a/include/asm-sh/sci.h +++ /dev/null @@ -1,34 +0,0 @@ -#ifndef __ASM_SH_SCI_H -#define __ASM_SH_SCI_H - -#include - -/* - * Generic header for SuperH SCI(F) - * - * Do not place SH-specific parts in here, sh64 and h8300 depend on this too. - */ - -/* Offsets into the sci_port->irqs array */ -enum { - SCIx_ERI_IRQ, - SCIx_RXI_IRQ, - SCIx_TXI_IRQ, - SCIx_BRI_IRQ, - SCIx_NR_IRQS, -}; - -/* - * Platform device specific platform_data struct - */ -struct plat_sci_port { - void __iomem *membase; /* io cookie */ - unsigned long mapbase; /* resource base */ - unsigned int irqs[SCIx_NR_IRQS]; /* ERI, RXI, TXI, BRI */ - unsigned int type; /* SCI / SCIF / IRDA */ - upf_t flags; /* UPF_* flags */ -}; - -int early_sci_setup(struct uart_port *port); - -#endif /* __ASM_SH_SCI_H */ diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h new file mode 100644 index 00000000000..893cc53486b --- /dev/null +++ b/include/linux/serial_sci.h @@ -0,0 +1,32 @@ +#ifndef __LINUX_SERIAL_SCI_H +#define __LINUX_SERIAL_SCI_H + +#include + +/* + * Generic header for SuperH SCI(F) (used by sh/sh64/h8300 and related parts) + */ + +/* Offsets into the sci_port->irqs array */ +enum { + SCIx_ERI_IRQ, + SCIx_RXI_IRQ, + SCIx_TXI_IRQ, + SCIx_BRI_IRQ, + SCIx_NR_IRQS, +}; + +/* + * Platform device specific platform_data struct + */ +struct plat_sci_port { + void __iomem *membase; /* io cookie */ + unsigned long mapbase; /* resource base */ + unsigned int irqs[SCIx_NR_IRQS]; /* ERI, RXI, TXI, BRI */ + unsigned int type; /* SCI / SCIF / IRDA */ + upf_t flags; /* UPF_* flags */ +}; + +int early_sci_setup(struct uart_port *port); + +#endif /* __LINUX_SERIAL_SCI_H */ -- cgit v1.2.3-70-g09d2 From cbc34973709eb41b369c304c075cf2069f847012 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Wed, 13 Feb 2008 13:14:35 -0800 Subject: lguest: include function prototypes Added a declaration to asm-x86/lguest.h and moved the extern arrays there as well. As an alternative to including asm/lguest.h directly, an include could be put in linux/lguest.h Signed-off-by: Harvey Harrison Cc: "rusty@rustcorp.com.au" Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/lguest/boot.c | 10 +--------- include/asm-x86/lguest.h | 11 +++++++++++ 2 files changed, 12 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 5afdde4895d..1e613fb03e3 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -57,6 +57,7 @@ #include #include #include +#include #include #include #include @@ -75,15 +76,6 @@ * behaving in simplified but equivalent ways. In particular, the Guest is the * same kernel as the Host (or at least, built from the same source code). :*/ -/* Declarations for definitions in lguest_guest.S */ -extern char lguest_noirq_start[], lguest_noirq_end[]; -extern const char lgstart_cli[], lgend_cli[]; -extern const char lgstart_sti[], lgend_sti[]; -extern const char lgstart_popf[], lgend_popf[]; -extern const char lgstart_pushf[], lgend_pushf[]; -extern const char lgstart_iret[], lgend_iret[]; -extern void lguest_iret(void); - struct lguest_data lguest_data = { .hcall_status = { [0 ... LHCALL_RING_SIZE-1] = 0xFF }, .noirq_start = (u32)lguest_noirq_start, diff --git a/include/asm-x86/lguest.h b/include/asm-x86/lguest.h index 4d9367b7297..9b17571e9bc 100644 --- a/include/asm-x86/lguest.h +++ b/include/asm-x86/lguest.h @@ -23,6 +23,17 @@ /* Found in switcher.S */ extern unsigned long default_idt_entries[]; +/* Declarations for definitions in lguest_guest.S */ +extern char lguest_noirq_start[], lguest_noirq_end[]; +extern const char lgstart_cli[], lgend_cli[]; +extern const char lgstart_sti[], lgend_sti[]; +extern const char lgstart_popf[], lgend_popf[]; +extern const char lgstart_pushf[], lgend_pushf[]; +extern const char lgstart_iret[], lgend_iret[]; + +extern void lguest_iret(void); +extern void lguest_init(void); + struct lguest_regs { /* Manually saved part. */ -- cgit v1.2.3-70-g09d2 From 7343b3b3a627eb30e24e921f004f659c8ebb91c5 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 14 Feb 2008 14:52:05 -0800 Subject: x86: require family >= 6 if we are using P6 NOPs The P6 family of NOPs are only available on family >= 6 or above, so enforce that in the boot code. Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig.cpu | 5 +++++ include/asm-x86/nops.h | 4 +--- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index e09a6b73a1a..86fd2a0e459 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -377,6 +377,10 @@ config X86_OOSTORE def_bool y depends on (MWINCHIP3D || MWINCHIP2 || MWINCHIPC6) && MTRR +config X86_P6_NOP + def_bool y + depends on (M686 || MPENTIUMII || MPENTIUMIII || MPENTIUMM || MCORE2 || PENTIUM4) + config X86_TSC def_bool y depends on ((MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ) || X86_64 @@ -390,6 +394,7 @@ config X86_CMOV config X86_MINIMUM_CPU_FAMILY int default "64" if X86_64 + default "6" if X86_32 && X86_P6_NOP default "4" if X86_32 && (X86_XADD || X86_CMPXCHG || X86_BSWAP || X86_WP_WORKS_OK) default "3" diff --git a/include/asm-x86/nops.h b/include/asm-x86/nops.h index fec025c7f58..c52b334b816 100644 --- a/include/asm-x86/nops.h +++ b/include/asm-x86/nops.h @@ -63,9 +63,7 @@ #define ASM_NOP6 K7_NOP6 #define ASM_NOP7 K7_NOP7 #define ASM_NOP8 K7_NOP8 -#elif defined(CONFIG_M686) || defined(CONFIG_MPENTIUMII) || \ - defined(CONFIG_MPENTIUMIII) || defined(CONFIG_MPENTIUMM) || \ - defined(CONFIG_MCORE2) || defined(CONFIG_PENTIUM4) +#elif defined(CONFIG_X86_P6_NOP) #define ASM_NOP1 P6_NOP1 #define ASM_NOP2 P6_NOP2 #define ASM_NOP3 P6_NOP3 -- cgit v1.2.3-70-g09d2 From 4cd20952d74323df06e438c0c3273b5be89d6bfd Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 18 Feb 2008 23:24:33 -0800 Subject: x86: add comments for NOPs Add comments describing the various NOP sequences. Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/asm-x86/nops.h | 62 ++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 45 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/asm-x86/nops.h b/include/asm-x86/nops.h index c52b334b816..e3b2bce0aff 100644 --- a/include/asm-x86/nops.h +++ b/include/asm-x86/nops.h @@ -3,17 +3,29 @@ /* Define nops for use with alternative() */ -/* generic versions from gas */ -#define GENERIC_NOP1 ".byte 0x90\n" -#define GENERIC_NOP2 ".byte 0x89,0xf6\n" -#define GENERIC_NOP3 ".byte 0x8d,0x76,0x00\n" -#define GENERIC_NOP4 ".byte 0x8d,0x74,0x26,0x00\n" -#define GENERIC_NOP5 GENERIC_NOP1 GENERIC_NOP4 -#define GENERIC_NOP6 ".byte 0x8d,0xb6,0x00,0x00,0x00,0x00\n" -#define GENERIC_NOP7 ".byte 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00\n" -#define GENERIC_NOP8 GENERIC_NOP1 GENERIC_NOP7 +/* generic versions from gas + 1: nop + 2: movl %esi,%esi + 3: leal 0x00(%esi),%esi + 4: leal 0x00(,%esi,1),%esi + 6: leal 0x00000000(%esi),%esi + 7: leal 0x00000000(,%esi,1),%esi +*/ +#define GENERIC_NOP1 ".byte 0x90\n" +#define GENERIC_NOP2 ".byte 0x89,0xf6\n" +#define GENERIC_NOP3 ".byte 0x8d,0x76,0x00\n" +#define GENERIC_NOP4 ".byte 0x8d,0x74,0x26,0x00\n" +#define GENERIC_NOP5 GENERIC_NOP1 GENERIC_NOP4 +#define GENERIC_NOP6 ".byte 0x8d,0xb6,0x00,0x00,0x00,0x00\n" +#define GENERIC_NOP7 ".byte 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00\n" +#define GENERIC_NOP8 GENERIC_NOP1 GENERIC_NOP7 -/* Opteron 64bit nops */ +/* Opteron 64bit nops + 1: nop + 2: osp nop + 3: osp osp nop + 4: osp osp osp nop +*/ #define K8_NOP1 GENERIC_NOP1 #define K8_NOP2 ".byte 0x66,0x90\n" #define K8_NOP3 ".byte 0x66,0x66,0x90\n" @@ -23,19 +35,35 @@ #define K8_NOP7 K8_NOP4 K8_NOP3 #define K8_NOP8 K8_NOP4 K8_NOP4 -/* K7 nops */ -/* uses eax dependencies (arbitary choice) */ -#define K7_NOP1 GENERIC_NOP1 +/* K7 nops + uses eax dependencies (arbitary choice) + 1: nop + 2: movl %eax,%eax + 3: leal (,%eax,1),%eax + 4: leal 0x00(,%eax,1),%eax + 6: leal 0x00000000(%eax),%eax + 7: leal 0x00000000(,%eax,1),%eax +*/ +#define K7_NOP1 GENERIC_NOP1 #define K7_NOP2 ".byte 0x8b,0xc0\n" #define K7_NOP3 ".byte 0x8d,0x04,0x20\n" #define K7_NOP4 ".byte 0x8d,0x44,0x20,0x00\n" #define K7_NOP5 K7_NOP4 ASM_NOP1 #define K7_NOP6 ".byte 0x8d,0x80,0,0,0,0\n" -#define K7_NOP7 ".byte 0x8D,0x04,0x05,0,0,0,0\n" -#define K7_NOP8 K7_NOP7 ASM_NOP1 +#define K7_NOP7 ".byte 0x8D,0x04,0x05,0,0,0,0\n" +#define K7_NOP8 K7_NOP7 ASM_NOP1 -/* P6 nops */ -/* uses eax dependencies (Intel-recommended choice) */ +/* P6 nops + uses eax dependencies (Intel-recommended choice) + 1: nop + 2: osp nop + 3: nopl (%eax) + 4: nopl 0x00(%eax) + 5: nopl 0x00(%eax,%eax,1) + 6: osp nopl 0x00(%eax,%eax,1) + 7: nopl 0x00000000(%eax) + 8: nopl 0x00000000(%eax,%eax,1) +*/ #define P6_NOP1 GENERIC_NOP1 #define P6_NOP2 ".byte 0x66,0x90\n" #define P6_NOP3 ".byte 0x0f,0x1f,0x00\n" -- cgit v1.2.3-70-g09d2 From 88f3aec7afd9ae3e6f6d221801996b69aad1e3a4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 21 Feb 2008 11:04:11 +0100 Subject: x86: fix spontaneous reboot with allyesconfig bzImage recently the 64-bit allyesconfig bzImage kernel started spontaneously rebooting during early bootup. after a few fun hours spent with early init debugging, it turns out that we've got this rather annoying limit on the size of the kernel image: #define KERNEL_TEXT_SIZE (40*1024*1024) which limit my vmlinux just happened to pass: text data bss dec hex filename 29703744 4222751 8646224 42572719 2899baf vmlinux 40 MB is 42572719 bytes, so my vmlinux was just 1.5% above this limit :-/ So it happily crashed right in head_64.S, which - as we all know - is the most debuggable code in the whole architecture ;-) So increase the limit to allow an up to 128MB kernel image to be mapped. (should anyone be that crazy or lazy) We have a full 4K of pagetable (level2_kernel_pgt) allocated for these mappings already, so there's no RAM overhead and the limit was rather pointless and arbitrary. Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_64.S | 22 ++++++++++++++-------- arch/x86/mm/init_64.c | 5 +++-- include/asm-x86/page_64.h | 8 ++++++-- 3 files changed, 23 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index eb415043a92..b037b15be7f 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -379,18 +379,24 @@ NEXT_PAGE(level2_ident_pgt) /* Since I easily can, map the first 1G. * Don't set NX because code runs from these pages. */ - PMDS(0x0000000000000000, __PAGE_KERNEL_LARGE_EXEC, PTRS_PER_PMD) + PMDS(0, __PAGE_KERNEL_LARGE_EXEC, PTRS_PER_PMD) NEXT_PAGE(level2_kernel_pgt) - /* 40MB kernel mapping. The kernel code cannot be bigger than that. - When you change this change KERNEL_TEXT_SIZE in page.h too. */ - /* (2^48-(2*1024*1024*1024)-((2^39)*511)-((2^30)*510)) = 0 */ - PMDS(0x0000000000000000, __PAGE_KERNEL_LARGE_EXEC|_PAGE_GLOBAL, KERNEL_TEXT_SIZE/PMD_SIZE) - /* Module mapping starts here */ - .fill (PTRS_PER_PMD - (KERNEL_TEXT_SIZE/PMD_SIZE)),8,0 + /* + * 128 MB kernel mapping. We spend a full page on this pagetable + * anyway. + * + * The kernel code+data+bss must not be bigger than that. + * + * (NOTE: at +128MB starts the module area, see MODULES_VADDR. + * If you want to increase this then increase MODULES_VADDR + * too.) + */ + PMDS(0, __PAGE_KERNEL_LARGE_EXEC|_PAGE_GLOBAL, + KERNEL_TEXT_SIZE/PMD_SIZE) NEXT_PAGE(level2_spare_pgt) - .fill 512,8,0 + .fill 512, 8, 0 #undef PMDS #undef NEXT_PAGE diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 6dbb0035c57..a02a14f0f32 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -172,8 +172,9 @@ set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot) } /* - * The head.S code sets up the kernel high mapping from: - * __START_KERNEL_map to __START_KERNEL_map + KERNEL_TEXT_SIZE + * The head.S code sets up the kernel high mapping: + * + * from __START_KERNEL_map to __START_KERNEL_map + size (== _end-_text) * * phys_addr holds the negative offset to the kernel, which is added * to the compile time generated pmds. This results in invalid pmds up diff --git a/include/asm-x86/page_64.h b/include/asm-x86/page_64.h index f7393bc516e..3e2e3ca6304 100644 --- a/include/asm-x86/page_64.h +++ b/include/asm-x86/page_64.h @@ -47,8 +47,12 @@ #define __PHYSICAL_MASK_SHIFT 46 #define __VIRTUAL_MASK_SHIFT 48 -#define KERNEL_TEXT_SIZE (40*1024*1024) -#define KERNEL_TEXT_START _AC(0xffffffff80000000, UL) +/* + * Kernel image size is limited to 128 MB (see level2_kernel_pgt in + * arch/x86/kernel/head_64.S), and it is mapped here: + */ +#define KERNEL_TEXT_SIZE (128*1024*1024) +#define KERNEL_TEXT_START _AC(0xffffffff80000000, UL) #ifndef __ASSEMBLY__ void clear_page(void *page); -- cgit v1.2.3-70-g09d2 From d4afe414189b098d56bcd24280c018aa2ac9a990 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 21 Feb 2008 13:39:30 +0100 Subject: x86: rename KERNEL_TEXT_SIZE => KERNEL_IMAGE_SIZE The KERNEL_TEXT_SIZE constant was mis-named, as we not only map the kernel text but data, bss and init sections as well. That name led me on the wrong path with the KERNEL_TEXT_SIZE regression, because i knew how big of _text_ my images have and i knew about the 40 MB "text" limit so i wrongly thought to be on the safe side of the 40 MB limit with my 29 MB of text, while the total image size was slightly above 40 MB. Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_64.S | 2 +- include/asm-x86/page_64.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index b037b15be7f..a007454133a 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -393,7 +393,7 @@ NEXT_PAGE(level2_kernel_pgt) * too.) */ PMDS(0, __PAGE_KERNEL_LARGE_EXEC|_PAGE_GLOBAL, - KERNEL_TEXT_SIZE/PMD_SIZE) + KERNEL_IMAGE_SIZE/PMD_SIZE) NEXT_PAGE(level2_spare_pgt) .fill 512, 8, 0 diff --git a/include/asm-x86/page_64.h b/include/asm-x86/page_64.h index 3e2e3ca6304..143546073b9 100644 --- a/include/asm-x86/page_64.h +++ b/include/asm-x86/page_64.h @@ -51,8 +51,8 @@ * Kernel image size is limited to 128 MB (see level2_kernel_pgt in * arch/x86/kernel/head_64.S), and it is mapped here: */ -#define KERNEL_TEXT_SIZE (128*1024*1024) -#define KERNEL_TEXT_START _AC(0xffffffff80000000, UL) +#define KERNEL_IMAGE_SIZE (128*1024*1024) +#define KERNEL_IMAGE_START _AC(0xffffffff80000000, UL) #ifndef __ASSEMBLY__ void clear_page(void *page); -- cgit v1.2.3-70-g09d2 From f18edc95a37a901ffcbe91f5e05105f916a04fae Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 16 Feb 2008 14:05:01 +0100 Subject: x86: no robust/pi futex for real i386 CPUs Real i386 CPUs do not have cmpxchg instructions. Catch it before crashing on an invalid opcode. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/asm-x86/futex.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/asm-x86/futex.h b/include/asm-x86/futex.h index cd9f894dd2d..c9952ea9f69 100644 --- a/include/asm-x86/futex.h +++ b/include/asm-x86/futex.h @@ -102,6 +102,13 @@ futex_atomic_op_inuser(int encoded_op, int __user *uaddr) static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) { + +#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP) + /* Real i386 machines have no cmpxchg instruction */ + if (boot_cpu_data.x86 == 3) + return -ENOSYS; +#endif + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) return -EFAULT; -- cgit v1.2.3-70-g09d2 From 78374676efae525094aee45c0aab4bcab95ea9d1 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Tue, 26 Feb 2008 18:25:53 -0800 Subject: CONNECTOR: make cn_already_initialized static It is used in connector.c only, so make it static. Signed-off-by: Li Zefan Signed-off-by: David S. Miller --- drivers/connector/connector.c | 2 +- include/linux/connector.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c index fea2d3ed9cb..85e2ba7fcfb 100644 --- a/drivers/connector/connector.c +++ b/drivers/connector/connector.c @@ -47,7 +47,7 @@ static LIST_HEAD(notify_list); static struct cn_dev cdev; -int cn_already_initialized = 0; +static int cn_already_initialized; /* * msg->seq and msg->ack are used to determine message genealogy. diff --git a/include/linux/connector.h b/include/linux/connector.h index da6dd957f90..96a89d3d672 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -170,7 +170,5 @@ int cn_cb_equal(struct cb_id *, struct cb_id *); void cn_queue_wrapper(struct work_struct *work); -extern int cn_already_initialized; - #endif /* __KERNEL__ */ #endif /* __CONNECTOR_H */ -- cgit v1.2.3-70-g09d2 From fbabbed8284d1526ed01754ecd4fabdb941a1ff2 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 27 Feb 2008 12:21:18 -0800 Subject: [NETFILTER]: Fix NF_QUEUE_NR() parenthesis Properly add parens around the macro argument. This is not needed by the kernel but the macro is exported to userspace, so it shouldn't make any assumptions. Also use NF_VERDICT_BITS instead of NF_VERDICT_QBTIS for the left-shift since thats whats logically correct. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index b74b615492e..f0680c2bee7 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -31,7 +31,7 @@ #define NF_VERDICT_QMASK 0xffff0000 #define NF_VERDICT_QBITS 16 -#define NF_QUEUE_NR(x) (((x << NF_VERDICT_QBITS) & NF_VERDICT_QMASK) | NF_QUEUE) +#define NF_QUEUE_NR(x) ((((x) << NF_VERDICT_BITS) & NF_VERDICT_QMASK) | NF_QUEUE) /* only for userspace compatibility */ #ifndef __KERNEL__ -- cgit v1.2.3-70-g09d2 From 7e8616d8e7731b026019d9af7cc9914b8bb42bc7 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 27 Feb 2008 16:04:52 -0500 Subject: [SCTP]: Update AUTH structures to match declarations in draft-16. The new SCTP socket api (draft 16) updates the AUTH API structures. We never exported these since we knew they would change. Update the rest to match the draft. Signed-off-by: Vlad Yasevich --- include/net/sctp/user.h | 10 ++++++---- net/sctp/auth.c | 4 ++-- net/sctp/socket.c | 6 +++++- 3 files changed, 13 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h index 9462d6ae2f3..9619b9d35c9 100644 --- a/include/net/sctp/user.h +++ b/include/net/sctp/user.h @@ -411,6 +411,7 @@ struct sctp_event_subscribe { __u8 sctp_shutdown_event; __u8 sctp_partial_delivery_event; __u8 sctp_adaptation_layer_event; + __u8 sctp_authentication_event; }; /* @@ -587,7 +588,7 @@ struct sctp_authchunk { * endpoint requires the peer to use. */ struct sctp_hmacalgo { - __u16 shmac_num_idents; + __u32 shmac_num_idents; __u16 shmac_idents[]; }; @@ -600,7 +601,7 @@ struct sctp_hmacalgo { struct sctp_authkey { sctp_assoc_t sca_assoc_id; __u16 sca_keynumber; - __u16 sca_keylen; + __u16 sca_keylength; __u8 sca_key[]; }; @@ -693,8 +694,9 @@ struct sctp_status { * the peer requires to be received authenticated only. */ struct sctp_authchunks { - sctp_assoc_t gauth_assoc_id; - uint8_t gauth_chunks[]; + sctp_assoc_t gauth_assoc_id; + __u32 gauth_number_of_chunks; + uint8_t gauth_chunks[]; }; /* diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 8bb79f28177..675a5c3e68a 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -838,11 +838,11 @@ int sctp_auth_set_key(struct sctp_endpoint *ep, } /* Create a new key data based on the info passed in */ - key = sctp_auth_create_key(auth_key->sca_keylen, GFP_KERNEL); + key = sctp_auth_create_key(auth_key->sca_keylength, GFP_KERNEL); if (!key) goto nomem; - memcpy(key->data, &auth_key->sca_key[0], auth_key->sca_keylen); + memcpy(key->data, &auth_key->sca_key[0], auth_key->sca_keylength); /* If we are replacing, remove the old keys data from the * key id. If we are adding new key id, add it to the diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 848df21dc6c..939892691a2 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1964,7 +1964,7 @@ static int sctp_setsockopt_disable_fragments(struct sock *sk, static int sctp_setsockopt_events(struct sock *sk, char __user *optval, int optlen) { - if (optlen != sizeof(struct sctp_event_subscribe)) + if (optlen > sizeof(struct sctp_event_subscribe)) return -EINVAL; if (copy_from_user(&sctp_sk(sk)->subscribe, optval, optlen)) return -EFAULT; @@ -5094,6 +5094,8 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len, len = num_chunks; if (put_user(len, optlen)) return -EFAULT; + if (put_user(num_chunks, &p->gauth_number_of_chunks)) + return -EFAULT; if (copy_to_user(to, ch->chunks, len)) return -EFAULT; @@ -5133,6 +5135,8 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len, len = num_chunks; if (put_user(len, optlen)) return -EFAULT; + if (put_user(num_chunks, &p->gauth_number_of_chunks)) + return -EFAULT; if (copy_to_user(to, ch->chunks, len)) return -EFAULT; -- cgit v1.2.3-70-g09d2 From 7729d74ed5099021f79ee8ecfa676829b5bac796 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 28 Feb 2008 21:53:20 -0800 Subject: [SPARC]: Add reboot_command[] extern decl to asm/system.h Kill off some sparse warnings. Signed-off-by: David S. Miller --- arch/sparc/kernel/process.c | 2 -- arch/sparc64/kernel/process.c | 2 -- include/asm-sparc/system.h | 2 ++ include/asm-sparc64/system.h | 2 ++ 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/arch/sparc/kernel/process.c b/arch/sparc/kernel/process.c index 0bd69d0b5cd..70c0dd22491 100644 --- a/arch/sparc/kernel/process.c +++ b/arch/sparc/kernel/process.c @@ -139,8 +139,6 @@ void cpu_idle(void) #endif -extern char reboot_command []; - /* XXX cli/sti -> local_irq_xxx here, check this works once SMP is fixed. */ void machine_halt(void) { diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c index 2aafce7dfc0..e116e38b160 100644 --- a/arch/sparc64/kernel/process.c +++ b/arch/sparc64/kernel/process.c @@ -114,8 +114,6 @@ void cpu_idle(void) } } -extern char reboot_command []; - void machine_halt(void) { sstate_halt(); diff --git a/include/asm-sparc/system.h b/include/asm-sparc/system.h index 45e47c159a6..4e08210cd4c 100644 --- a/include/asm-sparc/system.h +++ b/include/asm-sparc/system.h @@ -44,6 +44,8 @@ extern enum sparc_cpu sparc_cpu_model; #define SUN4M_NCPUS 4 /* Architectural limit of sun4m. */ +extern char reboot_command[]; + extern struct thread_info *current_set[NR_CPUS]; extern unsigned long empty_bad_page; diff --git a/include/asm-sparc64/system.h b/include/asm-sparc64/system.h index ed91a5d8d4f..53eae091a17 100644 --- a/include/asm-sparc64/system.h +++ b/include/asm-sparc64/system.h @@ -30,6 +30,8 @@ enum sparc_cpu { #define ARCH_SUN4C_SUN4 0 #define ARCH_SUN4 0 +extern char reboot_command[]; + /* These are here in an effort to more fully work around Spitfire Errata * #51. Essentially, if a memory barrier occurs soon after a mispredicted * branch, the chip can stop executing instructions until a trap occurs. -- cgit v1.2.3-70-g09d2 From b59931649256685f294d2d163a4f6d6286fbff05 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Tue, 26 Feb 2008 13:20:58 -0800 Subject: elfcore-compat fix uid/gid types I overlooked the difference between __kernel_uid_t and uid_t when defining struct compat_elf_prpsinfo. The result is a regression in 32-bit core dumps on x86_64, where the NT_PRPSINFO note has the wrong size and layout. This patch fixes it. Signed-off-by: Roland McGrath Acked-by: Ingo Molnar Signed-off-by: Linus Torvalds --- include/linux/elfcore-compat.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/elfcore-compat.h b/include/linux/elfcore-compat.h index 532d13adabc..0a90e1c3a42 100644 --- a/include/linux/elfcore-compat.h +++ b/include/linux/elfcore-compat.h @@ -45,8 +45,8 @@ struct compat_elf_prpsinfo char pr_zomb; char pr_nice; compat_ulong_t pr_flag; - compat_uid_t pr_uid; - compat_gid_t pr_gid; + __compat_uid_t pr_uid; + __compat_gid_t pr_gid; compat_pid_t pr_pid, pr_ppid, pr_pgrp, pr_sid; char pr_fname[16]; char pr_psargs[ELF_PRARGSZ]; -- cgit v1.2.3-70-g09d2 From 57ce36feb4d1281247755bc445bae77728298955 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Mon, 25 Feb 2008 16:45:03 +0100 Subject: let __dec_zone_page_state use __dec_zone_state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This removes code duplication and makes __dec_zone_page_state look like __inc_zone_page_state. Signed-off-by: Uwe Kleine-König Acked-by: Christoph Lameter Signed-off-by: Linus Torvalds --- include/linux/vmstat.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 75370ec0923..9f1b4b46151 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -246,8 +246,7 @@ static inline void __dec_zone_state(struct zone *zone, enum zone_stat_item item) static inline void __dec_zone_page_state(struct page *page, enum zone_stat_item item) { - atomic_long_dec(&page_zone(page)->vm_stat[item]); - atomic_long_dec(&vm_stat[item]); + __dec_zone_state(page_zone(page), item); } /* -- cgit v1.2.3-70-g09d2 From 2232c2d8e0a6a31061dec311f3d1cf7624bc14f1 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 29 Feb 2008 18:46:50 +0100 Subject: rcu: add support for dynamic ticks and preempt rcu The PREEMPT-RCU can get stuck if a CPU goes idle and NO_HZ is set. The idle CPU will not progress the RCU through its grace period and a synchronize_rcu my get stuck. Without this patch I have a box that will not boot when PREEMPT_RCU and NO_HZ are set. That same box boots fine with this patch. This patch comes from the -rt kernel where it has been tested for several months. Signed-off-by: Steven Rostedt Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- include/linux/hardirq.h | 10 ++ include/linux/rcuclassic.h | 3 + include/linux/rcupreempt.h | 22 +++++ kernel/rcupreempt.c | 224 ++++++++++++++++++++++++++++++++++++++++++++- kernel/softirq.c | 1 + kernel/time/tick-sched.c | 3 + 6 files changed, 259 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 2961ec78804..49829988bfa 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -109,6 +109,14 @@ static inline void account_system_vtime(struct task_struct *tsk) } #endif +#if defined(CONFIG_PREEMPT_RCU) && defined(CONFIG_NO_HZ) +extern void rcu_irq_enter(void); +extern void rcu_irq_exit(void); +#else +# define rcu_irq_enter() do { } while (0) +# define rcu_irq_exit() do { } while (0) +#endif /* CONFIG_PREEMPT_RCU */ + /* * It is safe to do non-atomic ops on ->hardirq_context, * because NMI handlers may not preempt and the ops are @@ -117,6 +125,7 @@ static inline void account_system_vtime(struct task_struct *tsk) */ #define __irq_enter() \ do { \ + rcu_irq_enter(); \ account_system_vtime(current); \ add_preempt_count(HARDIRQ_OFFSET); \ trace_hardirq_enter(); \ @@ -135,6 +144,7 @@ extern void irq_enter(void); trace_hardirq_exit(); \ account_system_vtime(current); \ sub_preempt_count(HARDIRQ_OFFSET); \ + rcu_irq_exit(); \ } while (0) /* diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h index 4d6624260b4..b3dccd68629 100644 --- a/include/linux/rcuclassic.h +++ b/include/linux/rcuclassic.h @@ -160,5 +160,8 @@ extern void rcu_restart_cpu(int cpu); extern long rcu_batches_completed(void); extern long rcu_batches_completed_bh(void); +#define rcu_enter_nohz() do { } while (0) +#define rcu_exit_nohz() do { } while (0) + #endif /* __KERNEL__ */ #endif /* __LINUX_RCUCLASSIC_H */ diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h index 60c2a033b19..01152ed532c 100644 --- a/include/linux/rcupreempt.h +++ b/include/linux/rcupreempt.h @@ -82,5 +82,27 @@ extern struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu); struct softirq_action; +#ifdef CONFIG_NO_HZ +DECLARE_PER_CPU(long, dynticks_progress_counter); + +static inline void rcu_enter_nohz(void) +{ + __get_cpu_var(dynticks_progress_counter)++; + WARN_ON(__get_cpu_var(dynticks_progress_counter) & 0x1); + mb(); +} + +static inline void rcu_exit_nohz(void) +{ + mb(); + __get_cpu_var(dynticks_progress_counter)++; + WARN_ON(!(__get_cpu_var(dynticks_progress_counter) & 0x1)); +} + +#else /* CONFIG_NO_HZ */ +#define rcu_enter_nohz() do { } while (0) +#define rcu_exit_nohz() do { } while (0) +#endif /* CONFIG_NO_HZ */ + #endif /* __KERNEL__ */ #endif /* __LINUX_RCUPREEMPT_H */ diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c index 987cfb7ade8..c7c52096df4 100644 --- a/kernel/rcupreempt.c +++ b/kernel/rcupreempt.c @@ -23,6 +23,10 @@ * to Suparna Bhattacharya for pushing me completely away * from atomic instructions on the read side. * + * - Added handling of Dynamic Ticks + * Copyright 2007 - Paul E. Mckenney + * - Steven Rostedt + * * Papers: http://www.rdrop.com/users/paulmck/RCU * * Design Document: http://lwn.net/Articles/253651/ @@ -409,6 +413,212 @@ static void __rcu_advance_callbacks(struct rcu_data *rdp) } } +#ifdef CONFIG_NO_HZ + +DEFINE_PER_CPU(long, dynticks_progress_counter) = 1; +static DEFINE_PER_CPU(long, rcu_dyntick_snapshot); +static DEFINE_PER_CPU(int, rcu_update_flag); + +/** + * rcu_irq_enter - Called from Hard irq handlers and NMI/SMI. + * + * If the CPU was idle with dynamic ticks active, this updates the + * dynticks_progress_counter to let the RCU handling know that the + * CPU is active. + */ +void rcu_irq_enter(void) +{ + int cpu = smp_processor_id(); + + if (per_cpu(rcu_update_flag, cpu)) + per_cpu(rcu_update_flag, cpu)++; + + /* + * Only update if we are coming from a stopped ticks mode + * (dynticks_progress_counter is even). + */ + if (!in_interrupt() && + (per_cpu(dynticks_progress_counter, cpu) & 0x1) == 0) { + /* + * The following might seem like we could have a race + * with NMI/SMIs. But this really isn't a problem. + * Here we do a read/modify/write, and the race happens + * when an NMI/SMI comes in after the read and before + * the write. But NMI/SMIs will increment this counter + * twice before returning, so the zero bit will not + * be corrupted by the NMI/SMI which is the most important + * part. + * + * The only thing is that we would bring back the counter + * to a postion that it was in during the NMI/SMI. + * But the zero bit would be set, so the rest of the + * counter would again be ignored. + * + * On return from the IRQ, the counter may have the zero + * bit be 0 and the counter the same as the return from + * the NMI/SMI. If the state machine was so unlucky to + * see that, it still doesn't matter, since all + * RCU read-side critical sections on this CPU would + * have already completed. + */ + per_cpu(dynticks_progress_counter, cpu)++; + /* + * The following memory barrier ensures that any + * rcu_read_lock() primitives in the irq handler + * are seen by other CPUs to follow the above + * increment to dynticks_progress_counter. This is + * required in order for other CPUs to correctly + * determine when it is safe to advance the RCU + * grace-period state machine. + */ + smp_mb(); /* see above block comment. */ + /* + * Since we can't determine the dynamic tick mode from + * the dynticks_progress_counter after this routine, + * we use a second flag to acknowledge that we came + * from an idle state with ticks stopped. + */ + per_cpu(rcu_update_flag, cpu)++; + /* + * If we take an NMI/SMI now, they will also increment + * the rcu_update_flag, and will not update the + * dynticks_progress_counter on exit. That is for + * this IRQ to do. + */ + } +} + +/** + * rcu_irq_exit - Called from exiting Hard irq context. + * + * If the CPU was idle with dynamic ticks active, update the + * dynticks_progress_counter to put let the RCU handling be + * aware that the CPU is going back to idle with no ticks. + */ +void rcu_irq_exit(void) +{ + int cpu = smp_processor_id(); + + /* + * rcu_update_flag is set if we interrupted the CPU + * when it was idle with ticks stopped. + * Once this occurs, we keep track of interrupt nesting + * because a NMI/SMI could also come in, and we still + * only want the IRQ that started the increment of the + * dynticks_progress_counter to be the one that modifies + * it on exit. + */ + if (per_cpu(rcu_update_flag, cpu)) { + if (--per_cpu(rcu_update_flag, cpu)) + return; + + /* This must match the interrupt nesting */ + WARN_ON(in_interrupt()); + + /* + * If an NMI/SMI happens now we are still + * protected by the dynticks_progress_counter being odd. + */ + + /* + * The following memory barrier ensures that any + * rcu_read_unlock() primitives in the irq handler + * are seen by other CPUs to preceed the following + * increment to dynticks_progress_counter. This + * is required in order for other CPUs to determine + * when it is safe to advance the RCU grace-period + * state machine. + */ + smp_mb(); /* see above block comment. */ + per_cpu(dynticks_progress_counter, cpu)++; + WARN_ON(per_cpu(dynticks_progress_counter, cpu) & 0x1); + } +} + +static void dyntick_save_progress_counter(int cpu) +{ + per_cpu(rcu_dyntick_snapshot, cpu) = + per_cpu(dynticks_progress_counter, cpu); +} + +static inline int +rcu_try_flip_waitack_needed(int cpu) +{ + long curr; + long snap; + + curr = per_cpu(dynticks_progress_counter, cpu); + snap = per_cpu(rcu_dyntick_snapshot, cpu); + smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ + + /* + * If the CPU remained in dynticks mode for the entire time + * and didn't take any interrupts, NMIs, SMIs, or whatever, + * then it cannot be in the middle of an rcu_read_lock(), so + * the next rcu_read_lock() it executes must use the new value + * of the counter. So we can safely pretend that this CPU + * already acknowledged the counter. + */ + + if ((curr == snap) && ((curr & 0x1) == 0)) + return 0; + + /* + * If the CPU passed through or entered a dynticks idle phase with + * no active irq handlers, then, as above, we can safely pretend + * that this CPU already acknowledged the counter. + */ + + if ((curr - snap) > 2 || (snap & 0x1) == 0) + return 0; + + /* We need this CPU to explicitly acknowledge the counter flip. */ + + return 1; +} + +static inline int +rcu_try_flip_waitmb_needed(int cpu) +{ + long curr; + long snap; + + curr = per_cpu(dynticks_progress_counter, cpu); + snap = per_cpu(rcu_dyntick_snapshot, cpu); + smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ + + /* + * If the CPU remained in dynticks mode for the entire time + * and didn't take any interrupts, NMIs, SMIs, or whatever, + * then it cannot have executed an RCU read-side critical section + * during that time, so there is no need for it to execute a + * memory barrier. + */ + + if ((curr == snap) && ((curr & 0x1) == 0)) + return 0; + + /* + * If the CPU either entered or exited an outermost interrupt, + * SMI, NMI, or whatever handler, then we know that it executed + * a memory barrier when doing so. So we don't need another one. + */ + if (curr != snap) + return 0; + + /* We need the CPU to execute a memory barrier. */ + + return 1; +} + +#else /* !CONFIG_NO_HZ */ + +# define dyntick_save_progress_counter(cpu) do { } while (0) +# define rcu_try_flip_waitack_needed(cpu) (1) +# define rcu_try_flip_waitmb_needed(cpu) (1) + +#endif /* CONFIG_NO_HZ */ + /* * Get here when RCU is idle. Decide whether we need to * move out of idle state, and return non-zero if so. @@ -447,8 +657,10 @@ rcu_try_flip_idle(void) /* Now ask each CPU for acknowledgement of the flip. */ - for_each_cpu_mask(cpu, rcu_cpu_online_map) + for_each_cpu_mask(cpu, rcu_cpu_online_map) { per_cpu(rcu_flip_flag, cpu) = rcu_flipped; + dyntick_save_progress_counter(cpu); + } return 1; } @@ -464,7 +676,8 @@ rcu_try_flip_waitack(void) RCU_TRACE_ME(rcupreempt_trace_try_flip_a1); for_each_cpu_mask(cpu, rcu_cpu_online_map) - if (per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) { + if (rcu_try_flip_waitack_needed(cpu) && + per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) { RCU_TRACE_ME(rcupreempt_trace_try_flip_ae1); return 0; } @@ -509,8 +722,10 @@ rcu_try_flip_waitzero(void) smp_mb(); /* ^^^^^^^^^^^^ */ /* Call for a memory barrier from each CPU. */ - for_each_cpu_mask(cpu, rcu_cpu_online_map) + for_each_cpu_mask(cpu, rcu_cpu_online_map) { per_cpu(rcu_mb_flag, cpu) = rcu_mb_needed; + dyntick_save_progress_counter(cpu); + } RCU_TRACE_ME(rcupreempt_trace_try_flip_z2); return 1; @@ -528,7 +743,8 @@ rcu_try_flip_waitmb(void) RCU_TRACE_ME(rcupreempt_trace_try_flip_m1); for_each_cpu_mask(cpu, rcu_cpu_online_map) - if (per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) { + if (rcu_try_flip_waitmb_needed(cpu) && + per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) { RCU_TRACE_ME(rcupreempt_trace_try_flip_me1); return 0; } diff --git a/kernel/softirq.c b/kernel/softirq.c index 5b3aea5f471..31e9f2a4792 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -313,6 +313,7 @@ void irq_exit(void) /* Make sure that timer wheel updates are propagated */ if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched()) tick_nohz_stop_sched_tick(); + rcu_irq_exit(); #endif preempt_enable_no_resched(); } diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index fa9bb73dbdb..2968298f8f3 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -282,6 +282,7 @@ void tick_nohz_stop_sched_tick(void) ts->idle_tick = ts->sched_timer.expires; ts->tick_stopped = 1; ts->idle_jiffies = last_jiffies; + rcu_enter_nohz(); } /* @@ -375,6 +376,8 @@ void tick_nohz_restart_sched_tick(void) return; } + rcu_exit_nohz(); + /* Update jiffies first */ select_nohz_load_balancer(0); now = ktime_get(); -- cgit v1.2.3-70-g09d2 From cded932b75ab0a5f9181ee3da34a0a488d1a14fd Mon Sep 17 00:00:00 2001 From: Hans Rosenfeld Date: Mon, 18 Feb 2008 18:10:47 +0100 Subject: x86: fix pmd_bad and pud_bad to support huge pages I recently stumbled upon a problem in the support for huge pages. If a program using huge pages does not explicitly unmap them, they remain mapped (and therefore, are lost) after the program exits. I observed that the free huge page count in /proc/meminfo decreased when running my program, and it did not increase after the program exited. After running the program a few times, no more huge pages could be allocated. The reason for this seems to be that the x86 pmd_bad and pud_bad consider pmd/pud entries having the PSE bit set invalid. I think there is nothing wrong with this bit being set, it just indicates that the lowest level of translation has been reached. This bit has to be (and is) checked after the basic validity of the entry has been checked, like in this fragment from follow_page() in mm/memory.c: if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) goto no_page_table; if (pmd_huge(*pmd)) { BUG_ON(flags & FOLL_GET); page = follow_huge_pmd(mm, address, pmd, flags & FOLL_WRITE); goto out; } Note that this code currently doesn't work as intended if the pmd refers to a huge page, the pmd_huge() check can not be reached if the page is huge. Extending pmd_bad() (and, for future 1GB page support, pud_bad()) to allow for the PSE bit being set fixes this. For similar reasons, allowing the NX bit being set is necessary, too. I have seen huge pages having the NX bit set in their pmd entry, which would cause the same problem. Signed-Off-By: Hans Rosenfeld Signed-off-by: Ingo Molnar --- include/asm-x86/pgtable_32.h | 4 +++- include/asm-x86/pgtable_64.h | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h index a842c7222b1..b478efa971e 100644 --- a/include/asm-x86/pgtable_32.h +++ b/include/asm-x86/pgtable_32.h @@ -91,7 +91,9 @@ extern unsigned long pg0[]; /* To avoid harmful races, pmd_none(x) should check only the lower when PAE */ #define pmd_none(x) (!(unsigned long)pmd_val(x)) #define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT) -#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) +#define pmd_bad(x) ((pmd_val(x) \ + & ~(PAGE_MASK | _PAGE_USER | _PAGE_PSE | _PAGE_NX)) \ + != _KERNPG_TABLE) #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h index 0a0b77bc736..0a9258333cb 100644 --- a/include/asm-x86/pgtable_64.h +++ b/include/asm-x86/pgtable_64.h @@ -153,12 +153,14 @@ static inline unsigned long pgd_bad(pgd_t pgd) static inline unsigned long pud_bad(pud_t pud) { - return pud_val(pud) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER); + return pud_val(pud) & + ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER | _PAGE_PSE | _PAGE_NX); } static inline unsigned long pmd_bad(pmd_t pmd) { - return pmd_val(pmd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER); + return pmd_val(pmd) & + ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER | _PAGE_PSE | _PAGE_NX); } #define pte_none(x) (!pte_val(x)) -- cgit v1.2.3-70-g09d2 From 53c58588107973c0e240a1ed4fb8295f274c409d Mon Sep 17 00:00:00 2001 From: Dave Anderson Date: Thu, 21 Feb 2008 11:45:38 -0500 Subject: x86 ptrace: fix ptrace_bts_config structure declaration The 2.6.25 ptrace_bts_config structure in asm-x86/ptrace-abi.h is defined with u32 types: #include /* configuration/status structure used in PTRACE_BTS_CONFIG and PTRACE_BTS_STATUS commands. */ struct ptrace_bts_config { /* requested or actual size of BTS buffer in bytes */ u32 size; /* bitmask of below flags */ u32 flags; /* buffer overflow signal */ u32 signal; /* actual size of bts_struct in bytes */ u32 bts_size; }; #endif But u32 is only accessible in asm-x86/types.h if __KERNEL__, leading to compile errors when ptrace.h is included from user-space. The double-underscore versions that are exported to user-space in asm-x86/types.h should be used instead. Signed-off-by: Dave Anderson Signed-off-by: Ingo Molnar --- include/asm-x86/ptrace-abi.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-x86/ptrace-abi.h b/include/asm-x86/ptrace-abi.h index 81a8ee4c55f..f224eb3c315 100644 --- a/include/asm-x86/ptrace-abi.h +++ b/include/asm-x86/ptrace-abi.h @@ -89,13 +89,13 @@ */ struct ptrace_bts_config { /* requested or actual size of BTS buffer in bytes */ - u32 size; + __u32 size; /* bitmask of below flags */ - u32 flags; + __u32 flags; /* buffer overflow signal */ - u32 signal; + __u32 signal; /* actual size of bts_struct in bytes */ - u32 bts_size; + __u32 bts_size; }; #endif -- cgit v1.2.3-70-g09d2 From 94a3f78566ef98a48814d82892f28bb741624cb8 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Sat, 23 Feb 2008 00:23:48 +0100 Subject: [ARM] 4837/1: make __get_unaligned_*() return unsigned types Eric Sandeen tracked an XFS on ARM corruption bug down to a function under fs/xfs/ involving some get_unaligned() calls on u64 pointers. As it turns out, calling ARM's get_unaligned() on a u64 pointer pointing to the following byte sequence: 80 81 82 83 84 85 86 87 would return ffffffff83828180 (LE mode.) This turns out to be because of implicit u8 -> int promotion in ARM's implementation of various helpers for get_unaligned(), causing them to accidentally return signed instead of unsigned values, which in turn caused the subsequent casts to unsigned long long in __get_unaligned_8_[bl]e() to sign-extend the lower words. Fix by casting the return values of __get_unaligned_[24]_[bl]e() to unsigned int. Cc: Eric Sandeen Cc: Rabeeh Khoury Cc: Nicolas Pitre Signed-off-by: Lennert Buytenhek Signed-off-by: Russell King --- include/asm-arm/unaligned.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-arm/unaligned.h b/include/asm-arm/unaligned.h index 8431f6eed5c..5db03cf3b90 100644 --- a/include/asm-arm/unaligned.h +++ b/include/asm-arm/unaligned.h @@ -40,16 +40,16 @@ extern int __bug_unaligned_x(const void *ptr); */ #define __get_unaligned_2_le(__p) \ - (__p[0] | __p[1] << 8) + (unsigned int)(__p[0] | __p[1] << 8) #define __get_unaligned_2_be(__p) \ - (__p[0] << 8 | __p[1]) + (unsigned int)(__p[0] << 8 | __p[1]) #define __get_unaligned_4_le(__p) \ - (__p[0] | __p[1] << 8 | __p[2] << 16 | __p[3] << 24) + (unsigned int)(__p[0] | __p[1] << 8 | __p[2] << 16 | __p[3] << 24) #define __get_unaligned_4_be(__p) \ - (__p[0] << 24 | __p[1] << 16 | __p[2] << 8 | __p[3]) + (unsigned int)(__p[0] << 24 | __p[1] << 16 | __p[2] << 8 | __p[3]) #define __get_unaligned_8_le(__p) \ ((unsigned long long)__get_unaligned_4_le((__p+4)) << 32 | \ -- cgit v1.2.3-70-g09d2 From 5ce94e9e8b469a17fbd3efa1b940c19b5e43449a Mon Sep 17 00:00:00 2001 From: Thomas Kunze Date: Sun, 24 Feb 2008 17:59:34 +0100 Subject: [ARM] 4838/1: Fix kexec for SA1100 machines This patch sets KEXEC_CONTROL_MEMORY_LIMIT to (-1)UL. As the value is compared with physical addresses TASK_SIZE makes no sense. Machines where the RAM addresses start above TASK_SIZE kexecs eats all memory and crashes the kernel without this patch. Signed-off-by: Thomas Kunze Acked-by: Richard Purdie Signed-off-by: Russell King --- include/asm-arm/kexec.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-arm/kexec.h b/include/asm-arm/kexec.h index 1ee17b6951d..47fe34d692d 100644 --- a/include/asm-arm/kexec.h +++ b/include/asm-arm/kexec.h @@ -8,7 +8,7 @@ /* Maximum address we can reach in physical address mode */ #define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL) /* Maximum address we can use for the control code buffer */ -#define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE +#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL) #define KEXEC_CONTROL_CODE_SIZE 4096 -- cgit v1.2.3-70-g09d2 From a3359e21c06cb5b366fb47307b3d2fd23386a774 Mon Sep 17 00:00:00 2001 From: eric miao Date: Wed, 27 Feb 2008 01:59:28 +0100 Subject: [ARM] 4840/1: pxa: fix the typo in get_irqnr_and_base This typo causes the incorrect calculation of the IRQ numbers in the ICIP2 registers. Signed-off-by: eric miao Signed-off-by: Russell King --- include/asm-arm/arch-pxa/entry-macro.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-arm/arch-pxa/entry-macro.S b/include/asm-arm/arch-pxa/entry-macro.S index b7e73085146..c145bb01bc8 100644 --- a/include/asm-arm/arch-pxa/entry-macro.S +++ b/include/asm-arm/arch-pxa/entry-macro.S @@ -35,7 +35,7 @@ 1004: mrc p6, 0, \irqstat, c6, c0, 0 @ ICIP2 mrc p6, 0, \irqnr, c7, c0, 0 @ ICMR2 - ands \irqstat, \irqstat, \irqnr + ands \irqnr, \irqstat, \irqnr beq 1003f rsb \irqstat, \irqnr, #0 and \irqstat, \irqstat, \irqnr -- cgit v1.2.3-70-g09d2 From d862ccc570c875e1454fc57ed00f5a1081985b26 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 27 Feb 2008 15:34:56 +0100 Subject: [ARM] 4843/1: Add GCR_CLKBPB for PXA3xx The PXA3xx AC97 controller has an additional control bit GCR_CLKBPB which must be used during cold reset. Signed-off-by: Mark Brown Acked-by: eric miao Signed-off-by: Russell King --- include/asm-arm/arch-pxa/pxa-regs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/asm-arm/arch-pxa/pxa-regs.h b/include/asm-arm/arch-pxa/pxa-regs.h index ac175b4d10c..2357a73340d 100644 --- a/include/asm-arm/arch-pxa/pxa-regs.h +++ b/include/asm-arm/arch-pxa/pxa-regs.h @@ -520,6 +520,9 @@ #define MCCR_FSRIE (1 << 1) /* FIFO Service Request Interrupt Enable */ #define GCR __REG(0x4050000C) /* Global Control Register */ +#ifdef CONFIG_PXA3xx +#define GCR_CLKBPB (1 << 31) /* Internal clock enable */ +#endif #define GCR_nDMAEN (1 << 24) /* non DMA Enable */ #define GCR_CDONE_IE (1 << 19) /* Command Done Interrupt Enable */ #define GCR_SDONE_IE (1 << 18) /* Status Done Interrupt Enable */ -- cgit v1.2.3-70-g09d2 From 9176c0b1f5a9099cebc07458042ae6a7c75af7b2 Mon Sep 17 00:00:00 2001 From: Jens Osterkamp Date: Thu, 28 Feb 2008 11:26:21 +0100 Subject: [POWERPC] move celleb DABRX definitions This moves the private DABRX definitions for celleb from beat.h to reg.h to make them usable for all. Signed-off-by: Jens Osterkamp Signed-off-by: Arnd Bergmann --- arch/powerpc/platforms/celleb/beat.h | 3 --- include/asm-powerpc/reg.h | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/powerpc/platforms/celleb/beat.h b/arch/powerpc/platforms/celleb/beat.h index b2e292df13c..ac82ac35b99 100644 --- a/arch/powerpc/platforms/celleb/beat.h +++ b/arch/powerpc/platforms/celleb/beat.h @@ -21,9 +21,6 @@ #ifndef _CELLEB_BEAT_H #define _CELLEB_BEAT_H -#define DABRX_KERNEL (1UL<<1) -#define DABRX_USER (1UL<<0) - int64_t beat_get_term_char(uint64_t,uint64_t*,uint64_t*,uint64_t*); int64_t beat_put_term_char(uint64_t,uint64_t,uint64_t,uint64_t); int64_t beat_repository_encode(int, const char *, uint64_t[4]); diff --git a/include/asm-powerpc/reg.h b/include/asm-powerpc/reg.h index 0d6238987df..edc0cfd7f6e 100644 --- a/include/asm-powerpc/reg.h +++ b/include/asm-powerpc/reg.h @@ -153,6 +153,9 @@ #define CTRL_RUNLATCH 0x1 #define SPRN_DABR 0x3F5 /* Data Address Breakpoint Register */ #define DABR_TRANSLATION (1UL << 2) +#define SPRN_DABRX 0x3F7 /* Data Address Breakpoint Register Extension */ +#define DABRX_USER (1UL << 0) +#define DABRX_KERNEL (1UL << 1) #define SPRN_DAR 0x013 /* Data Address Register */ #define SPRN_DSISR 0x012 /* Data Storage Interrupt Status Register */ #define DSISR_NOHPTE 0x40000000 /* no translation found */ -- cgit v1.2.3-70-g09d2 From 674eea0fc4d1d693250b5d3ddad42ca931c87dfd Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 11 Feb 2008 18:37:23 +0200 Subject: KVM: Make the supported cpuid list a host property rather than a vm property One of the use cases for the supported cpuid list is to create a "greatest common denominator" of cpu capabilities in a server farm. As such, it is useful to be able to get the list without creating a virtual machine first. Since the code does not depend on the vm in any way, all that is needed is to move it to the device ioctl handler. The capability identifier is also changed so that binaries made against -rc1 will fail gracefully. Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 42 ++++++++++++++++++++++-------------------- include/linux/kvm.h | 4 ++-- 2 files changed, 24 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ec60409299a..a7069ec2267 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -46,6 +46,9 @@ #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU +static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, + struct kvm_cpuid_entry2 __user *entries); + struct kvm_x86_ops *kvm_x86_ops; struct kvm_stats_debugfs_item debugfs_entries[] = { @@ -727,6 +730,24 @@ long kvm_arch_dev_ioctl(struct file *filp, r = 0; break; } + case KVM_GET_SUPPORTED_CPUID: { + struct kvm_cpuid2 __user *cpuid_arg = argp; + struct kvm_cpuid2 cpuid; + + r = -EFAULT; + if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) + goto out; + r = kvm_dev_ioctl_get_supported_cpuid(&cpuid, + cpuid_arg->entries); + if (r) + goto out; + + r = -EFAULT; + if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid)) + goto out; + r = 0; + break; + } default: r = -EINVAL; } @@ -974,8 +995,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, put_cpu(); } -static int kvm_vm_ioctl_get_supported_cpuid(struct kvm *kvm, - struct kvm_cpuid2 *cpuid, +static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 __user *entries) { struct kvm_cpuid_entry2 *cpuid_entries; @@ -1487,24 +1507,6 @@ long kvm_arch_vm_ioctl(struct file *filp, r = 0; break; } - case KVM_GET_SUPPORTED_CPUID: { - struct kvm_cpuid2 __user *cpuid_arg = argp; - struct kvm_cpuid2 cpuid; - - r = -EFAULT; - if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid)) - goto out; - r = kvm_vm_ioctl_get_supported_cpuid(kvm, &cpuid, - cpuid_arg->entries); - if (r) - goto out; - - r = -EFAULT; - if (copy_to_user(cpuid_arg, &cpuid, sizeof cpuid)) - goto out; - r = 0; - break; - } default: ; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 4de4fd2d860..c1ec04fd000 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -221,6 +221,7 @@ struct kvm_vapic_addr { * Get size for mmap(vcpu_fd) */ #define KVM_GET_VCPU_MMAP_SIZE _IO(KVMIO, 0x04) /* in bytes */ +#define KVM_GET_SUPPORTED_CPUID _IOWR(KVMIO, 0x05, struct kvm_cpuid2) /* * Extension capability list. @@ -230,8 +231,8 @@ struct kvm_vapic_addr { #define KVM_CAP_MMU_SHADOW_CACHE_CONTROL 2 #define KVM_CAP_USER_MEMORY 3 #define KVM_CAP_SET_TSS_ADDR 4 -#define KVM_CAP_EXT_CPUID 5 #define KVM_CAP_VAPIC 6 +#define KVM_CAP_EXT_CPUID 7 /* * ioctls for VM fds @@ -249,7 +250,6 @@ struct kvm_vapic_addr { #define KVM_CREATE_VCPU _IO(KVMIO, 0x41) #define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log) #define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias) -#define KVM_GET_SUPPORTED_CPUID _IOWR(KVMIO, 0x48, struct kvm_cpuid2) /* Device model IOC */ #define KVM_CREATE_IRQCHIP _IO(KVMIO, 0x60) #define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level) -- cgit v1.2.3-70-g09d2 From a345b4ba2086bacc63884e5d72268415a97bcbff Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 3 Mar 2008 10:02:44 -0800 Subject: Revert "x86: fix pmd_bad and pud_bad to support huge pages" This reverts commit cded932b75ab0a5f9181ee3da34a0a488d1a14fd. Arjan bisected down a boot-time hang to this, saying: ".. it prevents the kernel to finish booting on my (Penryn based) laptop. The boot stops right after freeing the init memory." and while it's not clear exactly what triggers it, at this stage we're better off just reverting it while Ingo tries to figure out what went wrong. Requested-by: Arjan van de Ven Cc: Hans Rosenfeld Cc: Nish Aravamudan Acked-by: Ingo Molnar Signed-off-by: Linus Torvalds --- include/asm-x86/pgtable_32.h | 4 +--- include/asm-x86/pgtable_64.h | 6 ++---- 2 files changed, 3 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h index b478efa971e..a842c7222b1 100644 --- a/include/asm-x86/pgtable_32.h +++ b/include/asm-x86/pgtable_32.h @@ -91,9 +91,7 @@ extern unsigned long pg0[]; /* To avoid harmful races, pmd_none(x) should check only the lower when PAE */ #define pmd_none(x) (!(unsigned long)pmd_val(x)) #define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT) -#define pmd_bad(x) ((pmd_val(x) \ - & ~(PAGE_MASK | _PAGE_USER | _PAGE_PSE | _PAGE_NX)) \ - != _KERNPG_TABLE) +#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h index 0a9258333cb..0a0b77bc736 100644 --- a/include/asm-x86/pgtable_64.h +++ b/include/asm-x86/pgtable_64.h @@ -153,14 +153,12 @@ static inline unsigned long pgd_bad(pgd_t pgd) static inline unsigned long pud_bad(pud_t pud) { - return pud_val(pud) & - ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER | _PAGE_PSE | _PAGE_NX); + return pud_val(pud) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER); } static inline unsigned long pmd_bad(pmd_t pmd) { - return pmd_val(pmd) & - ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER | _PAGE_PSE | _PAGE_NX); + return pmd_val(pmd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER); } #define pte_none(x) (!pte_val(x)) -- cgit v1.2.3-70-g09d2 From d0bcabcd72dda5f553322a1ca92ae31c15b408b6 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 29 Feb 2008 22:03:07 -0800 Subject: docbook: fix usb source files Fix docbook problems in USB source files. These cause the generated docbook to be incorrect. Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds --- drivers/usb/core/usb.c | 6 ++---- include/linux/usb.h | 9 +++------ 2 files changed, 5 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index 4e984060c98..f6f19908f5f 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -99,8 +99,7 @@ struct usb_interface *usb_ifnum_to_if(const struct usb_device *dev, EXPORT_SYMBOL_GPL(usb_ifnum_to_if); /** - * usb_altnum_to_altsetting - get the altsetting structure with a given - * alternate setting number. + * usb_altnum_to_altsetting - get the altsetting structure with a given alternate setting number. * @intf: the interface containing the altsetting in question * @altnum: the desired alternate setting number * @@ -442,8 +441,7 @@ EXPORT_SYMBOL_GPL(usb_put_intf); */ /** - * usb_lock_device_for_reset - cautiously acquire the lock for a - * usb device structure + * usb_lock_device_for_reset - cautiously acquire the lock for a usb device structure * @udev: device that's being locked * @iface: interface bound to the driver making the request (optional) * diff --git a/include/linux/usb.h b/include/linux/usb.h index 2372e2e6b52..5bd3ae8aaaf 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -781,8 +781,7 @@ static inline int usb_endpoint_is_isoc_out( .idVendor = (vend), \ .idProduct = (prod) /** - * USB_DEVICE_VER - macro used to describe a specific usb device with a - * version range + * USB_DEVICE_VER - describe a specific usb device with a version range * @vend: the 16 bit USB Vendor ID * @prod: the 16 bit USB Product ID * @lo: the bcdDevice_lo value @@ -799,8 +798,7 @@ static inline int usb_endpoint_is_isoc_out( .bcdDevice_hi = (hi) /** - * USB_DEVICE_INTERFACE_PROTOCOL - macro used to describe a usb - * device with a specific interface protocol + * USB_DEVICE_INTERFACE_PROTOCOL - describe a usb device with a specific interface protocol * @vend: the 16 bit USB Vendor ID * @prod: the 16 bit USB Product ID * @pr: bInterfaceProtocol value @@ -846,8 +844,7 @@ static inline int usb_endpoint_is_isoc_out( .bInterfaceProtocol = (pr) /** - * USB_DEVICE_AND_INTERFACE_INFO - macro used to describe a specific usb device - * with a class of usb interfaces + * USB_DEVICE_AND_INTERFACE_INFO - describe a specific usb device with a class of usb interfaces * @vend: the 16 bit USB Vendor ID * @prod: the 16 bit USB Product ID * @cl: bInterfaceClass value -- cgit v1.2.3-70-g09d2 From a973e9dd1e140a65bed694a2c5c8d53e9cba1a23 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sat, 1 Mar 2008 13:40:44 -0800 Subject: Revert "unique end pointer" patch This only made sense for the alternate fastpath which was reverted last week. Mathieu is working on a new version that addresses the fastpath issues but that new code first needs to go through mm and it is not clear if we need the unique end pointers with his new scheme. Reviewed-by: Pekka Enberg Signed-off-by: Christoph Lameter --- include/linux/mm_types.h | 5 +--- mm/slub.c | 70 ++++++++++++++++-------------------------------- 2 files changed, 24 insertions(+), 51 deletions(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index bfee0bd1d43..34023c65d46 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -64,10 +64,7 @@ struct page { #if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS spinlock_t ptl; #endif - struct { - struct kmem_cache *slab; /* SLUB: Pointer to slab */ - void *end; /* SLUB: end marker */ - }; + struct kmem_cache *slab; /* SLUB: Pointer to slab */ struct page *first_page; /* Compound tail pages */ }; union { diff --git a/mm/slub.c b/mm/slub.c index 74c65af0a54..a873953e5a1 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -291,32 +291,15 @@ static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu) #endif } -/* - * The end pointer in a slab is special. It points to the first object in the - * slab but has bit 0 set to mark it. - * - * Note that SLUB relies on page_mapping returning NULL for pages with bit 0 - * in the mapping set. - */ -static inline int is_end(void *addr) -{ - return (unsigned long)addr & PAGE_MAPPING_ANON; -} - -static void *slab_address(struct page *page) -{ - return page->end - PAGE_MAPPING_ANON; -} - static inline int check_valid_pointer(struct kmem_cache *s, struct page *page, const void *object) { void *base; - if (object == page->end) + if (!object) return 1; - base = slab_address(page); + base = page_address(page); if (object < base || object >= base + s->objects * s->size || (object - base) % s->size) { return 0; @@ -349,8 +332,7 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp) /* Scan freelist */ #define for_each_free_object(__p, __s, __free) \ - for (__p = (__free); (__p) != page->end; __p = get_freepointer((__s),\ - __p)) + for (__p = (__free); __p; __p = get_freepointer((__s), __p)) /* Determine object index from a given position */ static inline int slab_index(void *p, struct kmem_cache *s, void *addr) @@ -502,7 +484,7 @@ static void slab_fix(struct kmem_cache *s, char *fmt, ...) static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) { unsigned int off; /* Offset of last byte */ - u8 *addr = slab_address(page); + u8 *addr = page_address(page); print_tracking(s, p); @@ -680,7 +662,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page) if (!(s->flags & SLAB_POISON)) return 1; - start = slab_address(page); + start = page_address(page); end = start + (PAGE_SIZE << s->order); length = s->objects * s->size; remainder = end - (start + length); @@ -748,7 +730,7 @@ static int check_object(struct kmem_cache *s, struct page *page, * of the free objects in this slab. May cause * another error because the object count is now wrong. */ - set_freepointer(s, p, page->end); + set_freepointer(s, p, NULL); return 0; } return 1; @@ -782,18 +764,18 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search) void *fp = page->freelist; void *object = NULL; - while (fp != page->end && nr <= s->objects) { + while (fp && nr <= s->objects) { if (fp == search) return 1; if (!check_valid_pointer(s, page, fp)) { if (object) { object_err(s, page, object, "Freechain corrupt"); - set_freepointer(s, object, page->end); + set_freepointer(s, object, NULL); break; } else { slab_err(s, page, "Freepointer corrupt"); - page->freelist = page->end; + page->freelist = NULL; page->inuse = s->objects; slab_fix(s, "Freelist cleared"); return 0; @@ -899,7 +881,7 @@ bad: */ slab_fix(s, "Marking all objects used"); page->inuse = s->objects; - page->freelist = page->end; + page->freelist = NULL; } return 0; } @@ -939,7 +921,7 @@ static int free_debug_processing(struct kmem_cache *s, struct page *page, } /* Special debug activities for freeing objects */ - if (!SlabFrozen(page) && page->freelist == page->end) + if (!SlabFrozen(page) && !page->freelist) remove_full(s, page); if (s->flags & SLAB_STORE_USER) set_track(s, object, TRACK_FREE, addr); @@ -1124,7 +1106,6 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) SetSlabDebug(page); start = page_address(page); - page->end = start + 1; if (unlikely(s->flags & SLAB_POISON)) memset(start, POISON_INUSE, PAGE_SIZE << s->order); @@ -1136,7 +1117,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) last = p; } setup_object(s, page, last); - set_freepointer(s, last, page->end); + set_freepointer(s, last, NULL); page->freelist = start; page->inuse = 0; @@ -1152,7 +1133,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page) void *p; slab_pad_check(s, page); - for_each_object(p, s, slab_address(page)) + for_each_object(p, s, page_address(page)) check_object(s, page, p, 0); ClearSlabDebug(page); } @@ -1162,7 +1143,6 @@ static void __free_slab(struct kmem_cache *s, struct page *page) NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, -pages); - page->mapping = NULL; __free_pages(page, s->order); } @@ -1366,7 +1346,7 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) ClearSlabFrozen(page); if (page->inuse) { - if (page->freelist != page->end) { + if (page->freelist) { add_partial(n, page, tail); stat(c, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD); } else { @@ -1410,12 +1390,8 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) * Merge cpu freelist into freelist. Typically we get here * because both freelists are empty. So this is unlikely * to occur. - * - * We need to use _is_end here because deactivate slab may - * be called for a debug slab. Then c->freelist may contain - * a dummy pointer. */ - while (unlikely(!is_end(c->freelist))) { + while (unlikely(c->freelist)) { void **object; tail = 0; /* Hot objects. Put the slab first */ @@ -1517,7 +1493,7 @@ static void *__slab_alloc(struct kmem_cache *s, stat(c, ALLOC_REFILL); load_freelist: object = c->page->freelist; - if (unlikely(object == c->page->end)) + if (unlikely(!object)) goto another_slab; if (unlikely(SlabDebug(c->page))) goto debug; @@ -1525,7 +1501,7 @@ load_freelist: object = c->page->freelist; c->freelist = object[c->offset]; c->page->inuse = s->objects; - c->page->freelist = c->page->end; + c->page->freelist = NULL; c->node = page_to_nid(c->page); unlock_out: slab_unlock(c->page); @@ -1607,7 +1583,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, local_irq_save(flags); c = get_cpu_slab(s, smp_processor_id()); - if (unlikely(is_end(c->freelist) || !node_match(c, node))) + if (unlikely(!c->freelist || !node_match(c, node))) object = __slab_alloc(s, gfpflags, node, addr, c); @@ -1677,7 +1653,7 @@ checks_ok: * was not on the partial list before * then add it. */ - if (unlikely(prior == page->end)) { + if (unlikely(!prior)) { add_partial(get_node(s, page_to_nid(page)), page, 1); stat(c, FREE_ADD_PARTIAL); } @@ -1687,7 +1663,7 @@ out_unlock: return; slab_empty: - if (prior != page->end) { + if (prior) { /* * Slab still on the partial list. */ @@ -1910,7 +1886,7 @@ static void init_kmem_cache_cpu(struct kmem_cache *s, struct kmem_cache_cpu *c) { c->page = NULL; - c->freelist = (void *)PAGE_MAPPING_ANON; + c->freelist = NULL; c->node = 0; c->offset = s->offset / sizeof(void *); c->objsize = s->objsize; @@ -3199,7 +3175,7 @@ static int validate_slab(struct kmem_cache *s, struct page *page, unsigned long *map) { void *p; - void *addr = slab_address(page); + void *addr = page_address(page); if (!check_slab(s, page) || !on_freelist(s, page, NULL)) @@ -3482,7 +3458,7 @@ static int add_location(struct loc_track *t, struct kmem_cache *s, static void process_slab(struct loc_track *t, struct kmem_cache *s, struct page *page, enum track_item alloc) { - void *addr = slab_address(page); + void *addr = page_address(page); DECLARE_BITMAP(map, s->objects); void *p; -- cgit v1.2.3-70-g09d2 From 6446faa2ff30ca77c5b25e886bbbfb81c63f1c91 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Fri, 15 Feb 2008 23:45:26 -0800 Subject: slub: Fix up comments Provide comments and fix up various spelling / style issues. Signed-off-by: Christoph Lameter --- include/linux/slub_def.h | 4 ++-- mm/slub.c | 49 +++++++++++++++++++++++++++--------------------- 2 files changed, 30 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 57deecc79d5..b00c1c73eb0 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -61,7 +61,7 @@ struct kmem_cache { int size; /* The size of an object including meta data */ int objsize; /* The size of an object without meta data */ int offset; /* Free pointer offset. */ - int order; + int order; /* Current preferred allocation order */ /* * Avoid an extra cache line for UP, SMP and for the node local to @@ -138,11 +138,11 @@ static __always_inline int kmalloc_index(size_t size) if (size <= 512) return 9; if (size <= 1024) return 10; if (size <= 2 * 1024) return 11; + if (size <= 4 * 1024) return 12; /* * The following is only needed to support architectures with a larger page * size than 4k. */ - if (size <= 4 * 1024) return 12; if (size <= 8 * 1024) return 13; if (size <= 16 * 1024) return 14; if (size <= 32 * 1024) return 15; diff --git a/mm/slub.c b/mm/slub.c index 72f5f4ecd1d..10d546954ef 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -291,6 +291,7 @@ static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu) #endif } +/* Verify that a pointer has an address that is valid within a slab page */ static inline int check_valid_pointer(struct kmem_cache *s, struct page *page, const void *object) { @@ -619,7 +620,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page, * A. Free pointer (if we cannot overwrite object on free) * B. Tracking data for SLAB_STORE_USER * C. Padding to reach required alignment boundary or at mininum - * one word if debuggin is on to be able to detect writes + * one word if debugging is on to be able to detect writes * before the word boundary. * * Padding is done using 0x5a (POISON_INUSE) @@ -1268,7 +1269,7 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags) * may return off node objects because partial slabs are obtained * from other nodes and filled up. * - * If /sys/slab/xx/defrag_ratio is set to 100 (which makes + * If /sys/kernel/slab/xx/defrag_ratio is set to 100 (which makes * defrag_ratio = 1000) then every (well almost) allocation will * first attempt to defrag slab caches on other nodes. This means * scanning over all nodes to look for partial slabs which may be @@ -1343,9 +1344,11 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) * Adding an empty slab to the partial slabs in order * to avoid page allocator overhead. This slab needs * to come after the other slabs with objects in - * order to fill them up. That way the size of the - * partial list stays small. kmem_cache_shrink can - * reclaim empty slabs from the partial list. + * so that the others get filled first. That way the + * size of the partial list stays small. + * + * kmem_cache_shrink can reclaim any empty slabs from the + * partial list. */ add_partial(n, page, 1); slab_unlock(page); @@ -1368,7 +1371,7 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) if (c->freelist) stat(c, DEACTIVATE_REMOTE_FREES); /* - * Merge cpu freelist into freelist. Typically we get here + * Merge cpu freelist into slab freelist. Typically we get here * because both freelists are empty. So this is unlikely * to occur. */ @@ -1399,6 +1402,7 @@ static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) /* * Flush cpu slab. + * * Called from IPI handler with interrupts disabled. */ static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) @@ -1457,7 +1461,8 @@ static inline int node_match(struct kmem_cache_cpu *c, int node) * rest of the freelist to the lockless freelist. * * And if we were unable to get a new slab from the partial slab lists then - * we need to allocate a new slab. This is slowest path since we may sleep. + * we need to allocate a new slab. This is the slowest path since it involves + * a call to the page allocator and the setup of a new slab. */ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, void *addr, struct kmem_cache_cpu *c) @@ -1471,7 +1476,9 @@ static void *__slab_alloc(struct kmem_cache *s, slab_lock(c->page); if (unlikely(!node_match(c, node))) goto another_slab; + stat(c, ALLOC_REFILL); + load_freelist: object = c->page->freelist; if (unlikely(!object)) @@ -1616,6 +1623,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, if (unlikely(SlabDebug(page))) goto debug; + checks_ok: prior = object[offset] = page->freelist; page->freelist = object; @@ -1630,8 +1638,7 @@ checks_ok: goto slab_empty; /* - * Objects left in the slab. If it - * was not on the partial list before + * Objects left in the slab. If it was not on the partial list before * then add it. */ if (unlikely(!prior)) { @@ -1845,13 +1852,11 @@ static unsigned long calculate_alignment(unsigned long flags, unsigned long align, unsigned long size) { /* - * If the user wants hardware cache aligned objects then - * follow that suggestion if the object is sufficiently - * large. + * If the user wants hardware cache aligned objects then follow that + * suggestion if the object is sufficiently large. * - * The hardware cache alignment cannot override the - * specified alignment though. If that is greater - * then use it. + * The hardware cache alignment cannot override the specified + * alignment though. If that is greater then use it. */ if ((flags & SLAB_HWCACHE_ALIGN) && size > cache_line_size() / 2) @@ -2049,6 +2054,7 @@ static struct kmem_cache_node *early_kmem_cache_node_alloc(gfp_t gfpflags, #endif init_kmem_cache_node(n); atomic_long_inc(&n->nr_slabs); + /* * lockdep requires consistent irq usage for each lock * so even though there cannot be a race this early in @@ -2301,7 +2307,7 @@ int kmem_ptr_validate(struct kmem_cache *s, const void *object) /* * We could also check if the object is on the slabs freelist. * But this would be too expensive and it seems that the main - * purpose of kmem_ptr_valid is to check if the object belongs + * purpose of kmem_ptr_valid() is to check if the object belongs * to a certain slab. */ return 1; @@ -2913,7 +2919,7 @@ void __init kmem_cache_init(void) /* * Patch up the size_index table if we have strange large alignment * requirements for the kmalloc array. This is only the case for - * mips it seems. The standard arches will not generate any code here. + * MIPS it seems. The standard arches will not generate any code here. * * Largest permitted alignment is 256 bytes due to the way we * handle the index determination for the smaller caches. @@ -2942,7 +2948,6 @@ void __init kmem_cache_init(void) kmem_size = sizeof(struct kmem_cache); #endif - printk(KERN_INFO "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d," " CPUs=%d, Nodes=%d\n", @@ -3039,12 +3044,15 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, */ for_each_online_cpu(cpu) get_cpu_slab(s, cpu)->objsize = s->objsize; + s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); up_write(&slub_lock); + if (sysfs_slab_alias(s, name)) goto err; return s; } + s = kmalloc(kmem_size, GFP_KERNEL); if (s) { if (kmem_cache_open(s, GFP_KERNEL, name, @@ -3927,7 +3935,6 @@ SLAB_ATTR(remote_node_defrag_ratio); #endif #ifdef CONFIG_SLUB_STATS - static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si) { unsigned long sum = 0; @@ -4111,8 +4118,8 @@ static struct kset *slab_kset; #define ID_STR_LENGTH 64 /* Create a unique string id for a slab cache: - * format - * :[flags-]size:[memory address of kmemcache] + * + * Format :[flags-]size */ static char *create_unique_id(struct kmem_cache *s) { -- cgit v1.2.3-70-g09d2 From 2f775dbaa541d6bc0cccf20aab95f7a0930ef7e9 Mon Sep 17 00:00:00 2001 From: Bryan Wu Date: Thu, 6 Mar 2008 16:04:58 -0700 Subject: [Blackfin] arch: to kill syscalls missing warning by adding new timerfd syscalls Signed-off-by: Bryan Wu --- arch/blackfin/mach-common/entry.S | 5 ++++- include/asm-blackfin/unistd.h | 6 ++++-- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S index 2cbb7a0bc38..cee54cebbc6 100644 --- a/arch/blackfin/mach-common/entry.S +++ b/arch/blackfin/mach-common/entry.S @@ -1369,7 +1369,7 @@ ENTRY(_sys_call_table) .long _sys_epoll_pwait .long _sys_utimensat .long _sys_signalfd - .long _sys_ni_syscall + .long _sys_timerfd_create .long _sys_eventfd /* 350 */ .long _sys_pread64 .long _sys_pwrite64 @@ -1378,6 +1378,9 @@ ENTRY(_sys_call_table) .long _sys_get_robust_list /* 355 */ .long _sys_fallocate .long _sys_semtimedop + .long _sys_timerfd_settime + .long _sys_timerfd_gettime + .rept NR_syscalls-(.-_sys_call_table)/4 .long _sys_ni_syscall .endr diff --git a/include/asm-blackfin/unistd.h b/include/asm-blackfin/unistd.h index e98167358d2..c18a399f6e3 100644 --- a/include/asm-blackfin/unistd.h +++ b/include/asm-blackfin/unistd.h @@ -361,7 +361,7 @@ #define __NR_epoll_pwait 346 #define __NR_utimensat 347 #define __NR_signalfd 348 -#define __NR_timerfd 349 +#define __NR_timerfd_create 349 #define __NR_eventfd 350 #define __NR_pread64 351 #define __NR_pwrite64 352 @@ -370,8 +370,10 @@ #define __NR_get_robust_list 355 #define __NR_fallocate 356 #define __NR_semtimedop 357 +#define __NR_timerfd_settime 358 +#define __NR_timerfd_gettime 359 -#define __NR_syscall 358 +#define __NR_syscall 360 #define NR_syscalls __NR_syscall /* Old optional stuff no one actually uses */ -- cgit v1.2.3-70-g09d2 From 7a85f8896f4b4a4a0249563b92af9e3161a6b467 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 4 Mar 2008 11:17:11 +0100 Subject: block: restore the meaning of rq->data_len to the true data length The meaning of rq->data_len was changed to the length of an allocated buffer from the true data length. It breaks SG_IO friends and bsg. This patch restores the meaning of rq->data_len to the true data length and adds rq->extra_len to store an extended length (due to drain buffer and padding). This patch also removes the code to update bio in blk_rq_map_user introduced by the commit 40b01b9bbdf51ae543a04744283bf2d56c4a6afa. The commit adjusts bio according to memory alignment (queue_dma_alignment). However, memory alignment is NOT padding alignment. This adjustment also breaks SG_IO friends and bsg. Padding alignment needs to be fixed in a proper way (by a separate patch). Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- block/blk-core.c | 3 +-- block/blk-map.c | 6 +----- block/blk-merge.c | 2 +- block/bsg.c | 8 ++++---- block/scsi_ioctl.c | 4 ++-- drivers/ata/libata-scsi.c | 6 +++--- include/linux/blkdev.h | 2 +- 7 files changed, 13 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index 2d7e3a2f56c..a248cf1c98d 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -127,7 +127,6 @@ void rq_init(struct request_queue *q, struct request *rq) rq->nr_hw_segments = 0; rq->ioprio = 0; rq->special = NULL; - rq->raw_data_len = 0; rq->buffer = NULL; rq->tag = -1; rq->errors = 0; @@ -135,6 +134,7 @@ void rq_init(struct request_queue *q, struct request *rq) rq->cmd_len = 0; memset(rq->cmd, 0, sizeof(rq->cmd)); rq->data_len = 0; + rq->extra_len = 0; rq->sense_len = 0; rq->data = NULL; rq->sense = NULL; @@ -2018,7 +2018,6 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq, rq->hard_cur_sectors = rq->current_nr_sectors; rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio); rq->buffer = bio_data(bio); - rq->raw_data_len = bio->bi_size; rq->data_len = bio->bi_size; rq->bio = rq->biotail = bio; diff --git a/block/blk-map.c b/block/blk-map.c index 09f7fd0bcb7..f5598322954 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -19,7 +19,6 @@ int blk_rq_append_bio(struct request_queue *q, struct request *rq, rq->biotail->bi_next = bio; rq->biotail = bio; - rq->raw_data_len += bio->bi_size; rq->data_len += bio->bi_size; } return 0; @@ -151,11 +150,8 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq, */ if (len & queue_dma_alignment(q)) { unsigned int pad_len = (queue_dma_alignment(q) & ~len) + 1; - struct bio *bio = rq->biotail; - bio->bi_io_vec[bio->bi_vcnt - 1].bv_len += pad_len; - bio->bi_size += pad_len; - rq->data_len += pad_len; + rq->extra_len += pad_len; } rq->buffer = rq->data = NULL; diff --git a/block/blk-merge.c b/block/blk-merge.c index 7506c4fe026..0f58616bcd7 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -231,7 +231,7 @@ new_segment: ((unsigned long)q->dma_drain_buffer) & (PAGE_SIZE - 1)); nsegs++; - rq->data_len += q->dma_drain_size; + rq->extra_len += q->dma_drain_size; } if (sg) diff --git a/block/bsg.c b/block/bsg.c index 7f3c09549e4..8917c5174dc 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -437,14 +437,14 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr, } if (rq->next_rq) { - hdr->dout_resid = rq->raw_data_len; - hdr->din_resid = rq->next_rq->raw_data_len; + hdr->dout_resid = rq->data_len; + hdr->din_resid = rq->next_rq->data_len; blk_rq_unmap_user(bidi_bio); blk_put_request(rq->next_rq); } else if (rq_data_dir(rq) == READ) - hdr->din_resid = rq->raw_data_len; + hdr->din_resid = rq->data_len; else - hdr->dout_resid = rq->raw_data_len; + hdr->dout_resid = rq->data_len; /* * If the request generated a negative error number, return it diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index e993cac4911..a2c3a936ebf 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -266,7 +266,7 @@ static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr, hdr->info = 0; if (hdr->masked_status || hdr->host_status || hdr->driver_status) hdr->info |= SG_INFO_CHECK; - hdr->resid = rq->raw_data_len; + hdr->resid = rq->data_len; hdr->sb_len_wr = 0; if (rq->sense_len && hdr->sbp) { @@ -528,8 +528,8 @@ static int __blk_send_generic(struct request_queue *q, struct gendisk *bd_disk, rq = blk_get_request(q, WRITE, __GFP_WAIT); rq->cmd_type = REQ_TYPE_BLOCK_PC; rq->data = NULL; - rq->raw_data_len = 0; rq->data_len = 0; + rq->extra_len = 0; rq->timeout = BLK_DEFAULT_SG_TIMEOUT; memset(rq->cmd, 0, sizeof(rq->cmd)); rq->cmd[0] = cmd; diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 7b1f1ee8131..fe47922dd69 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -2538,7 +2538,7 @@ static unsigned int atapi_xlat(struct ata_queued_cmd *qc) } qc->tf.command = ATA_CMD_PACKET; - qc->nbytes = scsi_bufflen(scmd); + qc->nbytes = scsi_bufflen(scmd) + scmd->request->extra_len; /* check whether ATAPI DMA is safe */ if (!using_pio && ata_check_atapi_dma(qc)) @@ -2549,7 +2549,7 @@ static unsigned int atapi_xlat(struct ata_queued_cmd *qc) * want to set it properly, and for DMA where it is * effectively meaningless. */ - nbytes = min(scmd->request->raw_data_len, (unsigned int)63 * 1024); + nbytes = min(scmd->request->data_len, (unsigned int)63 * 1024); /* Most ATAPI devices which honor transfer chunk size don't * behave according to the spec when odd chunk size which @@ -2875,7 +2875,7 @@ static unsigned int ata_scsi_pass_thru(struct ata_queued_cmd *qc) * TODO: find out if we need to do more here to * cover scatter/gather case. */ - qc->nbytes = scsi_bufflen(scmd); + qc->nbytes = scsi_bufflen(scmd) + scmd->request->extra_len; /* request result TF and be quiet about device error */ qc->flags |= ATA_QCFLAG_RESULT_TF | ATA_QCFLAG_QUIET; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6fe67d1939c..b72526c13ca 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -216,8 +216,8 @@ struct request { unsigned int cmd_len; unsigned char cmd[BLK_MAX_CDB]; - unsigned int raw_data_len; unsigned int data_len; + unsigned int extra_len; /* length of alignment and padding */ unsigned int sense_len; void *data; void *sense; -- cgit v1.2.3-70-g09d2 From e3790c7d42a545e8fe8b38b513613ca96687b670 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 4 Mar 2008 11:18:17 +0100 Subject: block: separate out padding from alignment Block layer alignment was used for two different purposes - memory alignment and padding. This causes problems in lower layers because drivers which only require memory alignment ends up with adjusted rq->data_len. Separate out padding such that padding occurs iff driver explicitly requests it. Tomo: restorethe code to update bio in blk_rq_map_user introduced by the commit 40b01b9bbdf51ae543a04744283bf2d56c4a6afa according to padding alignment. Signed-off-by: Tejun Heo Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- block/blk-map.c | 20 +++++++++++++------- block/blk-settings.c | 17 +++++++++++++++++ drivers/ata/libata-scsi.c | 3 ++- include/linux/blkdev.h | 2 ++ 4 files changed, 34 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/block/blk-map.c b/block/blk-map.c index f5598322954..4e17dfd0035 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -43,6 +43,7 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq, void __user *ubuf, unsigned int len) { unsigned long uaddr; + unsigned int alignment; struct bio *bio, *orig_bio; int reading, ret; @@ -53,8 +54,8 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq, * direct dma. else, set up kernel bounce buffers */ uaddr = (unsigned long) ubuf; - if (!(uaddr & queue_dma_alignment(q)) && - !(len & queue_dma_alignment(q))) + alignment = queue_dma_alignment(q) | q->dma_pad_mask; + if (!(uaddr & alignment) && !(len & alignment)) bio = bio_map_user(q, NULL, uaddr, len, reading); else bio = bio_copy_user(q, uaddr, len, reading); @@ -141,15 +142,20 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq, /* * __blk_rq_map_user() copies the buffers if starting address - * or length isn't aligned. As the copied buffer is always - * page aligned, we know that there's enough room for padding. - * Extend the last bio and update rq->data_len accordingly. + * or length isn't aligned to dma_pad_mask. As the copied + * buffer is always page aligned, we know that there's enough + * room for padding. Extend the last bio and update + * rq->data_len accordingly. * * On unmap, bio_uncopy_user() will use unmodified * bio_map_data pointed to by bio->bi_private. */ - if (len & queue_dma_alignment(q)) { - unsigned int pad_len = (queue_dma_alignment(q) & ~len) + 1; + if (len & q->dma_pad_mask) { + unsigned int pad_len = (q->dma_pad_mask & ~len) + 1; + struct bio *bio = rq->biotail; + + bio->bi_io_vec[bio->bi_vcnt - 1].bv_len += pad_len; + bio->bi_size += pad_len; rq->extra_len += pad_len; } diff --git a/block/blk-settings.c b/block/blk-settings.c index da923fed1f2..a9f37f530b1 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -292,6 +292,23 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b) } EXPORT_SYMBOL(blk_queue_stack_limits); +/** + * blk_queue_dma_pad - set pad mask + * @q: the request queue for the device + * @mask: pad mask + * + * Set pad mask. Direct IO requests are padded to the mask specified. + * + * Appending pad buffer to a request modifies ->data_len such that it + * includes the pad buffer. The original requested data length can be + * obtained using blk_rq_raw_data_len(). + **/ +void blk_queue_dma_pad(struct request_queue *q, unsigned int mask) +{ + q->dma_pad_mask = mask; +} +EXPORT_SYMBOL(blk_queue_dma_pad); + /** * blk_queue_dma_drain - Set up a drain buffer for excess dma. * @q: the request queue for the device diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index fe47922dd69..8f0e8f2bc62 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -862,9 +862,10 @@ static int ata_scsi_dev_config(struct scsi_device *sdev, struct request_queue *q = sdev->request_queue; void *buf; - /* set the min alignment */ + /* set the min alignment and padding */ blk_queue_update_dma_alignment(sdev->request_queue, ATA_DMA_PAD_SZ - 1); + blk_queue_dma_pad(sdev->request_queue, ATA_DMA_PAD_SZ - 1); /* configure draining */ buf = kmalloc(ATAPI_MAX_DRAIN, q->bounce_gfp | GFP_KERNEL); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b72526c13ca..6f79d40dd3c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -362,6 +362,7 @@ struct request_queue unsigned long seg_boundary_mask; void *dma_drain_buffer; unsigned int dma_drain_size; + unsigned int dma_pad_mask; unsigned int dma_alignment; struct blk_queue_tag *queue_tags; @@ -701,6 +702,7 @@ extern void blk_queue_max_hw_segments(struct request_queue *, unsigned short); extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); extern void blk_queue_hardsect_size(struct request_queue *, unsigned short); extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); +extern void blk_queue_dma_pad(struct request_queue *, unsigned int); extern int blk_queue_dma_drain(struct request_queue *q, dma_drain_needed_fn *dma_drain_needed, void *buf, unsigned int size); -- cgit v1.2.3-70-g09d2 From 1826eadfc42839af7c1c5a1859510aff635d3fa1 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 4 Mar 2008 11:23:46 +0100 Subject: block/genhd.c: cleanups This patch contains the following cleanups: - make the needlessly global struct disk_type static - #if 0 the unused genhd_media_change_notify() Signed-off-by: Adrian Bunk Signed-off-by: Jens Axboe --- block/genhd.c | 6 +++++- include/linux/genhd.h | 2 -- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/block/genhd.c b/block/genhd.c index abc6feddc8c..c44527d16c5 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -24,6 +24,8 @@ static DEFINE_MUTEX(block_class_lock); struct kobject *block_depr; #endif +static struct device_type disk_type; + /* * Can be deleted altogether. Later. * @@ -502,7 +504,7 @@ struct class block_class = { .name = "block", }; -struct device_type disk_type = { +static struct device_type disk_type = { .name = "disk", .groups = disk_attr_groups, .release = disk_release, @@ -632,12 +634,14 @@ static void media_change_notify_thread(struct work_struct *work) put_device(gd->driverfs_dev); } +#if 0 void genhd_media_change_notify(struct gendisk *disk) { get_device(disk->driverfs_dev); schedule_work(&disk->async_notify); } EXPORT_SYMBOL_GPL(genhd_media_change_notify); +#endif /* 0 */ dev_t blk_lookup_devt(const char *name) { diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 09a3b18918c..cd048e3cc96 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -18,7 +18,6 @@ #define dev_to_disk(device) container_of(device, struct gendisk, dev) #define dev_to_part(device) container_of(device, struct hd_struct, dev) -extern struct device_type disk_type; extern struct device_type part_type; extern struct kobject *block_depr; extern struct class block_class; @@ -556,7 +555,6 @@ extern struct gendisk *alloc_disk_node(int minors, int node_id); extern struct gendisk *alloc_disk(int minors); extern struct kobject *get_disk(struct gendisk *disk); extern void put_disk(struct gendisk *disk); -extern void genhd_media_change_notify(struct gendisk *disk); extern void blk_register_region(dev_t devt, unsigned long range, struct module *module, struct kobject *(*probe)(dev_t, int *, void *), -- cgit v1.2.3-70-g09d2 From a0db701a6bf767320e4471bd55e70702d230f6fb Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 4 Mar 2008 11:23:50 +0100 Subject: block/genhd.c: proper externs This patch adds proper externs for two structs in include/linux/genhd.h Signed-off-by: Adrian Bunk Signed-off-by: Jens Axboe --- fs/proc/proc_misc.c | 3 +-- include/linux/genhd.h | 3 +++ 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 468805d40e2..2d563979cb0 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -377,7 +378,6 @@ static int stram_read_proc(char *page, char **start, off_t off, #endif #ifdef CONFIG_BLOCK -extern const struct seq_operations partitions_op; static int partitions_open(struct inode *inode, struct file *file) { return seq_open(file, &partitions_op); @@ -389,7 +389,6 @@ static const struct file_operations proc_partitions_operations = { .release = seq_release, }; -extern const struct seq_operations diskstats_op; static int diskstats_open(struct inode *inode, struct file *file) { return seq_open(file, &diskstats_op); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index cd048e3cc96..32c2ac49a07 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -22,6 +22,9 @@ extern struct device_type part_type; extern struct kobject *block_depr; extern struct class block_class; +extern const struct seq_operations partitions_op; +extern const struct seq_operations diskstats_op; + enum { /* These three have identical behaviour; use the second one if DOS FDISK gets confused about extended/logical partitions starting past cylinder 1023. */ -- cgit v1.2.3-70-g09d2 From 72dc67a69690288538142df73a7e3ac66fea68dc Mon Sep 17 00:00:00 2001 From: Izik Eidus Date: Sun, 10 Feb 2008 18:04:15 +0200 Subject: KVM: remove the usage of the mmap_sem for the protection of the memory slots. This patch replaces the mmap_sem lock for the memory slots with a new kvm private lock, it is needed beacuse untill now there were cases where kvm accesses user memory while holding the mmap semaphore. Signed-off-by: Izik Eidus Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 24 ++++++++++++++--- arch/x86/kvm/paging_tmpl.h | 13 +++++++--- arch/x86/kvm/vmx.c | 7 +++-- arch/x86/kvm/x86.c | 65 ++++++++++++++++++++++++++-------------------- include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 5 ++-- 6 files changed, 75 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 8efdcdbebb0..26037106ad1 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -876,11 +876,18 @@ static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva) { + struct page *page; + gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva); if (gpa == UNMAPPED_GVA) return NULL; - return gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); + + down_read(¤t->mm->mmap_sem); + page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); + up_read(¤t->mm->mmap_sem); + + return page; } static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, @@ -1020,15 +1027,18 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) struct page *page; + down_read(&vcpu->kvm->slots_lock); + down_read(¤t->mm->mmap_sem); page = gfn_to_page(vcpu->kvm, gfn); + up_read(¤t->mm->mmap_sem); spin_lock(&vcpu->kvm->mmu_lock); kvm_mmu_free_some_pages(vcpu); r = __nonpaging_map(vcpu, v, write, gfn, page); spin_unlock(&vcpu->kvm->mmu_lock); - up_read(¤t->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); return r; } @@ -1362,6 +1372,7 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, gfn_t gfn; int r; u64 gpte = 0; + struct page *page; if (bytes != 4 && bytes != 8) return; @@ -1389,6 +1400,11 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, if (!is_present_pte(gpte)) return; gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; + + down_read(¤t->mm->mmap_sem); + page = gfn_to_page(vcpu->kvm, gfn); + up_read(¤t->mm->mmap_sem); + vcpu->arch.update_pte.gfn = gfn; vcpu->arch.update_pte.page = gfn_to_page(vcpu->kvm, gfn); } @@ -1496,9 +1512,9 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) gpa_t gpa; int r; - down_read(¤t->mm->mmap_sem); + down_read(&vcpu->kvm->slots_lock); gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva); - up_read(¤t->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); spin_lock(&vcpu->kvm->mmu_lock); r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 03ba8608fe0..2009c6e9dc4 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -91,7 +91,10 @@ static bool FNAME(cmpxchg_gpte)(struct kvm *kvm, pt_element_t *table; struct page *page; + down_read(¤t->mm->mmap_sem); page = gfn_to_page(kvm, table_gfn); + up_read(¤t->mm->mmap_sem); + table = kmap_atomic(page, KM_USER0); ret = CMPXCHG(&table[index], orig_pte, new_pte); @@ -378,7 +381,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, if (r) return r; - down_read(¤t->mm->mmap_sem); + down_read(&vcpu->kvm->slots_lock); /* * Look up the shadow pte for the faulting address. */ @@ -392,11 +395,13 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, pgprintk("%s: guest page fault\n", __FUNCTION__); inject_page_fault(vcpu, addr, walker.error_code); vcpu->arch.last_pt_write_count = 0; /* reset fork detector */ - up_read(¤t->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); return 0; } + down_read(¤t->mm->mmap_sem); page = gfn_to_page(vcpu->kvm, walker.gfn); + up_read(¤t->mm->mmap_sem); spin_lock(&vcpu->kvm->mmu_lock); kvm_mmu_free_some_pages(vcpu); @@ -413,14 +418,14 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, */ if (shadow_pte && is_io_pte(*shadow_pte)) { spin_unlock(&vcpu->kvm->mmu_lock); - up_read(¤t->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); return 1; } ++vcpu->stat.pf_fixed; kvm_mmu_audit(vcpu, "post page fault (fixed)"); spin_unlock(&vcpu->kvm->mmu_lock); - up_read(¤t->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); return write_pt; } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ad36447e696..86f5bf12183 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1477,7 +1477,7 @@ static int alloc_apic_access_page(struct kvm *kvm) struct kvm_userspace_memory_region kvm_userspace_mem; int r = 0; - down_write(¤t->mm->mmap_sem); + down_write(&kvm->slots_lock); if (kvm->arch.apic_access_page) goto out; kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT; @@ -1487,9 +1487,12 @@ static int alloc_apic_access_page(struct kvm *kvm) r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0); if (r) goto out; + + down_read(¤t->mm->mmap_sem); kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00); + up_read(¤t->mm->mmap_sem); out: - up_write(¤t->mm->mmap_sem); + up_write(&kvm->slots_lock); return r; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 338764fa539..6b01552bd1f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -184,7 +184,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) int ret; u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)]; - down_read(¤t->mm->mmap_sem); + down_read(&vcpu->kvm->slots_lock); ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte, offset * sizeof(u64), sizeof(pdpte)); if (ret < 0) { @@ -201,7 +201,7 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs)); out: - up_read(¤t->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); return ret; } @@ -215,13 +215,13 @@ static bool pdptrs_changed(struct kvm_vcpu *vcpu) if (is_long_mode(vcpu) || !is_pae(vcpu)) return false; - down_read(¤t->mm->mmap_sem); + down_read(&vcpu->kvm->slots_lock); r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte)); if (r < 0) goto out; changed = memcmp(pdpte, vcpu->arch.pdptrs, sizeof(pdpte)) != 0; out: - up_read(¤t->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); return changed; } @@ -359,7 +359,7 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) */ } - down_read(¤t->mm->mmap_sem); + down_read(&vcpu->kvm->slots_lock); /* * Does the new cr3 value map to physical memory? (Note, we * catch an invalid cr3 even in real-mode, because it would @@ -375,7 +375,7 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) vcpu->arch.cr3 = cr3; vcpu->arch.mmu.new_cr3(vcpu); } - up_read(¤t->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); } EXPORT_SYMBOL_GPL(set_cr3); @@ -1232,12 +1232,12 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm, if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES) return -EINVAL; - down_write(¤t->mm->mmap_sem); + down_write(&kvm->slots_lock); kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; - up_write(¤t->mm->mmap_sem); + up_write(&kvm->slots_lock); return 0; } @@ -1286,7 +1286,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, < alias->target_phys_addr) goto out; - down_write(¤t->mm->mmap_sem); + down_write(&kvm->slots_lock); p = &kvm->arch.aliases[alias->slot]; p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; @@ -1300,7 +1300,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, kvm_mmu_zap_all(kvm); - up_write(¤t->mm->mmap_sem); + up_write(&kvm->slots_lock); return 0; @@ -1376,7 +1376,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot; int is_dirty = 0; - down_write(¤t->mm->mmap_sem); + down_write(&kvm->slots_lock); r = kvm_get_dirty_log(kvm, log, &is_dirty); if (r) @@ -1392,7 +1392,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, } r = 0; out: - up_write(¤t->mm->mmap_sem); + up_write(&kvm->slots_lock); return r; } @@ -1570,7 +1570,7 @@ int emulator_read_std(unsigned long addr, void *data = val; int r = X86EMUL_CONTINUE; - down_read(¤t->mm->mmap_sem); + down_read(&vcpu->kvm->slots_lock); while (bytes) { gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); unsigned offset = addr & (PAGE_SIZE-1); @@ -1592,7 +1592,7 @@ int emulator_read_std(unsigned long addr, addr += tocopy; } out: - up_read(¤t->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); return r; } EXPORT_SYMBOL_GPL(emulator_read_std); @@ -1611,9 +1611,9 @@ static int emulator_read_emulated(unsigned long addr, return X86EMUL_CONTINUE; } - down_read(¤t->mm->mmap_sem); + down_read(&vcpu->kvm->slots_lock); gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); - up_read(¤t->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); /* For APIC access vmexit */ if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) @@ -1651,14 +1651,14 @@ static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, { int ret; - down_read(¤t->mm->mmap_sem); + down_read(&vcpu->kvm->slots_lock); ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes); if (ret < 0) { - up_read(¤t->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); return 0; } kvm_mmu_pte_write(vcpu, gpa, val, bytes); - up_read(¤t->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); return 1; } @@ -1670,9 +1670,9 @@ static int emulator_write_emulated_onepage(unsigned long addr, struct kvm_io_device *mmio_dev; gpa_t gpa; - down_read(¤t->mm->mmap_sem); + down_read(&vcpu->kvm->slots_lock); gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); - up_read(¤t->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); if (gpa == UNMAPPED_GVA) { kvm_inject_page_fault(vcpu, addr, 2); @@ -1749,7 +1749,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr, char *kaddr; u64 val; - down_read(¤t->mm->mmap_sem); + down_read(&vcpu->kvm->slots_lock); gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); if (gpa == UNMAPPED_GVA || @@ -1760,13 +1760,17 @@ static int emulator_cmpxchg_emulated(unsigned long addr, goto emul_write; val = *(u64 *)new; + + down_read(¤t->mm->mmap_sem); page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); + up_read(¤t->mm->mmap_sem); + kaddr = kmap_atomic(page, KM_USER0); set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val); kunmap_atomic(kaddr, KM_USER0); kvm_release_page_dirty(page); emul_write: - up_read(¤t->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); } #endif @@ -2159,10 +2163,10 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, kvm_x86_ops->skip_emulated_instruction(vcpu); for (i = 0; i < nr_pages; ++i) { - down_read(¤t->mm->mmap_sem); + down_read(&vcpu->kvm->slots_lock); page = gva_to_page(vcpu, address + i * PAGE_SIZE); vcpu->arch.pio.guest_pages[i] = page; - up_read(¤t->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); if (!page) { kvm_inject_gp(vcpu, 0); free_pio_guest_pages(vcpu); @@ -2485,8 +2489,9 @@ static void vapic_enter(struct kvm_vcpu *vcpu) down_read(¤t->mm->mmap_sem); page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); - vcpu->arch.apic->vapic_page = page; up_read(¤t->mm->mmap_sem); + + vcpu->arch.apic->vapic_page = page; } static void vapic_exit(struct kvm_vcpu *vcpu) @@ -2959,9 +2964,9 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, gpa_t gpa; vcpu_load(vcpu); - down_read(¤t->mm->mmap_sem); + down_read(&vcpu->kvm->slots_lock); gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, vaddr); - up_read(¤t->mm->mmap_sem); + up_read(&vcpu->kvm->slots_lock); tr->physical_address = gpa; tr->valid = gpa != UNMAPPED_GVA; tr->writeable = 1; @@ -3234,11 +3239,13 @@ int kvm_arch_set_memory_region(struct kvm *kvm, */ if (!user_alloc) { if (npages && !old.rmap) { + down_write(¤t->mm->mmap_sem); memslot->userspace_addr = do_mmap(NULL, 0, npages * PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, 0); + up_write(¤t->mm->mmap_sem); if (IS_ERR((void *)memslot->userspace_addr)) return PTR_ERR((void *)memslot->userspace_addr); @@ -3246,8 +3253,10 @@ int kvm_arch_set_memory_region(struct kvm *kvm, if (!old.user_alloc && old.rmap) { int ret; + down_write(¤t->mm->mmap_sem); ret = do_munmap(current->mm, old.userspace_addr, old.npages * PAGE_SIZE); + up_write(¤t->mm->mmap_sem); if (ret < 0) printk(KERN_WARNING "kvm_vm_ioctl_set_memory_region: " diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ea4764b0a2f..928b0d59e9b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -107,6 +107,7 @@ struct kvm_memory_slot { struct kvm { struct mutex lock; /* protects the vcpus array and APIC accesses */ spinlock_t mmu_lock; + struct rw_semaphore slots_lock; struct mm_struct *mm; /* userspace tied to this vm */ int nmemslots; struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS + diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 32fbf800696..b2e12893e3f 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -169,6 +169,7 @@ static struct kvm *kvm_create_vm(void) kvm_io_bus_init(&kvm->pio_bus); mutex_init(&kvm->lock); kvm_io_bus_init(&kvm->mmio_bus); + init_rwsem(&kvm->slots_lock); spin_lock(&kvm_lock); list_add(&kvm->vm_list, &vm_list); spin_unlock(&kvm_lock); @@ -339,9 +340,9 @@ int kvm_set_memory_region(struct kvm *kvm, { int r; - down_write(¤t->mm->mmap_sem); + down_write(&kvm->slots_lock); r = __kvm_set_memory_region(kvm, mem, user_alloc); - up_write(¤t->mm->mmap_sem); + up_write(&kvm->slots_lock); return r; } EXPORT_SYMBOL_GPL(kvm_set_memory_region); -- cgit v1.2.3-70-g09d2 From e311f68a4e43ade048d7dbaa6b458fbe31114daf Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Tue, 4 Mar 2008 16:35:04 +1000 Subject: m68knommu: declare do_IRQ() Need a declaration of do_IRQ for the 68328 interrupt handling code. It is common to all m68knommu targets, so a common declaration makes sense. Signed-off-by: Greg Ungerer Signed-off-by: Linus Torvalds --- include/asm-m68knommu/machdep.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/asm-m68knommu/machdep.h b/include/asm-m68knommu/machdep.h index 1cf26d240d8..de9f47a51cc 100644 --- a/include/asm-m68knommu/machdep.h +++ b/include/asm-m68knommu/machdep.h @@ -21,4 +21,6 @@ extern void (*mach_power_off)( void ); extern void config_BSP(char *command, int len); +extern void do_IRQ(int irq, struct pt_regs *fp); + #endif /* _M68KNOMMU_MACHDEP_H */ -- cgit v1.2.3-70-g09d2 From 8727e28ddebb031d80b5e261c98c24f1dcb9a82f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 4 Mar 2008 09:18:16 +0100 Subject: m68k{,nommu}: Wire up new timerfd syscalls m68k{,nommu}: Wire up the new timerfd syscalls, which were introduced in commit 4d672e7ac79b5ec5cdc90e450823441e20464691 ("timerfd: new timerfd API"). Signed-off-by: Geert Uytterhoeven Acked-by: Greg Ungerer Signed-off-by: Linus Torvalds --- arch/m68k/kernel/entry.S | 4 +++- arch/m68knommu/kernel/syscalltable.S | 4 +++- include/asm-m68k/unistd.h | 6 ++++-- include/asm-m68knommu/unistd.h | 6 ++++-- 4 files changed, 14 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S index 6dfa3b3c0e2..18a9c5f4b00 100644 --- a/arch/m68k/kernel/entry.S +++ b/arch/m68k/kernel/entry.S @@ -742,7 +742,9 @@ sys_call_table: .long sys_epoll_pwait /* 315 */ .long sys_utimensat .long sys_signalfd - .long sys_ni_syscall + .long sys_timerfd_create .long sys_eventfd .long sys_fallocate /* 320 */ + .long sys_timerfd_settime + .long sys_timerfd_gettime diff --git a/arch/m68knommu/kernel/syscalltable.S b/arch/m68knommu/kernel/syscalltable.S index 1b02b882006..fca2e49917a 100644 --- a/arch/m68knommu/kernel/syscalltable.S +++ b/arch/m68knommu/kernel/syscalltable.S @@ -336,9 +336,11 @@ ENTRY(sys_call_table) .long sys_epoll_pwait /* 315 */ .long sys_utimensat .long sys_signalfd - .long sys_ni_syscall + .long sys_timerfd_create .long sys_eventfd .long sys_fallocate /* 320 */ + .long sys_timerfd_settime + .long sys_timerfd_gettime .rept NR_syscalls-(.-sys_call_table)/4 .long sys_ni_syscall diff --git a/include/asm-m68k/unistd.h b/include/asm-m68k/unistd.h index 87f77b11931..e72ba563f10 100644 --- a/include/asm-m68k/unistd.h +++ b/include/asm-m68k/unistd.h @@ -320,13 +320,15 @@ #define __NR_epoll_pwait 315 #define __NR_utimensat 316 #define __NR_signalfd 317 -#define __NR_timerfd 318 +#define __NR_timerfd_create 318 #define __NR_eventfd 319 #define __NR_fallocate 320 +#define __NR_timerfd_settime 321 +#define __NR_timerfd_gettime 322 #ifdef __KERNEL__ -#define NR_syscalls 321 +#define NR_syscalls 323 #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/include/asm-m68knommu/unistd.h b/include/asm-m68knommu/unistd.h index 27c2f9bb4db..4ba98b9c5d7 100644 --- a/include/asm-m68knommu/unistd.h +++ b/include/asm-m68knommu/unistd.h @@ -321,13 +321,15 @@ #define __NR_epoll_pwait 315 #define __NR_utimensat 316 #define __NR_signalfd 317 -#define __NR_timerfd 318 +#define __NR_timerfd_create 318 #define __NR_eventfd 319 #define __NR_fallocate 320 +#define __NR_timerfd_settime 321 +#define __NR_timerfd_gettime 322 #ifdef __KERNEL__ -#define NR_syscalls 321 +#define NR_syscalls 323 #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR -- cgit v1.2.3-70-g09d2 From 62fb185130e4d420f71a30ff59d8b16b74ef5d2b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 25 Feb 2008 17:34:02 +0100 Subject: sched: revert load_balance_monitor() changes The following commits cause a number of regressions: commit 58e2d4ca581167c2a079f4ee02be2f0bc52e8729 Author: Srivatsa Vaddagiri Date: Fri Jan 25 21:08:00 2008 +0100 sched: group scheduling, change how cpu load is calculated commit 6b2d7700266b9402e12824e11e0099ae6a4a6a79 Author: Srivatsa Vaddagiri Date: Fri Jan 25 21:08:00 2008 +0100 sched: group scheduler, fix fairness of cpu bandwidth allocation for task groups Namely: - very frequent wakeups on SMP, reported by PowerTop users. - cacheline trashing on (large) SMP - some latencies larger than 500ms While there is a mergeable patch to fix the latter, the former issues are not fixable in a manner suitable for .25 (we're at -rc3 now). Hence we revert them and try again in v2.6.26. Signed-off-by: Peter Zijlstra CC: Srivatsa Vaddagiri Tested-by: Alexey Zaytsev Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 - kernel/sched.c | 283 +++++++------------------------------------------- kernel/sched_fair.c | 115 +++++++------------- kernel/sched_rt.c | 4 - kernel/sysctl.c | 18 ---- 5 files changed, 70 insertions(+), 354 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 2c9621f8bf8..9ae4030067a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1542,10 +1542,6 @@ extern unsigned int sysctl_sched_child_runs_first; extern unsigned int sysctl_sched_features; extern unsigned int sysctl_sched_migration_cost; extern unsigned int sysctl_sched_nr_migrate; -#if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP) -extern unsigned int sysctl_sched_min_bal_int_shares; -extern unsigned int sysctl_sched_max_bal_int_shares; -#endif int sched_nr_latency_handler(struct ctl_table *table, int write, struct file *file, void __user *buffer, size_t *length, diff --git a/kernel/sched.c b/kernel/sched.c index f06950c8a6c..dcd553cc4ee 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -174,41 +174,6 @@ struct task_group { struct sched_entity **se; /* runqueue "owned" by this group on each cpu */ struct cfs_rq **cfs_rq; - - /* - * shares assigned to a task group governs how much of cpu bandwidth - * is allocated to the group. The more shares a group has, the more is - * the cpu bandwidth allocated to it. - * - * For ex, lets say that there are three task groups, A, B and C which - * have been assigned shares 1000, 2000 and 3000 respectively. Then, - * cpu bandwidth allocated by the scheduler to task groups A, B and C - * should be: - * - * Bw(A) = 1000/(1000+2000+3000) * 100 = 16.66% - * Bw(B) = 2000/(1000+2000+3000) * 100 = 33.33% - * Bw(C) = 3000/(1000+2000+3000) * 100 = 50% - * - * The weight assigned to a task group's schedulable entities on every - * cpu (task_group.se[a_cpu]->load.weight) is derived from the task - * group's shares. For ex: lets say that task group A has been - * assigned shares of 1000 and there are two CPUs in a system. Then, - * - * tg_A->se[0]->load.weight = tg_A->se[1]->load.weight = 1000; - * - * Note: It's not necessary that each of a task's group schedulable - * entity have the same weight on all CPUs. If the group - * has 2 of its tasks on CPU0 and 1 task on CPU1, then a - * better distribution of weight could be: - * - * tg_A->se[0]->load.weight = 2/3 * 2000 = 1333 - * tg_A->se[1]->load.weight = 1/2 * 2000 = 667 - * - * rebalance_shares() is responsible for distributing the shares of a - * task groups like this among the group's schedulable entities across - * cpus. - * - */ unsigned long shares; #endif @@ -250,22 +215,12 @@ static DEFINE_SPINLOCK(task_group_lock); static DEFINE_MUTEX(doms_cur_mutex); #ifdef CONFIG_FAIR_GROUP_SCHED -#ifdef CONFIG_SMP -/* kernel thread that runs rebalance_shares() periodically */ -static struct task_struct *lb_monitor_task; -static int load_balance_monitor(void *unused); -#endif - -static void set_se_shares(struct sched_entity *se, unsigned long shares); - #ifdef CONFIG_USER_SCHED # define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD) #else # define INIT_TASK_GROUP_LOAD NICE_0_LOAD #endif -#define MIN_GROUP_SHARES 2 - static int init_task_group_load = INIT_TASK_GROUP_LOAD; #endif @@ -1245,16 +1200,6 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime); static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {} #endif -static inline void inc_cpu_load(struct rq *rq, unsigned long load) -{ - update_load_add(&rq->load, load); -} - -static inline void dec_cpu_load(struct rq *rq, unsigned long load) -{ - update_load_sub(&rq->load, load); -} - #ifdef CONFIG_SMP static unsigned long source_load(int cpu, int type); static unsigned long target_load(int cpu, int type); @@ -1272,14 +1217,26 @@ static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); #define sched_class_highest (&rt_sched_class) -static void inc_nr_running(struct rq *rq) +static inline void inc_load(struct rq *rq, const struct task_struct *p) +{ + update_load_add(&rq->load, p->se.load.weight); +} + +static inline void dec_load(struct rq *rq, const struct task_struct *p) +{ + update_load_sub(&rq->load, p->se.load.weight); +} + +static void inc_nr_running(struct task_struct *p, struct rq *rq) { rq->nr_running++; + inc_load(rq, p); } -static void dec_nr_running(struct rq *rq) +static void dec_nr_running(struct task_struct *p, struct rq *rq) { rq->nr_running--; + dec_load(rq, p); } static void set_load_weight(struct task_struct *p) @@ -1371,7 +1328,7 @@ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup) rq->nr_uninterruptible--; enqueue_task(rq, p, wakeup); - inc_nr_running(rq); + inc_nr_running(p, rq); } /* @@ -1383,7 +1340,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep) rq->nr_uninterruptible++; dequeue_task(rq, p, sleep); - dec_nr_running(rq); + dec_nr_running(p, rq); } /** @@ -2023,7 +1980,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) * management (if any): */ p->sched_class->task_new(rq, p); - inc_nr_running(rq); + inc_nr_running(p, rq); } check_preempt_curr(rq, p); #ifdef CONFIG_SMP @@ -4362,8 +4319,10 @@ void set_user_nice(struct task_struct *p, long nice) goto out_unlock; } on_rq = p->se.on_rq; - if (on_rq) + if (on_rq) { dequeue_task(rq, p, 0); + dec_load(rq, p); + } p->static_prio = NICE_TO_PRIO(nice); set_load_weight(p); @@ -4373,6 +4332,7 @@ void set_user_nice(struct task_struct *p, long nice) if (on_rq) { enqueue_task(rq, p, 0); + inc_load(rq, p); /* * If the task increased its priority or is running and * lowered its priority, then reschedule its CPU: @@ -7087,21 +7047,6 @@ void __init sched_init_smp(void) if (set_cpus_allowed(current, non_isolated_cpus) < 0) BUG(); sched_init_granularity(); - -#ifdef CONFIG_FAIR_GROUP_SCHED - if (nr_cpu_ids == 1) - return; - - lb_monitor_task = kthread_create(load_balance_monitor, NULL, - "group_balance"); - if (!IS_ERR(lb_monitor_task)) { - lb_monitor_task->flags |= PF_NOFREEZE; - wake_up_process(lb_monitor_task); - } else { - printk(KERN_ERR "Could not create load balance monitor thread" - "(error = %ld) \n", PTR_ERR(lb_monitor_task)); - } -#endif } #else void __init sched_init_smp(void) @@ -7424,157 +7369,6 @@ void set_curr_task(int cpu, struct task_struct *p) #ifdef CONFIG_GROUP_SCHED -#if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP -/* - * distribute shares of all task groups among their schedulable entities, - * to reflect load distribution across cpus. - */ -static int rebalance_shares(struct sched_domain *sd, int this_cpu) -{ - struct cfs_rq *cfs_rq; - struct rq *rq = cpu_rq(this_cpu); - cpumask_t sdspan = sd->span; - int balanced = 1; - - /* Walk thr' all the task groups that we have */ - for_each_leaf_cfs_rq(rq, cfs_rq) { - int i; - unsigned long total_load = 0, total_shares; - struct task_group *tg = cfs_rq->tg; - - /* Gather total task load of this group across cpus */ - for_each_cpu_mask(i, sdspan) - total_load += tg->cfs_rq[i]->load.weight; - - /* Nothing to do if this group has no load */ - if (!total_load) - continue; - - /* - * tg->shares represents the number of cpu shares the task group - * is eligible to hold on a single cpu. On N cpus, it is - * eligible to hold (N * tg->shares) number of cpu shares. - */ - total_shares = tg->shares * cpus_weight(sdspan); - - /* - * redistribute total_shares across cpus as per the task load - * distribution. - */ - for_each_cpu_mask(i, sdspan) { - unsigned long local_load, local_shares; - - local_load = tg->cfs_rq[i]->load.weight; - local_shares = (local_load * total_shares) / total_load; - if (!local_shares) - local_shares = MIN_GROUP_SHARES; - if (local_shares == tg->se[i]->load.weight) - continue; - - spin_lock_irq(&cpu_rq(i)->lock); - set_se_shares(tg->se[i], local_shares); - spin_unlock_irq(&cpu_rq(i)->lock); - balanced = 0; - } - } - - return balanced; -} - -/* - * How frequently should we rebalance_shares() across cpus? - * - * The more frequently we rebalance shares, the more accurate is the fairness - * of cpu bandwidth distribution between task groups. However higher frequency - * also implies increased scheduling overhead. - * - * sysctl_sched_min_bal_int_shares represents the minimum interval between - * consecutive calls to rebalance_shares() in the same sched domain. - * - * sysctl_sched_max_bal_int_shares represents the maximum interval between - * consecutive calls to rebalance_shares() in the same sched domain. - * - * These settings allows for the appropriate trade-off between accuracy of - * fairness and the associated overhead. - * - */ - -/* default: 8ms, units: milliseconds */ -const_debug unsigned int sysctl_sched_min_bal_int_shares = 8; - -/* default: 128ms, units: milliseconds */ -const_debug unsigned int sysctl_sched_max_bal_int_shares = 128; - -/* kernel thread that runs rebalance_shares() periodically */ -static int load_balance_monitor(void *unused) -{ - unsigned int timeout = sysctl_sched_min_bal_int_shares; - struct sched_param schedparm; - int ret; - - /* - * We don't want this thread's execution to be limited by the shares - * assigned to default group (init_task_group). Hence make it run - * as a SCHED_RR RT task at the lowest priority. - */ - schedparm.sched_priority = 1; - ret = sched_setscheduler(current, SCHED_RR, &schedparm); - if (ret) - printk(KERN_ERR "Couldn't set SCHED_RR policy for load balance" - " monitor thread (error = %d) \n", ret); - - while (!kthread_should_stop()) { - int i, cpu, balanced = 1; - - /* Prevent cpus going down or coming up */ - get_online_cpus(); - /* lockout changes to doms_cur[] array */ - lock_doms_cur(); - /* - * Enter a rcu read-side critical section to safely walk rq->sd - * chain on various cpus and to walk task group list - * (rq->leaf_cfs_rq_list) in rebalance_shares(). - */ - rcu_read_lock(); - - for (i = 0; i < ndoms_cur; i++) { - cpumask_t cpumap = doms_cur[i]; - struct sched_domain *sd = NULL, *sd_prev = NULL; - - cpu = first_cpu(cpumap); - - /* Find the highest domain at which to balance shares */ - for_each_domain(cpu, sd) { - if (!(sd->flags & SD_LOAD_BALANCE)) - continue; - sd_prev = sd; - } - - sd = sd_prev; - /* sd == NULL? No load balance reqd in this domain */ - if (!sd) - continue; - - balanced &= rebalance_shares(sd, cpu); - } - - rcu_read_unlock(); - - unlock_doms_cur(); - put_online_cpus(); - - if (!balanced) - timeout = sysctl_sched_min_bal_int_shares; - else if (timeout < sysctl_sched_max_bal_int_shares) - timeout *= 2; - - msleep_interruptible(timeout); - } - - return 0; -} -#endif /* CONFIG_SMP */ - #ifdef CONFIG_FAIR_GROUP_SCHED static void free_fair_sched_group(struct task_group *tg) { @@ -7841,29 +7635,25 @@ void sched_move_task(struct task_struct *tsk) } #ifdef CONFIG_FAIR_GROUP_SCHED -/* rq->lock to be locked by caller */ static void set_se_shares(struct sched_entity *se, unsigned long shares) { struct cfs_rq *cfs_rq = se->cfs_rq; struct rq *rq = cfs_rq->rq; int on_rq; - if (!shares) - shares = MIN_GROUP_SHARES; + spin_lock_irq(&rq->lock); on_rq = se->on_rq; - if (on_rq) { + if (on_rq) dequeue_entity(cfs_rq, se, 0); - dec_cpu_load(rq, se->load.weight); - } se->load.weight = shares; se->load.inv_weight = div64_64((1ULL<<32), shares); - if (on_rq) { + if (on_rq) enqueue_entity(cfs_rq, se, 0); - inc_cpu_load(rq, se->load.weight); - } + + spin_unlock_irq(&rq->lock); } static DEFINE_MUTEX(shares_mutex); @@ -7873,18 +7663,18 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) int i; unsigned long flags; + /* + * A weight of 0 or 1 can cause arithmetics problems. + * (The default weight is 1024 - so there's no practical + * limitation from this.) + */ + if (shares < 2) + shares = 2; + mutex_lock(&shares_mutex); if (tg->shares == shares) goto done; - if (shares < MIN_GROUP_SHARES) - shares = MIN_GROUP_SHARES; - - /* - * Prevent any load balance activity (rebalance_shares, - * load_balance_fair) from referring to this group first, - * by taking it off the rq->leaf_cfs_rq_list on each cpu. - */ spin_lock_irqsave(&task_group_lock, flags); for_each_possible_cpu(i) unregister_fair_sched_group(tg, i); @@ -7898,11 +7688,8 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) * w/o tripping rebalance_share or load_balance_fair. */ tg->shares = shares; - for_each_possible_cpu(i) { - spin_lock_irq(&cpu_rq(i)->lock); + for_each_possible_cpu(i) set_se_shares(tg->se[i], shares); - spin_unlock_irq(&cpu_rq(i)->lock); - } /* * Enable load balance activity on this group, by inserting it back on diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index c8e6492c592..3df4d46994c 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -727,8 +727,6 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se) return se->parent; } -#define GROUP_IMBALANCE_PCT 20 - #else /* CONFIG_FAIR_GROUP_SCHED */ #define for_each_sched_entity(se) \ @@ -819,26 +817,15 @@ hrtick_start_fair(struct rq *rq, struct task_struct *p) static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup) { struct cfs_rq *cfs_rq; - struct sched_entity *se = &p->se, - *topse = NULL; /* Highest schedulable entity */ - int incload = 1; + struct sched_entity *se = &p->se; for_each_sched_entity(se) { - topse = se; - if (se->on_rq) { - incload = 0; + if (se->on_rq) break; - } cfs_rq = cfs_rq_of(se); enqueue_entity(cfs_rq, se, wakeup); wakeup = 1; } - /* Increment cpu load if we just enqueued the first task of a group on - * 'rq->cpu'. 'topse' represents the group to which task 'p' belongs - * at the highest grouping level. - */ - if (incload) - inc_cpu_load(rq, topse->load.weight); hrtick_start_fair(rq, rq->curr); } @@ -851,28 +838,16 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup) static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep) { struct cfs_rq *cfs_rq; - struct sched_entity *se = &p->se, - *topse = NULL; /* Highest schedulable entity */ - int decload = 1; + struct sched_entity *se = &p->se; for_each_sched_entity(se) { - topse = se; cfs_rq = cfs_rq_of(se); dequeue_entity(cfs_rq, se, sleep); /* Don't dequeue parent if it has other entities besides us */ - if (cfs_rq->load.weight) { - if (parent_entity(se)) - decload = 0; + if (cfs_rq->load.weight) break; - } sleep = 1; } - /* Decrement cpu load if we just dequeued the last task of a group on - * 'rq->cpu'. 'topse' represents the group to which task 'p' belongs - * at the highest grouping level. - */ - if (decload) - dec_cpu_load(rq, topse->load.weight); hrtick_start_fair(rq, rq->curr); } @@ -1186,6 +1161,25 @@ static struct task_struct *load_balance_next_fair(void *arg) return __load_balance_iterator(cfs_rq, cfs_rq->rb_load_balance_curr); } +#ifdef CONFIG_FAIR_GROUP_SCHED +static int cfs_rq_best_prio(struct cfs_rq *cfs_rq) +{ + struct sched_entity *curr; + struct task_struct *p; + + if (!cfs_rq->nr_running || !first_fair(cfs_rq)) + return MAX_PRIO; + + curr = cfs_rq->curr; + if (!curr) + curr = __pick_next_entity(cfs_rq); + + p = task_of(curr); + + return p->prio; +} +#endif + static unsigned long load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, unsigned long max_load_move, @@ -1195,45 +1189,28 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, struct cfs_rq *busy_cfs_rq; long rem_load_move = max_load_move; struct rq_iterator cfs_rq_iterator; - unsigned long load_moved; cfs_rq_iterator.start = load_balance_start_fair; cfs_rq_iterator.next = load_balance_next_fair; for_each_leaf_cfs_rq(busiest, busy_cfs_rq) { #ifdef CONFIG_FAIR_GROUP_SCHED - struct cfs_rq *this_cfs_rq = busy_cfs_rq->tg->cfs_rq[this_cpu]; - unsigned long maxload, task_load, group_weight; - unsigned long thisload, per_task_load; - struct sched_entity *se = busy_cfs_rq->tg->se[busiest->cpu]; - - task_load = busy_cfs_rq->load.weight; - group_weight = se->load.weight; + struct cfs_rq *this_cfs_rq; + long imbalance; + unsigned long maxload; - /* - * 'group_weight' is contributed by tasks of total weight - * 'task_load'. To move 'rem_load_move' worth of weight only, - * we need to move a maximum task load of: - * - * maxload = (remload / group_weight) * task_load; - */ - maxload = (rem_load_move * task_load) / group_weight; + this_cfs_rq = cpu_cfs_rq(busy_cfs_rq, this_cpu); - if (!maxload || !task_load) + imbalance = busy_cfs_rq->load.weight - this_cfs_rq->load.weight; + /* Don't pull if this_cfs_rq has more load than busy_cfs_rq */ + if (imbalance <= 0) continue; - per_task_load = task_load / busy_cfs_rq->nr_running; - /* - * balance_tasks will try to forcibly move atleast one task if - * possible (because of SCHED_LOAD_SCALE_FUZZ). Avoid that if - * maxload is less than GROUP_IMBALANCE_FUZZ% the per_task_load. - */ - if (100 * maxload < GROUP_IMBALANCE_PCT * per_task_load) - continue; + /* Don't pull more than imbalance/2 */ + imbalance /= 2; + maxload = min(rem_load_move, imbalance); - /* Disable priority-based load balance */ - *this_best_prio = 0; - thisload = this_cfs_rq->load.weight; + *this_best_prio = cfs_rq_best_prio(this_cfs_rq); #else # define maxload rem_load_move #endif @@ -1242,33 +1219,11 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, * load_balance_[start|next]_fair iterators */ cfs_rq_iterator.arg = busy_cfs_rq; - load_moved = balance_tasks(this_rq, this_cpu, busiest, + rem_load_move -= balance_tasks(this_rq, this_cpu, busiest, maxload, sd, idle, all_pinned, this_best_prio, &cfs_rq_iterator); -#ifdef CONFIG_FAIR_GROUP_SCHED - /* - * load_moved holds the task load that was moved. The - * effective (group) weight moved would be: - * load_moved_eff = load_moved/task_load * group_weight; - */ - load_moved = (group_weight * load_moved) / task_load; - - /* Adjust shares on both cpus to reflect load_moved */ - group_weight -= load_moved; - set_se_shares(se, group_weight); - - se = busy_cfs_rq->tg->se[this_cpu]; - if (!thisload) - group_weight = load_moved; - else - group_weight = se->load.weight + load_moved; - set_se_shares(se, group_weight); -#endif - - rem_load_move -= load_moved; - if (rem_load_move <= 0) break; } diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index f54792b175b..76e82851754 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -393,8 +393,6 @@ static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup) */ for_each_sched_rt_entity(rt_se) enqueue_rt_entity(rt_se); - - inc_cpu_load(rq, p->se.load.weight); } static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) @@ -414,8 +412,6 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) if (rt_rq && rt_rq->rt_nr_running) enqueue_rt_entity(rt_se); } - - dec_cpu_load(rq, p->se.load.weight); } /* diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 8b7e9541179..b2a2d6889ba 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -311,24 +311,6 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, -#if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP) - { - .ctl_name = CTL_UNNUMBERED, - .procname = "sched_min_bal_int_shares", - .data = &sysctl_sched_min_bal_int_shares, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = CTL_UNNUMBERED, - .procname = "sched_max_bal_int_shares", - .data = &sysctl_sched_max_bal_int_shares, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, -#endif #endif { .ctl_name = CTL_UNNUMBERED, -- cgit v1.2.3-70-g09d2 From ec8670f1f795badedaa056a3a3245b9b82201747 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 1 Mar 2008 07:51:29 -0700 Subject: dmaengine: fix sparse warning include/linux/dmaengine.h:364:2: warning: returning void-valued expression Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index acbb364674f..261e43a4c87 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -366,7 +366,7 @@ __dma_has_cap(enum dma_transaction_type tx_type, dma_cap_mask_t *srcp) */ static inline void dma_async_issue_pending(struct dma_chan *chan) { - return chan->device->device_issue_pending(chan); + chan->device->device_issue_pending(chan); } #define dma_async_memcpy_issue_pending(chan) dma_async_issue_pending(chan) -- cgit v1.2.3-70-g09d2 From d9452e9f81e997cbd0c9bface8d2c2a4b064cc3e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 4 Mar 2008 12:28:49 -0800 Subject: [NETPOLL]: Revert two bogus cleanups that broke netconsole. Based upon a report by Andrew Morton and code analysis done by Jarek Poplawski. This reverts 33f807ba0d9259e7c75c7a2ce8bd2787e5b540c7 ("[NETPOLL]: Kill NETPOLL_RX_DROP, set but never tested.") and c7b6ea24b43afb5749cb704e143df19d70e23dea ("[NETPOLL]: Don't need rx_flags."). The rx_flags did get tested for zero vs. non-zero and therefore we do need those tests and that code which sets NETPOLL_RX_DROP et al. Signed-off-by: David S. Miller --- include/linux/netpoll.h | 7 ++++--- net/core/netpoll.c | 12 ++++++++---- 2 files changed, 12 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index a0525a1f471..e3d79593fb3 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -25,6 +25,7 @@ struct netpoll { struct netpoll_info { atomic_t refcnt; + int rx_flags; spinlock_t rx_lock; struct netpoll *rx_np; /* netpoll that registered an rx_hook */ struct sk_buff_head arp_tx; /* list of arp requests to reply to */ @@ -50,12 +51,12 @@ static inline int netpoll_rx(struct sk_buff *skb) unsigned long flags; int ret = 0; - if (!npinfo || !npinfo->rx_np) + if (!npinfo || (!npinfo->rx_np && !npinfo->rx_flags)) return 0; spin_lock_irqsave(&npinfo->rx_lock, flags); - /* check rx_np again with the lock held */ - if (npinfo->rx_np && __netpoll_rx(skb)) + /* check rx_flags again with the lock held */ + if (npinfo->rx_flags && __netpoll_rx(skb)) ret = 1; spin_unlock_irqrestore(&npinfo->rx_lock, flags); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 6faa128a4c8..4b7e756181c 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -39,6 +39,8 @@ static struct sk_buff_head skb_pool; static atomic_t trapped; #define USEC_PER_POLL 50 +#define NETPOLL_RX_ENABLED 1 +#define NETPOLL_RX_DROP 2 #define MAX_SKB_SIZE \ (MAX_UDP_CHUNK + sizeof(struct udphdr) + \ @@ -126,11 +128,13 @@ static int poll_one_napi(struct netpoll_info *npinfo, if (!test_bit(NAPI_STATE_SCHED, &napi->state)) return budget; + npinfo->rx_flags |= NETPOLL_RX_DROP; atomic_inc(&trapped); work = napi->poll(napi, budget); atomic_dec(&trapped); + npinfo->rx_flags &= ~NETPOLL_RX_DROP; return budget - work; } @@ -472,7 +476,7 @@ int __netpoll_rx(struct sk_buff *skb) if (skb->dev->type != ARPHRD_ETHER) goto out; - /* if receive ARP during middle of NAPI poll, then queue */ + /* check if netpoll clients need ARP */ if (skb->protocol == htons(ETH_P_ARP) && atomic_read(&trapped)) { skb_queue_tail(&npi->arp_tx, skb); @@ -534,9 +538,6 @@ int __netpoll_rx(struct sk_buff *skb) return 1; out: - /* If packet received while already in poll then just - * silently drop. - */ if (atomic_read(&trapped)) { kfree_skb(skb); return 1; @@ -675,6 +676,7 @@ int netpoll_setup(struct netpoll *np) goto release; } + npinfo->rx_flags = 0; npinfo->rx_np = NULL; spin_lock_init(&npinfo->rx_lock); @@ -756,6 +758,7 @@ int netpoll_setup(struct netpoll *np) if (np->rx_hook) { spin_lock_irqsave(&npinfo->rx_lock, flags); + npinfo->rx_flags |= NETPOLL_RX_ENABLED; npinfo->rx_np = np; spin_unlock_irqrestore(&npinfo->rx_lock, flags); } @@ -797,6 +800,7 @@ void netpoll_cleanup(struct netpoll *np) if (npinfo->rx_np == np) { spin_lock_irqsave(&npinfo->rx_lock, flags); npinfo->rx_np = NULL; + npinfo->rx_flags &= ~NETPOLL_RX_ENABLED; spin_unlock_irqrestore(&npinfo->rx_lock, flags); } -- cgit v1.2.3-70-g09d2 From a6cd6322d594014240465210ccb290971469c6e8 Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Mon, 25 Feb 2008 14:32:22 +0900 Subject: [IA64] Fix irq migration in multiple vector domain Fix the problem that the following error message is sometimes displayed at irq migration when vector domain is enabled. "Unexpected interrupt vector %d on CPU %d is not mapped to any IRQ!" The cause of this problem is an interrupt is sent to the previous target CPU after cleaning up vector to irq mapping table. To clean up vector to irq map on the previous target CPU safty, change the irq migration in multiple vector domain as follows. The original idea is from x86 interrupt management code. - Delay vector to irq table cleanup until the interrupts are sent to new target CPUs. By this, it is ensured that target CPU is completely changed on the interrupt controller side. - Even after the interrupts are sent to new target CPUs, there can be pended interrupts remaining on the previous target CPU. So we need to delay clearning up vector to irq table until the pended interrupt is handled. For this, send IPI to the previous target CPU with lower priority vector and clean up vector to irq table in its handler. This patch affects only to irq migration code with multiple vector domain is enabled. Signed-off-by: Kenji Kaneshige Signed-off-by: Tony Luck --- arch/ia64/kernel/iosapic.c | 4 +- arch/ia64/kernel/irq_ia64.c | 134 ++++++++++++++++++++++++++++++++++---------- arch/ia64/kernel/msi_ia64.c | 3 +- include/asm-ia64/hw_irq.h | 12 +++- 4 files changed, 120 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c index 398e2fd1cd2..7b3292282de 100644 --- a/arch/ia64/kernel/iosapic.c +++ b/arch/ia64/kernel/iosapic.c @@ -345,7 +345,7 @@ iosapic_set_affinity (unsigned int irq, cpumask_t mask) if (cpus_empty(mask)) return; - if (reassign_irq_vector(irq, first_cpu(mask))) + if (irq_prepare_move(irq, first_cpu(mask))) return; dest = cpu_physical_id(first_cpu(mask)); @@ -397,6 +397,7 @@ iosapic_end_level_irq (unsigned int irq) struct iosapic_rte_info *rte; int do_unmask_irq = 0; + irq_complete_move(irq); if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) { do_unmask_irq = 1; mask_irq(irq); @@ -450,6 +451,7 @@ iosapic_ack_edge_irq (unsigned int irq) { irq_desc_t *idesc = irq_desc + irq; + irq_complete_move(irq); move_native_irq(irq); /* * Once we have recorded IRQ_PENDING already, we can mask the diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c index 0b52f19ed04..2b8cf6e85af 100644 --- a/arch/ia64/kernel/irq_ia64.c +++ b/arch/ia64/kernel/irq_ia64.c @@ -260,6 +260,8 @@ void __setup_vector_irq(int cpu) } #if defined(CONFIG_SMP) && (defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_DIG)) +#define IA64_IRQ_MOVE_VECTOR IA64_DEF_FIRST_DEVICE_VECTOR + static enum vector_domain_type { VECTOR_DOMAIN_NONE, VECTOR_DOMAIN_PERCPU @@ -272,6 +274,101 @@ static cpumask_t vector_allocation_domain(int cpu) return CPU_MASK_ALL; } +static int __irq_prepare_move(int irq, int cpu) +{ + struct irq_cfg *cfg = &irq_cfg[irq]; + int vector; + cpumask_t domain; + + if (cfg->move_in_progress || cfg->move_cleanup_count) + return -EBUSY; + if (cfg->vector == IRQ_VECTOR_UNASSIGNED || !cpu_online(cpu)) + return -EINVAL; + if (cpu_isset(cpu, cfg->domain)) + return 0; + domain = vector_allocation_domain(cpu); + vector = find_unassigned_vector(domain); + if (vector < 0) + return -ENOSPC; + cfg->move_in_progress = 1; + cfg->old_domain = cfg->domain; + cfg->vector = IRQ_VECTOR_UNASSIGNED; + cfg->domain = CPU_MASK_NONE; + BUG_ON(__bind_irq_vector(irq, vector, domain)); + return 0; +} + +int irq_prepare_move(int irq, int cpu) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&vector_lock, flags); + ret = __irq_prepare_move(irq, cpu); + spin_unlock_irqrestore(&vector_lock, flags); + return ret; +} + +void irq_complete_move(unsigned irq) +{ + struct irq_cfg *cfg = &irq_cfg[irq]; + cpumask_t cleanup_mask; + int i; + + if (likely(!cfg->move_in_progress)) + return; + + if (unlikely(cpu_isset(smp_processor_id(), cfg->old_domain))) + return; + + cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); + cfg->move_cleanup_count = cpus_weight(cleanup_mask); + for_each_cpu_mask(i, cleanup_mask) + platform_send_ipi(i, IA64_IRQ_MOVE_VECTOR, IA64_IPI_DM_INT, 0); + cfg->move_in_progress = 0; +} + +static irqreturn_t smp_irq_move_cleanup_interrupt(int irq, void *dev_id) +{ + int me = smp_processor_id(); + ia64_vector vector; + unsigned long flags; + + for (vector = IA64_FIRST_DEVICE_VECTOR; + vector < IA64_LAST_DEVICE_VECTOR; vector++) { + int irq; + struct irq_desc *desc; + struct irq_cfg *cfg; + irq = __get_cpu_var(vector_irq)[vector]; + if (irq < 0) + continue; + + desc = irq_desc + irq; + cfg = irq_cfg + irq; + spin_lock(&desc->lock); + if (!cfg->move_cleanup_count) + goto unlock; + + if (!cpu_isset(me, cfg->old_domain)) + goto unlock; + + spin_lock_irqsave(&vector_lock, flags); + __get_cpu_var(vector_irq)[vector] = -1; + cpu_clear(me, vector_table[vector]); + spin_unlock_irqrestore(&vector_lock, flags); + cfg->move_cleanup_count--; + unlock: + spin_unlock(&desc->lock); + } + return IRQ_HANDLED; +} + +static struct irqaction irq_move_irqaction = { + .handler = smp_irq_move_cleanup_interrupt, + .flags = IRQF_DISABLED, + .name = "irq_move" +}; + static int __init parse_vector_domain(char *arg) { if (!arg) @@ -303,36 +400,6 @@ void destroy_and_reserve_irq(unsigned int irq) spin_unlock_irqrestore(&vector_lock, flags); } -static int __reassign_irq_vector(int irq, int cpu) -{ - struct irq_cfg *cfg = &irq_cfg[irq]; - int vector; - cpumask_t domain; - - if (cfg->vector == IRQ_VECTOR_UNASSIGNED || !cpu_online(cpu)) - return -EINVAL; - if (cpu_isset(cpu, cfg->domain)) - return 0; - domain = vector_allocation_domain(cpu); - vector = find_unassigned_vector(domain); - if (vector < 0) - return -ENOSPC; - __clear_irq_vector(irq); - BUG_ON(__bind_irq_vector(irq, vector, domain)); - return 0; -} - -int reassign_irq_vector(int irq, int cpu) -{ - unsigned long flags; - int ret; - - spin_lock_irqsave(&vector_lock, flags); - ret = __reassign_irq_vector(irq, cpu); - spin_unlock_irqrestore(&vector_lock, flags); - return ret; -} - /* * Dynamic irq allocate and deallocation for MSI */ @@ -578,6 +645,13 @@ init_IRQ (void) register_percpu_irq(IA64_IPI_VECTOR, &ipi_irqaction); register_percpu_irq(IA64_IPI_RESCHEDULE, &resched_irqaction); register_percpu_irq(IA64_IPI_LOCAL_TLB_FLUSH, &tlb_irqaction); +#if defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_DIG) + if (vector_domain_type != VECTOR_DOMAIN_NONE) { + BUG_ON(IA64_FIRST_DEVICE_VECTOR != IA64_IRQ_MOVE_VECTOR); + IA64_FIRST_DEVICE_VECTOR++; + register_percpu_irq(IA64_IRQ_MOVE_VECTOR, &irq_move_irqaction); + } +#endif #endif #ifdef CONFIG_PERFMON pfm_init_percpu(); diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c index e86d0295979..60c6ef67ebb 100644 --- a/arch/ia64/kernel/msi_ia64.c +++ b/arch/ia64/kernel/msi_ia64.c @@ -57,7 +57,7 @@ static void ia64_set_msi_irq_affinity(unsigned int irq, cpumask_t cpu_mask) if (!cpu_online(cpu)) return; - if (reassign_irq_vector(irq, cpu)) + if (irq_prepare_move(irq, cpu)) return; read_msi_msg(irq, &msg); @@ -119,6 +119,7 @@ void ia64_teardown_msi_irq(unsigned int irq) static void ia64_ack_msi_irq(unsigned int irq) { + irq_complete_move(irq); move_native_irq(irq); ia64_eoi(); } diff --git a/include/asm-ia64/hw_irq.h b/include/asm-ia64/hw_irq.h index 7e6e3779670..76366dc9c1a 100644 --- a/include/asm-ia64/hw_irq.h +++ b/include/asm-ia64/hw_irq.h @@ -93,6 +93,9 @@ extern __u8 isa_irq_to_vector_map[16]; struct irq_cfg { ia64_vector vector; cpumask_t domain; + cpumask_t old_domain; + unsigned move_cleanup_count; + u8 move_in_progress : 1; }; extern spinlock_t vector_lock; extern struct irq_cfg irq_cfg[NR_IRQS]; @@ -106,12 +109,19 @@ extern int assign_irq_vector (int irq); /* allocate a free vector */ extern void free_irq_vector (int vector); extern int reserve_irq_vector (int vector); extern void __setup_vector_irq(int cpu); -extern int reassign_irq_vector(int irq, int cpu); extern void ia64_send_ipi (int cpu, int vector, int delivery_mode, int redirect); extern void register_percpu_irq (ia64_vector vec, struct irqaction *action); extern int check_irq_used (int irq); extern void destroy_and_reserve_irq (unsigned int irq); +#if defined(CONFIG_SMP) && (defined(CONFIG_IA64_GENERIC) || defined(CONFIG_IA64_DIG)) +extern int irq_prepare_move(int irq, int cpu); +extern void irq_complete_move(unsigned int irq); +#else +static inline int irq_prepare_move(int irq, int cpu) { return 0; } +static inline void irq_complete_move(unsigned int irq) {} +#endif + static inline void ia64_resend_irq(unsigned int vector) { platform_send_ipi(smp_processor_id(), vector, IA64_IPI_DM_INT, 0); -- cgit v1.2.3-70-g09d2 From 6ed0dc5ba811ce682f48988bf114669265e1120d Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Wed, 27 Feb 2008 18:41:38 -0700 Subject: [IA64] workaround tiger ia64_sal_get_physical_id_info hang This fixes regression introduced in 113134fcbca83619be4c68d0ca66db6093777b5d Intel Tiger platforms hang when calling SAL_GET_PHYSICAL_ID_INFO instead of properly returning -1 for unimplemented, so add a version check. SGI Altix platforms have an incorrect SAL version hard-coded into their prom -- they encode 2.9, but actually implement 3.2 -- so fix it up and allow ia64_sal_get_physical_id_info to keep working. Signed-off-by: Alex Chiang Acked-by: Russ Anderson Signed-off-by: Tony Luck --- arch/ia64/kernel/sal.c | 7 +++++++ include/asm-ia64/sal.h | 4 ++++ 2 files changed, 11 insertions(+) (limited to 'include') diff --git a/arch/ia64/kernel/sal.c b/arch/ia64/kernel/sal.c index f44fe841216..a3022dc48ef 100644 --- a/arch/ia64/kernel/sal.c +++ b/arch/ia64/kernel/sal.c @@ -109,6 +109,13 @@ check_versions (struct ia64_sal_systab *systab) sal_revision = SAL_VERSION_CODE(2, 8); sal_version = SAL_VERSION_CODE(0, 0); } + + if (ia64_platform_is("sn2") && (sal_revision == SAL_VERSION_CODE(2, 9))) + /* + * SGI Altix has hard-coded version 2.9 in their prom + * but they actually implement 3.2, so let's fix it here. + */ + sal_revision = SAL_VERSION_CODE(3, 2); } static void __init diff --git a/include/asm-ia64/sal.h b/include/asm-ia64/sal.h index 2251118894a..f4904db3b05 100644 --- a/include/asm-ia64/sal.h +++ b/include/asm-ia64/sal.h @@ -807,6 +807,10 @@ static inline s64 ia64_sal_physical_id_info(u16 *splid) { struct ia64_sal_retval isrv; + + if (sal_revision < SAL_VERSION_CODE(3,2)) + return -1; + SAL_CALL(isrv, SAL_PHYSICAL_ID_INFO, 0, 0, 0, 0, 0, 0, 0); if (splid) *splid = isrv.v0; -- cgit v1.2.3-70-g09d2 From 956d6cad87abdfaef35fa4fc2f2e4ac5bb4ee7a5 Mon Sep 17 00:00:00 2001 From: Doug Chapman Date: Fri, 29 Feb 2008 15:28:43 -0500 Subject: [IA64] move gcc_intrin.h from header-y to unifdef-y When I submitted 0df29025fd0379d5950d206314d0b10a2c8a9607 to ad an #ifdef __KERNEL__ to include/asm-ia64/gcc_intrin.h a few weeks ago I neglected to move gcc_intrin.h from header-y to unifdef-y. Thanks to David Woodhouse for pointing this out. Signed-off-by: Doug Chapman Signed-off-by: Jarod Wilson Signed-off-by: Tony Luck --- include/asm-ia64/Kbuild | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-ia64/Kbuild b/include/asm-ia64/Kbuild index 4a1e48b9f40..eb24a3f47ca 100644 --- a/include/asm-ia64/Kbuild +++ b/include/asm-ia64/Kbuild @@ -3,7 +3,6 @@ include include/asm-generic/Kbuild.asm header-y += break.h header-y += fpu.h header-y += fpswa.h -header-y += gcc_intrin.h header-y += ia64regs.h header-y += intel_intrin.h header-y += intrinsics.h @@ -12,5 +11,6 @@ header-y += ptrace_offsets.h header-y += rse.h header-y += ucontext.h +unifdef-y += gcc_intrin.h unifdef-y += perfmon.h unifdef-y += ustack.h -- cgit v1.2.3-70-g09d2 From 7adc3830f90df04a13366914d80a3ed407db5381 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 4 Mar 2008 14:28:41 -0800 Subject: [TCP]: Improve ipv4 established hash function. If all of the entropy is in the local and foreign addresses, but xor'ing together would cancel out that entropy, the current hash performs poorly. Suggested by Cosmin Ratiu: Basically, the situation is as follows: There is a client machine and a server machine. Both create 15000 virtual interfaces, open up a socket for each pair of interfaces and do SIP traffic. By profiling I noticed that there is a lot of time spent walking the established hash chains with this particular setup. The addresses were distributed like this: client interfaces were 198.18.0.1/16 with increments of 1 and server interfaces were 198.18.128.1/16 with increments of 1. As I said, there were 15000 interfaces. Source and destination ports were 5060 for each connection. So in this case, ports don't matter for hashing purposes, and the bits from the address pairs used cancel each other, meaning there are no differences in the whole lot of pairs, so they all end up in the same hash chain. Signed-off-by: David S. Miller --- include/net/inet_sock.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 70013c5f4e5..89cd011edb9 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -175,7 +175,8 @@ extern void build_ehash_secret(void); static inline unsigned int inet_ehashfn(const __be32 laddr, const __u16 lport, const __be32 faddr, const __be16 fport) { - return jhash_2words((__force __u32) laddr ^ (__force __u32) faddr, + return jhash_3words((__force __u32) laddr, + (__force __u32) faddr, ((__u32) lport) << 16 | (__force __u32)fport, inet_ehash_secret); } -- cgit v1.2.3-70-g09d2 From 9dad6f5785a9f113dbbd58951d2f5ef9abd06dcc Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 3 Mar 2008 20:07:22 +0200 Subject: [IA64] fix ia64 kprobes compilation This patch fixes the following compile error with a recent gcc: CC kernel/kprobes.o /home/bunk/linux/kernel-2.6/git/linux-2.6/kernel/kprobes.c:1066: error: __ksymtab_jprobe_return causes a section type conflict Signed-off-by: Adrian Bunk Signed-off-by: Tony Luck --- arch/ia64/kernel/kprobes.c | 5 +++++ include/asm-ia64/kprobes.h | 4 ---- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index b618487cdc8..615c3d2b634 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -1001,6 +1001,11 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) return 1; } +/* ia64 does not need this */ +void __kprobes jprobe_return(void) +{ +} + int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); diff --git a/include/asm-ia64/kprobes.h b/include/asm-ia64/kprobes.h index a93ce9ef07f..49684b65c18 100644 --- a/include/asm-ia64/kprobes.h +++ b/include/asm-ia64/kprobes.h @@ -122,10 +122,6 @@ extern int kprobes_fault_handler(struct pt_regs *regs, int trapnr); extern int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data); -/* ia64 does not need this */ -static inline void jprobe_return(void) -{ -} extern void invalidate_stacked_regs(void); extern void flush_register_stack(void); extern void arch_remove_kprobe(struct kprobe *p); -- cgit v1.2.3-70-g09d2 From 3634634edd49c115da931998b9540bcc17665b05 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Wed, 13 Feb 2008 17:08:16 -0800 Subject: debugfs: fix sparse warnings extern does not belong in C files, move declaration to linux/debugfs.h fs/debugfs/file.c:42:30: warning: symbol 'debugfs_file_operations' was not declared. Should it be static? fs/debugfs/file.c:54:31: warning: symbol 'debugfs_link_operations' was not declared. Should it be static? Signed-off-by: Harvey Harrison Signed-off-by: Greg Kroah-Hartman --- fs/debugfs/inode.c | 4 ---- include/linux/debugfs.h | 5 +++++ 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index d26e2826ba5..e9602d85c11 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -29,10 +29,6 @@ #define DEBUGFS_MAGIC 0x64626720 -/* declared over in file.c */ -extern struct file_operations debugfs_file_operations; -extern struct inode_operations debugfs_link_operations; - static struct vfsmount *debugfs_mount; static int debugfs_mount_count; diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index f592d6de3b9..7266124361b 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -27,6 +27,11 @@ struct debugfs_blob_wrapper { }; #if defined(CONFIG_DEBUG_FS) + +/* declared over in file.c */ +extern const struct file_operations debugfs_file_operations; +extern const struct inode_operations debugfs_link_operations; + struct dentry *debugfs_create_file(const char *name, mode_t mode, struct dentry *parent, void *data, const struct file_operations *fops); -- cgit v1.2.3-70-g09d2 From d6d914f52b15d5a8e81ad481e02d9ab30d412a29 Mon Sep 17 00:00:00 2001 From: Lei Ming Date: Mon, 25 Feb 2008 18:07:28 +0800 Subject: USB: fix comment of struct usb_interface update the comment for the removed "driver" field and being out-of-order of @cur_altsetting and @num_altsetting. Signed-off-by: Lei Ming Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/usb.h b/include/linux/usb.h index 5bd3ae8aaaf..583e0481dfa 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -94,10 +94,9 @@ enum usb_interface_condition { * @altsetting: array of interface structures, one for each alternate * setting that may be selected. Each one includes a set of * endpoint configurations. They will be in no particular order. - * @num_altsetting: number of altsettings defined. * @cur_altsetting: the current altsetting. + * @num_altsetting: number of altsettings defined. * @intf_assoc: interface association descriptor - * @driver: the USB driver that is bound to this interface. * @minor: the minor number assigned to this interface, if this * interface is bound to a driver that uses the USB major number. * If this interface does not use the USB major, this field should -- cgit v1.2.3-70-g09d2 From 90a1ba0c5e39eeea278f263c28ae02166c5911c8 Mon Sep 17 00:00:00 2001 From: Jonas Bonn Date: Fri, 22 Feb 2008 11:02:21 +0100 Subject: PCI: Add DECLARE_PCI_DEVICE_TABLE macro The definitions of struct pci_device_id arrays should generally follow the same pattern across the entire kernel. This macro defines this array as const and puts it into the __devinitconst section. There are currently many definitions scattered about the kernel that omit the __devinitdata modifier despite the documentation stating that it should always be there. These definitions really also should have been const, which wasn't possible before but has become so with the addition of the __devinitconst attribute. Furthermore, there are definitions that use "const" and __devinitdata, which is explicitly wrong but the compiler doesn't catch section mismatches if there's only one such one case in the module (which is often the case). Adding the __devinitconst modifier where there was nothing before buys us memory. Adding the const modifier gives the compiler a chance to do its thing. Changing __devinitdata to __devinitconst where it was wrong actually fixes some compiler errors in older (mid-release) kernels that were patched over by "removing" the section attribute altogether (which wastes memory). This macro makes it pretty difficult to get this definition wrong in the future... Signed-off-by: Jonas Bonn Signed-off-by: Greg Kroah-Hartman --- Documentation/pci.txt | 6 ++++-- include/linux/pci.h | 10 ++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/Documentation/pci.txt b/Documentation/pci.txt index 72b20c63959..bb7bd27d468 100644 --- a/Documentation/pci.txt +++ b/Documentation/pci.txt @@ -123,7 +123,8 @@ initialization with a pointer to a structure describing the driver The ID table is an array of struct pci_device_id entries ending with an -all-zero entry. Each entry consists of: +all-zero entry; use of the macro DECLARE_PCI_DEVICE_TABLE is the preferred +method of declaring the table. Each entry consists of: vendor,device Vendor and device ID to match (or PCI_ANY_ID) @@ -191,7 +192,8 @@ Tips on when/where to use the above attributes: o Do not mark the struct pci_driver. - o The ID table array should be marked __devinitdata. + o The ID table array should be marked __devinitconst; this is done + automatically if the table is declared with DECLARE_PCI_DEVICE_TABLE(). o The probe() and remove() functions should be marked __devinit and __devexit respectively. All initialization functions diff --git a/include/linux/pci.h b/include/linux/pci.h index 87195b62de5..f3165e7ac43 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -388,6 +388,16 @@ struct pci_driver { #define to_pci_driver(drv) container_of(drv, struct pci_driver, driver) +/** + * DECLARE_PCI_DEVICE_TABLE - macro used to describe a pci device table + * @_table: device table name + * + * This macro is used to create a struct pci_device_id array (a device table) + * in a generic manner. + */ +#define DECLARE_PCI_DEVICE_TABLE(_table) \ + const struct pci_device_id _table[] __devinitconst + /** * PCI_DEVICE - macro used to describe a specific pci device * @vend: the 16 bit PCI Vendor ID -- cgit v1.2.3-70-g09d2 From 7560fa60fcdcdb0da662f6a9fad9064b554ef46c Mon Sep 17 00:00:00 2001 From: David Brownell Date: Tue, 4 Mar 2008 14:28:27 -0800 Subject: gpio: and "no GPIO support here" stubs Add a defining fail/warn stubs for GPIO calls on platforms that don't support the GPIO programming interface. That includes the arch-specific implementation glue otherwise. This facilitates a new model for GPIO usage: drivers that can use GPIOs if they're available, but don't require them. One example of such a driver is NAND driver for various FreeScale chips. On platforms update with GPIO support, they can be used instead of a worst-case delay to verify that the BUSY signal is off. (Also includes a couple minor unrelated doc updates.) Signed-off-by: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/gpio.txt | 16 ++++++--- include/linux/gpio.h | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+), 4 deletions(-) create mode 100644 include/linux/gpio.h (limited to 'include') diff --git a/Documentation/gpio.txt b/Documentation/gpio.txt index 8da724e2a0f..54630095aa3 100644 --- a/Documentation/gpio.txt +++ b/Documentation/gpio.txt @@ -2,6 +2,9 @@ GPIO Interfaces This provides an overview of GPIO access conventions on Linux. +These calls use the gpio_* naming prefix. No other calls should use that +prefix, or the related __gpio_* prefix. + What is a GPIO? =============== @@ -69,11 +72,13 @@ in this document, but drivers acting as clients to the GPIO interface must not care how it's implemented.) That said, if the convention is supported on their platform, drivers should -use it when possible. Platforms should declare GENERIC_GPIO support in -Kconfig (boolean true), which multi-platform drivers can depend on when -using the include file: +use it when possible. Platforms must declare GENERIC_GPIO support in their +Kconfig (boolean true), and provide an file. Drivers that can't +work without standard GPIO calls should have Kconfig entries which depend +on GENERIC_GPIO. The GPIO calls are available, either as "real code" or as +optimized-away stubs, when drivers use the include file: - #include + #include If you stick to this convention then it'll be easier for other developers to see what your code is doing, and help maintain it. @@ -316,6 +321,9 @@ pulldowns integrated on some platforms. Not all platforms support them, or support them in the same way; and any given board might use external pullups (or pulldowns) so that the on-chip ones should not be used. (When a circuit needs 5 kOhm, on-chip 100 kOhm resistors won't do.) +Likewise drive strength (2 mA vs 20 mA) and voltage (1.8V vs 3.3V) is a +platform-specific issue, as are models like (not) having a one-to-one +correspondence between configurable pins and GPIOs. There are other system-specific mechanisms that are not specified here, like the aforementioned options for input de-glitching and wire-OR output. diff --git a/include/linux/gpio.h b/include/linux/gpio.h new file mode 100644 index 00000000000..4987a84078e --- /dev/null +++ b/include/linux/gpio.h @@ -0,0 +1,95 @@ +#ifndef __LINUX_GPIO_H +#define __LINUX_GPIO_H + +/* see Documentation/gpio.txt */ + +#ifdef CONFIG_GENERIC_GPIO +#include + +#else + +/* + * Some platforms don't support the GPIO programming interface. + * + * In case some driver uses it anyway (it should normally have + * depended on GENERIC_GPIO), these routines help the compiler + * optimize out much GPIO-related code ... or trigger a runtime + * warning when something is wrongly called. + */ + +static inline int gpio_is_valid(int number) +{ + return 0; +} + +static inline int gpio_request(unsigned gpio, const char *label) +{ + return -ENOSYS; +} + +static inline void gpio_free(unsigned gpio) +{ + /* GPIO can never have been requested */ + WARN_ON(1); +} + +static inline int gpio_direction_input(unsigned gpio) +{ + return -ENOSYS; +} + +static inline int gpio_direction_output(unsigned gpio, int value) +{ + return -ENOSYS; +} + +static inline int gpio_get_value(unsigned gpio) +{ + /* GPIO can never have been requested or set as {in,out}put */ + WARN_ON(1); + return 0; +} + +static inline void gpio_set_value(unsigned gpio, int value) +{ + /* GPIO can never have been requested or set as output */ + WARN_ON(1); +} + +static inline int gpio_cansleep(unsigned gpio) +{ + /* GPIO can never have been requested or set as {in,out}put */ + WARN_ON(1); + return 0; +} + +static inline int gpio_get_value_cansleep(unsigned gpio) +{ + /* GPIO can never have been requested or set as {in,out}put */ + WARN_ON(1); + return 0; +} + +static inline void gpio_set_value_cansleep(unsigned gpio, int value) +{ + /* GPIO can never have been requested or set as output */ + WARN_ON(1); +} + +static inline int gpio_to_irq(unsigned gpio) +{ + /* GPIO can never have been requested or set as input */ + WARN_ON(1); + return -EINVAL; +} + +static inline int irq_to_gpio(unsigned irq) +{ + /* irq can never have been returned from gpio_to_irq() */ + WARN_ON(1); + return -EINVAL; +} + +#endif + +#endif /* __LINUX_GPIO_H */ -- cgit v1.2.3-70-g09d2 From 9edddaa200df18e08fe0cf21036e8ae467b1363c Mon Sep 17 00:00:00 2001 From: Ananth N Mavinakayanahalli Date: Tue, 4 Mar 2008 14:28:37 -0800 Subject: Kprobes: indicate kretprobe support in Kconfig Add CONFIG_HAVE_KRETPROBES to the arch//Kconfig file for relevant architectures with kprobes support. This facilitates easy handling of in-kernel modules (like samples/kprobes/kretprobe_example.c) that depend on kretprobes being present in the kernel. Thanks to Sam Ravnborg for helping make the patch more lean. Per Mathieu's suggestion, added CONFIG_KRETPROBES and fixed up dependencies. Signed-off-by: Ananth N Mavinakayanahalli Acked-by: Mathieu Desnoyers Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/Kconfig | 7 +++++++ arch/arm/Kconfig | 1 + arch/ia64/Kconfig | 1 + arch/powerpc/Kconfig | 1 + arch/s390/Kconfig | 1 + arch/sparc64/Kconfig | 1 + arch/x86/Kconfig | 1 + include/asm-arm/kprobes.h | 1 - include/asm-ia64/kprobes.h | 1 - include/asm-powerpc/kprobes.h | 1 - include/asm-s390/kprobes.h | 1 - include/asm-sparc64/kprobes.h | 2 -- include/asm-x86/kprobes.h | 1 - include/linux/kprobes.h | 6 +++--- kernel/kprobes.c | 9 +++------ 15 files changed, 19 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/arch/Kconfig b/arch/Kconfig index 3d72dc3fc8f..694c9af520b 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -27,5 +27,12 @@ config KPROBES for kernel debugging, non-intrusive instrumentation and testing. If in doubt, say "N". +config KRETPROBES + def_bool y + depends on KPROBES && HAVE_KRETPROBES + config HAVE_KPROBES def_bool n + +config HAVE_KRETPROBES + def_bool n diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 16b82e1272b..955fc53c1c0 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -12,6 +12,7 @@ config ARM select SYS_SUPPORTS_APM_EMULATION select HAVE_OPROFILE select HAVE_KPROBES if (!XIP_KERNEL) + select HAVE_KRETPROBES if (HAVE_KPROBES) help The ARM series is a line of low-power-consumption RISC chip designs licensed by ARM Ltd and targeted at embedded applications and diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index dff9edfc746..56762d3c2a6 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -18,6 +18,7 @@ config IA64 select HAVE_IDE select HAVE_OPROFILE select HAVE_KPROBES + select HAVE_KRETPROBES default y help The Itanium Processor Family is Intel's 64-bit successor to diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 5b8d8382b76..1189d8d6170 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -90,6 +90,7 @@ config PPC select HAVE_IDE select HAVE_OPROFILE select HAVE_KPROBES + select HAVE_KRETPROBES config EARLY_PRINTK bool diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index b21444b681b..9892827b617 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -61,6 +61,7 @@ config S390 def_bool y select HAVE_OPROFILE select HAVE_KPROBES + select HAVE_KRETPROBES source "init/Kconfig" diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig index 3af378ddb6a..463d1be32c9 100644 --- a/arch/sparc64/Kconfig +++ b/arch/sparc64/Kconfig @@ -10,6 +10,7 @@ config SPARC default y select HAVE_OPROFILE select HAVE_KPROBES + select HAVE_KRETPROBES config SPARC64 bool diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 53800b80a20..f41c9538ca3 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -21,6 +21,7 @@ config X86 select HAVE_IDE select HAVE_OPROFILE select HAVE_KPROBES + select HAVE_KRETPROBES select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) diff --git a/include/asm-arm/kprobes.h b/include/asm-arm/kprobes.h index 4e7bd32288a..c042194d3ab 100644 --- a/include/asm-arm/kprobes.h +++ b/include/asm-arm/kprobes.h @@ -20,7 +20,6 @@ #include #include -#define ARCH_SUPPORTS_KRETPROBES #define __ARCH_WANT_KPROBES_INSN_SLOT #define MAX_INSN_SIZE 2 #define MAX_STACK_SIZE 64 /* 32 would probably be OK */ diff --git a/include/asm-ia64/kprobes.h b/include/asm-ia64/kprobes.h index a93ce9ef07f..adbaba14eb0 100644 --- a/include/asm-ia64/kprobes.h +++ b/include/asm-ia64/kprobes.h @@ -82,7 +82,6 @@ struct kprobe_ctlblk { struct prev_kprobe prev_kprobe[ARCH_PREV_KPROBE_SZ]; }; -#define ARCH_SUPPORTS_KRETPROBES #define kretprobe_blacklist_size 0 #define SLOT0_OPCODE_SHIFT (37) diff --git a/include/asm-powerpc/kprobes.h b/include/asm-powerpc/kprobes.h index afabad230db..d0e7701fa1f 100644 --- a/include/asm-powerpc/kprobes.h +++ b/include/asm-powerpc/kprobes.h @@ -80,7 +80,6 @@ typedef unsigned int kprobe_opcode_t; #define is_trap(instr) (IS_TW(instr) || IS_TWI(instr)) #endif -#define ARCH_SUPPORTS_KRETPROBES #define flush_insn_slot(p) do { } while (0) #define kretprobe_blacklist_size 0 diff --git a/include/asm-s390/kprobes.h b/include/asm-s390/kprobes.h index 948db3d0d05..330f68caffe 100644 --- a/include/asm-s390/kprobes.h +++ b/include/asm-s390/kprobes.h @@ -46,7 +46,6 @@ typedef u16 kprobe_opcode_t; ? (MAX_STACK_SIZE) \ : (((unsigned long)current_thread_info()) + THREAD_SIZE - (ADDR))) -#define ARCH_SUPPORTS_KRETPROBES #define kretprobe_blacklist_size 0 #define KPROBE_SWAP_INST 0x10 diff --git a/include/asm-sparc64/kprobes.h b/include/asm-sparc64/kprobes.h index 7237dd87663..5879d71afda 100644 --- a/include/asm-sparc64/kprobes.h +++ b/include/asm-sparc64/kprobes.h @@ -14,8 +14,6 @@ typedef u32 kprobe_opcode_t; #define arch_remove_kprobe(p) do {} while (0) -#define ARCH_SUPPORTS_KRETPROBES - #define flush_insn_slot(p) \ do { flushi(&(p)->ainsn.insn[0]); \ flushi(&(p)->ainsn.insn[1]); \ diff --git a/include/asm-x86/kprobes.h b/include/asm-x86/kprobes.h index 143476a3cb5..61ad7b5d142 100644 --- a/include/asm-x86/kprobes.h +++ b/include/asm-x86/kprobes.h @@ -42,7 +42,6 @@ typedef u8 kprobe_opcode_t; : (((unsigned long)current_thread_info()) + THREAD_SIZE \ - (unsigned long)(ADDR))) -#define ARCH_SUPPORTS_KRETPROBES #define flush_insn_slot(p) do { } while (0) extern const int kretprobe_blacklist_size; diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 4a6ce82ba03..0f28486f636 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -125,11 +125,11 @@ struct jprobe { DECLARE_PER_CPU(struct kprobe *, current_kprobe); DECLARE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); -#ifdef ARCH_SUPPORTS_KRETPROBES +#ifdef CONFIG_KRETPROBES extern void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs); extern int arch_trampoline_kprobe(struct kprobe *p); -#else /* ARCH_SUPPORTS_KRETPROBES */ +#else /* CONFIG_KRETPROBES */ static inline void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) { @@ -138,7 +138,7 @@ static inline int arch_trampoline_kprobe(struct kprobe *p) { return 0; } -#endif /* ARCH_SUPPORTS_KRETPROBES */ +#endif /* CONFIG_KRETPROBES */ /* * Function-return probe - * Note: diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 7a86e643233..e6a61dcbc57 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -678,8 +678,7 @@ void __kprobes unregister_jprobe(struct jprobe *jp) unregister_kprobe(&jp->kp); } -#ifdef ARCH_SUPPORTS_KRETPROBES - +#ifdef CONFIG_KRETPROBES /* * This kprobe pre_handler is registered with every kretprobe. When probe * hits it will set up the return probe. @@ -769,8 +768,7 @@ int __kprobes register_kretprobe(struct kretprobe *rp) return ret; } -#else /* ARCH_SUPPORTS_KRETPROBES */ - +#else /* CONFIG_KRETPROBES */ int __kprobes register_kretprobe(struct kretprobe *rp) { return -ENOSYS; @@ -781,8 +779,7 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p, { return 0; } - -#endif /* ARCH_SUPPORTS_KRETPROBES */ +#endif /* CONFIG_KRETPROBES */ void __kprobes unregister_kretprobe(struct kretprobe *rp) { -- cgit v1.2.3-70-g09d2 From 00f0b8259e48979c37212995d798f3fbd0374690 Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Tue, 4 Mar 2008 14:28:39 -0800 Subject: Memory controller: rename to Memory Resource Controller Rename Memory Controller to Memory Resource Controller. Reflect the same changes in the CONFIG definition for the Memory Resource Controller. Group together the config options for Resource Counters and Memory Resource Controller. Signed-off-by: Balbir Singh Cc: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/controllers/memory.txt | 8 ++++++-- include/linux/cgroup_subsys.h | 2 +- include/linux/memcontrol.h | 4 ++-- include/linux/mm_types.h | 4 ++-- init/Kconfig | 30 +++++++++++++++--------------- mm/Makefile | 2 +- mm/oom_kill.c | 2 +- mm/vmscan.c | 4 ++-- 8 files changed, 30 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/Documentation/controllers/memory.txt b/Documentation/controllers/memory.txt index fba6af45225..866b9cd9a95 100644 --- a/Documentation/controllers/memory.txt +++ b/Documentation/controllers/memory.txt @@ -1,4 +1,8 @@ -Memory Controller +Memory Resource Controller + +NOTE: The Memory Resource Controller has been generically been referred +to as the memory controller in this document. Do not confuse memory controller +used here with the memory controller that is used in hardware. Salient features @@ -152,7 +156,7 @@ The memory controller uses the following hierarchy a. Enable CONFIG_CGROUPS b. Enable CONFIG_RESOURCE_COUNTERS -c. Enable CONFIG_CGROUP_MEM_CONT +c. Enable CONFIG_CGROUP_MEM_RES_CTLR 1. Prepare the cgroups # mkdir -p /cgroups diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index ac6aad98b60..1ddebfc5256 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -37,7 +37,7 @@ SUBSYS(cpuacct) /* */ -#ifdef CONFIG_CGROUP_MEM_CONT +#ifdef CONFIG_CGROUP_MEM_RES_CTLR SUBSYS(mem_cgroup) #endif diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 04075628cb9..a8be8073b9e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -25,7 +25,7 @@ struct page_cgroup; struct page; struct mm_struct; -#ifdef CONFIG_CGROUP_MEM_CONT +#ifdef CONFIG_CGROUP_MEM_RES_CTLR extern void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p); extern void mm_free_cgroup(struct mm_struct *mm); @@ -72,7 +72,7 @@ extern long mem_cgroup_calc_reclaim_active(struct mem_cgroup *mem, extern long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem, struct zone *zone, int priority); -#else /* CONFIG_CGROUP_MEM_CONT */ +#else /* CONFIG_CGROUP_MEM_RES_CTLR */ static inline void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p) { diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 34023c65d46..af190ceab97 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -88,7 +88,7 @@ struct page { void *virtual; /* Kernel virtual address (NULL if not kmapped, ie. highmem) */ #endif /* WANT_PAGE_VIRTUAL */ -#ifdef CONFIG_CGROUP_MEM_CONT +#ifdef CONFIG_CGROUP_MEM_RES_CTLR unsigned long page_cgroup; #endif }; @@ -222,7 +222,7 @@ struct mm_struct { /* aio bits */ rwlock_t ioctx_list_lock; struct kioctx *ioctx_list; -#ifdef CONFIG_CGROUP_MEM_CONT +#ifdef CONFIG_CGROUP_MEM_RES_CTLR struct mem_cgroup *mem_cgroup; #endif }; diff --git a/init/Kconfig b/init/Kconfig index f698a5af500..442850b984b 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -366,6 +366,21 @@ config RESOURCE_COUNTERS infrastructure that works with cgroups depends on CGROUPS +config CGROUP_MEM_RES_CTLR + bool "Memory Resource Controller for Control Groups" + depends on CGROUPS && RESOURCE_COUNTERS + help + Provides a memory resource controller that manages both page cache and + RSS memory. + + Note that setting this option increases fixed memory overhead + associated with each page of memory in the system by 4/8 bytes + and also increases cache misses because struct page on many 64bit + systems will not fit into a single cache line anymore. + + Only enable when you're ok with these trade offs and really + sure you need the memory resource controller. + config SYSFS_DEPRECATED bool "Create deprecated sysfs files" depends on SYSFS @@ -387,21 +402,6 @@ config SYSFS_DEPRECATED If you are using a distro that was released in 2006 or later, it should be safe to say N here. -config CGROUP_MEM_CONT - bool "Memory controller for cgroups" - depends on CGROUPS && RESOURCE_COUNTERS - help - Provides a memory controller that manages both page cache and - RSS memory. - - Note that setting this option increases fixed memory overhead - associated with each page of memory in the system by 4/8 bytes - and also increases cache misses because struct page on many 64bit - systems will not fit into a single cache line anymore. - - Only enable when you're ok with these trade offs and really - sure you need the memory controller. - config PROC_PID_CPUSET bool "Include legacy /proc//cpuset file" depends on CPUSETS diff --git a/mm/Makefile b/mm/Makefile index 9f117bab532..a5b0dd93427 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -32,5 +32,5 @@ obj-$(CONFIG_FS_XIP) += filemap_xip.o obj-$(CONFIG_MIGRATION) += migrate.o obj-$(CONFIG_SMP) += allocpercpu.o obj-$(CONFIG_QUICKLIST) += quicklist.o -obj-$(CONFIG_CGROUP_MEM_CONT) += memcontrol.o +obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 4194b9db010..44b2da11bf4 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -412,7 +412,7 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, return oom_kill_task(p); } -#ifdef CONFIG_CGROUP_MEM_CONT +#ifdef CONFIG_CGROUP_MEM_RES_CTLR void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask) { unsigned long points = 0; diff --git a/mm/vmscan.c b/mm/vmscan.c index a26dabd62fe..106ba10e1ac 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -126,7 +126,7 @@ long vm_total_pages; /* The total number of pages which the VM controls */ static LIST_HEAD(shrinker_list); static DECLARE_RWSEM(shrinker_rwsem); -#ifdef CONFIG_CGROUP_MEM_CONT +#ifdef CONFIG_CGROUP_MEM_RES_CTLR #define scan_global_lru(sc) (!(sc)->mem_cgroup) #else #define scan_global_lru(sc) (1) @@ -1427,7 +1427,7 @@ unsigned long try_to_free_pages(struct zone **zones, int order, gfp_t gfp_mask) return do_try_to_free_pages(zones, gfp_mask, &sc); } -#ifdef CONFIG_CGROUP_MEM_CONT +#ifdef CONFIG_CGROUP_MEM_RES_CTLR unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, gfp_t gfp_mask) -- cgit v1.2.3-70-g09d2 From 735c4fb916e9f83a9350aeb2680d77d01ea75094 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 4 Mar 2008 14:28:40 -0800 Subject: add noinline_for_stack People are adding `noinline' in various places to prevent excess stack consumption due to gcc inlining. But once this is done, it is quite unobvious why the `noinline' is present in the code. We can comment each and every site, or we can use noinline_for_stack. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index d0e17e1657d..dcae0c8d97e 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -138,6 +138,12 @@ extern void __chk_io_ptr(const volatile void __iomem *); #define noinline #endif +/* + * Rather then using noinline to prevent stack consumption, use + * noinline_for_stack instead. For documentaiton reasons. + */ +#define noinline_for_stack noinline + #ifndef __always_inline #define __always_inline inline #endif -- cgit v1.2.3-70-g09d2 From 5cba6d22e35a05adb28fdea191b232501518c455 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 4 Mar 2008 14:28:45 -0800 Subject: ndelay(): switch to C function to avoid 64-bit division We should be able to do ndelay(some_u64), but that can cause a call to __divdi3() to be emitted because the ndelay() macros does a divide. Fix it by switching to static inline which will force the u64 arg to be treated as an unsigned long. udelay() takes an unsigned long arg. [bunk@kernel.org: reported m68k build breakage] Cc: Adrian Bunk Cc: Evgeniy Polyakov Cc: Martin Michlmayr Cc: Herbert Xu Cc: Ralf Baechle Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/delay.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/delay.h b/include/linux/delay.h index 17ddb55430a..54552d21296 100644 --- a/include/linux/delay.h +++ b/include/linux/delay.h @@ -7,6 +7,8 @@ * Delay routines, using a pre-computed "loops_per_jiffy" value. */ +#include + extern unsigned long loops_per_jiffy; #include @@ -32,7 +34,11 @@ extern unsigned long loops_per_jiffy; #endif #ifndef ndelay -#define ndelay(x) udelay(((x)+999)/1000) +static inline void ndelay(unsigned long x) +{ + udelay(DIV_ROUND_UP(x, 1000)); +} +#define ndelay(x) ndelay(x) #endif void calibrate_delay(void); -- cgit v1.2.3-70-g09d2 From 3149be50d3a31df095bcc83d752293da65a37f62 Mon Sep 17 00:00:00 2001 From: Ville Syrjala Date: Tue, 4 Mar 2008 14:28:50 -0800 Subject: sm501: add support for the SM502 programmable PLL SM502 has a programmable PLL which can provide the panel pixel clock instead of the 288MHz and 336MHz PLLs. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Ville Syrjala Cc: Ben Dooks Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/mfd/sm501.c | 163 +++++++++++++++++++++++++++++++++++---------- include/linux/sm501-regs.h | 3 + include/linux/sm501.h | 3 +- 3 files changed, 133 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/drivers/mfd/sm501.c b/drivers/mfd/sm501.c index 4de8d467762..13bac53db69 100644 --- a/drivers/mfd/sm501.c +++ b/drivers/mfd/sm501.c @@ -48,6 +48,7 @@ struct sm501_devdata { unsigned int pdev_id; unsigned int irq; void __iomem *regs; + unsigned int rev; }; #define MHZ (1000 * 1000) @@ -417,46 +418,108 @@ struct sm501_clock { unsigned long mclk; int divider; int shift; + unsigned int m, n, k; }; +/* sm501_calc_clock + * + * Calculates the nearest discrete clock frequency that + * can be achieved with the specified input clock. + * the maximum divisor is 3 or 5 + */ + +static int sm501_calc_clock(unsigned long freq, + struct sm501_clock *clock, + int max_div, + unsigned long mclk, + long *best_diff) +{ + int ret = 0; + int divider; + int shift; + long diff; + + /* try dividers 1 and 3 for CRT and for panel, + try divider 5 for panel only.*/ + + for (divider = 1; divider <= max_div; divider += 2) { + /* try all 8 shift values.*/ + for (shift = 0; shift < 8; shift++) { + /* Calculate difference to requested clock */ + diff = sm501fb_round_div(mclk, divider << shift) - freq; + if (diff < 0) + diff = -diff; + + /* If it is less than the current, use it */ + if (diff < *best_diff) { + *best_diff = diff; + + clock->mclk = mclk; + clock->divider = divider; + clock->shift = shift; + ret = 1; + } + } + } + + return ret; +} + +/* sm501_calc_pll + * + * Calculates the nearest discrete clock frequency that can be + * achieved using the programmable PLL. + * the maximum divisor is 3 or 5 + */ + +static unsigned long sm501_calc_pll(unsigned long freq, + struct sm501_clock *clock, + int max_div) +{ + unsigned long mclk; + unsigned int m, n, k; + long best_diff = 999999999; + + /* + * The SM502 datasheet doesn't specify the min/max values for M and N. + * N = 1 at least doesn't work in practice. + */ + for (m = 2; m <= 255; m++) { + for (n = 2; n <= 127; n++) { + for (k = 0; k <= 1; k++) { + mclk = (24000000UL * m / n) >> k; + + if (sm501_calc_clock(freq, clock, max_div, + mclk, &best_diff)) { + clock->m = m; + clock->n = n; + clock->k = k; + } + } + } + } + + /* Return best clock. */ + return clock->mclk / (clock->divider << clock->shift); +} + /* sm501_select_clock * - * selects nearest discrete clock frequency the SM501 can achive + * Calculates the nearest discrete clock frequency that can be + * achieved using the 288MHz and 336MHz PLLs. * the maximum divisor is 3 or 5 */ + static unsigned long sm501_select_clock(unsigned long freq, struct sm501_clock *clock, int max_div) { unsigned long mclk; - int divider; - int shift; - long diff; long best_diff = 999999999; /* Try 288MHz and 336MHz clocks. */ for (mclk = 288000000; mclk <= 336000000; mclk += 48000000) { - /* try dividers 1 and 3 for CRT and for panel, - try divider 5 for panel only.*/ - - for (divider = 1; divider <= max_div; divider += 2) { - /* try all 8 shift values.*/ - for (shift = 0; shift < 8; shift++) { - /* Calculate difference to requested clock */ - diff = sm501fb_round_div(mclk, divider << shift) - freq; - if (diff < 0) - diff = -diff; - - /* If it is less than the current, use it */ - if (diff < best_diff) { - best_diff = diff; - - clock->mclk = mclk; - clock->divider = divider; - clock->shift = shift; - } - } - } + sm501_calc_clock(freq, clock, max_div, mclk, &best_diff); } /* Return best clock. */ @@ -478,6 +541,7 @@ unsigned long sm501_set_clock(struct device *dev, unsigned long gate = readl(sm->regs + SM501_CURRENT_GATE); unsigned long clock = readl(sm->regs + SM501_CURRENT_CLOCK); unsigned char reg; + unsigned int pll_reg = 0; unsigned long sm501_freq; /* the actual frequency acheived */ struct sm501_clock to; @@ -492,14 +556,28 @@ unsigned long sm501_set_clock(struct device *dev, * requested frequency the value must be multiplied by * 2. This clock also has an additional pre divisor */ - sm501_freq = (sm501_select_clock(2 * req_freq, &to, 5) / 2); - reg=to.shift & 0x07;/* bottom 3 bits are shift */ - if (to.divider == 3) - reg |= 0x08; /* /3 divider required */ - else if (to.divider == 5) - reg |= 0x10; /* /5 divider required */ - if (to.mclk != 288000000) - reg |= 0x20; /* which mclk pll is source */ + if (sm->rev >= 0xC0) { + /* SM502 -> use the programmable PLL */ + sm501_freq = (sm501_calc_pll(2 * req_freq, + &to, 5) / 2); + reg = to.shift & 0x07;/* bottom 3 bits are shift */ + if (to.divider == 3) + reg |= 0x08; /* /3 divider required */ + else if (to.divider == 5) + reg |= 0x10; /* /5 divider required */ + reg |= 0x40; /* select the programmable PLL */ + pll_reg = 0x20000 | (to.k << 15) | (to.n << 8) | to.m; + } else { + sm501_freq = (sm501_select_clock(2 * req_freq, + &to, 5) / 2); + reg = to.shift & 0x07;/* bottom 3 bits are shift */ + if (to.divider == 3) + reg |= 0x08; /* /3 divider required */ + else if (to.divider == 5) + reg |= 0x10; /* /5 divider required */ + if (to.mclk != 288000000) + reg |= 0x20; /* which mclk pll is source */ + } break; case SM501_CLOCK_V2XCLK: @@ -560,6 +638,10 @@ unsigned long sm501_set_clock(struct device *dev, } writel(mode, sm->regs + SM501_POWER_MODE_CONTROL); + + if (pll_reg) + writel(pll_reg, sm->regs + SM501_PROGRAMMABLE_PLL_CONTROL); + sm501_sync_regs(sm); dev_info(sm->dev, "gate %08lx, clock %08lx, mode %08lx\n", @@ -580,15 +662,24 @@ EXPORT_SYMBOL_GPL(sm501_set_clock); * finds the closest available frequency for a given clock */ -unsigned long sm501_find_clock(int clksrc, +unsigned long sm501_find_clock(struct device *dev, + int clksrc, unsigned long req_freq) { + struct sm501_devdata *sm = dev_get_drvdata(dev); unsigned long sm501_freq; /* the frequency achiveable by the 501 */ struct sm501_clock to; switch (clksrc) { case SM501_CLOCK_P2XCLK: - sm501_freq = (sm501_select_clock(2 * req_freq, &to, 5) / 2); + if (sm->rev >= 0xC0) { + /* SM502 -> use the programmable PLL */ + sm501_freq = (sm501_calc_pll(2 * req_freq, + &to, 5) / 2); + } else { + sm501_freq = (sm501_select_clock(2 * req_freq, + &to, 5) / 2); + } break; case SM501_CLOCK_V2XCLK: @@ -895,6 +986,8 @@ static int sm501_init_dev(struct sm501_devdata *sm) dev_info(sm->dev, "SM501 At %p: Version %08lx, %ld Mb, IRQ %d\n", sm->regs, devid, (unsigned long)mem_avail >> 20, sm->irq); + sm->rev = devid & SM501_DEVICEID_REVMASK; + sm501_dump_gate(sm); ret = device_create_file(sm->dev, &dev_attr_dbg_regs); diff --git a/include/linux/sm501-regs.h b/include/linux/sm501-regs.h index 64236b73c72..d53642d2d89 100644 --- a/include/linux/sm501-regs.h +++ b/include/linux/sm501-regs.h @@ -129,11 +129,14 @@ #define SM501_DEVICEID_SM501 (0x05010000) #define SM501_DEVICEID_IDMASK (0xffff0000) +#define SM501_DEVICEID_REVMASK (0x000000ff) #define SM501_PLLCLOCK_COUNT (0x000064) #define SM501_MISC_TIMING (0x000068) #define SM501_CURRENT_SDRAM_CLOCK (0x00006C) +#define SM501_PROGRAMMABLE_PLL_CONTROL (0x000074) + /* GPIO base */ #define SM501_GPIO (0x010000) #define SM501_GPIO_DATA_LOW (0x00) diff --git a/include/linux/sm501.h b/include/linux/sm501.h index 932a9efee8a..bca13454470 100644 --- a/include/linux/sm501.h +++ b/include/linux/sm501.h @@ -24,7 +24,8 @@ extern int sm501_unit_power(struct device *dev, extern unsigned long sm501_set_clock(struct device *dev, int clksrc, unsigned long freq); -extern unsigned long sm501_find_clock(int clksrc, unsigned long req_freq); +extern unsigned long sm501_find_clock(struct device *dev, + int clksrc, unsigned long req_freq); /* sm501_misc_control * -- cgit v1.2.3-70-g09d2 From 040922c04cf2c8ac70be2e88a8a9614ecdb41d2e Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Tue, 4 Mar 2008 14:28:53 -0800 Subject: include falloc.h in header-y Include falloc.h in header-y; it defines a flag for the fallocate sysctl. Signed-off-by: Eric Sandeen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/Kbuild | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index aada32fffec..994df378000 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -61,6 +61,7 @@ header-y += efs_fs_sb.h header-y += elf-fdpic.h header-y += elf-em.h header-y += fadvise.h +header-y += falloc.h header-y += fd.h header-y += fdreg.h header-y += fib_rules.h -- cgit v1.2.3-70-g09d2 From acc4988bcf38f9618886eaeb9802aeacc6978ec2 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 4 Mar 2008 14:29:00 -0800 Subject: markers: add an if(0) to __mark_check_format() Wrap __mark_check_format() into an if(0) to make sure that parameters such as trace_mark(mm_page_alloc, "order %u pfn %lu", order, page?page_to_pfn(page):0); (where page_to_pfn() has side-effects) won't generate code because of the __mark_check_format(). Thanks to Jan Kiszka for reporting this. Signed-off-by: Mathieu Desnoyers Cc: Jan Kiszka Cc: "Frank Ch. Eigler" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/marker.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/marker.h b/include/linux/marker.h index 5df879dc377..430f6adf976 100644 --- a/include/linux/marker.h +++ b/include/linux/marker.h @@ -104,10 +104,16 @@ static inline void marker_update_probe_range(struct marker *begin, #define MARK_NOARGS " " /* To be used for string format validity checking with gcc */ -static inline void __printf(1, 2) __mark_check_format(const char *fmt, ...) +static inline void __printf(1, 2) ___mark_check_format(const char *fmt, ...) { } +#define __mark_check_format(format, args...) \ + do { \ + if (0) \ + ___mark_check_format(format, ## args); \ + } while (0) + extern marker_probe_func __mark_empty_function; extern void marker_probe_cb(const struct marker *mdata, -- cgit v1.2.3-70-g09d2 From bd845e38c7a7251a95a8f2c38aa7fb87140b771d Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 4 Mar 2008 14:29:01 -0800 Subject: memcg: mm_match_cgroup not vm_match_cgroup vm_match_cgroup is a perverse name for a macro to match mm with cgroup: rename it mm_match_cgroup, matching mm_init_cgroup and mm_free_cgroup. Signed-off-by: Hugh Dickins Acked-by: David Rientjes Acked-by: Balbir Singh Acked-by: KAMEZAWA Hiroyuki Cc: Hirokazu Takahashi Cc: YAMAMOTO Takashi Cc: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 4 ++-- mm/memcontrol.c | 2 +- mm/rmap.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index a8be8073b9e..e4247c83c1c 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -48,7 +48,7 @@ extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem); -#define vm_match_cgroup(mm, cgroup) \ +#define mm_match_cgroup(mm, cgroup) \ ((cgroup) == rcu_dereference((mm)->mem_cgroup)) extern int mem_cgroup_prepare_migration(struct page *page); @@ -118,7 +118,7 @@ static inline int mem_cgroup_cache_charge(struct page *page, return 0; } -static inline int vm_match_cgroup(struct mm_struct *mm, struct mem_cgroup *mem) +static inline int mm_match_cgroup(struct mm_struct *mm, struct mem_cgroup *mem) { return 1; } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 631002d085d..41041c0a689 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -399,7 +399,7 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem) int ret; task_lock(task); - ret = task->mm && vm_match_cgroup(task->mm, mem); + ret = task->mm && mm_match_cgroup(task->mm, mem); task_unlock(task); return ret; } diff --git a/mm/rmap.c b/mm/rmap.c index 8fd527c4e2b..0c9a2df06c3 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -321,7 +321,7 @@ static int page_referenced_anon(struct page *page, * counting on behalf of references from different * cgroups */ - if (mem_cont && !vm_match_cgroup(vma->vm_mm, mem_cont)) + if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont)) continue; referenced += page_referenced_one(page, vma, &mapcount); if (!mapcount) @@ -382,7 +382,7 @@ static int page_referenced_file(struct page *page, * counting on behalf of references from different * cgroups */ - if (mem_cont && !vm_match_cgroup(vma->vm_mm, mem_cont)) + if (mem_cont && !mm_match_cgroup(vma->vm_mm, mem_cont)) continue; if ((vma->vm_flags & (VM_LOCKED|VM_MAYSHARE)) == (VM_LOCKED|VM_MAYSHARE)) { -- cgit v1.2.3-70-g09d2 From 427d5416f317681498337ab19218d195edea02d6 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 4 Mar 2008 14:29:03 -0800 Subject: memcg: move_lists on page not page_cgroup Each caller of mem_cgroup_move_lists is having to use page_get_page_cgroup: it's more convenient if it acts upon the page itself not the page_cgroup; and in a later patch this becomes important to handle within memcontrol.c. Signed-off-by: Hugh Dickins Cc: David Rientjes Acked-by: Balbir Singh Acked-by: KAMEZAWA Hiroyuki Cc: Hirokazu Takahashi Cc: YAMAMOTO Takashi Cc: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 5 ++--- mm/memcontrol.c | 4 +++- mm/swap.c | 2 +- mm/vmscan.c | 5 +++-- 4 files changed, 9 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e4247c83c1c..56432ff8d4e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -36,7 +36,7 @@ extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); extern void mem_cgroup_uncharge(struct page_cgroup *pc); extern void mem_cgroup_uncharge_page(struct page *page); -extern void mem_cgroup_move_lists(struct page_cgroup *pc, bool active); +extern void mem_cgroup_move_lists(struct page *page, bool active); extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, struct list_head *dst, unsigned long *scanned, int order, @@ -106,8 +106,7 @@ static inline void mem_cgroup_uncharge_page(struct page *page) { } -static inline void mem_cgroup_move_lists(struct page_cgroup *pc, - bool active) +static inline void mem_cgroup_move_lists(struct page *page, bool active) { } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 41041c0a689..afdd406f618 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -407,11 +407,13 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem) /* * This routine assumes that the appropriate zone's lru lock is already held */ -void mem_cgroup_move_lists(struct page_cgroup *pc, bool active) +void mem_cgroup_move_lists(struct page *page, bool active) { + struct page_cgroup *pc; struct mem_cgroup_per_zone *mz; unsigned long flags; + pc = page_get_page_cgroup(page); if (!pc) return; diff --git a/mm/swap.c b/mm/swap.c index 710a20bb974..d4ec59aa5c4 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -176,7 +176,7 @@ void activate_page(struct page *page) SetPageActive(page); add_page_to_active_list(zone, page); __count_vm_event(PGACTIVATE); - mem_cgroup_move_lists(page_get_page_cgroup(page), true); + mem_cgroup_move_lists(page, true); } spin_unlock_irq(&zone->lru_lock); } diff --git a/mm/vmscan.c b/mm/vmscan.c index 106ba10e1ac..45711585684 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1128,7 +1128,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, ClearPageActive(page); list_move(&page->lru, &zone->inactive_list); - mem_cgroup_move_lists(page_get_page_cgroup(page), false); + mem_cgroup_move_lists(page, false); pgmoved++; if (!pagevec_add(&pvec, page)) { __mod_zone_page_state(zone, NR_INACTIVE, pgmoved); @@ -1156,8 +1156,9 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, VM_BUG_ON(PageLRU(page)); SetPageLRU(page); VM_BUG_ON(!PageActive(page)); + list_move(&page->lru, &zone->active_list); - mem_cgroup_move_lists(page_get_page_cgroup(page), true); + mem_cgroup_move_lists(page, true); pgmoved++; if (!pagevec_add(&pvec, page)) { __mod_zone_page_state(zone, NR_ACTIVE, pgmoved); -- cgit v1.2.3-70-g09d2 From 9442ec9df40d952b0de185ae5638a74970388e01 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 4 Mar 2008 14:29:07 -0800 Subject: memcg: bad page if page_cgroup when free Replace free_hot_cold_page's VM_BUG_ON(page_get_page_cgroup(page)) by a "Bad page state" and clear: most users don't have CONFIG_DEBUG_VM on, and if it were set here, it'd likely cause corruption when the page is reused. Don't use page_assign_page_cgroup to clear it: that should be private to memcontrol.c, and always called with the lock taken; and memmap_init_zone doesn't need it either - like page->mapping and other pointers throughout the kernel, Linux assumes pointers in zeroed structures are NULL pointers. Instead use page_reset_bad_cgroup, added to memcontrol.h for this only. Signed-off-by: Hugh Dickins Cc: David Rientjes Acked-by: Balbir Singh Acked-by: KAMEZAWA Hiroyuki Cc: Hirokazu Takahashi Cc: YAMAMOTO Takashi Cc: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 8 ++++---- mm/memcontrol.c | 27 ++++++++++++--------------- mm/page_alloc.c | 18 ++++++++++++------ 3 files changed, 28 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 56432ff8d4e..70789df7dab 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -29,8 +29,9 @@ struct mm_struct; extern void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p); extern void mm_free_cgroup(struct mm_struct *mm); -extern void page_assign_page_cgroup(struct page *page, - struct page_cgroup *pc); + +#define page_reset_bad_cgroup(page) ((page)->page_cgroup = 0) + extern struct page_cgroup *page_get_page_cgroup(struct page *page); extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); @@ -82,8 +83,7 @@ static inline void mm_free_cgroup(struct mm_struct *mm) { } -static inline void page_assign_page_cgroup(struct page *page, - struct page_cgroup *pc) +static inline void page_reset_bad_cgroup(struct page *page) { } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index afdd406f618..9e170d3c71e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -140,11 +140,17 @@ struct mem_cgroup { /* * We use the lower bit of the page->page_cgroup pointer as a bit spin - * lock. We need to ensure that page->page_cgroup is atleast two - * byte aligned (based on comments from Nick Piggin) + * lock. We need to ensure that page->page_cgroup is at least two + * byte aligned (based on comments from Nick Piggin). But since + * bit_spin_lock doesn't actually set that lock bit in a non-debug + * uniprocessor kernel, we should avoid setting it here too. */ #define PAGE_CGROUP_LOCK_BIT 0x0 -#define PAGE_CGROUP_LOCK (1 << PAGE_CGROUP_LOCK_BIT) +#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) +#define PAGE_CGROUP_LOCK (1 << PAGE_CGROUP_LOCK_BIT) +#else +#define PAGE_CGROUP_LOCK 0x0 +#endif /* * A page_cgroup page is associated with every page descriptor. The @@ -271,19 +277,10 @@ static inline int page_cgroup_locked(struct page *page) &page->page_cgroup); } -void page_assign_page_cgroup(struct page *page, struct page_cgroup *pc) +static void page_assign_page_cgroup(struct page *page, struct page_cgroup *pc) { - int locked; - - /* - * While resetting the page_cgroup we might not hold the - * page_cgroup lock. free_hot_cold_page() is an example - * of such a scenario - */ - if (pc) - VM_BUG_ON(!page_cgroup_locked(page)); - locked = (page->page_cgroup & PAGE_CGROUP_LOCK); - page->page_cgroup = ((unsigned long)pc | locked); + VM_BUG_ON(!page_cgroup_locked(page)); + page->page_cgroup = ((unsigned long)pc | PAGE_CGROUP_LOCK); } struct page_cgroup *page_get_page_cgroup(struct page *page) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e76cf94725c..402a504f122 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -222,13 +222,19 @@ static inline int bad_range(struct zone *zone, struct page *page) static void bad_page(struct page *page) { - printk(KERN_EMERG "Bad page state in process '%s'\n" - KERN_EMERG "page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n" - KERN_EMERG "Trying to fix it up, but a reboot is needed\n" - KERN_EMERG "Backtrace:\n", + void *pc = page_get_page_cgroup(page); + + printk(KERN_EMERG "Bad page state in process '%s'\n" KERN_EMERG + "page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n", current->comm, page, (int)(2*sizeof(unsigned long)), (unsigned long)page->flags, page->mapping, page_mapcount(page), page_count(page)); + if (pc) { + printk(KERN_EMERG "cgroup:%p\n", pc); + page_reset_bad_cgroup(page); + } + printk(KERN_EMERG "Trying to fix it up, but a reboot is needed\n" + KERN_EMERG "Backtrace:\n"); dump_stack(); page->flags &= ~(1 << PG_lru | 1 << PG_private | @@ -454,6 +460,7 @@ static inline int free_pages_check(struct page *page) { if (unlikely(page_mapcount(page) | (page->mapping != NULL) | + (page_get_page_cgroup(page) != NULL) | (page_count(page) != 0) | (page->flags & ( 1 << PG_lru | @@ -603,6 +610,7 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags) { if (unlikely(page_mapcount(page) | (page->mapping != NULL) | + (page_get_page_cgroup(page) != NULL) | (page_count(page) != 0) | (page->flags & ( 1 << PG_lru | @@ -989,7 +997,6 @@ static void free_hot_cold_page(struct page *page, int cold) if (!PageHighMem(page)) debug_check_no_locks_freed(page_address(page), PAGE_SIZE); - VM_BUG_ON(page_get_page_cgroup(page)); arch_free_page(page, 0); kernel_map_pages(page, 1, 0); @@ -2528,7 +2535,6 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, set_page_links(page, zone, nid, pfn); init_page_count(page); reset_page_mapcount(page); - page_assign_page_cgroup(page, NULL); SetPageReserved(page); /* -- cgit v1.2.3-70-g09d2 From 8289546e573d5ff681cdf0fc7a1184cca66fdb55 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 4 Mar 2008 14:29:08 -0800 Subject: memcg: remove mem_cgroup_uncharge Nothing uses mem_cgroup_uncharge apart from mem_cgroup_uncharge_page, (a trivial wrapper around it) and mem_cgroup_end_migration (which does the same as mem_cgroup_uncharge_page). And it often ends up having to lock just to let its caller unlock. Remove it (but leave the silly locking until a later patch). Moved mem_cgroup_cache_charge next to mem_cgroup_charge in memcontrol.h. Signed-off-by: Hugh Dickins Cc: David Rientjes Acked-by: Balbir Singh Acked-by: KAMEZAWA Hiroyuki Cc: Hirokazu Takahashi Cc: YAMAMOTO Takashi Cc: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 20 +++++++------------- mm/memcontrol.c | 23 ++++++++--------------- 2 files changed, 15 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 70789df7dab..8b1c4295848 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -35,7 +35,8 @@ extern void mm_free_cgroup(struct mm_struct *mm); extern struct page_cgroup *page_get_page_cgroup(struct page *page); extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); -extern void mem_cgroup_uncharge(struct page_cgroup *pc); +extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, + gfp_t gfp_mask); extern void mem_cgroup_uncharge_page(struct page *page); extern void mem_cgroup_move_lists(struct page *page, bool active); extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, @@ -45,8 +46,6 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, struct mem_cgroup *mem_cont, int active); extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask); -extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, - gfp_t gfp_mask); int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem); #define mm_match_cgroup(mm, cgroup) \ @@ -92,14 +91,16 @@ static inline struct page_cgroup *page_get_page_cgroup(struct page *page) return NULL; } -static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm, - gfp_t gfp_mask) +static inline int mem_cgroup_charge(struct page *page, + struct mm_struct *mm, gfp_t gfp_mask) { return 0; } -static inline void mem_cgroup_uncharge(struct page_cgroup *pc) +static inline int mem_cgroup_cache_charge(struct page *page, + struct mm_struct *mm, gfp_t gfp_mask) { + return 0; } static inline void mem_cgroup_uncharge_page(struct page *page) @@ -110,13 +111,6 @@ static inline void mem_cgroup_move_lists(struct page *page, bool active) { } -static inline int mem_cgroup_cache_charge(struct page *page, - struct mm_struct *mm, - gfp_t gfp_mask) -{ - return 0; -} - static inline int mm_match_cgroup(struct mm_struct *mm, struct mem_cgroup *mem) { return 1; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 83ba13ad31e..1333d25163b 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -697,20 +697,22 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, /* * Uncharging is always a welcome operation, we never complain, simply - * uncharge. This routine should be called with lock_page_cgroup held + * uncharge. */ -void mem_cgroup_uncharge(struct page_cgroup *pc) +void mem_cgroup_uncharge_page(struct page *page) { + struct page_cgroup *pc; struct mem_cgroup *mem; struct mem_cgroup_per_zone *mz; - struct page *page; unsigned long flags; /* * Check if our page_cgroup is valid */ + lock_page_cgroup(page); + pc = page_get_page_cgroup(page); if (!pc) - return; + goto unlock; if (atomic_dec_and_test(&pc->ref_cnt)) { page = pc->page; @@ -731,12 +733,8 @@ void mem_cgroup_uncharge(struct page_cgroup *pc) } lock_page_cgroup(page); } -} -void mem_cgroup_uncharge_page(struct page *page) -{ - lock_page_cgroup(page); - mem_cgroup_uncharge(page_get_page_cgroup(page)); +unlock: unlock_page_cgroup(page); } @@ -759,12 +757,7 @@ int mem_cgroup_prepare_migration(struct page *page) void mem_cgroup_end_migration(struct page *page) { - struct page_cgroup *pc; - - lock_page_cgroup(page); - pc = page_get_page_cgroup(page); - mem_cgroup_uncharge(pc); - unlock_page_cgroup(page); + mem_cgroup_uncharge_page(page); } /* * We know both *page* and *newpage* are now not-on-LRU and Pg_locked. -- cgit v1.2.3-70-g09d2 From 07f2402b4adbcd0e6822ddc27953b63d4504faec Mon Sep 17 00:00:00 2001 From: Jesper Nilsson Date: Tue, 4 Mar 2008 14:29:23 -0800 Subject: cris: correct usage of __user for copy to and from user space in lib/usercopy and uaccess.h Function __copy_user_zeroing in arch/lib/usercopy.c had the wrong parameter set as __user, and in include/asm-cris/uaccess.h, it was not set at all for some of the calling functions. This will cut the number of warnings quite dramatically when using sparse. While we're here, remove useless CVS log and correct confusing typo. Signed-off-by: Jesper Nilsson Cc: Mikael Starvik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/cris/arch-v10/lib/usercopy.c | 2 +- arch/cris/arch-v32/lib/usercopy.c | 2 +- include/asm-cris/uaccess.h | 53 +++++++-------------------------------- 3 files changed, 11 insertions(+), 46 deletions(-) (limited to 'include') diff --git a/arch/cris/arch-v10/lib/usercopy.c b/arch/cris/arch-v10/lib/usercopy.c index b8e6c0430e5..b0a608da7bd 100644 --- a/arch/cris/arch-v10/lib/usercopy.c +++ b/arch/cris/arch-v10/lib/usercopy.c @@ -193,7 +193,7 @@ __copy_user (void __user *pdst, const void *psrc, unsigned long pn) inaccessible. */ unsigned long -__copy_user_zeroing (void __user *pdst, const void *psrc, unsigned long pn) +__copy_user_zeroing(void *pdst, const void __user *psrc, unsigned long pn) { /* We want the parameters put in special registers. Make sure the compiler is able to make something useful of this. diff --git a/arch/cris/arch-v32/lib/usercopy.c b/arch/cris/arch-v32/lib/usercopy.c index 04d0cf35a27..0b5b70d5f58 100644 --- a/arch/cris/arch-v32/lib/usercopy.c +++ b/arch/cris/arch-v32/lib/usercopy.c @@ -161,7 +161,7 @@ __copy_user (void __user *pdst, const void *psrc, unsigned long pn) inaccessible. */ unsigned long -__copy_user_zeroing (void __user *pdst, const void *psrc, unsigned long pn) +__copy_user_zeroing(void *pdst, const void __user *psrc, unsigned long pn) { /* We want the parameters put in special registers. Make sure the compiler is able to make something useful of this. diff --git a/include/asm-cris/uaccess.h b/include/asm-cris/uaccess.h index 69d48a2dc8e..ea11eaf0e92 100644 --- a/include/asm-cris/uaccess.h +++ b/include/asm-cris/uaccess.h @@ -1,43 +1,6 @@ /* * Authors: Bjorn Wesen (bjornw@axis.com) * Hans-Peter Nilsson (hp@axis.com) - * - * $Log: uaccess.h,v $ - * Revision 1.8 2001/10/29 13:01:48 bjornw - * Removed unused variable tmp2 in strnlen_user - * - * Revision 1.7 2001/10/02 12:44:52 hp - * Add support for 64-bit put_user/get_user - * - * Revision 1.6 2001/10/01 14:51:17 bjornw - * Added register prefixes and removed underscores - * - * Revision 1.5 2000/10/25 03:33:21 hp - * - Provide implementation for everything else but get_user and put_user; - * copying inline to/from user for constant length 0..16, 20, 24, and - * clearing for 0..4, 8, 12, 16, 20, 24, strncpy_from_user and strnlen_user - * always inline. - * - Constraints for destination addr in get_user cannot be memory, only reg. - * - Correct labels for PC at expected fault points. - * - Nits with assembly code. - * - Don't use statement expressions without value; use "do {} while (0)". - * - Return correct values from __generic_... functions. - * - * Revision 1.4 2000/09/12 16:28:25 bjornw - * * Removed comments from the get/put user asm code - * * Constrains for destination addr in put_user cannot be memory, only reg - * - * Revision 1.3 2000/09/12 14:30:20 bjornw - * MAX_ADDR_USER does not exist anymore - * - * Revision 1.2 2000/07/13 15:52:48 bjornw - * New user-access functions - * - * Revision 1.1.1.1 2000/07/10 16:32:31 bjornw - * CRIS architecture, working draft - * - * - * */ /* Asm:s have been tweaked (within the domain of correctness) to give @@ -209,9 +172,9 @@ extern long __get_user_bad(void); /* More complex functions. Most are inline, but some call functions that live in lib/usercopy.c */ -extern unsigned long __copy_user(void *to, const void *from, unsigned long n); -extern unsigned long __copy_user_zeroing(void *to, const void *from, unsigned long n); -extern unsigned long __do_clear_user(void *to, unsigned long n); +extern unsigned long __copy_user(void __user *to, const void *from, unsigned long n); +extern unsigned long __copy_user_zeroing(void *to, const void __user *from, unsigned long n); +extern unsigned long __do_clear_user(void __user *to, unsigned long n); static inline unsigned long __generic_copy_to_user(void __user *to, const void *from, unsigned long n) @@ -253,7 +216,7 @@ strncpy_from_user(char *dst, const char __user *src, long count) } -/* Note that if these expand awfully if made into switch constructs, so +/* Note that these expand awfully if made into switch constructs, so don't do that. */ static inline unsigned long @@ -407,19 +370,21 @@ __constant_clear_user(void __user *to, unsigned long n) */ static inline unsigned long -__generic_copy_from_user_nocheck(void *to, const void *from, unsigned long n) +__generic_copy_from_user_nocheck(void *to, const void __user *from, + unsigned long n) { return __copy_user_zeroing(to,from,n); } static inline unsigned long -__generic_copy_to_user_nocheck(void *to, const void *from, unsigned long n) +__generic_copy_to_user_nocheck(void __user *to, const void *from, + unsigned long n) { return __copy_user(to,from,n); } static inline unsigned long -__generic_clear_user_nocheck(void *to, unsigned long n) +__generic_clear_user_nocheck(void __user *to, unsigned long n) { return __do_clear_user(to,n); } -- cgit v1.2.3-70-g09d2 From 87ffbe679e21cbf82ff8e3302520ff0ea2beed9a Mon Sep 17 00:00:00 2001 From: Jesper Nilsson Date: Tue, 4 Mar 2008 14:29:23 -0800 Subject: cris: correct syscall numbers in unistd.h for timerfd_settime and timerfd_gettime Last commit for unistd was not correct, it only had a partial update of syscall numbers for __NR_timerfd_settime and __NR_timerfd_gettime. Also, NR_syscalls was not incremented for the new syscalls. Signed-off-by: Jesper Nilsson Cc: Mikael Starvik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-cris/unistd.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-cris/unistd.h b/include/asm-cris/unistd.h index 007cb16a6b5..76398ef87e9 100644 --- a/include/asm-cris/unistd.h +++ b/include/asm-cris/unistd.h @@ -329,12 +329,12 @@ #define __NR_timerfd_create 322 #define __NR_eventfd 323 #define __NR_fallocate 324 -#define __NR_timerfd_settime 315 -#define __NR_timerfd_gettime 316 +#define __NR_timerfd_settime 325 +#define __NR_timerfd_gettime 326 #ifdef __KERNEL__ -#define NR_syscalls 325 +#define NR_syscalls 327 #include -- cgit v1.2.3-70-g09d2 From 3715863aa142c4f4c5208f5f3e5e9bac06006d2f Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 4 Mar 2008 14:29:27 -0800 Subject: iommu: export iommu_is_span_boundary helper function iommu_is_span_boundary is used internally in the IOMMU helper (lib/iommu-helper.c), a primitive function that judges whether a memory area spans LLD's segment boundary or not. It's difficult to convert some IOMMUs to use the IOMMU helper but iommu_is_span_boundary is still useful for them. So this patch exports it. This is needed for the parisc iommu fixes. Signed-off-by: FUJITA Tomonori Cc: Kyle McMartin Cc: Matthew Wilcox Cc: Grant Grundler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/iommu-helper.h | 3 +++ lib/iommu-helper.c | 10 ++++++---- 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/iommu-helper.h b/include/linux/iommu-helper.h index 4dd4c04ff2f..c975caf7538 100644 --- a/include/linux/iommu-helper.h +++ b/include/linux/iommu-helper.h @@ -1,3 +1,6 @@ +extern int iommu_is_span_boundary(unsigned int index, unsigned int nr, + unsigned long shift, + unsigned long boundary_size); extern unsigned long iommu_area_alloc(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, unsigned long shift, diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c index 495575a59ca..a3b8d4c3f77 100644 --- a/lib/iommu-helper.c +++ b/lib/iommu-helper.c @@ -40,10 +40,12 @@ static inline void set_bit_area(unsigned long *map, unsigned long i, } } -static inline int is_span_boundary(unsigned int index, unsigned int nr, - unsigned long shift, - unsigned long boundary_size) +int iommu_is_span_boundary(unsigned int index, unsigned int nr, + unsigned long shift, + unsigned long boundary_size) { + BUG_ON(!is_power_of_2(boundary_size)); + shift = (shift + index) & (boundary_size - 1); return shift + nr > boundary_size; } @@ -57,7 +59,7 @@ unsigned long iommu_area_alloc(unsigned long *map, unsigned long size, again: index = find_next_zero_area(map, size, start, nr, align_mask); if (index != -1) { - if (is_span_boundary(index, nr, shift, boundary_size)) { + if (iommu_is_span_boundary(index, nr, shift, boundary_size)) { /* we could do more effectively */ start = index + 1; goto again; -- cgit v1.2.3-70-g09d2 From 8311c29d40235062a843f4a8e8a70a44af6fe4c9 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 4 Mar 2008 14:29:30 -0800 Subject: md: reduce CPU wastage on idle md array with a write-intent bitmap On an md array with a write-intent bitmap, a thread wakes up every few seconds and scans the bitmap looking for work to do. If the array is idle, there will be no work to do, but a lot of scanning is done to discover this. So cache the fact that the bitmap is completely clean, and avoid scanning the whole bitmap when the cache is known to be clean. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/bitmap.c | 19 +++++++++++++++++-- include/linux/raid/bitmap.h | 2 ++ 2 files changed, 19 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 7aeceedcf7d..831aed9c56f 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -1047,6 +1047,11 @@ void bitmap_daemon_work(struct bitmap *bitmap) if (time_before(jiffies, bitmap->daemon_lastrun + bitmap->daemon_sleep*HZ)) return; bitmap->daemon_lastrun = jiffies; + if (bitmap->allclean) { + bitmap->mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT; + return; + } + bitmap->allclean = 1; for (j = 0; j < bitmap->chunks; j++) { bitmap_counter_t *bmc; @@ -1068,8 +1073,10 @@ void bitmap_daemon_work(struct bitmap *bitmap) clear_page_attr(bitmap, page, BITMAP_PAGE_NEEDWRITE); spin_unlock_irqrestore(&bitmap->lock, flags); - if (need_write) + if (need_write) { write_page(bitmap, page, 0); + bitmap->allclean = 0; + } continue; } @@ -1098,6 +1105,9 @@ void bitmap_daemon_work(struct bitmap *bitmap) /* if (j < 100) printk("bitmap: j=%lu, *bmc = 0x%x\n", j, *bmc); */ + if (*bmc) + bitmap->allclean = 0; + if (*bmc == 2) { *bmc=1; /* maybe clear the bit next time */ set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); @@ -1132,6 +1142,8 @@ void bitmap_daemon_work(struct bitmap *bitmap) } } + if (bitmap->allclean == 0) + bitmap->mddev->thread->timeout = bitmap->daemon_sleep * HZ; } static bitmap_counter_t *bitmap_get_counter(struct bitmap *bitmap, @@ -1226,6 +1238,7 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect sectors -= blocks; else sectors = 0; } + bitmap->allclean = 0; return 0; } @@ -1296,6 +1309,7 @@ int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks, } } spin_unlock_irq(&bitmap->lock); + bitmap->allclean = 0; return rv; } @@ -1332,6 +1346,7 @@ void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int ab } unlock: spin_unlock_irqrestore(&bitmap->lock, flags); + bitmap->allclean = 0; } void bitmap_close_sync(struct bitmap *bitmap) @@ -1399,7 +1414,7 @@ static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int n set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); } spin_unlock_irq(&bitmap->lock); - + bitmap->allclean = 0; } /* dirty the memory and file bits for bitmap chunks "s" to "e" */ diff --git a/include/linux/raid/bitmap.h b/include/linux/raid/bitmap.h index e51b531cd0b..47fbcba1185 100644 --- a/include/linux/raid/bitmap.h +++ b/include/linux/raid/bitmap.h @@ -235,6 +235,8 @@ struct bitmap { unsigned long flags; + int allclean; + unsigned long max_write_behind; /* write-behind mode */ atomic_t behind_writes; -- cgit v1.2.3-70-g09d2 From d0fae18f1b53a1d39135a968792be034bdf7ff26 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 4 Mar 2008 14:29:31 -0800 Subject: md: clean up irregularity with raid autodetect When a raid1 array is stopped, all components currently get added to the list for auto-detection. However we should really only add components that were found by autodetection in the first place. So add a flag to record that information, and use it. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 4 +++- include/linux/raid/md_k.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/md/md.c b/drivers/md/md.c index b375de5c1af..a71241c5ae7 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1503,7 +1503,8 @@ static void export_rdev(mdk_rdev_t * rdev) free_disk_sb(rdev); list_del_init(&rdev->same_set); #ifndef MODULE - md_autodetect_dev(rdev->bdev->bd_dev); + if (test_bit(AutoDetected, &rdev->flags)) + md_autodetect_dev(rdev->bdev->bd_dev); #endif unlock_rdev(rdev); kobject_put(&rdev->kobj); @@ -6025,6 +6026,7 @@ static void autostart_arrays(int part) MD_BUG(); continue; } + set_bit(AutoDetected, &rdev->flags); list_add(&rdev->same_set, &pending_raid_disks); i_passed++; } diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 85a068bab62..7bb6d1abf71 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -83,6 +83,7 @@ struct mdk_rdev_s #define BarriersNotsupp 5 /* BIO_RW_BARRIER is not supported */ #define AllReserved 6 /* If whole device is reserved for * one array */ +#define AutoDetected 7 /* added by auto-detect */ int desc_nr; /* descriptor index in the superblock */ int raid_disk; /* role of device in array */ -- cgit v1.2.3-70-g09d2 From 45ab33b6c190c4a8c58f1d13be2ff89ee62024ba Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 4 Mar 2008 13:26:55 -0600 Subject: [SCSI] iscsi class: regression - fix races with state manipulation and blocking/unblocking For qla4xxx, we could be starting a session, but some error (network, target, IO from a device that got started, etc) could cause the session to fail and curring the block/unblock and state manipulation could race with each other. This patch just has those operations done in the single threaded iscsi eh work queue, so that way they are serialized. Signed-off-by: Mike Christie Signed-off-by: James Bottomley --- drivers/scsi/scsi_transport_iscsi.c | 76 +++++++++++++++++++++++-------------- include/scsi/scsi_transport_iscsi.h | 2 + 2 files changed, 50 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index dfb026b95a6..ca7bb6f63bd 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -373,24 +373,25 @@ static void session_recovery_timedout(struct work_struct *work) scsi_target_unblock(&session->dev); } -static void __iscsi_unblock_session(struct iscsi_cls_session *session) -{ - if (!cancel_delayed_work(&session->recovery_work)) - flush_workqueue(iscsi_eh_timer_workq); - scsi_target_unblock(&session->dev); -} - -void iscsi_unblock_session(struct iscsi_cls_session *session) +static void __iscsi_unblock_session(struct work_struct *work) { + struct iscsi_cls_session *session = + container_of(work, struct iscsi_cls_session, + unblock_work); struct Scsi_Host *shost = iscsi_session_to_shost(session); struct iscsi_host *ihost = shost->shost_data; unsigned long flags; + /* + * The recovery and unblock work get run from the same workqueue, + * so try to cancel it if it was going to run after this unblock. + */ + cancel_delayed_work(&session->recovery_work); spin_lock_irqsave(&session->lock, flags); session->state = ISCSI_SESSION_LOGGED_IN; spin_unlock_irqrestore(&session->lock, flags); - - __iscsi_unblock_session(session); + /* start IO */ + scsi_target_unblock(&session->dev); /* * Only do kernel scanning if the driver is properly hooked into * the async scanning code (drivers like iscsi_tcp do login and @@ -401,20 +402,43 @@ void iscsi_unblock_session(struct iscsi_cls_session *session) atomic_inc(&ihost->nr_scans); } } + +/** + * iscsi_unblock_session - set a session as logged in and start IO. + * @session: iscsi session + * + * Mark a session as ready to accept IO. + */ +void iscsi_unblock_session(struct iscsi_cls_session *session) +{ + queue_work(iscsi_eh_timer_workq, &session->unblock_work); + /* + * make sure all the events have completed before tell the driver + * it is safe + */ + flush_workqueue(iscsi_eh_timer_workq); +} EXPORT_SYMBOL_GPL(iscsi_unblock_session); -void iscsi_block_session(struct iscsi_cls_session *session) +static void __iscsi_block_session(struct work_struct *work) { + struct iscsi_cls_session *session = + container_of(work, struct iscsi_cls_session, + block_work); unsigned long flags; spin_lock_irqsave(&session->lock, flags); session->state = ISCSI_SESSION_FAILED; spin_unlock_irqrestore(&session->lock, flags); - scsi_target_block(&session->dev); queue_delayed_work(iscsi_eh_timer_workq, &session->recovery_work, session->recovery_tmo * HZ); } + +void iscsi_block_session(struct iscsi_cls_session *session) +{ + queue_work(iscsi_eh_timer_workq, &session->block_work); +} EXPORT_SYMBOL_GPL(iscsi_block_session); static void __iscsi_unbind_session(struct work_struct *work) @@ -463,6 +487,8 @@ iscsi_alloc_session(struct Scsi_Host *shost, INIT_DELAYED_WORK(&session->recovery_work, session_recovery_timedout); INIT_LIST_HEAD(&session->host_list); INIT_LIST_HEAD(&session->sess_list); + INIT_WORK(&session->unblock_work, __iscsi_unblock_session); + INIT_WORK(&session->block_work, __iscsi_block_session); INIT_WORK(&session->unbind_work, __iscsi_unbind_session); INIT_WORK(&session->scan_work, iscsi_scan_session); spin_lock_init(&session->lock); @@ -575,24 +601,25 @@ void iscsi_remove_session(struct iscsi_cls_session *session) list_del(&session->sess_list); spin_unlock_irqrestore(&sesslock, flags); + /* make sure there are no blocks/unblocks queued */ + flush_workqueue(iscsi_eh_timer_workq); + /* make sure the timedout callout is not running */ + if (!cancel_delayed_work(&session->recovery_work)) + flush_workqueue(iscsi_eh_timer_workq); /* * If we are blocked let commands flow again. The lld or iscsi * layer should set up the queuecommand to fail commands. + * We assume that LLD will not be calling block/unblock while + * removing the session. */ spin_lock_irqsave(&session->lock, flags); session->state = ISCSI_SESSION_FREE; spin_unlock_irqrestore(&session->lock, flags); - __iscsi_unblock_session(session); - __iscsi_unbind_session(&session->unbind_work); - /* flush running scans */ + scsi_target_unblock(&session->dev); + /* flush running scans then delete devices */ flush_workqueue(ihost->scan_workq); - /* - * If the session dropped while removing devices then we need to make - * sure it is not blocked - */ - if (!cancel_delayed_work(&session->recovery_work)) - flush_workqueue(iscsi_eh_timer_workq); + __iscsi_unbind_session(&session->unbind_work); /* hw iscsi may not have removed all connections from session */ err = device_for_each_child(&session->dev, NULL, @@ -802,23 +829,16 @@ EXPORT_SYMBOL_GPL(iscsi_recv_pdu); void iscsi_conn_error(struct iscsi_cls_conn *conn, enum iscsi_err error) { - struct iscsi_cls_session *session = iscsi_conn_to_session(conn); struct nlmsghdr *nlh; struct sk_buff *skb; struct iscsi_uevent *ev; struct iscsi_internal *priv; int len = NLMSG_SPACE(sizeof(*ev)); - unsigned long flags; priv = iscsi_if_transport_lookup(conn->transport); if (!priv) return; - spin_lock_irqsave(&session->lock, flags); - if (session->state == ISCSI_SESSION_LOGGED_IN) - session->state = ISCSI_SESSION_FAILED; - spin_unlock_irqrestore(&session->lock, flags); - skb = alloc_skb(len, GFP_ATOMIC); if (!skb) { iscsi_cls_conn_printk(KERN_ERR, conn, "gracefully ignored " diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index dbc96ef4cc7..aab1eae2ec4 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -177,6 +177,8 @@ struct iscsi_cls_session { struct list_head host_list; struct iscsi_transport *transport; spinlock_t lock; + struct work_struct block_work; + struct work_struct unblock_work; struct work_struct scan_work; struct work_struct unbind_work; -- cgit v1.2.3-70-g09d2 From e0007529893c1c064be90bd21422ca0da4a0198e Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Wed, 5 Mar 2008 10:31:54 -0500 Subject: LSM/SELinux: Interfaces to allow FS to control mount options Introduce new LSM interfaces to allow an FS to deal with their own mount options. This includes a new string parsing function exported from the LSM that an FS can use to get a security data blob and a new security data blob. This is particularly useful for an FS which uses binary mount data, like NFS, which does not pass strings into the vfs to be handled by the loaded LSM. Also fix a BUG() in both SELinux and SMACK when dealing with binary mount data. If the binary mount data is less than one page the copy_page() in security_sb_copy_data() can cause an illegal page fault and boom. Remove all NFSisms from the SELinux code since they were broken by past NFS changes. Signed-off-by: Eric Paris Acked-by: Stephen Smalley Acked-by: Casey Schaufler Signed-off-by: James Morris --- fs/super.c | 4 +- include/linux/security.h | 99 ++++++++++++++------ security/dummy.c | 23 ++--- security/security.c | 23 +++-- security/selinux/hooks.c | 175 +++++++++++++++++++----------------- security/selinux/include/security.h | 5 ++ security/smack/smack_lsm.c | 9 +- 7 files changed, 204 insertions(+), 134 deletions(-) (limited to 'include') diff --git a/fs/super.c b/fs/super.c index 88811f60c8d..010446d8c40 100644 --- a/fs/super.c +++ b/fs/super.c @@ -870,12 +870,12 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void if (!mnt) goto out; - if (data) { + if (data && !(type->fs_flags & FS_BINARY_MOUNTDATA)) { secdata = alloc_secdata(); if (!secdata) goto out_mnt; - error = security_sb_copy_data(type, data, secdata); + error = security_sb_copy_data(data, secdata); if (error) goto out_free_secdata; } diff --git a/include/linux/security.h b/include/linux/security.h index fe52cdeab0a..b07357ca213 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -34,12 +34,6 @@ #include #include -/* only a char in selinux superblock security struct flags */ -#define FSCONTEXT_MNT 0x01 -#define CONTEXT_MNT 0x02 -#define ROOTCONTEXT_MNT 0x04 -#define DEFCONTEXT_MNT 0x08 - extern unsigned securebits; struct ctl_table; @@ -114,6 +108,32 @@ struct request_sock; #ifdef CONFIG_SECURITY +struct security_mnt_opts { + char **mnt_opts; + int *mnt_opts_flags; + int num_mnt_opts; +}; + +static inline void security_init_mnt_opts(struct security_mnt_opts *opts) +{ + opts->mnt_opts = NULL; + opts->mnt_opts_flags = NULL; + opts->num_mnt_opts = 0; +} + +static inline void security_free_mnt_opts(struct security_mnt_opts *opts) +{ + int i; + if (opts->mnt_opts) + for(i = 0; i < opts->num_mnt_opts; i++) + kfree(opts->mnt_opts[i]); + kfree(opts->mnt_opts); + opts->mnt_opts = NULL; + kfree(opts->mnt_opts_flags); + opts->mnt_opts_flags = NULL; + opts->num_mnt_opts = 0; +} + /** * struct security_operations - main security structure * @@ -262,19 +282,19 @@ struct request_sock; * @sb_get_mnt_opts: * Get the security relevant mount options used for a superblock * @sb the superblock to get security mount options from - * @mount_options array for pointers to mount options - * @mount_flags array of ints specifying what each mount options is - * @num_opts number of options in the arrays + * @opts binary data structure containing all lsm mount data * @sb_set_mnt_opts: * Set the security relevant mount options used for a superblock * @sb the superblock to set security mount options for - * @mount_options array for pointers to mount options - * @mount_flags array of ints specifying what each mount options is - * @num_opts number of options in the arrays + * @opts binary data structure containing all lsm mount data * @sb_clone_mnt_opts: * Copy all security options from a given superblock to another * @oldsb old superblock which contain information to clone * @newsb new superblock which needs filled in + * @sb_parse_opts_str: + * Parse a string of security data filling in the opts structure + * @options string containing all mount options known by the LSM + * @opts binary data structure usable by the LSM * * Security hooks for inode operations. * @@ -1238,8 +1258,7 @@ struct security_operations { int (*sb_alloc_security) (struct super_block * sb); void (*sb_free_security) (struct super_block * sb); - int (*sb_copy_data)(struct file_system_type *type, - void *orig, void *copy); + int (*sb_copy_data)(char *orig, char *copy); int (*sb_kern_mount) (struct super_block *sb, void *data); int (*sb_statfs) (struct dentry *dentry); int (*sb_mount) (char *dev_name, struct nameidata * nd, @@ -1257,12 +1276,12 @@ struct security_operations { void (*sb_post_pivotroot) (struct nameidata * old_nd, struct nameidata * new_nd); int (*sb_get_mnt_opts) (const struct super_block *sb, - char ***mount_options, int **flags, - int *num_opts); - int (*sb_set_mnt_opts) (struct super_block *sb, char **mount_options, - int *flags, int num_opts); + struct security_mnt_opts *opts); + int (*sb_set_mnt_opts) (struct super_block *sb, + struct security_mnt_opts *opts); void (*sb_clone_mnt_opts) (const struct super_block *oldsb, struct super_block *newsb); + int (*sb_parse_opts_str) (char *options, struct security_mnt_opts *opts); int (*inode_alloc_security) (struct inode *inode); void (*inode_free_security) (struct inode *inode); @@ -1507,7 +1526,7 @@ int security_bprm_check(struct linux_binprm *bprm); int security_bprm_secureexec(struct linux_binprm *bprm); int security_sb_alloc(struct super_block *sb); void security_sb_free(struct super_block *sb); -int security_sb_copy_data(struct file_system_type *type, void *orig, void *copy); +int security_sb_copy_data(char *orig, char *copy); int security_sb_kern_mount(struct super_block *sb, void *data); int security_sb_statfs(struct dentry *dentry); int security_sb_mount(char *dev_name, struct nameidata *nd, @@ -1520,12 +1539,12 @@ void security_sb_post_remount(struct vfsmount *mnt, unsigned long flags, void *d void security_sb_post_addmount(struct vfsmount *mnt, struct nameidata *mountpoint_nd); int security_sb_pivotroot(struct nameidata *old_nd, struct nameidata *new_nd); void security_sb_post_pivotroot(struct nameidata *old_nd, struct nameidata *new_nd); -int security_sb_get_mnt_opts(const struct super_block *sb, char ***mount_options, - int **flags, int *num_opts); -int security_sb_set_mnt_opts(struct super_block *sb, char **mount_options, - int *flags, int num_opts); +int security_sb_get_mnt_opts(const struct super_block *sb, + struct security_mnt_opts *opts); +int security_sb_set_mnt_opts(struct super_block *sb, struct security_mnt_opts *opts); void security_sb_clone_mnt_opts(const struct super_block *oldsb, struct super_block *newsb); +int security_sb_parse_opts_str(char *options, struct security_mnt_opts *opts); int security_inode_alloc(struct inode *inode); void security_inode_free(struct inode *inode); @@ -1635,6 +1654,16 @@ int security_secctx_to_secid(char *secdata, u32 seclen, u32 *secid); void security_release_secctx(char *secdata, u32 seclen); #else /* CONFIG_SECURITY */ +struct security_mnt_opts { +}; + +static inline void security_init_mnt_opts(struct security_mnt_opts *opts) +{ +} + +static inline void security_free_mnt_opts(struct security_mnt_opts *opts) +{ +} /* * This is the default capabilities functionality. Most of these functions @@ -1762,8 +1791,7 @@ static inline int security_sb_alloc (struct super_block *sb) static inline void security_sb_free (struct super_block *sb) { } -static inline int security_sb_copy_data (struct file_system_type *type, - void *orig, void *copy) +static inline int security_sb_copy_data (char *orig, char *copy) { return 0; } @@ -1819,6 +1847,27 @@ static inline int security_sb_pivotroot (struct nameidata *old_nd, static inline void security_sb_post_pivotroot (struct nameidata *old_nd, struct nameidata *new_nd) { } +static inline int security_sb_get_mnt_opts(const struct super_block *sb, + struct security_mnt_opts *opts) +{ + security_init_mnt_opts(opts); + return 0; +} + +static inline int security_sb_set_mnt_opts(struct super_block *sb, + struct security_mnt_opts *opts) +{ + return 0; +} + +static inline void security_sb_clone_mnt_opts(const struct super_block *oldsb, + struct super_block *newsb) +{ } + +static inline int security_sb_parse_opts_str(char *options, struct security_mnt_opts *opts) +{ + return 0; +} static inline int security_inode_alloc (struct inode *inode) { diff --git a/security/dummy.c b/security/dummy.c index 649326bf64e..78d8f92310a 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -181,8 +181,7 @@ static void dummy_sb_free_security (struct super_block *sb) return; } -static int dummy_sb_copy_data (struct file_system_type *type, - void *orig, void *copy) +static int dummy_sb_copy_data (char *orig, char *copy) { return 0; } @@ -245,19 +244,17 @@ static void dummy_sb_post_pivotroot (struct nameidata *old_nd, struct nameidata return; } -static int dummy_sb_get_mnt_opts(const struct super_block *sb, char ***mount_options, - int **flags, int *num_opts) +static int dummy_sb_get_mnt_opts(const struct super_block *sb, + struct security_mnt_opts *opts) { - *mount_options = NULL; - *flags = NULL; - *num_opts = 0; + security_init_mnt_opts(opts); return 0; } -static int dummy_sb_set_mnt_opts(struct super_block *sb, char **mount_options, - int *flags, int num_opts) +static int dummy_sb_set_mnt_opts(struct super_block *sb, + struct security_mnt_opts *opts) { - if (unlikely(num_opts)) + if (unlikely(opts->num_mnt_opts)) return -EOPNOTSUPP; return 0; } @@ -268,6 +265,11 @@ static void dummy_sb_clone_mnt_opts(const struct super_block *oldsb, return; } +static int dummy_sb_parse_opts_str(char *options, struct security_mnt_opts *opts) +{ + return 0; +} + static int dummy_inode_alloc_security (struct inode *inode) { return 0; @@ -1028,6 +1030,7 @@ void security_fixup_ops (struct security_operations *ops) set_to_dummy_if_null(ops, sb_get_mnt_opts); set_to_dummy_if_null(ops, sb_set_mnt_opts); set_to_dummy_if_null(ops, sb_clone_mnt_opts); + set_to_dummy_if_null(ops, sb_parse_opts_str); set_to_dummy_if_null(ops, inode_alloc_security); set_to_dummy_if_null(ops, inode_free_security); set_to_dummy_if_null(ops, inode_init_security); diff --git a/security/security.c b/security/security.c index d15e56cbaad..b1387a6b416 100644 --- a/security/security.c +++ b/security/security.c @@ -244,10 +244,11 @@ void security_sb_free(struct super_block *sb) security_ops->sb_free_security(sb); } -int security_sb_copy_data(struct file_system_type *type, void *orig, void *copy) +int security_sb_copy_data(char *orig, char *copy) { - return security_ops->sb_copy_data(type, orig, copy); + return security_ops->sb_copy_data(orig, copy); } +EXPORT_SYMBOL(security_sb_copy_data); int security_sb_kern_mount(struct super_block *sb, void *data) { @@ -306,24 +307,30 @@ void security_sb_post_pivotroot(struct nameidata *old_nd, struct nameidata *new_ } int security_sb_get_mnt_opts(const struct super_block *sb, - char ***mount_options, - int **flags, int *num_opts) + struct security_mnt_opts *opts) { - return security_ops->sb_get_mnt_opts(sb, mount_options, flags, num_opts); + return security_ops->sb_get_mnt_opts(sb, opts); } int security_sb_set_mnt_opts(struct super_block *sb, - char **mount_options, - int *flags, int num_opts) + struct security_mnt_opts *opts) { - return security_ops->sb_set_mnt_opts(sb, mount_options, flags, num_opts); + return security_ops->sb_set_mnt_opts(sb, opts); } +EXPORT_SYMBOL(security_sb_set_mnt_opts); void security_sb_clone_mnt_opts(const struct super_block *oldsb, struct super_block *newsb) { security_ops->sb_clone_mnt_opts(oldsb, newsb); } +EXPORT_SYMBOL(security_sb_clone_mnt_opts); + +int security_sb_parse_opts_str(char *options, struct security_mnt_opts *opts) +{ + return security_ops->sb_parse_opts_str(options, opts); +} +EXPORT_SYMBOL(security_sb_parse_opts_str); int security_inode_alloc(struct inode *inode) { diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 75c2e99bfb8..4bf4807f2d4 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -443,8 +443,7 @@ out: * mount options, or whatever. */ static int selinux_get_mnt_opts(const struct super_block *sb, - char ***mount_options, int **mnt_opts_flags, - int *num_opts) + struct security_mnt_opts *opts) { int rc = 0, i; struct superblock_security_struct *sbsec = sb->s_security; @@ -452,9 +451,7 @@ static int selinux_get_mnt_opts(const struct super_block *sb, u32 len; char tmp; - *num_opts = 0; - *mount_options = NULL; - *mnt_opts_flags = NULL; + security_init_mnt_opts(opts); if (!sbsec->initialized) return -EINVAL; @@ -470,18 +467,18 @@ static int selinux_get_mnt_opts(const struct super_block *sb, /* count the number of mount options for this sb */ for (i = 0; i < 8; i++) { if (tmp & 0x01) - (*num_opts)++; + opts->num_mnt_opts++; tmp >>= 1; } - *mount_options = kcalloc(*num_opts, sizeof(char *), GFP_ATOMIC); - if (!*mount_options) { + opts->mnt_opts = kcalloc(opts->num_mnt_opts, sizeof(char *), GFP_ATOMIC); + if (!opts->mnt_opts) { rc = -ENOMEM; goto out_free; } - *mnt_opts_flags = kcalloc(*num_opts, sizeof(int), GFP_ATOMIC); - if (!*mnt_opts_flags) { + opts->mnt_opts_flags = kcalloc(opts->num_mnt_opts, sizeof(int), GFP_ATOMIC); + if (!opts->mnt_opts_flags) { rc = -ENOMEM; goto out_free; } @@ -491,22 +488,22 @@ static int selinux_get_mnt_opts(const struct super_block *sb, rc = security_sid_to_context(sbsec->sid, &context, &len); if (rc) goto out_free; - (*mount_options)[i] = context; - (*mnt_opts_flags)[i++] = FSCONTEXT_MNT; + opts->mnt_opts[i] = context; + opts->mnt_opts_flags[i++] = FSCONTEXT_MNT; } if (sbsec->flags & CONTEXT_MNT) { rc = security_sid_to_context(sbsec->mntpoint_sid, &context, &len); if (rc) goto out_free; - (*mount_options)[i] = context; - (*mnt_opts_flags)[i++] = CONTEXT_MNT; + opts->mnt_opts[i] = context; + opts->mnt_opts_flags[i++] = CONTEXT_MNT; } if (sbsec->flags & DEFCONTEXT_MNT) { rc = security_sid_to_context(sbsec->def_sid, &context, &len); if (rc) goto out_free; - (*mount_options)[i] = context; - (*mnt_opts_flags)[i++] = DEFCONTEXT_MNT; + opts->mnt_opts[i] = context; + opts->mnt_opts_flags[i++] = DEFCONTEXT_MNT; } if (sbsec->flags & ROOTCONTEXT_MNT) { struct inode *root = sbsec->sb->s_root->d_inode; @@ -515,24 +512,16 @@ static int selinux_get_mnt_opts(const struct super_block *sb, rc = security_sid_to_context(isec->sid, &context, &len); if (rc) goto out_free; - (*mount_options)[i] = context; - (*mnt_opts_flags)[i++] = ROOTCONTEXT_MNT; + opts->mnt_opts[i] = context; + opts->mnt_opts_flags[i++] = ROOTCONTEXT_MNT; } - BUG_ON(i != *num_opts); + BUG_ON(i != opts->num_mnt_opts); return 0; out_free: - /* don't leak context string if security_sid_to_context had an error */ - if (*mount_options && i) - for (; i > 0; i--) - kfree((*mount_options)[i-1]); - kfree(*mount_options); - *mount_options = NULL; - kfree(*mnt_opts_flags); - *mnt_opts_flags = NULL; - *num_opts = 0; + security_free_mnt_opts(opts); return rc; } @@ -553,12 +542,13 @@ static int bad_option(struct superblock_security_struct *sbsec, char flag, return 1; return 0; } + /* * Allow filesystems with binary mount data to explicitly set mount point * labeling information. */ -static int selinux_set_mnt_opts(struct super_block *sb, char **mount_options, - int *flags, int num_opts) +static int selinux_set_mnt_opts(struct super_block *sb, + struct security_mnt_opts *opts) { int rc = 0, i; struct task_security_struct *tsec = current->security; @@ -568,6 +558,9 @@ static int selinux_set_mnt_opts(struct super_block *sb, char **mount_options, struct inode_security_struct *root_isec = inode->i_security; u32 fscontext_sid = 0, context_sid = 0, rootcontext_sid = 0; u32 defcontext_sid = 0; + char **mount_options = opts->mnt_opts; + int *flags = opts->mnt_opts_flags; + int num_opts = opts->num_mnt_opts; mutex_lock(&sbsec->lock); @@ -588,6 +581,21 @@ static int selinux_set_mnt_opts(struct super_block *sb, char **mount_options, goto out; } + /* + * Binary mount data FS will come through this function twice. Once + * from an explicit call and once from the generic calls from the vfs. + * Since the generic VFS calls will not contain any security mount data + * we need to skip the double mount verification. + * + * This does open a hole in which we will not notice if the first + * mount using this sb set explict options and a second mount using + * this sb does not set any security options. (The first options + * will be used for both mounts) + */ + if (sbsec->initialized && (sb->s_type->fs_flags & FS_BINARY_MOUNTDATA) + && (num_opts == 0)) + goto out; + /* * parse the mount options, check if they are valid sids. * also check if someone is trying to mount the same sb more @@ -792,43 +800,14 @@ static void selinux_sb_clone_mnt_opts(const struct super_block *oldsb, mutex_unlock(&newsbsec->lock); } -/* - * string mount options parsing and call set the sbsec - */ -static int superblock_doinit(struct super_block *sb, void *data) +int selinux_parse_opts_str(char *options, struct security_mnt_opts *opts) { + char *p; char *context = NULL, *defcontext = NULL; char *fscontext = NULL, *rootcontext = NULL; - int rc = 0; - char *p, *options = data; - /* selinux only know about a fixed number of mount options */ - char *mnt_opts[NUM_SEL_MNT_OPTS]; - int mnt_opts_flags[NUM_SEL_MNT_OPTS], num_mnt_opts = 0; - - if (!data) - goto out; + int rc, num_mnt_opts = 0; - /* with the nfs patch this will become a goto out; */ - if (sb->s_type->fs_flags & FS_BINARY_MOUNTDATA) { - const char *name = sb->s_type->name; - /* NFS we understand. */ - if (!strcmp(name, "nfs")) { - struct nfs_mount_data *d = data; - - if (d->version != NFS_MOUNT_VERSION) - goto out; - - if (d->context[0]) { - context = kstrdup(d->context, GFP_KERNEL); - if (!context) { - rc = -ENOMEM; - goto out; - } - } - goto build_flags; - } else - goto out; - } + opts->num_mnt_opts = 0; /* Standard string-based options. */ while ((p = strsep(&options, "|")) != NULL) { @@ -901,26 +880,37 @@ static int superblock_doinit(struct super_block *sb, void *data) } } -build_flags: + rc = -ENOMEM; + opts->mnt_opts = kcalloc(NUM_SEL_MNT_OPTS, sizeof(char *), GFP_ATOMIC); + if (!opts->mnt_opts) + goto out_err; + + opts->mnt_opts_flags = kcalloc(NUM_SEL_MNT_OPTS, sizeof(int), GFP_ATOMIC); + if (!opts->mnt_opts_flags) { + kfree(opts->mnt_opts); + goto out_err; + } + if (fscontext) { - mnt_opts[num_mnt_opts] = fscontext; - mnt_opts_flags[num_mnt_opts++] = FSCONTEXT_MNT; + opts->mnt_opts[num_mnt_opts] = fscontext; + opts->mnt_opts_flags[num_mnt_opts++] = FSCONTEXT_MNT; } if (context) { - mnt_opts[num_mnt_opts] = context; - mnt_opts_flags[num_mnt_opts++] = CONTEXT_MNT; + opts->mnt_opts[num_mnt_opts] = context; + opts->mnt_opts_flags[num_mnt_opts++] = CONTEXT_MNT; } if (rootcontext) { - mnt_opts[num_mnt_opts] = rootcontext; - mnt_opts_flags[num_mnt_opts++] = ROOTCONTEXT_MNT; + opts->mnt_opts[num_mnt_opts] = rootcontext; + opts->mnt_opts_flags[num_mnt_opts++] = ROOTCONTEXT_MNT; } if (defcontext) { - mnt_opts[num_mnt_opts] = defcontext; - mnt_opts_flags[num_mnt_opts++] = DEFCONTEXT_MNT; + opts->mnt_opts[num_mnt_opts] = defcontext; + opts->mnt_opts_flags[num_mnt_opts++] = DEFCONTEXT_MNT; } -out: - rc = selinux_set_mnt_opts(sb, mnt_opts, mnt_opts_flags, num_mnt_opts); + opts->num_mnt_opts = num_mnt_opts; + return 0; + out_err: kfree(context); kfree(defcontext); @@ -928,6 +918,33 @@ out_err: kfree(rootcontext); return rc; } +/* + * string mount options parsing and call set the sbsec + */ +static int superblock_doinit(struct super_block *sb, void *data) +{ + int rc = 0; + char *options = data; + struct security_mnt_opts opts; + + security_init_mnt_opts(&opts); + + if (!data) + goto out; + + BUG_ON(sb->s_type->fs_flags & FS_BINARY_MOUNTDATA); + + rc = selinux_parse_opts_str(options, &opts); + if (rc) + goto out_err; + +out: + rc = selinux_set_mnt_opts(sb, &opts); + +out_err: + security_free_mnt_opts(&opts); + return rc; +} static inline u16 inode_mode_to_security_class(umode_t mode) { @@ -2253,7 +2270,7 @@ static inline void take_selinux_option(char **to, char *from, int *first, } } -static int selinux_sb_copy_data(struct file_system_type *type, void *orig, void *copy) +static int selinux_sb_copy_data(char *orig, char *copy) { int fnosec, fsec, rc = 0; char *in_save, *in_curr, *in_end; @@ -2263,12 +2280,6 @@ static int selinux_sb_copy_data(struct file_system_type *type, void *orig, void in_curr = orig; sec_curr = copy; - /* Binary mount data: just copy */ - if (type->fs_flags & FS_BINARY_MOUNTDATA) { - copy_page(sec_curr, in_curr); - goto out; - } - nosec = (char *)get_zeroed_page(GFP_KERNEL); if (!nosec) { rc = -ENOMEM; @@ -5251,6 +5262,8 @@ static struct security_operations selinux_ops = { .sb_get_mnt_opts = selinux_get_mnt_opts, .sb_set_mnt_opts = selinux_set_mnt_opts, .sb_clone_mnt_opts = selinux_sb_clone_mnt_opts, + .sb_parse_opts_str = selinux_parse_opts_str, + .inode_alloc_security = selinux_inode_alloc_security, .inode_free_security = selinux_inode_free_security, diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index 837ce420d2f..f7d2f03781f 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -35,6 +35,11 @@ #define POLICYDB_VERSION_MAX POLICYDB_VERSION_POLCAP #endif +#define CONTEXT_MNT 0x01 +#define FSCONTEXT_MNT 0x02 +#define ROOTCONTEXT_MNT 0x04 +#define DEFCONTEXT_MNT 0x08 + struct netlbl_lsm_secattr; extern int selinux_enabled; diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 770eb067e16..0241fd35967 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -189,17 +189,10 @@ static void smack_sb_free_security(struct super_block *sb) * Copy the Smack specific mount options out of the mount * options list. */ -static int smack_sb_copy_data(struct file_system_type *type, void *orig, - void *smackopts) +static int smack_sb_copy_data(char *orig, char *smackopts) { char *cp, *commap, *otheropts, *dp; - /* Binary mount data: just copy */ - if (type->fs_flags & FS_BINARY_MOUNTDATA) { - copy_page(smackopts, orig); - return 0; - } - otheropts = (char *)get_zeroed_page(GFP_KERNEL); if (otheropts == NULL) return -ENOMEM; -- cgit v1.2.3-70-g09d2 From eac738e6cea16bfbd7b9018d60d009aedd2d14b6 Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Mon, 11 Feb 2008 22:43:05 +0100 Subject: [IA64] convert sys_ptrace to arch_ptrace Convert sys_ptrace() to arch_ptrace(). Signed-off-by: Petr Tesarik Signed-off-by: Tony Luck --- arch/ia64/kernel/ptrace.c | 42 +++--------------------------------------- include/asm-ia64/ptrace.h | 2 -- 2 files changed, 3 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index e82fe296c2c..1dfff5a8f36 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -1470,46 +1470,13 @@ ptrace_disable (struct task_struct *child) child_psr->tb = 0; } -asmlinkage long -sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data) +long +arch_ptrace (struct task_struct *child, long request, long addr, long data) { struct pt_regs *pt; - struct task_struct *child; struct switch_stack *sw; long ret; - lock_kernel(); - ret = -EPERM; - if (request == PTRACE_TRACEME) { - ret = ptrace_traceme(); - goto out; - } - - ret = -ESRCH; - read_lock(&tasklist_lock); - { - child = find_task_by_pid(pid); - if (child) - get_task_struct(child); - } - read_unlock(&tasklist_lock); - if (!child) - goto out; - ret = -EPERM; - if (pid == 1) /* no messing around with init! */ - goto out_tsk; - - if (request == PTRACE_ATTACH) { - ret = ptrace_attach(child); - if (!ret) - arch_ptrace_attach(child); - goto out_tsk; - } - - ret = ptrace_check_attach(child, request == PTRACE_KILL); - if (ret < 0) - goto out_tsk; - pt = task_pt_regs(child); sw = (struct switch_stack *) (child->thread.ksp + 16); @@ -1594,7 +1561,7 @@ sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data) */ if (child->exit_state == EXIT_ZOMBIE) /* already dead */ - goto out_tsk; + return 0; child->exit_code = SIGKILL; ptrace_disable(child); @@ -1643,9 +1610,6 @@ sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data) goto out_tsk; } out_tsk: - put_task_struct(child); - out: - unlock_kernel(); return ret; } diff --git a/include/asm-ia64/ptrace.h b/include/asm-ia64/ptrace.h index 0bdce7dde1b..5b523409878 100644 --- a/include/asm-ia64/ptrace.h +++ b/include/asm-ia64/ptrace.h @@ -233,8 +233,6 @@ struct switch_stack { #include #include -#define __ARCH_SYS_PTRACE 1 - /* * We use the ia64_psr(regs)->ri to determine which of the three * instructions in bundle (16 bytes) took the sample. Generate -- cgit v1.2.3-70-g09d2 From 8db3f5254151c3a06a764bbb18283570ba1897bf Mon Sep 17 00:00:00 2001 From: Petr Tesarik Date: Mon, 11 Feb 2008 22:43:38 +0100 Subject: [IA64] remove duplicate code from arch_ptrace() Remove all code which does exactly the same thing as ptrace_request(). Signed-off-by: Petr Tesarik Signed-off-by: Tony Luck --- arch/ia64/kernel/ptrace.c | 96 ++++++++++++++--------------------------------- include/asm-ia64/ptrace.h | 7 ++++ 2 files changed, 36 insertions(+), 67 deletions(-) (limited to 'include') diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 1dfff5a8f36..f10c8b40dd3 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -1454,6 +1454,35 @@ ptrace_setregs (struct task_struct *child, struct pt_all_user_regs __user *ppr) return ret; } +void +user_enable_single_step (struct task_struct *child) +{ + struct ia64_psr *child_psr = ia64_psr(task_pt_regs(child)); + + set_tsk_thread_flag(child, TIF_SINGLESTEP); + child_psr->ss = 1; +} + +void +user_enable_block_step (struct task_struct *child) +{ + struct ia64_psr *child_psr = ia64_psr(task_pt_regs(child)); + + set_tsk_thread_flag(child, TIF_SINGLESTEP); + child_psr->tb = 1; +} + +void +user_disable_single_step (struct task_struct *child) +{ + struct ia64_psr *child_psr = ia64_psr(task_pt_regs(child)); + + /* make sure the single step/taken-branch trap bits are not set: */ + clear_tsk_thread_flag(child, TIF_SINGLESTEP); + child_psr->ss = 0; + child_psr->tb = 0; +} + /* * Called by kernel/ptrace.c when detaching.. * @@ -1528,73 +1557,6 @@ arch_ptrace (struct task_struct *child, long request, long addr, long data) ret = ptrace_request(child, PTRACE_SETSIGINFO, addr, data); goto out_tsk; - case PTRACE_SYSCALL: - /* continue and stop at next (return from) syscall */ - case PTRACE_CONT: - /* restart after signal. */ - ret = -EIO; - if (!valid_signal(data)) - goto out_tsk; - if (request == PTRACE_SYSCALL) - set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - else - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - child->exit_code = data; - - /* - * Make sure the single step/taken-branch trap bits - * are not set: - */ - clear_tsk_thread_flag(child, TIF_SINGLESTEP); - ia64_psr(pt)->ss = 0; - ia64_psr(pt)->tb = 0; - - wake_up_process(child); - ret = 0; - goto out_tsk; - - case PTRACE_KILL: - /* - * Make the child exit. Best I can do is send it a - * sigkill. Perhaps it should be put in the status - * that it wants to exit. - */ - if (child->exit_state == EXIT_ZOMBIE) - /* already dead */ - return 0; - child->exit_code = SIGKILL; - - ptrace_disable(child); - wake_up_process(child); - ret = 0; - goto out_tsk; - - case PTRACE_SINGLESTEP: - /* let child execute for one instruction */ - case PTRACE_SINGLEBLOCK: - ret = -EIO; - if (!valid_signal(data)) - goto out_tsk; - - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - set_tsk_thread_flag(child, TIF_SINGLESTEP); - if (request == PTRACE_SINGLESTEP) { - ia64_psr(pt)->ss = 1; - } else { - ia64_psr(pt)->tb = 1; - } - child->exit_code = data; - - /* give it a chance to run. */ - wake_up_process(child); - ret = 0; - goto out_tsk; - - case PTRACE_DETACH: - /* detach a process that was attached. */ - ret = ptrace_detach(child, data); - goto out_tsk; - case PTRACE_GETREGS: ret = ptrace_getregs(child, (struct pt_all_user_regs __user *) data); diff --git a/include/asm-ia64/ptrace.h b/include/asm-ia64/ptrace.h index 5b523409878..4b2a8d40ebc 100644 --- a/include/asm-ia64/ptrace.h +++ b/include/asm-ia64/ptrace.h @@ -312,6 +312,13 @@ struct switch_stack { #define arch_ptrace_attach(child) \ ptrace_attach_sync_user_rbs(child) + #define arch_has_single_step() (1) + extern void user_enable_single_step(struct task_struct *); + extern void user_disable_single_step(struct task_struct *); + + #define arch_has_block_step() (1) + extern void user_enable_block_step(struct task_struct *); + #endif /* !__KERNEL__ */ /* pt_all_user_regs is used for PTRACE_GETREGS PTRACE_SETREGS */ -- cgit v1.2.3-70-g09d2 From 9821b1f4a145b20db08108362f0b4caf4f0832a1 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Wed, 5 Mar 2008 19:02:23 -0700 Subject: [Blackfin] arch: current_l1_stack_save is a pointer, so use NULL rather than 0 Signed-off-by: Mike Frysinger Signed-off-by: Bryan Wu --- include/asm-blackfin/mmu_context.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-blackfin/mmu_context.h b/include/asm-blackfin/mmu_context.h index b5eb67596ad..f55ec3c23a9 100644 --- a/include/asm-blackfin/mmu_context.h +++ b/include/asm-blackfin/mmu_context.h @@ -73,7 +73,7 @@ static inline void destroy_context(struct mm_struct *mm) struct sram_list_struct *tmp; if (current_l1_stack_save == mm->context.l1_stack_save) - current_l1_stack_save = 0; + current_l1_stack_save = NULL; if (mm->context.l1_stack_save) free_l1stack(); -- cgit v1.2.3-70-g09d2 From 7b9726a7a0d8c70ea44a5ed23726748de344f223 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Thu, 6 Mar 2008 17:23:15 +0900 Subject: sh: Fix up the sh64 build. Signed-off-by: Paul Mundt --- arch/sh/lib64/udelay.c | 21 +++++++-------------- include/asm-sh/delay.h | 5 ----- 2 files changed, 7 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/arch/sh/lib64/udelay.c b/arch/sh/lib64/udelay.c index 23c7d17fb9f..d76bd801194 100644 --- a/arch/sh/lib64/udelay.c +++ b/arch/sh/lib64/udelay.c @@ -21,7 +21,7 @@ * a 1GHz box, that's about 2 seconds. */ -void __delay(int loops) +void __delay(unsigned long loops) { long long dummy; __asm__ __volatile__("gettr tr0, %1\n\t" @@ -33,24 +33,17 @@ void __delay(int loops) :"0"(loops)); } -void __udelay(unsigned long long usecs, unsigned long lpj) +inline void __const_udelay(unsigned long xloops) { - usecs *= (((unsigned long long) HZ << 32) / 1000000) * lpj; - __delay((long long) usecs >> 32); + __delay(xloops * (HZ * cpu_data[raw_smp_processor_id()].loops_per_jiffy)); } -void __ndelay(unsigned long long nsecs, unsigned long lpj) +void __udelay(unsigned long usecs) { - nsecs *= (((unsigned long long) HZ << 32) / 1000000000) * lpj; - __delay((long long) nsecs >> 32); + __const_udelay(usecs * 0x000010c6); /* 2**32 / 1000000 */ } -void udelay(unsigned long usecs) +void __ndelay(unsigned long nsecs) { - __udelay(usecs, cpu_data[raw_smp_processor_id()].loops_per_jiffy); -} - -void ndelay(unsigned long nsecs) -{ - __ndelay(nsecs, cpu_data[raw_smp_processor_id()].loops_per_jiffy); + __const_udelay(nsecs * 0x00000005); } diff --git a/include/asm-sh/delay.h b/include/asm-sh/delay.h index d5d46404100..4b16bf9b56b 100644 --- a/include/asm-sh/delay.h +++ b/include/asm-sh/delay.h @@ -15,7 +15,6 @@ extern void __ndelay(unsigned long nsecs); extern void __const_udelay(unsigned long xloops); extern void __delay(unsigned long loops); -#ifdef CONFIG_SUPERH32 #define udelay(n) (__builtin_constant_p(n) ? \ ((n) > 20000 ? __bad_udelay() : __const_udelay((n) * 0x10c6ul)) : \ __udelay(n)) @@ -23,9 +22,5 @@ extern void __delay(unsigned long loops); #define ndelay(n) (__builtin_constant_p(n) ? \ ((n) > 20000 ? __bad_ndelay() : __const_udelay((n) * 5ul)) : \ __ndelay(n)) -#else -extern void udelay(unsigned long usecs); -extern void ndelay(unsigned long nsecs); -#endif #endif /* __ASM_SH_DELAY_H */ -- cgit v1.2.3-70-g09d2 From 2ab42e24d63193d78f2e888a170b208f4776aaba Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 6 Mar 2008 10:57:54 +0000 Subject: Really unexport asm/page.h Commit ed7b1889da256977574663689b598d88950bbd23 removed page.h from include/asm-generic/Kbuild so that it shouldn't get exported. However, it was redundantly listed in asm-mn10300/Kbuild and asm-x86/Kbuild too. Remove those as well, so it really stops being exported on those architectures. Also remove the redundant listing of ptrace.h and termios.h from mn10300. Signed-off-by: David Woodhouse Acked-by: David Howells Signed-off-by: Linus Torvalds --- include/asm-mn10300/Kbuild | 4 ---- include/asm-x86/Kbuild | 1 - 2 files changed, 5 deletions(-) (limited to 'include') diff --git a/include/asm-mn10300/Kbuild b/include/asm-mn10300/Kbuild index 79384c537dc..c68e1680da0 100644 --- a/include/asm-mn10300/Kbuild +++ b/include/asm-mn10300/Kbuild @@ -1,5 +1 @@ include include/asm-generic/Kbuild.asm - -unifdef-y += termios.h -unifdef-y += ptrace.h -unifdef-y += page.h diff --git a/include/asm-x86/Kbuild b/include/asm-x86/Kbuild index b04a7ff46df..3b8160a2b47 100644 --- a/include/asm-x86/Kbuild +++ b/include/asm-x86/Kbuild @@ -16,7 +16,6 @@ unifdef-y += ist.h unifdef-y += mce.h unifdef-y += msr.h unifdef-y += mtrr.h -unifdef-y += page.h unifdef-y += posix_types_32.h unifdef-y += posix_types_64.h unifdef-y += ptrace.h -- cgit v1.2.3-70-g09d2 From 45e18c228e131592a922859e1525770a1803191d Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Thu, 6 Mar 2008 09:49:01 -0800 Subject: [IA64] kprobes arch consolidation build fix ia64 named their handler kprobes_fault_handler while all other arches used kprobe_fault_handler. Change the function definition and header declaration. Signed-off-by: Harvey Harrison Signed-off-by: Tony Luck --- arch/ia64/kernel/kprobes.c | 2 +- arch/ia64/mm/fault.c | 2 +- include/asm-ia64/kprobes.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index 615c3d2b634..8d9a446a0d1 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -838,7 +838,7 @@ out: return 1; } -int __kprobes kprobes_fault_handler(struct pt_regs *regs, int trapnr) +int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) { struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index 3e69881648a..23088bed111 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -26,7 +26,7 @@ static inline int notify_page_fault(struct pt_regs *regs, int trap) if (!user_mode(regs)) { /* kprobe_running() needs smp_processor_id() */ preempt_disable(); - if (kprobe_running() && kprobes_fault_handler(regs, trap)) + if (kprobe_running() && kprobe_fault_handler(regs, trap)) ret = 1; preempt_enable(); } diff --git a/include/asm-ia64/kprobes.h b/include/asm-ia64/kprobes.h index 8233b3a964c..d03bf9ff68e 100644 --- a/include/asm-ia64/kprobes.h +++ b/include/asm-ia64/kprobes.h @@ -117,7 +117,7 @@ struct arch_specific_insn { unsigned short slot; }; -extern int kprobes_fault_handler(struct pt_regs *regs, int trapnr); +extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); extern int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data); -- cgit v1.2.3-70-g09d2 From 1c61fc40fc264059ff41a614ed2d899127288281 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 5 Mar 2008 13:58:17 -0800 Subject: slab - use angle brackets for include of kmalloc_sizes.h Make them all use angle brackets and the directory name. Acked-by: Pekka Enberg Signed-off-by: Joe Perches Signed-off-by: Christoph Lameter --- include/linux/slab_def.h | 4 ++-- mm/slab.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index fcc48096ee6..39c3a5eb8eb 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -41,7 +41,7 @@ static inline void *kmalloc(size_t size, gfp_t flags) goto found; \ else \ i++; -#include "kmalloc_sizes.h" +#include #undef CACHE { extern void __you_cannot_kmalloc_that_much(void); @@ -75,7 +75,7 @@ static inline void *kmalloc_node(size_t size, gfp_t flags, int node) goto found; \ else \ i++; -#include "kmalloc_sizes.h" +#include #undef CACHE { extern void __you_cannot_kmalloc_that_much(void); diff --git a/mm/slab.c b/mm/slab.c index 5d16c8a3049..f7faff72cf5 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -333,7 +333,7 @@ static __always_inline int index_of(const size_t size) return i; \ else \ i++; -#include "linux/kmalloc_sizes.h" +#include #undef CACHE __bad_size(); } else -- cgit v1.2.3-70-g09d2 From 810b38179e9e4d4f57b4b733767bb08f8291a965 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 29 Feb 2008 15:21:01 -0500 Subject: sched: retain vruntime Kei Tokunaga reported an interactivity problem when moving tasks between control groups. Tasks would retain their old vruntime when moved between groups, this can cause funny lags. Re-set the vruntime on group move to fit within the new tree. Reported-by: Kei Tokunaga Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ++++ kernel/sched.c | 5 +++++ kernel/sched_fair.c | 14 ++++++++++++++ 3 files changed, 23 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 9ae4030067a..11d8e9a74ef 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -899,6 +899,10 @@ struct sched_class { int running); void (*prio_changed) (struct rq *this_rq, struct task_struct *task, int oldprio, int running); + +#ifdef CONFIG_FAIR_GROUP_SCHED + void (*moved_group) (struct task_struct *p); +#endif }; struct load_weight { diff --git a/kernel/sched.c b/kernel/sched.c index dcd553cc4ee..0b949c4e73a 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -7625,6 +7625,11 @@ void sched_move_task(struct task_struct *tsk) set_task_rq(tsk, task_cpu(tsk)); +#ifdef CONFIG_FAIR_GROUP_SCHED + if (tsk->sched_class->moved_group) + tsk->sched_class->moved_group(tsk); +#endif + if (on_rq) { if (unlikely(running)) tsk->sched_class->set_curr_task(rq); diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 3df4d46994c..e2a53051561 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1353,6 +1353,16 @@ static void set_curr_task_fair(struct rq *rq) set_next_entity(cfs_rq_of(se), se); } +#ifdef CONFIG_FAIR_GROUP_SCHED +static void moved_group_fair(struct task_struct *p) +{ + struct cfs_rq *cfs_rq = task_cfs_rq(p); + + update_curr(cfs_rq); + place_entity(cfs_rq, &p->se, 1); +} +#endif + /* * All the scheduling class methods: */ @@ -1381,6 +1391,10 @@ static const struct sched_class fair_sched_class = { .prio_changed = prio_changed_fair, .switched_to = switched_to_fair, + +#ifdef CONFIG_FAIR_GROUP_SCHED + .moved_group = moved_group_fair, +#endif }; #ifdef CONFIG_SCHED_DEBUG -- cgit v1.2.3-70-g09d2 From c37dcd334c0b0a46a90cfa13b9f69e2aaa89bc09 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 6 Mar 2008 12:34:50 -0500 Subject: NFS: Fix the fsid revalidation in nfs_update_inode() When we detect that we've crossed a mountpoint on the remote server, we must take care not to use that inode to revalidate the fsid on our current superblock. To do so, we label the inode as a remote mountpoint, and check for that in nfs_update_inode(). Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 6 ++++-- include/linux/nfs_fs.h | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 966a8850aa3..a4c7cf2bff3 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -299,6 +299,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) else inode->i_op = &nfs_mountpoint_inode_operations; inode->i_fop = NULL; + set_bit(NFS_INO_MOUNTPOINT, &nfsi->flags); } } else if (S_ISLNK(inode->i_mode)) inode->i_op = &nfs_symlink_inode_operations; @@ -1003,8 +1004,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) server = NFS_SERVER(inode); /* Update the fsid? */ - if (S_ISDIR(inode->i_mode) - && !nfs_fsid_equal(&server->fsid, &fattr->fsid)) + if (S_ISDIR(inode->i_mode) && + !nfs_fsid_equal(&server->fsid, &fattr->fsid) && + !test_bit(NFS_INO_MOUNTPOINT, &nfsi->flags)) server->fsid = fattr->fsid; /* diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index a69ba80f2df..f4a0e4c218d 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -195,6 +195,7 @@ struct nfs_inode { #define NFS_INO_ADVISE_RDPLUS (1) /* advise readdirplus */ #define NFS_INO_STALE (2) /* possible stale inode */ #define NFS_INO_ACL_LRU_SET (3) /* Inode is on the LRU list */ +#define NFS_INO_MOUNTPOINT (4) /* inode is remote mountpoint */ static inline struct nfs_inode *NFS_I(const struct inode *inode) { -- cgit v1.2.3-70-g09d2